diff options
Diffstat (limited to '0001-Fix-illegal-opcode-bug-in-caffe2-40584.patch')
-rw-r--r-- | 0001-Fix-illegal-opcode-bug-in-caffe2-40584.patch | 54 |
1 file changed, 54 insertions, 0 deletions
diff --git a/0001-Fix-illegal-opcode-bug-in-caffe2-40584.patch b/0001-Fix-illegal-opcode-bug-in-caffe2-40584.patch new file mode 100644 index 0000000..5fef70d --- /dev/null +++ b/0001-Fix-illegal-opcode-bug-in-caffe2-40584.patch @@ -0,0 +1,54 @@ +From 7676682584d0caf9243bce74ea0a88711ec4a807 Mon Sep 17 00:00:00 2001 +From: Mitchell Spryn <mspryn@fb.com> +Date: Fri, 26 Jun 2020 11:08:56 -0700 +Subject: [PATCH] Fix illegal opcode bug in caffe2 (#40584) + +Summary: +Pull Request resolved: https://github.com/pytorch/pytorch/pull/40584 + +Also patch [this github issue](https://github.com/pytorch/pytorch/issues/33124) +involving an illegal assembly instruction in 8x8-dq-aarch64-neon.S. + +Test Plan: +Build binaries, copy to shaker, run executables. Also run all +existing caffe tests. + +Reviewed By: kimishpatel + +Differential Revision: D22240670 + +fbshipit-source-id: 51960266ce58699fe6830bcf75632b92a122f638 +--- + .../cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +diff --git a/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S b/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S +index 7a67c6d401..b8bde02006 100644 +--- a/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S ++++ b/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/8x8-dq-aarch64-neon.S +@@ -687,14 +687,14 @@ BEGIN_FUNCTION pytorch_q8gemm_dq_ukernel_8x8__aarch64_neon + + SUB x1, x1, 4 + +- MOV V8.4s, V9.4s +- MOV v10.4s, v11.4s +- MOV v12.4s, V13.4s +- MOV V14.4s, V15.4s +- MOV V16.4s, V17.4s +- MOV V18.4s, V19.4s +- MOV V20.4s, V21.4s +- MOV V22.4s, V23.4s ++ MOV V8.16b, V9.16b ++ MOV v10.16b, v11.16b ++ MOV v12.16b, V13.16b ++ MOV V14.16b, V15.16b ++ MOV V16.16b, V17.16b ++ MOV V18.16b, V19.16b ++ MOV V20.16b, V21.16b ++ MOV V22.16b, V23.16b + + 5: + CMP x1, 2 +-- +2.18.4 + |