diff --git a/nacl/nacl-20110221/MACROS b/nacl/nacl-20110221/MACROS
new file mode 100644
index 00000000..26f6cd02
--- /dev/null
+++ b/nacl/nacl-20110221/MACROS
@@ -0,0 +1,56 @@
+crypto_verify
+crypto_verify_BYTES
+crypto_core
+crypto_core_OUTPUTBYTES
+crypto_core_INPUTBYTES
+crypto_core_KEYBYTES
+crypto_core_CONSTBYTES
+crypto_hashblocks
+crypto_hashblocks_STATEBYTES
+crypto_hashblocks_BLOCKBYTES
+crypto_hash
+crypto_hash_BYTES
+crypto_stream
+crypto_stream_xor
+crypto_stream_beforenm
+crypto_stream_afternm
+crypto_stream_xor_afternm
+crypto_stream_KEYBYTES
+crypto_stream_NONCEBYTES
+crypto_stream_BEFORENMBYTES
+crypto_onetimeauth
+crypto_onetimeauth_verify
+crypto_onetimeauth_BYTES
+crypto_onetimeauth_KEYBYTES
+crypto_auth
+crypto_auth_verify
+crypto_auth_BYTES
+crypto_auth_KEYBYTES
+crypto_secretbox
+crypto_secretbox_open
+crypto_secretbox_KEYBYTES
+crypto_secretbox_NONCEBYTES
+crypto_secretbox_ZEROBYTES
+crypto_secretbox_BOXZEROBYTES
+crypto_scalarmult
+crypto_scalarmult_base
+crypto_scalarmult_BYTES
+crypto_scalarmult_SCALARBYTES
+crypto_box
+crypto_box_open
+crypto_box_keypair
+crypto_box_beforenm
+crypto_box_afternm
+crypto_box_open_afternm
+crypto_box_PUBLICKEYBYTES
+crypto_box_SECRETKEYBYTES
+crypto_box_BEFORENMBYTES
+crypto_box_NONCEBYTES
+crypto_box_ZEROBYTES
+crypto_box_BOXZEROBYTES
+crypto_sign
+crypto_sign_open
+crypto_sign_keypair
+crypto_sign_BYTES
+crypto_sign_PUBLICKEYBYTES
+crypto_sign_SECRETKEYBYTES
diff --git a/nacl/nacl-20110221/OPERATIONS b/nacl/nacl-20110221/OPERATIONS
new file mode 100644
index 00000000..5fc25d35
--- /dev/null
+++ b/nacl/nacl-20110221/OPERATIONS
@@ -0,0 +1,11 @@
+crypto_verify
+crypto_core
+crypto_hashblocks
+crypto_hash
+crypto_stream
+crypto_onetimeauth
+crypto_auth
+crypto_secretbox
+crypto_scalarmult
+crypto_box
+crypto_sign
diff --git a/nacl/nacl-20110221/PROTOTYPES.c b/nacl/nacl-20110221/PROTOTYPES.c
new file mode 100644
index 00000000..bc8ca531
--- /dev/null
+++ b/nacl/nacl-20110221/PROTOTYPES.c
@@ -0,0 +1,26 @@
+extern int crypto_verify(const unsigned char *,const unsigned char *);
+extern int crypto_core(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_hashblocks(unsigned char *,const unsigned char *,unsigned long long);
+extern int crypto_hash(unsigned char *,const unsigned char *,unsigned long long);
+extern int crypto_stream(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_beforenm(unsigned char *,const unsigned char *);
+extern int crypto_stream_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_onetimeauth(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_onetimeauth_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_auth(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_auth_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_secretbox(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_secretbox_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_scalarmult(unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_scalarmult_base(unsigned char *,const unsigned char *);
+extern int crypto_box(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_keypair(unsigned char *,unsigned char *);
+extern int crypto_box_beforenm(unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_box_open_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_sign(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_sign_open(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_sign_keypair(unsigned char *,unsigned char *);
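These are the generic C prototypes that the NaCl build instantiates for whichever primitive is selected. A minimal usage sketch of the crypto_box calls declared above, assuming the NaCl headers and a randombytes() implementation are available at build time and using the standard padding convention (the first crypto_box_ZEROBYTES of the plaintext buffer are zero; the first crypto_box_BOXZEROBYTES of the ciphertext buffer come back as zero); the function name box_roundtrip_example is illustrative only:

#include <string.h>
#include "crypto_box.h"
#include "randombytes.h"

int box_roundtrip_example(void)
{
  unsigned char pk[crypto_box_PUBLICKEYBYTES], sk[crypto_box_SECRETKEYBYTES];
  unsigned char n[crypto_box_NONCEBYTES];
  unsigned char m[crypto_box_ZEROBYTES + 5] = {0};   /* 32 zero bytes, then "hello" */
  unsigned char c[sizeof m];
  unsigned char opened[sizeof m];

  crypto_box_keypair(pk,sk);                  /* a box to ourselves, purely for illustration */
  randombytes(n,sizeof n);
  memcpy(m + crypto_box_ZEROBYTES,"hello",5);

  if (crypto_box(c,m,sizeof m,n,pk,sk) != 0) return -1;
  return crypto_box_open(opened,c,sizeof c,n,pk,sk);   /* 0 on success, -1 on forgery */
}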
diff --git a/nacl/nacl-20110221/PROTOTYPES.cpp b/nacl/nacl-20110221/PROTOTYPES.cpp
new file mode 100644
index 00000000..4318a049
--- /dev/null
+++ b/nacl/nacl-20110221/PROTOTYPES.cpp
@@ -0,0 +1,17 @@
+extern std::string crypto_auth(const std::string &,const std::string &);
+extern void crypto_auth_verify(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box(const std::string &,const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box_open(const std::string &,const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box_keypair(std::string *);
+extern std::string crypto_hash(const std::string &);
+extern std::string crypto_onetimeauth(const std::string &,const std::string &);
+extern void crypto_onetimeauth_verify(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_scalarmult(const std::string &,const std::string &);
+extern std::string crypto_scalarmult_base(const std::string &);
+extern std::string crypto_secretbox(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_secretbox_open(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_stream(size_t,const std::string &,const std::string &);
+extern std::string crypto_stream_xor(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_sign(const std::string &,const std::string &);
+extern std::string crypto_sign_open(const std::string &,const std::string &);
+extern std::string crypto_sign_keypair(std::string *);
diff --git a/nacl/nacl-20110221/build_android/MACROS b/nacl/nacl-20110221/build_android/MACROS
new file mode 100644
index 00000000..26f6cd02
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/MACROS
@@ -0,0 +1,56 @@
+crypto_verify
+crypto_verify_BYTES
+crypto_core
+crypto_core_OUTPUTBYTES
+crypto_core_INPUTBYTES
+crypto_core_KEYBYTES
+crypto_core_CONSTBYTES
+crypto_hashblocks
+crypto_hashblocks_STATEBYTES
+crypto_hashblocks_BLOCKBYTES
+crypto_hash
+crypto_hash_BYTES
+crypto_stream
+crypto_stream_xor
+crypto_stream_beforenm
+crypto_stream_afternm
+crypto_stream_xor_afternm
+crypto_stream_KEYBYTES
+crypto_stream_NONCEBYTES
+crypto_stream_BEFORENMBYTES
+crypto_onetimeauth
+crypto_onetimeauth_verify
+crypto_onetimeauth_BYTES
+crypto_onetimeauth_KEYBYTES
+crypto_auth
+crypto_auth_verify
+crypto_auth_BYTES
+crypto_auth_KEYBYTES
+crypto_secretbox
+crypto_secretbox_open
+crypto_secretbox_KEYBYTES
+crypto_secretbox_NONCEBYTES
+crypto_secretbox_ZEROBYTES
+crypto_secretbox_BOXZEROBYTES
+crypto_scalarmult
+crypto_scalarmult_base
+crypto_scalarmult_BYTES
+crypto_scalarmult_SCALARBYTES
+crypto_box
+crypto_box_open
+crypto_box_keypair
+crypto_box_beforenm
+crypto_box_afternm
+crypto_box_open_afternm
+crypto_box_PUBLICKEYBYTES
+crypto_box_SECRETKEYBYTES
+crypto_box_BEFORENMBYTES
+crypto_box_NONCEBYTES
+crypto_box_ZEROBYTES
+crypto_box_BOXZEROBYTES
+crypto_sign
+crypto_sign_open
+crypto_sign_keypair
+crypto_sign_BYTES
+crypto_sign_PUBLICKEYBYTES
+crypto_sign_SECRETKEYBYTES
diff --git a/nacl/nacl-20110221/build_android/PROTOTYPES.c b/nacl/nacl-20110221/build_android/PROTOTYPES.c
new file mode 100644
index 00000000..bc8ca531
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/PROTOTYPES.c
@@ -0,0 +1,26 @@
+extern int crypto_verify(const unsigned char *,const unsigned char *);
+extern int crypto_core(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_hashblocks(unsigned char *,const unsigned char *,unsigned long long);
+extern int crypto_hash(unsigned char *,const unsigned char *,unsigned long long);
+extern int crypto_stream(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_beforenm(unsigned char *,const unsigned char *);
+extern int crypto_stream_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_onetimeauth(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_onetimeauth_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_auth(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_auth_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_secretbox(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_secretbox_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_scalarmult(unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_scalarmult_base(unsigned char *,const unsigned char *);
+extern int crypto_box(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_keypair(unsigned char *,unsigned char *);
+extern int crypto_box_beforenm(unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_box_open_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_sign(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_sign_open(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_sign_keypair(unsigned char *,unsigned char *);
diff --git a/nacl/nacl-20110221/build_android/PROTOTYPES.cpp b/nacl/nacl-20110221/build_android/PROTOTYPES.cpp
new file mode 100644
index 00000000..4318a049
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/PROTOTYPES.cpp
@@ -0,0 +1,17 @@
+extern std::string crypto_auth(const std::string &,const std::string &);
+extern void crypto_auth_verify(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box(const std::string &,const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box_open(const std::string &,const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box_keypair(std::string *);
+extern std::string crypto_hash(const std::string &);
+extern std::string crypto_onetimeauth(const std::string &,const std::string &);
+extern void crypto_onetimeauth_verify(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_scalarmult(const std::string &,const std::string &);
+extern std::string crypto_scalarmult_base(const std::string &);
+extern std::string crypto_secretbox(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_secretbox_open(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_stream(size_t,const std::string &,const std::string &);
+extern std::string crypto_stream_xor(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_sign(const std::string &,const std::string &);
+extern std::string crypto_sign_open(const std::string &,const std::string &);
+extern std::string crypto_sign_keypair(std::string *);
diff --git a/nacl/nacl-20110221/build_android/api.h b/nacl/nacl-20110221/build_android/api.h
new file mode 100644
index 00000000..352240c0
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/api.h
@@ -0,0 +1,3 @@
+#define CRYPTO_SECRETKEYBYTES 64
+#define CRYPTO_PUBLICKEYBYTES 32
+#define CRYPTO_BYTES 64
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256.h b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256.h
new file mode 100644
index 00000000..6baadd66
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256.h
@@ -0,0 +1,32 @@
+#ifndef crypto_auth_hmacsha256_H
+#define crypto_auth_hmacsha256_H
+
+#define crypto_auth_hmacsha256_ref_BYTES 32
+#define crypto_auth_hmacsha256_ref_KEYBYTES 32
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_auth_hmacsha256_ref(const std::string &,const std::string &);
+extern void crypto_auth_hmacsha256_ref_verify(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_auth_hmacsha256_ref(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_auth_hmacsha256_ref_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_auth_hmacsha256 crypto_auth_hmacsha256_ref
+/* POTATO crypto_auth_hmacsha256_ref crypto_auth_hmacsha256_ref crypto_auth_hmacsha256 */
+#define crypto_auth_hmacsha256_verify crypto_auth_hmacsha256_ref_verify
+/* POTATO crypto_auth_hmacsha256_ref_verify crypto_auth_hmacsha256_ref crypto_auth_hmacsha256 */
+#define crypto_auth_hmacsha256_BYTES crypto_auth_hmacsha256_ref_BYTES
+/* POTATO crypto_auth_hmacsha256_ref_BYTES crypto_auth_hmacsha256_ref crypto_auth_hmacsha256 */
+#define crypto_auth_hmacsha256_KEYBYTES crypto_auth_hmacsha256_ref_KEYBYTES
+/* POTATO crypto_auth_hmacsha256_ref_KEYBYTES crypto_auth_hmacsha256_ref crypto_auth_hmacsha256 */
+#define crypto_auth_hmacsha256_IMPLEMENTATION "crypto_auth/hmacsha256/ref"
+#ifndef crypto_auth_hmacsha256_ref_VERSION
+#define crypto_auth_hmacsha256_ref_VERSION "-"
+#endif
+#define crypto_auth_hmacsha256_VERSION crypto_auth_hmacsha256_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/api.h b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/api.h
new file mode 100644
index 00000000..c224d9d5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/crypto_auth.h b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/crypto_auth.h
new file mode 100644
index 00000000..cdd597e6
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/crypto_auth.h
@@ -0,0 +1,18 @@
+#ifndef crypto_auth_H
+#define crypto_auth_H
+
+#include "crypto_auth_hmacsha256.h"
+
+#define crypto_auth crypto_auth_hmacsha256
+/* CHEESEBURGER crypto_auth_hmacsha256 */
+#define crypto_auth_verify crypto_auth_hmacsha256_verify
+/* CHEESEBURGER crypto_auth_hmacsha256_verify */
+#define crypto_auth_BYTES crypto_auth_hmacsha256_BYTES
+/* CHEESEBURGER crypto_auth_hmacsha256_BYTES */
+#define crypto_auth_KEYBYTES crypto_auth_hmacsha256_KEYBYTES
+/* CHEESEBURGER crypto_auth_hmacsha256_KEYBYTES */
+#define crypto_auth_PRIMITIVE "hmacsha256"
+#define crypto_auth_IMPLEMENTATION crypto_auth_hmacsha256_IMPLEMENTATION
+#define crypto_auth_VERSION crypto_auth_hmacsha256_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/hmac.c b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/hmac.c
new file mode 100644
index 00000000..8ab30bb4
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/hmac.c
@@ -0,0 +1,83 @@
+/*
+ * 20080913
+ * D. J. Bernstein
+ * Public domain.
+ * */
+
+#include "crypto_hashblocks_sha256.h"
+#include "crypto_auth.h"
+
+#define blocks crypto_hashblocks_sha256
+
+typedef unsigned int uint32;
+
+static const char iv[32] = {
+  0x6a,0x09,0xe6,0x67,
+  0xbb,0x67,0xae,0x85,
+  0x3c,0x6e,0xf3,0x72,
+  0xa5,0x4f,0xf5,0x3a,
+  0x51,0x0e,0x52,0x7f,
+  0x9b,0x05,0x68,0x8c,
+  0x1f,0x83,0xd9,0xab,
+  0x5b,0xe0,0xcd,0x19,
+} ;
+
+int crypto_auth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char h[32];
+  unsigned char padded[128];
+  int i;
+  unsigned long long bits = 512 + (inlen << 3);
+
+  for (i = 0;i < 32;++i) h[i] = iv[i];
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x36;
+  for (i = 32;i < 64;++i) padded[i] = 0x36;
+
+  blocks(h,padded,64);
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 63;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 56) {
+    for (i = inlen + 1;i < 56;++i) padded[i] = 0;
+    padded[56] = bits >> 56;
+    padded[57] = bits >> 48;
+    padded[58] = bits >> 40;
+    padded[59] = bits >> 32;
+    padded[60] = bits >> 24;
+    padded[61] = bits >> 16;
+    padded[62] = bits >> 8;
+    padded[63] = bits;
+    blocks(h,padded,64);
+  } else {
+    for (i = inlen + 1;i < 120;++i) padded[i] = 0;
+    padded[120] = bits >> 56;
+    padded[121] = bits >> 48;
+    padded[122] = bits >> 40;
+    padded[123] = bits >> 32;
+    padded[124] = bits >> 24;
+    padded[125] = bits >> 16;
+    padded[126] = bits >> 8;
+    padded[127] = bits;
+    blocks(h,padded,128);
+  }
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x5c;
+  for (i = 32;i < 64;++i) padded[i] = 0x5c;
+  for (i = 0;i < 32;++i) padded[64 + i] = h[i];
+
+  for (i = 0;i < 32;++i) out[i] = iv[i];
+
+  for (i = 32;i < 64;++i) padded[64 + i] = 0;
+  padded[64 + 32] = 0x80;
+  padded[64 + 62] = 3;
+
+  blocks(out,padded,128);
+
+  return 0;
+}
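This is the standard HMAC construction, HMAC_K(m) = H((K xor opad) || H((K xor ipad) || m)) with ipad bytes 0x36 and opad bytes 0x5c, driven directly through crypto_hashblocks_sha256. Because only the block function is used, the SHA-256 message padding is built by hand: the 0x80 terminator plus the big-endian bit length, here 512 + 8*inlen for the inner hash. The fixed bytes at the end encode the outer hash's length field: its input is 64 + 32 = 96 bytes = 0x300 bits, hence padded[64 + 32] = 0x80 and padded[64 + 62] = 3, with the remaining length bytes left zero.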
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/verify.c b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/verify.c
new file mode 100644
index 00000000..96ff0ea8
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha256_ref/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_32.h"
+#include "crypto_auth.h"
+
+int crypto_auth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[32];
+  crypto_auth(correct,in,inlen,k);
+  return crypto_verify_32(h,correct);
+}
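A caller-side sketch of the pair above, assuming a 32-byte key k; crypto_verify_32 compares the two tags in constant time, so verification does not leak which bytes differ:

#include "crypto_auth.h"

int auth_roundtrip_example(const unsigned char k[crypto_auth_KEYBYTES])
{
  static const unsigned char msg[] = "attack at dawn";
  unsigned char a[crypto_auth_BYTES];

  crypto_auth(a,msg,sizeof msg - 1,k);                 /* compute the 32-byte tag */
  return crypto_auth_verify(a,msg,sizeof msg - 1,k);   /* 0 = authentic, -1 = reject */
}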
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256.h b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256.h
new file mode 100644
index 00000000..9de2043f
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256.h
@@ -0,0 +1,32 @@
+#ifndef crypto_auth_hmacsha512256_H
+#define crypto_auth_hmacsha512256_H
+
+#define crypto_auth_hmacsha512256_ref_BYTES 32
+#define crypto_auth_hmacsha512256_ref_KEYBYTES 32
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_auth_hmacsha512256_ref(const std::string &,const std::string &);
+extern void crypto_auth_hmacsha512256_ref_verify(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_auth_hmacsha512256_ref(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_auth_hmacsha512256_ref_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_auth_hmacsha512256 crypto_auth_hmacsha512256_ref
+/* POTATO crypto_auth_hmacsha512256_ref crypto_auth_hmacsha512256_ref crypto_auth_hmacsha512256 */
+#define crypto_auth_hmacsha512256_verify crypto_auth_hmacsha512256_ref_verify
+/* POTATO crypto_auth_hmacsha512256_ref_verify crypto_auth_hmacsha512256_ref crypto_auth_hmacsha512256 */
+#define crypto_auth_hmacsha512256_BYTES crypto_auth_hmacsha512256_ref_BYTES
+/* POTATO crypto_auth_hmacsha512256_ref_BYTES crypto_auth_hmacsha512256_ref crypto_auth_hmacsha512256 */
+#define crypto_auth_hmacsha512256_KEYBYTES crypto_auth_hmacsha512256_ref_KEYBYTES
+/* POTATO crypto_auth_hmacsha512256_ref_KEYBYTES crypto_auth_hmacsha512256_ref crypto_auth_hmacsha512256 */
+#define crypto_auth_hmacsha512256_IMPLEMENTATION "crypto_auth/hmacsha512256/ref"
+#ifndef crypto_auth_hmacsha512256_ref_VERSION
+#define crypto_auth_hmacsha512256_ref_VERSION "-"
+#endif
+#define crypto_auth_hmacsha512256_VERSION crypto_auth_hmacsha512256_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/api.h b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/api.h
new file mode 100644
index 00000000..c224d9d5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/crypto_auth.h b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/crypto_auth.h
new file mode 100644
index 00000000..d11716ee
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/crypto_auth.h
@@ -0,0 +1,18 @@
+#ifndef crypto_auth_H
+#define crypto_auth_H
+
+#include "crypto_auth_hmacsha512256.h"
+
+#define crypto_auth crypto_auth_hmacsha512256
+/* CHEESEBURGER crypto_auth_hmacsha512256 */
+#define crypto_auth_verify crypto_auth_hmacsha512256_verify
+/* CHEESEBURGER crypto_auth_hmacsha512256_verify */
+#define crypto_auth_BYTES crypto_auth_hmacsha512256_BYTES
+/* CHEESEBURGER crypto_auth_hmacsha512256_BYTES */
+#define crypto_auth_KEYBYTES crypto_auth_hmacsha512256_KEYBYTES
+/* CHEESEBURGER crypto_auth_hmacsha512256_KEYBYTES */
+#define crypto_auth_PRIMITIVE "hmacsha512256"
+#define crypto_auth_IMPLEMENTATION crypto_auth_hmacsha512256_IMPLEMENTATION
+#define crypto_auth_VERSION crypto_auth_hmacsha512256_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/hmac.c b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/hmac.c
new file mode 100644
index 00000000..56ebfa6b
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/hmac.c
@@ -0,0 +1,86 @@
+/*
+ * 20080913
+ * D. J. Bernstein
+ * Public domain.
+ * */
+
+#include "crypto_hashblocks_sha512.h"
+#include "crypto_auth.h"
+
+#define blocks crypto_hashblocks_sha512
+
+typedef unsigned long long uint64;
+
+static const unsigned char iv[64] = {
+  0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08,
+  0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b,
+  0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b,
+  0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1,
+  0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1,
+  0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f,
+  0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b,
+  0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79
+} ;
+
+int crypto_auth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char h[64];
+  unsigned char padded[256];
+  int i;
+  unsigned long long bytes = 128 + inlen;
+
+  for (i = 0;i < 64;++i) h[i] = iv[i];
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x36;
+  for (i = 32;i < 128;++i) padded[i] = 0x36;
+
+  blocks(h,padded,128);
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 127;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 112) {
+    for (i = inlen + 1;i < 119;++i) padded[i] = 0;
+    padded[119] = bytes >> 61;
+    padded[120] = bytes >> 53;
+    padded[121] = bytes >> 45;
+    padded[122] = bytes >> 37;
+    padded[123] = bytes >> 29;
+    padded[124] = bytes >> 21;
+    padded[125] = bytes >> 13;
+    padded[126] = bytes >> 5;
+    padded[127] = bytes << 3;
+    blocks(h,padded,128);
+  } else {
+    for (i = inlen + 1;i < 247;++i) padded[i] = 0;
+    padded[247] = bytes >> 61;
+    padded[248] = bytes >> 53;
+    padded[249] = bytes >> 45;
+    padded[250] = bytes >> 37;
+    padded[251] = bytes >> 29;
+    padded[252] = bytes >> 21;
+    padded[253] = bytes >> 13;
+    padded[254] = bytes >> 5;
+    padded[255] = bytes << 3;
+    blocks(h,padded,256);
+  }
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x5c;
+  for (i = 32;i < 128;++i) padded[i] = 0x5c;
+
+  for (i = 0;i < 64;++i) padded[128 + i] = h[i];
+  for (i = 0;i < 64;++i) h[i] = iv[i];
+
+  for (i = 64;i < 128;++i) padded[128 + i] = 0;
+  padded[128 + 64] = 0x80;
+  padded[128 + 126] = 6;
+
+  blocks(h,padded,256);
+  for (i = 0;i < 32;++i) out[i] = h[i];
+
+  return 0;
+}
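Same HMAC construction as the SHA-256 variant, but over SHA-512: 128-byte blocks, the 32-byte key zero-padded to 128 bytes before the 0x36/0x5c masking, and only the first 32 bytes of the 64-byte outer digest copied to out, which is the "512256" truncation in the name. The length field is the big-endian bit count 8*(128 + inlen); since bytes fits in 64 bits, the shifts by 61, 53, ..., 5 and the final << 3 write that value into the last nine bytes of the final block, while the zeroing loop above clears the rest of SHA-512's 16-byte length area.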
diff --git a/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/verify.c b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/verify.c
new file mode 100644
index 00000000..96ff0ea8
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_auth_hmacsha512256_ref/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_32.h"
+#include "crypto_auth.h"
+
+int crypto_auth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[32];
+  crypto_auth(correct,in,inlen,k);
+  return crypto_verify_32(h,correct);
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305.h b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305.h
new file mode 100644
index 00000000..c5e989a2
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305.h
@@ -0,0 +1,57 @@
+#ifndef crypto_box_curve25519xsalsa20poly1305_H
+#define crypto_box_curve25519xsalsa20poly1305_H
+
+#define crypto_box_curve25519xsalsa20poly1305_ref_PUBLICKEYBYTES 32
+#define crypto_box_curve25519xsalsa20poly1305_ref_SECRETKEYBYTES 32
+#define crypto_box_curve25519xsalsa20poly1305_ref_BEFORENMBYTES 32
+#define crypto_box_curve25519xsalsa20poly1305_ref_NONCEBYTES 24
+#define crypto_box_curve25519xsalsa20poly1305_ref_ZEROBYTES 32
+#define crypto_box_curve25519xsalsa20poly1305_ref_BOXZEROBYTES 16
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_box_curve25519xsalsa20poly1305_ref(const std::string &,const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box_curve25519xsalsa20poly1305_ref_open(const std::string &,const std::string &,const std::string &,const std::string &);
+extern std::string crypto_box_curve25519xsalsa20poly1305_ref_keypair(std::string *);
+extern "C" {
+#endif
+extern int crypto_box_curve25519xsalsa20poly1305_ref(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_curve25519xsalsa20poly1305_ref_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_curve25519xsalsa20poly1305_ref_keypair(unsigned char *,unsigned char *);
+extern int crypto_box_curve25519xsalsa20poly1305_ref_beforenm(unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_box_curve25519xsalsa20poly1305_ref_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_box_curve25519xsalsa20poly1305_ref_open_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_box_curve25519xsalsa20poly1305 crypto_box_curve25519xsalsa20poly1305_ref
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_open crypto_box_curve25519xsalsa20poly1305_ref_open
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_open crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_keypair crypto_box_curve25519xsalsa20poly1305_ref_keypair
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_keypair crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_beforenm crypto_box_curve25519xsalsa20poly1305_ref_beforenm
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_beforenm crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_afternm crypto_box_curve25519xsalsa20poly1305_ref_afternm
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_afternm crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_open_afternm crypto_box_curve25519xsalsa20poly1305_ref_open_afternm
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_open_afternm crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_ref_PUBLICKEYBYTES
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_ref_SECRETKEYBYTES
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_ref_BEFORENMBYTES
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_ref_NONCEBYTES
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_ref_ZEROBYTES
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_ref_BOXZEROBYTES
+/* POTATO crypto_box_curve25519xsalsa20poly1305_ref_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_ref crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_curve25519xsalsa20poly1305_IMPLEMENTATION "crypto_box/curve25519xsalsa20poly1305/ref"
+#ifndef crypto_box_curve25519xsalsa20poly1305_ref_VERSION
+#define crypto_box_curve25519xsalsa20poly1305_ref_VERSION "-"
+#endif
+#define crypto_box_curve25519xsalsa20poly1305_VERSION crypto_box_curve25519xsalsa20poly1305_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/after.c b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/after.c
new file mode 100644
index 00000000..eb243e22
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/after.c
@@ -0,0 +1,22 @@
+#include "crypto_secretbox_xsalsa20poly1305.h"
+#include "crypto_box.h"
+
+int crypto_box_afternm(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  return crypto_secretbox_xsalsa20poly1305(c,m,mlen,n,k);
+}
+
+int crypto_box_open_afternm(
+  unsigned char *m,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  return crypto_secretbox_xsalsa20poly1305_open(m,c,clen,n,k);
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/api.h b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/api.h
new file mode 100644
index 00000000..ce7762df
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/api.h
@@ -0,0 +1,6 @@
+#define CRYPTO_PUBLICKEYBYTES 32
+#define CRYPTO_SECRETKEYBYTES 32
+#define CRYPTO_BEFORENMBYTES 32
+#define CRYPTO_NONCEBYTES 24
+#define CRYPTO_ZEROBYTES 32
+#define CRYPTO_BOXZEROBYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/before.c b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/before.c
new file mode 100644
index 00000000..279bb12a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/before.c
@@ -0,0 +1,17 @@
+#include "crypto_core_hsalsa20.h"
+#include "crypto_scalarmult_curve25519.h"
+#include "crypto_box.h"
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+static const unsigned char n[16] = {0};
+
+int crypto_box_beforenm(
+  unsigned char *k,
+  const unsigned char *pk,
+  const unsigned char *sk
+)
+{
+  unsigned char s[32];
+  crypto_scalarmult_curve25519(s,sk,pk);
+  return crypto_core_hsalsa20(k,n,s,sigma);
+}
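This is the XSalsa20 key-derivation step of crypto_box: the precomputed key is k = HSalsa20(0, Curve25519(sk, pk), sigma), i.e. the Curve25519 shared secret fed through HSalsa20 with an all-zero 16-byte input and the "expand 32-byte k" constant. Both parties derive the same k because the Diffie-Hellman shared secret is symmetric: Curve25519(a, B) = Curve25519(b, A).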
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/box.c b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/box.c
new file mode 100644
index 00000000..81ff72e2
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/box.c
@@ -0,0 +1,27 @@
+#include "crypto_box.h"
+
+int crypto_box(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *pk,
+  const unsigned char *sk
+)
+{
+  unsigned char k[crypto_box_BEFORENMBYTES];
+  crypto_box_beforenm(k,pk,sk);
+  return crypto_box_afternm(c,m,mlen,n,k);
+}
+
+int crypto_box_open(
+  unsigned char *m,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *pk,
+  const unsigned char *sk
+)
+{
+  unsigned char k[crypto_box_BEFORENMBYTES];
+  crypto_box_beforenm(k,pk,sk);
+  return crypto_box_open_afternm(m,c,clen,n,k);
+}
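crypto_box and crypto_box_open above recompute the shared key on every call; when many messages go to the same peer, the split API amortizes the Curve25519 work. A sketch of that pattern, assuming the same headers as above:

#include "crypto_box.h"

/* Precompute once per peer, then box each message cheaply. */
int box_precomputed_example(unsigned char *c,
                            const unsigned char *m,unsigned long long mlen,
                            const unsigned char *n,
                            const unsigned char *peer_pk,
                            const unsigned char *my_sk)
{
  unsigned char k[crypto_box_BEFORENMBYTES];
  crypto_box_beforenm(k,peer_pk,my_sk);    /* one scalar multiplication + HSalsa20 */
  return crypto_box_afternm(c,m,mlen,n,k); /* per-message cost: just xsalsa20poly1305 */
}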
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/crypto_box.h b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/crypto_box.h
new file mode 100644
index 00000000..a350bcf5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/crypto_box.h
@@ -0,0 +1,34 @@
+#ifndef crypto_box_H
+#define crypto_box_H
+
+#include "crypto_box_curve25519xsalsa20poly1305.h"
+
+#define crypto_box crypto_box_curve25519xsalsa20poly1305
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305 */
+#define crypto_box_open crypto_box_curve25519xsalsa20poly1305_open
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_open */
+#define crypto_box_keypair crypto_box_curve25519xsalsa20poly1305_keypair
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_keypair */
+#define crypto_box_beforenm crypto_box_curve25519xsalsa20poly1305_beforenm
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_beforenm */
+#define crypto_box_afternm crypto_box_curve25519xsalsa20poly1305_afternm
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_afternm */
+#define crypto_box_open_afternm crypto_box_curve25519xsalsa20poly1305_open_afternm
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_open_afternm */
+#define crypto_box_PUBLICKEYBYTES crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_PUBLICKEYBYTES */
+#define crypto_box_SECRETKEYBYTES crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_SECRETKEYBYTES */
+#define crypto_box_BEFORENMBYTES crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_BEFORENMBYTES */
+#define crypto_box_NONCEBYTES crypto_box_curve25519xsalsa20poly1305_NONCEBYTES
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_NONCEBYTES */
+#define crypto_box_ZEROBYTES crypto_box_curve25519xsalsa20poly1305_ZEROBYTES
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_ZEROBYTES */
+#define crypto_box_BOXZEROBYTES crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES
+/* CHEESEBURGER crypto_box_curve25519xsalsa20poly1305_BOXZEROBYTES */
+#define crypto_box_PRIMITIVE "curve25519xsalsa20poly1305"
+#define crypto_box_IMPLEMENTATION crypto_box_curve25519xsalsa20poly1305_IMPLEMENTATION
+#define crypto_box_VERSION crypto_box_curve25519xsalsa20poly1305_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/keypair.c b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/keypair.c
new file mode 100644
index 00000000..233bc950
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_box_curve25519xsalsa20poly1305_ref/keypair.c
@@ -0,0 +1,12 @@
+#include "crypto_scalarmult_curve25519.h"
+#include "crypto_box.h"
+#include "randombytes.h"
+
+int crypto_box_keypair(
+  unsigned char *pk,
+  unsigned char *sk
+)
+{
+  randombytes(sk,32);
+  return crypto_scalarmult_curve25519_base(pk,sk);
+}
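The 32 random bytes are used as the Curve25519 secret key as-is: the scalar clamping (clearing the low three bits and the top bit, setting bit 254) happens inside crypto_scalarmult_curve25519, so no post-processing is needed here, and the public key can always be re-derived from the secret key alone, e.g.

  unsigned char pk_again[crypto_box_PUBLICKEYBYTES];
  crypto_scalarmult_curve25519_base(pk_again,sk);   /* reproduces pk */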
diff --git a/nacl/nacl-20110221/build_android/crypto_core_hsalsa20.h b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20.h
new file mode 100644
index 00000000..890d62d9
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20.h
@@ -0,0 +1,33 @@
+#ifndef crypto_core_hsalsa20_H
+#define crypto_core_hsalsa20_H
+
+#define crypto_core_hsalsa20_ref_OUTPUTBYTES 32
+#define crypto_core_hsalsa20_ref_INPUTBYTES 16
+#define crypto_core_hsalsa20_ref_KEYBYTES 32
+#define crypto_core_hsalsa20_ref_CONSTBYTES 16
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_core_hsalsa20_ref(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_core_hsalsa20 crypto_core_hsalsa20_ref
+/* POTATO crypto_core_hsalsa20_ref crypto_core_hsalsa20_ref crypto_core_hsalsa20 */
+#define crypto_core_hsalsa20_OUTPUTBYTES crypto_core_hsalsa20_ref_OUTPUTBYTES
+/* POTATO crypto_core_hsalsa20_ref_OUTPUTBYTES crypto_core_hsalsa20_ref crypto_core_hsalsa20 */
+#define crypto_core_hsalsa20_INPUTBYTES crypto_core_hsalsa20_ref_INPUTBYTES
+/* POTATO crypto_core_hsalsa20_ref_INPUTBYTES crypto_core_hsalsa20_ref crypto_core_hsalsa20 */
+#define crypto_core_hsalsa20_KEYBYTES crypto_core_hsalsa20_ref_KEYBYTES
+/* POTATO crypto_core_hsalsa20_ref_KEYBYTES crypto_core_hsalsa20_ref crypto_core_hsalsa20 */
+#define crypto_core_hsalsa20_CONSTBYTES crypto_core_hsalsa20_ref_CONSTBYTES
+/* POTATO crypto_core_hsalsa20_ref_CONSTBYTES crypto_core_hsalsa20_ref crypto_core_hsalsa20 */
+#define crypto_core_hsalsa20_IMPLEMENTATION "crypto_core/hsalsa20/ref"
+#ifndef crypto_core_hsalsa20_ref_VERSION
+#define crypto_core_hsalsa20_ref_VERSION "-"
+#endif
+#define crypto_core_hsalsa20_VERSION crypto_core_hsalsa20_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/api.h b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/api.h
new file mode 100644
index 00000000..73bd8541
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 32
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/core.c b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/core.c
new file mode 100644
index 00000000..36118da0
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/core.c
@@ -0,0 +1,135 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  x0 -= load_littleendian(c + 0);
+  x5 -= load_littleendian(c + 4);
+  x10 -= load_littleendian(c + 8);
+  x15 -= load_littleendian(c + 12);
+  x6 -= load_littleendian(in + 0);
+  x7 -= load_littleendian(in + 4);
+  x8 -= load_littleendian(in + 8);
+  x9 -= load_littleendian(in + 12);
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x5);
+  store_littleendian(out + 8,x10);
+  store_littleendian(out + 12,x15);
+  store_littleendian(out + 16,x6);
+  store_littleendian(out + 20,x7);
+  store_littleendian(out + 24,x8);
+  store_littleendian(out + 28,x9);
+
+  return 0;
+}
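A note on the add-then-subtract at the end: after the 20 rounds this reuses the Salsa20 feed-forward (x_i += j_i) and then immediately removes it again for the constant words (positions 0, 5, 10, 15) and the input words (positions 6, 7, 8, 9), so what gets stored is the raw permutation output at exactly those eight positions. That is the definition of HSalsa20: 32 output bytes taken from the permuted 64-byte state with no feed-forward, which is what makes it usable as the key-derivation function in crypto_box_beforenm / XSalsa20.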
diff --git a/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/crypto_core.h b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/crypto_core.h
new file mode 100644
index 00000000..481ee537
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/crypto_core.h
@@ -0,0 +1,20 @@
+#ifndef crypto_core_H
+#define crypto_core_H
+
+#include "crypto_core_hsalsa20.h"
+
+#define crypto_core crypto_core_hsalsa20
+/* CHEESEBURGER crypto_core_hsalsa20 */
+#define crypto_core_OUTPUTBYTES crypto_core_hsalsa20_OUTPUTBYTES
+/* CHEESEBURGER crypto_core_hsalsa20_OUTPUTBYTES */
+#define crypto_core_INPUTBYTES crypto_core_hsalsa20_INPUTBYTES
+/* CHEESEBURGER crypto_core_hsalsa20_INPUTBYTES */
+#define crypto_core_KEYBYTES crypto_core_hsalsa20_KEYBYTES
+/* CHEESEBURGER crypto_core_hsalsa20_KEYBYTES */
+#define crypto_core_CONSTBYTES crypto_core_hsalsa20_CONSTBYTES
+/* CHEESEBURGER crypto_core_hsalsa20_CONSTBYTES */
+#define crypto_core_PRIMITIVE "hsalsa20"
+#define crypto_core_IMPLEMENTATION crypto_core_hsalsa20_IMPLEMENTATION
+#define crypto_core_VERSION crypto_core_hsalsa20_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/implementors b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_hsalsa20_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa20.h b/nacl/nacl-20110221/build_android/crypto_core_salsa20.h
new file mode 100644
index 00000000..6a6a1d99
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa20.h
@@ -0,0 +1,33 @@
+#ifndef crypto_core_salsa20_H
+#define crypto_core_salsa20_H
+
+#define crypto_core_salsa20_ref_OUTPUTBYTES 64
+#define crypto_core_salsa20_ref_INPUTBYTES 16
+#define crypto_core_salsa20_ref_KEYBYTES 32
+#define crypto_core_salsa20_ref_CONSTBYTES 16
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_core_salsa20_ref(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_core_salsa20 crypto_core_salsa20_ref
+/* POTATO crypto_core_salsa20_ref crypto_core_salsa20_ref crypto_core_salsa20 */
+#define crypto_core_salsa20_OUTPUTBYTES crypto_core_salsa20_ref_OUTPUTBYTES
+/* POTATO crypto_core_salsa20_ref_OUTPUTBYTES crypto_core_salsa20_ref crypto_core_salsa20 */
+#define crypto_core_salsa20_INPUTBYTES crypto_core_salsa20_ref_INPUTBYTES
+/* POTATO crypto_core_salsa20_ref_INPUTBYTES crypto_core_salsa20_ref crypto_core_salsa20 */
+#define crypto_core_salsa20_KEYBYTES crypto_core_salsa20_ref_KEYBYTES
+/* POTATO crypto_core_salsa20_ref_KEYBYTES crypto_core_salsa20_ref crypto_core_salsa20 */
+#define crypto_core_salsa20_CONSTBYTES crypto_core_salsa20_ref_CONSTBYTES
+/* POTATO crypto_core_salsa20_ref_CONSTBYTES crypto_core_salsa20_ref crypto_core_salsa20 */
+#define crypto_core_salsa20_IMPLEMENTATION "crypto_core/salsa20/ref"
+#ifndef crypto_core_salsa20_ref_VERSION
+#define crypto_core_salsa20_ref_VERSION "-"
+#endif
+#define crypto_core_salsa20_VERSION crypto_core_salsa20_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa2012.h b/nacl/nacl-20110221/build_android/crypto_core_salsa2012.h
new file mode 100644
index 00000000..990cadd5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa2012.h
@@ -0,0 +1,33 @@
+#ifndef crypto_core_salsa2012_H
+#define crypto_core_salsa2012_H
+
+#define crypto_core_salsa2012_ref_OUTPUTBYTES 64
+#define crypto_core_salsa2012_ref_INPUTBYTES 16
+#define crypto_core_salsa2012_ref_KEYBYTES 32
+#define crypto_core_salsa2012_ref_CONSTBYTES 16
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_core_salsa2012_ref(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_core_salsa2012 crypto_core_salsa2012_ref
+/* POTATO crypto_core_salsa2012_ref crypto_core_salsa2012_ref crypto_core_salsa2012 */
+#define crypto_core_salsa2012_OUTPUTBYTES crypto_core_salsa2012_ref_OUTPUTBYTES
+/* POTATO crypto_core_salsa2012_ref_OUTPUTBYTES crypto_core_salsa2012_ref crypto_core_salsa2012 */
+#define crypto_core_salsa2012_INPUTBYTES crypto_core_salsa2012_ref_INPUTBYTES
+/* POTATO crypto_core_salsa2012_ref_INPUTBYTES crypto_core_salsa2012_ref crypto_core_salsa2012 */
+#define crypto_core_salsa2012_KEYBYTES crypto_core_salsa2012_ref_KEYBYTES
+/* POTATO crypto_core_salsa2012_ref_KEYBYTES crypto_core_salsa2012_ref crypto_core_salsa2012 */
+#define crypto_core_salsa2012_CONSTBYTES crypto_core_salsa2012_ref_CONSTBYTES
+/* POTATO crypto_core_salsa2012_ref_CONSTBYTES crypto_core_salsa2012_ref crypto_core_salsa2012 */
+#define crypto_core_salsa2012_IMPLEMENTATION "crypto_core/salsa2012/ref"
+#ifndef crypto_core_salsa2012_ref_VERSION
+#define crypto_core_salsa2012_ref_VERSION "-"
+#endif
+#define crypto_core_salsa2012_VERSION crypto_core_salsa2012_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/api.h b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/api.h
new file mode 100644
index 00000000..2a387b6d
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 64
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/core.c b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/core.c
new file mode 100644
index 00000000..d4b59e48
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/core.c
@@ -0,0 +1,134 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 12
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x1);
+  store_littleendian(out + 8,x2);
+  store_littleendian(out + 12,x3);
+  store_littleendian(out + 16,x4);
+  store_littleendian(out + 20,x5);
+  store_littleendian(out + 24,x6);
+  store_littleendian(out + 28,x7);
+  store_littleendian(out + 32,x8);
+  store_littleendian(out + 36,x9);
+  store_littleendian(out + 40,x10);
+  store_littleendian(out + 44,x11);
+  store_littleendian(out + 48,x12);
+  store_littleendian(out + 52,x13);
+  store_littleendian(out + 56,x14);
+  store_littleendian(out + 60,x15);
+
+  return 0;
+}
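This is the same core as Salsa20 with ROUNDS reduced to 12 (Salsa20/12); here the feed-forward is kept and all sixteen words are emitted, so the block function is out = x + P12(x) with P12 the 12-round permutation, 64 bytes per call. In the ref stream implementations (not part of this hunk) the 16 input bytes are the 8-byte nonce followed by an 8-byte little-endian block counter, and crypto_stream_salsa2012 runs this block function in counter mode.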
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/crypto_core.h b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/crypto_core.h
new file mode 100644
index 00000000..88fec9b1
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/crypto_core.h
@@ -0,0 +1,20 @@
+#ifndef crypto_core_H
+#define crypto_core_H
+
+#include "crypto_core_salsa2012.h"
+
+#define crypto_core crypto_core_salsa2012
+/* CHEESEBURGER crypto_core_salsa2012 */
+#define crypto_core_OUTPUTBYTES crypto_core_salsa2012_OUTPUTBYTES
+/* CHEESEBURGER crypto_core_salsa2012_OUTPUTBYTES */
+#define crypto_core_INPUTBYTES crypto_core_salsa2012_INPUTBYTES
+/* CHEESEBURGER crypto_core_salsa2012_INPUTBYTES */
+#define crypto_core_KEYBYTES crypto_core_salsa2012_KEYBYTES
+/* CHEESEBURGER crypto_core_salsa2012_KEYBYTES */
+#define crypto_core_CONSTBYTES crypto_core_salsa2012_CONSTBYTES
+/* CHEESEBURGER crypto_core_salsa2012_CONSTBYTES */
+#define crypto_core_PRIMITIVE "salsa2012"
+#define crypto_core_IMPLEMENTATION crypto_core_salsa2012_IMPLEMENTATION
+#define crypto_core_VERSION crypto_core_salsa2012_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/implementors b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa2012_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa208.h b/nacl/nacl-20110221/build_android/crypto_core_salsa208.h
new file mode 100644
index 00000000..7c2fac6c
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa208.h
@@ -0,0 +1,33 @@
+#ifndef crypto_core_salsa208_H
+#define crypto_core_salsa208_H
+
+#define crypto_core_salsa208_ref_OUTPUTBYTES 64
+#define crypto_core_salsa208_ref_INPUTBYTES 16
+#define crypto_core_salsa208_ref_KEYBYTES 32
+#define crypto_core_salsa208_ref_CONSTBYTES 16
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_core_salsa208_ref(unsigned char *,const unsigned char *,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_core_salsa208 crypto_core_salsa208_ref
+/* POTATO crypto_core_salsa208_ref crypto_core_salsa208_ref crypto_core_salsa208 */
+#define crypto_core_salsa208_OUTPUTBYTES crypto_core_salsa208_ref_OUTPUTBYTES
+/* POTATO crypto_core_salsa208_ref_OUTPUTBYTES crypto_core_salsa208_ref crypto_core_salsa208 */
+#define crypto_core_salsa208_INPUTBYTES crypto_core_salsa208_ref_INPUTBYTES
+/* POTATO crypto_core_salsa208_ref_INPUTBYTES crypto_core_salsa208_ref crypto_core_salsa208 */
+#define crypto_core_salsa208_KEYBYTES crypto_core_salsa208_ref_KEYBYTES
+/* POTATO crypto_core_salsa208_ref_KEYBYTES crypto_core_salsa208_ref crypto_core_salsa208 */
+#define crypto_core_salsa208_CONSTBYTES crypto_core_salsa208_ref_CONSTBYTES
+/* POTATO crypto_core_salsa208_ref_CONSTBYTES crypto_core_salsa208_ref crypto_core_salsa208 */
+#define crypto_core_salsa208_IMPLEMENTATION "crypto_core/salsa208/ref"
+#ifndef crypto_core_salsa208_ref_VERSION
+#define crypto_core_salsa208_ref_VERSION "-"
+#endif
+#define crypto_core_salsa208_VERSION crypto_core_salsa208_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/api.h b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/api.h
new file mode 100644
index 00000000..2a387b6d
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 64
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/core.c b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/core.c
new file mode 100644
index 00000000..921e7a86
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/core.c
@@ -0,0 +1,134 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 8
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x1);
+  store_littleendian(out + 8,x2);
+  store_littleendian(out + 12,x3);
+  store_littleendian(out + 16,x4);
+  store_littleendian(out + 20,x5);
+  store_littleendian(out + 24,x6);
+  store_littleendian(out + 28,x7);
+  store_littleendian(out + 32,x8);
+  store_littleendian(out + 36,x9);
+  store_littleendian(out + 40,x10);
+  store_littleendian(out + 44,x11);
+  store_littleendian(out + 48,x12);
+  store_littleendian(out + 52,x13);
+  store_littleendian(out + 56,x14);
+  store_littleendian(out + 60,x15);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/crypto_core.h b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/crypto_core.h
new file mode 100644
index 00000000..bf05f2fb
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/crypto_core.h
@@ -0,0 +1,20 @@
+#ifndef crypto_core_H
+#define crypto_core_H
+
+#include "crypto_core_salsa208.h"
+
+#define crypto_core crypto_core_salsa208
+/* CHEESEBURGER crypto_core_salsa208 */
+#define crypto_core_OUTPUTBYTES crypto_core_salsa208_OUTPUTBYTES
+/* CHEESEBURGER crypto_core_salsa208_OUTPUTBYTES */
+#define crypto_core_INPUTBYTES crypto_core_salsa208_INPUTBYTES
+/* CHEESEBURGER crypto_core_salsa208_INPUTBYTES */
+#define crypto_core_KEYBYTES crypto_core_salsa208_KEYBYTES
+/* CHEESEBURGER crypto_core_salsa208_KEYBYTES */
+#define crypto_core_CONSTBYTES crypto_core_salsa208_CONSTBYTES
+/* CHEESEBURGER crypto_core_salsa208_CONSTBYTES */
+#define crypto_core_PRIMITIVE "salsa208"
+#define crypto_core_IMPLEMENTATION crypto_core_salsa208_IMPLEMENTATION
+#define crypto_core_VERSION crypto_core_salsa208_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/implementors b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa208_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/api.h b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/api.h
new file mode 100644
index 00000000..2a387b6d
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 64
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/core.c b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/core.c
new file mode 100644
index 00000000..910a0056
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/core.c
@@ -0,0 +1,134 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x1);
+  store_littleendian(out + 8,x2);
+  store_littleendian(out + 12,x3);
+  store_littleendian(out + 16,x4);
+  store_littleendian(out + 20,x5);
+  store_littleendian(out + 24,x6);
+  store_littleendian(out + 28,x7);
+  store_littleendian(out + 32,x8);
+  store_littleendian(out + 36,x9);
+  store_littleendian(out + 40,x10);
+  store_littleendian(out + 44,x11);
+  store_littleendian(out + 48,x12);
+  store_littleendian(out + 52,x13);
+  store_littleendian(out + 56,x14);
+  store_littleendian(out + 60,x15);
+
+  return 0;
+}
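
For context, the crypto_stream layer conventionally turns this core into a stream cipher by feeding it a 16-byte input made of an 8-byte nonce followed by an 8-byte little-endian block counter, with the constant set to "expand 32-byte k"; each call then yields 64 bytes of keystream. The helper below is an illustrative sketch of that wiring (the function name is ours, not part of the NaCl API).

/* Hedged sketch: deriving one 64-byte Salsa20 keystream block from
   crypto_core_salsa20 the way the stream layer conventionally does it:
   in[0..7] = nonce, in[8..15] = little-endian block counter,
   c = the 16-byte "expand 32-byte k" constant. */
#include "crypto_core_salsa20.h"

static const unsigned char sigma[16] = "expand 32-byte k";

void keystream_block(unsigned char out[64],
                     const unsigned char nonce[8],
                     unsigned long long counter,
                     const unsigned char key[32])
{
  unsigned char in[16];
  int i;
  for (i = 0;i < 8;++i) in[i] = nonce[i];
  for (i = 8;i < 16;++i) { in[i] = counter & 255; counter >>= 8; }
  crypto_core_salsa20(out,in,key,sigma);
}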
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/crypto_core.h b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/crypto_core.h
new file mode 100644
index 00000000..186c3d3e
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/crypto_core.h
@@ -0,0 +1,20 @@
+#ifndef crypto_core_H
+#define crypto_core_H
+
+#include "crypto_core_salsa20.h"
+
+#define crypto_core crypto_core_salsa20
+/* CHEESEBURGER crypto_core_salsa20 */
+#define crypto_core_OUTPUTBYTES crypto_core_salsa20_OUTPUTBYTES
+/* CHEESEBURGER crypto_core_salsa20_OUTPUTBYTES */
+#define crypto_core_INPUTBYTES crypto_core_salsa20_INPUTBYTES
+/* CHEESEBURGER crypto_core_salsa20_INPUTBYTES */
+#define crypto_core_KEYBYTES crypto_core_salsa20_KEYBYTES
+/* CHEESEBURGER crypto_core_salsa20_KEYBYTES */
+#define crypto_core_CONSTBYTES crypto_core_salsa20_CONSTBYTES
+/* CHEESEBURGER crypto_core_salsa20_CONSTBYTES */
+#define crypto_core_PRIMITIVE "salsa20"
+#define crypto_core_IMPLEMENTATION crypto_core_salsa20_IMPLEMENTATION
+#define crypto_core_VERSION crypto_core_salsa20_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/implementors b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_core_salsa20_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha256.h b/nacl/nacl-20110221/build_android/crypto_hash_sha256.h
new file mode 100644
index 00000000..9c8d1fe8
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha256.h
@@ -0,0 +1,25 @@
+#ifndef crypto_hash_sha256_H
+#define crypto_hash_sha256_H
+
+#define crypto_hash_sha256_ref_BYTES 32
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_hash_sha256_ref(const std::string &);
+extern "C" {
+#endif
+extern int crypto_hash_sha256_ref(unsigned char *,const unsigned char *,unsigned long long);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_hash_sha256 crypto_hash_sha256_ref
+/* POTATO crypto_hash_sha256_ref crypto_hash_sha256_ref crypto_hash_sha256 */
+#define crypto_hash_sha256_BYTES crypto_hash_sha256_ref_BYTES
+/* POTATO crypto_hash_sha256_ref_BYTES crypto_hash_sha256_ref crypto_hash_sha256 */
+#define crypto_hash_sha256_IMPLEMENTATION "crypto_hash/sha256/ref"
+#ifndef crypto_hash_sha256_ref_VERSION
+#define crypto_hash_sha256_ref_VERSION "-"
+#endif
+#define crypto_hash_sha256_VERSION crypto_hash_sha256_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/api.h b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/api.h
new file mode 100644
index 00000000..ae8c7f6a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 32
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/crypto_hash.h b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/crypto_hash.h
new file mode 100644
index 00000000..b8d3ac10
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/crypto_hash.h
@@ -0,0 +1,14 @@
+#ifndef crypto_hash_H
+#define crypto_hash_H
+
+#include "crypto_hash_sha256.h"
+
+#define crypto_hash crypto_hash_sha256
+/* CHEESEBURGER crypto_hash_sha256 */
+#define crypto_hash_BYTES crypto_hash_sha256_BYTES
+/* CHEESEBURGER crypto_hash_sha256_BYTES */
+#define crypto_hash_PRIMITIVE "sha256"
+#define crypto_hash_IMPLEMENTATION crypto_hash_sha256_IMPLEMENTATION
+#define crypto_hash_VERSION crypto_hash_sha256_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/hash.c b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/hash.c
new file mode 100644
index 00000000..21ce68a0
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/hash.c
@@ -0,0 +1,69 @@
+/*
+20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_hashblocks_sha256.h"
+#include "crypto_hash.h"
+
+#define blocks crypto_hashblocks_sha256
+
+typedef unsigned int uint32;
+
+static const char iv[32] = {
+  0x6a,0x09,0xe6,0x67,
+  0xbb,0x67,0xae,0x85,
+  0x3c,0x6e,0xf3,0x72,
+  0xa5,0x4f,0xf5,0x3a,
+  0x51,0x0e,0x52,0x7f,
+  0x9b,0x05,0x68,0x8c,
+  0x1f,0x83,0xd9,0xab,
+  0x5b,0xe0,0xcd,0x19,
+} ;
+
+int crypto_hash(unsigned char *out,const unsigned char *in,unsigned long long inlen)
+{
+  unsigned char h[32];
+  unsigned char padded[128];
+  int i;
+  unsigned long long bits = inlen << 3;
+
+  for (i = 0;i < 32;++i) h[i] = iv[i];
+
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 63;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 56) {
+    for (i = inlen + 1;i < 56;++i) padded[i] = 0;
+    padded[56] = bits >> 56;
+    padded[57] = bits >> 48;
+    padded[58] = bits >> 40;
+    padded[59] = bits >> 32;
+    padded[60] = bits >> 24;
+    padded[61] = bits >> 16;
+    padded[62] = bits >> 8;
+    padded[63] = bits;
+    blocks(h,padded,64);
+  } else {
+    for (i = inlen + 1;i < 120;++i) padded[i] = 0;
+    padded[120] = bits >> 56;
+    padded[121] = bits >> 48;
+    padded[122] = bits >> 40;
+    padded[123] = bits >> 32;
+    padded[124] = bits >> 24;
+    padded[125] = bits >> 16;
+    padded[126] = bits >> 8;
+    padded[127] = bits;
+    blocks(h,padded,128);
+  }
+
+  for (i = 0;i < 32;++i) out[i] = h[i];
+
+  return 0;
+}
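
A short usage sketch for the wrapper above: hashing the 3-byte message "abc" through the crypto_hash_sha256 alias should reproduce the well-known FIPS 180 test vector ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad.

/* Hedged usage sketch: hash "abc" and print the 32-byte digest in hex. */
#include <stdio.h>
#include "crypto_hash_sha256.h"

int main(void)
{
  unsigned char h[crypto_hash_sha256_BYTES];
  const unsigned char m[3] = {'a','b','c'};
  int i;

  crypto_hash_sha256(h,m,sizeof m);
  for (i = 0;i < crypto_hash_sha256_BYTES;++i) printf("%02x",h[i]);
  printf("\n");
  return 0;
}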
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/implementors b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/implementors
new file mode 100644
index 00000000..962e7d8e
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha256_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein (wrapper around crypto_hashblocks/sha256)
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha512.h b/nacl/nacl-20110221/build_android/crypto_hash_sha512.h
new file mode 100644
index 00000000..3f95b17e
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha512.h
@@ -0,0 +1,25 @@
+#ifndef crypto_hash_sha512_H
+#define crypto_hash_sha512_H
+
+#define crypto_hash_sha512_ref_BYTES 64
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_hash_sha512_ref(const std::string &);
+extern "C" {
+#endif
+extern int crypto_hash_sha512_ref(unsigned char *,const unsigned char *,unsigned long long);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_hash_sha512 crypto_hash_sha512_ref
+/* POTATO crypto_hash_sha512_ref crypto_hash_sha512_ref crypto_hash_sha512 */
+#define crypto_hash_sha512_BYTES crypto_hash_sha512_ref_BYTES
+/* POTATO crypto_hash_sha512_ref_BYTES crypto_hash_sha512_ref crypto_hash_sha512 */
+#define crypto_hash_sha512_IMPLEMENTATION "crypto_hash/sha512/ref"
+#ifndef crypto_hash_sha512_ref_VERSION
+#define crypto_hash_sha512_ref_VERSION "-"
+#endif
+#define crypto_hash_sha512_VERSION crypto_hash_sha512_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/api.h b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/api.h
new file mode 100644
index 00000000..de9380d7
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 64
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/crypto_hash.h b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/crypto_hash.h
new file mode 100644
index 00000000..029cf077
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/crypto_hash.h
@@ -0,0 +1,14 @@
+#ifndef crypto_hash_H
+#define crypto_hash_H
+
+#include "crypto_hash_sha512.h"
+
+#define crypto_hash crypto_hash_sha512
+/* CHEESEBURGER crypto_hash_sha512 */
+#define crypto_hash_BYTES crypto_hash_sha512_BYTES
+/* CHEESEBURGER crypto_hash_sha512_BYTES */
+#define crypto_hash_PRIMITIVE "sha512"
+#define crypto_hash_IMPLEMENTATION crypto_hash_sha512_IMPLEMENTATION
+#define crypto_hash_VERSION crypto_hash_sha512_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/hash.c b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/hash.c
new file mode 100644
index 00000000..fc4347bb
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/hash.c
@@ -0,0 +1,71 @@
+/*
+20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_hashblocks_sha512.h"
+#include "crypto_hash.h"
+
+#define blocks crypto_hashblocks_sha512
+
+static const unsigned char iv[64] = {
+  0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08,
+  0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b,
+  0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b,
+  0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1,
+  0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1,
+  0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f,
+  0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b,
+  0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79
+} ;
+
+typedef unsigned long long uint64;
+
+int crypto_hash(unsigned char *out,const unsigned char *in,unsigned long long inlen)
+{
+  unsigned char h[64];
+  unsigned char padded[256];
+  int i;
+  unsigned long long bytes = inlen;
+
+  for (i = 0;i < 64;++i) h[i] = iv[i];
+
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 127;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 112) {
+    for (i = inlen + 1;i < 119;++i) padded[i] = 0;
+    padded[119] = bytes >> 61;
+    padded[120] = bytes >> 53;
+    padded[121] = bytes >> 45;
+    padded[122] = bytes >> 37;
+    padded[123] = bytes >> 29;
+    padded[124] = bytes >> 21;
+    padded[125] = bytes >> 13;
+    padded[126] = bytes >> 5;
+    padded[127] = bytes << 3;
+    blocks(h,padded,128);
+  } else {
+    for (i = inlen + 1;i < 247;++i) padded[i] = 0;
+    padded[247] = bytes >> 61;
+    padded[248] = bytes >> 53;
+    padded[249] = bytes >> 45;
+    padded[250] = bytes >> 37;
+    padded[251] = bytes >> 29;
+    padded[252] = bytes >> 21;
+    padded[253] = bytes >> 13;
+    padded[254] = bytes >> 5;
+    padded[255] = bytes << 3;
+    blocks(h,padded,256);
+  }
+
+  for (i = 0;i < 64;++i) out[i] = h[i];
+
+  return 0;
+}
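
The nine stores into padded[119]..padded[127] above write the message length in bits (8 * inlen) into the tail of SHA-512's 16-byte big-endian length field; the preceding zero loop has already cleared bytes 112..118, which suffices because inlen is a 64-bit byte count. The small self-check below, written for this note and assuming inlen < 2^61 so that 8*inlen fits in 64 bits, spells out that correspondence.

/* Hedged self-check of the length encoding used above: the shift
   pattern written into padded[119..127] equals the big-endian bytes
   of bits = 8*bytes (for bytes < 2^61, so bits fits in 64 bits). */
#include <assert.h>

static void check_length_encoding(unsigned long long bytes)
{
  unsigned char tail[9];
  unsigned long long bits = bytes << 3;
  int k;

  tail[0] = bytes >> 61;   /* bits >> 64: zero whenever bytes < 2^61 */
  tail[1] = bytes >> 53;
  tail[2] = bytes >> 45;
  tail[3] = bytes >> 37;
  tail[4] = bytes >> 29;
  tail[5] = bytes >> 21;
  tail[6] = bytes >> 13;
  tail[7] = bytes >> 5;
  tail[8] = bytes << 3;

  for (k = 0;k < 8;++k)
    assert(tail[8 - k] == ((bits >> (8 * k)) & 255));
}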
diff --git a/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/implementors b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/implementors
new file mode 100644
index 00000000..40afca09
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hash_sha512_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein (wrapper around crypto_hashblocks/sha512)
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256.h b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256.h
new file mode 100644
index 00000000..c2ae1a01
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256.h
@@ -0,0 +1,27 @@
+#ifndef crypto_hashblocks_sha256_H
+#define crypto_hashblocks_sha256_H
+
+#define crypto_hashblocks_sha256_ref_STATEBYTES 32
+#define crypto_hashblocks_sha256_ref_BLOCKBYTES 64
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_hashblocks_sha256_ref(unsigned char *,const unsigned char *,unsigned long long);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_hashblocks_sha256 crypto_hashblocks_sha256_ref
+/* POTATO crypto_hashblocks_sha256_ref crypto_hashblocks_sha256_ref crypto_hashblocks_sha256 */
+#define crypto_hashblocks_sha256_STATEBYTES crypto_hashblocks_sha256_ref_STATEBYTES
+/* POTATO crypto_hashblocks_sha256_ref_STATEBYTES crypto_hashblocks_sha256_ref crypto_hashblocks_sha256 */
+#define crypto_hashblocks_sha256_BLOCKBYTES crypto_hashblocks_sha256_ref_BLOCKBYTES
+/* POTATO crypto_hashblocks_sha256_ref_BLOCKBYTES crypto_hashblocks_sha256_ref crypto_hashblocks_sha256 */
+#define crypto_hashblocks_sha256_IMPLEMENTATION "crypto_hashblocks/sha256/ref"
+#ifndef crypto_hashblocks_sha256_ref_VERSION
+#define crypto_hashblocks_sha256_ref_VERSION "-"
+#endif
+#define crypto_hashblocks_sha256_VERSION crypto_hashblocks_sha256_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/api.h b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/api.h
new file mode 100644
index 00000000..005a4f47
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_STATEBYTES 32
+#define CRYPTO_BLOCKBYTES 64
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/blocks.c b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/blocks.c
new file mode 100644
index 00000000..ad977945
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/blocks.c
@@ -0,0 +1,212 @@
+#include "crypto_hashblocks.h"
+
+typedef unsigned int uint32;
+
+static uint32 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[3]) \
+  | (((uint32) (x[2])) << 8) \
+  | (((uint32) (x[1])) << 16) \
+  | (((uint32) (x[0])) << 24)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint32 u)
+{
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
+#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
+#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
+#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))
+
+#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
+#define F(w,k) \
+  T1 = h + Sigma1(e) + Ch(e,f,g) + k + w; \
+  T2 = Sigma0(a) + Maj(a,b,c); \
+  h = g; \
+  g = f; \
+  f = e; \
+  e = d + T1; \
+  d = c; \
+  c = b; \
+  b = a; \
+  a = T1 + T2;
+
+int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint32 state[8];
+  uint32 a;
+  uint32 b;
+  uint32 c;
+  uint32 d;
+  uint32 e;
+  uint32 f;
+  uint32 g;
+  uint32 h;
+  uint32 T1;
+  uint32 T2;
+
+  a = load_bigendian(statebytes +  0); state[0] = a;
+  b = load_bigendian(statebytes +  4); state[1] = b;
+  c = load_bigendian(statebytes +  8); state[2] = c;
+  d = load_bigendian(statebytes + 12); state[3] = d;
+  e = load_bigendian(statebytes + 16); state[4] = e;
+  f = load_bigendian(statebytes + 20); state[5] = f;
+  g = load_bigendian(statebytes + 24); state[6] = g;
+  h = load_bigendian(statebytes + 28); state[7] = h;
+
+  while (inlen >= 64) {
+    uint32 w0  = load_bigendian(in +  0);
+    uint32 w1  = load_bigendian(in +  4);
+    uint32 w2  = load_bigendian(in +  8);
+    uint32 w3  = load_bigendian(in + 12);
+    uint32 w4  = load_bigendian(in + 16);
+    uint32 w5  = load_bigendian(in + 20);
+    uint32 w6  = load_bigendian(in + 24);
+    uint32 w7  = load_bigendian(in + 28);
+    uint32 w8  = load_bigendian(in + 32);
+    uint32 w9  = load_bigendian(in + 36);
+    uint32 w10 = load_bigendian(in + 40);
+    uint32 w11 = load_bigendian(in + 44);
+    uint32 w12 = load_bigendian(in + 48);
+    uint32 w13 = load_bigendian(in + 52);
+    uint32 w14 = load_bigendian(in + 56);
+    uint32 w15 = load_bigendian(in + 60);
+
+    F(w0 ,0x428a2f98)
+    F(w1 ,0x71374491)
+    F(w2 ,0xb5c0fbcf)
+    F(w3 ,0xe9b5dba5)
+    F(w4 ,0x3956c25b)
+    F(w5 ,0x59f111f1)
+    F(w6 ,0x923f82a4)
+    F(w7 ,0xab1c5ed5)
+    F(w8 ,0xd807aa98)
+    F(w9 ,0x12835b01)
+    F(w10,0x243185be)
+    F(w11,0x550c7dc3)
+    F(w12,0x72be5d74)
+    F(w13,0x80deb1fe)
+    F(w14,0x9bdc06a7)
+    F(w15,0xc19bf174)
+
+    EXPAND
+
+    F(w0 ,0xe49b69c1)
+    F(w1 ,0xefbe4786)
+    F(w2 ,0x0fc19dc6)
+    F(w3 ,0x240ca1cc)
+    F(w4 ,0x2de92c6f)
+    F(w5 ,0x4a7484aa)
+    F(w6 ,0x5cb0a9dc)
+    F(w7 ,0x76f988da)
+    F(w8 ,0x983e5152)
+    F(w9 ,0xa831c66d)
+    F(w10,0xb00327c8)
+    F(w11,0xbf597fc7)
+    F(w12,0xc6e00bf3)
+    F(w13,0xd5a79147)
+    F(w14,0x06ca6351)
+    F(w15,0x14292967)
+
+    EXPAND
+
+    F(w0 ,0x27b70a85)
+    F(w1 ,0x2e1b2138)
+    F(w2 ,0x4d2c6dfc)
+    F(w3 ,0x53380d13)
+    F(w4 ,0x650a7354)
+    F(w5 ,0x766a0abb)
+    F(w6 ,0x81c2c92e)
+    F(w7 ,0x92722c85)
+    F(w8 ,0xa2bfe8a1)
+    F(w9 ,0xa81a664b)
+    F(w10,0xc24b8b70)
+    F(w11,0xc76c51a3)
+    F(w12,0xd192e819)
+    F(w13,0xd6990624)
+    F(w14,0xf40e3585)
+    F(w15,0x106aa070)
+
+    EXPAND
+
+    F(w0 ,0x19a4c116)
+    F(w1 ,0x1e376c08)
+    F(w2 ,0x2748774c)
+    F(w3 ,0x34b0bcb5)
+    F(w4 ,0x391c0cb3)
+    F(w5 ,0x4ed8aa4a)
+    F(w6 ,0x5b9cca4f)
+    F(w7 ,0x682e6ff3)
+    F(w8 ,0x748f82ee)
+    F(w9 ,0x78a5636f)
+    F(w10,0x84c87814)
+    F(w11,0x8cc70208)
+    F(w12,0x90befffa)
+    F(w13,0xa4506ceb)
+    F(w14,0xbef9a3f7)
+    F(w15,0xc67178f2)
+
+    a += state[0];
+    b += state[1];
+    c += state[2];
+    d += state[3];
+    e += state[4];
+    f += state[5];
+    g += state[6];
+    h += state[7];
+  
+    state[0] = a;
+    state[1] = b;
+    state[2] = c;
+    state[3] = d;
+    state[4] = e;
+    state[5] = f;
+    state[6] = g;
+    state[7] = h;
+
+    in += 64;
+    inlen -= 64;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  4,state[1]);
+  store_bigendian(statebytes +  8,state[2]);
+  store_bigendian(statebytes + 12,state[3]);
+  store_bigendian(statebytes + 16,state[4]);
+  store_bigendian(statebytes + 20,state[5]);
+  store_bigendian(statebytes + 24,state[6]);
+  store_bigendian(statebytes + 28,state[7]);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/crypto_hashblocks.h b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/crypto_hashblocks.h
new file mode 100644
index 00000000..dabd9d3a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/crypto_hashblocks.h
@@ -0,0 +1,16 @@
+#ifndef crypto_hashblocks_H
+#define crypto_hashblocks_H
+
+#include "crypto_hashblocks_sha256.h"
+
+#define crypto_hashblocks crypto_hashblocks_sha256
+/* CHEESEBURGER crypto_hashblocks_sha256 */
+#define crypto_hashblocks_STATEBYTES crypto_hashblocks_sha256_STATEBYTES
+/* CHEESEBURGER crypto_hashblocks_sha256_STATEBYTES */
+#define crypto_hashblocks_BLOCKBYTES crypto_hashblocks_sha256_BLOCKBYTES
+/* CHEESEBURGER crypto_hashblocks_sha256_BLOCKBYTES */
+#define crypto_hashblocks_PRIMITIVE "sha256"
+#define crypto_hashblocks_IMPLEMENTATION crypto_hashblocks_sha256_IMPLEMENTATION
+#define crypto_hashblocks_VERSION crypto_hashblocks_sha256_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/implementors b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha256_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512.h b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512.h
new file mode 100644
index 00000000..fd1b3c70
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512.h
@@ -0,0 +1,27 @@
+#ifndef crypto_hashblocks_sha512_H
+#define crypto_hashblocks_sha512_H
+
+#define crypto_hashblocks_sha512_ref_STATEBYTES 64
+#define crypto_hashblocks_sha512_ref_BLOCKBYTES 128
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_hashblocks_sha512_ref(unsigned char *,const unsigned char *,unsigned long long);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_hashblocks_sha512 crypto_hashblocks_sha512_ref
+/* POTATO crypto_hashblocks_sha512_ref crypto_hashblocks_sha512_ref crypto_hashblocks_sha512 */
+#define crypto_hashblocks_sha512_STATEBYTES crypto_hashblocks_sha512_ref_STATEBYTES
+/* POTATO crypto_hashblocks_sha512_ref_STATEBYTES crypto_hashblocks_sha512_ref crypto_hashblocks_sha512 */
+#define crypto_hashblocks_sha512_BLOCKBYTES crypto_hashblocks_sha512_ref_BLOCKBYTES
+/* POTATO crypto_hashblocks_sha512_ref_BLOCKBYTES crypto_hashblocks_sha512_ref crypto_hashblocks_sha512 */
+#define crypto_hashblocks_sha512_IMPLEMENTATION "crypto_hashblocks/sha512/ref"
+#ifndef crypto_hashblocks_sha512_ref_VERSION
+#define crypto_hashblocks_sha512_ref_VERSION "-"
+#endif
+#define crypto_hashblocks_sha512_VERSION crypto_hashblocks_sha512_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/api.h b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/api.h
new file mode 100644
index 00000000..ac45d103
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_STATEBYTES 64
+#define CRYPTO_BLOCKBYTES 128
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/blocks.c b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/blocks.c
new file mode 100644
index 00000000..f8fae491
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/blocks.c
@@ -0,0 +1,239 @@
+#include "crypto_hashblocks.h"
+
+typedef unsigned long long uint64;
+
+static uint64 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint64) (x[7]) \
+  | (((uint64) (x[6])) << 8) \
+  | (((uint64) (x[5])) << 16) \
+  | (((uint64) (x[4])) << 24) \
+  | (((uint64) (x[3])) << 32) \
+  | (((uint64) (x[2])) << 40) \
+  | (((uint64) (x[1])) << 48) \
+  | (((uint64) (x[0])) << 56)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint64 u)
+{
+  x[7] = u; u >>= 8;
+  x[6] = u; u >>= 8;
+  x[5] = u; u >>= 8;
+  x[4] = u; u >>= 8;
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
+#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
+#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
+#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
+
+#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
+#define F(w,k) \
+  T1 = h + Sigma1(e) + Ch(e,f,g) + k + w; \
+  T2 = Sigma0(a) + Maj(a,b,c); \
+  h = g; \
+  g = f; \
+  f = e; \
+  e = d + T1; \
+  d = c; \
+  c = b; \
+  b = a; \
+  a = T1 + T2;
+
+int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint64 state[8];
+  uint64 a;
+  uint64 b;
+  uint64 c;
+  uint64 d;
+  uint64 e;
+  uint64 f;
+  uint64 g;
+  uint64 h;
+  uint64 T1;
+  uint64 T2;
+
+  a = load_bigendian(statebytes +  0); state[0] = a;
+  b = load_bigendian(statebytes +  8); state[1] = b;
+  c = load_bigendian(statebytes + 16); state[2] = c;
+  d = load_bigendian(statebytes + 24); state[3] = d;
+  e = load_bigendian(statebytes + 32); state[4] = e;
+  f = load_bigendian(statebytes + 40); state[5] = f;
+  g = load_bigendian(statebytes + 48); state[6] = g;
+  h = load_bigendian(statebytes + 56); state[7] = h;
+
+  while (inlen >= 128) {
+    uint64 w0  = load_bigendian(in +   0);
+    uint64 w1  = load_bigendian(in +   8);
+    uint64 w2  = load_bigendian(in +  16);
+    uint64 w3  = load_bigendian(in +  24);
+    uint64 w4  = load_bigendian(in +  32);
+    uint64 w5  = load_bigendian(in +  40);
+    uint64 w6  = load_bigendian(in +  48);
+    uint64 w7  = load_bigendian(in +  56);
+    uint64 w8  = load_bigendian(in +  64);
+    uint64 w9  = load_bigendian(in +  72);
+    uint64 w10 = load_bigendian(in +  80);
+    uint64 w11 = load_bigendian(in +  88);
+    uint64 w12 = load_bigendian(in +  96);
+    uint64 w13 = load_bigendian(in + 104);
+    uint64 w14 = load_bigendian(in + 112);
+    uint64 w15 = load_bigendian(in + 120);
+
+    F(w0 ,0x428a2f98d728ae22ULL)
+    F(w1 ,0x7137449123ef65cdULL)
+    F(w2 ,0xb5c0fbcfec4d3b2fULL)
+    F(w3 ,0xe9b5dba58189dbbcULL)
+    F(w4 ,0x3956c25bf348b538ULL)
+    F(w5 ,0x59f111f1b605d019ULL)
+    F(w6 ,0x923f82a4af194f9bULL)
+    F(w7 ,0xab1c5ed5da6d8118ULL)
+    F(w8 ,0xd807aa98a3030242ULL)
+    F(w9 ,0x12835b0145706fbeULL)
+    F(w10,0x243185be4ee4b28cULL)
+    F(w11,0x550c7dc3d5ffb4e2ULL)
+    F(w12,0x72be5d74f27b896fULL)
+    F(w13,0x80deb1fe3b1696b1ULL)
+    F(w14,0x9bdc06a725c71235ULL)
+    F(w15,0xc19bf174cf692694ULL)
+
+    EXPAND
+
+    F(w0 ,0xe49b69c19ef14ad2ULL)
+    F(w1 ,0xefbe4786384f25e3ULL)
+    F(w2 ,0x0fc19dc68b8cd5b5ULL)
+    F(w3 ,0x240ca1cc77ac9c65ULL)
+    F(w4 ,0x2de92c6f592b0275ULL)
+    F(w5 ,0x4a7484aa6ea6e483ULL)
+    F(w6 ,0x5cb0a9dcbd41fbd4ULL)
+    F(w7 ,0x76f988da831153b5ULL)
+    F(w8 ,0x983e5152ee66dfabULL)
+    F(w9 ,0xa831c66d2db43210ULL)
+    F(w10,0xb00327c898fb213fULL)
+    F(w11,0xbf597fc7beef0ee4ULL)
+    F(w12,0xc6e00bf33da88fc2ULL)
+    F(w13,0xd5a79147930aa725ULL)
+    F(w14,0x06ca6351e003826fULL)
+    F(w15,0x142929670a0e6e70ULL)
+
+    EXPAND
+
+    F(w0 ,0x27b70a8546d22ffcULL)
+    F(w1 ,0x2e1b21385c26c926ULL)
+    F(w2 ,0x4d2c6dfc5ac42aedULL)
+    F(w3 ,0x53380d139d95b3dfULL)
+    F(w4 ,0x650a73548baf63deULL)
+    F(w5 ,0x766a0abb3c77b2a8ULL)
+    F(w6 ,0x81c2c92e47edaee6ULL)
+    F(w7 ,0x92722c851482353bULL)
+    F(w8 ,0xa2bfe8a14cf10364ULL)
+    F(w9 ,0xa81a664bbc423001ULL)
+    F(w10,0xc24b8b70d0f89791ULL)
+    F(w11,0xc76c51a30654be30ULL)
+    F(w12,0xd192e819d6ef5218ULL)
+    F(w13,0xd69906245565a910ULL)
+    F(w14,0xf40e35855771202aULL)
+    F(w15,0x106aa07032bbd1b8ULL)
+
+    EXPAND
+
+    F(w0 ,0x19a4c116b8d2d0c8ULL)
+    F(w1 ,0x1e376c085141ab53ULL)
+    F(w2 ,0x2748774cdf8eeb99ULL)
+    F(w3 ,0x34b0bcb5e19b48a8ULL)
+    F(w4 ,0x391c0cb3c5c95a63ULL)
+    F(w5 ,0x4ed8aa4ae3418acbULL)
+    F(w6 ,0x5b9cca4f7763e373ULL)
+    F(w7 ,0x682e6ff3d6b2b8a3ULL)
+    F(w8 ,0x748f82ee5defb2fcULL)
+    F(w9 ,0x78a5636f43172f60ULL)
+    F(w10,0x84c87814a1f0ab72ULL)
+    F(w11,0x8cc702081a6439ecULL)
+    F(w12,0x90befffa23631e28ULL)
+    F(w13,0xa4506cebde82bde9ULL)
+    F(w14,0xbef9a3f7b2c67915ULL)
+    F(w15,0xc67178f2e372532bULL)
+
+    EXPAND
+
+    F(w0 ,0xca273eceea26619cULL)
+    F(w1 ,0xd186b8c721c0c207ULL)
+    F(w2 ,0xeada7dd6cde0eb1eULL)
+    F(w3 ,0xf57d4f7fee6ed178ULL)
+    F(w4 ,0x06f067aa72176fbaULL)
+    F(w5 ,0x0a637dc5a2c898a6ULL)
+    F(w6 ,0x113f9804bef90daeULL)
+    F(w7 ,0x1b710b35131c471bULL)
+    F(w8 ,0x28db77f523047d84ULL)
+    F(w9 ,0x32caab7b40c72493ULL)
+    F(w10,0x3c9ebe0a15c9bebcULL)
+    F(w11,0x431d67c49c100d4cULL)
+    F(w12,0x4cc5d4becb3e42b6ULL)
+    F(w13,0x597f299cfc657e2aULL)
+    F(w14,0x5fcb6fab3ad6faecULL)
+    F(w15,0x6c44198c4a475817ULL)
+
+    a += state[0];
+    b += state[1];
+    c += state[2];
+    d += state[3];
+    e += state[4];
+    f += state[5];
+    g += state[6];
+    h += state[7];
+  
+    state[0] = a;
+    state[1] = b;
+    state[2] = c;
+    state[3] = d;
+    state[4] = e;
+    state[5] = f;
+    state[6] = g;
+    state[7] = h;
+
+    in += 128;
+    inlen -= 128;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  8,state[1]);
+  store_bigendian(statebytes + 16,state[2]);
+  store_bigendian(statebytes + 24,state[3]);
+  store_bigendian(statebytes + 32,state[4]);
+  store_bigendian(statebytes + 40,state[5]);
+  store_bigendian(statebytes + 48,state[6]);
+  store_bigendian(statebytes + 56,state[7]);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/crypto_hashblocks.h b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/crypto_hashblocks.h
new file mode 100644
index 00000000..e9ece43a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/crypto_hashblocks.h
@@ -0,0 +1,16 @@
+#ifndef crypto_hashblocks_H
+#define crypto_hashblocks_H
+
+#include "crypto_hashblocks_sha512.h"
+
+#define crypto_hashblocks crypto_hashblocks_sha512
+/* CHEESEBURGER crypto_hashblocks_sha512 */
+#define crypto_hashblocks_STATEBYTES crypto_hashblocks_sha512_STATEBYTES
+/* CHEESEBURGER crypto_hashblocks_sha512_STATEBYTES */
+#define crypto_hashblocks_BLOCKBYTES crypto_hashblocks_sha512_BLOCKBYTES
+/* CHEESEBURGER crypto_hashblocks_sha512_BLOCKBYTES */
+#define crypto_hashblocks_PRIMITIVE "sha512"
+#define crypto_hashblocks_IMPLEMENTATION crypto_hashblocks_sha512_IMPLEMENTATION
+#define crypto_hashblocks_VERSION crypto_hashblocks_sha512_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/implementors b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_hashblocks_sha512_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305.h b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305.h
new file mode 100644
index 00000000..6d2d12c5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305.h
@@ -0,0 +1,32 @@
+#ifndef crypto_onetimeauth_poly1305_H
+#define crypto_onetimeauth_poly1305_H
+
+#define crypto_onetimeauth_poly1305_ref_BYTES 16
+#define crypto_onetimeauth_poly1305_ref_KEYBYTES 32
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_onetimeauth_poly1305_ref(const std::string &,const std::string &);
+extern void crypto_onetimeauth_poly1305_ref_verify(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_onetimeauth_poly1305_ref(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_onetimeauth_poly1305_ref_verify(const unsigned char *,const unsigned char *,unsigned long long,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_onetimeauth_poly1305 crypto_onetimeauth_poly1305_ref
+/* POTATO crypto_onetimeauth_poly1305_ref crypto_onetimeauth_poly1305_ref crypto_onetimeauth_poly1305 */
+#define crypto_onetimeauth_poly1305_verify crypto_onetimeauth_poly1305_ref_verify
+/* POTATO crypto_onetimeauth_poly1305_ref_verify crypto_onetimeauth_poly1305_ref crypto_onetimeauth_poly1305 */
+#define crypto_onetimeauth_poly1305_BYTES crypto_onetimeauth_poly1305_ref_BYTES
+/* POTATO crypto_onetimeauth_poly1305_ref_BYTES crypto_onetimeauth_poly1305_ref crypto_onetimeauth_poly1305 */
+#define crypto_onetimeauth_poly1305_KEYBYTES crypto_onetimeauth_poly1305_ref_KEYBYTES
+/* POTATO crypto_onetimeauth_poly1305_ref_KEYBYTES crypto_onetimeauth_poly1305_ref crypto_onetimeauth_poly1305 */
+#define crypto_onetimeauth_poly1305_IMPLEMENTATION "crypto_onetimeauth/poly1305/ref"
+#ifndef crypto_onetimeauth_poly1305_ref_VERSION
+#define crypto_onetimeauth_poly1305_ref_VERSION "-"
+#endif
+#define crypto_onetimeauth_poly1305_VERSION crypto_onetimeauth_poly1305_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/api.h b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/api.h
new file mode 100644
index 00000000..acc133ed
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 16
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/auth.c b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/auth.c
new file mode 100644
index 00000000..06cf115d
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/auth.c
@@ -0,0 +1,104 @@
+/*
+20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_onetimeauth.h"
+
+static void add(unsigned int h[17],const unsigned int c[17])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 17;++j) { u += h[j] + c[j]; h[j] = u & 255; u >>= 8; }
+}
+
+static void squeeze(unsigned int h[17])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 16;++j) { u += h[j]; h[j] = u & 255; u >>= 8; }
+  u += h[16]; h[16] = u & 3;
+  u = 5 * (u >> 2);
+  for (j = 0;j < 16;++j) { u += h[j]; h[j] = u & 255; u >>= 8; }
+  u += h[16]; h[16] = u;
+}
+
+static const unsigned int minusp[17] = {
+  5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252
+} ;
+
+static void freeze(unsigned int h[17])
+{
+  unsigned int horig[17];
+  unsigned int j;
+  unsigned int negative;
+  for (j = 0;j < 17;++j) horig[j] = h[j];
+  add(h,minusp);
+  negative = -(h[16] >> 7);
+  for (j = 0;j < 17;++j) h[j] ^= negative & (horig[j] ^ h[j]);
+}
+
+static void mulmod(unsigned int h[17],const unsigned int r[17])
+{
+  unsigned int hr[17];
+  unsigned int i;
+  unsigned int j;
+  unsigned int u;
+
+  for (i = 0;i < 17;++i) {
+    u = 0;
+    for (j = 0;j <= i;++j) u += h[j] * r[i - j];
+    for (j = i + 1;j < 17;++j) u += 320 * h[j] * r[i + 17 - j];
+    hr[i] = u;
+  }
+  for (i = 0;i < 17;++i) h[i] = hr[i];
+  squeeze(h);
+}
+
+int crypto_onetimeauth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned int j;
+  unsigned int r[17];
+  unsigned int h[17];
+  unsigned int c[17];
+
+  r[0] = k[0];
+  r[1] = k[1];
+  r[2] = k[2];
+  r[3] = k[3] & 15;
+  r[4] = k[4] & 252;
+  r[5] = k[5];
+  r[6] = k[6];
+  r[7] = k[7] & 15;
+  r[8] = k[8] & 252;
+  r[9] = k[9];
+  r[10] = k[10];
+  r[11] = k[11] & 15;
+  r[12] = k[12] & 252;
+  r[13] = k[13];
+  r[14] = k[14];
+  r[15] = k[15] & 15;
+  r[16] = 0;
+
+  for (j = 0;j < 17;++j) h[j] = 0;
+
+  while (inlen > 0) {
+    for (j = 0;j < 17;++j) c[j] = 0;
+    for (j = 0;(j < 16) && (j < inlen);++j) c[j] = in[j];
+    c[j] = 1;
+    in += j; inlen -= j;
+    add(h,c);
+    mulmod(h,r);
+  }
+
+  freeze(h);
+
+  for (j = 0;j < 16;++j) c[j] = k[j + 16];
+  c[16] = 0;
+  add(h,c);
+  for (j = 0;j < 16;++j) out[j] = h[j];
+  return 0;
+}
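
A hedged usage sketch for the authenticator above: compute a 16-byte Poly1305 tag under a 32-byte one-time key, then check it with the companion verify function declared in crypto_onetimeauth_poly1305.h. The key here is a zero placeholder; in real use it must be secret, uniformly random, and used for exactly one message.

/* Hedged usage sketch: tag a short message and verify the tag. */
#include <stdio.h>
#include "crypto_onetimeauth_poly1305.h"

int main(void)
{
  unsigned char k[crypto_onetimeauth_poly1305_KEYBYTES] = {0};
  unsigned char a[crypto_onetimeauth_poly1305_BYTES];
  const unsigned char m[5] = "hello";

  crypto_onetimeauth_poly1305(a,m,sizeof m,k);
  if (crypto_onetimeauth_poly1305_verify(a,m,sizeof m,k) == 0)
    printf("tag verified\n");
  return 0;
}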
diff --git a/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/crypto_onetimeauth.h b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/crypto_onetimeauth.h
new file mode 100644
index 00000000..a3cb71ef
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/crypto_onetimeauth.h
@@ -0,0 +1,18 @@
+#ifndef crypto_onetimeauth_H
+#define crypto_onetimeauth_H
+
+#include "crypto_onetimeauth_poly1305.h"
+
+#define crypto_onetimeauth crypto_onetimeauth_poly1305
+/* CHEESEBURGER crypto_onetimeauth_poly1305 */
+#define crypto_onetimeauth_verify crypto_onetimeauth_poly1305_verify
+/* CHEESEBURGER crypto_onetimeauth_poly1305_verify */
+#define crypto_onetimeauth_BYTES crypto_onetimeauth_poly1305_BYTES
+/* CHEESEBURGER crypto_onetimeauth_poly1305_BYTES */
+#define crypto_onetimeauth_KEYBYTES crypto_onetimeauth_poly1305_KEYBYTES
+/* CHEESEBURGER crypto_onetimeauth_poly1305_KEYBYTES */
+#define crypto_onetimeauth_PRIMITIVE "poly1305"
+#define crypto_onetimeauth_IMPLEMENTATION crypto_onetimeauth_poly1305_IMPLEMENTATION
+#define crypto_onetimeauth_VERSION crypto_onetimeauth_poly1305_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/verify.c b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/verify.c
new file mode 100644
index 00000000..c7e063f1
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_onetimeauth_poly1305_ref/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_16.h"
+#include "crypto_onetimeauth.h"
+
+int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[16];
+  crypto_onetimeauth(correct,in,inlen,k);
+  return crypto_verify_16(h,correct);
+}
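
crypto_verify_16, pulled in above, is NaCl's constant-time 16-byte comparison: it returns 0 when the tags match and -1 otherwise, without a data-dependent branch, which is why verify.c can feed it a freshly computed tag and the attacker-supplied one directly. The sketch below illustrates the standard technique such a function uses; it is written for this note and is not the ref implementation itself.

/* Hedged sketch of a constant-time 16-byte comparison: OR together all
   byte differences, then map "any difference" to -1 and "equal" to 0
   without branching on secret data. */
static int verify_16_sketch(const unsigned char *x,const unsigned char *y)
{
  unsigned int differentbits = 0;
  int i;
  for (i = 0;i < 16;++i) differentbits |= x[i] ^ y[i];
  return (1 & ((differentbits - 1) >> 8)) - 1;
}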
diff --git a/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519.h b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519.h
new file mode 100644
index 00000000..be9f01d6
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519.h
@@ -0,0 +1,32 @@
+#ifndef crypto_scalarmult_curve25519_H
+#define crypto_scalarmult_curve25519_H
+
+#define crypto_scalarmult_curve25519_ref_BYTES 32
+#define crypto_scalarmult_curve25519_ref_SCALARBYTES 32
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_scalarmult_curve25519_ref(const std::string &,const std::string &);
+extern std::string crypto_scalarmult_curve25519_ref_base(const std::string &);
+extern "C" {
+#endif
+extern int crypto_scalarmult_curve25519_ref(unsigned char *,const unsigned char *,const unsigned char *);
+extern int crypto_scalarmult_curve25519_ref_base(unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_scalarmult_curve25519 crypto_scalarmult_curve25519_ref
+/* POTATO crypto_scalarmult_curve25519_ref crypto_scalarmult_curve25519_ref crypto_scalarmult_curve25519 */
+#define crypto_scalarmult_curve25519_base crypto_scalarmult_curve25519_ref_base
+/* POTATO crypto_scalarmult_curve25519_ref_base crypto_scalarmult_curve25519_ref crypto_scalarmult_curve25519 */
+#define crypto_scalarmult_curve25519_BYTES crypto_scalarmult_curve25519_ref_BYTES
+/* POTATO crypto_scalarmult_curve25519_ref_BYTES crypto_scalarmult_curve25519_ref crypto_scalarmult_curve25519 */
+#define crypto_scalarmult_curve25519_SCALARBYTES crypto_scalarmult_curve25519_ref_SCALARBYTES
+/* POTATO crypto_scalarmult_curve25519_ref_SCALARBYTES crypto_scalarmult_curve25519_ref crypto_scalarmult_curve25519 */
+#define crypto_scalarmult_curve25519_IMPLEMENTATION "crypto_scalarmult/curve25519/ref"
+#ifndef crypto_scalarmult_curve25519_ref_VERSION
+#define crypto_scalarmult_curve25519_ref_VERSION "-"
+#endif
+#define crypto_scalarmult_curve25519_VERSION crypto_scalarmult_curve25519_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/api.h b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/api.h
new file mode 100644
index 00000000..60339596
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_SCALARBYTES 32
diff --git a/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/base.c b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/base.c
new file mode 100644
index 00000000..ac2d7eb4
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/base.c
@@ -0,0 +1,16 @@
+/*
+version 20081011
+Matthew Dempsky
+Public domain.
+Derived from public domain code by D. J. Bernstein.
+*/
+
+#include "crypto_scalarmult.h"
+
+const unsigned char base[32] = {9};
+
+int crypto_scalarmult_base(unsigned char *q,
+  const unsigned char *n)
+{
+  return crypto_scalarmult(q,n,base);
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/crypto_scalarmult.h b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/crypto_scalarmult.h
new file mode 100644
index 00000000..d9f45a14
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/crypto_scalarmult.h
@@ -0,0 +1,18 @@
+#ifndef crypto_scalarmult_H
+#define crypto_scalarmult_H
+
+#include "crypto_scalarmult_curve25519.h"
+
+#define crypto_scalarmult crypto_scalarmult_curve25519
+/* CHEESEBURGER crypto_scalarmult_curve25519 */
+#define crypto_scalarmult_base crypto_scalarmult_curve25519_base
+/* CHEESEBURGER crypto_scalarmult_curve25519_base */
+#define crypto_scalarmult_BYTES crypto_scalarmult_curve25519_BYTES
+/* CHEESEBURGER crypto_scalarmult_curve25519_BYTES */
+#define crypto_scalarmult_SCALARBYTES crypto_scalarmult_curve25519_SCALARBYTES
+/* CHEESEBURGER crypto_scalarmult_curve25519_SCALARBYTES */
+#define crypto_scalarmult_PRIMITIVE "curve25519"
+#define crypto_scalarmult_IMPLEMENTATION crypto_scalarmult_curve25519_IMPLEMENTATION
+#define crypto_scalarmult_VERSION crypto_scalarmult_curve25519_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/implementors b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/implementors
new file mode 100644
index 00000000..aa551790
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/implementors
@@ -0,0 +1 @@
+Matthew Dempsky (Mochi Media)
diff --git a/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/smult.c b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/smult.c
new file mode 100644
index 00000000..6a479558
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_scalarmult_curve25519_ref/smult.c
@@ -0,0 +1,265 @@
+/*
+version 20081011
+Matthew Dempsky
+Public domain.
+Derived from public domain code by D. J. Bernstein.
+*/
+
+#include "crypto_scalarmult.h"
+
+static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
+  u += a[31] + b[31]; out[31] = u;
+}
+
+static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 218;
+  for (j = 0;j < 31;++j) {
+    u += a[j] + 65280 - b[j];
+    out[j] = u & 255;
+    u >>= 8;
+  }
+  u += a[31] - b[31];
+  out[31] = u;
+}
+
+static void squeeze(unsigned int a[32])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
+  u += a[31]; a[31] = u & 127;
+  u = 19 * (u >> 7);
+  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
+  u += a[31]; a[31] = u;
+}
+
+static const unsigned int minusp[32] = {
+ 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
+} ;
+
+static void freeze(unsigned int a[32])
+{
+  unsigned int aorig[32];
+  unsigned int j;
+  unsigned int negative;
+
+  for (j = 0;j < 32;++j) aorig[j] = a[j];
+  add(a,a,minusp);
+  negative = -((a[31] >> 7) & 1);
+  for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
+}
+
+static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
+{
+  unsigned int i;
+  unsigned int j;
+  unsigned int u;
+
+  for (i = 0;i < 32;++i) {
+    u = 0;
+    for (j = 0;j <= i;++j) u += a[j] * b[i - j];
+    for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
+    out[i] = u;
+  }
+  squeeze(out);
+}
+
+static void mult121665(unsigned int out[32],const unsigned int a[32])
+{
+  unsigned int j;
+  unsigned int u;
+
+  u = 0;
+  for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
+  u += 121665 * a[31]; out[31] = u & 127;
+  u = 19 * (u >> 7);
+  for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
+  u += out[j]; out[j] = u;
+}
+
+static void square(unsigned int out[32],const unsigned int a[32])
+{
+  unsigned int i;
+  unsigned int j;
+  unsigned int u;
+
+  for (i = 0;i < 32;++i) {
+    u = 0;
+    for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
+    for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
+    u *= 2;
+    if ((i & 1) == 0) {
+      u += a[i / 2] * a[i / 2];
+      u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
+    }
+    out[i] = u;
+  }
+  squeeze(out);
+}
+
+static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
+{
+  unsigned int j;
+  unsigned int t;
+  unsigned int bminus1;
+
+  bminus1 = b - 1;
+  for (j = 0;j < 64;++j) {
+    t = bminus1 & (r[j] ^ s[j]);
+    p[j] = s[j] ^ t;
+    q[j] = r[j] ^ t;
+  }
+}
+
+static void mainloop(unsigned int work[64],const unsigned char e[32])
+{
+  unsigned int xzm1[64];
+  unsigned int xzm[64];
+  unsigned int xzmb[64];
+  unsigned int xzm1b[64];
+  unsigned int xznb[64];
+  unsigned int xzn1b[64];
+  unsigned int a0[64];
+  unsigned int a1[64];
+  unsigned int b0[64];
+  unsigned int b1[64];
+  unsigned int c1[64];
+  unsigned int r[32];
+  unsigned int s[32];
+  unsigned int t[32];
+  unsigned int u[32];
+  unsigned int i;
+  unsigned int j;
+  unsigned int b;
+  int pos;
+
+  for (j = 0;j < 32;++j) xzm1[j] = work[j];
+  xzm1[32] = 1;
+  for (j = 33;j < 64;++j) xzm1[j] = 0;
+
+  xzm[0] = 1;
+  for (j = 1;j < 64;++j) xzm[j] = 0;
+
+  for (pos = 254;pos >= 0;--pos) {
+    b = e[pos / 8] >> (pos & 7);
+    b &= 1;
+    select(xzmb,xzm1b,xzm,xzm1,b);
+    add(a0,xzmb,xzmb + 32);
+    sub(a0 + 32,xzmb,xzmb + 32);
+    add(a1,xzm1b,xzm1b + 32);
+    sub(a1 + 32,xzm1b,xzm1b + 32);
+    square(b0,a0);
+    square(b0 + 32,a0 + 32);
+    mult(b1,a1,a0 + 32);
+    mult(b1 + 32,a1 + 32,a0);
+    add(c1,b1,b1 + 32);
+    sub(c1 + 32,b1,b1 + 32);
+    square(r,c1 + 32);
+    sub(s,b0,b0 + 32);
+    mult121665(t,s);
+    add(u,t,b0);
+    mult(xznb,b0,b0 + 32);
+    mult(xznb + 32,s,u);
+    square(xzn1b,c1);
+    mult(xzn1b + 32,r,work);
+    select(xzm,xzm1,xznb,xzn1b,b);
+  }
+
+  for (j = 0;j < 64;++j) work[j] = xzm[j];
+}
+
+static void recip(unsigned int out[32],const unsigned int z[32])
+{
+  unsigned int z2[32];
+  unsigned int z9[32];
+  unsigned int z11[32];
+  unsigned int z2_5_0[32];
+  unsigned int z2_10_0[32];
+  unsigned int z2_20_0[32];
+  unsigned int z2_50_0[32];
+  unsigned int z2_100_0[32];
+  unsigned int t0[32];
+  unsigned int t1[32];
+  int i;
+
+  /* 2 */ square(z2,z);
+  /* 4 */ square(t1,z2);
+  /* 8 */ square(t0,t1);
+  /* 9 */ mult(z9,t0,z);
+  /* 11 */ mult(z11,z9,z2);
+  /* 22 */ square(t0,z11);
+  /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ square(t0,z2_5_0);
+  /* 2^7 - 2^2 */ square(t1,t0);
+  /* 2^8 - 2^3 */ square(t0,t1);
+  /* 2^9 - 2^4 */ square(t1,t0);
+  /* 2^10 - 2^5 */ square(t0,t1);
+  /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ square(t0,z2_10_0);
+  /* 2^12 - 2^2 */ square(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ square(t0,z2_20_0);
+  /* 2^22 - 2^2 */ square(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ square(t1,t0);
+  /* 2^42 - 2^2 */ square(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
+  /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ square(t0,z2_50_0);
+  /* 2^52 - 2^2 */ square(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ square(t1,z2_100_0);
+  /* 2^102 - 2^2 */ square(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
+  /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ square(t0,t1);
+  /* 2^202 - 2^2 */ square(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ square(t1,t0);
+  /* 2^252 - 2^2 */ square(t0,t1);
+  /* 2^253 - 2^3 */ square(t1,t0);
+  /* 2^254 - 2^4 */ square(t0,t1);
+  /* 2^255 - 2^5 */ square(t1,t0);
+  /* 2^255 - 21 */ mult(out,t1,z11);
+}
+
+int crypto_scalarmult(unsigned char *q,
+  const unsigned char *n,
+  const unsigned char *p)
+{
+  unsigned int work[96];
+  unsigned char e[32];
+  unsigned int i;
+  for (i = 0;i < 32;++i) e[i] = n[i];
+  e[0] &= 248;
+  e[31] &= 127;
+  e[31] |= 64;
+  for (i = 0;i < 32;++i) work[i] = p[i];
+  mainloop(work,e);
+  recip(work + 32,work + 32);
+  mult(work + 64,work,work + 32);
+  freeze(work + 64);
+  for (i = 0;i < 32;++i) q[i] = work[64 + i];
+  return 0;
+}
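
A hedged sketch of the Diffie-Hellman exchange this primitive exists for: each side derives a public key with crypto_scalarmult_curve25519_base, and both arrive at the same 32-byte shared secret by applying crypto_scalarmult_curve25519 to the other side's public key. The secret keys below are fixed placeholders; real keys must come from a secure random source.

/* Hedged sketch: Curve25519 Diffie-Hellman agreement using the
   functions declared in crypto_scalarmult_curve25519.h. */
#include <string.h>
#include "crypto_scalarmult_curve25519.h"

int main(void)
{
  unsigned char alice_sk[32] = {1}, bob_sk[32] = {2};   /* placeholders */
  unsigned char alice_pk[32], bob_pk[32];
  unsigned char s1[32], s2[32];

  crypto_scalarmult_curve25519_base(alice_pk,alice_sk);
  crypto_scalarmult_curve25519_base(bob_pk,bob_sk);
  crypto_scalarmult_curve25519(s1,alice_sk,bob_pk);
  crypto_scalarmult_curve25519(s2,bob_sk,alice_pk);
  return memcmp(s1,s2,32) != 0;   /* 0 on success: both sides agree */
}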
diff --git a/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305.h b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305.h
new file mode 100644
index 00000000..63feddde
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305.h
@@ -0,0 +1,38 @@
+#ifndef crypto_secretbox_xsalsa20poly1305_H
+#define crypto_secretbox_xsalsa20poly1305_H
+
+#define crypto_secretbox_xsalsa20poly1305_ref_KEYBYTES 32
+#define crypto_secretbox_xsalsa20poly1305_ref_NONCEBYTES 24
+#define crypto_secretbox_xsalsa20poly1305_ref_ZEROBYTES 32
+#define crypto_secretbox_xsalsa20poly1305_ref_BOXZEROBYTES 16
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_secretbox_xsalsa20poly1305_ref(const std::string &,const std::string &,const std::string &);
+extern std::string crypto_secretbox_xsalsa20poly1305_ref_open(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_secretbox_xsalsa20poly1305_ref(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_secretbox_xsalsa20poly1305_ref_open(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_secretbox_xsalsa20poly1305 crypto_secretbox_xsalsa20poly1305_ref
+/* POTATO crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_xsalsa20poly1305_open crypto_secretbox_xsalsa20poly1305_ref_open
+/* POTATO crypto_secretbox_xsalsa20poly1305_ref_open crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_xsalsa20poly1305_KEYBYTES crypto_secretbox_xsalsa20poly1305_ref_KEYBYTES
+/* POTATO crypto_secretbox_xsalsa20poly1305_ref_KEYBYTES crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_xsalsa20poly1305_NONCEBYTES crypto_secretbox_xsalsa20poly1305_ref_NONCEBYTES
+/* POTATO crypto_secretbox_xsalsa20poly1305_ref_NONCEBYTES crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_xsalsa20poly1305_ZEROBYTES crypto_secretbox_xsalsa20poly1305_ref_ZEROBYTES
+/* POTATO crypto_secretbox_xsalsa20poly1305_ref_ZEROBYTES crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_xsalsa20poly1305_BOXZEROBYTES crypto_secretbox_xsalsa20poly1305_ref_BOXZEROBYTES
+/* POTATO crypto_secretbox_xsalsa20poly1305_ref_BOXZEROBYTES crypto_secretbox_xsalsa20poly1305_ref crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_xsalsa20poly1305_IMPLEMENTATION "crypto_secretbox/xsalsa20poly1305/ref"
+#ifndef crypto_secretbox_xsalsa20poly1305_ref_VERSION
+#define crypto_secretbox_xsalsa20poly1305_ref_VERSION "-"
+#endif
+#define crypto_secretbox_xsalsa20poly1305_VERSION crypto_secretbox_xsalsa20poly1305_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/api.h b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/api.h
new file mode 100644
index 00000000..f5aeb356
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 24
+#define CRYPTO_ZEROBYTES 32
+#define CRYPTO_BOXZEROBYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/box.c b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/box.c
new file mode 100644
index 00000000..f1abb06f
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/box.c
@@ -0,0 +1,35 @@
+#include "crypto_onetimeauth_poly1305.h"
+#include "crypto_stream_xsalsa20.h"
+#include "crypto_secretbox.h"
+
+int crypto_secretbox(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  int i;
+  if (mlen < 32) return -1;
+  crypto_stream_xsalsa20_xor(c,m,mlen,n,k);
+  crypto_onetimeauth_poly1305(c + 16,c + 32,mlen - 32,c);
+  for (i = 0;i < 16;++i) c[i] = 0;
+  return 0;
+}
+
+int crypto_secretbox_open(
+  unsigned char *m,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  int i;
+  unsigned char subkey[32];
+  if (clen < 32) return -1;
+  crypto_stream_xsalsa20(subkey,32,n,k);
+  if (crypto_onetimeauth_poly1305_verify(c + 16,c + 32,clen - 32,subkey) != 0) return -1;
+  crypto_stream_xsalsa20_xor(m,c,clen,n,k);
+  for (i = 0;i < 32;++i) m[i] = 0;
+  return 0;
+}
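As a usage sketch only (seal, buf_m and buf_c are hypothetical names): the functions above rely on NaCl's zero-padding convention, with ZEROBYTES = 32 leading zero bytes on the plaintext buffer and BOXZEROBYTES = 16 on the ciphertext buffer, matching the api.h constants below.

    #include <string.h>

    extern int crypto_secretbox(unsigned char *,const unsigned char *,
                                unsigned long long,const unsigned char *,
                                const unsigned char *);

    /* Sketch: buf_m and buf_c must each hold 32 + plen bytes. */
    static int seal(unsigned char *buf_c, unsigned char *buf_m,
                    const unsigned char *plain, unsigned long long plen,
                    const unsigned char n[24], const unsigned char k[32])
    {
      memset(buf_m, 0, 32);            /* first ZEROBYTES bytes must be zero */
      memcpy(buf_m + 32, plain, plen);
      if (crypto_secretbox(buf_c, buf_m, plen + 32, n, k) != 0) return -1;
      /* buf_c[0..15] are now zero; the useful output starts at buf_c + 16:
         a 16-byte Poly1305 tag followed by plen bytes of ciphertext. */
      return 0;
    }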
diff --git a/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/crypto_secretbox.h b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/crypto_secretbox.h
new file mode 100644
index 00000000..f454806d
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_secretbox_xsalsa20poly1305_ref/crypto_secretbox.h
@@ -0,0 +1,22 @@
+#ifndef crypto_secretbox_H
+#define crypto_secretbox_H
+
+#include "crypto_secretbox_xsalsa20poly1305.h"
+
+#define crypto_secretbox crypto_secretbox_xsalsa20poly1305
+/* CHEESEBURGER crypto_secretbox_xsalsa20poly1305 */
+#define crypto_secretbox_open crypto_secretbox_xsalsa20poly1305_open
+/* CHEESEBURGER crypto_secretbox_xsalsa20poly1305_open */
+#define crypto_secretbox_KEYBYTES crypto_secretbox_xsalsa20poly1305_KEYBYTES
+/* CHEESEBURGER crypto_secretbox_xsalsa20poly1305_KEYBYTES */
+#define crypto_secretbox_NONCEBYTES crypto_secretbox_xsalsa20poly1305_NONCEBYTES
+/* CHEESEBURGER crypto_secretbox_xsalsa20poly1305_NONCEBYTES */
+#define crypto_secretbox_ZEROBYTES crypto_secretbox_xsalsa20poly1305_ZEROBYTES
+/* CHEESEBURGER crypto_secretbox_xsalsa20poly1305_ZEROBYTES */
+#define crypto_secretbox_BOXZEROBYTES crypto_secretbox_xsalsa20poly1305_BOXZEROBYTES
+/* CHEESEBURGER crypto_secretbox_xsalsa20poly1305_BOXZEROBYTES */
+#define crypto_secretbox_PRIMITIVE "xsalsa20poly1305"
+#define crypto_secretbox_IMPLEMENTATION crypto_secretbox_xsalsa20poly1305_IMPLEMENTATION
+#define crypto_secretbox_VERSION crypto_secretbox_xsalsa20poly1305_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch.h b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch.h
new file mode 100644
index 00000000..ff62816d
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch.h
@@ -0,0 +1,39 @@
+#ifndef crypto_sign_edwards25519sha512batch_H
+#define crypto_sign_edwards25519sha512batch_H
+
+#define crypto_sign_edwards25519sha512batch_ref_SECRETKEYBYTES 64
+#define crypto_sign_edwards25519sha512batch_ref_PUBLICKEYBYTES 32
+#define crypto_sign_edwards25519sha512batch_ref_BYTES 64
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_sign_edwards25519sha512batch_ref(const std::string &,const std::string &);
+extern std::string crypto_sign_edwards25519sha512batch_ref_open(const std::string &,const std::string &);
+extern std::string crypto_sign_edwards25519sha512batch_ref_keypair(std::string *);
+extern "C" {
+#endif
+extern int crypto_sign_edwards25519sha512batch_ref(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_sign_edwards25519sha512batch_ref_open(unsigned char *,unsigned long long *,const unsigned char *,unsigned long long,const unsigned char *);
+extern int crypto_sign_edwards25519sha512batch_ref_keypair(unsigned char *,unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_sign_edwards25519sha512batch crypto_sign_edwards25519sha512batch_ref
+/* POTATO crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch */
+#define crypto_sign_edwards25519sha512batch_open crypto_sign_edwards25519sha512batch_ref_open
+/* POTATO crypto_sign_edwards25519sha512batch_ref_open crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch */
+#define crypto_sign_edwards25519sha512batch_keypair crypto_sign_edwards25519sha512batch_ref_keypair
+/* POTATO crypto_sign_edwards25519sha512batch_ref_keypair crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch */
+#define crypto_sign_edwards25519sha512batch_BYTES crypto_sign_edwards25519sha512batch_ref_BYTES
+/* POTATO crypto_sign_edwards25519sha512batch_ref_BYTES crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch */
+#define crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES crypto_sign_edwards25519sha512batch_ref_PUBLICKEYBYTES
+/* POTATO crypto_sign_edwards25519sha512batch_ref_PUBLICKEYBYTES crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch */
+#define crypto_sign_edwards25519sha512batch_SECRETKEYBYTES crypto_sign_edwards25519sha512batch_ref_SECRETKEYBYTES
+/* POTATO crypto_sign_edwards25519sha512batch_ref_SECRETKEYBYTES crypto_sign_edwards25519sha512batch_ref crypto_sign_edwards25519sha512batch */
+#define crypto_sign_edwards25519sha512batch_IMPLEMENTATION "crypto_sign/edwards25519sha512batch/ref"
+#ifndef crypto_sign_edwards25519sha512batch_ref_VERSION
+#define crypto_sign_edwards25519sha512batch_ref_VERSION "-"
+#endif
+#define crypto_sign_edwards25519sha512batch_VERSION crypto_sign_edwards25519sha512batch_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/api.h b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/api.h
new file mode 100644
index 00000000..352240c0
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/api.h
@@ -0,0 +1,3 @@
+#define CRYPTO_SECRETKEYBYTES 64
+#define CRYPTO_PUBLICKEYBYTES 32
+#define CRYPTO_BYTES 64
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/crypto_sign.h b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/crypto_sign.h
new file mode 100644
index 00000000..8998c90e
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/crypto_sign.h
@@ -0,0 +1,22 @@
+#ifndef crypto_sign_H
+#define crypto_sign_H
+
+#include "crypto_sign_edwards25519sha512batch.h"
+
+#define crypto_sign crypto_sign_edwards25519sha512batch
+/* CHEESEBURGER crypto_sign_edwards25519sha512batch */
+#define crypto_sign_open crypto_sign_edwards25519sha512batch_open
+/* CHEESEBURGER crypto_sign_edwards25519sha512batch_open */
+#define crypto_sign_keypair crypto_sign_edwards25519sha512batch_keypair
+/* CHEESEBURGER crypto_sign_edwards25519sha512batch_keypair */
+#define crypto_sign_BYTES crypto_sign_edwards25519sha512batch_BYTES
+/* CHEESEBURGER crypto_sign_edwards25519sha512batch_BYTES */
+#define crypto_sign_PUBLICKEYBYTES crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES
+/* CHEESEBURGER crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES */
+#define crypto_sign_SECRETKEYBYTES crypto_sign_edwards25519sha512batch_SECRETKEYBYTES
+/* CHEESEBURGER crypto_sign_edwards25519sha512batch_SECRETKEYBYTES */
+#define crypto_sign_PRIMITIVE "edwards25519sha512batch"
+#define crypto_sign_IMPLEMENTATION crypto_sign_edwards25519sha512batch_IMPLEMENTATION
+#define crypto_sign_VERSION crypto_sign_edwards25519sha512batch_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/fe25519.c b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/fe25519.c
new file mode 100644
index 00000000..a9f806d2
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/fe25519.c
@@ -0,0 +1,345 @@
+#include "fe25519.h"
+
+#define WINDOWSIZE 4 /* Should be 1,2, or 4 */
+#define WINDOWMASK ((1<<WINDOWSIZE)-1)
+
+static void reduce_add_sub(fe25519 *r)
+{
+  crypto_uint32 t;
+  int i,rep;
+
+  for(rep=0;rep<4;rep++)
+  {
+    t = r->v[31] >> 7;
+    r->v[31] &= 127;
+    t *= 19;
+    r->v[0] += t;
+    for(i=0;i<31;i++)
+    {
+      t = r->v[i] >> 8;
+      r->v[i+1] += t;
+      r->v[i] &= 255;
+    }
+  }
+}
+
+static void reduce_mul(fe25519 *r)
+{
+  crypto_uint32 t;
+  int i,rep;
+
+  for(rep=0;rep<2;rep++)
+  {
+    t = r->v[31] >> 7;
+    r->v[31] &= 127;
+    t *= 19;
+    r->v[0] += t;
+    for(i=0;i<31;i++)
+    {
+      t = r->v[i] >> 8;
+      r->v[i+1] += t;
+      r->v[i] &= 255;
+    }
+  }
+}
+
+/* reduction modulo 2^255-19 */
+static void freeze(fe25519 *r) 
+{
+  int i;
+  unsigned int m = (r->v[31] == 127);
+  for(i=30;i>1;i--)
+    m *= (r->v[i] == 255);
+  m *= (r->v[0] >= 237);
+
+  r->v[31] -= m*127;
+  for(i=30;i>0;i--)
+    r->v[i] -= m*255;
+  r->v[0] -= m*237;
+}
+
+/*freeze input before calling isone*/
+static int isone(const fe25519 *x)
+{
+  int i;
+  int r = (x->v[0] == 1);
+  for(i=1;i<32;i++) 
+    r *= (x->v[i] == 0);
+  return r;
+}
+
+/*freeze input before calling iszero*/
+static int iszero(const fe25519 *x)
+{
+  int i;
+  int r = (x->v[0] == 0);
+  for(i=1;i<32;i++) 
+    r *= (x->v[i] == 0);
+  return r;
+}
+
+
+static int issquare(const fe25519 *x)
+{
+  unsigned char e[32] = {0xf6,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3f}; /* (p-1)/2 */
+  fe25519 t;
+
+  fe25519_pow(&t,x,e);
+  freeze(&t);
+  return isone(&t) || iszero(&t);
+}
+
+void fe25519_unpack(fe25519 *r, const unsigned char x[32])
+{
+  int i;
+  for(i=0;i<32;i++) r->v[i] = x[i];
+  r->v[31] &= 127;
+}
+
+/* Assumes the input x is reduced mod 2^255 */
+void fe25519_pack(unsigned char r[32], const fe25519 *x)
+{
+  int i;
+  for(i=0;i<32;i++) 
+    r[i] = x->v[i];
+  
+  /* freeze byte array */
+  unsigned int m = (r[31] == 127); /* XXX: some compilers might use branches; fix */
+  for(i=30;i>1;i--)
+    m *= (r[i] == 255);
+  m *= (r[0] >= 237);
+  r[31] -= m*127;
+  for(i=30;i>0;i--)
+    r[i] -= m*255;
+  r[0] -= m*237;
+}
+
+void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b)
+{
+  unsigned char nb = 1-b;
+  int i;
+  for(i=0;i<32;i++) r->v[i] = nb * r->v[i] + b * x->v[i];
+}
+
+unsigned char fe25519_getparity(const fe25519 *x)
+{
+  fe25519 t;
+  int i;
+  for(i=0;i<32;i++) t.v[i] = x->v[i];
+  freeze(&t);
+  return t.v[0] & 1;
+}
+
+void fe25519_setone(fe25519 *r)
+{
+  int i;
+  r->v[0] = 1;
+  for(i=1;i<32;i++) r->v[i]=0;
+}
+
+void fe25519_setzero(fe25519 *r)
+{
+  int i;
+  for(i=0;i<32;i++) r->v[i]=0;
+}
+
+void fe25519_neg(fe25519 *r, const fe25519 *x)
+{
+  fe25519 t;
+  int i;
+  for(i=0;i<32;i++) t.v[i]=x->v[i];
+  fe25519_setzero(r);
+  fe25519_sub(r, r, &t);
+}
+
+void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y)
+{
+  int i;
+  for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i];
+  reduce_add_sub(r);
+}
+
+void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y)
+{
+  int i;
+  crypto_uint32 t[32];
+  t[0] = x->v[0] + 0x1da;
+  t[31] = x->v[31] + 0xfe;
+  for(i=1;i<31;i++) t[i] = x->v[i] + 0x1fe;
+  for(i=0;i<32;i++) r->v[i] = t[i] - y->v[i];
+  reduce_add_sub(r);
+}
+
+void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y)
+{
+  int i,j;
+  crypto_uint32 t[63];
+  for(i=0;i<63;i++)t[i] = 0;
+
+  for(i=0;i<32;i++)
+    for(j=0;j<32;j++)
+      t[i+j] += x->v[i] * y->v[j];
+
+  for(i=32;i<63;i++)
+    r->v[i-32] = t[i-32] + 38*t[i]; 
+  r->v[31] = t[31]; /* result now in r[0]...r[31] */
+
+  reduce_mul(r);
+}
+
+void fe25519_square(fe25519 *r, const fe25519 *x)
+{
+  fe25519_mul(r, x, x);
+}
+
+/*XXX: Make constant time! */
+void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e)
+{
+  /*
+  fe25519 g;
+  fe25519_setone(&g);
+  int i;
+  unsigned char j;
+  for(i=32;i>0;i--)
+  {
+    for(j=128;j>0;j>>=1)
+    {
+      fe25519_square(&g,&g);
+      if(e[i-1] & j) 
+        fe25519_mul(&g,&g,x);
+    }
+  }
+  for(i=0;i<32;i++) r->v[i] = g.v[i];
+  */
+  fe25519 g;
+  fe25519_setone(&g);
+  int i,j,k;
+  fe25519 pre[(1 << WINDOWSIZE)];
+  fe25519 t;
+  unsigned char w;
+
+  // Precomputation
+  fe25519_setone(pre);
+  pre[1] = *x;
+  for(i=2;i<(1<<WINDOWSIZE);i+=2)
+  {
+    fe25519_square(pre+i, pre+i/2);
+    fe25519_mul(pre+i+1, pre+i, pre+1);
+  }
+
+  // Fixed-window scalar multiplication
+  for(i=32;i>0;i--)
+  {
+    for(j=8-WINDOWSIZE;j>=0;j-=WINDOWSIZE)
+    {
+      for(k=0;k<WINDOWSIZE;k++)
+        fe25519_square(&g, &g);
+      // Cache-timing resistant loading of precomputed value:
+      w = (e[i-1]>>j) & WINDOWMASK;
+      t = pre[0];
+      for(k=1;k<(1<<WINDOWSIZE);k++)
+        fe25519_cmov(&t, &pre[k], k==w);
+      fe25519_mul(&g, &g, &t);
+    }
+  }
+  *r = g;
+}
+
+/* Return 0 on success, -1 otherwise */
+int fe25519_sqrt_vartime(fe25519 *r, const fe25519 *x, unsigned char parity)
+{
+  /* See HAC, Alg. 3.37 */
+  if (!issquare(x)) return -1;
+  unsigned char e[32] = {0xfb,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x1f}; /* (p-1)/4 */
+  unsigned char e2[32] = {0xfe,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x0f}; /* (p+3)/8 */
+  unsigned char e3[32] = {0xfd,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x0f}; /* (p-5)/8 */
+  fe25519 p = {{0}};
+  fe25519 d;
+  int i;
+  fe25519_pow(&d,x,e);
+  freeze(&d);
+  if(isone(&d))
+    fe25519_pow(r,x,e2);
+  else
+  {
+    for(i=0;i<32;i++)
+      d.v[i] = 4*x->v[i];
+    fe25519_pow(&d,&d,e3);
+    for(i=0;i<32;i++)
+      r->v[i] = 2*x->v[i];
+    fe25519_mul(r,r,&d);
+  }
+  freeze(r);
+  if((r->v[0] & 1) != (parity & 1))
+  {
+    fe25519_sub(r,&p,r);
+  }
+  return 0;
+}
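As a reading aid: for p = 2^255 - 19, which satisfies p ≡ 5 (mod 8), a square x has x^((p-1)/4) = ±1, and the two branches above are the standard root formulas

    \sqrt{x} = x^{(p+3)/8} \text{ if } x^{(p-1)/4} = 1, \qquad
    \sqrt{x} = 2x\,(4x)^{(p-5)/8} \text{ if } x^{(p-1)/4} = -1,

with the final fe25519_sub negating r when its parity does not match the requested one.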
+
+void fe25519_invert(fe25519 *r, const fe25519 *x)
+{
+	fe25519 z2;
+	fe25519 z9;
+	fe25519 z11;
+	fe25519 z2_5_0;
+	fe25519 z2_10_0;
+	fe25519 z2_20_0;
+	fe25519 z2_50_0;
+	fe25519 z2_100_0;
+	fe25519 t0;
+	fe25519 t1;
+	int i;
+	
+	/* 2 */ fe25519_square(&z2,x);
+	/* 4 */ fe25519_square(&t1,&z2);
+	/* 8 */ fe25519_square(&t0,&t1);
+	/* 9 */ fe25519_mul(&z9,&t0,x);
+	/* 11 */ fe25519_mul(&z11,&z9,&z2);
+	/* 22 */ fe25519_square(&t0,&z11);
+	/* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_5_0,&t0,&z9);
+
+	/* 2^6 - 2^1 */ fe25519_square(&t0,&z2_5_0);
+	/* 2^7 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^8 - 2^3 */ fe25519_square(&t0,&t1);
+	/* 2^9 - 2^4 */ fe25519_square(&t1,&t0);
+	/* 2^10 - 2^5 */ fe25519_square(&t0,&t1);
+	/* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t0,&z2_5_0);
+
+	/* 2^11 - 2^1 */ fe25519_square(&t0,&z2_10_0);
+	/* 2^12 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t1,&z2_10_0);
+
+	/* 2^21 - 2^1 */ fe25519_square(&t0,&z2_20_0);
+	/* 2^22 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^40 - 2^0 */ fe25519_mul(&t0,&t1,&z2_20_0);
+
+	/* 2^41 - 2^1 */ fe25519_square(&t1,&t0);
+	/* 2^42 - 2^2 */ fe25519_square(&t0,&t1);
+	/* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); }
+	/* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t0,&z2_10_0);
+
+	/* 2^51 - 2^1 */ fe25519_square(&t0,&z2_50_0);
+	/* 2^52 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t1,&z2_50_0);
+
+	/* 2^101 - 2^1 */ fe25519_square(&t1,&z2_100_0);
+	/* 2^102 - 2^2 */ fe25519_square(&t0,&t1);
+	/* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); }
+	/* 2^200 - 2^0 */ fe25519_mul(&t1,&t0,&z2_100_0);
+
+	/* 2^201 - 2^1 */ fe25519_square(&t0,&t1);
+	/* 2^202 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^250 - 2^0 */ fe25519_mul(&t0,&t1,&z2_50_0);
+
+	/* 2^251 - 2^1 */ fe25519_square(&t1,&t0);
+	/* 2^252 - 2^2 */ fe25519_square(&t0,&t1);
+	/* 2^253 - 2^3 */ fe25519_square(&t1,&t0);
+	/* 2^254 - 2^4 */ fe25519_square(&t0,&t1);
+	/* 2^255 - 2^5 */ fe25519_square(&t1,&t0);
+	/* 2^255 - 21 */ fe25519_mul(r,&t1,&z11);
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/fe25519.h b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/fe25519.h
new file mode 100644
index 00000000..e07ddba7
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/fe25519.h
@@ -0,0 +1,54 @@
+#ifndef FE25519_H
+#define FE25519_H
+
+#define fe25519 crypto_sign_edwards25519sha512batch_fe25519
+#define fe25519_unpack crypto_sign_edwards25519sha512batch_fe25519_unpack
+#define fe25519_pack crypto_sign_edwards25519sha512batch_fe25519_pack
+#define fe25519_cmov crypto_sign_edwards25519sha512batch_fe25519_cmov
+#define fe25519_setone crypto_sign_edwards25519sha512batch_fe25519_setone
+#define fe25519_setzero crypto_sign_edwards25519sha512batch_fe25519_setzero
+#define fe25519_neg crypto_sign_edwards25519sha512batch_fe25519_neg
+#define fe25519_getparity crypto_sign_edwards25519sha512batch_fe25519_getparity
+#define fe25519_add crypto_sign_edwards25519sha512batch_fe25519_add
+#define fe25519_sub crypto_sign_edwards25519sha512batch_fe25519_sub
+#define fe25519_mul crypto_sign_edwards25519sha512batch_fe25519_mul
+#define fe25519_square crypto_sign_edwards25519sha512batch_fe25519_square
+#define fe25519_pow crypto_sign_edwards25519sha512batch_fe25519_pow
+#define fe25519_sqrt_vartime crypto_sign_edwards25519sha512batch_fe25519_sqrt_vartime
+#define fe25519_invert crypto_sign_edwards25519sha512batch_fe25519_invert
+
+#include "crypto_uint32.h"
+
+typedef struct {
+  crypto_uint32 v[32]; 
+} fe25519;
+
+void fe25519_unpack(fe25519 *r, const unsigned char x[32]);
+
+void fe25519_pack(unsigned char r[32], const fe25519 *x);
+
+void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b);
+
+void fe25519_setone(fe25519 *r);
+
+void fe25519_setzero(fe25519 *r);
+
+void fe25519_neg(fe25519 *r, const fe25519 *x);
+
+unsigned char fe25519_getparity(const fe25519 *x);
+
+void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y);
+
+void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y);
+
+void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y);
+
+void fe25519_square(fe25519 *r, const fe25519 *x);
+
+void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e);
+
+int fe25519_sqrt_vartime(fe25519 *r, const fe25519 *x, unsigned char parity);
+
+void fe25519_invert(fe25519 *r, const fe25519 *x);
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/ge25519.c b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/ge25519.c
new file mode 100644
index 00000000..a57b8f3c
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/ge25519.c
@@ -0,0 +1,227 @@
+#include "fe25519.h"
+#include "sc25519.h"
+#include "ge25519.h"
+
+/* 
+ * Arithmetic on the twisted Edwards curve -x^2 + y^2 = 1 + dx^2y^2 
+ * with d = -(121665/121666) = 37095705934669439343138083508754565189542113879843219016388785533085940283555
+ * Base point: (15112221349535400772501151409588531511454012693041857206046113283949847762202,46316835694926478169428394003475163141307993866256225615783033603165251855960);
+ */
+
+typedef struct
+{
+  fe25519 x;
+  fe25519 z;
+  fe25519 y;
+  fe25519 t;
+} ge25519_p1p1;
+
+typedef struct
+{
+  fe25519 x;
+  fe25519 y;
+  fe25519 z;
+} ge25519_p2;
+
+#define ge25519_p3 ge25519
+
+/* Windowsize for fixed-window scalar multiplication */
+#define WINDOWSIZE 2                      /* Should be 1,2, or 4 */
+#define WINDOWMASK ((1<<WINDOWSIZE)-1)
+
+/* packed parameter d in the Edwards curve equation */
+static const unsigned char ecd[32] = {0xA3, 0x78, 0x59, 0x13, 0xCA, 0x4D, 0xEB, 0x75, 0xAB, 0xD8, 0x41, 0x41, 0x4D, 0x0A, 0x70, 0x00, 
+                                      0x98, 0xE8, 0x79, 0x77, 0x79, 0x40, 0xC7, 0x8C, 0x73, 0xFE, 0x6F, 0x2B, 0xEE, 0x6C, 0x03, 0x52};
+
+/* Packed coordinates of the base point */
+static const unsigned char ge25519_base_x[32] = {0x1A, 0xD5, 0x25, 0x8F, 0x60, 0x2D, 0x56, 0xC9, 0xB2, 0xA7, 0x25, 0x95, 0x60, 0xC7, 0x2C, 0x69, 
+                                                 0x5C, 0xDC, 0xD6, 0xFD, 0x31, 0xE2, 0xA4, 0xC0, 0xFE, 0x53, 0x6E, 0xCD, 0xD3, 0x36, 0x69, 0x21};
+static const unsigned char ge25519_base_y[32] = {0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 
+                                                 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66};
+static const unsigned char ge25519_base_z[32] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const unsigned char ge25519_base_t[32] = {0xA3, 0xDD, 0xB7, 0xA5, 0xB3, 0x8A, 0xDE, 0x6D, 0xF5, 0x52, 0x51, 0x77, 0x80, 0x9F, 0xF0, 0x20, 
+                                                 0x7D, 0xE3, 0xAB, 0x64, 0x8E, 0x4E, 0xEA, 0x66, 0x65, 0x76, 0x8B, 0xD7, 0x0F, 0x5F, 0x87, 0x67};
+
+/* Packed coordinates of the neutral element */
+static const unsigned char ge25519_neutral_x[32] = {0};
+static const unsigned char ge25519_neutral_y[32] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const unsigned char ge25519_neutral_z[32] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const unsigned char ge25519_neutral_t[32] = {0};
+
+static void p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p)
+{
+  fe25519_mul(&r->x, &p->x, &p->t);
+  fe25519_mul(&r->y, &p->y, &p->z);
+  fe25519_mul(&r->z, &p->z, &p->t);
+}
+
+static void p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p)
+{
+  p1p1_to_p2((ge25519_p2 *)r, p);
+  fe25519_mul(&r->t, &p->x, &p->y);
+}
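As a reading aid for the three point types (the interpretation is read off the conversion code above, not stated in the original comments): a ge25519_p2 triple (X:Y:Z) represents the affine point (X/Z, Y/Z), a ge25519_p3 value additionally carries T = XY/Z, and a ge25519_p1p1 value (x, z, y, t) represents the point (x/z, y/t). The conversions simply clear denominators:

    p1p1 -> p2:  (X, Y, Z) = (x*t, y*z, z*t)
    p1p1 -> p3:  additionally T = x*y = X*Y/Z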
+
+/* Constant-time version of: if(b) r = p */
+static void cmov_p3(ge25519_p3 *r, const ge25519_p3 *p, unsigned char b)
+{
+  fe25519_cmov(&r->x, &p->x, b);
+  fe25519_cmov(&r->y, &p->y, b);
+  fe25519_cmov(&r->z, &p->z, b);
+  fe25519_cmov(&r->t, &p->t, b);
+}
+
+/* See http://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd */
+static void dbl_p1p1(ge25519_p1p1 *r, const ge25519_p2 *p)
+{
+  fe25519 a,b,c,d;
+  fe25519_square(&a, &p->x);
+  fe25519_square(&b, &p->y);
+  fe25519_square(&c, &p->z);
+  fe25519_add(&c, &c, &c);
+  fe25519_neg(&d, &a);
+
+  fe25519_add(&r->x, &p->x, &p->y);
+  fe25519_square(&r->x, &r->x);
+  fe25519_sub(&r->x, &r->x, &a);
+  fe25519_sub(&r->x, &r->x, &b);
+  fe25519_add(&r->z, &d, &b);
+  fe25519_sub(&r->t, &r->z, &c);
+  fe25519_sub(&r->y, &d, &b);
+}
+
+static void add_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_p3 *q)
+{
+  fe25519 a, b, c, d, t, fd;
+  fe25519_unpack(&fd, ecd);
+  
+  fe25519_sub(&a, &p->y, &p->x); // A = (Y1-X1)*(Y2-X2)
+  fe25519_sub(&t, &q->y, &q->x);
+  fe25519_mul(&a, &a, &t);
+  fe25519_add(&b, &p->x, &p->y); // B = (Y1+X1)*(Y2+X2)
+  fe25519_add(&t, &q->x, &q->y);
+  fe25519_mul(&b, &b, &t);
+  fe25519_mul(&c, &p->t, &q->t); //C = T1*k*T2
+  fe25519_mul(&c, &c, &fd);
+  fe25519_add(&c, &c, &c);       //XXX: Can save this addition by precomputing 2*ecd
+  fe25519_mul(&d, &p->z, &q->z); //D = Z1*2*Z2
+  fe25519_add(&d, &d, &d);
+  fe25519_sub(&r->x, &b, &a); // E = B-A
+  fe25519_sub(&r->t, &d, &c); // F = D-C
+  fe25519_add(&r->z, &d, &c); // G = D+C
+  fe25519_add(&r->y, &b, &a); // H = B+A
+}
+
+/* ********************************************************************
+ *                    EXPORTED FUNCTIONS
+ ******************************************************************** */
+
+/* return 0 on success, -1 otherwise */
+int ge25519_unpack_vartime(ge25519_p3 *r, const unsigned char p[32])
+{
+  int ret;
+  fe25519 t, fd;
+  fe25519_setone(&r->z);
+  fe25519_unpack(&fd, ecd);
+  unsigned char par = p[31] >> 7;
+  fe25519_unpack(&r->y, p);
+  fe25519_square(&r->x, &r->y);
+  fe25519_mul(&t, &r->x, &fd);
+  fe25519_sub(&r->x, &r->x, &r->z);
+  fe25519_add(&t, &r->z, &t);
+  fe25519_invert(&t, &t);
+  fe25519_mul(&r->x, &r->x, &t);
+  ret = fe25519_sqrt_vartime(&r->x, &r->x, par);
+  fe25519_mul(&r->t, &r->x, &r->y);
+  return ret;
+}
+
+void ge25519_pack(unsigned char r[32], const ge25519_p3 *p)
+{
+  fe25519 tx, ty, zi;
+  fe25519_invert(&zi, &p->z); 
+  fe25519_mul(&tx, &p->x, &zi);
+  fe25519_mul(&ty, &p->y, &zi);
+  fe25519_pack(r, &ty);
+  r[31] ^= fe25519_getparity(&tx) << 7;
+}
+
+void ge25519_add(ge25519_p3 *r, const ge25519_p3 *p, const ge25519_p3 *q)
+{
+  ge25519_p1p1 grp1p1;
+  add_p1p1(&grp1p1, p, q);
+  p1p1_to_p3(r, &grp1p1);
+}
+
+void ge25519_double(ge25519_p3 *r, const ge25519_p3 *p)
+{
+  ge25519_p1p1 grp1p1;
+  dbl_p1p1(&grp1p1, (ge25519_p2 *)p);
+  p1p1_to_p3(r, &grp1p1);
+}
+
+void ge25519_scalarmult(ge25519_p3 *r, const ge25519_p3 *p, const sc25519 *s)
+{
+  int i,j,k;
+  ge25519_p3 g;  
+  fe25519_unpack(&g.x, ge25519_neutral_x);
+  fe25519_unpack(&g.y, ge25519_neutral_y);
+  fe25519_unpack(&g.z, ge25519_neutral_z);
+  fe25519_unpack(&g.t, ge25519_neutral_t);
+
+  ge25519_p3 pre[(1 << WINDOWSIZE)];
+  ge25519_p3 t;
+  ge25519_p1p1 tp1p1;
+  unsigned char w;
+  unsigned char sb[32];
+  sc25519_to32bytes(sb, s);
+
+  // Precomputation
+  pre[0] = g;
+  pre[1] = *p;
+  for(i=2;i<(1<<WINDOWSIZE);i+=2)
+  {
+    dbl_p1p1(&tp1p1, (ge25519_p2 *)(pre+i/2));
+    p1p1_to_p3(pre+i, &tp1p1);
+    add_p1p1(&tp1p1, pre+i, pre+1);
+    p1p1_to_p3(pre+i+1, &tp1p1);
+  }
+
+  // Fixed-window scalar multiplication
+  for(i=32;i>0;i--)
+  {
+    for(j=8-WINDOWSIZE;j>=0;j-=WINDOWSIZE)
+    {
+      for(k=0;k<WINDOWSIZE-1;k++)
+      {
+        dbl_p1p1(&tp1p1, (ge25519_p2 *)&g);
+        p1p1_to_p2((ge25519_p2 *)&g, &tp1p1);
+      }
+      dbl_p1p1(&tp1p1, (ge25519_p2 *)&g);
+      p1p1_to_p3(&g, &tp1p1);
+      // Cache-timing resistant loading of precomputed value:
+      w = (sb[i-1]>>j) & WINDOWMASK;
+      t = pre[0];
+      for(k=1;k<(1<<WINDOWSIZE);k++)
+        cmov_p3(&t, &pre[k], k==w);
+
+      add_p1p1(&tp1p1, &g, &t);
+      if(j != 0) p1p1_to_p2((ge25519_p2 *)&g, &tp1p1);
+      else p1p1_to_p3(&g, &tp1p1); /* convert to p3 representation at the end */
+    }
+  }
+  r->x = g.x;
+  r->y = g.y;
+  r->z = g.z;
+  r->t = g.t;
+}
+
+void ge25519_scalarmult_base(ge25519_p3 *r, const sc25519 *s)
+{
+  /* XXX: Better algorithm for known-base-point scalar multiplication */
+  ge25519_p3 t;
+  fe25519_unpack(&t.x, ge25519_base_x);
+  fe25519_unpack(&t.y, ge25519_base_y);
+  fe25519_unpack(&t.z, ge25519_base_z);
+  fe25519_unpack(&t.t, ge25519_base_t);
+  ge25519_scalarmult(r, &t, s);          
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/ge25519.h b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/ge25519.h
new file mode 100644
index 00000000..49ad163a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/ge25519.h
@@ -0,0 +1,34 @@
+#ifndef GE25519_H
+#define GE25519_H
+
+#include "fe25519.h"
+#include "sc25519.h"
+
+#define ge25519 crypto_sign_edwards25519sha512batch_ge25519
+#define ge25519_unpack_vartime crypto_sign_edwards25519sha512batch_ge25519_unpack_vartime
+#define ge25519_pack crypto_sign_edwards25519sha512batch_ge25519_pack
+#define ge25519_add crypto_sign_edwards25519sha512batch_ge25519_add
+#define ge25519_double crypto_sign_edwards25519sha512batch_ge25519_double
+#define ge25519_scalarmult crypto_sign_edwards25519sha512batch_ge25519_scalarmult
+#define ge25519_scalarmult_base crypto_sign_edwards25519sha512batch_ge25519_scalarmult_base
+
+typedef struct {
+  fe25519 x;
+  fe25519 y;
+  fe25519 z;
+  fe25519 t;
+} ge25519;
+
+int ge25519_unpack_vartime(ge25519 *r, const unsigned char p[32]);
+
+void ge25519_pack(unsigned char r[32], const ge25519 *p);
+
+void ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q);
+
+void ge25519_double(ge25519 *r, const ge25519 *p);
+
+void ge25519_scalarmult(ge25519 *r, const ge25519 *p, const sc25519 *s);
+
+void ge25519_scalarmult_base(ge25519 *r, const sc25519 *s);
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sc25519.c b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sc25519.c
new file mode 100644
index 00000000..5f27eb1b
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sc25519.c
@@ -0,0 +1,146 @@
+#include "sc25519.h"
+
+/*Arithmetic modulo the group order n = 2^252 +  27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989 */
+
+static const crypto_uint32 m[32] = {0xED, 0xD3, 0xF5, 0x5C, 0x1A, 0x63, 0x12, 0x58, 0xD6, 0x9C, 0xF7, 0xA2, 0xDE, 0xF9, 0xDE, 0x14, 
+                                    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10};
+
+static const crypto_uint32 mu[33] = {0x1B, 0x13, 0x2C, 0x0A, 0xA3, 0xE5, 0x9C, 0xED, 0xA7, 0x29, 0x63, 0x08, 0x5D, 0x21, 0x06, 0x21, 
+                                     0xEB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F};
+
+/* Reduce coefficients of r before calling reduce_add_sub */
+static void reduce_add_sub(sc25519 *r)
+{
+  int i, b, pb=0, nb;
+  unsigned char t[32];
+
+  for(i=0;i<32;i++) 
+  {
+    b = (r->v[i]<pb+m[i]);
+    t[i] = r->v[i]-pb-m[i]+b*256;
+    pb = b;
+  }
+  nb = 1-b;
+  for(i=0;i<32;i++) 
+    r->v[i] = r->v[i]*b + t[i]*nb;
+}
+
+/* Reduce coefficients of x before calling barrett_reduce */
+static void barrett_reduce(sc25519 *r, const crypto_uint32 x[64])
+{
+  /* See HAC, Alg. 14.42 */
+  int i,j;
+  crypto_uint32 q2[66] = {0};
+  crypto_uint32 *q3 = q2 + 33;
+  crypto_uint32 r1[33];
+  crypto_uint32 r2[33] = {0};
+  crypto_uint32 carry;
+  int b, pb=0;
+
+  for(i=0;i<33;i++)
+    for(j=0;j<33;j++)
+      if(i+j >= 31) q2[i+j] += mu[i]*x[j+31];
+  carry = q2[31] >> 8;
+  q2[32] += carry;
+  carry = q2[32] >> 8;
+  q2[33] += carry;
+
+  for(i=0;i<33;i++)r1[i] = x[i];
+  for(i=0;i<32;i++)
+    for(j=0;j<33;j++)
+      if(i+j < 33) r2[i+j] += m[i]*q3[j];
+
+  for(i=0;i<32;i++)
+  {
+    carry = r2[i] >> 8;
+    r2[i+1] += carry;
+    r2[i] &= 0xff;
+  }
+
+  for(i=0;i<32;i++) 
+  {
+    b = (r1[i]<pb+r2[i]);
+    r->v[i] = r1[i]-pb-r2[i]+b*256;
+    pb = b;
+  }
+
+  /* XXX: Can it really happen that r < 0?  See HAC, Alg. 14.42, Step 3.
+   * If so: handle it here!
+   */
+
+  reduce_add_sub(r);
+  reduce_add_sub(r);
+}
+
+/*
+static int iszero(const sc25519 *x)
+{
+  // Implement
+  return 0;
+}
+*/
+
+void sc25519_from32bytes(sc25519 *r, const unsigned char x[32])
+{
+  int i;
+  crypto_uint32 t[64] = {0};
+  for(i=0;i<32;i++) t[i] = x[i];
+  barrett_reduce(r, t);
+}
+
+void sc25519_from64bytes(sc25519 *r, const unsigned char x[64])
+{
+  int i;
+  crypto_uint32 t[64] = {0};
+  for(i=0;i<64;i++) t[i] = x[i];
+  barrett_reduce(r, t);
+}
+
+/* XXX: What we actually want for crypto_group is probably just something like
+ * void sc25519_frombytes(sc25519 *r, const unsigned char *x, size_t xlen)
+ */
+
+void sc25519_to32bytes(unsigned char r[32], const sc25519 *x)
+{
+  int i;
+  for(i=0;i<32;i++) r[i] = x->v[i];
+}
+
+void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y)
+{
+  int i, carry;
+  for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i];
+  for(i=0;i<31;i++)
+  {
+    carry = r->v[i] >> 8;
+    r->v[i+1] += carry;
+    r->v[i] &= 0xff;
+  }
+  reduce_add_sub(r);
+}
+
+void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y)
+{
+  int i,j,carry;
+  crypto_uint32 t[64];
+  for(i=0;i<64;i++)t[i] = 0;
+
+  for(i=0;i<32;i++)
+    for(j=0;j<32;j++)
+      t[i+j] += x->v[i] * y->v[j];
+
+  /* Reduce coefficients */
+  for(i=0;i<63;i++)
+  {
+    carry = t[i] >> 8;
+    t[i+1] += carry;
+    t[i] &= 0xff;
+  }
+
+  barrett_reduce(r, t);
+}
+
+void sc25519_square(sc25519 *r, const sc25519 *x)
+{
+  sc25519_mul(r, x, x);
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sc25519.h b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sc25519.h
new file mode 100644
index 00000000..48584a85
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sc25519.h
@@ -0,0 +1,51 @@
+#ifndef SC25519_H
+#define SC25519_H
+
+#define sc25519 crypto_sign_edwards25519sha512batch_sc25519
+#define sc25519_from32bytes crypto_sign_edwards25519sha512batch_sc25519_from32bytes
+#define sc25519_from64bytes crypto_sign_edwards25519sha512batch_sc25519_from64bytes
+#define sc25519_to32bytes crypto_sign_edwards25519sha512batch_sc25519_to32bytes
+#define sc25519_pack crypto_sign_edwards25519sha512batch_sc25519_pack
+#define sc25519_getparity crypto_sign_edwards25519sha512batch_sc25519_getparity
+#define sc25519_setone crypto_sign_edwards25519sha512batch_sc25519_setone
+#define sc25519_setzero crypto_sign_edwards25519sha512batch_sc25519_setzero
+#define sc25519_neg crypto_sign_edwards25519sha512batch_sc25519_neg
+#define sc25519_add crypto_sign_edwards25519sha512batch_sc25519_add
+#define sc25519_sub crypto_sign_edwards25519sha512batch_sc25519_sub
+#define sc25519_mul crypto_sign_edwards25519sha512batch_sc25519_mul
+#define sc25519_square crypto_sign_edwards25519sha512batch_sc25519_square
+#define sc25519_invert crypto_sign_edwards25519sha512batch_sc25519_invert
+
+#include "crypto_uint32.h"
+
+typedef struct {
+  crypto_uint32 v[32]; 
+} sc25519;
+
+void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]);
+
+void sc25519_from64bytes(sc25519 *r, const unsigned char x[64]);
+
+void sc25519_to32bytes(unsigned char r[32], const sc25519 *x);
+
+void sc25519_pack(unsigned char r[32], const sc25519 *x);
+
+unsigned char sc25519_getparity(const sc25519 *x);
+
+void sc25519_setone(sc25519 *r);
+
+void sc25519_setzero(sc25519 *r);
+
+void sc25519_neg(sc25519 *r, const sc25519 *x);
+
+void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y);
+
+void sc25519_sub(sc25519 *r, const sc25519 *x, const sc25519 *y);
+
+void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y);
+
+void sc25519_square(sc25519 *r, const sc25519 *x);
+
+void sc25519_invert(sc25519 *r, const sc25519 *x);
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sign.c b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sign.c
new file mode 100644
index 00000000..f40e548b
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_sign_edwards25519sha512batch_ref/sign.c
@@ -0,0 +1,103 @@
+#include "api.h"
+#include "crypto_sign.h"
+#include "crypto_hash_sha512.h"
+#include "randombytes.h"
+#include "crypto_verify_32.h"
+
+#include "ge25519.h"
+
+int crypto_sign_keypair(
+    unsigned char *pk,
+    unsigned char *sk
+    )
+{
+  sc25519 scsk;
+  ge25519 gepk;
+
+  randombytes(sk, 32);
+  crypto_hash_sha512(sk, sk, 32);
+  sk[0] &= 248;
+  sk[31] &= 127;
+  sk[31] |= 64;
+
+  sc25519_from32bytes(&scsk,sk);
+  
+  ge25519_scalarmult_base(&gepk, &scsk);
+  ge25519_pack(pk, &gepk);
+  return 0;
+}
+
+int crypto_sign(
+    unsigned char *sm,unsigned long long *smlen,
+    const unsigned char *m,unsigned long long mlen,
+    const unsigned char *sk
+    )
+{
+  sc25519 sck, scs, scsk;
+  ge25519 ger;
+  unsigned char r[32];
+  unsigned char s[32];
+  unsigned long long i;
+  unsigned char hmg[crypto_hash_sha512_BYTES];
+  unsigned char hmr[crypto_hash_sha512_BYTES];
+
+  *smlen = mlen+64;
+  for(i=0;i<mlen;i++)
+    sm[32 + i] = m[i];
+  for(i=0;i<32;i++)
+    sm[i] = sk[32+i];
+  crypto_hash_sha512(hmg, sm, mlen+32); /* Generate k as h(m,sk[32],...,sk[63]) */
+
+  sc25519_from64bytes(&sck, hmg);
+  ge25519_scalarmult_base(&ger, &sck);
+  ge25519_pack(r, &ger);
+  
+  for(i=0;i<32;i++)
+    sm[i] = r[i];
+
+  crypto_hash_sha512(hmr, sm, mlen+32); /* Compute h(m,r) */
+  sc25519_from64bytes(&scs, hmr);
+  sc25519_mul(&scs, &scs, &sck);
+  
+  sc25519_from32bytes(&scsk, sk);
+  sc25519_add(&scs, &scs, &scsk);
+
+  sc25519_to32bytes(s,&scs); /* cat s */
+  for(i=0;i<32;i++)
+    sm[mlen+32+i] = s[i]; 
+
+  return 0;
+}
+
+int crypto_sign_open(
+    unsigned char *m,unsigned long long *mlen,
+    const unsigned char *sm,unsigned long long smlen,
+    const unsigned char *pk
+    )
+{
+  int i;
+  unsigned char t1[32], t2[32];
+  ge25519 get1, get2, gepk;
+  sc25519 schmr, scs;
+  unsigned char hmr[crypto_hash_sha512_BYTES];
+
+  if (ge25519_unpack_vartime(&get1, sm)) return -1;
+  if (ge25519_unpack_vartime(&gepk, pk)) return -1;
+
+  crypto_hash_sha512(hmr,sm,smlen-32);
+
+  sc25519_from64bytes(&schmr, hmr);
+  ge25519_scalarmult(&get1, &get1, &schmr);
+  ge25519_add(&get1, &get1, &gepk);
+  ge25519_pack(t1, &get1);
+
+  sc25519_from32bytes(&scs, &sm[smlen-32]);
+  ge25519_scalarmult_base(&get2, &scs);
+  ge25519_pack(t2, &get2);
+
+  for(i=0;i<smlen-64;i++)
+    m[i] = sm[i + 32];
+  *mlen = smlen-64;
+
+  return crypto_verify_32(t1, t2);
+}
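As a reading aid for the check above: write B for the base point, a for the clamped secret scalar with public key A = aB, k for the scalar derived by hashing the second half of sk together with the message, R = kB for the first 32 bytes of the signed message, and n for the group order. The signer computes

    s \equiv H(R \,\|\, m)\,k + a \pmod{n},

so the comparison of t1 and t2 in crypto_sign_open succeeds exactly when

    sB = H(R\,\|\,m)\,kB + aB = H(R\,\|\,m)\,R + A.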
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa20.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa20.h
new file mode 100644
index 00000000..fe270299
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa20.h
@@ -0,0 +1,43 @@
+#ifndef crypto_stream_salsa20_H
+#define crypto_stream_salsa20_H
+
+#define crypto_stream_salsa20_ref_KEYBYTES 32
+#define crypto_stream_salsa20_ref_NONCEBYTES 8
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_stream_salsa20_ref(size_t,const std::string &,const std::string &);
+extern std::string crypto_stream_salsa20_ref_xor(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_stream_salsa20_ref(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa20_ref_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa20_ref_beforenm(unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa20_ref_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa20_ref_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_stream_salsa20 crypto_stream_salsa20_ref
+/* POTATO crypto_stream_salsa20_ref crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_xor crypto_stream_salsa20_ref_xor
+/* POTATO crypto_stream_salsa20_ref_xor crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_beforenm crypto_stream_salsa20_ref_beforenm
+/* POTATO crypto_stream_salsa20_ref_beforenm crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_afternm crypto_stream_salsa20_ref_afternm
+/* POTATO crypto_stream_salsa20_ref_afternm crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_xor_afternm crypto_stream_salsa20_ref_xor_afternm
+/* POTATO crypto_stream_salsa20_ref_xor_afternm crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_KEYBYTES crypto_stream_salsa20_ref_KEYBYTES
+/* POTATO crypto_stream_salsa20_ref_KEYBYTES crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_NONCEBYTES crypto_stream_salsa20_ref_NONCEBYTES
+/* POTATO crypto_stream_salsa20_ref_NONCEBYTES crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_BEFORENMBYTES crypto_stream_salsa20_ref_BEFORENMBYTES
+/* POTATO crypto_stream_salsa20_ref_BEFORENMBYTES crypto_stream_salsa20_ref crypto_stream_salsa20 */
+#define crypto_stream_salsa20_IMPLEMENTATION "crypto_stream/salsa20/ref"
+#ifndef crypto_stream_salsa20_ref_VERSION
+#define crypto_stream_salsa20_ref_VERSION "-"
+#endif
+#define crypto_stream_salsa20_VERSION crypto_stream_salsa20_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa2012.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012.h
new file mode 100644
index 00000000..f8a20775
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012.h
@@ -0,0 +1,43 @@
+#ifndef crypto_stream_salsa2012_H
+#define crypto_stream_salsa2012_H
+
+#define crypto_stream_salsa2012_ref_KEYBYTES 32
+#define crypto_stream_salsa2012_ref_NONCEBYTES 8
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_stream_salsa2012_ref(size_t,const std::string &,const std::string &);
+extern std::string crypto_stream_salsa2012_ref_xor(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_stream_salsa2012_ref(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa2012_ref_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa2012_ref_beforenm(unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa2012_ref_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa2012_ref_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_stream_salsa2012 crypto_stream_salsa2012_ref
+/* POTATO crypto_stream_salsa2012_ref crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_xor crypto_stream_salsa2012_ref_xor
+/* POTATO crypto_stream_salsa2012_ref_xor crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_beforenm crypto_stream_salsa2012_ref_beforenm
+/* POTATO crypto_stream_salsa2012_ref_beforenm crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_afternm crypto_stream_salsa2012_ref_afternm
+/* POTATO crypto_stream_salsa2012_ref_afternm crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_xor_afternm crypto_stream_salsa2012_ref_xor_afternm
+/* POTATO crypto_stream_salsa2012_ref_xor_afternm crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_KEYBYTES crypto_stream_salsa2012_ref_KEYBYTES
+/* POTATO crypto_stream_salsa2012_ref_KEYBYTES crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_NONCEBYTES crypto_stream_salsa2012_ref_NONCEBYTES
+/* POTATO crypto_stream_salsa2012_ref_NONCEBYTES crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_BEFORENMBYTES crypto_stream_salsa2012_ref_BEFORENMBYTES
+/* POTATO crypto_stream_salsa2012_ref_BEFORENMBYTES crypto_stream_salsa2012_ref crypto_stream_salsa2012 */
+#define crypto_stream_salsa2012_IMPLEMENTATION "crypto_stream/salsa2012/ref"
+#ifndef crypto_stream_salsa2012_ref_VERSION
+#define crypto_stream_salsa2012_ref_VERSION "-"
+#endif
+#define crypto_stream_salsa2012_VERSION crypto_stream_salsa2012_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/api.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/crypto_stream.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/crypto_stream.h
new file mode 100644
index 00000000..57d3891f
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/crypto_stream.h
@@ -0,0 +1,26 @@
+#ifndef crypto_stream_H
+#define crypto_stream_H
+
+#include "crypto_stream_salsa2012.h"
+
+#define crypto_stream crypto_stream_salsa2012
+/* CHEESEBURGER crypto_stream_salsa2012 */
+#define crypto_stream_xor crypto_stream_salsa2012_xor
+/* CHEESEBURGER crypto_stream_salsa2012_xor */
+#define crypto_stream_beforenm crypto_stream_salsa2012_beforenm
+/* CHEESEBURGER crypto_stream_salsa2012_beforenm */
+#define crypto_stream_afternm crypto_stream_salsa2012_afternm
+/* CHEESEBURGER crypto_stream_salsa2012_afternm */
+#define crypto_stream_xor_afternm crypto_stream_salsa2012_xor_afternm
+/* CHEESEBURGER crypto_stream_salsa2012_xor_afternm */
+#define crypto_stream_KEYBYTES crypto_stream_salsa2012_KEYBYTES
+/* CHEESEBURGER crypto_stream_salsa2012_KEYBYTES */
+#define crypto_stream_NONCEBYTES crypto_stream_salsa2012_NONCEBYTES
+/* CHEESEBURGER crypto_stream_salsa2012_NONCEBYTES */
+#define crypto_stream_BEFORENMBYTES crypto_stream_salsa2012_BEFORENMBYTES
+/* CHEESEBURGER crypto_stream_salsa2012_BEFORENMBYTES */
+#define crypto_stream_PRIMITIVE "salsa2012"
+#define crypto_stream_IMPLEMENTATION crypto_stream_salsa2012_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_salsa2012_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/implementors b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/stream.c b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/stream.c
new file mode 100644
index 00000000..86053337
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/stream.c
@@ -0,0 +1,49 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa2012.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!clen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (clen >= 64) {
+    crypto_core_salsa2012(c,in,k,sigma);
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < clen;++i) c[i] = block[i];
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/xor.c b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/xor.c
new file mode 100644
index 00000000..90206426
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa2012_ref/xor.c
@@ -0,0 +1,52 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa2012.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!mlen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (mlen >= 64) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+  }
+  return 0;
+}
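As a usage sketch only (demo is a hypothetical name): since the cipher XORs the message with a keystream derived from (n, k), applying crypto_stream_xor twice with the same nonce and key restores the original plaintext; for the same reason a nonce must never be reused with the same key for two different messages.

    extern int crypto_stream_xor(unsigned char *,const unsigned char *,
                                 unsigned long long,const unsigned char *,
                                 const unsigned char *);

    static void demo(unsigned char *c, unsigned char *m, unsigned long long len,
                     const unsigned char n[8], const unsigned char k[32])
    {
      crypto_stream_xor(c, m, len, n, k);   /* encrypt: c = m ^ keystream */
      crypto_stream_xor(m, c, len, n, k);   /* decrypt: restores the original m */
    }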
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa208.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa208.h
new file mode 100644
index 00000000..962a4cba
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa208.h
@@ -0,0 +1,43 @@
+#ifndef crypto_stream_salsa208_H
+#define crypto_stream_salsa208_H
+
+#define crypto_stream_salsa208_ref_KEYBYTES 32
+#define crypto_stream_salsa208_ref_NONCEBYTES 8
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_stream_salsa208_ref(size_t,const std::string &,const std::string &);
+extern std::string crypto_stream_salsa208_ref_xor(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_stream_salsa208_ref(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa208_ref_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa208_ref_beforenm(unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa208_ref_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_salsa208_ref_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_stream_salsa208 crypto_stream_salsa208_ref
+/* POTATO crypto_stream_salsa208_ref crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_xor crypto_stream_salsa208_ref_xor
+/* POTATO crypto_stream_salsa208_ref_xor crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_beforenm crypto_stream_salsa208_ref_beforenm
+/* POTATO crypto_stream_salsa208_ref_beforenm crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_afternm crypto_stream_salsa208_ref_afternm
+/* POTATO crypto_stream_salsa208_ref_afternm crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_xor_afternm crypto_stream_salsa208_ref_xor_afternm
+/* POTATO crypto_stream_salsa208_ref_xor_afternm crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_KEYBYTES crypto_stream_salsa208_ref_KEYBYTES
+/* POTATO crypto_stream_salsa208_ref_KEYBYTES crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_NONCEBYTES crypto_stream_salsa208_ref_NONCEBYTES
+/* POTATO crypto_stream_salsa208_ref_NONCEBYTES crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_BEFORENMBYTES crypto_stream_salsa208_ref_BEFORENMBYTES
+/* POTATO crypto_stream_salsa208_ref_BEFORENMBYTES crypto_stream_salsa208_ref crypto_stream_salsa208 */
+#define crypto_stream_salsa208_IMPLEMENTATION "crypto_stream/salsa208/ref"
+#ifndef crypto_stream_salsa208_ref_VERSION
+#define crypto_stream_salsa208_ref_VERSION "-"
+#endif
+#define crypto_stream_salsa208_VERSION crypto_stream_salsa208_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/api.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/crypto_stream.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/crypto_stream.h
new file mode 100644
index 00000000..b915d5b5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/crypto_stream.h
@@ -0,0 +1,26 @@
+#ifndef crypto_stream_H
+#define crypto_stream_H
+
+#include "crypto_stream_salsa208.h"
+
+#define crypto_stream crypto_stream_salsa208
+/* CHEESEBURGER crypto_stream_salsa208 */
+#define crypto_stream_xor crypto_stream_salsa208_xor
+/* CHEESEBURGER crypto_stream_salsa208_xor */
+#define crypto_stream_beforenm crypto_stream_salsa208_beforenm
+/* CHEESEBURGER crypto_stream_salsa208_beforenm */
+#define crypto_stream_afternm crypto_stream_salsa208_afternm
+/* CHEESEBURGER crypto_stream_salsa208_afternm */
+#define crypto_stream_xor_afternm crypto_stream_salsa208_xor_afternm
+/* CHEESEBURGER crypto_stream_salsa208_xor_afternm */
+#define crypto_stream_KEYBYTES crypto_stream_salsa208_KEYBYTES
+/* CHEESEBURGER crypto_stream_salsa208_KEYBYTES */
+#define crypto_stream_NONCEBYTES crypto_stream_salsa208_NONCEBYTES
+/* CHEESEBURGER crypto_stream_salsa208_NONCEBYTES */
+#define crypto_stream_BEFORENMBYTES crypto_stream_salsa208_BEFORENMBYTES
+/* CHEESEBURGER crypto_stream_salsa208_BEFORENMBYTES */
+#define crypto_stream_PRIMITIVE "salsa208"
+#define crypto_stream_IMPLEMENTATION crypto_stream_salsa208_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_salsa208_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/implementors b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/stream.c b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/stream.c
new file mode 100644
index 00000000..cdcfbc0e
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/stream.c
@@ -0,0 +1,49 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa208.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!clen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (clen >= 64) {
+    crypto_core_salsa208(c,in,k,sigma);
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < clen;++i) c[i] = block[i];
+  }
+  return 0;
+}
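
The 16-byte in[] block assembled above is the Salsa20/8 core input: bytes 0-7 carry the caller's 8-byte nonce and bytes 8-15 hold a block counter that starts at zero, so every 64-byte slice of keystream is produced from a distinct core input. The byte-wise loop with the carried u is nothing more than a little-endian 64-bit increment of that counter. A self-contained sketch of the equivalence (hypothetical helper names, not part of NaCl):

    #include <stdint.h>
    #include <string.h>
    #include <assert.h>

    /* Byte-wise increment exactly as in stream.c above. */
    static void inc_bytes(unsigned char in[16])
    {
      unsigned int u = 1;
      int i;
      for (i = 8; i < 16; ++i) {
        u += (unsigned int) in[i];
        in[i] = u;
        u >>= 8;
      }
    }

    /* Same effect, viewing bytes 8..15 as a little-endian uint64 counter. */
    static void inc_u64le(unsigned char in[16])
    {
      uint64_t ctr = 0;
      int i;
      for (i = 0; i < 8; ++i) ctr |= (uint64_t) in[8 + i] << (8 * i);
      ctr += 1;
      for (i = 0; i < 8; ++i) in[8 + i] = (unsigned char) (ctr >> (8 * i));
    }

    int main(void)
    {
      unsigned char a[16] = {0}, b[16] = {0};
      int i;
      for (i = 0; i < 1000; ++i) { inc_bytes(a); inc_u64le(b); }
      assert(memcmp(a, b, 16) == 0);
      return 0;
    }

The same increment appears verbatim in the xor.c below and in the salsa20 variant later in this patch.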
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/xor.c b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/xor.c
new file mode 100644
index 00000000..c017ac42
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa208_ref/xor.c
@@ -0,0 +1,52 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa208.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!mlen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (mlen >= 64) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+  }
+  return 0;
+}
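
Because the keystream is simply XORed into the data, encryption and decryption are the same operation: applying crypto_stream_salsa208_xor twice with the same key and nonce returns the original message. A minimal round-trip sketch, assuming the headers and objects from this tree are on the include and link path (the 32-byte key and 8-byte nonce sizes come from api.h above):

    #include <assert.h>
    #include <string.h>
    #include "crypto_stream_salsa208.h"

    int main(void)
    {
      unsigned char k[32] = {0};   /* crypto_stream_salsa208_KEYBYTES */
      unsigned char n[8]  = {0};   /* crypto_stream_salsa208_NONCEBYTES; never reuse per key */
      unsigned char m[13] = "hello, salsa";
      unsigned char c[13], p[13];

      crypto_stream_salsa208_xor(c, m, sizeof m, n, k);  /* encrypt */
      crypto_stream_salsa208_xor(p, c, sizeof c, n, k);  /* decrypt */
      assert(memcmp(p, m, sizeof m) == 0);
      return 0;
    }

Note that a bare stream cipher provides no integrity; NaCl's crypto_secretbox pairs this kind of stream with a Poly1305 one-time authenticator for exactly that reason.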
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/api.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/crypto_stream.h b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/crypto_stream.h
new file mode 100644
index 00000000..6f73e2f5
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/crypto_stream.h
@@ -0,0 +1,26 @@
+#ifndef crypto_stream_H
+#define crypto_stream_H
+
+#include "crypto_stream_salsa20.h"
+
+#define crypto_stream crypto_stream_salsa20
+/* CHEESEBURGER crypto_stream_salsa20 */
+#define crypto_stream_xor crypto_stream_salsa20_xor
+/* CHEESEBURGER crypto_stream_salsa20_xor */
+#define crypto_stream_beforenm crypto_stream_salsa20_beforenm
+/* CHEESEBURGER crypto_stream_salsa20_beforenm */
+#define crypto_stream_afternm crypto_stream_salsa20_afternm
+/* CHEESEBURGER crypto_stream_salsa20_afternm */
+#define crypto_stream_xor_afternm crypto_stream_salsa20_xor_afternm
+/* CHEESEBURGER crypto_stream_salsa20_xor_afternm */
+#define crypto_stream_KEYBYTES crypto_stream_salsa20_KEYBYTES
+/* CHEESEBURGER crypto_stream_salsa20_KEYBYTES */
+#define crypto_stream_NONCEBYTES crypto_stream_salsa20_NONCEBYTES
+/* CHEESEBURGER crypto_stream_salsa20_NONCEBYTES */
+#define crypto_stream_BEFORENMBYTES crypto_stream_salsa20_BEFORENMBYTES
+/* CHEESEBURGER crypto_stream_salsa20_BEFORENMBYTES */
+#define crypto_stream_PRIMITIVE "salsa20"
+#define crypto_stream_IMPLEMENTATION crypto_stream_salsa20_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_salsa20_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/implementors b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/stream.c b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/stream.c
new file mode 100644
index 00000000..2f0262eb
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/stream.c
@@ -0,0 +1,49 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa20.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!clen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (clen >= 64) {
+    crypto_core_salsa20(c,in,k,sigma);
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa20(block,in,k,sigma);
+    for (i = 0;i < clen;++i) c[i] = block[i];
+  }
+  return 0;
+}
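
crypto_stream without the _xor suffix writes raw keystream into the output buffer, which also makes it usable as a fast deterministic generator keyed by (key, nonce). A small sketch, again assuming this tree's headers and objects are available:

    #include <stdio.h>
    #include "crypto_stream_salsa20.h"

    int main(void)
    {
      unsigned char k[32] = {1};   /* crypto_stream_salsa20_KEYBYTES */
      unsigned char n[8]  = {0};   /* crypto_stream_salsa20_NONCEBYTES */
      unsigned char buf[32];
      int i;

      crypto_stream_salsa20(buf, sizeof buf, n, k);  /* 32 bytes of keystream */
      for (i = 0; i < (int) sizeof buf; ++i) printf("%02x", buf[i]);
      printf("\n");
      return 0;
    }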
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/xor.c b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/xor.c
new file mode 100644
index 00000000..11c7e9f0
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_salsa20_ref/xor.c
@@ -0,0 +1,52 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa20.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!mlen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (mlen >= 64) {
+    crypto_core_salsa20(block,in,k,sigma);
+    for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa20(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20.h b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20.h
new file mode 100644
index 00000000..6b6b9036
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20.h
@@ -0,0 +1,43 @@
+#ifndef crypto_stream_xsalsa20_H
+#define crypto_stream_xsalsa20_H
+
+#define crypto_stream_xsalsa20_ref_KEYBYTES 32
+#define crypto_stream_xsalsa20_ref_NONCEBYTES 24
+#ifdef __cplusplus
+#include <string>
+extern std::string crypto_stream_xsalsa20_ref(size_t,const std::string &,const std::string &);
+extern std::string crypto_stream_xsalsa20_ref_xor(const std::string &,const std::string &,const std::string &);
+extern "C" {
+#endif
+extern int crypto_stream_xsalsa20_ref(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xsalsa20_ref_xor(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xsalsa20_ref_beforenm(unsigned char *,const unsigned char *);
+extern int crypto_stream_xsalsa20_ref_afternm(unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+extern int crypto_stream_xsalsa20_ref_xor_afternm(unsigned char *,const unsigned char *,unsigned long long,const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_stream_xsalsa20 crypto_stream_xsalsa20_ref
+/* POTATO crypto_stream_xsalsa20_ref crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_xor crypto_stream_xsalsa20_ref_xor
+/* POTATO crypto_stream_xsalsa20_ref_xor crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_beforenm crypto_stream_xsalsa20_ref_beforenm
+/* POTATO crypto_stream_xsalsa20_ref_beforenm crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_afternm crypto_stream_xsalsa20_ref_afternm
+/* POTATO crypto_stream_xsalsa20_ref_afternm crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_xor_afternm crypto_stream_xsalsa20_ref_xor_afternm
+/* POTATO crypto_stream_xsalsa20_ref_xor_afternm crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_KEYBYTES crypto_stream_xsalsa20_ref_KEYBYTES
+/* POTATO crypto_stream_xsalsa20_ref_KEYBYTES crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_NONCEBYTES crypto_stream_xsalsa20_ref_NONCEBYTES
+/* POTATO crypto_stream_xsalsa20_ref_NONCEBYTES crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_BEFORENMBYTES crypto_stream_xsalsa20_ref_BEFORENMBYTES
+/* POTATO crypto_stream_xsalsa20_ref_BEFORENMBYTES crypto_stream_xsalsa20_ref crypto_stream_xsalsa20 */
+#define crypto_stream_xsalsa20_IMPLEMENTATION "crypto_stream/xsalsa20/ref"
+#ifndef crypto_stream_xsalsa20_ref_VERSION
+#define crypto_stream_xsalsa20_ref_VERSION "-"
+#endif
+#define crypto_stream_xsalsa20_VERSION crypto_stream_xsalsa20_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/api.h b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/api.h
new file mode 100644
index 00000000..6910a7dc
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 24
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/crypto_stream.h b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/crypto_stream.h
new file mode 100644
index 00000000..96882f50
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/crypto_stream.h
@@ -0,0 +1,26 @@
+#ifndef crypto_stream_H
+#define crypto_stream_H
+
+#include "crypto_stream_xsalsa20.h"
+
+#define crypto_stream crypto_stream_xsalsa20
+/* CHEESEBURGER crypto_stream_xsalsa20 */
+#define crypto_stream_xor crypto_stream_xsalsa20_xor
+/* CHEESEBURGER crypto_stream_xsalsa20_xor */
+#define crypto_stream_beforenm crypto_stream_xsalsa20_beforenm
+/* CHEESEBURGER crypto_stream_xsalsa20_beforenm */
+#define crypto_stream_afternm crypto_stream_xsalsa20_afternm
+/* CHEESEBURGER crypto_stream_xsalsa20_afternm */
+#define crypto_stream_xor_afternm crypto_stream_xsalsa20_xor_afternm
+/* CHEESEBURGER crypto_stream_xsalsa20_xor_afternm */
+#define crypto_stream_KEYBYTES crypto_stream_xsalsa20_KEYBYTES
+/* CHEESEBURGER crypto_stream_xsalsa20_KEYBYTES */
+#define crypto_stream_NONCEBYTES crypto_stream_xsalsa20_NONCEBYTES
+/* CHEESEBURGER crypto_stream_xsalsa20_NONCEBYTES */
+#define crypto_stream_BEFORENMBYTES crypto_stream_xsalsa20_BEFORENMBYTES
+/* CHEESEBURGER crypto_stream_xsalsa20_BEFORENMBYTES */
+#define crypto_stream_PRIMITIVE "xsalsa20"
+#define crypto_stream_IMPLEMENTATION crypto_stream_xsalsa20_IMPLEMENTATION
+#define crypto_stream_VERSION crypto_stream_xsalsa20_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/implementors b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/stream.c b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/stream.c
new file mode 100644
index 00000000..2d710709
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/stream.c
@@ -0,0 +1,22 @@
+/*
+version 20080914
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "crypto_stream.h"
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char subkey[32];
+  crypto_core_hsalsa20(subkey,n,k,sigma);
+  return crypto_stream_salsa20(c,clen,n + 16,subkey);
+}
diff --git a/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/xor.c b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/xor.c
new file mode 100644
index 00000000..13f3134a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_stream_xsalsa20_ref/xor.c
@@ -0,0 +1,23 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "crypto_stream.h"
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char subkey[32];
+  crypto_core_hsalsa20(subkey,n,k,sigma);
+  return crypto_stream_salsa20_xor(c,m,mlen,n + 16,subkey);
+}
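
The two small wrappers above are the whole of XSalsa20: the first 16 bytes of the 24-byte nonce go through HSalsa20 together with the key to derive a fresh 32-byte subkey, and the remaining 8 nonce bytes become an ordinary Salsa20 nonce for that subkey. The practical payoff is the 192-bit nonce, which is large enough to pick at random for every message. A hedged usage sketch (assumes this tree's headers and objects plus a randombytes implementation are linked):

    #include "crypto_stream_xsalsa20.h"
    #include "randombytes.h"

    int main(void)
    {
      unsigned char k[32];          /* crypto_stream_xsalsa20_KEYBYTES */
      unsigned char n[24];          /* crypto_stream_xsalsa20_NONCEBYTES */
      unsigned char m[5] = "data";
      unsigned char c[5];

      randombytes(k, sizeof k);     /* long-term key */
      randombytes(n, sizeof n);     /* a random per-message nonce is fine at 24 bytes */
      crypto_stream_xsalsa20_xor(c, m, sizeof m, n, k);
      /* transmit n alongside c; decryption repeats the same call */
      return 0;
    }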
diff --git a/nacl/nacl-20110221/build_android/crypto_uint32.h b/nacl/nacl-20110221/build_android/crypto_uint32.h
new file mode 100644
index 00000000..2d473842
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_uint32.h
@@ -0,0 +1,6 @@
+#ifndef CRYPTO_UINT32
+#define CRYPTO_UINT32
+
+typedef unsigned int crypto_uint32;
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_16.h b/nacl/nacl-20110221/build_android/crypto_verify_16.h
new file mode 100644
index 00000000..5008cd8b
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_16.h
@@ -0,0 +1,24 @@
+#ifndef crypto_verify_16_H
+#define crypto_verify_16_H
+
+#define crypto_verify_16_ref_BYTES 16
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_verify_16_ref(const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_verify_16 crypto_verify_16_ref
+/* POTATO crypto_verify_16_ref crypto_verify_16_ref crypto_verify_16 */
+#define crypto_verify_16_BYTES crypto_verify_16_ref_BYTES
+/* POTATO crypto_verify_16_ref_BYTES crypto_verify_16_ref crypto_verify_16 */
+#define crypto_verify_16_IMPLEMENTATION "crypto_verify/16/ref"
+#ifndef crypto_verify_16_ref_VERSION
+#define crypto_verify_16_ref_VERSION "-"
+#endif
+#define crypto_verify_16_VERSION crypto_verify_16_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_16_ref/api.h b/nacl/nacl-20110221/build_android/crypto_verify_16_ref/api.h
new file mode 100644
index 00000000..32be2f97
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_16_ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 16
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_16_ref/crypto_verify.h b/nacl/nacl-20110221/build_android/crypto_verify_16_ref/crypto_verify.h
new file mode 100644
index 00000000..d4800f7a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_16_ref/crypto_verify.h
@@ -0,0 +1,14 @@
+#ifndef crypto_verify_H
+#define crypto_verify_H
+
+#include "crypto_verify_16.h"
+
+#define crypto_verify crypto_verify_16
+/* CHEESEBURGER crypto_verify_16 */
+#define crypto_verify_BYTES crypto_verify_16_BYTES
+/* CHEESEBURGER crypto_verify_16_BYTES */
+#define crypto_verify_PRIMITIVE "16"
+#define crypto_verify_IMPLEMENTATION crypto_verify_16_IMPLEMENTATION
+#define crypto_verify_VERSION crypto_verify_16_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_16_ref/verify.c b/nacl/nacl-20110221/build_android/crypto_verify_16_ref/verify.c
new file mode 100644
index 00000000..d356060c
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_16_ref/verify.c
@@ -0,0 +1,24 @@
+#include "crypto_verify.h"
+
+int crypto_verify(const unsigned char *x,const unsigned char *y)
+{
+  unsigned int differentbits = 0;
+#define F(i) differentbits |= x[i] ^ y[i];
+  F(0)
+  F(1)
+  F(2)
+  F(3)
+  F(4)
+  F(5)
+  F(6)
+  F(7)
+  F(8)
+  F(9)
+  F(10)
+  F(11)
+  F(12)
+  F(13)
+  F(14)
+  F(15)
+  return (1 & ((differentbits - 1) >> 8)) - 1;
+}
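
The unrolled ORs accumulate every differing bit into differentbits without ever branching on secret data; NaCl uses this instead of memcmp so that authenticator comparisons do not leak the position of the first mismatch through timing. The closing expression maps 0 to 0 and any value in 1..255 to -1: when differentbits is zero, differentbits - 1 wraps to UINT_MAX, so bit 8 is set and the result is 1 - 1 = 0; otherwise the subtraction stays below 256, bit 8 is clear, and the result is 0 - 1 = -1. A loop-based variant of the same technique for an arbitrary length, as a sketch (not part of NaCl):

    #include <assert.h>
    #include <stddef.h>

    /* Constant-time comparison of n bytes, same trick as verify.c above:
       returns 0 if equal, -1 otherwise. */
    static int ct_verify_n(const unsigned char *x, const unsigned char *y, size_t n)
    {
      unsigned int differentbits = 0;
      size_t i;
      for (i = 0; i < n; ++i) differentbits |= x[i] ^ y[i];
      return (1 & ((differentbits - 1) >> 8)) - 1;
    }

    int main(void)
    {
      unsigned char a[16] = {0}, b[16] = {0};
      assert(ct_verify_n(a, b, 16) == 0);
      b[15] ^= 1;
      assert(ct_verify_n(a, b, 16) == -1);
      return 0;
    }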
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_32.h b/nacl/nacl-20110221/build_android/crypto_verify_32.h
new file mode 100644
index 00000000..d48695d8
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_32.h
@@ -0,0 +1,24 @@
+#ifndef crypto_verify_32_H
+#define crypto_verify_32_H
+
+#define crypto_verify_32_ref_BYTES 32
+#ifdef __cplusplus
+#include <string>
+extern "C" {
+#endif
+extern int crypto_verify_32_ref(const unsigned char *,const unsigned char *);
+#ifdef __cplusplus
+}
+#endif
+
+#define crypto_verify_32 crypto_verify_32_ref
+/* POTATO crypto_verify_32_ref crypto_verify_32_ref crypto_verify_32 */
+#define crypto_verify_32_BYTES crypto_verify_32_ref_BYTES
+/* POTATO crypto_verify_32_ref_BYTES crypto_verify_32_ref crypto_verify_32 */
+#define crypto_verify_32_IMPLEMENTATION "crypto_verify/32/ref"
+#ifndef crypto_verify_32_ref_VERSION
+#define crypto_verify_32_ref_VERSION "-"
+#endif
+#define crypto_verify_32_VERSION crypto_verify_32_ref_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_32_ref/api.h b/nacl/nacl-20110221/build_android/crypto_verify_32_ref/api.h
new file mode 100644
index 00000000..ae8c7f6a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_32_ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 32
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_32_ref/crypto_verify.h b/nacl/nacl-20110221/build_android/crypto_verify_32_ref/crypto_verify.h
new file mode 100644
index 00000000..7029edb3
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_32_ref/crypto_verify.h
@@ -0,0 +1,14 @@
+#ifndef crypto_verify_H
+#define crypto_verify_H
+
+#include "crypto_verify_32.h"
+
+#define crypto_verify crypto_verify_32
+/* CHEESEBURGER crypto_verify_32 */
+#define crypto_verify_BYTES crypto_verify_32_BYTES
+/* CHEESEBURGER crypto_verify_32_BYTES */
+#define crypto_verify_PRIMITIVE "32"
+#define crypto_verify_IMPLEMENTATION crypto_verify_32_IMPLEMENTATION
+#define crypto_verify_VERSION crypto_verify_32_VERSION
+
+#endif
diff --git a/nacl/nacl-20110221/build_android/crypto_verify_32_ref/verify.c b/nacl/nacl-20110221/build_android/crypto_verify_32_ref/verify.c
new file mode 100644
index 00000000..a0e23afe
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/crypto_verify_32_ref/verify.c
@@ -0,0 +1,40 @@
+#include "crypto_verify.h"
+
+int crypto_verify(const unsigned char *x,const unsigned char *y)
+{
+  unsigned int differentbits = 0;
+#define F(i) differentbits |= x[i] ^ y[i];
+  F(0)
+  F(1)
+  F(2)
+  F(3)
+  F(4)
+  F(5)
+  F(6)
+  F(7)
+  F(8)
+  F(9)
+  F(10)
+  F(11)
+  F(12)
+  F(13)
+  F(14)
+  F(15)
+  F(16)
+  F(17)
+  F(18)
+  F(19)
+  F(20)
+  F(21)
+  F(22)
+  F(23)
+  F(24)
+  F(25)
+  F(26)
+  F(27)
+  F(28)
+  F(29)
+  F(30)
+  F(31)
+  return (1 & ((differentbits - 1) >> 8)) - 1;
+}
diff --git a/nacl/nacl-20110221/build_android/nacl.h b/nacl/nacl-20110221/build_android/nacl.h
new file mode 100644
index 00000000..f0ad2892
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/nacl.h
@@ -0,0 +1,69 @@
+#include <api.h>
+#include <crypto_auth_hmacsha256.h>
+#include <crypto_auth_hmacsha256_ref/api.h>
+#include <crypto_auth_hmacsha256_ref/crypto_auth.h>
+#include <crypto_auth_hmacsha512256.h>
+#include <crypto_auth_hmacsha512256_ref/api.h>
+#include <crypto_auth_hmacsha512256_ref/crypto_auth.h>
+#include <crypto_box_curve25519xsalsa20poly1305.h>
+#include <crypto_box_curve25519xsalsa20poly1305_ref/api.h>
+#include <crypto_box_curve25519xsalsa20poly1305_ref/crypto_box.h>
+#include <crypto_core_hsalsa20.h>
+#include <crypto_core_hsalsa20_ref/api.h>
+#include <crypto_core_hsalsa20_ref/crypto_core.h>
+#include <crypto_core_salsa20.h>
+#include <crypto_core_salsa2012.h>
+#include <crypto_core_salsa2012_ref/api.h>
+#include <crypto_core_salsa2012_ref/crypto_core.h>
+#include <crypto_core_salsa208.h>
+#include <crypto_core_salsa208_ref/api.h>
+#include <crypto_core_salsa208_ref/crypto_core.h>
+#include <crypto_core_salsa20_ref/api.h>
+#include <crypto_core_salsa20_ref/crypto_core.h>
+#include <crypto_hash_sha256.h>
+#include <crypto_hash_sha256_ref/api.h>
+#include <crypto_hash_sha256_ref/crypto_hash.h>
+#include <crypto_hash_sha512.h>
+#include <crypto_hash_sha512_ref/api.h>
+#include <crypto_hash_sha512_ref/crypto_hash.h>
+#include <crypto_hashblocks_sha256.h>
+#include <crypto_hashblocks_sha256_ref/api.h>
+#include <crypto_hashblocks_sha256_ref/crypto_hashblocks.h>
+#include <crypto_hashblocks_sha512.h>
+#include <crypto_hashblocks_sha512_ref/api.h>
+#include <crypto_hashblocks_sha512_ref/crypto_hashblocks.h>
+#include <crypto_onetimeauth_poly1305.h>
+#include <crypto_onetimeauth_poly1305_ref/api.h>
+#include <crypto_onetimeauth_poly1305_ref/crypto_onetimeauth.h>
+#include <crypto_scalarmult_curve25519.h>
+#include <crypto_scalarmult_curve25519_ref/api.h>
+#include <crypto_scalarmult_curve25519_ref/crypto_scalarmult.h>
+#include <crypto_secretbox_xsalsa20poly1305.h>
+#include <crypto_secretbox_xsalsa20poly1305_ref/api.h>
+#include <crypto_secretbox_xsalsa20poly1305_ref/crypto_secretbox.h>
+#include <crypto_sign_edwards25519sha512batch.h>
+#include <crypto_sign_edwards25519sha512batch_ref/api.h>
+#include <crypto_sign_edwards25519sha512batch_ref/crypto_sign.h>
+#include <crypto_sign_edwards25519sha512batch_ref/fe25519.h>
+#include <crypto_sign_edwards25519sha512batch_ref/ge25519.h>
+#include <crypto_sign_edwards25519sha512batch_ref/sc25519.h>
+#include <crypto_stream_salsa20.h>
+#include <crypto_stream_salsa2012.h>
+#include <crypto_stream_salsa2012_ref/api.h>
+#include <crypto_stream_salsa2012_ref/crypto_stream.h>
+#include <crypto_stream_salsa208.h>
+#include <crypto_stream_salsa208_ref/api.h>
+#include <crypto_stream_salsa208_ref/crypto_stream.h>
+#include <crypto_stream_salsa20_ref/api.h>
+#include <crypto_stream_salsa20_ref/crypto_stream.h>
+#include <crypto_stream_xsalsa20.h>
+#include <crypto_stream_xsalsa20_ref/api.h>
+#include <crypto_stream_xsalsa20_ref/crypto_stream.h>
+#include <crypto_uint32.h>
+#include <crypto_verify_16.h>
+#include <crypto_verify_16_ref/api.h>
+#include <crypto_verify_16_ref/crypto_verify.h>
+#include <crypto_verify_32.h>
+#include <crypto_verify_32_ref/api.h>
+#include <crypto_verify_32_ref/crypto_verify.h>
+#include <randombytes.h>
diff --git a/nacl/nacl-20110221/build_android/randombytes.h b/nacl/nacl-20110221/build_android/randombytes.h
new file mode 100644
index 00000000..2e0caf8a
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/randombytes.h
@@ -0,0 +1,24 @@
+/*
+randombytes/devurandom.h version 20080713
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef randombytes_devurandom_H
+#define randombytes_devurandom_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void randombytes(unsigned char *,unsigned long long);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef randombytes_implementation
+#define randombytes_implementation "devurandom"
+#endif
+
+#endif
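
randombytes is the library's only randomness source, and the prototype returns void: there is no failure code to check (the devurandom backend this header names retries internally instead of reporting errors). A minimal key-generation sketch, assuming the randombytes object from this tree is linked:

    #include <stdio.h>
    #include "randombytes.h"

    int main(void)
    {
      unsigned char key[32];
      int i;

      randombytes(key, sizeof key);          /* fill with kernel randomness */
      for (i = 0; i < (int) sizeof key; ++i) printf("%02x", key[i]);
      printf("\n");
      return 0;
    }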
diff --git a/nacl/nacl-20110221/build_android/sources.mk b/nacl/nacl-20110221/build_android/sources.mk
new file mode 100644
index 00000000..18d44fad
--- /dev/null
+++ b/nacl/nacl-20110221/build_android/sources.mk
@@ -0,0 +1,2 @@
+NACL_SOURCES := \
+${NACL_BASE}/crypto_auth_hmacsha256_ref/hmac.c ${NACL_BASE}/crypto_auth_hmacsha256_ref/verify.c ${NACL_BASE}/crypto_auth_hmacsha512256_ref/hmac.c ${NACL_BASE}/crypto_auth_hmacsha512256_ref/verify.c ${NACL_BASE}/crypto_box_curve25519xsalsa20poly1305_ref/after.c ${NACL_BASE}/crypto_box_curve25519xsalsa20poly1305_ref/before.c ${NACL_BASE}/crypto_box_curve25519xsalsa20poly1305_ref/box.c ${NACL_BASE}/crypto_box_curve25519xsalsa20poly1305_ref/keypair.c ${NACL_BASE}/crypto_core_hsalsa20_ref/core.c ${NACL_BASE}/crypto_core_salsa2012_ref/core.c ${NACL_BASE}/crypto_core_salsa208_ref/core.c ${NACL_BASE}/crypto_core_salsa20_ref/core.c ${NACL_BASE}/crypto_hash_sha256_ref/hash.c ${NACL_BASE}/crypto_hash_sha512_ref/hash.c ${NACL_BASE}/crypto_hashblocks_sha256_ref/blocks.c ${NACL_BASE}/crypto_hashblocks_sha512_ref/blocks.c ${NACL_BASE}/crypto_onetimeauth_poly1305_ref/auth.c ${NACL_BASE}/crypto_onetimeauth_poly1305_ref/verify.c ${NACL_BASE}/crypto_scalarmult_curve25519_ref/base.c ${NACL_BASE}/crypto_scalarmult_curve25519_ref/smult.c ${NACL_BASE}/crypto_secretbox_xsalsa20poly1305_ref/box.c ${NACL_BASE}/crypto_sign_edwards25519sha512batch_ref/fe25519.c ${NACL_BASE}/crypto_sign_edwards25519sha512batch_ref/ge25519.c ${NACL_BASE}/crypto_sign_edwards25519sha512batch_ref/sc25519.c ${NACL_BASE}/crypto_sign_edwards25519sha512batch_ref/sign.c ${NACL_BASE}/crypto_stream_salsa2012_ref/stream.c ${NACL_BASE}/crypto_stream_salsa2012_ref/xor.c ${NACL_BASE}/crypto_stream_salsa208_ref/stream.c ${NACL_BASE}/crypto_stream_salsa208_ref/xor.c ${NACL_BASE}/crypto_stream_salsa20_ref/stream.c ${NACL_BASE}/crypto_stream_salsa20_ref/xor.c ${NACL_BASE}/crypto_stream_xsalsa20_ref/stream.c ${NACL_BASE}/crypto_stream_xsalsa20_ref/xor.c ${NACL_BASE}/crypto_verify_16_ref/verify.c ${NACL_BASE}/crypto_verify_32_ref/verify.c
diff --git a/nacl/nacl-20110221/commandline/nacl-sha256.c b/nacl/nacl-20110221/commandline/nacl-sha256.c
new file mode 100644
index 00000000..8e0df453
--- /dev/null
+++ b/nacl/nacl-20110221/commandline/nacl-sha256.c
@@ -0,0 +1,64 @@
+/*
+commandline/nacl-sha256.c version 20080713
+D. J. Bernstein
+Public domain.
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "crypto_hash_sha256.h"
+
+unsigned char *input;
+unsigned long long inputalloc;
+unsigned long long inputlen;
+
+unsigned char h[crypto_hash_sha256_BYTES];
+
+void h_print(void)
+{
+  int i;
+  for (i = 0;i < crypto_hash_sha256_BYTES;++i) printf("%02x",255 & (int) h[i]);
+  printf("\n");
+}
+
+int main()
+{
+  struct stat st;
+  int ch;
+
+  if (fstat(0,&st) == 0) {
+    input = mmap(0,st.st_size,PROT_READ,MAP_SHARED,0,0);
+    if (input != MAP_FAILED) {
+      crypto_hash_sha256(h,input,st.st_size);
+      h_print();
+      return 0;
+    }
+  }
+
+  input = 0;
+  inputalloc = 0;
+  inputlen = 0;
+
+  while ((ch = getchar()) != EOF) {
+    if (inputlen >= inputalloc) {
+      void *newinput;
+      while (inputlen >= inputalloc)
+        inputalloc = inputalloc * 2 + 1;
+      if (posix_memalign(&newinput,16,inputalloc) != 0) return 111;
+      memcpy(newinput,input,inputlen);
+      free(input);
+      input = newinput;
+    }
+    input[inputlen++] = ch;
+  }
+
+  crypto_hash_sha256(h,input,inputlen);
+  h_print();
+
+  return 0;
+}
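
The same hash is available as a plain library call; the command-line tool above is just crypto_hash_sha256 applied to stdin. A short sketch hashing a fixed message (assumes this tree's headers and objects; the comment shows the standard SHA-256 test vector for "abc"):

    #include <stdio.h>
    #include <string.h>
    #include "crypto_hash_sha256.h"

    int main(void)
    {
      const unsigned char m[] = "abc";
      unsigned char h[crypto_hash_sha256_BYTES];
      int i;

      crypto_hash_sha256(h, m, strlen((const char *) m));
      for (i = 0; i < crypto_hash_sha256_BYTES; ++i) printf("%02x", h[i]);
      printf("\n");  /* ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad */
      return 0;
    }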
diff --git a/nacl/nacl-20110221/commandline/nacl-sha512.c b/nacl/nacl-20110221/commandline/nacl-sha512.c
new file mode 100644
index 00000000..6864c76a
--- /dev/null
+++ b/nacl/nacl-20110221/commandline/nacl-sha512.c
@@ -0,0 +1,64 @@
+/*
+commandline/nacl-sha512.c version 20080713
+D. J. Bernstein
+Public domain.
+*/
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "crypto_hash_sha512.h"
+
+unsigned char *input;
+unsigned long long inputalloc;
+unsigned long long inputlen;
+
+unsigned char h[crypto_hash_sha512_BYTES];
+
+void h_print(void)
+{
+  int i;
+  for (i = 0;i < crypto_hash_sha512_BYTES;++i) printf("%02x",255 & (int) h[i]);
+  printf("\n");
+}
+
+int main()
+{
+  struct stat st;
+  int ch;
+
+  if (fstat(0,&st) == 0) {
+    input = mmap(0,st.st_size,PROT_READ,MAP_SHARED,0,0);
+    if (input != MAP_FAILED) {
+      crypto_hash_sha512(h,input,st.st_size);
+      h_print();
+      return 0;
+    }
+  }
+
+  input = 0;
+  inputalloc = 0;
+  inputlen = 0;
+
+  while ((ch = getchar()) != EOF) {
+    if (inputlen >= inputalloc) {
+      void *newinput;
+      while (inputlen >= inputalloc)
+        inputalloc = inputalloc * 2 + 1;
+      if (posix_memalign(&newinput,16,inputalloc) != 0) return 111;
+      memcpy(newinput,input,inputlen);
+      free(input);
+      input = newinput;
+    }
+    input[inputlen++] = ch;
+  }
+
+  crypto_hash_sha512(h,input,inputlen);
+  h_print();
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/cpucycles/alpha.c b/nacl/nacl-20110221/cpucycles/alpha.c
new file mode 100644
index 00000000..ef497999
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/alpha.c
@@ -0,0 +1,80 @@
+/*
+cpucycles/alpha.c version 20060316
+D. J. Bernstein
+Public domain.
+*/
+
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+static long long tod(void)
+{
+  struct timeval t;
+  gettimeofday(&t,(struct timezone *) 0);
+  return t.tv_sec * (long long) 1000000 + t.tv_usec;
+}
+
+static long long rpcc(void)
+{
+  unsigned long long t;
+  asm volatile("rpcc %0" : "=r"(t));
+  return t & 0xffffffff;
+}
+
+static long long firstrpcc;
+static long long firsttod;
+static long long lastrpcc;
+static long long lasttod;
+static double mhz = 0;
+
+static void init(void)
+{
+  firstrpcc = rpcc();
+  firsttod = tod();
+
+  do {
+    lastrpcc = rpcc();
+    lasttod = tod();
+  } while (lasttod - firsttod < 10000);
+
+  lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff;
+  lasttod -= firsttod;
+
+  mhz = (double) lastrpcc / (double) lasttod;
+}
+
+long long cpucycles_alpha(void)
+{
+  double x;
+  long long y;
+
+  if (!mhz) init();
+
+  lastrpcc = rpcc();
+  lasttod = tod();
+
+  lastrpcc -= firstrpcc; lastrpcc &= 0xffffffff;
+  lasttod -= firsttod;
+
+  /* Number of cycles since firstrpcc is lastrpcc + 2^32 y for unknown y. */
+  /* Number of microseconds since firsttod is lasttod. */
+
+  x = (lasttod * mhz - lastrpcc) * 0.00000000023283064365386962890625;
+  y = x;
+  while (x > y + 0.5) y += 1;
+  while (x < y - 0.5) y -= 1;
+
+  y *= 4294967296ULL;
+  lastrpcc += y;
+
+  mhz = (double) lastrpcc / (double) lasttod;
+
+  return firstrpcc + lastrpcc;
+}
+
+long long cpucycles_alpha_persecond(void)
+{
+  if (!mhz) init();
+  return 1000000.0 * mhz;
+}
diff --git a/nacl/nacl-20110221/cpucycles/alpha.h b/nacl/nacl-20110221/cpucycles/alpha.h
new file mode 100644
index 00000000..c97672af
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/alpha.h
@@ -0,0 +1,27 @@
+/*
+cpucycles alpha.h version 20060318
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_alpha_h
+#define CPUCYCLES_alpha_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_alpha(void);
+extern long long cpucycles_alpha_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "alpha"
+#define cpucycles cpucycles_alpha
+#define cpucycles_persecond cpucycles_alpha_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/amd64cpuinfo.c b/nacl/nacl-20110221/cpucycles/amd64cpuinfo.c
new file mode 100644
index 00000000..729f2612
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/amd64cpuinfo.c
@@ -0,0 +1,16 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+long long cpucycles_amd64cpuinfo(void)
+{
+  unsigned long long result;
+  asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
+    : "=a" (result) ::  "%rdx");
+  return result;
+}
+
+long long cpucycles_amd64cpuinfo_persecond(void)
+{
+  return osfreq();
+}
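
The ".byte 15;.byte 49" in the asm above is the raw encoding of RDTSC (0F 31); the instruction leaves the low 32 bits of the timestamp counter in EAX and the high 32 bits in EDX, and the shlq/orq pair folds them into a single 64-bit value in RAX. The same read with the mnemonic spelled out, as a sketch (assumes GCC or Clang inline asm on x86-64):

    #include <stdio.h>
    #include <stdint.h>

    static inline uint64_t rdtsc64(void)
    {
      uint32_t lo, hi;
      __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));   /* EDX:EAX = TSC */
      return ((uint64_t) hi << 32) | lo;
    }

    int main(void)
    {
      printf("%llu\n", (unsigned long long) rdtsc64());
      return 0;
    }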
diff --git a/nacl/nacl-20110221/cpucycles/amd64cpuinfo.h b/nacl/nacl-20110221/cpucycles/amd64cpuinfo.h
new file mode 100644
index 00000000..8f858ae7
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/amd64cpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles amd64cpuinfo.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_amd64cpuinfo_h
+#define CPUCYCLES_amd64cpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_amd64cpuinfo(void);
+extern long long cpucycles_amd64cpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "amd64cpuinfo"
+#define cpucycles cpucycles_amd64cpuinfo
+#define cpucycles_persecond cpucycles_amd64cpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/amd64cpuspeed.c b/nacl/nacl-20110221/cpucycles/amd64cpuspeed.c
new file mode 100644
index 00000000..7e89511c
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/amd64cpuspeed.c
@@ -0,0 +1,25 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+
+long long cpucycles_amd64cpuspeed(void)
+{
+  unsigned long long result;
+  asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
+    : "=a" (result) ::  "%rdx");
+  return result;
+}
+
+long long cpucycles_amd64cpuspeed_persecond(void)
+{
+  int oid[2];
+  int val;
+  size_t size;
+  oid[0] = CTL_HW;
+  oid[1] = HW_CPUSPEED;
+  size = sizeof val;
+  if (sysctl(oid,2,&val,&size,0,0) == -1) return 0;
+  if (size != sizeof val) return 0;
+  return val * 1000000LL;
+}
diff --git a/nacl/nacl-20110221/cpucycles/amd64cpuspeed.h b/nacl/nacl-20110221/cpucycles/amd64cpuspeed.h
new file mode 100644
index 00000000..1f6ed54d
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/amd64cpuspeed.h
@@ -0,0 +1,27 @@
+/*
+cpucycles amd64cpuspeed.h version 20090716
+Matthew Dempsky
+Public domain.
+*/
+
+#ifndef CPUCYCLES_amd64cpuspeed_h
+#define CPUCYCLES_amd64cpuspeed_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_amd64cpuspeed(void);
+extern long long cpucycles_amd64cpuspeed_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "amd64cpuspeed"
+#define cpucycles cpucycles_amd64cpuspeed
+#define cpucycles_persecond cpucycles_amd64cpuspeed_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/amd64tscfreq.c b/nacl/nacl-20110221/cpucycles/amd64tscfreq.c
new file mode 100644
index 00000000..ef182c1b
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/amd64tscfreq.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include <sys/types.h>
+
+long long cpucycles_amd64tscfreq(void)
+{
+  unsigned long long result;
+  asm volatile(".byte 15;.byte 49;shlq $32,%%rdx;orq %%rdx,%%rax"
+    : "=a" (result) ::  "%rdx");
+  return result;
+}
+
+long long cpucycles_amd64tscfreq_persecond(void)
+{
+  long result = 0;
+  size_t resultlen = sizeof(long);
+  sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0);
+  return result;
+}
diff --git a/nacl/nacl-20110221/cpucycles/amd64tscfreq.h b/nacl/nacl-20110221/cpucycles/amd64tscfreq.h
new file mode 100644
index 00000000..a3c7aa6f
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/amd64tscfreq.h
@@ -0,0 +1,27 @@
+/*
+cpucycles amd64tscfreq.h version 20060318
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_amd64tscfreq_h
+#define CPUCYCLES_amd64tscfreq_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_amd64tscfreq(void);
+extern long long cpucycles_amd64tscfreq_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "amd64tscfreq"
+#define cpucycles cpucycles_amd64tscfreq
+#define cpucycles_persecond cpucycles_amd64tscfreq_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/celllinux.c b/nacl/nacl-20110221/cpucycles/celllinux.c
new file mode 100644
index 00000000..83a0c38a
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/celllinux.c
@@ -0,0 +1,83 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <spu_mfcio.h>
+
+static long myround(double u)
+{
+  long result = u;
+  while (result + 0.5 < u) result += 1;
+  while (result - 0.5 > u) result -= 1;
+  return result;
+}
+
+static long long microseconds(void)
+{
+  struct timeval t;
+  gettimeofday(&t,(struct timezone *) 0);
+  return t.tv_sec * (long long) 1000000 + t.tv_usec;
+}
+
+static long long timebase(void)
+{
+  unsigned long long result;
+  result = -spu_read_decrementer();
+  return 0xffffffff & result;
+}
+
+static double cpufrequency = 0;
+static long tbcycles = 0;
+
+static double guesstbcycles(void)
+{
+  long long tb0; long long us0;
+  long long tb1; long long us1;
+
+  tb0 = timebase();
+  us0 = microseconds();
+  do {
+    tb1 = timebase();
+    us1 = microseconds();
+  } while (us1 - us0 < 10000 || tb1 - tb0 < 1000);
+  if (tb1 <= tb0) return 0;
+  tb1 -= tb0;
+  us1 -= us0;
+  return (cpufrequency * 0.000001 * (double) us1) / (double) tb1;
+}
+
+static void init(void)
+{
+  int loop;
+  double guess1;
+  double guess2;
+
+  spu_write_decrementer(0xffffffff);
+
+  cpufrequency = 3192000000.0;
+
+  for (loop = 0;loop < 100;++loop) {
+    guess1 = guesstbcycles();
+    guess2 = guesstbcycles();
+    tbcycles = myround(guess1);
+    if (guess1 - tbcycles > 0.1) continue;
+    if (tbcycles - guess1 > 0.1) continue;
+    if (guess2 - tbcycles > 0.1) continue;
+    if (tbcycles - guess2 > 0.1) continue;
+    return;
+  }
+  tbcycles = 0;
+}
+
+long long cpucycles_celllinux(void)
+{
+  if (!tbcycles) init();
+  return timebase() * tbcycles;
+}
+
+long long cpucycles_celllinux_persecond(void)
+{
+  if (!tbcycles) init();
+  return cpufrequency;
+}
diff --git a/nacl/nacl-20110221/cpucycles/celllinux.h b/nacl/nacl-20110221/cpucycles/celllinux.h
new file mode 100644
index 00000000..75a5a3f2
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/celllinux.h
@@ -0,0 +1,27 @@
+/*
+cpucycles celllinux.h version 20081201
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_celllinux_h
+#define CPUCYCLES_celllinux_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_celllinux(void);
+extern long long cpucycles_celllinux_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "celllinux"
+#define cpucycles cpucycles_celllinux
+#define cpucycles_persecond cpucycles_celllinux_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/cortex.c b/nacl/nacl-20110221/cpucycles/cortex.c
new file mode 100644
index 00000000..07e2fa02
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/cortex.c
@@ -0,0 +1,73 @@
+/*
+cpucycles/cortex.c version 20101203
+D. J. Bernstein
+Public domain.
+*/
+
+#define SCALE 1
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+static int enabled = 0;
+
+static int prev[3];
+static unsigned long long prevcycles = 0;
+static int now[3];
+static long long cyclespersec = 0;
+
+static void readticks(unsigned int *result)
+{
+  struct timeval t;
+  unsigned int cc;
+  if (!enabled) {
+    asm volatile("mcr p15, 0, %0, c9, c12, 0" :: "r"(17));
+    asm volatile("mcr p15, 0, %0, c9, c12, 1" :: "r"(0x8000000f));
+    asm volatile("mcr p15, 0, %0, c9, c12, 3" :: "r"(0x8000000f));
+    enabled = 1;
+  }
+  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(cc));
+  gettimeofday(&t,(struct timezone *) 0);
+  result[0] = cc;
+  result[1] = t.tv_usec;
+  result[2] = t.tv_sec;
+}
+
+long long cpucycles_cortex(void)
+{
+  unsigned long long delta4;
+  int deltan;
+  int deltas;
+  unsigned long long guesscycles;
+  
+  readticks(now);
+  delta4 = (unsigned int) (now[0] - prev[0]); /* unsigned change in number of cycles mod 2^32 */
+  deltan = now[1] - prev[1]; /* signed change in number of microseconds mod 10^6 */
+  deltas = now[2] - prev[2]; /* signed change in number of seconds */
+  if ((deltas == 0 && deltan < 200000) || (deltas == 1 && deltan < -800000))
+    return (prevcycles + delta4) * SCALE;
+
+  prev[0] = now[0];
+  prev[1] = now[1];
+  prev[2] = now[2];
+
+  if ((deltas == 0 && deltan < 300000) || (deltas == 1 && deltan < -700000)) {
+    // actual number of cycles cannot have increased by 2^32 in <0.3s
+    cyclespersec = 1000000 * (unsigned long long) delta4;
+    cyclespersec /= deltan + 1000000 * (long long) deltas;
+  } else {
+    guesscycles = deltas * cyclespersec;
+    guesscycles += (deltan * cyclespersec) / 1000000;
+    while (delta4 + 2147483648ULL < guesscycles) delta4 += 4294967296ULL;
+    /* XXX: could do longer-term extrapolation here */
+  }
+
+  prevcycles += delta4;
+  return prevcycles * SCALE;
+}
+
+long long cpucycles_cortex_persecond(void)
+{
+  while (!cyclespersec) cpucycles_cortex();
+  return cyclespersec * SCALE;
+}
diff --git a/nacl/nacl-20110221/cpucycles/cortex.h b/nacl/nacl-20110221/cpucycles/cortex.h
new file mode 100644
index 00000000..e622f132
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/cortex.h
@@ -0,0 +1,27 @@
+/*
+cpucycles cortex.h version 20100912
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_cortex_h
+#define CPUCYCLES_cortex_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_cortex(void);
+extern long long cpucycles_cortex_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "cortex"
+#define cpucycles cpucycles_cortex
+#define cpucycles_persecond cpucycles_cortex_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/dev4ns.c b/nacl/nacl-20110221/cpucycles/dev4ns.c
new file mode 100644
index 00000000..73ff5755
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/dev4ns.c
@@ -0,0 +1,62 @@
+#include <sys/types.h>
+#include <fcntl.h>
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+static int fddev = -1;
+static int prev[3];
+static unsigned long long prevcycles = 0;
+static int now[3];
+static long long cyclespersec = 0;
+
+static void readdev(unsigned int *result)
+{
+  if (read(fddev,result,12) == 12) return;
+  result[0] = result[1] = result[2] = 0;
+}
+
+long long cpucycles_dev4ns(void)
+{
+  unsigned long long delta4;
+  int deltan;
+  int deltas;
+  unsigned long long guesscycles;
+
+  if (fddev == -1) {
+    fddev = open("/dev/cpucycles4ns",O_RDONLY);
+    readdev(prev);
+  }
+  
+  readdev(now);
+  delta4 = (unsigned int) (now[0] - prev[0]); /* unsigned change in number of cycles mod 2^32 */
+  deltan = now[1] - prev[1]; /* signed change in number of nanoseconds mod 10^9 */
+  deltas = now[2] - prev[2]; /* signed change in number of seconds */
+  if ((deltas == 0 && deltan < 200000000) || (deltas == 1 && deltan < -800000000))
+    return prevcycles + delta4;
+
+  prev[0] = now[0];
+  prev[1] = now[1];
+  prev[2] = now[2];
+
+  if ((deltas == 0 && deltan < 300000000) || (deltas == 1 && deltan < -700000000)) {
+    // actual number of cycles cannot have increased by 2^32 in <0.3s
+    cyclespersec = 1000000000 * (unsigned long long) delta4;
+    cyclespersec /= deltan + 1000000000 * (long long) deltas;
+  } else {
+    guesscycles = deltas * cyclespersec;
+    guesscycles += (deltan * cyclespersec) / 1000000000;
+    while (delta4 + 2147483648ULL < guesscycles) delta4 += 4294967296ULL;
+    /* XXX: could do longer-term extrapolation here */
+  }
+
+  prevcycles += delta4;
+  return prevcycles;
+}
+
+long long cpucycles_dev4ns_persecond(void)
+{
+  while (!cyclespersec) cpucycles_dev4ns();
+  return cyclespersec;
+}
diff --git a/nacl/nacl-20110221/cpucycles/dev4ns.h b/nacl/nacl-20110221/cpucycles/dev4ns.h
new file mode 100644
index 00000000..1d99639a
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/dev4ns.h
@@ -0,0 +1,27 @@
+/*
+cpucycles dev4ns.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_dev4ns_h
+#define CPUCYCLES_dev4ns_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_dev4ns(void);
+extern long long cpucycles_dev4ns_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "dev4ns"
+#define cpucycles cpucycles_dev4ns
+#define cpucycles_persecond cpucycles_dev4ns_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/do b/nacl/nacl-20110221/cpucycles/do
new file mode 100755
index 00000000..efc063de
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/do
@@ -0,0 +1,105 @@
+#!/bin/sh -e
+
+okabi | (
+  while read abi
+  do
+
+    rm -f cpucycles.o cpucycles.h
+    
+    (
+      case "$abi" in
+        ppc*)
+          echo powerpccpuinfo
+          echo powerpcmacos
+	  ;;
+        amd64*)
+          echo amd64tscfreq
+          echo amd64cpuinfo
+          echo amd64cpuspeed
+	  ;;
+        x86*)
+          echo x86tscfreq
+          echo x86cpuinfo
+          echo x86cpuspeed
+          echo x86estimate
+	  ;;
+	cell*)
+          echo celllinux
+	  ;;
+	sparc*)
+          echo sparccpuinfo
+          echo sparc32cpuinfo
+	  ;;
+	mips*)
+          echo mips
+	  ;;
+	hppa*)
+          echo hppapstat
+	  ;;
+	alpha*)
+          echo alpha
+	  ;;
+	sgi*)
+          echo sgi
+	  ;;
+	arm*)
+	  echo cortex
+          echo dev4ns
+	  ;;
+      esac
+
+      echo amd64tscfreq
+      echo amd64cpuinfo
+      echo amd64cpuspeed
+      echo x86tscfreq
+      echo x86cpuinfo
+      echo x86cpuspeed
+      echo x86estimate
+      echo ia64cpuinfo
+      echo powerpccpuinfo
+      echo powerpcmacos
+      echo celllinux
+      echo sparccpuinfo
+      echo sparc32cpuinfo
+      echo mips
+      echo hppapstat
+      echo alpha
+      echo sgi
+      echo cortex
+      echo dev4ns
+      echo monotoniccpuinfo
+      echo monotonic
+      echo gettimeofday
+    ) | (
+      while read n
+      do
+        okc-$abi | (
+          while read c
+          do
+            echo "=== `date` === Trying $n.c with $c..." >&2
+            rm -f test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c
+            cp $n.c cpucycles-impl.c || continue
+            cp $n.h cpucycles-impl.h || continue
+            $c -c cpucycles-impl.c || continue
+            $c -o test test.c cpucycles-impl.o || continue
+            ./test || continue
+            echo "=== `date` === Success. Using $n.c." >&2
+            mkdir -p lib/$abi
+            mv cpucycles-impl.o lib/$abi/cpucycles.o
+            mkdir -p include/$abi
+            mv cpucycles-impl.h include/$abi/cpucycles.h
+            exit 0
+          done
+          exit 111
+        ) && exit 0
+      done
+      exit 111
+    ) || (
+      echo ===== Giving up. >&2
+      rm -f test cpucycles-impl.o cpucycles-impl.h cpucycles-impl.c
+      exit 111
+    ) || exit 0
+
+  done
+  exit 0
+) || exit 111
diff --git a/nacl/nacl-20110221/cpucycles/gettimeofday.c b/nacl/nacl-20110221/cpucycles/gettimeofday.c
new file mode 100644
index 00000000..0bf5e03c
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/gettimeofday.c
@@ -0,0 +1,32 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+static double cpufrequency = 0;
+
+static void init(void)
+{
+  cpufrequency = osfreq();
+}
+
+long long cpucycles_gettimeofday(void)
+{
+  double result;
+  struct timeval t;
+  if (!cpufrequency) init();
+  gettimeofday(&t,(struct timezone *) 0);
+  result = t.tv_usec;
+  result *= 0.000001;
+  result += (double) t.tv_sec;
+  result *= cpufrequency;
+  return result;
+}
+
+long long cpucycles_gettimeofday_persecond(void)
+{
+  if (!cpufrequency) init();
+  return cpufrequency;
+}
diff --git a/nacl/nacl-20110221/cpucycles/gettimeofday.h b/nacl/nacl-20110221/cpucycles/gettimeofday.h
new file mode 100644
index 00000000..147b127b
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/gettimeofday.h
@@ -0,0 +1,27 @@
+/*
+cpucycles gettimeofday.h version 20060318
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_gettimeofday_h
+#define CPUCYCLES_gettimeofday_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_gettimeofday(void);
+extern long long cpucycles_gettimeofday_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "gettimeofday"
+#define cpucycles cpucycles_gettimeofday
+#define cpucycles_persecond cpucycles_gettimeofday_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/hppapstat.c b/nacl/nacl-20110221/cpucycles/hppapstat.c
new file mode 100644
index 00000000..5ae1e843
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/hppapstat.c
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/pstat.h>
+#include <machine/inline.h>
+
+long long cpucycles_hppapstat(void)
+{
+  register long long result;
+  _MFCTL(16,result);
+  return result;
+}
+
+long long cpucycles_hppapstat_persecond(void)
+{
+  struct pst_processor pst;
+  union pstun pu;
+  double result;
+
+  pu.pst_processor = &pst;
+  if (pstat(PSTAT_PROCESSOR,pu,sizeof(pst),1,0) < 0) return 0;
+  result = pst.psp_iticksperclktick;
+  result *= (double) sysconf(_SC_CLK_TCK);
+  return result;
+}
diff --git a/nacl/nacl-20110221/cpucycles/hppapstat.h b/nacl/nacl-20110221/cpucycles/hppapstat.h
new file mode 100644
index 00000000..721814bb
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/hppapstat.h
@@ -0,0 +1,27 @@
+/*
+cpucycles hppapstat.h version 20060319
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_hppapstat_h
+#define CPUCYCLES_hppapstat_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_hppapstat(void);
+extern long long cpucycles_hppapstat_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "hppapstat"
+#define cpucycles cpucycles_hppapstat
+#define cpucycles_persecond cpucycles_hppapstat_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/ia64cpuinfo.c b/nacl/nacl-20110221/cpucycles/ia64cpuinfo.c
new file mode 100644
index 00000000..580c6cee
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/ia64cpuinfo.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+long long cpucycles_ia64cpuinfo(void)
+{
+  long long result;
+  asm volatile("mov %0=ar.itc" : "=r"(result));
+  return result;
+}
+
+long long cpucycles_ia64cpuinfo_persecond(void)
+{
+  return osfreq();
+}
diff --git a/nacl/nacl-20110221/cpucycles/ia64cpuinfo.h b/nacl/nacl-20110221/cpucycles/ia64cpuinfo.h
new file mode 100644
index 00000000..a6bcf47d
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/ia64cpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles ia64cpuinfo.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_ia64cpuinfo_h
+#define CPUCYCLES_ia64cpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_ia64cpuinfo(void);
+extern long long cpucycles_ia64cpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "ia64cpuinfo"
+#define cpucycles cpucycles_ia64cpuinfo
+#define cpucycles_persecond cpucycles_ia64cpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/mips.c b/nacl/nacl-20110221/cpucycles/mips.c
new file mode 100644
index 00000000..8b75f824
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/mips.c
@@ -0,0 +1,65 @@
+/*
+cpucycles/mips.c version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#define SCALE 2
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+static int prev[3];
+static unsigned long long prevcycles = 0;
+static int now[3];
+static long long cyclespersec = 0;
+
+static void readticks(unsigned int *result)
+{
+  struct timeval t;
+  unsigned int cc;
+  asm volatile(".byte 59; .byte 16; .byte 2; .byte 124; move %0,$2" : "=r"(cc) : : "$2");
+  gettimeofday(&t,(struct timezone *) 0);
+  result[0] = cc;
+  result[1] = t.tv_usec;
+  result[2] = t.tv_sec;
+}
+
+long long cpucycles_mips(void)
+{
+  unsigned long long delta4;
+  int deltan;
+  int deltas;
+  unsigned long long guesscycles;
+  
+  readticks(now);
+  delta4 = (unsigned int) (now[0] - prev[0]); /* unsigned change in number of cycles mod 2^32 */
+  deltan = now[1] - prev[1]; /* signed change in number of microseconds mod 10^6 */
+  deltas = now[2] - prev[2]; /* signed change in number of seconds */
+  if ((deltas == 0 && deltan < 200000) || (deltas == 1 && deltan < -800000))
+    return (prevcycles + delta4) * SCALE;
+
+  prev[0] = now[0];
+  prev[1] = now[1];
+  prev[2] = now[2];
+
+  if ((deltas == 0 && deltan < 300000) || (deltas == 1 && deltan < -700000)) {
+    // actual number of cycles cannot have increased by 2^32 in <0.3s
+    cyclespersec = 1000000 * (unsigned long long) delta4;
+    cyclespersec /= deltan + 1000000 * (long long) deltas;
+  } else {
+    guesscycles = deltas * cyclespersec;
+    guesscycles += (deltan * cyclespersec) / 1000000;
+    while (delta4 + 2147483648ULL < guesscycles) delta4 += 4294967296ULL;
+    /* XXX: could do longer-term extrapolation here */
+  }
+
+  prevcycles += delta4;
+  return prevcycles * SCALE;
+}
+
+long long cpucycles_mips_persecond(void)
+{
+  while (!cyclespersec) cpucycles_mips();
+  return cyclespersec * SCALE;
+}
diff --git a/nacl/nacl-20110221/cpucycles/mips.h b/nacl/nacl-20110221/cpucycles/mips.h
new file mode 100644
index 00000000..6f1b26c3
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/mips.h
@@ -0,0 +1,27 @@
+/*
+cpucycles mips.h version 20100802
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_mips_h
+#define CPUCYCLES_mips_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_mips(void);
+extern long long cpucycles_mips_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "mips"
+#define cpucycles cpucycles_mips
+#define cpucycles_persecond cpucycles_mips_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/monotonic.c b/nacl/nacl-20110221/cpucycles/monotonic.c
new file mode 100644
index 00000000..412a44fb
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/monotonic.c
@@ -0,0 +1,34 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static double cpufrequency = 0;
+
+static void init(void)
+{
+  long result = 0; size_t resultlen = sizeof(long);
+  sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0);
+  cpufrequency = result;
+}
+
+long long cpucycles_monotonic(void)
+{
+  double result;
+  struct timespec t;
+  if (!cpufrequency) init();
+  clock_gettime(CLOCK_MONOTONIC,&t);
+  result = t.tv_nsec;
+  result *= 0.000000001;
+  result += (double) t.tv_sec;
+  result *= cpufrequency;
+  return result;
+}
+
+long long cpucycles_monotonic_persecond(void)
+{
+  if (!cpufrequency) init();
+  return cpufrequency;
+}
diff --git a/nacl/nacl-20110221/cpucycles/monotonic.h b/nacl/nacl-20110221/cpucycles/monotonic.h
new file mode 100644
index 00000000..9070860b
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/monotonic.h
@@ -0,0 +1,27 @@
+/*
+cpucycles monotonic.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_monotonic_h
+#define CPUCYCLES_monotonic_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_monotonic(void);
+extern long long cpucycles_monotonic_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "monotonic"
+#define cpucycles cpucycles_monotonic
+#define cpucycles_persecond cpucycles_monotonic_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/monotoniccpuinfo.c b/nacl/nacl-20110221/cpucycles/monotoniccpuinfo.c
new file mode 100644
index 00000000..609c6305
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/monotoniccpuinfo.c
@@ -0,0 +1,33 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include "osfreq.c"
+
+static double cpufrequency = 0;
+
+static void init(void)
+{
+  cpufrequency = osfreq();
+}
+
+long long cpucycles_monotoniccpuinfo(void)
+{
+  double result;
+  struct timespec t;
+  if (!cpufrequency) init();
+  clock_gettime(CLOCK_MONOTONIC,&t);
+  result = t.tv_nsec;
+  result *= 0.000000001;
+  result += (double) t.tv_sec;
+  result *= cpufrequency;
+  return result;
+}
+
+long long cpucycles_monotoniccpuinfo_persecond(void)
+{
+  if (!cpufrequency) init();
+  return cpufrequency;
+}
diff --git a/nacl/nacl-20110221/cpucycles/monotoniccpuinfo.h b/nacl/nacl-20110221/cpucycles/monotoniccpuinfo.h
new file mode 100644
index 00000000..d4ba7ea8
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/monotoniccpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles monotoniccpuinfo.h version 20100804
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_monotoniccpuinfo_h
+#define CPUCYCLES_monotoniccpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_monotoniccpuinfo(void);
+extern long long cpucycles_monotoniccpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "monotoniccpuinfo"
+#define cpucycles cpucycles_monotoniccpuinfo
+#define cpucycles_persecond cpucycles_monotoniccpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/osfreq.c b/nacl/nacl-20110221/cpucycles/osfreq.c
new file mode 100644
index 00000000..4e106a23
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/osfreq.c
@@ -0,0 +1,65 @@
+static double osfreq(void)
+{
+  FILE *f;
+  double result;
+  int s;
+
+  f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq", "r");
+  if (f) {
+    s = fscanf(f,"%lf",&result);
+    fclose(f);
+    if (s > 0) return 1000.0 * result;
+  }
+
+  f = fopen("/sys/devices/system/cpu/cpu0/clock_tick", "r");
+  if (f) {
+    s = fscanf(f,"%lf",&result);
+    fclose(f);
+    if (s > 0) return result;
+  }
+
+  f = fopen("/proc/cpuinfo","r");
+  if (f) {
+    for (;;) {
+      s = fscanf(f,"cpu MHz : %lf",&result);
+      if (s > 0) break;
+      if (s == 0) s = fscanf(f,"%*[^\n]\n");
+      if (s < 0) { result = 0; break; }
+    }
+    fclose(f);
+    if (result) return 1000000.0 * result;
+  }
+
+  f = fopen("/proc/cpuinfo","r");
+  if (f) {
+    for (;;) {
+      s = fscanf(f,"clock : %lf",&result);
+      if (s > 0) break;
+      if (s == 0) s = fscanf(f,"%*[^\n]\n");
+      if (s < 0) { result = 0; break; }
+    }
+    fclose(f);
+    if (result) return 1000000.0 * result;
+  }
+
+  f = popen("/usr/sbin/lsattr -E -l proc0 -a frequency 2>/dev/null","r");
+  if (f) {
+    s = fscanf(f,"frequency %lf",&result);
+    pclose(f);
+    if (s > 0) return result;
+  }
+
+  f = popen("/usr/sbin/psrinfo -v 2>/dev/null","r");
+  if (f) {
+    for (;;) {
+      s = fscanf(f," The %*s processor operates at %lf MHz",&result);
+      if (s > 0) break;
+      if (s == 0) s = fscanf(f,"%*[^\n]\n");
+      if (s < 0) { result = 0; break; }
+    }
+    pclose(f);
+    if (result) return 1000000.0 * result;
+  }
+
+  return 0;
+}
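
osfreq() walks a cascade of platform-specific sources: the Linux cpufreq sysfs
node (reported in kHz, hence the factor of 1000), the sparc clock_tick node
(returned as-is), two /proc/cpuinfo spellings ("cpu MHz" and "clock", both
scaled by 1e6), AIX lsattr and Solaris psrinfo. It returns 0 when every probe
fails, so callers must treat 0 as "frequency unknown", as in this sketch
(illustrative, not from the sources).

  #include <stdio.h>
  #include "osfreq.c"   /* pulled in the same way the *cpuinfo.c files do */

  /* Detected CPU frequency in Hz, or fallback_hz if every probe failed. */
  static long long cpu_hz_or(long long fallback_hz)
  {
    double hz = osfreq();
    return hz > 0 ? (long long) hz : fallback_hz;
  }
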
diff --git a/nacl/nacl-20110221/cpucycles/powerpccpuinfo.c b/nacl/nacl-20110221/cpucycles/powerpccpuinfo.c
new file mode 100644
index 00000000..b70c745a
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/powerpccpuinfo.c
@@ -0,0 +1,95 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+static long myround(double u)
+{
+  long result = u;
+  while (result + 0.5 < u) result += 1;
+  while (result - 0.5 > u) result -= 1;
+  return result;
+}
+
+static long long microseconds(void)
+{
+  struct timeval t;
+  gettimeofday(&t,(struct timezone *) 0);
+  return t.tv_sec * (long long) 1000000 + t.tv_usec;
+}
+
+static int tbshift = 0;
+
+static long long timebase(void)
+{
+  unsigned long high;
+  unsigned long low;
+  unsigned long newhigh;
+  unsigned long long result;
+  asm volatile(
+    "7:mftbu %0;mftb %1;mftbu %2;cmpw %0,%2;bne 7b"
+    : "=r" (high), "=r" (low), "=r" (newhigh)
+  );
+  result = high;
+  result <<= 32;
+  result |= low;
+  return result >> tbshift;
+}
+
+static double cpufrequency = 0;
+static long tbcycles = 0;
+
+static double guesstbcycles(void)
+{
+  long long tb0; long long us0;
+  long long tb1; long long us1;
+
+  tb0 = timebase();
+  us0 = microseconds();
+  do {
+    tb1 = timebase();
+    us1 = microseconds();
+  } while (us1 - us0 < 10000 || tb1 - tb0 < 1000);
+  if (tb1 <= tb0) return 0;
+  tb1 -= tb0;
+  us1 -= us0;
+  return (cpufrequency * 0.000001 * (double) us1) / (double) tb1;
+}
+
+static void init(void)
+{
+  int loop;
+  double guess1;
+  double guess2;
+
+  cpufrequency = osfreq();
+  if (!cpufrequency) return;
+
+  for (tbshift = 0;tbshift < 10;++tbshift) {
+    for (loop = 0;loop < 100;++loop) {
+      guess1 = guesstbcycles();
+      guess2 = guesstbcycles();
+      tbcycles = myround(guess1);
+      if (guess1 - tbcycles > 0.1) continue;
+      if (tbcycles - guess1 > 0.1) continue;
+      if (guess2 - tbcycles > 0.1) continue;
+      if (tbcycles - guess2 > 0.1) continue;
+      return;
+    }
+  }
+  tbcycles = 0;
+}
+
+long long cpucycles_powerpccpuinfo(void)
+{
+  if (!tbcycles) init();
+  return timebase() * tbcycles;
+}
+
+long long cpucycles_powerpccpuinfo_persecond(void)
+{
+  if (!tbcycles) init();
+  return cpufrequency;
+}
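
On PowerPC the readable counter is the timebase, which ticks at some fraction
of the core clock, so init() calibrates tbcycles, the number of CPU cycles per
timebase tick: it compares timebase deltas against gettimeofday() deltas and
retries (right-shifting the timebase via tbshift when needed) until two
consecutive guesses land within 0.1 of the same integer. The quantity being
estimated is just a ratio (illustrative restatement, not from the sources).

  /* Over one interval: cycles elapsed = cpu_hz * seconds, and timebase ticks
     elapsed = tb_delta, so cycles per tick is their ratio; us_delta is the
     interval in microseconds, matching guesstbcycles() above. */
  static double cycles_per_tb_tick(double cpu_hz, long long us_delta, long long tb_delta)
  {
    return (cpu_hz * 0.000001 * (double) us_delta) / (double) tb_delta;
  }
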
diff --git a/nacl/nacl-20110221/cpucycles/powerpccpuinfo.h b/nacl/nacl-20110221/cpucycles/powerpccpuinfo.h
new file mode 100644
index 00000000..c763a1b4
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/powerpccpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles powerpccpuinfo.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_powerpccpuinfo_h
+#define CPUCYCLES_powerpccpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_powerpccpuinfo(void);
+extern long long cpucycles_powerpccpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "powerpccpuinfo"
+#define cpucycles cpucycles_powerpccpuinfo
+#define cpucycles_persecond cpucycles_powerpccpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/powerpcmacos.c b/nacl/nacl-20110221/cpucycles/powerpcmacos.c
new file mode 100644
index 00000000..ab0be1ea
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/powerpcmacos.c
@@ -0,0 +1,42 @@
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#include <mach/mach_time.h>
+
+#define timebase mach_absolute_time
+
+static int cpumib[2] = { CTL_HW, HW_CPU_FREQ } ;
+static int tbmib[2] = { CTL_HW, HW_TB_FREQ } ;
+
+static long myround(double u)
+{
+  long result = u;
+  while (result + 0.5 < u) result += 1;
+  while (result - 0.5 > u) result -= 1;
+  return result;
+}
+
+static long tbcycles = 0;
+
+static void init(void)
+{
+  unsigned int cpufrequency = 0; size_t cpufrequencylen = sizeof(unsigned int);
+  unsigned int tbfrequency = 0; size_t tbfrequencylen = sizeof(unsigned int);
+  sysctl(cpumib,2,&cpufrequency,&cpufrequencylen,0,0);
+  sysctl(tbmib,2,&tbfrequency,&tbfrequencylen,0,0);
+  if (tbfrequency > 0)
+    tbcycles = myround((double) (unsigned long long) cpufrequency
+                     / (double) (unsigned long long) tbfrequency);
+}
+
+long long cpucycles_powerpcmacos(void)
+{
+  if (!tbcycles) init();
+  return timebase() * tbcycles;
+}
+
+long long cpucycles_powerpcmacos_persecond(void)
+{
+  unsigned int result = 0; size_t resultlen = sizeof(unsigned int);
+  sysctl(cpumib,2,&result,&resultlen,0,0);
+  return (unsigned long long) result;
+}
diff --git a/nacl/nacl-20110221/cpucycles/powerpcmacos.h b/nacl/nacl-20110221/cpucycles/powerpcmacos.h
new file mode 100644
index 00000000..f66c0e36
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/powerpcmacos.h
@@ -0,0 +1,27 @@
+/*
+cpucycles powerpcmacos.h version 20060319
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_powerpcmacos_h
+#define CPUCYCLES_powerpcmacos_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_powerpcmacos(void);
+extern long long cpucycles_powerpcmacos_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "powerpcmacos"
+#define cpucycles cpucycles_powerpcmacos
+#define cpucycles_persecond cpucycles_powerpcmacos_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/sgi.c b/nacl/nacl-20110221/cpucycles/sgi.c
new file mode 100644
index 00000000..c232af09
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/sgi.c
@@ -0,0 +1,38 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+static double cpufrequency = 0;
+
+static void init(void)
+{
+  FILE *f;
+
+  f = popen("hinv -c processor | awk '{if ($3==\"MHZ\") print $2*1000000}'","r");
+  if (!f) return;
+  if (fscanf(f,"%lf",&cpufrequency) < 1) cpufrequency = 0;
+  pclose(f);
+  if (!cpufrequency) return;
+}
+
+long long cpucycles_sgi(void)
+{
+  double result;
+  struct timespec t;
+  if (!cpufrequency) init();
+  clock_gettime(CLOCK_SGI_CYCLE,&t);
+  result = t.tv_nsec;
+  result *= 0.000000001;
+  result += (double) t.tv_sec;
+  result *= cpufrequency;
+  return result;
+}
+
+long long cpucycles_sgi_persecond(void)
+{
+  if (!cpufrequency) init();
+  return cpufrequency;
+}
diff --git a/nacl/nacl-20110221/cpucycles/sgi.h b/nacl/nacl-20110221/cpucycles/sgi.h
new file mode 100644
index 00000000..56bad976
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/sgi.h
@@ -0,0 +1,27 @@
+/*
+cpucycles sgi.h version 20070916
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_sgi_h
+#define CPUCYCLES_sgi_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_sgi(void);
+extern long long cpucycles_sgi_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "sgi"
+#define cpucycles cpucycles_sgi
+#define cpucycles_persecond cpucycles_sgi_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/sparc32cpuinfo.c b/nacl/nacl-20110221/cpucycles/sparc32cpuinfo.c
new file mode 100644
index 00000000..1fc53d06
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/sparc32cpuinfo.c
@@ -0,0 +1,16 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+long long cpucycles_sparc32cpuinfo(void)
+{
+  long long result;
+  asm volatile(".word 2202075136; .word 2570088480; srl %%g1,0,%L0; mov %%o4,%H0"
+    : "=r" (result) : : "g1","o4");
+  return result;
+}
+
+long long cpucycles_sparc32cpuinfo_persecond(void)
+{
+  return osfreq();
+}
diff --git a/nacl/nacl-20110221/cpucycles/sparc32cpuinfo.h b/nacl/nacl-20110221/cpucycles/sparc32cpuinfo.h
new file mode 100644
index 00000000..9d39dc65
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/sparc32cpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles sparc32cpuinfo.h version 20100804
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_sparc32cpuinfo_h
+#define CPUCYCLES_sparc32cpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_sparc32cpuinfo(void);
+extern long long cpucycles_sparc32cpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "sparc32cpuinfo"
+#define cpucycles cpucycles_sparc32cpuinfo
+#define cpucycles_persecond cpucycles_sparc32cpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/sparccpuinfo.c b/nacl/nacl-20110221/cpucycles/sparccpuinfo.c
new file mode 100644
index 00000000..d07aafec
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/sparccpuinfo.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+long long cpucycles_sparccpuinfo(void)
+{
+  long long result;
+  asm volatile("rd %%tick,%0" : "=r" (result));
+  return result;
+}
+
+long long cpucycles_sparccpuinfo_persecond(void)
+{
+  return osfreq();
+}
diff --git a/nacl/nacl-20110221/cpucycles/sparccpuinfo.h b/nacl/nacl-20110221/cpucycles/sparccpuinfo.h
new file mode 100644
index 00000000..badb2144
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/sparccpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles sparccpuinfo.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_sparccpuinfo_h
+#define CPUCYCLES_sparccpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_sparccpuinfo(void);
+extern long long cpucycles_sparccpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "sparccpuinfo"
+#define cpucycles cpucycles_sparccpuinfo
+#define cpucycles_persecond cpucycles_sparccpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/test.c b/nacl/nacl-20110221/cpucycles/test.c
new file mode 100644
index 00000000..bc43d719
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/test.c
@@ -0,0 +1,77 @@
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include "cpucycles-impl.h"
+
+static long long tod(void)
+{
+  struct timeval t;
+  gettimeofday(&t,(struct timezone *) 0);
+  return t.tv_sec * (long long) 1000000 + t.tv_usec;
+}
+
+long long todstart;
+long long todend;
+long long cpustart;
+long long cpuend;
+
+long long cyclespersecond;
+long long cyclespertod;
+
+long long t[1001];
+
+int main()
+{
+  int j;
+  int i;
+
+  if (!cpucycles()) {
+    fprintf(stderr,"cpucycles() = %lld\n",cpucycles());
+    return 100;
+  }
+  for (i = 0;i <= 1000;++i) t[i] = cpucycles();
+  for (i = 0;i < 1000;++i) if (t[i] > t[i + 1]) {
+    fprintf(stderr,"t[%d] = %lld\n",i,t[i]);
+    fprintf(stderr,"t[%d] = %lld\n",i + 1,t[i + 1]);
+    fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond());
+    return 100;
+  }
+  if (t[0] == t[1000]) {
+    fprintf(stderr,"t[%d] = %lld\n",0,t[0]);
+    fprintf(stderr,"t[%d] = %lld\n",1000,t[1000]);
+    fprintf(stderr,"cpucycles_persecond() = %lld\n",cpucycles_persecond());
+    return 100;
+  } 
+
+  cyclespersecond = cpucycles_persecond();
+
+  if (cyclespersecond <= 0) {
+    fprintf(stderr,"cpucycles_persecond() = %lld\n",cyclespersecond);
+    return 100;
+  }
+
+  todstart = tod();
+  cpustart = cpucycles();
+  for (j = 0;j < 1000;++j) for (i = 0;i <= 1000;++i) t[i] = t[i] + i + j;
+  todend = tod();
+  cpuend = cpucycles();
+
+  todend -= todstart;
+  cpuend -= cpustart;
+
+  cyclespertod = (long long) (((double) cpuend) * 1000000.0 / (double) todend);
+
+  if (cyclespertod > 10 * cyclespersecond) {
+    fprintf(stderr,"cyclespertod = %lld, cyclespersecond = %lld\n",cyclespertod,cyclespersecond);
+    return 100;
+  }
+
+  for (i = 0;i <= 1000;++i) t[i] = cpucycles();
+  printf("%s",cpucycles_implementation);
+  printf(" %lld",cyclespersecond);
+  printf(" %lld",cyclespertod);
+  for (i = 0;i < 64;++i) printf(" %lld",t[i + 1] - t[i]);
+  printf("\n");
+  return 0;
+}
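
test.c is the self-test used when picking an implementation: it rejects a
counter that reads zero, that ever decreases across 1001 back-to-back reads,
that never advances at all, or that claims a non-positive rate, and it
cross-checks the claimed cycles/second against gettimeofday() over a small busy
loop, failing when the observed rate exceeds ten times the claimed one. That
cross-check reduces to the ratio below (illustrative restatement).

  /* Observed rate from a (cycle delta, microsecond delta) pair, as computed
     for cyclespertod in test.c. */
  static long long observed_cycles_per_second(long long cycle_delta, long long us_delta)
  {
    return (long long) ((double) cycle_delta * 1000000.0 / (double) us_delta);
  }
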
diff --git a/nacl/nacl-20110221/cpucycles/x86cpuinfo.c b/nacl/nacl-20110221/cpucycles/x86cpuinfo.c
new file mode 100644
index 00000000..3fb0a1b0
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86cpuinfo.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include "osfreq.c"
+
+long long cpucycles_x86cpuinfo(void)
+{
+  long long result;
+  asm volatile(".byte 15;.byte 49" : "=A" (result));
+  return result;
+}
+
+long long cpucycles_x86cpuinfo_persecond(void)
+{
+  return osfreq();
+}
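
The ".byte 15;.byte 49" sequence is the RDTSC opcode (0x0f 0x31) spelled out as
raw bytes, and the "=A" constraint ties the edx:eax result to a 64-bit
variable, which is only correct on 32-bit x86. The same read via the mnemonic,
which also behaves on x86-64, looks like this (illustrative sketch, not from
the sources).

  /* RDTSC via the mnemonic; assembling the 64-bit value by hand avoids "=A",
     which does not span rdx:rax on x86-64. */
  static long long rdtsc_sketch(void)
  {
    unsigned int lo, hi;
    asm volatile("rdtsc" : "=a" (lo), "=d" (hi));
    return ((long long) hi << 32) | lo;
  }
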
diff --git a/nacl/nacl-20110221/cpucycles/x86cpuinfo.h b/nacl/nacl-20110221/cpucycles/x86cpuinfo.h
new file mode 100644
index 00000000..88f151dd
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86cpuinfo.h
@@ -0,0 +1,27 @@
+/*
+cpucycles x86cpuinfo.h version 20100803
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_x86cpuinfo_h
+#define CPUCYCLES_x86cpuinfo_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_x86cpuinfo(void);
+extern long long cpucycles_x86cpuinfo_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "x86cpuinfo"
+#define cpucycles cpucycles_x86cpuinfo
+#define cpucycles_persecond cpucycles_x86cpuinfo_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/x86cpuspeed.c b/nacl/nacl-20110221/cpucycles/x86cpuspeed.c
new file mode 100644
index 00000000..34222565
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86cpuspeed.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <sys/sysctl.h>
+
+long long cpucycles_x86cpuspeed(void)
+{
+  long long result;
+  asm volatile(".byte 15;.byte 49" : "=A" (result));
+  return result;
+}
+
+long long cpucycles_x86cpuspeed_persecond(void)
+{
+  int oid[2];
+  int val;
+  size_t size;
+  oid[0] = CTL_HW;
+  oid[1] = HW_CPUSPEED;
+  size = sizeof val;
+  if (sysctl(oid,2,&val,&size,0,0) == -1) return 0;
+  if (size != sizeof val) return 0;
+  return val * 1000000LL;
+}
diff --git a/nacl/nacl-20110221/cpucycles/x86cpuspeed.h b/nacl/nacl-20110221/cpucycles/x86cpuspeed.h
new file mode 100644
index 00000000..43005cda
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86cpuspeed.h
@@ -0,0 +1,27 @@
+/*
+cpucycles x86cpuspeed.h version 20090716
+Matthew Dempsky
+Public domain.
+*/
+
+#ifndef CPUCYCLES_x86cpuspeed_h
+#define CPUCYCLES_x86cpuspeed_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_x86cpuspeed(void);
+extern long long cpucycles_x86cpuspeed_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "x86cpuspeed"
+#define cpucycles cpucycles_x86cpuspeed
+#define cpucycles_persecond cpucycles_x86cpuspeed_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/x86estimate.c b/nacl/nacl-20110221/cpucycles/x86estimate.c
new file mode 100644
index 00000000..e5ae66cf
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86estimate.c
@@ -0,0 +1,59 @@
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+long long cpucycles_x86estimate(void)
+{
+  long long result;
+  asm volatile(".byte 15;.byte 49" : "=A" (result));
+  return result;
+}
+
+static long long microseconds(void)
+{
+  struct timeval t;
+  gettimeofday(&t,(struct timezone *) 0);
+  return t.tv_sec * (long long) 1000000 + t.tv_usec;
+}
+
+static double guessfreq(void)
+{
+  long long tb0; long long us0;
+  long long tb1; long long us1;
+
+  tb0 = cpucycles_x86estimate();
+  us0 = microseconds();
+  do {
+    tb1 = cpucycles_x86estimate();
+    us1 = microseconds();
+  } while (us1 - us0 < 10000 || tb1 - tb0 < 1000);
+  if (tb1 <= tb0) return 0;
+  tb1 -= tb0;
+  us1 -= us0;
+  return ((double) tb1) / (0.000001 * (double) us1);
+}
+
+static double cpufrequency = 0;
+
+static void init(void)
+{
+  double guess1;
+  double guess2;
+  int loop;
+
+  for (loop = 0;loop < 100;++loop) {
+    guess1 = guessfreq();
+    guess2 = guessfreq();
+    if (guess1 > 1.01 * guess2) continue;
+    if (guess2 > 1.01 * guess1) continue;
+    cpufrequency = 0.5 * (guess1 + guess2);
+    break;
+  }
+}
+
+long long cpucycles_x86estimate_persecond(void)
+{
+  if (!cpufrequency) init();
+  return cpufrequency;
+}
diff --git a/nacl/nacl-20110221/cpucycles/x86estimate.h b/nacl/nacl-20110221/cpucycles/x86estimate.h
new file mode 100644
index 00000000..98f2dd15
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86estimate.h
@@ -0,0 +1,27 @@
+/*
+cpucycles x86estimate.h version 20070121
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_x86estimate_h
+#define CPUCYCLES_x86estimate_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_x86estimate(void);
+extern long long cpucycles_x86estimate_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "x86estimate"
+#define cpucycles cpucycles_x86estimate
+#define cpucycles_persecond cpucycles_x86estimate_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpucycles/x86tscfreq.c b/nacl/nacl-20110221/cpucycles/x86tscfreq.c
new file mode 100644
index 00000000..a1b94b62
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86tscfreq.c
@@ -0,0 +1,17 @@
+#include <sys/types.h>
+#include <sys/sysctl.h>
+
+long long cpucycles_x86tscfreq(void)
+{
+  long long result;
+  asm volatile(".byte 15;.byte 49" : "=A" (result));
+  return result;
+}
+
+long long cpucycles_x86tscfreq_persecond(void)
+{
+  long result = 0;
+  size_t resultlen = sizeof(long);
+  sysctlbyname("machdep.tsc_freq",&result,&resultlen,0,0);
+  return result;
+}
diff --git a/nacl/nacl-20110221/cpucycles/x86tscfreq.h b/nacl/nacl-20110221/cpucycles/x86tscfreq.h
new file mode 100644
index 00000000..abf616e5
--- /dev/null
+++ b/nacl/nacl-20110221/cpucycles/x86tscfreq.h
@@ -0,0 +1,27 @@
+/*
+cpucycles x86tscfreq.h version 20060318
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef CPUCYCLES_x86tscfreq_h
+#define CPUCYCLES_x86tscfreq_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern long long cpucycles_x86tscfreq(void);
+extern long long cpucycles_x86tscfreq_persecond(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef cpucycles_implementation
+#define cpucycles_implementation "x86tscfreq"
+#define cpucycles cpucycles_x86tscfreq
+#define cpucycles_persecond cpucycles_x86tscfreq_persecond
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/cpuid/cbytes.c b/nacl/nacl-20110221/cpuid/cbytes.c
new file mode 100644
index 00000000..bd5d1444
--- /dev/null
+++ b/nacl/nacl-20110221/cpuid/cbytes.c
@@ -0,0 +1,16 @@
+#include <stdio.h>
+
+int main()
+{
+  char ch;
+  int loop = 0;
+  while (scanf("%c",&ch) == 1) {
+    printf("0x%02x,",255 & (int) ch);
+    if (++loop == 16) {
+      loop = 0;
+      printf("\n");
+    }
+  }
+  printf("0x00\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/cpuid/cpuid.c b/nacl/nacl-20110221/cpuid/cpuid.c
new file mode 100644
index 00000000..1a5c6b8e
--- /dev/null
+++ b/nacl/nacl-20110221/cpuid/cpuid.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+
+void nope()
+{
+  exit(1);
+}
+
+int main()
+{
+  unsigned long x[4];
+  unsigned long y[4];
+  int i;
+  int j;
+  char c;
+
+  signal(SIGILL,nope);
+
+  x[0] = 0;
+  x[1] = 0;
+  x[2] = 0;
+  x[3] = 0;
+
+  asm volatile(".byte 15;.byte 162" : "=a"(x[0]),"=b"(x[1]),"=c"(x[3]),"=d"(x[2]) : "0"(0) );
+  if (!x[0]) return 0;
+  asm volatile(".byte 15;.byte 162" : "=a"(y[0]),"=b"(y[1]),"=c"(y[2]),"=d"(y[3]) : "0"(1) );
+
+  for (i = 1;i < 4;++i)
+    for (j = 0;j < 4;++j) {
+      c = x[i] >> (8 * j);
+      if (c < 32) c = 32;
+      if (c > 126) c = 126;
+      putchar(c);
+    }
+
+  printf("-%08x-%08x\n",y[0],y[3]);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/cpuid/do b/nacl/nacl-20110221/cpuid/do
new file mode 100755
index 00000000..8a3a6f7b
--- /dev/null
+++ b/nacl/nacl-20110221/cpuid/do
@@ -0,0 +1,37 @@
+#!/bin/sh -e
+
+mkdir include
+
+(
+  echo x86
+  echo unknown
+) | (
+  while read n
+  do
+    okabi | (
+      while read abi
+      do
+        okc-$abi | (
+          while read c
+          do
+            echo "=== `date` === Trying $n.c with $c..." >&2
+            rm -f cpuid.c
+	    cp $n.c cpuid.c || continue
+            $c -o cpuid cpuid.c || continue
+            $c -o cbytes cbytes.c || continue
+	    ./cpuid > cpuid.out || continue
+	    echo 'static const char cpuid[] = {' > cpuid.h || continue
+	    ./cbytes < cpuid.out >> cpuid.h || continue
+	    echo '} ;' >> cpuid.h || continue
+	    cp cpuid.h include/cpuid.h || continue
+	    cat cpuid.out
+            exit 0
+          done
+          exit 111
+        ) && exit 0
+      done
+      exit 111
+    ) && exit 0
+  done
+  exit 111
+)
diff --git a/nacl/nacl-20110221/cpuid/unknown.c b/nacl/nacl-20110221/cpuid/unknown.c
new file mode 100644
index 00000000..786a5e72
--- /dev/null
+++ b/nacl/nacl-20110221/cpuid/unknown.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+
+int main()
+{
+  printf("unknown CPU ID\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/cpuid/x86.c b/nacl/nacl-20110221/cpuid/x86.c
new file mode 100644
index 00000000..99e6a0c4
--- /dev/null
+++ b/nacl/nacl-20110221/cpuid/x86.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+
+void nope()
+{
+  exit(1);
+}
+
+int main()
+{
+  unsigned long x[4];
+  unsigned long y[4];
+  int i;
+  int j;
+  char c;
+
+  signal(SIGILL,nope);
+
+  x[0] = 0;
+  x[1] = 0;
+  x[2] = 0;
+  x[3] = 0;
+
+  asm volatile(".byte 15;.byte 162" : "=a"(x[0]),"=b"(x[1]),"=c"(x[3]),"=d"(x[2]) : "0"(0) );
+  if (!x[0]) return 0;
+  asm volatile(".byte 15;.byte 162" : "=a"(y[0]),"=b"(y[1]),"=c"(y[2]),"=d"(y[3]) : "0"(1) );
+
+  for (i = 1;i < 4;++i)
+    for (j = 0;j < 4;++j) {
+      c = x[i] >> (8 * j);
+      if (c < 32) c = 32;
+      if (c > 126) c = 126;
+      putchar(c);
+    }
+
+  printf("-%08x-%08x\n",(unsigned int) y[0],(unsigned int) y[3]);
+
+  return 0;
+}
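
Here ".byte 15;.byte 162" is the CPUID opcode (0x0f 0xa2). Leaf 0 returns the
12-byte vendor string in EBX, EDX, ECX, which is why the outputs are bound as
"=b"(x[1]), "=d"(x[2]), "=c"(x[3]) and printed in x[1..3] order; leaf 1 returns
the family/model/stepping word in EAX and a feature-flag word in EDX, the two
hex fields appended to the output. The same identification via the mnemonic
(illustrative sketch; assumes GCC or Clang on x86):

  #include <stdio.h>

  int main(void)
  {
    unsigned int a, b, c, d;
    asm volatile("cpuid" : "=a"(a), "=b"(b), "=c"(c), "=d"(d) : "0"(0));
    /* The vendor string is the raw bytes of EBX, EDX, ECX in that order. */
    printf("%.4s%.4s%.4s\n", (const char *) &b, (const char *) &d, (const char *) &c);
    return 0;
  }
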
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha256/checksum b/nacl/nacl-20110221/crypto_auth/hmacsha256/checksum
new file mode 100644
index 00000000..2fa9604b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha256/checksum
@@ -0,0 +1 @@
+3bd7abd4f4dce04396f2ac7cb1cff70607f692411c49a1563b037d31e1662632
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/api.h b/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/api.h
new file mode 100644
index 00000000..c224d9d5
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/hmac.c b/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/hmac.c
new file mode 100644
index 00000000..8ab30bb4
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/hmac.c
@@ -0,0 +1,83 @@
+/*
+ * 20080913
+ * D. J. Bernstein
+ * Public domain.
+ * */
+
+#include "crypto_hashblocks_sha256.h"
+#include "crypto_auth.h"
+
+#define blocks crypto_hashblocks_sha256
+
+typedef unsigned int uint32;
+
+static const char iv[32] = {
+  0x6a,0x09,0xe6,0x67,
+  0xbb,0x67,0xae,0x85,
+  0x3c,0x6e,0xf3,0x72,
+  0xa5,0x4f,0xf5,0x3a,
+  0x51,0x0e,0x52,0x7f,
+  0x9b,0x05,0x68,0x8c,
+  0x1f,0x83,0xd9,0xab,
+  0x5b,0xe0,0xcd,0x19,
+} ;
+
+int crypto_auth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char h[32];
+  unsigned char padded[128];
+  int i;
+  unsigned long long bits = 512 + (inlen << 3);
+
+  for (i = 0;i < 32;++i) h[i] = iv[i];
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x36;
+  for (i = 32;i < 64;++i) padded[i] = 0x36;
+
+  blocks(h,padded,64);
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 63;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 56) {
+    for (i = inlen + 1;i < 56;++i) padded[i] = 0;
+    padded[56] = bits >> 56;
+    padded[57] = bits >> 48;
+    padded[58] = bits >> 40;
+    padded[59] = bits >> 32;
+    padded[60] = bits >> 24;
+    padded[61] = bits >> 16;
+    padded[62] = bits >> 8;
+    padded[63] = bits;
+    blocks(h,padded,64);
+  } else {
+    for (i = inlen + 1;i < 120;++i) padded[i] = 0;
+    padded[120] = bits >> 56;
+    padded[121] = bits >> 48;
+    padded[122] = bits >> 40;
+    padded[123] = bits >> 32;
+    padded[124] = bits >> 24;
+    padded[125] = bits >> 16;
+    padded[126] = bits >> 8;
+    padded[127] = bits;
+    blocks(h,padded,128);
+  }
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x5c;
+  for (i = 32;i < 64;++i) padded[i] = 0x5c;
+  for (i = 0;i < 32;++i) padded[64 + i] = h[i];
+
+  for (i = 0;i < 32;++i) out[i] = iv[i];
+
+  for (i = 32;i < 64;++i) padded[64 + i] = 0;
+  padded[64 + 32] = 0x80;
+  padded[64 + 62] = 3;
+
+  blocks(out,padded,128);
+
+  return 0;
+}
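
This is HMAC-SHA-256 written directly against the SHA-256 compression function:
it hashes (key XOR ipad) followed by the message under the standard IV, doing
the length padding by hand (the 512 added to bits accounts for the 64-byte
inner pad block), then hashes (key XOR opad) followed by the inner digest, a
96-byte message whose padding is likewise hard-coded. Callers only ever see the
crypto_auth/crypto_auth_verify pair; typical use looks like this sketch
(illustrative, not from the sources).

  #include "crypto_auth.h"
  #include "randombytes.h"

  /* Authenticate a message under a fresh 32-byte key, then verify it.
     Returns 0 when the authenticator checks out, -1 otherwise. */
  static int tag_and_check(const unsigned char *m, unsigned long long mlen)
  {
    unsigned char k[crypto_auth_KEYBYTES];
    unsigned char a[crypto_auth_BYTES];
    randombytes(k, sizeof k);
    crypto_auth(a, m, mlen, k);
    return crypto_auth_verify(a, m, mlen, k);
  }
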
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/verify.c b/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/verify.c
new file mode 100644
index 00000000..96ff0ea8
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha256/ref/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_32.h"
+#include "crypto_auth.h"
+
+int crypto_auth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[32];
+  crypto_auth(correct,in,inlen,k);
+  return crypto_verify_32(h,correct);
+}
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha256/used b/nacl/nacl-20110221/crypto_auth/hmacsha256/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha512256/checksum b/nacl/nacl-20110221/crypto_auth/hmacsha512256/checksum
new file mode 100644
index 00000000..1c037f2d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha512256/checksum
@@ -0,0 +1 @@
+2f5e8a6a0cac012d8d001351d7d583e69f91390df46305c3608e0c2893491886
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/api.h b/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/api.h
new file mode 100644
index 00000000..c224d9d5
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/hmac.c b/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/hmac.c
new file mode 100644
index 00000000..56ebfa6b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/hmac.c
@@ -0,0 +1,86 @@
+/*
+ * 20080913
+ * D. J. Bernstein
+ * Public domain.
+ * */
+
+#include "crypto_hashblocks_sha512.h"
+#include "crypto_auth.h"
+
+#define blocks crypto_hashblocks_sha512
+
+typedef unsigned long long uint64;
+
+static const unsigned char iv[64] = {
+  0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08,
+  0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b,
+  0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b,
+  0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1,
+  0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1,
+  0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f,
+  0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b,
+  0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79
+} ;
+
+int crypto_auth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char h[64];
+  unsigned char padded[256];
+  int i;
+  unsigned long long bytes = 128 + inlen;
+
+  for (i = 0;i < 64;++i) h[i] = iv[i];
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x36;
+  for (i = 32;i < 128;++i) padded[i] = 0x36;
+
+  blocks(h,padded,128);
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 127;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 112) {
+    for (i = inlen + 1;i < 119;++i) padded[i] = 0;
+    padded[119] = bytes >> 61;
+    padded[120] = bytes >> 53;
+    padded[121] = bytes >> 45;
+    padded[122] = bytes >> 37;
+    padded[123] = bytes >> 29;
+    padded[124] = bytes >> 21;
+    padded[125] = bytes >> 13;
+    padded[126] = bytes >> 5;
+    padded[127] = bytes << 3;
+    blocks(h,padded,128);
+  } else {
+    for (i = inlen + 1;i < 247;++i) padded[i] = 0;
+    padded[247] = bytes >> 61;
+    padded[248] = bytes >> 53;
+    padded[249] = bytes >> 45;
+    padded[250] = bytes >> 37;
+    padded[251] = bytes >> 29;
+    padded[252] = bytes >> 21;
+    padded[253] = bytes >> 13;
+    padded[254] = bytes >> 5;
+    padded[255] = bytes << 3;
+    blocks(h,padded,256);
+  }
+
+  for (i = 0;i < 32;++i) padded[i] = k[i] ^ 0x5c;
+  for (i = 32;i < 128;++i) padded[i] = 0x5c;
+
+  for (i = 0;i < 64;++i) padded[128 + i] = h[i];
+  for (i = 0;i < 64;++i) h[i] = iv[i];
+
+  for (i = 64;i < 128;++i) padded[128 + i] = 0;
+  padded[128 + 64] = 0x80;
+  padded[128 + 126] = 6;
+
+  blocks(h,padded,256);
+  for (i = 0;i < 32;++i) out[i] = h[i];
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/verify.c b/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/verify.c
new file mode 100644
index 00000000..96ff0ea8
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/hmacsha512256/ref/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_32.h"
+#include "crypto_auth.h"
+
+int crypto_auth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[32];
+  crypto_auth(correct,in,inlen,k);
+  return crypto_verify_32(h,correct);
+}
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha512256/selected b/nacl/nacl-20110221/crypto_auth/hmacsha512256/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_auth/hmacsha512256/used b/nacl/nacl-20110221/crypto_auth/hmacsha512256/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_auth/measure.c b/nacl/nacl-20110221/crypto_auth/measure.c
new file mode 100644
index 00000000..e5209903
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/measure.c
@@ -0,0 +1,69 @@
+#include "crypto_auth.h"
+#include "randombytes.h"
+#include "cpucycles.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_auth_IMPLEMENTATION;
+const char *implementationversion = crypto_auth_VERSION;
+const char *sizenames[] = { "outputbytes", "keybytes", 0 };
+const long long sizes[] = { crypto_auth_BYTES, crypto_auth_KEYBYTES };
+
+#define MAXTEST_BYTES 4096
+#ifdef SUPERCOP
+#define MGAP 8192
+#else
+#define MGAP 8
+#endif
+
+static unsigned char *k;
+static unsigned char *m;
+static unsigned char *h;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  k = alignedcalloc(crypto_auth_KEYBYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  h = alignedcalloc(crypto_auth_BYTES);
+}
+
+#define TIMINGS 15
+static long long cycles[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+  int mlen;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) {
+      randombytes(k,crypto_auth_KEYBYTES);
+      randombytes(m,mlen);
+      randombytes(h,crypto_auth_BYTES);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_auth(h,m,mlen,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"cycles",cycles,TIMINGS);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_auth_verify(h,m,mlen,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"verify_cycles",cycles,TIMINGS);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_auth/try.c b/nacl/nacl-20110221/crypto_auth/try.c
new file mode 100644
index 00000000..6f855dc9
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/try.c
@@ -0,0 +1,119 @@
+/*
+ * crypto_auth/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include "crypto_hash_sha256.h"
+#include "crypto_auth.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_auth_IMPLEMENTATION;
+
+#define MAXTEST_BYTES 10000
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *h;
+static unsigned char *m;
+static unsigned char *k;
+static unsigned char *h2;
+static unsigned char *m2;
+static unsigned char *k2;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  h = alignedcalloc(crypto_auth_BYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  k = alignedcalloc(crypto_auth_KEYBYTES);
+  h2 = alignedcalloc(crypto_auth_BYTES);
+  m2 = alignedcalloc(MAXTEST_BYTES + crypto_auth_BYTES);
+  k2 = alignedcalloc(crypto_auth_KEYBYTES + crypto_auth_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_auth(h,m,TUNE_BYTES,k);
+  crypto_auth_verify(h,m,TUNE_BYTES,k);
+}
+
+char checksum[crypto_auth_BYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long mlen = i;
+    long long klen = crypto_auth_KEYBYTES;
+    long long hlen = crypto_auth_BYTES;
+
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+
+    if (crypto_auth(h,m,mlen,k) != 0) return "crypto_auth returns nonzero";
+
+    for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_auth overwrites k";
+    for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_auth overwrites m";
+    for (j = -16;j < 0;++j) if (h[j] != h2[j]) return "crypto_auth writes before output";
+    for (j = hlen;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_auth writes after output";
+
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+
+    if (crypto_auth(m2,m2,mlen,k) != 0) return "crypto_auth returns nonzero";
+    for (j = 0;j < hlen;++j) if (m2[j] != h[j]) return "crypto_auth does not handle m overlap";
+    for (j = 0;j < hlen;++j) m2[j] = m[j];
+    if (crypto_auth(k2,m2,mlen,k2) != 0) return "crypto_auth returns nonzero";
+    for (j = 0;j < hlen;++j) if (k2[j] != h[j]) return "crypto_auth does not handle k overlap";
+    for (j = 0;j < hlen;++j) k2[j] = k[j];
+
+    if (crypto_auth_verify(h,m,mlen,k) != 0) return "crypto_auth_verify returns nonzero";
+
+    for (j = -16;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_auth overwrites h";
+    for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_auth overwrites k";
+    for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_auth overwrites m";
+
+    crypto_hash_sha256(h2,h,hlen);
+    for (j = 0;j < klen;++j) k[j] ^= h2[j % 32];
+    if (crypto_auth(h,m,mlen,k) != 0) return "crypto_auth returns nonzero";
+    if (crypto_auth_verify(h,m,mlen,k) != 0) return "crypto_auth_verify returns nonzero";
+    
+    crypto_hash_sha256(h2,h,hlen);
+    for (j = 0;j < mlen;++j) m[j] ^= h2[j % 32];
+    m[mlen] = h2[0];
+  }
+  if (crypto_auth(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_auth returns nonzero";
+  if (crypto_auth_verify(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_auth_verify returns nonzero";
+
+  for (i = 0;i < crypto_auth_BYTES;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_auth/wrapper-auth.cpp b/nacl/nacl-20110221/crypto_auth/wrapper-auth.cpp
new file mode 100644
index 00000000..2108aa31
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/wrapper-auth.cpp
@@ -0,0 +1,11 @@
+#include <string>
+using std::string;
+#include "crypto_auth.h"
+
+string crypto_auth(const string &m,const string &k)
+{
+  if (k.size() != crypto_auth_KEYBYTES) throw "incorrect key length";
+  unsigned char a[crypto_auth_BYTES];
+  crypto_auth(a,(const unsigned char *) m.c_str(),m.size(),(const unsigned char *) k.c_str());
+  return string((char *) a,crypto_auth_BYTES);
+}
diff --git a/nacl/nacl-20110221/crypto_auth/wrapper-verify.cpp b/nacl/nacl-20110221/crypto_auth/wrapper-verify.cpp
new file mode 100644
index 00000000..57e25a26
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_auth/wrapper-verify.cpp
@@ -0,0 +1,14 @@
+#include <string>
+using std::string;
+#include "crypto_auth.h"
+
+void crypto_auth_verify(const string &a,const string &m,const string &k)
+{
+  if (k.size() != crypto_auth_KEYBYTES) throw "incorrect key length";
+  if (a.size() != crypto_auth_BYTES) throw "incorrect authenticator length";
+  if (crypto_auth_verify(
+       (const unsigned char *) a.c_str(),
+       (const unsigned char *) m.c_str(),m.size(),
+       (const unsigned char *) k.c_str()) == 0) return;
+  throw "invalid authenticator";
+}
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/checksum b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/checksum
new file mode 100644
index 00000000..56a20083
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/checksum
@@ -0,0 +1 @@
+5fac7400caabc14a99c5c0bc13fb1df5e468e870382a3a1c
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/after.c b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/after.c
new file mode 100644
index 00000000..eb243e22
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/after.c
@@ -0,0 +1,22 @@
+#include "crypto_secretbox_xsalsa20poly1305.h"
+#include "crypto_box.h"
+
+int crypto_box_afternm(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  return crypto_secretbox_xsalsa20poly1305(c,m,mlen,n,k);
+}
+
+int crypto_box_open_afternm(
+  unsigned char *m,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  return crypto_secretbox_xsalsa20poly1305_open(m,c,clen,n,k);
+}
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/api.h b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/api.h
new file mode 100644
index 00000000..ce7762df
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/api.h
@@ -0,0 +1,6 @@
+#define CRYPTO_PUBLICKEYBYTES 32
+#define CRYPTO_SECRETKEYBYTES 32
+#define CRYPTO_BEFORENMBYTES 32
+#define CRYPTO_NONCEBYTES 24
+#define CRYPTO_ZEROBYTES 32
+#define CRYPTO_BOXZEROBYTES 16
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/before.c b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/before.c
new file mode 100644
index 00000000..279bb12a
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/before.c
@@ -0,0 +1,17 @@
+#include "crypto_core_hsalsa20.h"
+#include "crypto_scalarmult_curve25519.h"
+#include "crypto_box.h"
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+static const unsigned char n[16] = {0};
+
+int crypto_box_beforenm(
+  unsigned char *k,
+  const unsigned char *pk,
+  const unsigned char *sk
+)
+{
+  unsigned char s[32];
+  crypto_scalarmult_curve25519(s,sk,pk);
+  return crypto_core_hsalsa20(k,n,s,sigma);
+}
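
crypto_box_beforenm() derives the shared 32-byte key by running the Curve25519
shared secret through HSalsa20 with a zero 16-byte input and the
"expand 32-byte k" constant. Both directions of a conversation therefore
compute the same key, which is what the precomputation interface relies on;
stated as a check (illustrative sketch, not from the sources):

  #include <string.h>
  #include "crypto_box.h"

  /* Both sides derive the same session key, since
     Curve25519(skA, pkB) == Curve25519(skB, pkA). */
  static int shared_key_matches(const unsigned char *pka, const unsigned char *ska,
                                const unsigned char *pkb, const unsigned char *skb)
  {
    unsigned char kab[crypto_box_BEFORENMBYTES];
    unsigned char kba[crypto_box_BEFORENMBYTES];
    crypto_box_beforenm(kab, pkb, ska);
    crypto_box_beforenm(kba, pka, skb);
    return memcmp(kab, kba, sizeof kab) == 0;
  }
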
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/box.c b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/box.c
new file mode 100644
index 00000000..81ff72e2
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/box.c
@@ -0,0 +1,27 @@
+#include "crypto_box.h"
+
+int crypto_box(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *pk,
+  const unsigned char *sk
+)
+{
+  unsigned char k[crypto_box_BEFORENMBYTES];
+  crypto_box_beforenm(k,pk,sk);
+  return crypto_box_afternm(c,m,mlen,n,k);
+}
+
+int crypto_box_open(
+  unsigned char *m,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *pk,
+  const unsigned char *sk
+)
+{
+  unsigned char k[crypto_box_BEFORENMBYTES];
+  crypto_box_beforenm(k,pk,sk);
+  return crypto_box_open_afternm(m,c,clen,n,k);
+}
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c
new file mode 100644
index 00000000..233bc950
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/ref/keypair.c
@@ -0,0 +1,12 @@
+#include "crypto_scalarmult_curve25519.h"
+#include "crypto_box.h"
+#include "randombytes.h"
+
+int crypto_box_keypair(
+  unsigned char *pk,
+  unsigned char *sk
+)
+{
+  randombytes(sk,32);
+  return crypto_scalarmult_curve25519_base(pk,sk);
+}
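
That completes the ref implementation: keypair generation, the one-shot
crypto_box/crypto_box_open pair, and the beforenm/afternm split for reusing a
session key. The one non-obvious part of the C API is the padding convention,
with crypto_box_ZEROBYTES (32) zero bytes in front of every plaintext buffer
and crypto_box_BOXZEROBYTES (16) in front of every ciphertext buffer, visible
in the round trip below (illustrative sketch, not from the sources).

  #include <string.h>
  #include "crypto_box.h"
  #include "randombytes.h"

  #define MSGLEN 64

  /* Returns 0 when a random 64-byte message round-trips from A to B. */
  static int round_trip(void)
  {
    unsigned char pka[crypto_box_PUBLICKEYBYTES], ska[crypto_box_SECRETKEYBYTES];
    unsigned char pkb[crypto_box_PUBLICKEYBYTES], skb[crypto_box_SECRETKEYBYTES];
    unsigned char n[crypto_box_NONCEBYTES];
    unsigned char m[crypto_box_ZEROBYTES + MSGLEN] = {0};  /* 32 leading zeros required */
    unsigned char c[crypto_box_ZEROBYTES + MSGLEN];        /* first 16 bytes come back zero */
    unsigned char t[crypto_box_ZEROBYTES + MSGLEN];

    crypto_box_keypair(pka, ska);
    crypto_box_keypair(pkb, skb);
    randombytes(n, sizeof n);
    randombytes(m + crypto_box_ZEROBYTES, MSGLEN);

    crypto_box(c, m, sizeof m, n, pkb, ska);                           /* sender A, to B */
    if (crypto_box_open(t, c, sizeof c, n, pka, skb) != 0) return -1;  /* receiver B */
    return memcmp(t + crypto_box_ZEROBYTES, m + crypto_box_ZEROBYTES, MSGLEN);
  }
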
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/selected b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/used b/nacl/nacl-20110221/crypto_box/curve25519xsalsa20poly1305/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_box/measure.c b/nacl/nacl-20110221/crypto_box/measure.c
new file mode 100644
index 00000000..08df1e39
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/measure.c
@@ -0,0 +1,137 @@
+#include <stdlib.h>
+#include "randombytes.h"
+#include "cpucycles.h"
+#include "crypto_box.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_box_IMPLEMENTATION;
+const char *implementationversion = crypto_box_VERSION;
+const char *sizenames[] = { "publickeybytes", "secretkeybytes", "beforenmbytes", "noncebytes", "zerobytes", "boxzerobytes", 0 };
+const long long sizes[] = { crypto_box_PUBLICKEYBYTES, crypto_box_SECRETKEYBYTES, crypto_box_BEFORENMBYTES, crypto_box_NONCEBYTES, crypto_box_ZEROBYTES, crypto_box_BOXZEROBYTES };
+
+#define MAXTEST_BYTES 4096
+
+static unsigned char *ska;
+static unsigned char *pka;
+static unsigned char *skb;
+static unsigned char *pkb;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+static unsigned char *sa;
+static unsigned char *sb;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  ska = alignedcalloc(crypto_box_SECRETKEYBYTES);
+  pka = alignedcalloc(crypto_box_PUBLICKEYBYTES);
+  skb = alignedcalloc(crypto_box_SECRETKEYBYTES);
+  pkb = alignedcalloc(crypto_box_PUBLICKEYBYTES);
+  n = alignedcalloc(crypto_box_NONCEBYTES);
+  m = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  c = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  sa = alignedcalloc(crypto_box_BEFORENMBYTES);
+  sb = alignedcalloc(crypto_box_BEFORENMBYTES);
+}
+
+#define TIMINGS 15
+static long long cycles[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+  int mlen;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_box_keypair(pka,ska);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"keypair_cycles",cycles,TIMINGS);
+
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_box_keypair(pkb,skb);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"keypair_cycles",cycles,TIMINGS);
+
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_box_beforenm(sa,pkb,ska);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"beforenm_cycles",cycles,TIMINGS);
+
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_box_beforenm(sb,pka,skb);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"beforenm_cycles",cycles,TIMINGS);
+
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / 8) {
+      randombytes(n,crypto_box_NONCEBYTES);
+      randombytes(m + crypto_box_ZEROBYTES,mlen);
+      randombytes(c,mlen + crypto_box_ZEROBYTES);
+
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_box(c,m,mlen + crypto_box_ZEROBYTES,n,pka,skb);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"cycles",cycles,TIMINGS);
+
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_box_open(m,c,mlen + crypto_box_ZEROBYTES,n,pkb,ska);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"open_cycles",cycles,TIMINGS);
+
+      ++c[crypto_box_ZEROBYTES];
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_box_open(m,c,mlen + crypto_box_ZEROBYTES,n,pkb,ska);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"forgery_open_cycles",cycles,TIMINGS);
+
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_box_afternm(c,m,mlen + crypto_box_ZEROBYTES,n,sb);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"afternm_cycles",cycles,TIMINGS);
+
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_box_open_afternm(m,c,mlen + crypto_box_ZEROBYTES,n,sa);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"open_afternm_cycles",cycles,TIMINGS);
+
+      ++c[crypto_box_ZEROBYTES];
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_box_open_afternm(m,c,mlen + crypto_box_ZEROBYTES,n,sa);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"forgery_open_afternm_cycles",cycles,TIMINGS);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_box/try.c b/nacl/nacl-20110221/crypto_box/try.c
new file mode 100644
index 00000000..f7029909
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/try.c
@@ -0,0 +1,195 @@
+/*
+ * crypto_box/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include "crypto_box.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_box_IMPLEMENTATION;
+
+#define MAXTEST_BYTES 10000
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *ska;
+static unsigned char *pka;
+static unsigned char *skb;
+static unsigned char *pkb;
+static unsigned char *s;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+static unsigned char *t;
+static unsigned char *ska2;
+static unsigned char *pka2;
+static unsigned char *skb2;
+static unsigned char *pkb2;
+static unsigned char *s2;
+static unsigned char *n2;
+static unsigned char *m2;
+static unsigned char *c2;
+static unsigned char *t2;
+
+#define sklen crypto_box_SECRETKEYBYTES
+#define pklen crypto_box_PUBLICKEYBYTES
+#define nlen crypto_box_NONCEBYTES
+#define slen crypto_box_BEFORENMBYTES
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  ska = alignedcalloc(sklen);
+  pka = alignedcalloc(pklen);
+  skb = alignedcalloc(sklen);
+  pkb = alignedcalloc(pklen);
+  n = alignedcalloc(nlen);
+  m = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  c = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  t = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  s = alignedcalloc(slen);
+  ska2 = alignedcalloc(sklen);
+  pka2 = alignedcalloc(pklen);
+  skb2 = alignedcalloc(sklen);
+  pkb2 = alignedcalloc(pklen);
+  n2 = alignedcalloc(nlen);
+  m2 = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  c2 = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  t2 = alignedcalloc(MAXTEST_BYTES + crypto_box_ZEROBYTES);
+  s2 = alignedcalloc(slen);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_box(c,m,TUNE_BYTES + crypto_box_ZEROBYTES,n,pka,skb);
+  crypto_box_open(t,c,TUNE_BYTES + crypto_box_ZEROBYTES,n,pkb,ska);
+}
+
+char checksum[nlen * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  if (crypto_box_keypair(pka,ska) != 0) return "crypto_box_keypair returns nonzero";
+  if (crypto_box_keypair(pkb,skb) != 0) return "crypto_box_keypair returns nonzero";
+
+  for (j = 0;j < crypto_box_ZEROBYTES;++j) m[j] = 0;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long mlen = i + crypto_box_ZEROBYTES;
+    long long tlen = i + crypto_box_ZEROBYTES;
+    long long clen = i + crypto_box_ZEROBYTES;
+
+    for (j = -16;j < 0;++j) ska[j] = random();
+    for (j = -16;j < 0;++j) skb[j] = random();
+    for (j = -16;j < 0;++j) pka[j] = random();
+    for (j = -16;j < 0;++j) pkb[j] = random();
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = -16;j < 0;++j) n[j] = random();
+
+    for (j = sklen;j < sklen + 16;++j) ska[j] = random();
+    for (j = sklen;j < sklen + 16;++j) skb[j] = random();
+    for (j = pklen;j < pklen + 16;++j) pka[j] = random();
+    for (j = pklen;j < pklen + 16;++j) pkb[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = nlen;j < nlen + 16;++j) n[j] = random();
+
+    for (j = -16;j < sklen + 16;++j) ska2[j] = ska[j];
+    for (j = -16;j < sklen + 16;++j) skb2[j] = skb[j];
+    for (j = -16;j < pklen + 16;++j) pka2[j] = pka[j];
+    for (j = -16;j < pklen + 16;++j) pkb2[j] = pkb[j];
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+    for (j = -16;j < nlen + 16;++j) n2[j] = n[j];
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j] = random();
+
+    if (crypto_box(c,m,mlen,n,pkb,ska) != 0) return "crypto_box returns nonzero";
+
+    for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_box overwrites m";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box overwrites n";
+    for (j = -16;j < 0;++j) if (c2[j] != c[j]) return "crypto_box writes before output";
+    for (j = clen;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_box writes after output";
+    for (j = 0;j < crypto_box_BOXZEROBYTES;++j)
+      if (c[j] != 0) return "crypto_box does not clear extra bytes";
+
+    for (j = -16;j < sklen + 16;++j) if (ska2[j] != ska[j]) return "crypto_box overwrites ska";
+    for (j = -16;j < sklen + 16;++j) if (skb2[j] != skb[j]) return "crypto_box overwrites skb";
+    for (j = -16;j < pklen + 16;++j) if (pka2[j] != pka[j]) return "crypto_box overwrites pka";
+    for (j = -16;j < pklen + 16;++j) if (pkb2[j] != pkb[j]) return "crypto_box overwrites pkb";
+
+    for (j = -16;j < 0;++j) c[j] = random();
+    for (j = clen;j < clen + 16;++j) c[j] = random();
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+    for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random();
+
+    if (crypto_box_open(t,c,clen,n,pka,skb) != 0) return "crypto_box_open returns nonzero";
+
+    for (j = -16;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_box_open overwrites c";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box_open overwrites n";
+    for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_box_open writes before output";
+    for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_box_open writes after output";
+    for (j = 0;j < crypto_box_ZEROBYTES;++j)
+      if (t[j] != 0) return "crypto_box_open does not clear extra bytes";
+
+    for (j = -16;j < sklen + 16;++j) if (ska2[j] != ska[j]) return "crypto_box_open overwrites ska";
+    for (j = -16;j < sklen + 16;++j) if (skb2[j] != skb[j]) return "crypto_box_open overwrites skb";
+    for (j = -16;j < pklen + 16;++j) if (pka2[j] != pka[j]) return "crypto_box_open overwrites pka";
+    for (j = -16;j < pklen + 16;++j) if (pkb2[j] != pkb[j]) return "crypto_box_open overwrites pkb";
+
+    for (j = 0;j < mlen;++j) if (t[j] != m[j]) return "plaintext does not match";
+
+    for (j = -16;j < slen + 16;++j) s2[j] = s[j] = random();
+    if (crypto_box_beforenm(s,pkb,ska) != 0) return "crypto_box_beforenm returns nonzero";
+    for (j = -16;j < pklen + 16;++j) if (pka2[j] != pka[j]) return "crypto_box_open overwrites pk";
+    for (j = -16;j < sklen + 16;++j) if (skb2[j] != skb[j]) return "crypto_box_open overwrites sk";
+    for (j = -16;j < 0;++j) if (s2[j] != s[j]) return "crypto_box_beforenm writes before output";
+    for (j = slen;j < slen + 16;++j) if (s2[j] != s[j]) return "crypto_box_beforenm writes after output";
+
+    for (j = -16;j < slen + 16;++j) s2[j] = s[j];
+    for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random();
+    if (crypto_box_afternm(t,m,mlen,n,s) != 0) return "crypto_box_afternm returns nonzero";
+    for (j = -16;j < slen + 16;++j) if (s2[j] != s[j]) return "crypto_box_afternm overwrites s";
+    for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_box_afternm overwrites m";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box_afternm overwrites n";
+    for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_box_afternm writes before output";
+    for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_box_afternm writes after output";
+    for (j = 0;j < crypto_box_BOXZEROBYTES;++j)
+      if (t[j] != 0) return "crypto_box_afternm does not clear extra bytes";
+    for (j = 0;j < mlen;++j) if (t[j] != c[j]) return "crypto_box_afternm does not match crypto_box";
+
+    if (crypto_box_beforenm(s,pka,skb) != 0) return "crypto_box_beforenm returns nonzero";
+
+    for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random();
+    if (crypto_box_open_afternm(t,c,clen,n,s) != 0) return "crypto_box_open_afternm returns nonzero";
+    for (j = -16;j < slen + 16;++j) if (s2[j] != s[j]) return "crypto_box_open_afternm overwrites s";
+    for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_box_open_afternm overwrites m";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_box_open_afternm overwrites n";
+    for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_box_open_afternm writes before output";
+    for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_box_open_afternm writes after output";
+    for (j = 0;j < crypto_box_ZEROBYTES;++j)
+      if (t[j] != 0) return "crypto_box_open_afternm does not clear extra bytes";
+    for (j = 0;j < mlen;++j) if (t[j] != m[j]) return "crypto_box_open_afternm does not match crypto_box_open";
+
+    for (j = 0;j < i;++j) n[j % nlen] ^= c[j + crypto_box_BOXZEROBYTES];
+    if (i == 0) m[crypto_box_ZEROBYTES] = 0;
+    m[i + crypto_box_ZEROBYTES] = m[crypto_box_ZEROBYTES];
+    for (j = 0;j < i;++j) m[j + crypto_box_ZEROBYTES] ^= c[j + crypto_box_BOXZEROBYTES];
+  }
+
+  for (i = 0;i < nlen;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (n[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & n[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_box/wrapper-box.cpp b/nacl/nacl-20110221/crypto_box/wrapper-box.cpp
new file mode 100644
index 00000000..f0429295
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/wrapper-box.cpp
@@ -0,0 +1,24 @@
+#include <string>
+using std::string;
+#include "crypto_box.h"
+
+string crypto_box(const string &m,const string &n,const string &pk,const string &sk)
+{
+  if (pk.size() != crypto_box_PUBLICKEYBYTES) throw "incorrect public-key length";
+  if (sk.size() != crypto_box_SECRETKEYBYTES) throw "incorrect secret-key length";
+  if (n.size() != crypto_box_NONCEBYTES) throw "incorrect nonce length";
+  size_t mlen = m.size() + crypto_box_ZEROBYTES;
+  unsigned char mpad[mlen];
+  for (int i = 0;i < crypto_box_ZEROBYTES;++i) mpad[i] = 0;
+  for (int i = crypto_box_ZEROBYTES;i < mlen;++i) mpad[i] = m[i - crypto_box_ZEROBYTES];
+  unsigned char cpad[mlen];
+  crypto_box(cpad,mpad,mlen,
+    (const unsigned char *) n.c_str(),
+    (const unsigned char *) pk.c_str(),
+    (const unsigned char *) sk.c_str()
+    );
+  return string(
+    (char *) cpad + crypto_box_BOXZEROBYTES,
+    mlen - crypto_box_BOXZEROBYTES
+  );
+}
diff --git a/nacl/nacl-20110221/crypto_box/wrapper-keypair.cpp b/nacl/nacl-20110221/crypto_box/wrapper-keypair.cpp
new file mode 100644
index 00000000..b59f92d9
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/wrapper-keypair.cpp
@@ -0,0 +1,12 @@
+#include <string>
+using std::string;
+#include "crypto_box.h"
+
+string crypto_box_keypair(string *sk_string)
+{
+  unsigned char pk[crypto_box_PUBLICKEYBYTES];
+  unsigned char sk[crypto_box_SECRETKEYBYTES];
+  crypto_box_keypair(pk,sk);
+  *sk_string = string((char *) sk,sizeof sk);
+  return string((char *) pk,sizeof pk);
+}
diff --git a/nacl/nacl-20110221/crypto_box/wrapper-open.cpp b/nacl/nacl-20110221/crypto_box/wrapper-open.cpp
new file mode 100644
index 00000000..67663a21
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_box/wrapper-open.cpp
@@ -0,0 +1,27 @@
+#include <string>
+using std::string;
+#include "crypto_box.h"
+
+string crypto_box_open(const string &c,const string &n,const string &pk,const string &sk)
+{
+  if (pk.size() != crypto_box_PUBLICKEYBYTES) throw "incorrect public-key length";
+  if (sk.size() != crypto_box_SECRETKEYBYTES) throw "incorrect secret-key length";
+  if (n.size() != crypto_box_NONCEBYTES) throw "incorrect nonce length";
+  size_t clen = c.size() + crypto_box_BOXZEROBYTES;
+  unsigned char cpad[clen];
+  for (int i = 0;i < crypto_box_BOXZEROBYTES;++i) cpad[i] = 0;
+  for (int i = crypto_box_BOXZEROBYTES;i < clen;++i) cpad[i] = c[i - crypto_box_BOXZEROBYTES];
+  unsigned char mpad[clen];
+  if (crypto_box_open(mpad,cpad,clen,
+                       (const unsigned char *) n.c_str(),
+                       (const unsigned char *) pk.c_str(),
+                       (const unsigned char *) sk.c_str()
+                     ) != 0)
+    throw "ciphertext fails verification";
+  if (clen < crypto_box_ZEROBYTES)
+    throw "ciphertext too short"; // should have been caught by _open
+  return string(
+    (char *) mpad + crypto_box_ZEROBYTES,
+    clen - crypto_box_ZEROBYTES
+  );
+}
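/*
 * Editor's note -- usage sketch, not part of the NaCl patch above.
 * The three wrappers (wrapper-keypair.cpp, wrapper-box.cpp, wrapper-open.cpp)
 * give a std::string front end to crypto_box.  A minimal round trip, assuming
 * the string prototypes are visible from C++ via crypto_box.h and that real
 * callers draw the nonce from randombytes instead of the fixed demo value here:
 */
#include <string>
#include <iostream>
#include "crypto_box.h"

int main()
{
  std::string ska, skb;
  std::string pka = crypto_box_keypair(&ska);        // Alice's key pair
  std::string pkb = crypto_box_keypair(&skb);        // Bob's key pair

  std::string n(crypto_box_NONCEBYTES,'\0');         // demo nonce: never reuse per key pair
  std::string m = "example plaintext";

  std::string c = crypto_box(m,n,pkb,ska);           // Alice seals to Bob
  std::string t = crypto_box_open(c,n,pka,skb);      // Bob opens; throws on forgery
  std::cout << (t == m) << "\n";                     // prints 1
  return 0;
}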
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/checksum b/nacl/nacl-20110221/crypto_core/hsalsa20/checksum
new file mode 100644
index 00000000..f67bb2e2
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/checksum
@@ -0,0 +1 @@
+28ebe700b5878570702a68740aa131e6fa907e58a3f6915cd183c6db3f7afd7a
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/ref/api.h b/nacl/nacl-20110221/crypto_core/hsalsa20/ref/api.h
new file mode 100644
index 00000000..73bd8541
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 32
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/ref/core.c b/nacl/nacl-20110221/crypto_core/hsalsa20/ref/core.c
new file mode 100644
index 00000000..36118da0
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/ref/core.c
@@ -0,0 +1,135 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  x0 -= load_littleendian(c + 0);
+  x5 -= load_littleendian(c + 4);
+  x10 -= load_littleendian(c + 8);
+  x15 -= load_littleendian(c + 12);
+  x6 -= load_littleendian(in + 0);
+  x7 -= load_littleendian(in + 4);
+  x8 -= load_littleendian(in + 8);
+  x9 -= load_littleendian(in + 12);
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x5);
+  store_littleendian(out + 8,x10);
+  store_littleendian(out + 12,x15);
+  store_littleendian(out + 16,x6);
+  store_littleendian(out + 20,x7);
+  store_littleendian(out + 24,x8);
+  store_littleendian(out + 28,x9);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/ref/implementors b/nacl/nacl-20110221/crypto_core/hsalsa20/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/api.h b/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/api.h
new file mode 100644
index 00000000..73bd8541
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 32
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/core.c b/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/core.c
new file mode 100644
index 00000000..9a9a8c7c
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/core.c
@@ -0,0 +1,108 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  int i;
+
+  x0 = load_littleendian(c + 0);
+  x1 = load_littleendian(k + 0);
+  x2 = load_littleendian(k + 4);
+  x3 = load_littleendian(k + 8);
+  x4 = load_littleendian(k + 12);
+  x5 = load_littleendian(c + 4);
+  x6 = load_littleendian(in + 0);
+  x7 = load_littleendian(in + 4);
+  x8 = load_littleendian(in + 8);
+  x9 = load_littleendian(in + 12);
+  x10 = load_littleendian(c + 8);
+  x11 = load_littleendian(k + 16);
+  x12 = load_littleendian(k + 20);
+  x13 = load_littleendian(k + 24);
+  x14 = load_littleendian(k + 28);
+  x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x5);
+  store_littleendian(out + 8,x10);
+  store_littleendian(out + 12,x15);
+  store_littleendian(out + 16,x6);
+  store_littleendian(out + 20,x7);
+  store_littleendian(out + 24,x8);
+  store_littleendian(out + 28,x9);
+
+  return 0;
+}
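/*
 * Editor's note -- illustrative sketch, not part of the NaCl patch above.
 * ref and ref2 produce identical HSalsa20 output: ref runs the Salsa20
 * feed-forward (x += j) and then subtracts the constant and input words
 * again, while ref2 omits both steps and emits words 0,5,10,15,6,7,8,9 of
 * the final state directly; the add/subtract pairs cancel because the j
 * values were loaded from the very bytes being subtracted.  The same
 * relation, written against a 64-byte Salsa20 core output (helper names
 * are hypothetical):
 */
#include <stdint.h>

static uint32_t ld32(const unsigned char *x)
{
  return (uint32_t) x[0] | ((uint32_t) x[1] << 8)
       | ((uint32_t) x[2] << 16) | ((uint32_t) x[3] << 24);
}

static void st32(unsigned char *x,uint32_t u)
{
  x[0] = u; x[1] = u >> 8; x[2] = u >> 16; x[3] = u >> 24;
}

void hsalsa20_from_salsa20(unsigned char out[32],
                           const unsigned char salsa_out[64],
                           const unsigned char in[16],
                           const unsigned char c[16])
{
  /* state words 0,5,10,15 carried the constant; words 6..9 carried in */
  st32(out +  0,ld32(salsa_out +  0) - ld32(c  +  0));
  st32(out +  4,ld32(salsa_out + 20) - ld32(c  +  4));
  st32(out +  8,ld32(salsa_out + 40) - ld32(c  +  8));
  st32(out + 12,ld32(salsa_out + 60) - ld32(c  + 12));
  st32(out + 16,ld32(salsa_out + 24) - ld32(in +  0));
  st32(out + 20,ld32(salsa_out + 28) - ld32(in +  4));
  st32(out + 24,ld32(salsa_out + 32) - ld32(in +  8));
  st32(out + 28,ld32(salsa_out + 36) - ld32(in + 12));
}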
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/implementors b/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/hsalsa20/ref2/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_core/hsalsa20/used b/nacl/nacl-20110221/crypto_core/hsalsa20/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_core/measure.c b/nacl/nacl-20110221/crypto_core/measure.c
new file mode 100644
index 00000000..dd7bac81
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/measure.c
@@ -0,0 +1,18 @@
+#include "crypto_core.h"
+
+const char *primitiveimplementation = crypto_core_IMPLEMENTATION;
+const char *implementationversion = crypto_core_VERSION;
+const char *sizenames[] = { "outputbytes", "inputbytes", "keybytes", "constbytes", 0 };
+const long long sizes[] = { crypto_core_OUTPUTBYTES, crypto_core_INPUTBYTES, crypto_core_KEYBYTES, crypto_core_CONSTBYTES };
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+}
+
+void measure(void)
+{
+}
diff --git a/nacl/nacl-20110221/crypto_core/salsa20/checksum b/nacl/nacl-20110221/crypto_core/salsa20/checksum
new file mode 100644
index 00000000..fcf56186
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa20/checksum
@@ -0,0 +1 @@
+9d1ee8d84b974e648507ffd93829376c5b4420751710e44f6593abd8769378011d85ecda51ceb8f43661d3c65ef5b57c4f5bf8df76c8202784c8df8def61e6a6
diff --git a/nacl/nacl-20110221/crypto_core/salsa20/ref/api.h b/nacl/nacl-20110221/crypto_core/salsa20/ref/api.h
new file mode 100644
index 00000000..2a387b6d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa20/ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 64
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/crypto_core/salsa20/ref/core.c b/nacl/nacl-20110221/crypto_core/salsa20/ref/core.c
new file mode 100644
index 00000000..910a0056
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa20/ref/core.c
@@ -0,0 +1,134 @@
+/*
+version 20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 20
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x1);
+  store_littleendian(out + 8,x2);
+  store_littleendian(out + 12,x3);
+  store_littleendian(out + 16,x4);
+  store_littleendian(out + 20,x5);
+  store_littleendian(out + 24,x6);
+  store_littleendian(out + 28,x7);
+  store_littleendian(out + 32,x8);
+  store_littleendian(out + 36,x9);
+  store_littleendian(out + 40,x10);
+  store_littleendian(out + 44,x11);
+  store_littleendian(out + 48,x12);
+  store_littleendian(out + 52,x13);
+  store_littleendian(out + 56,x14);
+  store_littleendian(out + 60,x15);
+
+  return 0;
+}
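/*
 * Editor's note -- usage sketch, not part of the NaCl patch above.
 * This 64-byte core is the block function behind the Salsa20 stream cipher:
 * the 16-byte "in" carries an 8-byte nonce followed by a little-endian
 * 64-bit block counter, and "c" is the constant "expand 32-byte k".  A
 * sketch of one keystream block, assuming the installed per-primitive name
 * crypto_core_salsa20 and header crypto_core_salsa20.h:
 */
#include "crypto_core_salsa20.h"

static void salsa20_block(unsigned char block[64],
                          const unsigned char k[32],
                          const unsigned char nonce[8],
                          unsigned long long counter)
{
  static const unsigned char sigma[16] = "expand 32-byte k";
  unsigned char in[16];
  int i;
  for (i = 0;i < 8;++i) in[i] = nonce[i];
  for (i = 8;i < 16;++i) { in[i] = counter & 0xff; counter >>= 8; }
  crypto_core_salsa20(block,in,k,sigma);
}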
diff --git a/nacl/nacl-20110221/crypto_core/salsa20/ref/implementors b/nacl/nacl-20110221/crypto_core/salsa20/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa20/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_core/salsa20/used b/nacl/nacl-20110221/crypto_core/salsa20/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_core/salsa2012/checksum b/nacl/nacl-20110221/crypto_core/salsa2012/checksum
new file mode 100644
index 00000000..2f99a8d6
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa2012/checksum
@@ -0,0 +1 @@
+f36d643f798efc0fca888d3ac4bdcc54c98a968c2da16bd5b8bfe9fe9025a6ca3a207e9362dc7cf17ddfc7477ee754d3f521b1df91640093754f7275b1a54293
diff --git a/nacl/nacl-20110221/crypto_core/salsa2012/ref/api.h b/nacl/nacl-20110221/crypto_core/salsa2012/ref/api.h
new file mode 100644
index 00000000..2a387b6d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa2012/ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 64
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/crypto_core/salsa2012/ref/core.c b/nacl/nacl-20110221/crypto_core/salsa2012/ref/core.c
new file mode 100644
index 00000000..d4b59e48
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa2012/ref/core.c
@@ -0,0 +1,134 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 12
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x1);
+  store_littleendian(out + 8,x2);
+  store_littleendian(out + 12,x3);
+  store_littleendian(out + 16,x4);
+  store_littleendian(out + 20,x5);
+  store_littleendian(out + 24,x6);
+  store_littleendian(out + 28,x7);
+  store_littleendian(out + 32,x8);
+  store_littleendian(out + 36,x9);
+  store_littleendian(out + 40,x10);
+  store_littleendian(out + 44,x11);
+  store_littleendian(out + 48,x12);
+  store_littleendian(out + 52,x13);
+  store_littleendian(out + 56,x14);
+  store_littleendian(out + 60,x15);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_core/salsa2012/ref/implementors b/nacl/nacl-20110221/crypto_core/salsa2012/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa2012/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_core/salsa2012/used b/nacl/nacl-20110221/crypto_core/salsa2012/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_core/salsa208/checksum b/nacl/nacl-20110221/crypto_core/salsa208/checksum
new file mode 100644
index 00000000..a16cb52f
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa208/checksum
@@ -0,0 +1 @@
+1e13ea9e74cb36989f7cbf4abc80b29154e1a8b150bd5244951318abea002a93ae9fe2abbcf7217526ac2a85b66c256ba9374b1257eda0c01816da328edfa11a
diff --git a/nacl/nacl-20110221/crypto_core/salsa208/ref/api.h b/nacl/nacl-20110221/crypto_core/salsa208/ref/api.h
new file mode 100644
index 00000000..2a387b6d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa208/ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_OUTPUTBYTES 64
+#define CRYPTO_INPUTBYTES 16
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_CONSTBYTES 16
diff --git a/nacl/nacl-20110221/crypto_core/salsa208/ref/core.c b/nacl/nacl-20110221/crypto_core/salsa208/ref/core.c
new file mode 100644
index 00000000..921e7a86
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa208/ref/core.c
@@ -0,0 +1,134 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core.h"
+
+#define ROUNDS 8
+
+typedef unsigned int uint32;
+
+static uint32 rotate(uint32 u,int c)
+{
+  return (u << c) | (u >> (32 - c));
+}
+
+static uint32 load_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0]) \
+  | (((uint32) (x[1])) << 8) \
+  | (((uint32) (x[2])) << 16) \
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+static void store_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+int crypto_core(
+        unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k,
+  const unsigned char *c
+)
+{
+  uint32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  uint32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+  int i;
+
+  j0 = x0 = load_littleendian(c + 0);
+  j1 = x1 = load_littleendian(k + 0);
+  j2 = x2 = load_littleendian(k + 4);
+  j3 = x3 = load_littleendian(k + 8);
+  j4 = x4 = load_littleendian(k + 12);
+  j5 = x5 = load_littleendian(c + 4);
+  j6 = x6 = load_littleendian(in + 0);
+  j7 = x7 = load_littleendian(in + 4);
+  j8 = x8 = load_littleendian(in + 8);
+  j9 = x9 = load_littleendian(in + 12);
+  j10 = x10 = load_littleendian(c + 8);
+  j11 = x11 = load_littleendian(k + 16);
+  j12 = x12 = load_littleendian(k + 20);
+  j13 = x13 = load_littleendian(k + 24);
+  j14 = x14 = load_littleendian(k + 28);
+  j15 = x15 = load_littleendian(c + 12);
+
+  for (i = ROUNDS;i > 0;i -= 2) {
+     x4 ^= rotate( x0+x12, 7);
+     x8 ^= rotate( x4+ x0, 9);
+    x12 ^= rotate( x8+ x4,13);
+     x0 ^= rotate(x12+ x8,18);
+     x9 ^= rotate( x5+ x1, 7);
+    x13 ^= rotate( x9+ x5, 9);
+     x1 ^= rotate(x13+ x9,13);
+     x5 ^= rotate( x1+x13,18);
+    x14 ^= rotate(x10+ x6, 7);
+     x2 ^= rotate(x14+x10, 9);
+     x6 ^= rotate( x2+x14,13);
+    x10 ^= rotate( x6+ x2,18);
+     x3 ^= rotate(x15+x11, 7);
+     x7 ^= rotate( x3+x15, 9);
+    x11 ^= rotate( x7+ x3,13);
+    x15 ^= rotate(x11+ x7,18);
+     x1 ^= rotate( x0+ x3, 7);
+     x2 ^= rotate( x1+ x0, 9);
+     x3 ^= rotate( x2+ x1,13);
+     x0 ^= rotate( x3+ x2,18);
+     x6 ^= rotate( x5+ x4, 7);
+     x7 ^= rotate( x6+ x5, 9);
+     x4 ^= rotate( x7+ x6,13);
+     x5 ^= rotate( x4+ x7,18);
+    x11 ^= rotate(x10+ x9, 7);
+     x8 ^= rotate(x11+x10, 9);
+     x9 ^= rotate( x8+x11,13);
+    x10 ^= rotate( x9+ x8,18);
+    x12 ^= rotate(x15+x14, 7);
+    x13 ^= rotate(x12+x15, 9);
+    x14 ^= rotate(x13+x12,13);
+    x15 ^= rotate(x14+x13,18);
+  }
+
+  x0 += j0;
+  x1 += j1;
+  x2 += j2;
+  x3 += j3;
+  x4 += j4;
+  x5 += j5;
+  x6 += j6;
+  x7 += j7;
+  x8 += j8;
+  x9 += j9;
+  x10 += j10;
+  x11 += j11;
+  x12 += j12;
+  x13 += j13;
+  x14 += j14;
+  x15 += j15;
+
+  store_littleendian(out + 0,x0);
+  store_littleendian(out + 4,x1);
+  store_littleendian(out + 8,x2);
+  store_littleendian(out + 12,x3);
+  store_littleendian(out + 16,x4);
+  store_littleendian(out + 20,x5);
+  store_littleendian(out + 24,x6);
+  store_littleendian(out + 28,x7);
+  store_littleendian(out + 32,x8);
+  store_littleendian(out + 36,x9);
+  store_littleendian(out + 40,x10);
+  store_littleendian(out + 44,x11);
+  store_littleendian(out + 48,x12);
+  store_littleendian(out + 52,x13);
+  store_littleendian(out + 56,x14);
+  store_littleendian(out + 60,x15);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_core/salsa208/ref/implementors b/nacl/nacl-20110221/crypto_core/salsa208/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/salsa208/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_core/salsa208/used b/nacl/nacl-20110221/crypto_core/salsa208/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_core/try.c b/nacl/nacl-20110221/crypto_core/try.c
new file mode 100644
index 00000000..7eb1c677
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_core/try.c
@@ -0,0 +1,116 @@
+/*
+ * crypto_core/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "crypto_core.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_core_IMPLEMENTATION;
+
+static unsigned char *h;
+static unsigned char *n;
+static unsigned char *k;
+static unsigned char *c;
+static unsigned char *h2;
+static unsigned char *n2;
+static unsigned char *k2;
+static unsigned char *c2;
+
+#define hlen crypto_core_OUTPUTBYTES
+#define nlen crypto_core_INPUTBYTES
+#define klen crypto_core_KEYBYTES
+#define clen crypto_core_CONSTBYTES
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  h = alignedcalloc(hlen);
+  n = alignedcalloc(nlen);
+  k = alignedcalloc(klen);
+  c = alignedcalloc(clen);
+  h2 = alignedcalloc(hlen);
+  n2 = alignedcalloc(nlen + crypto_core_OUTPUTBYTES);
+  k2 = alignedcalloc(klen + crypto_core_OUTPUTBYTES);
+  c2 = alignedcalloc(clen + crypto_core_OUTPUTBYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_core(h,n,k,c);
+}
+
+static unsigned char newbyte(void)
+{
+  unsigned long long x;
+  long long j;
+  x = 8675309;
+  for (j = 0;j < hlen;++j) { x += h[j]; x *= x; x += (x >> 31); }
+  for (j = 0;j < nlen;++j) { x += n[j]; x *= x; x += (x >> 31); }
+  for (j = 0;j < klen;++j) { x += k[j]; x *= x; x += (x >> 31); }
+  for (j = 0;j < clen;++j) { x += c[j]; x *= x; x += (x >> 31); }
+  for (j = 0;j < 100;++j)  { x +=   j ; x *= x; x += (x >> 31); }
+  return x;
+}
+
+char checksum[hlen * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (i = 0;i < 100;++i) {
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < 0;++j) n[j] = random();
+    for (j = nlen;j < nlen + 16;++j) n[j] = random();
+    for (j = -16;j < nlen + 16;++j) n2[j] = n[j];
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+    for (j = -16;j < 0;++j) c[j] = random();
+    for (j = clen;j < clen + 16;++j) c[j] = random();
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+    if (crypto_core(h,n,k,c) != 0) return "crypto_core returns nonzero";
+    for (j = -16;j < 0;++j) if (h2[j] != h[j]) return "crypto_core writes before output";
+    for (j = hlen;j < hlen + 16;++j) if (h2[j] != h[j]) return "crypto_core writes after output";
+    for (j = -16;j < klen + 16;++j) if (k2[j] != k[j]) return "crypto_core writes to k";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_core writes to n";
+    for (j = -16;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_core writes to c";
+
+    if (crypto_core(n2,n2,k,c) != 0) return "crypto_core returns nonzero";
+    for (j = 0;j < hlen;++j) if (h[j] != n2[j]) return "crypto_core does not handle n overlap";
+    for (j = 0;j < hlen;++j) n2[j] = n[j];
+    if (crypto_core(k2,n2,k2,c) != 0) return "crypto_core returns nonzero";
+    for (j = 0;j < hlen;++j) if (h[j] != k2[j]) return "crypto_core does not handle k overlap";
+    for (j = 0;j < hlen;++j) k2[j] = k[j];
+    if (crypto_core(c2,n2,k2,c2) != 0) return "crypto_core returns nonzero";
+    for (j = 0;j < hlen;++j) if (h[j] != c2[j]) return "crypto_core does not handle c overlap";
+    for (j = 0;j < hlen;++j) c2[j] = c[j];
+
+    for (j = 0;j < nlen;++j) n[j] = newbyte();
+    if (crypto_core(h,n,k,c) != 0) return "crypto_core returns nonzero";
+    for (j = 0;j < klen;++j) k[j] = newbyte();
+    if (crypto_core(h,n,k,c) != 0) return "crypto_core returns nonzero";
+    for (j = 0;j < clen;++j) c[j] = newbyte();
+  }
+
+  for (i = 0;i < hlen;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_core/wrapper-empty.cpp b/nacl/nacl-20110221/crypto_core/wrapper-empty.cpp
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hash/measure.c b/nacl/nacl-20110221/crypto_hash/measure.c
new file mode 100644
index 00000000..cec0404d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/measure.c
@@ -0,0 +1,66 @@
+#include <stdlib.h>
+#include "randombytes.h"
+#include "cpucycles.h"
+#include "crypto_hash.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_hash_IMPLEMENTATION;
+const char *implementationversion = crypto_hash_VERSION;
+const char *sizenames[] = { "outputbytes", 0 };
+const long long sizes[] = { crypto_hash_BYTES };
+
+#define MAXTEST_BYTES 4096
+#ifdef SUPERCOP
+#define MGAP 8192
+#else
+#define MGAP 8
+#endif
+
+static unsigned char *h;
+static unsigned char *m;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  h = alignedcalloc(crypto_hash_BYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+}
+
+#define TIMINGS 15
+static long long cycles[TIMINGS + 1];
+
+static void printcycles(long long mlen)
+{
+  int i;
+  for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+  printentry(mlen,"cycles",cycles,TIMINGS);
+}
+
+void measure(void)
+{
+  int i;
+  int loop;
+  int mlen;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) {
+      randombytes(m,mlen);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_hash(h,m,mlen);
+      }
+      printcycles(mlen);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_hash/sha256/checksum b/nacl/nacl-20110221/crypto_hash/sha256/checksum
new file mode 100644
index 00000000..ee52aa30
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha256/checksum
@@ -0,0 +1 @@
+86df8bd202b2a2b5fdc04a7f50a591e43a345849c12fef08d487109648a08e05
diff --git a/nacl/nacl-20110221/crypto_hash/sha256/ref/api.h b/nacl/nacl-20110221/crypto_hash/sha256/ref/api.h
new file mode 100644
index 00000000..ae8c7f6a
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha256/ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 32
diff --git a/nacl/nacl-20110221/crypto_hash/sha256/ref/hash.c b/nacl/nacl-20110221/crypto_hash/sha256/ref/hash.c
new file mode 100644
index 00000000..21ce68a0
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha256/ref/hash.c
@@ -0,0 +1,69 @@
+/*
+20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_hashblocks_sha256.h"
+#include "crypto_hash.h"
+
+#define blocks crypto_hashblocks_sha256
+
+typedef unsigned int uint32;
+
+static const char iv[32] = {
+  0x6a,0x09,0xe6,0x67,
+  0xbb,0x67,0xae,0x85,
+  0x3c,0x6e,0xf3,0x72,
+  0xa5,0x4f,0xf5,0x3a,
+  0x51,0x0e,0x52,0x7f,
+  0x9b,0x05,0x68,0x8c,
+  0x1f,0x83,0xd9,0xab,
+  0x5b,0xe0,0xcd,0x19,
+} ;
+
+int crypto_hash(unsigned char *out,const unsigned char *in,unsigned long long inlen)
+{
+  unsigned char h[32];
+  unsigned char padded[128];
+  int i;
+  unsigned long long bits = inlen << 3;
+
+  for (i = 0;i < 32;++i) h[i] = iv[i];
+
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 63;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 56) {
+    for (i = inlen + 1;i < 56;++i) padded[i] = 0;
+    padded[56] = bits >> 56;
+    padded[57] = bits >> 48;
+    padded[58] = bits >> 40;
+    padded[59] = bits >> 32;
+    padded[60] = bits >> 24;
+    padded[61] = bits >> 16;
+    padded[62] = bits >> 8;
+    padded[63] = bits;
+    blocks(h,padded,64);
+  } else {
+    for (i = inlen + 1;i < 120;++i) padded[i] = 0;
+    padded[120] = bits >> 56;
+    padded[121] = bits >> 48;
+    padded[122] = bits >> 40;
+    padded[123] = bits >> 32;
+    padded[124] = bits >> 24;
+    padded[125] = bits >> 16;
+    padded[126] = bits >> 8;
+    padded[127] = bits;
+    blocks(h,padded,128);
+  }
+
+  for (i = 0;i < 32;++i) out[i] = h[i];
+
+  return 0;
+}
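/*
 * Editor's note -- illustrative sketch, not part of the NaCl patch above.
 * The wrapper does SHA-256's padding itself: copy the message tail, append
 * the 0x80 terminator, zero-fill, and store the bit length big-endian in
 * the last 8 bytes of the final 64-byte (or 128-byte) block.  The short-
 * message branch in isolation, with a worked 3-byte example (the helper
 * name is hypothetical):
 */
#include <assert.h>
#include <string.h>

static void sha256_pad_short(unsigned char padded[64],
                             const unsigned char *m,unsigned long long inlen)
{
  unsigned long long bits = inlen << 3;
  int i;
  assert(inlen < 56);
  memcpy(padded,m,inlen);
  padded[inlen] = 0x80;
  for (i = (int) inlen + 1;i < 56;++i) padded[i] = 0;
  for (i = 0;i < 8;++i) padded[56 + i] = bits >> (56 - 8 * i);
}

/*
 * sha256_pad_short(block,(const unsigned char *) "abc",3) leaves
 * block[3] == 0x80, block[4..62] == 0 and block[63] == 0x18 (24 bits),
 * i.e. exactly the single 64-byte block the code above feeds to
 * crypto_hashblocks_sha256 for a 3-byte message.
 */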
diff --git a/nacl/nacl-20110221/crypto_hash/sha256/ref/implementors b/nacl/nacl-20110221/crypto_hash/sha256/ref/implementors
new file mode 100644
index 00000000..962e7d8e
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha256/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein (wrapper around crypto_hashblocks/sha256)
diff --git a/nacl/nacl-20110221/crypto_hash/sha256/used b/nacl/nacl-20110221/crypto_hash/sha256/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hash/sha512/checksum b/nacl/nacl-20110221/crypto_hash/sha512/checksum
new file mode 100644
index 00000000..edf714e9
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha512/checksum
@@ -0,0 +1 @@
+9a2a989e136a02c3362c98e6e1e0b52fab980a1dafbebe4dd5e44d15d061742e35fb686befd4e33c608d251c96e26c020f90d92bb7ec8a657f79bb8e0b00a473
diff --git a/nacl/nacl-20110221/crypto_hash/sha512/ref/api.h b/nacl/nacl-20110221/crypto_hash/sha512/ref/api.h
new file mode 100644
index 00000000..de9380d7
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha512/ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 64
diff --git a/nacl/nacl-20110221/crypto_hash/sha512/ref/hash.c b/nacl/nacl-20110221/crypto_hash/sha512/ref/hash.c
new file mode 100644
index 00000000..fc4347bb
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha512/ref/hash.c
@@ -0,0 +1,71 @@
+/*
+20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_hashblocks_sha512.h"
+#include "crypto_hash.h"
+
+#define blocks crypto_hashblocks_sha512
+
+static const unsigned char iv[64] = {
+  0x6a,0x09,0xe6,0x67,0xf3,0xbc,0xc9,0x08,
+  0xbb,0x67,0xae,0x85,0x84,0xca,0xa7,0x3b,
+  0x3c,0x6e,0xf3,0x72,0xfe,0x94,0xf8,0x2b,
+  0xa5,0x4f,0xf5,0x3a,0x5f,0x1d,0x36,0xf1,
+  0x51,0x0e,0x52,0x7f,0xad,0xe6,0x82,0xd1,
+  0x9b,0x05,0x68,0x8c,0x2b,0x3e,0x6c,0x1f,
+  0x1f,0x83,0xd9,0xab,0xfb,0x41,0xbd,0x6b,
+  0x5b,0xe0,0xcd,0x19,0x13,0x7e,0x21,0x79
+} ;
+
+typedef unsigned long long uint64;
+
+int crypto_hash(unsigned char *out,const unsigned char *in,unsigned long long inlen)
+{
+  unsigned char h[64];
+  unsigned char padded[256];
+  int i;
+  unsigned long long bytes = inlen;
+
+  for (i = 0;i < 64;++i) h[i] = iv[i];
+
+  blocks(h,in,inlen);
+  in += inlen;
+  inlen &= 127;
+  in -= inlen;
+
+  for (i = 0;i < inlen;++i) padded[i] = in[i];
+  padded[inlen] = 0x80;
+
+  if (inlen < 112) {
+    for (i = inlen + 1;i < 119;++i) padded[i] = 0;
+    padded[119] = bytes >> 61;
+    padded[120] = bytes >> 53;
+    padded[121] = bytes >> 45;
+    padded[122] = bytes >> 37;
+    padded[123] = bytes >> 29;
+    padded[124] = bytes >> 21;
+    padded[125] = bytes >> 13;
+    padded[126] = bytes >> 5;
+    padded[127] = bytes << 3;
+    blocks(h,padded,128);
+  } else {
+    for (i = inlen + 1;i < 247;++i) padded[i] = 0;
+    padded[247] = bytes >> 61;
+    padded[248] = bytes >> 53;
+    padded[249] = bytes >> 45;
+    padded[250] = bytes >> 37;
+    padded[251] = bytes >> 29;
+    padded[252] = bytes >> 21;
+    padded[253] = bytes >> 13;
+    padded[254] = bytes >> 5;
+    padded[255] = bytes << 3;
+    blocks(h,padded,256);
+  }
+
+  for (i = 0;i < 64;++i) out[i] = h[i];
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_hash/sha512/ref/implementors b/nacl/nacl-20110221/crypto_hash/sha512/ref/implementors
new file mode 100644
index 00000000..40afca09
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/sha512/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein (wrapper around crypto_hashblocks/sha512)
diff --git a/nacl/nacl-20110221/crypto_hash/sha512/selected b/nacl/nacl-20110221/crypto_hash/sha512/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hash/sha512/used b/nacl/nacl-20110221/crypto_hash/sha512/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hash/try.c b/nacl/nacl-20110221/crypto_hash/try.c
new file mode 100644
index 00000000..fab49c99
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/try.c
@@ -0,0 +1,77 @@
+/*
+ * crypto_hash/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "crypto_hash.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_hash_IMPLEMENTATION;
+
+#define MAXTEST_BYTES (10000 + crypto_hash_BYTES)
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *h;
+static unsigned char *h2;
+static unsigned char *m;
+static unsigned char *m2;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  h = alignedcalloc(crypto_hash_BYTES);
+  h2 = alignedcalloc(crypto_hash_BYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  m2 = alignedcalloc(MAXTEST_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_hash(h,m,TUNE_BYTES);
+}
+
+char checksum[crypto_hash_BYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long hlen = crypto_hash_BYTES;
+    long long mlen = i;
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+    if (crypto_hash(h,m,mlen) != 0) return "crypto_hash returns nonzero";
+    for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_hash writes to input";
+    for (j = -16;j < 0;++j) if (h2[j] != h[j]) return "crypto_hash writes before output";
+    for (j = hlen;j < hlen + 16;++j) if (h2[j] != h[j]) return "crypto_hash writes after output";
+    if (crypto_hash(m2,m2,mlen) != 0) return "crypto_hash returns nonzero";
+    for (j = 0;j < hlen;++j) if (m2[j] != h[j]) return "crypto_hash does not handle overlap";
+    for (j = 0;j < mlen;++j) m[j] ^= h[j % hlen];
+    m[mlen] = h[0];
+  }
+  if (crypto_hash(h,m,CHECKSUM_BYTES) != 0) return "crypto_hash returns nonzero";
+
+  for (i = 0;i < crypto_hash_BYTES;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_hash/wrapper-hash.cpp b/nacl/nacl-20110221/crypto_hash/wrapper-hash.cpp
new file mode 100644
index 00000000..4c0fb590
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hash/wrapper-hash.cpp
@@ -0,0 +1,10 @@
+#include <string>
+using std::string;
+#include "crypto_hash.h"
+
+string crypto_hash(const string &m)
+{
+  unsigned char h[crypto_hash_BYTES];
+  crypto_hash(h,(const unsigned char *) m.c_str(),m.size());
+  return string((char *) h,sizeof h);
+}
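/*
 * Editor's note -- usage sketch, not part of the NaCl patch above.
 * The string wrapper hashes in one call (crypto_hash_BYTES is 64 here,
 * since sha512 is the selected crypto_hash in this tree); the main
 * program and hex printing are illustrative only:
 */
#include <cstdio>
#include <string>
#include "crypto_hash.h"

int main()
{
  std::string h = crypto_hash(std::string("abc"));   // 64-byte digest
  for (std::string::size_type i = 0;i < h.size();++i)
    std::printf("%02x",(unsigned char) h[i]);
  std::printf("\n");
  return 0;
}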
diff --git a/nacl/nacl-20110221/crypto_hashblocks/measure.c b/nacl/nacl-20110221/crypto_hashblocks/measure.c
new file mode 100644
index 00000000..145fbbc4
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/measure.c
@@ -0,0 +1,18 @@
+#include "crypto_hashblocks.h"
+
+const char *primitiveimplementation = crypto_hashblocks_IMPLEMENTATION;
+const char *implementationversion = crypto_hashblocks_VERSION;
+const char *sizenames[] = { "statebytes", 0 };
+const long long sizes[] = { crypto_hashblocks_STATEBYTES };
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+}
+
+void measure(void)
+{
+}
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/checksum b/nacl/nacl-20110221/crypto_hashblocks/sha256/checksum
new file mode 100644
index 00000000..edde1d4f
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/checksum
@@ -0,0 +1 @@
+69a9dc2464f9593161e462d3dbb634b84f1d68d67d26df29aaa805f9dcd8f656
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/api.h b/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/api.h
new file mode 100644
index 00000000..005a4f47
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_STATEBYTES 32
+#define CRYPTO_BLOCKBYTES 64
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/blocks.c b/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/blocks.c
new file mode 100644
index 00000000..4a191501
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/blocks.c
@@ -0,0 +1,228 @@
+#include "crypto_hashblocks.h"
+
+typedef unsigned int uint32;
+
+static uint32 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[3]) \
+  | (((uint32) (x[2])) << 8) \
+  | (((uint32) (x[1])) << 16) \
+  | (((uint32) (x[0])) << 24)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint32 u)
+{
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
+#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
+#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
+#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))
+
+#define M(w0,w14,w9,w1) w0 += sigma1(w14) + w9 + sigma0(w1);
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
+#define F(r0,r1,r2,r3,r4,r5,r6,r7,w,k) \
+  r7 += Sigma1(r4) + Ch(r4,r5,r6) + k + w; \
+  r3 += r7; \
+  r7 += Sigma0(r0) + Maj(r0,r1,r2);
+
+#define G(r0,r1,r2,r3,r4,r5,r6,r7,i) \
+  F(r0,r1,r2,r3,r4,r5,r6,r7,w0 ,round[i + 0]) \
+  F(r7,r0,r1,r2,r3,r4,r5,r6,w1 ,round[i + 1]) \
+  F(r6,r7,r0,r1,r2,r3,r4,r5,w2 ,round[i + 2]) \
+  F(r5,r6,r7,r0,r1,r2,r3,r4,w3 ,round[i + 3]) \
+  F(r4,r5,r6,r7,r0,r1,r2,r3,w4 ,round[i + 4]) \
+  F(r3,r4,r5,r6,r7,r0,r1,r2,w5 ,round[i + 5]) \
+  F(r2,r3,r4,r5,r6,r7,r0,r1,w6 ,round[i + 6]) \
+  F(r1,r2,r3,r4,r5,r6,r7,r0,w7 ,round[i + 7]) \
+  F(r0,r1,r2,r3,r4,r5,r6,r7,w8 ,round[i + 8]) \
+  F(r7,r0,r1,r2,r3,r4,r5,r6,w9 ,round[i + 9]) \
+  F(r6,r7,r0,r1,r2,r3,r4,r5,w10,round[i + 10]) \
+  F(r5,r6,r7,r0,r1,r2,r3,r4,w11,round[i + 11]) \
+  F(r4,r5,r6,r7,r0,r1,r2,r3,w12,round[i + 12]) \
+  F(r3,r4,r5,r6,r7,r0,r1,r2,w13,round[i + 13]) \
+  F(r2,r3,r4,r5,r6,r7,r0,r1,w14,round[i + 14]) \
+  F(r1,r2,r3,r4,r5,r6,r7,r0,w15,round[i + 15])
+
+static const uint32 round[64] = {
+  0x428a2f98
+, 0x71374491
+, 0xb5c0fbcf
+, 0xe9b5dba5
+, 0x3956c25b
+, 0x59f111f1
+, 0x923f82a4
+, 0xab1c5ed5
+, 0xd807aa98
+, 0x12835b01
+, 0x243185be
+, 0x550c7dc3
+, 0x72be5d74
+, 0x80deb1fe
+, 0x9bdc06a7
+, 0xc19bf174
+, 0xe49b69c1
+, 0xefbe4786
+, 0x0fc19dc6
+, 0x240ca1cc
+, 0x2de92c6f
+, 0x4a7484aa
+, 0x5cb0a9dc
+, 0x76f988da
+, 0x983e5152
+, 0xa831c66d
+, 0xb00327c8
+, 0xbf597fc7
+, 0xc6e00bf3
+, 0xd5a79147
+, 0x06ca6351
+, 0x14292967
+, 0x27b70a85
+, 0x2e1b2138
+, 0x4d2c6dfc
+, 0x53380d13
+, 0x650a7354
+, 0x766a0abb
+, 0x81c2c92e
+, 0x92722c85
+, 0xa2bfe8a1
+, 0xa81a664b
+, 0xc24b8b70
+, 0xc76c51a3
+, 0xd192e819
+, 0xd6990624
+, 0xf40e3585
+, 0x106aa070
+, 0x19a4c116
+, 0x1e376c08
+, 0x2748774c
+, 0x34b0bcb5
+, 0x391c0cb3
+, 0x4ed8aa4a
+, 0x5b9cca4f
+, 0x682e6ff3
+, 0x748f82ee
+, 0x78a5636f
+, 0x84c87814
+, 0x8cc70208
+, 0x90befffa
+, 0xa4506ceb
+, 0xbef9a3f7
+, 0xc67178f2
+} ;
+
+int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint32 state[8];
+  uint32 r0;
+  uint32 r1;
+  uint32 r2;
+  uint32 r3;
+  uint32 r4;
+  uint32 r5;
+  uint32 r6;
+  uint32 r7;
+
+  r0 = load_bigendian(statebytes +  0); state[0] = r0;
+  r1 = load_bigendian(statebytes +  4); state[1] = r1;
+  r2 = load_bigendian(statebytes +  8); state[2] = r2;
+  r3 = load_bigendian(statebytes + 12); state[3] = r3;
+  r4 = load_bigendian(statebytes + 16); state[4] = r4;
+  r5 = load_bigendian(statebytes + 20); state[5] = r5;
+  r6 = load_bigendian(statebytes + 24); state[6] = r6;
+  r7 = load_bigendian(statebytes + 28); state[7] = r7;
+
+  while (inlen >= 64) {
+    uint32 w0  = load_bigendian(in +  0);
+    uint32 w1  = load_bigendian(in +  4);
+    uint32 w2  = load_bigendian(in +  8);
+    uint32 w3  = load_bigendian(in + 12);
+    uint32 w4  = load_bigendian(in + 16);
+    uint32 w5  = load_bigendian(in + 20);
+    uint32 w6  = load_bigendian(in + 24);
+    uint32 w7  = load_bigendian(in + 28);
+    uint32 w8  = load_bigendian(in + 32);
+    uint32 w9  = load_bigendian(in + 36);
+    uint32 w10 = load_bigendian(in + 40);
+    uint32 w11 = load_bigendian(in + 44);
+    uint32 w12 = load_bigendian(in + 48);
+    uint32 w13 = load_bigendian(in + 52);
+    uint32 w14 = load_bigendian(in + 56);
+    uint32 w15 = load_bigendian(in + 60);
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,0)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,16)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,32)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,48)
+
+    r0 += state[0];
+    r1 += state[1];
+    r2 += state[2];
+    r3 += state[3];
+    r4 += state[4];
+    r5 += state[5];
+    r6 += state[6];
+    r7 += state[7];
+  
+    state[0] = r0;
+    state[1] = r1;
+    state[2] = r2;
+    state[3] = r3;
+    state[4] = r4;
+    state[5] = r5;
+    state[6] = r6;
+    state[7] = r7;
+
+    in += 64;
+    inlen -= 64;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  4,state[1]);
+  store_bigendian(statebytes +  8,state[2]);
+  store_bigendian(statebytes + 12,state[3]);
+  store_bigendian(statebytes + 16,state[4]);
+  store_bigendian(statebytes + 20,state[5]);
+  store_bigendian(statebytes + 24,state[6]);
+  store_bigendian(statebytes + 28,state[7]);
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/implementors b/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/inplace/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/api.h b/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/api.h
new file mode 100644
index 00000000..005a4f47
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_STATEBYTES 32
+#define CRYPTO_BLOCKBYTES 64
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/blocks.c b/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/blocks.c
new file mode 100644
index 00000000..ad977945
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/blocks.c
@@ -0,0 +1,212 @@
+#include "crypto_hashblocks.h"
+
+typedef unsigned int uint32;
+
+static uint32 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[3]) \
+  | (((uint32) (x[2])) << 8) \
+  | (((uint32) (x[1])) << 16) \
+  | (((uint32) (x[0])) << 24)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint32 u)
+{
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (32 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x, 2) ^ ROTR(x,13) ^ ROTR(x,22))
+#define Sigma1(x) (ROTR(x, 6) ^ ROTR(x,11) ^ ROTR(x,25))
+#define sigma0(x) (ROTR(x, 7) ^ ROTR(x,18) ^ SHR(x, 3))
+#define sigma1(x) (ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10))
+
+#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
+#define F(w,k) \
+  T1 = h + Sigma1(e) + Ch(e,f,g) + k + w; \
+  T2 = Sigma0(a) + Maj(a,b,c); \
+  h = g; \
+  g = f; \
+  f = e; \
+  e = d + T1; \
+  d = c; \
+  c = b; \
+  b = a; \
+  a = T1 + T2;
+
+int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint32 state[8];
+  uint32 a;
+  uint32 b;
+  uint32 c;
+  uint32 d;
+  uint32 e;
+  uint32 f;
+  uint32 g;
+  uint32 h;
+  uint32 T1;
+  uint32 T2;
+
+  a = load_bigendian(statebytes +  0); state[0] = a;
+  b = load_bigendian(statebytes +  4); state[1] = b;
+  c = load_bigendian(statebytes +  8); state[2] = c;
+  d = load_bigendian(statebytes + 12); state[3] = d;
+  e = load_bigendian(statebytes + 16); state[4] = e;
+  f = load_bigendian(statebytes + 20); state[5] = f;
+  g = load_bigendian(statebytes + 24); state[6] = g;
+  h = load_bigendian(statebytes + 28); state[7] = h;
+
+  while (inlen >= 64) {
+    uint32 w0  = load_bigendian(in +  0);
+    uint32 w1  = load_bigendian(in +  4);
+    uint32 w2  = load_bigendian(in +  8);
+    uint32 w3  = load_bigendian(in + 12);
+    uint32 w4  = load_bigendian(in + 16);
+    uint32 w5  = load_bigendian(in + 20);
+    uint32 w6  = load_bigendian(in + 24);
+    uint32 w7  = load_bigendian(in + 28);
+    uint32 w8  = load_bigendian(in + 32);
+    uint32 w9  = load_bigendian(in + 36);
+    uint32 w10 = load_bigendian(in + 40);
+    uint32 w11 = load_bigendian(in + 44);
+    uint32 w12 = load_bigendian(in + 48);
+    uint32 w13 = load_bigendian(in + 52);
+    uint32 w14 = load_bigendian(in + 56);
+    uint32 w15 = load_bigendian(in + 60);
+
+    F(w0 ,0x428a2f98)
+    F(w1 ,0x71374491)
+    F(w2 ,0xb5c0fbcf)
+    F(w3 ,0xe9b5dba5)
+    F(w4 ,0x3956c25b)
+    F(w5 ,0x59f111f1)
+    F(w6 ,0x923f82a4)
+    F(w7 ,0xab1c5ed5)
+    F(w8 ,0xd807aa98)
+    F(w9 ,0x12835b01)
+    F(w10,0x243185be)
+    F(w11,0x550c7dc3)
+    F(w12,0x72be5d74)
+    F(w13,0x80deb1fe)
+    F(w14,0x9bdc06a7)
+    F(w15,0xc19bf174)
+
+    EXPAND
+
+    F(w0 ,0xe49b69c1)
+    F(w1 ,0xefbe4786)
+    F(w2 ,0x0fc19dc6)
+    F(w3 ,0x240ca1cc)
+    F(w4 ,0x2de92c6f)
+    F(w5 ,0x4a7484aa)
+    F(w6 ,0x5cb0a9dc)
+    F(w7 ,0x76f988da)
+    F(w8 ,0x983e5152)
+    F(w9 ,0xa831c66d)
+    F(w10,0xb00327c8)
+    F(w11,0xbf597fc7)
+    F(w12,0xc6e00bf3)
+    F(w13,0xd5a79147)
+    F(w14,0x06ca6351)
+    F(w15,0x14292967)
+
+    EXPAND
+
+    F(w0 ,0x27b70a85)
+    F(w1 ,0x2e1b2138)
+    F(w2 ,0x4d2c6dfc)
+    F(w3 ,0x53380d13)
+    F(w4 ,0x650a7354)
+    F(w5 ,0x766a0abb)
+    F(w6 ,0x81c2c92e)
+    F(w7 ,0x92722c85)
+    F(w8 ,0xa2bfe8a1)
+    F(w9 ,0xa81a664b)
+    F(w10,0xc24b8b70)
+    F(w11,0xc76c51a3)
+    F(w12,0xd192e819)
+    F(w13,0xd6990624)
+    F(w14,0xf40e3585)
+    F(w15,0x106aa070)
+
+    EXPAND
+
+    F(w0 ,0x19a4c116)
+    F(w1 ,0x1e376c08)
+    F(w2 ,0x2748774c)
+    F(w3 ,0x34b0bcb5)
+    F(w4 ,0x391c0cb3)
+    F(w5 ,0x4ed8aa4a)
+    F(w6 ,0x5b9cca4f)
+    F(w7 ,0x682e6ff3)
+    F(w8 ,0x748f82ee)
+    F(w9 ,0x78a5636f)
+    F(w10,0x84c87814)
+    F(w11,0x8cc70208)
+    F(w12,0x90befffa)
+    F(w13,0xa4506ceb)
+    F(w14,0xbef9a3f7)
+    F(w15,0xc67178f2)
+
+    a += state[0];
+    b += state[1];
+    c += state[2];
+    d += state[3];
+    e += state[4];
+    f += state[5];
+    g += state[6];
+    h += state[7];
+  
+    state[0] = a;
+    state[1] = b;
+    state[2] = c;
+    state[3] = d;
+    state[4] = e;
+    state[5] = f;
+    state[6] = g;
+    state[7] = h;
+
+    in += 64;
+    inlen -= 64;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  4,state[1]);
+  store_bigendian(statebytes +  8,state[2]);
+  store_bigendian(statebytes + 12,state[3]);
+  store_bigendian(statebytes + 16,state[4]);
+  store_bigendian(statebytes + 20,state[5]);
+  store_bigendian(statebytes + 24,state[6]);
+  store_bigendian(statebytes + 28,state[7]);
+
+  return 0;
+}
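/*
 * Editor's note -- illustrative rewrite, not part of the NaCl patch above.
 * F(w,k) is one SHA-256 round with the register rotation written out by
 * hand.  The same round as a function (struct and function names are
 * hypothetical; Ch/Maj/Sigma0/Sigma1 are the macros defined in the file
 * above):
 */
typedef unsigned int u32;

struct sha256_regs { u32 a,b,c,d,e,f,g,h; };

static void sha256_round(struct sha256_regs *r,u32 w,u32 k)
{
  u32 T1 = r->h + Sigma1(r->e) + Ch(r->e,r->f,r->g) + k + w;
  u32 T2 = Sigma0(r->a) + Maj(r->a,r->b,r->c);
  r->h = r->g; r->g = r->f; r->f = r->e;
  r->e = r->d + T1;
  r->d = r->c; r->c = r->b; r->b = r->a;
  r->a = T1 + T2;
}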
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/implementors b/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha256/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha256/used b/nacl/nacl-20110221/crypto_hashblocks/sha256/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/checksum b/nacl/nacl-20110221/crypto_hashblocks/sha512/checksum
new file mode 100644
index 00000000..ed5245ec
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/checksum
@@ -0,0 +1 @@
+f005c91634ae549f0dd4529ddbaf07038cb75a59b818cd1d4eb4e2b4019ab6733556131f320c4a145c735a22594581d454cccb15c18bf198ffcb2da29fe39456
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/api.h b/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/api.h
new file mode 100644
index 00000000..ac45d103
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_STATEBYTES 64
+#define CRYPTO_BLOCKBYTES 128
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/blocks.c b/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/blocks.c
new file mode 100644
index 00000000..93791b69
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/blocks.c
@@ -0,0 +1,256 @@
+#include "crypto_hashblocks.h"
+
+typedef unsigned long long uint64;
+
+static uint64 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint64) (x[7]) \
+  | (((uint64) (x[6])) << 8) \
+  | (((uint64) (x[5])) << 16) \
+  | (((uint64) (x[4])) << 24) \
+  | (((uint64) (x[3])) << 32) \
+  | (((uint64) (x[2])) << 40) \
+  | (((uint64) (x[1])) << 48) \
+  | (((uint64) (x[0])) << 56)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint64 u)
+{
+  x[7] = u; u >>= 8;
+  x[6] = u; u >>= 8;
+  x[5] = u; u >>= 8;
+  x[4] = u; u >>= 8;
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
+#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
+#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
+#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
+
+#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
+#define F(r0,r1,r2,r3,r4,r5,r6,r7,w,k) \
+  r7 += Sigma1(r4) + Ch(r4,r5,r6) + k + w; \
+  r3 += r7; \
+  r7 += Sigma0(r0) + Maj(r0,r1,r2);
+
+#define G(r0,r1,r2,r3,r4,r5,r6,r7,i) \
+  F(r0,r1,r2,r3,r4,r5,r6,r7,w0 ,round[i + 0]) \
+  F(r7,r0,r1,r2,r3,r4,r5,r6,w1 ,round[i + 1]) \
+  F(r6,r7,r0,r1,r2,r3,r4,r5,w2 ,round[i + 2]) \
+  F(r5,r6,r7,r0,r1,r2,r3,r4,w3 ,round[i + 3]) \
+  F(r4,r5,r6,r7,r0,r1,r2,r3,w4 ,round[i + 4]) \
+  F(r3,r4,r5,r6,r7,r0,r1,r2,w5 ,round[i + 5]) \
+  F(r2,r3,r4,r5,r6,r7,r0,r1,w6 ,round[i + 6]) \
+  F(r1,r2,r3,r4,r5,r6,r7,r0,w7 ,round[i + 7]) \
+  F(r0,r1,r2,r3,r4,r5,r6,r7,w8 ,round[i + 8]) \
+  F(r7,r0,r1,r2,r3,r4,r5,r6,w9 ,round[i + 9]) \
+  F(r6,r7,r0,r1,r2,r3,r4,r5,w10,round[i + 10]) \
+  F(r5,r6,r7,r0,r1,r2,r3,r4,w11,round[i + 11]) \
+  F(r4,r5,r6,r7,r0,r1,r2,r3,w12,round[i + 12]) \
+  F(r3,r4,r5,r6,r7,r0,r1,r2,w13,round[i + 13]) \
+  F(r2,r3,r4,r5,r6,r7,r0,r1,w14,round[i + 14]) \
+  F(r1,r2,r3,r4,r5,r6,r7,r0,w15,round[i + 15])
+
+static const uint64 round[80] = {
+  0x428a2f98d728ae22ULL
+, 0x7137449123ef65cdULL
+, 0xb5c0fbcfec4d3b2fULL
+, 0xe9b5dba58189dbbcULL
+, 0x3956c25bf348b538ULL
+, 0x59f111f1b605d019ULL
+, 0x923f82a4af194f9bULL
+, 0xab1c5ed5da6d8118ULL
+, 0xd807aa98a3030242ULL
+, 0x12835b0145706fbeULL
+, 0x243185be4ee4b28cULL
+, 0x550c7dc3d5ffb4e2ULL
+, 0x72be5d74f27b896fULL
+, 0x80deb1fe3b1696b1ULL
+, 0x9bdc06a725c71235ULL
+, 0xc19bf174cf692694ULL
+, 0xe49b69c19ef14ad2ULL
+, 0xefbe4786384f25e3ULL
+, 0x0fc19dc68b8cd5b5ULL
+, 0x240ca1cc77ac9c65ULL
+, 0x2de92c6f592b0275ULL
+, 0x4a7484aa6ea6e483ULL
+, 0x5cb0a9dcbd41fbd4ULL
+, 0x76f988da831153b5ULL
+, 0x983e5152ee66dfabULL
+, 0xa831c66d2db43210ULL
+, 0xb00327c898fb213fULL
+, 0xbf597fc7beef0ee4ULL
+, 0xc6e00bf33da88fc2ULL
+, 0xd5a79147930aa725ULL
+, 0x06ca6351e003826fULL
+, 0x142929670a0e6e70ULL
+, 0x27b70a8546d22ffcULL
+, 0x2e1b21385c26c926ULL
+, 0x4d2c6dfc5ac42aedULL
+, 0x53380d139d95b3dfULL
+, 0x650a73548baf63deULL
+, 0x766a0abb3c77b2a8ULL
+, 0x81c2c92e47edaee6ULL
+, 0x92722c851482353bULL
+, 0xa2bfe8a14cf10364ULL
+, 0xa81a664bbc423001ULL
+, 0xc24b8b70d0f89791ULL
+, 0xc76c51a30654be30ULL
+, 0xd192e819d6ef5218ULL
+, 0xd69906245565a910ULL
+, 0xf40e35855771202aULL
+, 0x106aa07032bbd1b8ULL
+, 0x19a4c116b8d2d0c8ULL
+, 0x1e376c085141ab53ULL
+, 0x2748774cdf8eeb99ULL
+, 0x34b0bcb5e19b48a8ULL
+, 0x391c0cb3c5c95a63ULL
+, 0x4ed8aa4ae3418acbULL
+, 0x5b9cca4f7763e373ULL
+, 0x682e6ff3d6b2b8a3ULL
+, 0x748f82ee5defb2fcULL
+, 0x78a5636f43172f60ULL
+, 0x84c87814a1f0ab72ULL
+, 0x8cc702081a6439ecULL
+, 0x90befffa23631e28ULL
+, 0xa4506cebde82bde9ULL
+, 0xbef9a3f7b2c67915ULL
+, 0xc67178f2e372532bULL
+, 0xca273eceea26619cULL
+, 0xd186b8c721c0c207ULL
+, 0xeada7dd6cde0eb1eULL
+, 0xf57d4f7fee6ed178ULL
+, 0x06f067aa72176fbaULL
+, 0x0a637dc5a2c898a6ULL
+, 0x113f9804bef90daeULL
+, 0x1b710b35131c471bULL
+, 0x28db77f523047d84ULL
+, 0x32caab7b40c72493ULL
+, 0x3c9ebe0a15c9bebcULL
+, 0x431d67c49c100d4cULL
+, 0x4cc5d4becb3e42b6ULL
+, 0x597f299cfc657e2aULL
+, 0x5fcb6fab3ad6faecULL
+, 0x6c44198c4a475817ULL
+};
+
+int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint64 state[8];
+  uint64 r0;
+  uint64 r1;
+  uint64 r2;
+  uint64 r3;
+  uint64 r4;
+  uint64 r5;
+  uint64 r6;
+  uint64 r7;
+
+  r0 = load_bigendian(statebytes +  0); state[0] = r0;
+  r1 = load_bigendian(statebytes +  8); state[1] = r1;
+  r2 = load_bigendian(statebytes + 16); state[2] = r2;
+  r3 = load_bigendian(statebytes + 24); state[3] = r3;
+  r4 = load_bigendian(statebytes + 32); state[4] = r4;
+  r5 = load_bigendian(statebytes + 40); state[5] = r5;
+  r6 = load_bigendian(statebytes + 48); state[6] = r6;
+  r7 = load_bigendian(statebytes + 56); state[7] = r7;
+
+  while (inlen >= 128) {
+    uint64 w0  = load_bigendian(in +   0);
+    uint64 w1  = load_bigendian(in +   8);
+    uint64 w2  = load_bigendian(in +  16);
+    uint64 w3  = load_bigendian(in +  24);
+    uint64 w4  = load_bigendian(in +  32);
+    uint64 w5  = load_bigendian(in +  40);
+    uint64 w6  = load_bigendian(in +  48);
+    uint64 w7  = load_bigendian(in +  56);
+    uint64 w8  = load_bigendian(in +  64);
+    uint64 w9  = load_bigendian(in +  72);
+    uint64 w10 = load_bigendian(in +  80);
+    uint64 w11 = load_bigendian(in +  88);
+    uint64 w12 = load_bigendian(in +  96);
+    uint64 w13 = load_bigendian(in + 104);
+    uint64 w14 = load_bigendian(in + 112);
+    uint64 w15 = load_bigendian(in + 120);
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,0)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,16)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,32)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,48)
+
+    EXPAND
+
+    G(r0,r1,r2,r3,r4,r5,r6,r7,64)
+
+    r0 += state[0];
+    r1 += state[1];
+    r2 += state[2];
+    r3 += state[3];
+    r4 += state[4];
+    r5 += state[5];
+    r6 += state[6];
+    r7 += state[7];
+  
+    state[0] = r0;
+    state[1] = r1;
+    state[2] = r2;
+    state[3] = r3;
+    state[4] = r4;
+    state[5] = r5;
+    state[6] = r6;
+    state[7] = r7;
+
+    in += 128;
+    inlen -= 128;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  8,state[1]);
+  store_bigendian(statebytes + 16,state[2]);
+  store_bigendian(statebytes + 24,state[3]);
+  store_bigendian(statebytes + 32,state[4]);
+  store_bigendian(statebytes + 40,state[5]);
+  store_bigendian(statebytes + 48,state[6]);
+  store_bigendian(statebytes + 56,state[7]);
+
+  return 0;
+}
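This "inplace" variant computes the same SHA-512 compression function as the ref implementation that follows, but it never shuffles the eight working variables: the G macro instead rotates which of r0..r7 plays the role of a,b,...,h from one round to the next. Written out for a single round with the ref-version names (an explanatory sketch, not code from this tree):

    /* F(a,b,c,d,e,f,g,h,w,k) from this file, annotated: */
    h += Sigma1(e) + Ch(e,f,g) + k + w;   /* h now holds T1                */
    d += h;                               /* d becomes the new e, d + T1   */
    h += Sigma0(a) + Maj(a,b,c);          /* h becomes the new a, T1 + T2  */
    /* G then invokes F with every argument shifted one position,
       F(r7,r0,...,r6,...), so the per-round h = g; g = f; ... copies of
       the ref version are never executed. */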
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/implementors b/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/inplace/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/api.h b/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/api.h
new file mode 100644
index 00000000..ac45d103
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_STATEBYTES 64
+#define CRYPTO_BLOCKBYTES 128
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/blocks.c b/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/blocks.c
new file mode 100644
index 00000000..f8fae491
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/blocks.c
@@ -0,0 +1,239 @@
+#include "crypto_hashblocks.h"
+
+typedef unsigned long long uint64;
+
+static uint64 load_bigendian(const unsigned char *x)
+{
+  return
+      (uint64) (x[7]) \
+  | (((uint64) (x[6])) << 8) \
+  | (((uint64) (x[5])) << 16) \
+  | (((uint64) (x[4])) << 24) \
+  | (((uint64) (x[3])) << 32) \
+  | (((uint64) (x[2])) << 40) \
+  | (((uint64) (x[1])) << 48) \
+  | (((uint64) (x[0])) << 56)
+  ;
+}
+
+static void store_bigendian(unsigned char *x,uint64 u)
+{
+  x[7] = u; u >>= 8;
+  x[6] = u; u >>= 8;
+  x[5] = u; u >>= 8;
+  x[4] = u; u >>= 8;
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+#define SHR(x,c) ((x) >> (c))
+#define ROTR(x,c) (((x) >> (c)) | ((x) << (64 - (c))))
+
+#define Ch(x,y,z) ((x & y) ^ (~x & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) (ROTR(x,28) ^ ROTR(x,34) ^ ROTR(x,39))
+#define Sigma1(x) (ROTR(x,14) ^ ROTR(x,18) ^ ROTR(x,41))
+#define sigma0(x) (ROTR(x, 1) ^ ROTR(x, 8) ^ SHR(x,7))
+#define sigma1(x) (ROTR(x,19) ^ ROTR(x,61) ^ SHR(x,6))
+
+#define M(w0,w14,w9,w1) w0 = sigma1(w14) + w9 + sigma0(w1) + w0;
+
+#define EXPAND \
+  M(w0 ,w14,w9 ,w1 ) \
+  M(w1 ,w15,w10,w2 ) \
+  M(w2 ,w0 ,w11,w3 ) \
+  M(w3 ,w1 ,w12,w4 ) \
+  M(w4 ,w2 ,w13,w5 ) \
+  M(w5 ,w3 ,w14,w6 ) \
+  M(w6 ,w4 ,w15,w7 ) \
+  M(w7 ,w5 ,w0 ,w8 ) \
+  M(w8 ,w6 ,w1 ,w9 ) \
+  M(w9 ,w7 ,w2 ,w10) \
+  M(w10,w8 ,w3 ,w11) \
+  M(w11,w9 ,w4 ,w12) \
+  M(w12,w10,w5 ,w13) \
+  M(w13,w11,w6 ,w14) \
+  M(w14,w12,w7 ,w15) \
+  M(w15,w13,w8 ,w0 )
+
+#define F(w,k) \
+  T1 = h + Sigma1(e) + Ch(e,f,g) + k + w; \
+  T2 = Sigma0(a) + Maj(a,b,c); \
+  h = g; \
+  g = f; \
+  f = e; \
+  e = d + T1; \
+  d = c; \
+  c = b; \
+  b = a; \
+  a = T1 + T2;
+
+int crypto_hashblocks(unsigned char *statebytes,const unsigned char *in,unsigned long long inlen)
+{
+  uint64 state[8];
+  uint64 a;
+  uint64 b;
+  uint64 c;
+  uint64 d;
+  uint64 e;
+  uint64 f;
+  uint64 g;
+  uint64 h;
+  uint64 T1;
+  uint64 T2;
+
+  a = load_bigendian(statebytes +  0); state[0] = a;
+  b = load_bigendian(statebytes +  8); state[1] = b;
+  c = load_bigendian(statebytes + 16); state[2] = c;
+  d = load_bigendian(statebytes + 24); state[3] = d;
+  e = load_bigendian(statebytes + 32); state[4] = e;
+  f = load_bigendian(statebytes + 40); state[5] = f;
+  g = load_bigendian(statebytes + 48); state[6] = g;
+  h = load_bigendian(statebytes + 56); state[7] = h;
+
+  while (inlen >= 128) {
+    uint64 w0  = load_bigendian(in +   0);
+    uint64 w1  = load_bigendian(in +   8);
+    uint64 w2  = load_bigendian(in +  16);
+    uint64 w3  = load_bigendian(in +  24);
+    uint64 w4  = load_bigendian(in +  32);
+    uint64 w5  = load_bigendian(in +  40);
+    uint64 w6  = load_bigendian(in +  48);
+    uint64 w7  = load_bigendian(in +  56);
+    uint64 w8  = load_bigendian(in +  64);
+    uint64 w9  = load_bigendian(in +  72);
+    uint64 w10 = load_bigendian(in +  80);
+    uint64 w11 = load_bigendian(in +  88);
+    uint64 w12 = load_bigendian(in +  96);
+    uint64 w13 = load_bigendian(in + 104);
+    uint64 w14 = load_bigendian(in + 112);
+    uint64 w15 = load_bigendian(in + 120);
+
+    F(w0 ,0x428a2f98d728ae22ULL)
+    F(w1 ,0x7137449123ef65cdULL)
+    F(w2 ,0xb5c0fbcfec4d3b2fULL)
+    F(w3 ,0xe9b5dba58189dbbcULL)
+    F(w4 ,0x3956c25bf348b538ULL)
+    F(w5 ,0x59f111f1b605d019ULL)
+    F(w6 ,0x923f82a4af194f9bULL)
+    F(w7 ,0xab1c5ed5da6d8118ULL)
+    F(w8 ,0xd807aa98a3030242ULL)
+    F(w9 ,0x12835b0145706fbeULL)
+    F(w10,0x243185be4ee4b28cULL)
+    F(w11,0x550c7dc3d5ffb4e2ULL)
+    F(w12,0x72be5d74f27b896fULL)
+    F(w13,0x80deb1fe3b1696b1ULL)
+    F(w14,0x9bdc06a725c71235ULL)
+    F(w15,0xc19bf174cf692694ULL)
+
+    EXPAND
+
+    F(w0 ,0xe49b69c19ef14ad2ULL)
+    F(w1 ,0xefbe4786384f25e3ULL)
+    F(w2 ,0x0fc19dc68b8cd5b5ULL)
+    F(w3 ,0x240ca1cc77ac9c65ULL)
+    F(w4 ,0x2de92c6f592b0275ULL)
+    F(w5 ,0x4a7484aa6ea6e483ULL)
+    F(w6 ,0x5cb0a9dcbd41fbd4ULL)
+    F(w7 ,0x76f988da831153b5ULL)
+    F(w8 ,0x983e5152ee66dfabULL)
+    F(w9 ,0xa831c66d2db43210ULL)
+    F(w10,0xb00327c898fb213fULL)
+    F(w11,0xbf597fc7beef0ee4ULL)
+    F(w12,0xc6e00bf33da88fc2ULL)
+    F(w13,0xd5a79147930aa725ULL)
+    F(w14,0x06ca6351e003826fULL)
+    F(w15,0x142929670a0e6e70ULL)
+
+    EXPAND
+
+    F(w0 ,0x27b70a8546d22ffcULL)
+    F(w1 ,0x2e1b21385c26c926ULL)
+    F(w2 ,0x4d2c6dfc5ac42aedULL)
+    F(w3 ,0x53380d139d95b3dfULL)
+    F(w4 ,0x650a73548baf63deULL)
+    F(w5 ,0x766a0abb3c77b2a8ULL)
+    F(w6 ,0x81c2c92e47edaee6ULL)
+    F(w7 ,0x92722c851482353bULL)
+    F(w8 ,0xa2bfe8a14cf10364ULL)
+    F(w9 ,0xa81a664bbc423001ULL)
+    F(w10,0xc24b8b70d0f89791ULL)
+    F(w11,0xc76c51a30654be30ULL)
+    F(w12,0xd192e819d6ef5218ULL)
+    F(w13,0xd69906245565a910ULL)
+    F(w14,0xf40e35855771202aULL)
+    F(w15,0x106aa07032bbd1b8ULL)
+
+    EXPAND
+
+    F(w0 ,0x19a4c116b8d2d0c8ULL)
+    F(w1 ,0x1e376c085141ab53ULL)
+    F(w2 ,0x2748774cdf8eeb99ULL)
+    F(w3 ,0x34b0bcb5e19b48a8ULL)
+    F(w4 ,0x391c0cb3c5c95a63ULL)
+    F(w5 ,0x4ed8aa4ae3418acbULL)
+    F(w6 ,0x5b9cca4f7763e373ULL)
+    F(w7 ,0x682e6ff3d6b2b8a3ULL)
+    F(w8 ,0x748f82ee5defb2fcULL)
+    F(w9 ,0x78a5636f43172f60ULL)
+    F(w10,0x84c87814a1f0ab72ULL)
+    F(w11,0x8cc702081a6439ecULL)
+    F(w12,0x90befffa23631e28ULL)
+    F(w13,0xa4506cebde82bde9ULL)
+    F(w14,0xbef9a3f7b2c67915ULL)
+    F(w15,0xc67178f2e372532bULL)
+
+    EXPAND
+
+    F(w0 ,0xca273eceea26619cULL)
+    F(w1 ,0xd186b8c721c0c207ULL)
+    F(w2 ,0xeada7dd6cde0eb1eULL)
+    F(w3 ,0xf57d4f7fee6ed178ULL)
+    F(w4 ,0x06f067aa72176fbaULL)
+    F(w5 ,0x0a637dc5a2c898a6ULL)
+    F(w6 ,0x113f9804bef90daeULL)
+    F(w7 ,0x1b710b35131c471bULL)
+    F(w8 ,0x28db77f523047d84ULL)
+    F(w9 ,0x32caab7b40c72493ULL)
+    F(w10,0x3c9ebe0a15c9bebcULL)
+    F(w11,0x431d67c49c100d4cULL)
+    F(w12,0x4cc5d4becb3e42b6ULL)
+    F(w13,0x597f299cfc657e2aULL)
+    F(w14,0x5fcb6fab3ad6faecULL)
+    F(w15,0x6c44198c4a475817ULL)
+
+    a += state[0];
+    b += state[1];
+    c += state[2];
+    d += state[3];
+    e += state[4];
+    f += state[5];
+    g += state[6];
+    h += state[7];
+  
+    state[0] = a;
+    state[1] = b;
+    state[2] = c;
+    state[3] = d;
+    state[4] = e;
+    state[5] = f;
+    state[6] = g;
+    state[7] = h;
+
+    in += 128;
+    inlen -= 128;
+  }
+
+  store_bigendian(statebytes +  0,state[0]);
+  store_bigendian(statebytes +  8,state[1]);
+  store_bigendian(statebytes + 16,state[2]);
+  store_bigendian(statebytes + 24,state[3]);
+  store_bigendian(statebytes + 32,state[4]);
+  store_bigendian(statebytes + 40,state[5]);
+  store_bigendian(statebytes + 48,state[6]);
+  store_bigendian(statebytes + 56,state[7]);
+
+  return 0;
+}
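As with the SHA-256 code, crypto_hashblocks stops at the last complete 128-byte block; finishing a SHA-512 hash requires the caller to append 0x80, zero padding, and the message length in bits as a 128-bit big-endian integer, then absorb the final one or two blocks. A padding sketch under those FIPS 180 rules (illustrative only, not NaCl's crypto_hash code; sha512_finish and its parameters are invented for the example, and totallen is assumed to be below 2^61 bytes):

    extern int crypto_hashblocks(unsigned char *,const unsigned char *,unsigned long long);

    /* tail: the leftover taillen < 128 bytes; totallen: total message length in bytes. */
    static void sha512_finish(unsigned char state[64],
                              const unsigned char *tail,unsigned long long taillen,
                              unsigned long long totallen)
    {
      unsigned char padded[256];
      unsigned long long i;
      unsigned long long bytes = (taillen < 112) ? 128 : 256;

      for (i = 0;i < taillen;++i) padded[i] = tail[i];
      padded[taillen] = 0x80;
      for (i = taillen + 1;i < bytes - 8;++i) padded[i] = 0;
      /* low half of the 128-bit big-endian bit count; the high half is the zeros above */
      for (i = 0;i < 8;++i) padded[bytes - 1 - i] = (totallen << 3) >> (8 * i);
      crypto_hashblocks(state,padded,bytes);
    }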
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/implementors b/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/sha512/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/selected b/nacl/nacl-20110221/crypto_hashblocks/sha512/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hashblocks/sha512/used b/nacl/nacl-20110221/crypto_hashblocks/sha512/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_hashblocks/try.c b/nacl/nacl-20110221/crypto_hashblocks/try.c
new file mode 100644
index 00000000..720d2fb3
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_hashblocks/try.c
@@ -0,0 +1,79 @@
+/*
+ * crypto_hashblocks/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "crypto_hashblocks.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_hashblocks_IMPLEMENTATION;
+
+#define MAXTEST_BYTES (10000 + crypto_hashblocks_STATEBYTES)
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *h;
+static unsigned char *h2;
+static unsigned char *m;
+static unsigned char *m2;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  h = alignedcalloc(crypto_hashblocks_STATEBYTES);
+  h2 = alignedcalloc(crypto_hashblocks_STATEBYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  m2 = alignedcalloc(MAXTEST_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_hashblocks(h,m,TUNE_BYTES);
+}
+
+char checksum[crypto_hashblocks_STATEBYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long hlen = crypto_hashblocks_STATEBYTES;
+    long long mlen = i;
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+    if (crypto_hashblocks(h,m,mlen) != 0) return "crypto_hashblocks returns nonzero";
+    for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_hashblocks writes to input";
+    for (j = -16;j < 0;++j) if (h2[j] != h[j]) return "crypto_hashblocks writes before output";
+    for (j = hlen;j < hlen + 16;++j) if (h2[j] != h[j]) return "crypto_hashblocks writes after output";
+    for (j = 0;j < hlen;++j) m2[j] = h2[j];
+    if (crypto_hashblocks(h2,m2,mlen) != 0) return "crypto_hashblocks returns nonzero";
+    if (crypto_hashblocks(m2,m2,mlen) != 0) return "crypto_hashblocks returns nonzero";
+    for (j = 0;j < hlen;++j) if (m2[j] != h2[j]) return "crypto_hashblocks does not handle overlap";
+    for (j = 0;j < mlen;++j) m[j] ^= h[j % hlen];
+    m[mlen] = h[0];
+  }
+  if (crypto_hashblocks(h,m,CHECKSUM_BYTES) != 0) return "crypto_hashblocks returns nonzero";
+
+  for (i = 0;i < crypto_hashblocks_STATEBYTES;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_hashblocks/wrapper-empty.cpp b/nacl/nacl-20110221/crypto_hashblocks/wrapper-empty.cpp
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/measure.c b/nacl/nacl-20110221/crypto_onetimeauth/measure.c
new file mode 100644
index 00000000..6d3ddfd5
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/measure.c
@@ -0,0 +1,69 @@
+#include "crypto_onetimeauth.h"
+#include "randombytes.h"
+#include "cpucycles.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_onetimeauth_IMPLEMENTATION;
+const char *implementationversion = crypto_onetimeauth_VERSION;
+const char *sizenames[] = { "outputbytes", "keybytes", 0 };
+const long long sizes[] = { crypto_onetimeauth_BYTES, crypto_onetimeauth_KEYBYTES };
+
+#define MAXTEST_BYTES 4096
+#ifdef SUPERCOP
+#define MGAP 8192
+#else
+#define MGAP 8
+#endif
+
+static unsigned char *k;
+static unsigned char *m;
+static unsigned char *h;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  k = alignedcalloc(crypto_onetimeauth_KEYBYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  h = alignedcalloc(crypto_onetimeauth_BYTES);
+}
+
+#define TIMINGS 15
+static long long cycles[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+  int mlen;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) {
+      randombytes(k,crypto_onetimeauth_KEYBYTES);
+      randombytes(m,mlen);
+      randombytes(h,crypto_onetimeauth_BYTES);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+        crypto_onetimeauth(h,m,mlen,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"cycles",cycles,TIMINGS);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+        crypto_onetimeauth_verify(h,m,mlen,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"verify_cycles",cycles,TIMINGS);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/api.h b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/api.h
new file mode 100644
index 00000000..acc133ed
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 16
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/auth.c b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/auth.c
new file mode 100644
index 00000000..a4a9c3f6
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/auth.c
@@ -0,0 +1,1616 @@
+/*
+20080910
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_onetimeauth.h"
+
+typedef unsigned char uchar;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+
+static const double poly1305_53_constants[] = {
+  0.00000000558793544769287109375 /* alpham80 = 3 2^(-29) */
+, 24.0 /* alpham48 = 3 2^3 */
+, 103079215104.0 /* alpham16 = 3 2^35 */
+, 6755399441055744.0 /* alpha0 = 3 2^51 */
+, 1770887431076116955136.0 /* alpha18 = 3 2^69 */
+, 29014219670751100192948224.0 /* alpha32 = 3 2^83 */
+, 7605903601369376408980219232256.0 /* alpha50 = 3 2^101 */
+, 124615124604835863084731911901282304.0 /* alpha64 = 3 2^115 */
+, 32667107224410092492483962313449748299776.0 /* alpha82 = 3 2^133 */
+, 535217884764734955396857238543560676143529984.0 /* alpha96 = 3 2^147 */
+, 35076039295941670036888435985190792471742381031424.0 /* alpha112 = 3 2^163 */
+, 9194973245195333150150082162901855101712434733101613056.0 /* alpha130 = 3 2^181 */
+, 0.0000000000000000000000000000000000000036734198463196484624023016788195177431833298649127735047148490821200539357960224151611328125 /* scale = 5 2^(-130) */
+, 6755408030990331.0 /* offset0 = alpha0 + 2^33 - 5 */
+, 29014256564239239022116864.0 /* offset1 = alpha32 + 2^65 - 2^33 */
+, 124615283061160854719918951570079744.0 /* offset2 = alpha64 + 2^97 - 2^65 */
+, 535219245894202480694386063513315216128475136.0 /* offset3 = alpha96 + 2^130 - 2^97 */
+} ;
+
+int crypto_onetimeauth(unsigned char *out,const unsigned char *m,unsigned long long l,const unsigned char *k)
+{
+  register const unsigned char *r = k;
+  register const unsigned char *s = k + 16;
+  double r0high_stack;
+  double r1high_stack;
+  double r1low_stack;
+  double sr1high_stack;
+  double r2low_stack;
+  double sr2high_stack;
+  double r0low_stack;
+  double sr1low_stack;
+  double r2high_stack;
+  double sr2low_stack;
+  double r3high_stack;
+  double sr3high_stack;
+  double r3low_stack;
+  double sr3low_stack;
+  int64 d0;
+  int64 d1;
+  int64 d2;
+  int64 d3;
+  register double scale;
+  register double alpha0;
+  register double alpha32;
+  register double alpha64;
+  register double alpha96;
+  register double alpha130;
+  register double h0;
+  register double h1;
+  register double h2;
+  register double h3;
+  register double h4;
+  register double h5;
+  register double h6;
+  register double h7;
+  register double y7;
+  register double y6;
+  register double y1;
+  register double y0;
+  register double y5;
+  register double y4;
+  register double x7;
+  register double x6;
+  register double x1;
+  register double x0;
+  register double y3;
+  register double y2;
+  register double r3low;
+  register double r0low;
+  register double r3high;
+  register double r0high;
+  register double sr1low;
+  register double x5;
+  register double r3lowx0;
+  register double sr1high;
+  register double x4;
+  register double r0lowx6;
+  register double r1low;
+  register double x3;
+  register double r3highx0;
+  register double r1high;
+  register double x2;
+  register double r0highx6;
+  register double sr2low;
+  register double r0lowx0;
+  register double sr2high;
+  register double sr1lowx6;
+  register double r2low;
+  register double r0highx0;
+  register double r2high;
+  register double sr1highx6;
+  register double sr3low;
+  register double r1lowx0;
+  register double sr3high;
+  register double sr2lowx6;
+  register double r1highx0;
+  register double sr2highx6;
+  register double r2lowx0;
+  register double sr3lowx6;
+  register double r2highx0;
+  register double sr3highx6;
+  register double r1highx4;
+  register double r1lowx4;
+  register double r0highx4;
+  register double r0lowx4;
+  register double sr3highx4;
+  register double sr3lowx4;
+  register double sr2highx4;
+  register double sr2lowx4;
+  register double r0lowx2;
+  register double r0highx2;
+  register double r1lowx2;
+  register double r1highx2;
+  register double r2lowx2;
+  register double r2highx2;
+  register double sr3lowx2;
+  register double sr3highx2;
+  register double z0;
+  register double z1;
+  register double z2;
+  register double z3;
+  register int64 r0;
+  register int64 r1;
+  register int64 r2;
+  register int64 r3;
+  register uint32 r00;
+  register uint32 r01;
+  register uint32 r02;
+  register uint32 r03;
+  register uint32 r10;
+  register uint32 r11;
+  register uint32 r12;
+  register uint32 r13;
+  register uint32 r20;
+  register uint32 r21;
+  register uint32 r22;
+  register uint32 r23;
+  register uint32 r30;
+  register uint32 r31;
+  register uint32 r32;
+  register uint32 r33;
+  register int64 m0;
+  register int64 m1;
+  register int64 m2;
+  register int64 m3;
+  register uint32 m00;
+  register uint32 m01;
+  register uint32 m02;
+  register uint32 m03;
+  register uint32 m10;
+  register uint32 m11;
+  register uint32 m12;
+  register uint32 m13;
+  register uint32 m20;
+  register uint32 m21;
+  register uint32 m22;
+  register uint32 m23;
+  register uint32 m30;
+  register uint32 m31;
+  register uint32 m32;
+  register uint64 m33;
+  register char *constants;
+  register int32 lbelow2;
+  register int32 lbelow3;
+  register int32 lbelow4;
+  register int32 lbelow5;
+  register int32 lbelow6;
+  register int32 lbelow7;
+  register int32 lbelow8;
+  register int32 lbelow9;
+  register int32 lbelow10;
+  register int32 lbelow11;
+  register int32 lbelow12;
+  register int32 lbelow13;
+  register int32 lbelow14;
+  register int32 lbelow15;
+  register double alpham80;
+  register double alpham48;
+  register double alpham16;
+  register double alpha18;
+  register double alpha50;
+  register double alpha82;
+  register double alpha112;
+  register double offset0;
+  register double offset1;
+  register double offset2;
+  register double offset3;
+  register uint32 s00;
+  register uint32 s01;
+  register uint32 s02;
+  register uint32 s03;
+  register uint32 s10;
+  register uint32 s11;
+  register uint32 s12;
+  register uint32 s13;
+  register uint32 s20;
+  register uint32 s21;
+  register uint32 s22;
+  register uint32 s23;
+  register uint32 s30;
+  register uint32 s31;
+  register uint32 s32;
+  register uint32 s33;
+  register uint64 bits32;
+  register uint64 f;
+  register uint64 f0;
+  register uint64 f1;
+  register uint64 f2;
+  register uint64 f3;
+  register uint64 f4;
+  register uint64 g;
+  register uint64 g0;
+  register uint64 g1;
+  register uint64 g2;
+  register uint64 g3;
+  register uint64 g4;
+
+  r00 = *(uchar *) (r + 0);
+  constants = (char *) &poly1305_53_constants;
+
+  r01 = *(uchar *) (r + 1);
+
+  r02 = *(uchar *) (r + 2);
+  r0 = 2151;
+
+  r03 = *(uchar *) (r + 3); r03 &= 15;
+  r0 <<= 51;
+
+  r10 = *(uchar *) (r + 4); r10 &= 252;
+  r01 <<= 8;
+  r0 += r00;
+
+  r11 = *(uchar *) (r + 5);
+  r02 <<= 16;
+  r0 += r01;
+
+  r12 = *(uchar *) (r + 6);
+  r03 <<= 24;
+  r0 += r02;
+
+  r13 = *(uchar *) (r + 7); r13 &= 15;
+  r1 = 2215;
+  r0 += r03;
+
+  d0 = r0;
+  r1 <<= 51;
+  r2 = 2279;
+
+  r20 = *(uchar *) (r + 8); r20 &= 252;
+  r11 <<= 8;
+  r1 += r10;
+
+  r21 = *(uchar *) (r + 9);
+  r12 <<= 16;
+  r1 += r11;
+
+  r22 = *(uchar *) (r + 10);
+  r13 <<= 24;
+  r1 += r12;
+
+  r23 = *(uchar *) (r + 11); r23 &= 15;
+  r2 <<= 51;
+  r1 += r13;
+
+  d1 = r1;
+  r21 <<= 8;
+  r2 += r20;
+
+  r30 = *(uchar *) (r + 12); r30 &= 252;
+  r22 <<= 16;
+  r2 += r21;
+
+  r31 = *(uchar *) (r + 13);
+  r23 <<= 24;
+  r2 += r22;
+
+  r32 = *(uchar *) (r + 14);
+  r2 += r23;
+  r3 = 2343;
+
+  d2 = r2;
+  r3 <<= 51;
+  alpha32 = *(double *) (constants + 40);
+
+  r33 = *(uchar *) (r + 15); r33 &= 15;
+  r31 <<= 8;
+  r3 += r30;
+
+  r32 <<= 16;
+  r3 += r31;
+
+  r33 <<= 24;
+  r3 += r32;
+
+  r3 += r33;
+  h0 = alpha32 - alpha32;
+
+  d3 = r3;
+  h1 = alpha32 - alpha32;
+
+  alpha0 = *(double *) (constants + 24);
+  h2 = alpha32 - alpha32;
+
+  alpha64 = *(double *) (constants + 56);
+  h3 = alpha32 - alpha32;
+
+  alpha18 = *(double *) (constants + 32);
+  h4 = alpha32 - alpha32;
+
+  r0low = *(double *) &d0;
+  h5 = alpha32 - alpha32;
+
+  r1low = *(double *) &d1;
+  h6 = alpha32 - alpha32;
+
+  r2low = *(double *) &d2;
+  h7 = alpha32 - alpha32;
+
+  alpha50 = *(double *) (constants + 48);
+  r0low -= alpha0;
+
+  alpha82 = *(double *) (constants + 64);
+  r1low -= alpha32;
+
+  scale = *(double *) (constants + 96);
+  r2low -= alpha64;
+
+  alpha96 = *(double *) (constants + 72);
+  r0high = r0low + alpha18;
+
+  r3low = *(double *) &d3;
+
+  alpham80 = *(double *) (constants + 0);
+  r1high = r1low + alpha50;
+  sr1low = scale * r1low;
+
+  alpham48 = *(double *) (constants + 8);
+  r2high = r2low + alpha82;
+  sr2low = scale * r2low;
+
+  r0high -= alpha18;
+  r0high_stack = r0high;
+
+  r3low -= alpha96;
+
+  r1high -= alpha50;
+  r1high_stack = r1high;
+
+  sr1high = sr1low + alpham80;
+
+  alpha112 = *(double *) (constants + 80);
+  r0low -= r0high;
+
+  alpham16 = *(double *) (constants + 16);
+  r2high -= alpha82;
+  sr3low = scale * r3low;
+
+  alpha130 = *(double *) (constants + 88);
+  sr2high = sr2low + alpham48;
+
+  r1low -= r1high;
+  r1low_stack = r1low;
+
+  sr1high -= alpham80;
+  sr1high_stack = sr1high;
+
+  r2low -= r2high;
+  r2low_stack = r2low;
+
+  sr2high -= alpham48;
+  sr2high_stack = sr2high;
+
+  r3high = r3low + alpha112;
+  r0low_stack = r0low;
+
+  sr1low -= sr1high;
+  sr1low_stack = sr1low;
+
+  sr3high = sr3low + alpham16;
+  r2high_stack = r2high;
+
+  sr2low -= sr2high;
+  sr2low_stack = sr2low;
+
+  r3high -= alpha112;
+  r3high_stack = r3high;
+
+
+  sr3high -= alpham16;
+  sr3high_stack = sr3high;
+
+
+  r3low -= r3high;
+  r3low_stack = r3low;
+
+
+  sr3low -= sr3high;
+  sr3low_stack = sr3low;
+
+if (l < 16) goto addatmost15bytes;
+
+  m00 = *(uchar *) (m + 0);
+  m0 = 2151;
+
+  m0 <<= 51;
+  m1 = 2215;
+  m01 = *(uchar *) (m + 1);
+
+  m1 <<= 51;
+  m2 = 2279;
+  m02 = *(uchar *) (m + 2);
+
+  m2 <<= 51;
+  m3 = 2343;
+  m03 = *(uchar *) (m + 3);
+
+  m10 = *(uchar *) (m + 4);
+  m01 <<= 8;
+  m0 += m00;
+
+  m11 = *(uchar *) (m + 5);
+  m02 <<= 16;
+  m0 += m01;
+
+  m12 = *(uchar *) (m + 6);
+  m03 <<= 24;
+  m0 += m02;
+
+  m13 = *(uchar *) (m + 7);
+  m3 <<= 51;
+  m0 += m03;
+
+  m20 = *(uchar *) (m + 8);
+  m11 <<= 8;
+  m1 += m10;
+
+  m21 = *(uchar *) (m + 9);
+  m12 <<= 16;
+  m1 += m11;
+
+  m22 = *(uchar *) (m + 10);
+  m13 <<= 24;
+  m1 += m12;
+
+  m23 = *(uchar *) (m + 11);
+  m1 += m13;
+
+  m30 = *(uchar *) (m + 12);
+  m21 <<= 8;
+  m2 += m20;
+
+  m31 = *(uchar *) (m + 13);
+  m22 <<= 16;
+  m2 += m21;
+
+  m32 = *(uchar *) (m + 14);
+  m23 <<= 24;
+  m2 += m22;
+
+  m33 = *(uchar *) (m + 15);
+  m2 += m23;
+
+  d0 = m0;
+  m31 <<= 8;
+  m3 += m30;
+
+  d1 = m1;
+  m32 <<= 16;
+  m3 += m31;
+
+  d2 = m2;
+  m33 += 256;
+
+  m33 <<= 24;
+  m3 += m32;
+
+  m3 += m33;
+  d3 = m3;
+
+  m += 16;
+  l -= 16;
+
+  z0 = *(double *) &d0;
+
+  z1 = *(double *) &d1;
+
+  z2 = *(double *) &d2;
+
+  z3 = *(double *) &d3;
+
+  z0 -= alpha0;
+
+  z1 -= alpha32;
+
+  z2 -= alpha64;
+
+  z3 -= alpha96;
+
+  h0 += z0;
+
+  h1 += z1;
+
+  h3 += z2;
+
+  h5 += z3;
+
+if (l < 16) goto multiplyaddatmost15bytes;
+
+multiplyaddatleast16bytes:;
+
+  m2 = 2279;
+  m20 = *(uchar *) (m + 8);
+  y7 = h7 + alpha130;
+
+  m2 <<= 51;
+  m3 = 2343;
+  m21 = *(uchar *) (m + 9);
+  y6 = h6 + alpha130;
+
+  m3 <<= 51;
+  m0 = 2151;
+  m22 = *(uchar *) (m + 10);
+  y1 = h1 + alpha32;
+
+  m0 <<= 51;
+  m1 = 2215;
+  m23 = *(uchar *) (m + 11);
+  y0 = h0 + alpha32;
+
+  m1 <<= 51;
+  m30 = *(uchar *) (m + 12);
+  y7 -= alpha130;
+
+  m21 <<= 8;
+  m2 += m20;
+  m31 = *(uchar *) (m + 13);
+  y6 -= alpha130;
+
+  m22 <<= 16;
+  m2 += m21;
+  m32 = *(uchar *) (m + 14);
+  y1 -= alpha32;
+
+  m23 <<= 24;
+  m2 += m22;
+  m33 = *(uchar *) (m + 15);
+  y0 -= alpha32;
+
+  m2 += m23;
+  m00 = *(uchar *) (m + 0);
+  y5 = h5 + alpha96;
+
+  m31 <<= 8;
+  m3 += m30;
+  m01 = *(uchar *) (m + 1);
+  y4 = h4 + alpha96;
+
+  m32 <<= 16;
+  m02 = *(uchar *) (m + 2);
+  x7 = h7 - y7;
+  y7 *= scale;
+
+  m33 += 256;
+  m03 = *(uchar *) (m + 3);
+  x6 = h6 - y6;
+  y6 *= scale;
+
+  m33 <<= 24;
+  m3 += m31;
+  m10 = *(uchar *) (m + 4);
+  x1 = h1 - y1;
+
+  m01 <<= 8;
+  m3 += m32;
+  m11 = *(uchar *) (m + 5);
+  x0 = h0 - y0;
+
+  m3 += m33;
+  m0 += m00;
+  m12 = *(uchar *) (m + 6);
+  y5 -= alpha96;
+
+  m02 <<= 16;
+  m0 += m01;
+  m13 = *(uchar *) (m + 7);
+  y4 -= alpha96;
+
+  m03 <<= 24;
+  m0 += m02;
+  d2 = m2;
+  x1 += y7;
+
+  m0 += m03;
+  d3 = m3;
+  x0 += y6;
+
+  m11 <<= 8;
+  m1 += m10;
+  d0 = m0;
+  x7 += y5;
+
+  m12 <<= 16;
+  m1 += m11;
+  x6 += y4;
+
+  m13 <<= 24;
+  m1 += m12;
+  y3 = h3 + alpha64;
+
+  m1 += m13;
+  d1 = m1;
+  y2 = h2 + alpha64;
+
+  x0 += x1;
+
+  x6 += x7;
+
+  y3 -= alpha64;
+  r3low = r3low_stack;
+
+  y2 -= alpha64;
+  r0low = r0low_stack;
+
+  x5 = h5 - y5;
+  r3lowx0 = r3low * x0;
+  r3high = r3high_stack;
+
+  x4 = h4 - y4;
+  r0lowx6 = r0low * x6;
+  r0high = r0high_stack;
+
+  x3 = h3 - y3;
+  r3highx0 = r3high * x0;
+  sr1low = sr1low_stack;
+
+  x2 = h2 - y2;
+  r0highx6 = r0high * x6;
+  sr1high = sr1high_stack;
+
+  x5 += y3;
+  r0lowx0 = r0low * x0;
+  r1low = r1low_stack;
+
+  h6 = r3lowx0 + r0lowx6;
+  sr1lowx6 = sr1low * x6;
+  r1high = r1high_stack;
+
+  x4 += y2;
+  r0highx0 = r0high * x0;
+  sr2low = sr2low_stack;
+
+  h7 = r3highx0 + r0highx6;
+  sr1highx6 = sr1high * x6;
+  sr2high = sr2high_stack;
+
+  x3 += y1;
+  r1lowx0 = r1low * x0;
+  r2low = r2low_stack;
+
+  h0 = r0lowx0 + sr1lowx6;
+  sr2lowx6 = sr2low * x6;
+  r2high = r2high_stack;
+
+  x2 += y0;
+  r1highx0 = r1high * x0;
+  sr3low = sr3low_stack;
+
+  h1 = r0highx0 + sr1highx6;
+  sr2highx6 = sr2high * x6;
+  sr3high = sr3high_stack;
+
+  x4 += x5;
+  r2lowx0 = r2low * x0;
+  z2 = *(double *) &d2;
+
+  h2 = r1lowx0 + sr2lowx6;
+  sr3lowx6 = sr3low * x6;
+
+  x2 += x3;
+  r2highx0 = r2high * x0;
+  z3 = *(double *) &d3;
+
+  h3 = r1highx0 + sr2highx6;
+  sr3highx6 = sr3high * x6;
+
+  r1highx4 = r1high * x4;
+  z2 -= alpha64;
+
+  h4 = r2lowx0 + sr3lowx6;
+  r1lowx4 = r1low * x4;
+
+  r0highx4 = r0high * x4;
+  z3 -= alpha96;
+
+  h5 = r2highx0 + sr3highx6;
+  r0lowx4 = r0low * x4;
+
+  h7 += r1highx4;
+  sr3highx4 = sr3high * x4;
+
+  h6 += r1lowx4;
+  sr3lowx4 = sr3low * x4;
+
+  h5 += r0highx4;
+  sr2highx4 = sr2high * x4;
+
+  h4 += r0lowx4;
+  sr2lowx4 = sr2low * x4;
+
+  h3 += sr3highx4;
+  r0lowx2 = r0low * x2;
+
+  h2 += sr3lowx4;
+  r0highx2 = r0high * x2;
+
+  h1 += sr2highx4;
+  r1lowx2 = r1low * x2;
+
+  h0 += sr2lowx4;
+  r1highx2 = r1high * x2;
+
+  h2 += r0lowx2;
+  r2lowx2 = r2low * x2;
+
+  h3 += r0highx2;
+  r2highx2 = r2high * x2;
+
+  h4 += r1lowx2;
+  sr3lowx2 = sr3low * x2;
+
+  h5 += r1highx2;
+  sr3highx2 = sr3high * x2;
+  alpha0 = *(double *) (constants + 24);
+
+  m += 16;
+  h6 += r2lowx2;
+
+  l -= 16;
+  h7 += r2highx2;
+
+  z1 = *(double *) &d1;
+  h0 += sr3lowx2;
+
+  z0 = *(double *) &d0;
+  h1 += sr3highx2;
+
+  z1 -= alpha32;
+
+  z0 -= alpha0;
+
+  h5 += z3;
+
+  h3 += z2;
+
+  h1 += z1;
+
+  h0 += z0;
+
+if (l >= 16) goto multiplyaddatleast16bytes;
+
+multiplyaddatmost15bytes:;
+
+  y7 = h7 + alpha130;
+
+  y6 = h6 + alpha130;
+
+  y1 = h1 + alpha32;
+
+  y0 = h0 + alpha32;
+
+  y7 -= alpha130;
+
+  y6 -= alpha130;
+
+  y1 -= alpha32;
+
+  y0 -= alpha32;
+
+  y5 = h5 + alpha96;
+
+  y4 = h4 + alpha96;
+
+  x7 = h7 - y7;
+  y7 *= scale;
+
+  x6 = h6 - y6;
+  y6 *= scale;
+
+  x1 = h1 - y1;
+
+  x0 = h0 - y0;
+
+  y5 -= alpha96;
+
+  y4 -= alpha96;
+
+  x1 += y7;
+
+  x0 += y6;
+
+  x7 += y5;
+
+  x6 += y4;
+
+  y3 = h3 + alpha64;
+
+  y2 = h2 + alpha64;
+
+  x0 += x1;
+
+  x6 += x7;
+
+  y3 -= alpha64;
+  r3low = r3low_stack;
+
+  y2 -= alpha64;
+  r0low = r0low_stack;
+
+  x5 = h5 - y5;
+  r3lowx0 = r3low * x0;
+  r3high = r3high_stack;
+
+  x4 = h4 - y4;
+  r0lowx6 = r0low * x6;
+  r0high = r0high_stack;
+
+  x3 = h3 - y3;
+  r3highx0 = r3high * x0;
+  sr1low = sr1low_stack;
+
+  x2 = h2 - y2;
+  r0highx6 = r0high * x6;
+  sr1high = sr1high_stack;
+
+  x5 += y3;
+  r0lowx0 = r0low * x0;
+  r1low = r1low_stack;
+
+  h6 = r3lowx0 + r0lowx6;
+  sr1lowx6 = sr1low * x6;
+  r1high = r1high_stack;
+
+  x4 += y2;
+  r0highx0 = r0high * x0;
+  sr2low = sr2low_stack;
+
+  h7 = r3highx0 + r0highx6;
+  sr1highx6 = sr1high * x6;
+  sr2high = sr2high_stack;
+
+  x3 += y1;
+  r1lowx0 = r1low * x0;
+  r2low = r2low_stack;
+
+  h0 = r0lowx0 + sr1lowx6;
+  sr2lowx6 = sr2low * x6;
+  r2high = r2high_stack;
+
+  x2 += y0;
+  r1highx0 = r1high * x0;
+  sr3low = sr3low_stack;
+
+  h1 = r0highx0 + sr1highx6;
+  sr2highx6 = sr2high * x6;
+  sr3high = sr3high_stack;
+
+  x4 += x5;
+  r2lowx0 = r2low * x0;
+
+  h2 = r1lowx0 + sr2lowx6;
+  sr3lowx6 = sr3low * x6;
+
+  x2 += x3;
+  r2highx0 = r2high * x0;
+
+  h3 = r1highx0 + sr2highx6;
+  sr3highx6 = sr3high * x6;
+
+  r1highx4 = r1high * x4;
+
+  h4 = r2lowx0 + sr3lowx6;
+  r1lowx4 = r1low * x4;
+
+  r0highx4 = r0high * x4;
+
+  h5 = r2highx0 + sr3highx6;
+  r0lowx4 = r0low * x4;
+
+  h7 += r1highx4;
+  sr3highx4 = sr3high * x4;
+
+  h6 += r1lowx4;
+  sr3lowx4 = sr3low * x4;
+
+  h5 += r0highx4;
+  sr2highx4 = sr2high * x4;
+
+  h4 += r0lowx4;
+  sr2lowx4 = sr2low * x4;
+
+  h3 += sr3highx4;
+  r0lowx2 = r0low * x2;
+
+  h2 += sr3lowx4;
+  r0highx2 = r0high * x2;
+
+  h1 += sr2highx4;
+  r1lowx2 = r1low * x2;
+
+  h0 += sr2lowx4;
+  r1highx2 = r1high * x2;
+
+  h2 += r0lowx2;
+  r2lowx2 = r2low * x2;
+
+  h3 += r0highx2;
+  r2highx2 = r2high * x2;
+
+  h4 += r1lowx2;
+  sr3lowx2 = sr3low * x2;
+
+  h5 += r1highx2;
+  sr3highx2 = sr3high * x2;
+
+  h6 += r2lowx2;
+
+  h7 += r2highx2;
+
+  h0 += sr3lowx2;
+
+  h1 += sr3highx2;
+
+addatmost15bytes:;
+
+if (l == 0) goto nomorebytes;
+
+  lbelow2 = l - 2;
+
+  lbelow3 = l - 3;
+
+  lbelow2 >>= 31;
+  lbelow4 = l - 4;
+
+  m00 = *(uchar *) (m + 0);
+  lbelow3 >>= 31;
+  m += lbelow2;
+
+  m01 = *(uchar *) (m + 1);
+  lbelow4 >>= 31;
+  m += lbelow3;
+
+  m02 = *(uchar *) (m + 2);
+  m += lbelow4;
+  m0 = 2151;
+
+  m03 = *(uchar *) (m + 3);
+  m0 <<= 51;
+  m1 = 2215;
+
+  m0 += m00;
+  m01 &= ~lbelow2;
+
+  m02 &= ~lbelow3;
+  m01 -= lbelow2;
+
+  m01 <<= 8;
+  m03 &= ~lbelow4;
+
+  m0 += m01;
+  lbelow2 -= lbelow3;
+
+  m02 += lbelow2;
+  lbelow3 -= lbelow4;
+
+  m02 <<= 16;
+  m03 += lbelow3;
+
+  m03 <<= 24;
+  m0 += m02;
+
+  m0 += m03;
+  lbelow5 = l - 5;
+
+  lbelow6 = l - 6;
+  lbelow7 = l - 7;
+
+  lbelow5 >>= 31;
+  lbelow8 = l - 8;
+
+  lbelow6 >>= 31;
+  m += lbelow5;
+
+  m10 = *(uchar *) (m + 4);
+  lbelow7 >>= 31;
+  m += lbelow6;
+
+  m11 = *(uchar *) (m + 5);
+  lbelow8 >>= 31;
+  m += lbelow7;
+
+  m12 = *(uchar *) (m + 6);
+  m1 <<= 51;
+  m += lbelow8;
+
+  m13 = *(uchar *) (m + 7);
+  m10 &= ~lbelow5;
+  lbelow4 -= lbelow5;
+
+  m10 += lbelow4;
+  lbelow5 -= lbelow6;
+
+  m11 &= ~lbelow6;
+  m11 += lbelow5;
+
+  m11 <<= 8;
+  m1 += m10;
+
+  m1 += m11;
+  m12 &= ~lbelow7;
+
+  lbelow6 -= lbelow7;
+  m13 &= ~lbelow8;
+
+  m12 += lbelow6;
+  lbelow7 -= lbelow8;
+
+  m12 <<= 16;
+  m13 += lbelow7;
+
+  m13 <<= 24;
+  m1 += m12;
+
+  m1 += m13;
+  m2 = 2279;
+
+  lbelow9 = l - 9;
+  m3 = 2343;
+
+  lbelow10 = l - 10;
+  lbelow11 = l - 11;
+
+  lbelow9 >>= 31;
+  lbelow12 = l - 12;
+
+  lbelow10 >>= 31;
+  m += lbelow9;
+
+  m20 = *(uchar *) (m + 8);
+  lbelow11 >>= 31;
+  m += lbelow10;
+
+  m21 = *(uchar *) (m + 9);
+  lbelow12 >>= 31;
+  m += lbelow11;
+
+  m22 = *(uchar *) (m + 10);
+  m2 <<= 51;
+  m += lbelow12;
+
+  m23 = *(uchar *) (m + 11);
+  m20 &= ~lbelow9;
+  lbelow8 -= lbelow9;
+
+  m20 += lbelow8;
+  lbelow9 -= lbelow10;
+
+  m21 &= ~lbelow10;
+  m21 += lbelow9;
+
+  m21 <<= 8;
+  m2 += m20;
+
+  m2 += m21;
+  m22 &= ~lbelow11;
+
+  lbelow10 -= lbelow11;
+  m23 &= ~lbelow12;
+
+  m22 += lbelow10;
+  lbelow11 -= lbelow12;
+
+  m22 <<= 16;
+  m23 += lbelow11;
+
+  m23 <<= 24;
+  m2 += m22;
+
+  m3 <<= 51;
+  lbelow13 = l - 13;
+
+  lbelow13 >>= 31;
+  lbelow14 = l - 14;
+
+  lbelow14 >>= 31;
+  m += lbelow13;
+  lbelow15 = l - 15;
+
+  m30 = *(uchar *) (m + 12);
+  lbelow15 >>= 31;
+  m += lbelow14;
+
+  m31 = *(uchar *) (m + 13);
+  m += lbelow15;
+  m2 += m23;
+
+  m32 = *(uchar *) (m + 14);
+  m30 &= ~lbelow13;
+  lbelow12 -= lbelow13;
+
+  m30 += lbelow12;
+  lbelow13 -= lbelow14;
+
+  m3 += m30;
+  m31 &= ~lbelow14;
+
+  m31 += lbelow13;
+  m32 &= ~lbelow15;
+
+  m31 <<= 8;
+  lbelow14 -= lbelow15;
+
+  m3 += m31;
+  m32 += lbelow14;
+  d0 = m0;
+
+  m32 <<= 16;
+  m33 = lbelow15 + 1;
+  d1 = m1;
+
+  m33 <<= 24;
+  m3 += m32;
+  d2 = m2;
+
+  m3 += m33;
+  d3 = m3;
+
+  alpha0 = *(double *) (constants + 24);
+
+  z3 = *(double *) &d3;
+
+  z2 = *(double *) &d2;
+
+  z1 = *(double *) &d1;
+
+  z0 = *(double *) &d0;
+
+  z3 -= alpha96;
+
+  z2 -= alpha64;
+
+  z1 -= alpha32;
+
+  z0 -= alpha0;
+
+  h5 += z3;
+
+  h3 += z2;
+
+  h1 += z1;
+
+  h0 += z0;
+
+  y7 = h7 + alpha130;
+
+  y6 = h6 + alpha130;
+
+  y1 = h1 + alpha32;
+
+  y0 = h0 + alpha32;
+
+  y7 -= alpha130;
+
+  y6 -= alpha130;
+
+  y1 -= alpha32;
+
+  y0 -= alpha32;
+
+  y5 = h5 + alpha96;
+
+  y4 = h4 + alpha96;
+
+  x7 = h7 - y7;
+  y7 *= scale;
+
+  x6 = h6 - y6;
+  y6 *= scale;
+
+  x1 = h1 - y1;
+
+  x0 = h0 - y0;
+
+  y5 -= alpha96;
+
+  y4 -= alpha96;
+
+  x1 += y7;
+
+  x0 += y6;
+
+  x7 += y5;
+
+  x6 += y4;
+
+  y3 = h3 + alpha64;
+
+  y2 = h2 + alpha64;
+
+  x0 += x1;
+
+  x6 += x7;
+
+  y3 -= alpha64;
+  r3low = r3low_stack;
+
+  y2 -= alpha64;
+  r0low = r0low_stack;
+
+  x5 = h5 - y5;
+  r3lowx0 = r3low * x0;
+  r3high = r3high_stack;
+
+  x4 = h4 - y4;
+  r0lowx6 = r0low * x6;
+  r0high = r0high_stack;
+
+  x3 = h3 - y3;
+  r3highx0 = r3high * x0;
+  sr1low = sr1low_stack;
+
+  x2 = h2 - y2;
+  r0highx6 = r0high * x6;
+  sr1high = sr1high_stack;
+
+  x5 += y3;
+  r0lowx0 = r0low * x0;
+  r1low = r1low_stack;
+
+  h6 = r3lowx0 + r0lowx6;
+  sr1lowx6 = sr1low * x6;
+  r1high = r1high_stack;
+
+  x4 += y2;
+  r0highx0 = r0high * x0;
+  sr2low = sr2low_stack;
+
+  h7 = r3highx0 + r0highx6;
+  sr1highx6 = sr1high * x6;
+  sr2high = sr2high_stack;
+
+  x3 += y1;
+  r1lowx0 = r1low * x0;
+  r2low = r2low_stack;
+
+  h0 = r0lowx0 + sr1lowx6;
+  sr2lowx6 = sr2low * x6;
+  r2high = r2high_stack;
+
+  x2 += y0;
+  r1highx0 = r1high * x0;
+  sr3low = sr3low_stack;
+
+  h1 = r0highx0 + sr1highx6;
+  sr2highx6 = sr2high * x6;
+  sr3high = sr3high_stack;
+
+  x4 += x5;
+  r2lowx0 = r2low * x0;
+
+  h2 = r1lowx0 + sr2lowx6;
+  sr3lowx6 = sr3low * x6;
+
+  x2 += x3;
+  r2highx0 = r2high * x0;
+
+  h3 = r1highx0 + sr2highx6;
+  sr3highx6 = sr3high * x6;
+
+  r1highx4 = r1high * x4;
+
+  h4 = r2lowx0 + sr3lowx6;
+  r1lowx4 = r1low * x4;
+
+  r0highx4 = r0high * x4;
+
+  h5 = r2highx0 + sr3highx6;
+  r0lowx4 = r0low * x4;
+
+  h7 += r1highx4;
+  sr3highx4 = sr3high * x4;
+
+  h6 += r1lowx4;
+  sr3lowx4 = sr3low * x4;
+
+  h5 += r0highx4;
+  sr2highx4 = sr2high * x4;
+
+  h4 += r0lowx4;
+  sr2lowx4 = sr2low * x4;
+
+  h3 += sr3highx4;
+  r0lowx2 = r0low * x2;
+
+  h2 += sr3lowx4;
+  r0highx2 = r0high * x2;
+
+  h1 += sr2highx4;
+  r1lowx2 = r1low * x2;
+
+  h0 += sr2lowx4;
+  r1highx2 = r1high * x2;
+
+  h2 += r0lowx2;
+  r2lowx2 = r2low * x2;
+
+  h3 += r0highx2;
+  r2highx2 = r2high * x2;
+
+  h4 += r1lowx2;
+  sr3lowx2 = sr3low * x2;
+
+  h5 += r1highx2;
+  sr3highx2 = sr3high * x2;
+
+  h6 += r2lowx2;
+
+  h7 += r2highx2;
+
+  h0 += sr3lowx2;
+
+  h1 += sr3highx2;
+
+
+nomorebytes:;
+
+  offset0 = *(double *) (constants + 104);
+  y7 = h7 + alpha130;
+
+  offset1 = *(double *) (constants + 112);
+  y0 = h0 + alpha32;
+
+  offset2 = *(double *) (constants + 120);
+  y1 = h1 + alpha32;
+
+  offset3 = *(double *) (constants + 128);
+  y2 = h2 + alpha64;
+
+  y7 -= alpha130;
+
+  y3 = h3 + alpha64;
+
+  y4 = h4 + alpha96;
+
+  y5 = h5 + alpha96;
+
+  x7 = h7 - y7;
+  y7 *= scale;
+
+  y0 -= alpha32;
+
+  y1 -= alpha32;
+
+  y2 -= alpha64;
+
+  h6 += x7;
+
+  y3 -= alpha64;
+
+  y4 -= alpha96;
+
+  y5 -= alpha96;
+
+  y6 = h6 + alpha130;
+
+  x0 = h0 - y0;
+
+  x1 = h1 - y1;
+
+  x2 = h2 - y2;
+
+  y6 -= alpha130;
+
+  x0 += y7;
+
+  x3 = h3 - y3;
+
+  x4 = h4 - y4;
+
+  x5 = h5 - y5;
+
+  x6 = h6 - y6;
+
+  y6 *= scale;
+
+  x2 += y0;
+
+  x3 += y1;
+
+  x4 += y2;
+
+  x0 += y6;
+
+  x5 += y3;
+
+  x6 += y4;
+
+  x2 += x3;
+
+  x0 += x1;
+
+  x4 += x5;
+
+  x6 += y5;
+
+  x2 += offset1;
+  *(double *) &d1 = x2;
+
+  x0 += offset0;
+  *(double *) &d0 = x0;
+
+  x4 += offset2;
+  *(double *) &d2 = x4;
+
+  x6 += offset3;
+  *(double *) &d3 = x6;
+
+
+
+
+  f0 = d0;
+
+  f1 = d1;
+  bits32 = -1;
+
+  f2 = d2;
+  bits32 >>= 32;
+
+  f3 = d3;
+  f = f0 >> 32;
+
+  f0 &= bits32;
+  f &= 255;
+
+  f1 += f;
+  g0 = f0 + 5;
+
+  g = g0 >> 32;
+  g0 &= bits32;
+
+  f = f1 >> 32;
+  f1 &= bits32;
+
+  f &= 255;
+  g1 = f1 + g;
+
+  g = g1 >> 32;
+  f2 += f;
+
+  f = f2 >> 32;
+  g1 &= bits32;
+
+  f2 &= bits32;
+  f &= 255;
+
+  f3 += f;
+  g2 = f2 + g;
+
+  g = g2 >> 32;
+  g2 &= bits32;
+
+  f4 = f3 >> 32;
+  f3 &= bits32;
+
+  f4 &= 255;
+  g3 = f3 + g;
+
+  g = g3 >> 32;
+  g3 &= bits32;
+
+  g4 = f4 + g;
+
+  g4 = g4 - 4;
+  s00 = *(uchar *) (s + 0);
+
+  f = (int64) g4 >> 63;
+  s01 = *(uchar *) (s + 1);
+
+  f0 &= f;
+  g0 &= ~f;
+  s02 = *(uchar *) (s + 2);
+
+  f1 &= f;
+  f0 |= g0;
+  s03 = *(uchar *) (s + 3);
+
+  g1 &= ~f;
+  f2 &= f;
+  s10 = *(uchar *) (s + 4);
+
+  f3 &= f;
+  g2 &= ~f;
+  s11 = *(uchar *) (s + 5);
+
+  g3 &= ~f;
+  f1 |= g1;
+  s12 = *(uchar *) (s + 6);
+
+  f2 |= g2;
+  f3 |= g3;
+  s13 = *(uchar *) (s + 7);
+
+  s01 <<= 8;
+  f0 += s00;
+  s20 = *(uchar *) (s + 8);
+
+  s02 <<= 16;
+  f0 += s01;
+  s21 = *(uchar *) (s + 9);
+
+  s03 <<= 24;
+  f0 += s02;
+  s22 = *(uchar *) (s + 10);
+
+  s11 <<= 8;
+  f1 += s10;
+  s23 = *(uchar *) (s + 11);
+
+  s12 <<= 16;
+  f1 += s11;
+  s30 = *(uchar *) (s + 12);
+
+  s13 <<= 24;
+  f1 += s12;
+  s31 = *(uchar *) (s + 13);
+
+  f0 += s03;
+  f1 += s13;
+  s32 = *(uchar *) (s + 14);
+
+  s21 <<= 8;
+  f2 += s20;
+  s33 = *(uchar *) (s + 15);
+
+  s22 <<= 16;
+  f2 += s21;
+
+  s23 <<= 24;
+  f2 += s22;
+
+  s31 <<= 8;
+  f3 += s30;
+
+  s32 <<= 16;
+  f3 += s31;
+
+  s33 <<= 24;
+  f3 += s32;
+
+  f2 += s23;
+  f3 += s33;
+
+  *(uchar *) (out + 0) = f0;
+  f0 >>= 8;
+  *(uchar *) (out + 1) = f0;
+  f0 >>= 8;
+  *(uchar *) (out + 2) = f0;
+  f0 >>= 8;
+  *(uchar *) (out + 3) = f0;
+  f0 >>= 8;
+  f1 += f0;
+
+  *(uchar *) (out + 4) = f1;
+  f1 >>= 8;
+  *(uchar *) (out + 5) = f1;
+  f1 >>= 8;
+  *(uchar *) (out + 6) = f1;
+  f1 >>= 8;
+  *(uchar *) (out + 7) = f1;
+  f1 >>= 8;
+  f2 += f1;
+
+  *(uchar *) (out + 8) = f2;
+  f2 >>= 8;
+  *(uchar *) (out + 9) = f2;
+  f2 >>= 8;
+  *(uchar *) (out + 10) = f2;
+  f2 >>= 8;
+  *(uchar *) (out + 11) = f2;
+  f2 >>= 8;
+  f3 += f2;
+
+  *(uchar *) (out + 12) = f3;
+  f3 >>= 8;
+  *(uchar *) (out + 13) = f3;
+  f3 >>= 8;
+  *(uchar *) (out + 14) = f3;
+  f3 >>= 8;
+  *(uchar *) (out + 15) = f3;
+
+  return 0;
+}
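For orientation, the quantity this file evaluates in double-precision arithmetic is the standard Poly1305 tag: split the message into 16-byte chunks c_1,...,c_q, append a 1 byte to each chunk (that is what the "m33 += 256" and "m33 = lbelow15 + 1" lines contribute), clamp the first 16 key bytes to obtain r, take the last 16 key bytes as s, and output

    tag = (((c_1*r^q + c_2*r^(q-1) + ... + c_q*r) mod (2^130 - 5)) + s) mod 2^128.

The unexplained integers 2151, 2215, 2279 and 2343, shifted left by 51 and combined with a 32-bit limb, form IEEE-754 bit patterns whose values are 3*2^51 + limb, 3*2^83 + limb*2^32, 3*2^115 + limb*2^64 and 3*2^147 + limb*2^96; subtracting alpha0, alpha32, alpha64 and alpha96 (the constants 3*2^51, 3*2^83, 3*2^115 and 3*2^147 from the table above) therefore recovers each limb exactly as a double. That is a reading of the code rather than something its comments state, but it is consistent with the alpha/offset table.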
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/verify.c b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/verify.c
new file mode 100644
index 00000000..c7e063f1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/53/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_16.h"
+#include "crypto_onetimeauth.h"
+
+int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[16];
+  crypto_onetimeauth(correct,in,inlen,k);
+  return crypto_verify_16(h,correct);
+}
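The verifier recomputes the tag and compares it with crypto_verify_16, which runs in time independent of the data, so a forgery attempt learns nothing about the expected tag from the comparison's timing. A usage sketch (illustrative; a Poly1305 key must authenticate at most one message, and in NaCl crypto_secretbox derives a fresh one-time key from the stream cipher for every nonce):

    #include "crypto_onetimeauth.h"

    int tag_then_check(const unsigned char *m,unsigned long long mlen,
                       const unsigned char *k) /* crypto_onetimeauth_KEYBYTES = 32 bytes */
    {
      unsigned char a[crypto_onetimeauth_BYTES];       /* 16-byte authenticator */
      crypto_onetimeauth(a,m,mlen,k);
      return crypto_onetimeauth_verify(a,m,mlen,k);    /* 0 if the tag matches, -1 otherwise */
    }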
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/api.h b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/api.h
new file mode 100644
index 00000000..acc133ed
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 16
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/auth.s b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/auth.s
new file mode 100644
index 00000000..5212a3e7
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/auth.s
@@ -0,0 +1,2787 @@
+
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: stack64 r11_stack
+
+# qhasm: stack64 r12_stack
+
+# qhasm: stack64 r13_stack
+
+# qhasm: stack64 r14_stack
+
+# qhasm: stack64 r15_stack
+
+# qhasm: stack64 rbx_stack
+
+# qhasm: stack64 rbp_stack
+
+# qhasm: int64 out
+
+# qhasm: stack64 out_stack
+
+# qhasm: int64 m
+
+# qhasm: int64 l
+
+# qhasm: int64 k
+
+# qhasm: stack64 k_stack
+
+# qhasm: int64 m0
+
+# qhasm: int64 m1
+
+# qhasm: int64 m2
+
+# qhasm: int64 m3
+
+# qhasm: float80 a0
+
+# qhasm: float80 a1
+
+# qhasm: float80 a2
+
+# qhasm: float80 a3
+
+# qhasm: float80 h0
+
+# qhasm: float80 h1
+
+# qhasm: float80 h2
+
+# qhasm: float80 h3
+
+# qhasm: float80 x0
+
+# qhasm: float80 x1
+
+# qhasm: float80 x2
+
+# qhasm: float80 x3
+
+# qhasm: float80 y0
+
+# qhasm: float80 y1
+
+# qhasm: float80 y2
+
+# qhasm: float80 y3
+
+# qhasm: float80 r0x0
+
+# qhasm: float80 r1x0
+
+# qhasm: float80 r2x0
+
+# qhasm: float80 r3x0
+
+# qhasm: float80 r0x1
+
+# qhasm: float80 r1x1
+
+# qhasm: float80 r2x1
+
+# qhasm: float80 sr3x1
+
+# qhasm: float80 r0x2
+
+# qhasm: float80 r1x2
+
+# qhasm: float80 sr2x2
+
+# qhasm: float80 sr3x2
+
+# qhasm: float80 r0x3
+
+# qhasm: float80 sr1x3
+
+# qhasm: float80 sr2x3
+
+# qhasm: float80 sr3x3
+
+# qhasm: stack64 d0
+
+# qhasm: stack64 d1
+
+# qhasm: stack64 d2
+
+# qhasm: stack64 d3
+
+# qhasm: stack64 r0
+
+# qhasm: stack64 r1
+
+# qhasm: stack64 r2
+
+# qhasm: stack64 r3
+
+# qhasm: stack64 sr1
+
+# qhasm: stack64 sr2
+
+# qhasm: stack64 sr3
+
+# qhasm: enter crypto_onetimeauth_poly1305_amd64
+.text
+.p2align 5
+.globl _crypto_onetimeauth_poly1305_amd64
+.globl crypto_onetimeauth_poly1305_amd64
+_crypto_onetimeauth_poly1305_amd64:
+crypto_onetimeauth_poly1305_amd64:
+mov %rsp,%r11
+and $31,%r11
+add $192,%r11
+sub %r11,%rsp
+
+# qhasm: input out
+
+# qhasm: input m
+
+# qhasm: input l
+
+# qhasm: input k
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=32(%rsp)
+movq %r11,32(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=40(%rsp)
+movq %r12,40(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=48(%rsp)
+movq %r13,48(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=56(%rsp)
+movq %r14,56(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=64(%rsp)
+movq %r15,64(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=72(%rsp)
+movq %rbx,72(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=80(%rsp)
+movq %rbp,80(%rsp)
+
+# qhasm:   round *(uint16 *) &crypto_onetimeauth_poly1305_amd64_rounding
+fldcw crypto_onetimeauth_poly1305_amd64_rounding(%rip)
+
+# qhasm:   m0 = *(uint32 *) (k + 0)
+# asm 1: movl   0(<k=int64#4),>m0=int64#5d
+# asm 2: movl   0(<k=%rcx),>m0=%r8d
+movl   0(%rcx),%r8d
+
+# qhasm:   m1 = *(uint32 *) (k + 4)
+# asm 1: movl   4(<k=int64#4),>m1=int64#6d
+# asm 2: movl   4(<k=%rcx),>m1=%r9d
+movl   4(%rcx),%r9d
+
+# qhasm:   m2 = *(uint32 *) (k + 8)
+# asm 1: movl   8(<k=int64#4),>m2=int64#7d
+# asm 2: movl   8(<k=%rcx),>m2=%eax
+movl   8(%rcx),%eax
+
+# qhasm:   m3 = *(uint32 *) (k + 12)
+# asm 1: movl   12(<k=int64#4),>m3=int64#8d
+# asm 2: movl   12(<k=%rcx),>m3=%r10d
+movl   12(%rcx),%r10d
+
+# qhasm:   out_stack = out
+# asm 1: movq <out=int64#1,>out_stack=stack64#8
+# asm 2: movq <out=%rdi,>out_stack=88(%rsp)
+movq %rdi,88(%rsp)
+
+# qhasm:   k_stack = k
+# asm 1: movq <k=int64#4,>k_stack=stack64#9
+# asm 2: movq <k=%rcx,>k_stack=96(%rsp)
+movq %rcx,96(%rsp)
+
+# qhasm:   d0 top = 0x43300000
+# asm 1: movl  $0x43300000,>d0=stack64#10
+# asm 2: movl  $0x43300000,>d0=108(%rsp)
+movl  $0x43300000,108(%rsp)
+
+# qhasm:   d1 top = 0x45300000
+# asm 1: movl  $0x45300000,>d1=stack64#11
+# asm 2: movl  $0x45300000,>d1=116(%rsp)
+movl  $0x45300000,116(%rsp)
+
+# qhasm:   d2 top = 0x47300000
+# asm 1: movl  $0x47300000,>d2=stack64#12
+# asm 2: movl  $0x47300000,>d2=124(%rsp)
+movl  $0x47300000,124(%rsp)
+
+# qhasm:   d3 top = 0x49300000
+# asm 1: movl  $0x49300000,>d3=stack64#13
+# asm 2: movl  $0x49300000,>d3=132(%rsp)
+movl  $0x49300000,132(%rsp)
+
+# qhasm:   (uint32) m0 &= 0x0fffffff
+# asm 1: and  $0x0fffffff,<m0=int64#5d
+# asm 2: and  $0x0fffffff,<m0=%r8d
+and  $0x0fffffff,%r8d
+
+# qhasm:   (uint32) m1 &= 0x0ffffffc
+# asm 1: and  $0x0ffffffc,<m1=int64#6d
+# asm 2: and  $0x0ffffffc,<m1=%r9d
+and  $0x0ffffffc,%r9d
+
+# qhasm:   (uint32) m2 &= 0x0ffffffc
+# asm 1: and  $0x0ffffffc,<m2=int64#7d
+# asm 2: and  $0x0ffffffc,<m2=%eax
+and  $0x0ffffffc,%eax
+
+# qhasm:   (uint32) m3 &= 0x0ffffffc
+# asm 1: and  $0x0ffffffc,<m3=int64#8d
+# asm 2: and  $0x0ffffffc,<m3=%r10d
+and  $0x0ffffffc,%r10d
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int64#5d,<d0=stack64#10
+# asm 2: movl <m0=%r8d,<d0=104(%rsp)
+movl %r8d,104(%rsp)
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int64#6d,<d1=stack64#11
+# asm 2: movl <m1=%r9d,<d1=112(%rsp)
+movl %r9d,112(%rsp)
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int64#7d,<d2=stack64#12
+# asm 2: movl <m2=%eax,<d2=120(%rsp)
+movl %eax,120(%rsp)
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int64#8d,<d3=stack64#13
+# asm 2: movl <m3=%r10d,<d3=128(%rsp)
+movl %r10d,128(%rsp)
+
+# qhasm:   a0 = *(float64 *) &d0
+# asm 1: fldl <d0=stack64#10
+# asm 2: fldl <d0=104(%rsp)
+fldl 104(%rsp)
+# comment:fpstackfrombottom:<a0#28:
+
+# qhasm:   a0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<a0#28:
+
+# qhasm:   a1 = *(float64 *) &d1
+# asm 1: fldl <d1=stack64#11
+# asm 2: fldl <d1=112(%rsp)
+fldl 112(%rsp)
+# comment:fpstackfrombottom:<a0#28:<a1#29:
+
+# qhasm:   a1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<a0#28:<a1#29:
+
+# qhasm:   a2 = *(float64 *) &d2
+# asm 1: fldl <d2=stack64#12
+# asm 2: fldl <d2=120(%rsp)
+fldl 120(%rsp)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:
+
+# qhasm:   a2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:
+
+# qhasm:   a3 = *(float64 *) &d3
+# asm 1: fldl <d3=stack64#13
+# asm 2: fldl <d3=128(%rsp)
+fldl 128(%rsp)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:<a3#31:
+
+# qhasm:   a3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3(%rip)
+# comment:fpstackfrombottom:<a0#28:<a1#29:<a2#30:<a3#31:
+
+# qhasm: internal stacktop a0
+# asm 1: fxch <a0=float80#4
+# asm 2: fxch <a0=%st(3)
+fxch %st(3)
+
+# qhasm:   *(float64 *) &r0 = a0
+# asm 1: fstpl >r0=stack64#14
+# asm 2: fstpl >r0=136(%rsp)
+fstpl 136(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a1#29:<a2#30:
+
+# qhasm: internal stacktop a1
+# asm 1: fxch <a1=float80#2
+# asm 2: fxch <a1=%st(1)
+fxch %st(1)
+
+# qhasm:   *(float64 *) &r1 = a1
+# asm 1: fstl >r1=stack64#15
+# asm 2: fstl >r1=144(%rsp)
+fstl 144(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a2#30:<a1#29:
+
+# qhasm:   a1 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<a3#31:<a2#30:<a1#29:
+
+# qhasm:   *(float64 *) &sr1 = a1
+# asm 1: fstpl >sr1=stack64#16
+# asm 2: fstpl >sr1=152(%rsp)
+fstpl 152(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a2#30:
+
+# qhasm:   *(float64 *) &r2 = a2
+# asm 1: fstl >r2=stack64#17
+# asm 2: fstl >r2=160(%rsp)
+fstl 160(%rsp)
+# comment:fpstackfrombottom:<a3#31:<a2#30:
+
+# qhasm:   a2 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<a3#31:<a2#30:
+
+# qhasm:   *(float64 *) &sr2 = a2
+# asm 1: fstpl >sr2=stack64#18
+# asm 2: fstpl >sr2=168(%rsp)
+fstpl 168(%rsp)
+# comment:fpstackfrombottom:<a3#31:
+
+# qhasm:   *(float64 *) &r3 = a3
+# asm 1: fstl >r3=stack64#19
+# asm 2: fstl >r3=176(%rsp)
+fstl 176(%rsp)
+# comment:fpstackfrombottom:<a3#31:
+
+# qhasm:   a3 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<a3#31:
+
+# qhasm:   *(float64 *) &sr3 = a3
+# asm 1: fstpl >sr3=stack64#20
+# asm 2: fstpl >sr3=184(%rsp)
+fstpl 184(%rsp)
+# comment:fpstackfrombottom:
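+# note: r0..r3 now hold the clamped r limbs, weighted by 2^0, 2^32, 2^64, 2^96, as
+# doubles; sr1..sr3 = r_i * scale, where scale (defined alongside the other
+# crypto_onetimeauth_poly1305_amd64 constants) is 5*2^-130, so partial products
+# reaching 2^130 can be folded back modulo 2^130 - 5.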
+
+# qhasm:   h3 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:
+
+# qhasm:   h2 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:<h2#40:
+
+# qhasm:   h1 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h0 = 0
+fldz
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
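+# note: h0..h3 are the four limbs of the Poly1305 accumulator, kept on the x87
+# stack for the remainder of the function.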
+
+# qhasm:                          unsigned<? l - 16
+# asm 1: cmp  $16,<l=int64#3
+# asm 2: cmp  $16,<l=%rdx
+cmp  $16,%rdx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: goto addatmost15bytes if unsigned<
+jb ._addatmost15bytes
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: initialatleast16bytes:
+._initialatleast16bytes:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
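+# note: each full 16-byte block is packed into the low words of d0..d3 and added
+# to h; subtracting doffset3minustwo128 (= doffset3 - 2^128) instead of doffset3
+# both removes the packing offset and adds the implicit 2^128 padding bit.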
+
+# qhasm:   m3 = *(uint32 *) (m + 12)
+# asm 1: movl   12(<m=int64#2),>m3=int64#1d
+# asm 2: movl   12(<m=%rsi),>m3=%edi
+movl   12(%rsi),%edi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m2 = *(uint32 *) (m + 8)
+# asm 1: movl   8(<m=int64#2),>m2=int64#4d
+# asm 2: movl   8(<m=%rsi),>m2=%ecx
+movl   8(%rsi),%ecx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m1 = *(uint32 *) (m + 4)
+# asm 1: movl   4(<m=int64#2),>m1=int64#5d
+# asm 2: movl   4(<m=%rsi),>m1=%r8d
+movl   4(%rsi),%r8d
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m0 = *(uint32 *) (m + 0)
+# asm 1: movl   0(<m=int64#2),>m0=int64#6d
+# asm 2: movl   0(<m=%rsi),>m0=%r9d
+movl   0(%rsi),%r9d
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int64#1d,<d3=stack64#13
+# asm 2: movl <m3=%edi,<d3=128(%rsp)
+movl %edi,128(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int64#4d,<d2=stack64#12
+# asm 2: movl <m2=%ecx,<d2=120(%rsp)
+movl %ecx,120(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int64#5d,<d1=stack64#11
+# asm 2: movl <m1=%r8d,<d1=112(%rsp)
+movl %r8d,112(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int64#6d,<d0=stack64#10
+# asm 2: movl <m0=%r9d,<d0=104(%rsp)
+movl %r9d,104(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m += 16
+# asm 1: add  $16,<m=int64#2
+# asm 2: add  $16,<m=%rsi
+add  $16,%rsi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   l -= 16
+# asm 1: sub  $16,<l=int64#3
+# asm 2: sub  $16,<l=%rdx
+sub  $16,%rdx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   h3 += *(float64 *) &d3
+# asm 1: faddl <d3=stack64#13
+# asm 2: faddl <d3=128(%rsp)
+faddl 128(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm:   h3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3minustwo128
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128(%rip)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+
+# qhasm:   h1 += *(float64 *) &d1
+# asm 1: faddl <d1=stack64#11
+# asm 2: faddl <d1=112(%rsp)
+faddl 112(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h3#39:<h1#41:
+
+# qhasm:   h1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h3#39:<h1#41:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm:   h2 += *(float64 *) &d2
+# asm 1: faddl <d2=stack64#12
+# asm 2: faddl <d2=120(%rsp)
+faddl 120(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h1#41:<h3#39:<h2#40:
+
+# qhasm:   h2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<h0#42:<h1#41:<h3#39:<h2#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += *(float64 *) &d0
+# asm 1: faddl <d0=stack64#10
+# asm 2: faddl <d0=104(%rsp)
+faddl 104(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   h0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:                                  unsigned<? l - 16
+# asm 1: cmp  $16,<l=int64#3
+# asm 2: cmp  $16,<l=%rdx
+cmp  $16,%rdx
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: goto multiplyaddatmost15bytes if unsigned<
+jb ._multiplyaddatmost15bytes
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: multiplyaddatleast16bytes:
+._multiplyaddatleast16bytes:
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
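+# note: main loop: load the next 16-byte block into d0..d3, carry the accumulator,
+# multiply it by r modulo 2^130 - 5, then add the freshly loaded block.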
+
+# qhasm:   m3 = *(uint32 *) (m + 12)
+# asm 1: movl   12(<m=int64#2),>m3=int64#1d
+# asm 2: movl   12(<m=%rsi),>m3=%edi
+movl   12(%rsi),%edi
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   m2 = *(uint32 *) (m + 8)
+# asm 1: movl   8(<m=int64#2),>m2=int64#4d
+# asm 2: movl   8(<m=%rsi),>m2=%ecx
+movl   8(%rsi),%ecx
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   m1 = *(uint32 *) (m + 4)
+# asm 1: movl   4(<m=int64#2),>m1=int64#5d
+# asm 2: movl   4(<m=%rsi),>m1=%r8d
+movl   4(%rsi),%r8d
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   m0 = *(uint32 *) (m + 0)
+# asm 1: movl   0(<m=int64#2),>m0=int64#6d
+# asm 2: movl   0(<m=%rsi),>m0=%r9d
+movl   0(%rsi),%r9d
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int64#1d,<d3=stack64#13
+# asm 2: movl <m3=%edi,<d3=128(%rsp)
+movl %edi,128(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int64#4d,<d2=stack64#12
+# asm 2: movl <m2=%ecx,<d2=120(%rsp)
+movl %ecx,120(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int64#5d,<d1=stack64#11
+# asm 2: movl <m1=%r8d,<d1=112(%rsp)
+movl %r8d,112(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int64#6d,<d0=stack64#10
+# asm 2: movl <m0=%r9d,<d0=104(%rsp)
+movl %r9d,104(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   m += 16
+# asm 1: add  $16,<m=int64#2
+# asm 2: add  $16,<m=%rsi
+add  $16,%rsi
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   l -= 16
+# asm 1: sub  $16,<l=int64#3
+# asm 2: sub  $16,<l=%rdx
+sub  $16,%rdx
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
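+# note: the alpha130/alpha32/alpha64/alpha96 add-then-subtract pairs below use the
+# round-to-nearest trick to split the h limbs at bits 130, 32, 64 and 96, regrouping
+# them into bounded limbs x0..x3; the part of h3 at or above 2^130 is multiplied by
+# scale (5*2^-130), i.e. reduced modulo 2^130 - 5, before being folded into x0.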
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#3,<x0=float80#1
+# asm 2: fadd <h3=%st(2),<x0=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#3
+# asm 2: fsubr <x0=%st(0),<h3=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#53:<x1#54:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#2
+# asm 2: faddp <h0=%st(0),<x0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#5,<x2=float80#1
+# asm 2: fadd <h1=%st(4),<x2=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#5
+# asm 2: fsubr <x2=%st(0),<h1=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#7,<x3=float80#1
+# asm 2: fadd <h2=%st(6),<x3=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm:   h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#7
+# asm 2: fsubr <x3=%st(0),<h2=%st(6)
+fsubr %st(0),%st(6)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:<x3#56:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#7
+# asm 2: fxch <h2=%st(6)
+fxch %st(6)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#2
+# asm 2: faddp <h2=%st(0),<x2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h1#41:<h3#39:<x1#54:<x0#53:<x2#55:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#6
+# asm 2: faddp <h3=%st(0),<x3=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#56:<h1#41:<x2#55:<x1#54:<x0#53:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#4
+# asm 2: fxch <h1=%st(3)
+fxch %st(3)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:
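+# note: x0..x3 is now the carried accumulator; the schoolbook product x*r follows,
+# using sr1..sr3 for the partial products whose weight reaches 2^130 (the cleared
+# low bits of r1..r3 keep those foldings exact).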
+
+# qhasm:   h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#19
+# asm 2: fldl <r3=176(%rsp)
+fldl 176(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:
+
+# qhasm:   h3 *= x0
+# asm 1: fmul <x0=float80#4,<h3=float80#1
+# asm 2: fmul <x0=%st(3),<h3=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:
+
+# qhasm:   h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:
+
+# qhasm:   h2 *= x0
+# asm 1: fmul <x0=float80#5,<h2=float80#1
+# asm 2: fmul <x0=%st(4),<h2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:
+
+# qhasm:   h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h1 *= x0
+# asm 1: fmul <x0=float80#6,<h1=float80#1
+# asm 2: fmul <x0=%st(5),<h1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<x0#53:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#7
+# asm 2: fmulp <x0=%st(0),<h0=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r2x1#57:
+
+# qhasm:   r2x1 *= x1
+# asm 1: fmul <x1=float80#5,<r2x1=float80#1
+# asm 2: fmul <x1=%st(4),<r2x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r2x1#57:
+
+# qhasm:   h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r1x1#58:
+
+# qhasm:   r1x1 *= x1
+# asm 1: fmul <x1=float80#5,<r1x1=float80#1
+# asm 2: fmul <x1=%st(4),<r1x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r1x1#58:
+
+# qhasm:   h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r0x1#59:
+
+# qhasm:   r0x1 *= x1
+# asm 1: fmul <x1=float80#5,<r0x1=float80#1
+# asm 2: fmul <x1=%st(4),<r0x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<r0x1#59:
+
+# qhasm:   h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<x1#54:<h3#39:<h2#40:<h1#41:<sr3x1#60:
+
+# qhasm:   sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#5
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<sr3x1#60:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#4
+# asm 2: fxch <sr3x1=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#6
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm:   r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r1x2#61:
+
+# qhasm:   r1x2 *= x2
+# asm 1: fmul <x2=float80#5,<r1x2=float80#1
+# asm 2: fmul <x2=%st(4),<r1x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r1x2#61:
+
+# qhasm:   h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm:   r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r0x2#62:
+
+# qhasm:   r0x2 *= x2
+# asm 1: fmul <x2=float80#5,<r0x2=float80#1
+# asm 2: fmul <x2=%st(4),<r0x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<r0x2#62:
+
+# qhasm:   h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm:   sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<sr3x2#63:
+
+# qhasm:   sr3x2 *= x2
+# asm 1: fmul <x2=float80#5,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(4),<sr3x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<sr3x2#63:
+
+# qhasm:   h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#4
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:
+
+# qhasm:   sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<x2#55:<h1#41:<h3#39:<h2#40:<sr2x2#64:
+
+# qhasm:   sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#5
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<sr2x2#64:<h1#41:<h3#39:<h2#40:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#4
+# asm 2: fxch <sr2x2=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#5
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm:   r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:<r0x3#65:
+
+# qhasm:   r0x3 *= x3
+# asm 1: fmul <x3=float80#6,<r0x3=float80#1
+# asm 2: fmul <x3=%st(5),<r0x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:<r0x3#65:
+
+# qhasm:   h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm:   stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:<sr3x3#66:
+
+# qhasm:   sr3x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr3x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:<sr3x3#66:
+
+# qhasm:   h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#4
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:
+
+# qhasm:   sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:<sr2x3#67:
+
+# qhasm:   sr2x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr2x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:<sr2x3#67:
+
+# qhasm:   h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#2
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:
+
+# qhasm:   sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#16
+# asm 2: fldl <sr1=152(%rsp)
+fldl 152(%rsp)
+# comment:fpstackfrombottom:<x3#56:<h3#39:<h2#40:<h0#42:<h1#41:<sr1x3#68:
+
+# qhasm:   sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#6
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<sr1x3#68:<h3#39:<h2#40:<h0#42:<h1#41:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#5
+# asm 2: fxch <sr1x3=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#2
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm:                                    unsigned<? l - 16
+# asm 1: cmp  $16,<l=int64#3
+# asm 2: cmp  $16,<l=%rdx
+cmp  $16,%rdx
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm:   stacktop h3
+# asm 1: fxch <h3=float80#3
+# asm 2: fxch <h3=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:
+
+# qhasm:   y3 = *(float64 *) &d3
+# asm 1: fldl <d3=stack64#13
+# asm 2: fldl <d3=128(%rsp)
+fldl 128(%rsp)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:<y3#70:
+
+# qhasm:   y3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3minustwo128
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128(%rip)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:<y3#70:
+
+# qhasm:   h3 += y3
+# asm 1: faddp <y3=float80#1,<h3=float80#2
+# asm 2: faddp <y3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h2#40:<h3#39:
+
+# qhasm:   stacktop h2
+# asm 1: fxch <h2=float80#2
+# asm 2: fxch <h2=%st(1)
+fxch %st(1)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm:   y2 = *(float64 *) &d2
+# asm 1: fldl <d2=stack64#12
+# asm 2: fldl <d2=120(%rsp)
+fldl 120(%rsp)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:<y2#71:
+
+# qhasm:   y2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:<y2#71:
+
+# qhasm:   h2 += y2
+# asm 1: faddp <y2=float80#1,<h2=float80#2
+# asm 2: faddp <y2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm:   stacktop h1
+# asm 1: fxch <h1=float80#4
+# asm 2: fxch <h1=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:
+
+# qhasm:   y1 = *(float64 *) &d1
+# asm 1: fldl <d1=stack64#11
+# asm 2: fldl <d1=112(%rsp)
+fldl 112(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:<y1#72:
+
+# qhasm:   y1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:<y1#72:
+
+# qhasm:   h1 += y1
+# asm 1: faddp <y1=float80#1,<h1=float80#2
+# asm 2: faddp <y1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h3#39:<h1#41:
+
+# qhasm:   stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm:   y0 = *(float64 *) &d0
+# asm 1: fldl <d0=stack64#10
+# asm 2: fldl <d0=104(%rsp)
+fldl 104(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<y0#73:
+
+# qhasm:   y0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<y0#73:
+
+# qhasm:   h0 += y0
+# asm 1: faddp <y0=float80#1,<h0=float80#2
+# asm 2: faddp <y0=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: goto multiplyaddatleast16bytes if !unsigned<
+jae ._multiplyaddatleast16bytes
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+# comment:fp stack unchanged by fallthrough
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
+
+# qhasm: multiplyaddatmost15bytes:
+._multiplyaddatmost15bytes:
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:
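+# note: reached when fewer than 16 bytes remain: the block already added to h still
+# has to be multiplied by r, so the same carry-and-multiply sequence as the loop
+# body runs once more, without loading another block.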
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#3,<x0=float80#1
+# asm 2: fadd <h3=%st(2),<x0=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#3
+# asm 2: fsubr <x0=%st(0),<h3=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#6,<x2=float80#1
+# asm 2: fadd <h1=%st(5),<x2=%st(0)
+fadd %st(5),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#6
+# asm 2: fsubr <x2=%st(0),<h1=%st(5)
+fsubr %st(0),%st(5)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#8,<x3=float80#1
+# asm 2: fadd <h2=%st(7),<x3=%st(0)
+fadd %st(7),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm:   h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#8
+# asm 2: fsubr <x3=%st(0),<h2=%st(7)
+fsubr %st(0),%st(7)
+# comment:fpstackfrombottom:<h2#40:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:<x3#77:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#8
+# asm 2: fxch <h2=%st(7)
+fxch %st(7)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#2
+# asm 2: faddp <h2=%st(0),<x2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<h1#41:<h3#39:<h0#42:<x0#74:<x1#75:<x2#76:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#6
+# asm 2: fxch <h1=%st(5)
+fxch %st(5)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h3#39:<h0#42:<x0#74:<x1#75:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#6
+# asm 2: faddp <h3=%st(0),<x3=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<x0#74:
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#2
+# asm 2: faddp <h0=%st(0),<x0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:
+
+# qhasm:   h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#19
+# asm 2: fldl <r3=176(%rsp)
+fldl 176(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:
+
+# qhasm:   h3 *= x0
+# asm 1: fmul <x0=float80#2,<h3=float80#1
+# asm 2: fmul <x0=%st(1),<h3=%st(0)
+fmul %st(1),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:
+
+# qhasm:   h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:
+
+# qhasm:   h2 *= x0
+# asm 1: fmul <x0=float80#3,<h2=float80#1
+# asm 2: fmul <x0=%st(2),<h2=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:
+
+# qhasm:   h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h1 *= x0
+# asm 1: fmul <x0=float80#4,<h1=float80#1
+# asm 2: fmul <x0=%st(3),<h1=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<x0#74:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#5
+# asm 2: fmulp <x0=%st(0),<h0=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r2x1#78:
+
+# qhasm:   r2x1 *= x1
+# asm 1: fmul <x1=float80#6,<r2x1=float80#1
+# asm 2: fmul <x1=%st(5),<r2x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r2x1#78:
+
+# qhasm:   h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r1x1#79:
+
+# qhasm:   r1x1 *= x1
+# asm 1: fmul <x1=float80#6,<r1x1=float80#1
+# asm 2: fmul <x1=%st(5),<r1x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r1x1#79:
+
+# qhasm:   h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r0x1#80:
+
+# qhasm:   r0x1 *= x1
+# asm 1: fmul <x1=float80#6,<r0x1=float80#1
+# asm 2: fmul <x1=%st(5),<r0x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<r0x1#80:
+
+# qhasm:   h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<x1#75:<h0#42:<h3#39:<h2#40:<h1#41:<sr3x1#81:
+
+# qhasm:   sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#6
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<sr3x1#81:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#5
+# asm 2: fxch <sr3x1=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#4
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm:   r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r1x2#82:
+
+# qhasm:   r1x2 *= x2
+# asm 1: fmul <x2=float80#6,<r1x2=float80#1
+# asm 2: fmul <x2=%st(5),<r1x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r1x2#82:
+
+# qhasm:   h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm:   r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r0x2#83:
+
+# qhasm:   r0x2 *= x2
+# asm 1: fmul <x2=float80#6,<r0x2=float80#1
+# asm 2: fmul <x2=%st(5),<r0x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<r0x2#83:
+
+# qhasm:   h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm:   sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<sr3x2#84:
+
+# qhasm:   sr3x2 *= x2
+# asm 1: fmul <x2=float80#6,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(5),<sr3x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<sr3x2#84:
+
+# qhasm:   h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#5
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm:   sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#77:<x2#76:<h1#41:<h0#42:<h3#39:<h2#40:<sr2x2#85:
+
+# qhasm:   sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#6
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#77:<sr2x2#85:<h1#41:<h0#42:<h3#39:<h2#40:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#5
+# asm 2: fxch <sr2x2=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#3
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm:   r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<r0x3#86:
+
+# qhasm:   r0x3 *= x3
+# asm 1: fmul <x3=float80#6,<r0x3=float80#1
+# asm 2: fmul <x3=%st(5),<r0x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<r0x3#86:
+
+# qhasm:   h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm:   sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr3x3#87:
+
+# qhasm:   sr3x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr3x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr3x3#87:
+
+# qhasm:   h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#5
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm:   sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr2x3#88:
+
+# qhasm:   sr2x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr2x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr2x3#88:
+
+# qhasm:   h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#4
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm:   sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#16
+# asm 2: fldl <sr1=152(%rsp)
+fldl 152(%rsp)
+# comment:fpstackfrombottom:<x3#77:<h2#40:<h1#41:<h0#42:<h3#39:<sr1x3#89:
+
+# qhasm:   sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#6
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<sr1x3#89:<h2#40:<h1#41:<h0#42:<h3#39:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#5
+# asm 2: fxch <sr1x3=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#2
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fp stack unchanged by fallthrough
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: addatmost15bytes:
+._addatmost15bytes:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
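+# note: the remaining 1..15 bytes are copied into a zeroed 16-byte lastchunk, a
+# 0x01 byte is appended right after them (the Poly1305 padding), and the padded
+# block is then absorbed like a full block, except that plain doffset3 is used
+# because the padding bit is already part of the data.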
+
+# qhasm:                     =? l - 0
+# asm 1: cmp  $0,<l=int64#3
+# asm 2: cmp  $0,<l=%rdx
+cmp  $0,%rdx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: goto nomorebytes if =
+je ._nomorebytes
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: stack128 lastchunk
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: int64 destination
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: int64 numbytes
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   ((uint32 *)&lastchunk)[0] = 0
+# asm 1: movl $0,>lastchunk=stack128#1
+# asm 2: movl $0,>lastchunk=0(%rsp)
+movl $0,0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   ((uint32 *)&lastchunk)[1] = 0
+# asm 1: movl $0,4+<lastchunk=stack128#1
+# asm 2: movl $0,4+<lastchunk=0(%rsp)
+movl $0,4+0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   ((uint32 *)&lastchunk)[2] = 0
+# asm 1: movl $0,8+<lastchunk=stack128#1
+# asm 2: movl $0,8+<lastchunk=0(%rsp)
+movl $0,8+0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   ((uint32 *)&lastchunk)[3] = 0
+# asm 1: movl $0,12+<lastchunk=stack128#1
+# asm 2: movl $0,12+<lastchunk=0(%rsp)
+movl $0,12+0(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   destination = &lastchunk
+# asm 1: leaq <lastchunk=stack128#1,>destination=int64#1
+# asm 2: leaq <lastchunk=0(%rsp),>destination=%rdi
+leaq 0(%rsp),%rdi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   numbytes = l
+# asm 1: mov  <l=int64#3,>numbytes=int64#4
+# asm 2: mov  <l=%rdx,>numbytes=%rcx
+mov  %rdx,%rcx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   while (numbytes) { *destination++ = *m++; --numbytes }
+rep movsb
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   *(uint8 *) (destination + 0) = 1
+# asm 1: movb   $1,0(<destination=int64#1)
+# asm 2: movb   $1,0(<destination=%rdi)
+movb   $1,0(%rdi)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m3 = ((uint32 *)&lastchunk)[3]
+# asm 1: movl 12+<lastchunk=stack128#1,>m3=int64#1d
+# asm 2: movl 12+<lastchunk=0(%rsp),>m3=%edi
+movl 12+0(%rsp),%edi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m2 = ((uint32 *)&lastchunk)[2]
+# asm 1: movl 8+<lastchunk=stack128#1,>m2=int64#2d
+# asm 2: movl 8+<lastchunk=0(%rsp),>m2=%esi
+movl 8+0(%rsp),%esi
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m1 = ((uint32 *)&lastchunk)[1]
+# asm 1: movl 4+<lastchunk=stack128#1,>m1=int64#3d
+# asm 2: movl 4+<lastchunk=0(%rsp),>m1=%edx
+movl 4+0(%rsp),%edx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   m0 = ((uint32 *)&lastchunk)[0]
+# asm 1: movl <lastchunk=stack128#1,>m0=int64#4d
+# asm 2: movl <lastchunk=0(%rsp),>m0=%ecx
+movl 0(%rsp),%ecx
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int64#1d,<d3=stack64#13
+# asm 2: movl <m3=%edi,<d3=128(%rsp)
+movl %edi,128(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int64#2d,<d2=stack64#12
+# asm 2: movl <m2=%esi,<d2=120(%rsp)
+movl %esi,120(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int64#3d,<d1=stack64#11
+# asm 2: movl <m1=%edx,<d1=112(%rsp)
+movl %edx,112(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int64#4d,<d0=stack64#10
+# asm 2: movl <m0=%ecx,<d0=104(%rsp)
+movl %ecx,104(%rsp)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   h3 += *(float64 *) &d3
+# asm 1: faddl <d3=stack64#13
+# asm 2: faddl <d3=128(%rsp)
+faddl 128(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm:   h3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset3
+fsubl crypto_onetimeauth_poly1305_amd64_doffset3(%rip)
+# comment:fpstackfrombottom:<h0#42:<h2#40:<h1#41:<h3#39:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm:   h2 += *(float64 *) &d2
+# asm 1: faddl <d2=stack64#12
+# asm 2: faddl <d2=120(%rsp)
+faddl 120(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h1#41:<h2#40:
+
+# qhasm:   h2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset2
+fsubl crypto_onetimeauth_poly1305_amd64_doffset2(%rip)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h1#41:<h2#40:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+
+# qhasm:   h1 += *(float64 *) &d1
+# asm 1: faddl <d1=stack64#11
+# asm 2: faddl <d1=112(%rsp)
+faddl 112(%rsp)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset1
+fsubl crypto_onetimeauth_poly1305_amd64_doffset1(%rip)
+# comment:fpstackfrombottom:<h0#42:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += *(float64 *) &d0
+# asm 1: faddl <d0=stack64#10
+# asm 2: faddl <d0=104(%rsp)
+faddl 104(%rsp)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm:   h0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_doffset0
+fsubl crypto_onetimeauth_poly1305_amd64_doffset0(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#4,<x0=float80#1
+# asm 2: fadd <h3=%st(3),<x0=%st(0)
+fadd %st(3),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#4
+# asm 2: fsubr <x0=%st(0),<h3=%st(3)
+fsubr %st(0),%st(3)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#7,<x2=float80#1
+# asm 2: fadd <h1=%st(6),<x2=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#7
+# asm 2: fsubr <x2=%st(0),<h1=%st(6)
+fsubr %st(0),%st(6)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#6,<x3=float80#1
+# asm 2: fadd <h2=%st(5),<x3=%st(0)
+fadd %st(5),%st(0)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm:   h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#6
+# asm 2: fsubr <x3=%st(0),<h2=%st(5)
+fsubr %st(0),%st(5)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<h0#42:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#5
+# asm 2: fxch <h0=%st(4)
+fxch %st(4)
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#4
+# asm 2: faddp <h0=%st(0),<x0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h1#41:<h3#39:<h2#40:<x3#101:<x0#98:<x1#99:<x2#100:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#7
+# asm 2: fxch <h1=%st(6)
+fxch %st(6)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h3#39:<h2#40:<x3#101:<x0#98:<x1#99:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#4
+# asm 2: fxch <h2=%st(3)
+fxch %st(3)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#6
+# asm 2: faddp <h2=%st(0),<x2=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h3#39:<x1#99:<x3#101:<x0#98:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#2
+# asm 2: faddp <h3=%st(0),<x3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:
+
+# qhasm:   h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#19
+# asm 2: fldl <r3=176(%rsp)
+fldl 176(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:
+
+# qhasm:   h3 *= x0
+# asm 1: fmul <x0=float80#4,<h3=float80#1
+# asm 2: fmul <x0=%st(3),<h3=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:
+
+# qhasm:   h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:
+
+# qhasm:   h2 *= x0
+# asm 1: fmul <x0=float80#5,<h2=float80#1
+# asm 2: fmul <x0=%st(4),<h2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:
+
+# qhasm:   h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h1 *= x0
+# asm 1: fmul <x0=float80#6,<h1=float80#1
+# asm 2: fmul <x0=%st(5),<h1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#7
+# asm 2: fmulp <x0=%st(0),<h0=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#17
+# asm 2: fldl <r2=160(%rsp)
+fldl 160(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r2x1#102:
+
+# qhasm:   r2x1 *= x1
+# asm 1: fmul <x1=float80#6,<r2x1=float80#1
+# asm 2: fmul <x1=%st(5),<r2x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r2x1#102:
+
+# qhasm:   h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r1x1#103:
+
+# qhasm:   r1x1 *= x1
+# asm 1: fmul <x1=float80#6,<r1x1=float80#1
+# asm 2: fmul <x1=%st(5),<r1x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r1x1#103:
+
+# qhasm:   h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r0x1#104:
+
+# qhasm:   r0x1 *= x1
+# asm 1: fmul <x1=float80#6,<r0x1=float80#1
+# asm 2: fmul <x1=%st(5),<r0x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<r0x1#104:
+
+# qhasm:   h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm:   sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<x1#99:<x3#101:<h3#39:<h2#40:<h1#41:<sr3x1#105:
+
+# qhasm:   sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#6
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<sr3x1#105:<x3#101:<h3#39:<h2#40:<h1#41:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#5
+# asm 2: fxch <sr3x1=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#6
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm:   r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#15
+# asm 2: fldl <r1=144(%rsp)
+fldl 144(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r1x2#106:
+
+# qhasm:   r1x2 *= x2
+# asm 1: fmul <x2=float80#7,<r1x2=float80#1
+# asm 2: fmul <x2=%st(6),<r1x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r1x2#106:
+
+# qhasm:   h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm:   r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r0x2#107:
+
+# qhasm:   r0x2 *= x2
+# asm 1: fmul <x2=float80#7,<r0x2=float80#1
+# asm 2: fmul <x2=%st(6),<r0x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<r0x2#107:
+
+# qhasm:   h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm:   sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<sr3x2#108:
+
+# qhasm:   sr3x2 *= x2
+# asm 1: fmul <x2=float80#7,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(6),<sr3x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<sr3x2#108:
+
+# qhasm:   h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#5
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm:   sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<x2#100:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:<sr2x2#109:
+
+# qhasm:   sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#7
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<sr2x2#109:<h0#42:<h1#41:<x3#101:<h3#39:<h2#40:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#6
+# asm 2: fxch <sr2x2=%st(5)
+fxch %st(5)
+
+# qhasm:   h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#5
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm:   r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#14
+# asm 2: fldl <r0=136(%rsp)
+fldl 136(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<r0x3#110:
+
+# qhasm:   r0x3 *= x3
+# asm 1: fmul <x3=float80#3,<r0x3=float80#1
+# asm 2: fmul <x3=%st(2),<r0x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<r0x3#110:
+
+# qhasm:   h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm:   sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#20
+# asm 2: fldl <sr3=184(%rsp)
+fldl 184(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr3x3#111:
+
+# qhasm:   sr3x3 *= x3
+# asm 1: fmul <x3=float80#3,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(2),<sr3x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr3x3#111:
+
+# qhasm:   h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#6
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm:   sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#18
+# asm 2: fldl <sr2=168(%rsp)
+fldl 168(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr2x3#112:
+
+# qhasm:   sr2x3 *= x3
+# asm 1: fmul <x3=float80#3,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(2),<sr2x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr2x3#112:
+
+# qhasm:   h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#4
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:
+
+# qhasm:   sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#16
+# asm 2: fldl <sr1=152(%rsp)
+fldl 152(%rsp)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<x3#101:<h3#39:<sr1x3#113:
+
+# qhasm:   sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#3
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(2)
+fmulp %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<sr1x3#113:<h3#39:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#2
+# asm 2: fxch <sr1x3=%st(1)
+fxch %st(1)
+
+# qhasm:   h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#4
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h2#40:<h0#42:<h1#41:<h3#39:
+# comment:automatically reorganizing fp stack for fallthrough
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#4
+# asm 2: fxch <h2=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<h3#39:<h0#42:<h1#41:<h2#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm: nomorebytes:
+._nomorebytes:
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fldl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#5,<x0=float80#1
+# asm 2: fadd <h3=%st(4),<x0=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha130
+fsubl crypto_onetimeauth_poly1305_amd64_alpha130(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#5
+# asm 2: fsubr <x0=%st(0),<h3=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_amd64_scale
+fmull crypto_onetimeauth_poly1305_amd64_scale(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fldl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha32
+fsubl crypto_onetimeauth_poly1305_amd64_alpha32(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fldl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#5,<x2=float80#1
+# asm 2: fadd <h1=%st(4),<x2=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha64
+fsubl crypto_onetimeauth_poly1305_amd64_alpha64(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#5
+# asm 2: fsubr <x2=%st(0),<h1=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fldl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#7,<x3=float80#1
+# asm 2: fadd <h2=%st(6),<x3=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_amd64_alpha96
+fsubl crypto_onetimeauth_poly1305_amd64_alpha96(%rip)
+# comment:fpstackfrombottom:<h3#39:<h2#40:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm:   stacktop h2
+# asm 1: fxch <h2=float80#7
+# asm 2: fxch <h2=%st(6)
+fxch %st(6)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<h2#40:
+
+# qhasm:   h2 -= x3
+# asm 1: fsub <x3=float80#7,<h2=float80#1
+# asm 2: fsub <x3=%st(6),<h2=%st(0)
+fsub %st(6),%st(0)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<h1#41:<h0#42:<x0#114:<x1#115:<x2#116:<h2#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#5
+# asm 2: fxch <h0=%st(4)
+fxch %st(4)
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#4
+# asm 2: faddp <h0=%st(0),<x0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<h1#41:<h2#40:<x0#114:<x1#115:<x2#116:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#5
+# asm 2: fxch <h1=%st(4)
+fxch %st(4)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<x2#116:<h2#40:<x0#114:<x1#115:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#4
+# asm 2: faddp <h2=%st(0),<x2=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h3#39:<x3#117:<x2#116:<x1#115:<x0#114:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#5
+# asm 2: fxch <h3=%st(4)
+fxch %st(4)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#4
+# asm 2: faddp <h3=%st(0),<x3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x2#116:<x1#115:
+
+# qhasm: internal stacktop x0
+# asm 1: fxch <x0=float80#4
+# asm 2: fxch <x0=%st(3)
+fxch %st(3)
+
+# qhasm:   x0 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset0
+faddl crypto_onetimeauth_poly1305_amd64_hoffset0(%rip)
+# comment:fpstackfrombottom:<x1#115:<x3#117:<x2#116:<x0#114:
+
+# qhasm: internal stacktop x1
+# asm 1: fxch <x1=float80#4
+# asm 2: fxch <x1=%st(3)
+fxch %st(3)
+
+# qhasm:   x1 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset1
+faddl crypto_onetimeauth_poly1305_amd64_hoffset1(%rip)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x2#116:<x1#115:
+
+# qhasm: internal stacktop x2
+# asm 1: fxch <x2=float80#2
+# asm 2: fxch <x2=%st(1)
+fxch %st(1)
+
+# qhasm:   x2 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset2
+faddl crypto_onetimeauth_poly1305_amd64_hoffset2(%rip)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x1#115:<x2#116:
+
+# qhasm: internal stacktop x3
+# asm 1: fxch <x3=float80#3
+# asm 2: fxch <x3=%st(2)
+fxch %st(2)
+
+# qhasm:   x3 += *(float64 *) &crypto_onetimeauth_poly1305_amd64_hoffset3
+faddl crypto_onetimeauth_poly1305_amd64_hoffset3(%rip)
+# comment:fpstackfrombottom:<x0#114:<x2#116:<x1#115:<x3#117:
+
+# qhasm: internal stacktop x0
+# asm 1: fxch <x0=float80#4
+# asm 2: fxch <x0=%st(3)
+fxch %st(3)
+
+# qhasm:   *(float64 *) &d0 = x0
+# asm 1: fstpl >d0=stack64#10
+# asm 2: fstpl >d0=104(%rsp)
+fstpl 104(%rsp)
+# comment:fpstackfrombottom:<x3#117:<x2#116:<x1#115:
+
+# qhasm:   *(float64 *) &d1 = x1
+# asm 1: fstpl >d1=stack64#11
+# asm 2: fstpl >d1=112(%rsp)
+fstpl 112(%rsp)
+# comment:fpstackfrombottom:<x3#117:<x2#116:
+
+# qhasm:   *(float64 *) &d2 = x2
+# asm 1: fstpl >d2=stack64#12
+# asm 2: fstpl >d2=120(%rsp)
+fstpl 120(%rsp)
+# comment:fpstackfrombottom:<x3#117:
+
+# qhasm:   *(float64 *) &d3 = x3
+# asm 1: fstpl >d3=stack64#13
+# asm 2: fstpl >d3=128(%rsp)
+fstpl 128(%rsp)
+# comment:fpstackfrombottom:
+
+# qhasm: int64 f0
+
+# qhasm: int64 f1
+
+# qhasm: int64 f2
+
+# qhasm: int64 f3
+
+# qhasm: int64 f4
+
+# qhasm: int64 g0
+
+# qhasm: int64 g1
+
+# qhasm: int64 g2
+
+# qhasm: int64 g3
+
+# qhasm: int64 f
+
+# qhasm: int64 notf
+
+# qhasm: stack64 f1_stack
+
+# qhasm: stack64 f2_stack
+
+# qhasm: stack64 f3_stack
+
+# qhasm: stack64 f4_stack
+
+# qhasm: stack64 g0_stack
+
+# qhasm: stack64 g1_stack
+
+# qhasm: stack64 g2_stack
+
+# qhasm: stack64 g3_stack
+
+# qhasm:   g0 = top d0
+# asm 1: movl <d0=stack64#10,>g0=int64#1d
+# asm 2: movl <d0=108(%rsp),>g0=%edi
+movl 108(%rsp),%edi
+
+# qhasm:   (uint32) g0 &= 63
+# asm 1: and  $63,<g0=int64#1d
+# asm 2: and  $63,<g0=%edi
+and  $63,%edi
+
+# qhasm:   g1 = top d1
+# asm 1: movl <d1=stack64#11,>g1=int64#2d
+# asm 2: movl <d1=116(%rsp),>g1=%esi
+movl 116(%rsp),%esi
+
+# qhasm:   (uint32) g1 &= 63
+# asm 1: and  $63,<g1=int64#2d
+# asm 2: and  $63,<g1=%esi
+and  $63,%esi
+
+# qhasm:   g2 = top d2
+# asm 1: movl <d2=stack64#12,>g2=int64#3d
+# asm 2: movl <d2=124(%rsp),>g2=%edx
+movl 124(%rsp),%edx
+
+# qhasm:   (uint32) g2 &= 63
+# asm 1: and  $63,<g2=int64#3d
+# asm 2: and  $63,<g2=%edx
+and  $63,%edx
+
+# qhasm:   g3 = top d3
+# asm 1: movl <d3=stack64#13,>g3=int64#4d
+# asm 2: movl <d3=132(%rsp),>g3=%ecx
+movl 132(%rsp),%ecx
+
+# qhasm:   (uint32) g3 &= 63
+# asm 1: and  $63,<g3=int64#4d
+# asm 2: and  $63,<g3=%ecx
+and  $63,%ecx
+
+# qhasm:   f1 = bottom d1
+# asm 1: movl <d1=stack64#11,>f1=int64#5d
+# asm 2: movl <d1=112(%rsp),>f1=%r8d
+movl 112(%rsp),%r8d
+
+# qhasm:   carry? (uint32) f1 += g0
+# asm 1: add <g0=int64#1d,<f1=int64#5d
+# asm 2: add <g0=%edi,<f1=%r8d
+add %edi,%r8d
+
+# qhasm:   f1_stack = f1
+# asm 1: movq <f1=int64#5,>f1_stack=stack64#11
+# asm 2: movq <f1=%r8,>f1_stack=112(%rsp)
+movq %r8,112(%rsp)
+
+# qhasm:   f2 = bottom d2
+# asm 1: movl <d2=stack64#12,>f2=int64#1d
+# asm 2: movl <d2=120(%rsp),>f2=%edi
+movl 120(%rsp),%edi
+
+# qhasm:   carry? (uint32) f2 += g1 + carry
+# asm 1: adc <g1=int64#2d,<f2=int64#1d
+# asm 2: adc <g1=%esi,<f2=%edi
+adc %esi,%edi
+
+# qhasm:   f2_stack = f2
+# asm 1: movq <f2=int64#1,>f2_stack=stack64#12
+# asm 2: movq <f2=%rdi,>f2_stack=120(%rsp)
+movq %rdi,120(%rsp)
+
+# qhasm:   f3 = bottom d3
+# asm 1: movl <d3=stack64#13,>f3=int64#1d
+# asm 2: movl <d3=128(%rsp),>f3=%edi
+movl 128(%rsp),%edi
+
+# qhasm:   carry? (uint32) f3 += g2 + carry
+# asm 1: adc <g2=int64#3d,<f3=int64#1d
+# asm 2: adc <g2=%edx,<f3=%edi
+adc %edx,%edi
+
+# qhasm:   f3_stack = f3
+# asm 1: movq <f3=int64#1,>f3_stack=stack64#13
+# asm 2: movq <f3=%rdi,>f3_stack=128(%rsp)
+movq %rdi,128(%rsp)
+
+# qhasm:   f4 = 0
+# asm 1: mov  $0,>f4=int64#1
+# asm 2: mov  $0,>f4=%rdi
+mov  $0,%rdi
+
+# qhasm:   carry? (uint32) f4 += g3 + carry
+# asm 1: adc <g3=int64#4d,<f4=int64#1d
+# asm 2: adc <g3=%ecx,<f4=%edi
+adc %ecx,%edi
+
+# qhasm:   f4_stack = f4
+# asm 1: movq <f4=int64#1,>f4_stack=stack64#14
+# asm 2: movq <f4=%rdi,>f4_stack=136(%rsp)
+movq %rdi,136(%rsp)
+
+# qhasm:   g0 = 5
+# asm 1: mov  $5,>g0=int64#1
+# asm 2: mov  $5,>g0=%rdi
+mov  $5,%rdi
+
+# qhasm:   f0 = bottom d0
+# asm 1: movl <d0=stack64#10,>f0=int64#2d
+# asm 2: movl <d0=104(%rsp),>f0=%esi
+movl 104(%rsp),%esi
+
+# qhasm:   carry? (uint32) g0 += f0
+# asm 1: add <f0=int64#2d,<g0=int64#1d
+# asm 2: add <f0=%esi,<g0=%edi
+add %esi,%edi
+
+# qhasm:   g0_stack = g0
+# asm 1: movq <g0=int64#1,>g0_stack=stack64#10
+# asm 2: movq <g0=%rdi,>g0_stack=104(%rsp)
+movq %rdi,104(%rsp)
+
+# qhasm:   g1 = 0
+# asm 1: mov  $0,>g1=int64#1
+# asm 2: mov  $0,>g1=%rdi
+mov  $0,%rdi
+
+# qhasm:   f1 = f1_stack
+# asm 1: movq <f1_stack=stack64#11,>f1=int64#3
+# asm 2: movq <f1_stack=112(%rsp),>f1=%rdx
+movq 112(%rsp),%rdx
+
+# qhasm:   carry? (uint32) g1 += f1 + carry
+# asm 1: adc <f1=int64#3d,<g1=int64#1d
+# asm 2: adc <f1=%edx,<g1=%edi
+adc %edx,%edi
+
+# qhasm:   g1_stack = g1
+# asm 1: movq <g1=int64#1,>g1_stack=stack64#11
+# asm 2: movq <g1=%rdi,>g1_stack=112(%rsp)
+movq %rdi,112(%rsp)
+
+# qhasm:   g2 = 0
+# asm 1: mov  $0,>g2=int64#1
+# asm 2: mov  $0,>g2=%rdi
+mov  $0,%rdi
+
+# qhasm:   f2 = f2_stack
+# asm 1: movq <f2_stack=stack64#12,>f2=int64#4
+# asm 2: movq <f2_stack=120(%rsp),>f2=%rcx
+movq 120(%rsp),%rcx
+
+# qhasm:   carry? (uint32) g2 += f2 + carry
+# asm 1: adc <f2=int64#4d,<g2=int64#1d
+# asm 2: adc <f2=%ecx,<g2=%edi
+adc %ecx,%edi
+
+# qhasm:   g2_stack = g2
+# asm 1: movq <g2=int64#1,>g2_stack=stack64#12
+# asm 2: movq <g2=%rdi,>g2_stack=120(%rsp)
+movq %rdi,120(%rsp)
+
+# qhasm:   g3 = 0
+# asm 1: mov  $0,>g3=int64#1
+# asm 2: mov  $0,>g3=%rdi
+mov  $0,%rdi
+
+# qhasm:   f3 = f3_stack
+# asm 1: movq <f3_stack=stack64#13,>f3=int64#5
+# asm 2: movq <f3_stack=128(%rsp),>f3=%r8
+movq 128(%rsp),%r8
+
+# qhasm:   carry? (uint32) g3 += f3 + carry
+# asm 1: adc <f3=int64#5d,<g3=int64#1d
+# asm 2: adc <f3=%r8d,<g3=%edi
+adc %r8d,%edi
+
+# qhasm:   g3_stack = g3
+# asm 1: movq <g3=int64#1,>g3_stack=stack64#13
+# asm 2: movq <g3=%rdi,>g3_stack=128(%rsp)
+movq %rdi,128(%rsp)
+
+# qhasm:   f = 0xfffffffc
+# asm 1: mov  $0xfffffffc,>f=int64#1
+# asm 2: mov  $0xfffffffc,>f=%rdi
+mov  $0xfffffffc,%rdi
+
+# qhasm:   f4 = f4_stack
+# asm 1: movq <f4_stack=stack64#14,>f4=int64#6
+# asm 2: movq <f4_stack=136(%rsp),>f4=%r9
+movq 136(%rsp),%r9
+
+# qhasm:   carry? (uint32) f += f4 + carry
+# asm 1: adc <f4=int64#6d,<f=int64#1d
+# asm 2: adc <f4=%r9d,<f=%edi
+adc %r9d,%edi
+
+# qhasm:   (int32) f >>= 16
+# asm 1: sar  $16,<f=int64#1d
+# asm 2: sar  $16,<f=%edi
+sar  $16,%edi
+
+# qhasm:   notf = f
+# asm 1: mov  <f=int64#1,>notf=int64#6
+# asm 2: mov  <f=%rdi,>notf=%r9
+mov  %rdi,%r9
+
+# qhasm:   (uint32) notf ^= 0xffffffff
+# asm 1: xor  $0xffffffff,<notf=int64#6d
+# asm 2: xor  $0xffffffff,<notf=%r9d
+xor  $0xffffffff,%r9d
+
+# qhasm:   f0 &= f
+# asm 1: and  <f=int64#1,<f0=int64#2
+# asm 2: and  <f=%rdi,<f0=%rsi
+and  %rdi,%rsi
+
+# qhasm:   g0 = g0_stack
+# asm 1: movq <g0_stack=stack64#10,>g0=int64#7
+# asm 2: movq <g0_stack=104(%rsp),>g0=%rax
+movq 104(%rsp),%rax
+
+# qhasm:   g0 &= notf
+# asm 1: and  <notf=int64#6,<g0=int64#7
+# asm 2: and  <notf=%r9,<g0=%rax
+and  %r9,%rax
+
+# qhasm:   f0 |= g0
+# asm 1: or   <g0=int64#7,<f0=int64#2
+# asm 2: or   <g0=%rax,<f0=%rsi
+or   %rax,%rsi
+
+# qhasm:   f1 &= f
+# asm 1: and  <f=int64#1,<f1=int64#3
+# asm 2: and  <f=%rdi,<f1=%rdx
+and  %rdi,%rdx
+
+# qhasm:   g1 = g1_stack
+# asm 1: movq <g1_stack=stack64#11,>g1=int64#7
+# asm 2: movq <g1_stack=112(%rsp),>g1=%rax
+movq 112(%rsp),%rax
+
+# qhasm:   g1 &= notf
+# asm 1: and  <notf=int64#6,<g1=int64#7
+# asm 2: and  <notf=%r9,<g1=%rax
+and  %r9,%rax
+
+# qhasm:   f1 |= g1
+# asm 1: or   <g1=int64#7,<f1=int64#3
+# asm 2: or   <g1=%rax,<f1=%rdx
+or   %rax,%rdx
+
+# qhasm:   f2 &= f
+# asm 1: and  <f=int64#1,<f2=int64#4
+# asm 2: and  <f=%rdi,<f2=%rcx
+and  %rdi,%rcx
+
+# qhasm:   g2 = g2_stack
+# asm 1: movq <g2_stack=stack64#12,>g2=int64#7
+# asm 2: movq <g2_stack=120(%rsp),>g2=%rax
+movq 120(%rsp),%rax
+
+# qhasm:   g2 &= notf
+# asm 1: and  <notf=int64#6,<g2=int64#7
+# asm 2: and  <notf=%r9,<g2=%rax
+and  %r9,%rax
+
+# qhasm:   f2 |= g2
+# asm 1: or   <g2=int64#7,<f2=int64#4
+# asm 2: or   <g2=%rax,<f2=%rcx
+or   %rax,%rcx
+
+# qhasm:   f3 &= f
+# asm 1: and  <f=int64#1,<f3=int64#5
+# asm 2: and  <f=%rdi,<f3=%r8
+and  %rdi,%r8
+
+# qhasm:   g3 = g3_stack
+# asm 1: movq <g3_stack=stack64#13,>g3=int64#1
+# asm 2: movq <g3_stack=128(%rsp),>g3=%rdi
+movq 128(%rsp),%rdi
+
+# qhasm:   g3 &= notf
+# asm 1: and  <notf=int64#6,<g3=int64#1
+# asm 2: and  <notf=%r9,<g3=%rdi
+and  %r9,%rdi
+
+# qhasm:   f3 |= g3
+# asm 1: or   <g3=int64#1,<f3=int64#5
+# asm 2: or   <g3=%rdi,<f3=%r8
+or   %rdi,%r8
+
+# qhasm:   out = out_stack
+# asm 1: movq <out_stack=stack64#8,>out=int64#1
+# asm 2: movq <out_stack=88(%rsp),>out=%rdi
+movq 88(%rsp),%rdi
+
+# qhasm:   k = k_stack
+# asm 1: movq <k_stack=stack64#9,>k=int64#6
+# asm 2: movq <k_stack=96(%rsp),>k=%r9
+movq 96(%rsp),%r9
+
+# qhasm:   carry? (uint32) f0 += *(uint32 *) (k + 16)
+# asm 1: addl 16(<k=int64#6),<f0=int64#2d
+# asm 2: addl 16(<k=%r9),<f0=%esi
+addl 16(%r9),%esi
+
+# qhasm:   carry? (uint32) f1 += *(uint32 *) (k + 20) + carry
+# asm 1: adcl 20(<k=int64#6),<f1=int64#3d
+# asm 2: adcl 20(<k=%r9),<f1=%edx
+adcl 20(%r9),%edx
+
+# qhasm:   carry? (uint32) f2 += *(uint32 *) (k + 24) + carry
+# asm 1: adcl 24(<k=int64#6),<f2=int64#4d
+# asm 2: adcl 24(<k=%r9),<f2=%ecx
+adcl 24(%r9),%ecx
+
+# qhasm:   carry? (uint32) f3 += *(uint32 *) (k + 28) + carry
+# asm 1: adcl 28(<k=int64#6),<f3=int64#5d
+# asm 2: adcl 28(<k=%r9),<f3=%r8d
+adcl 28(%r9),%r8d
+
+# qhasm:   *(uint32 *) (out + 0) = f0
+# asm 1: movl   <f0=int64#2d,0(<out=int64#1)
+# asm 2: movl   <f0=%esi,0(<out=%rdi)
+movl   %esi,0(%rdi)
+
+# qhasm:   *(uint32 *) (out + 4) = f1
+# asm 1: movl   <f1=int64#3d,4(<out=int64#1)
+# asm 2: movl   <f1=%edx,4(<out=%rdi)
+movl   %edx,4(%rdi)
+
+# qhasm:   *(uint32 *) (out + 8) = f2
+# asm 1: movl   <f2=int64#4d,8(<out=int64#1)
+# asm 2: movl   <f2=%ecx,8(<out=%rdi)
+movl   %ecx,8(%rdi)
+
+# qhasm:   *(uint32 *) (out + 12) = f3
+# asm 1: movl   <f3=int64#5d,12(<out=int64#1)
+# asm 2: movl   <f3=%r8d,12(<out=%rdi)
+movl   %r8d,12(%rdi)
+
+# qhasm: r11_caller = r11_stack
+# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
+# asm 2: movq <r11_stack=32(%rsp),>r11_caller=%r11
+movq 32(%rsp),%r11
+
+# qhasm: r12_caller = r12_stack
+# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
+# asm 2: movq <r12_stack=40(%rsp),>r12_caller=%r12
+movq 40(%rsp),%r12
+
+# qhasm: r13_caller = r13_stack
+# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
+# asm 2: movq <r13_stack=48(%rsp),>r13_caller=%r13
+movq 48(%rsp),%r13
+
+# qhasm: r14_caller = r14_stack
+# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
+# asm 2: movq <r14_stack=56(%rsp),>r14_caller=%r14
+movq 56(%rsp),%r14
+
+# qhasm: r15_caller = r15_stack
+# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
+# asm 2: movq <r15_stack=64(%rsp),>r15_caller=%r15
+movq 64(%rsp),%r15
+
+# qhasm: rbx_caller = rbx_stack
+# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
+# asm 2: movq <rbx_stack=72(%rsp),>rbx_caller=%rbx
+movq 72(%rsp),%rbx
+
+# qhasm: rbp_caller = rbp_stack
+# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
+# asm 2: movq <rbp_stack=80(%rsp),>rbp_caller=%rbp
+movq 80(%rsp),%rbp
+
+# qhasm: leave
+add %r11,%rsp
+xor %rax,%rax
+xor %rdx,%rdx
+ret
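
The tail of the amd64 auth.s above is Poly1305's final step carried out in integer registers: the double-precision limbs written to d0..d3 are re-split into 32-bit limbs, h + 5 is computed alongside h, the adc/sar-16 sequence on f turns "did h + 5 reach 2^130?" into an all-zero or all-ones mask, the f/notf masking then selects the fully reduced value in constant time, and the last 16 key bytes (the pad) are added modulo 2^128 before the 16-byte tag is stored. A rough C sketch of that freeze-and-pad step, using illustrative limb names rather than the qhasm register map and assuming h has already been carried down below 2^130:

#include <stdint.h>

/* Hypothetical little-endian byte helpers (not part of the diff). */
static uint32_t ld32(const uint8_t *p) {
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8)
       | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}
static void st32(uint8_t *p, uint32_t x) {
  p[0] = (uint8_t)x; p[1] = (uint8_t)(x >> 8);
  p[2] = (uint8_t)(x >> 16); p[3] = (uint8_t)(x >> 24);
}

/* Sketch of the final freeze + pad addition, assuming h is held in four
   32-bit limbs plus a high limb h4 and satisfies h < 2^130. */
static void poly1305_finish_sketch(uint8_t out[16], const uint32_t h[4],
                                   uint32_t h4, const uint8_t k[32])
{
  uint32_t g[4], g4, mask;
  uint64_t c;

  /* g = h + 5; g4 collects the bits at and above 2^128 */
  c = (uint64_t)h[0] + 5; g[0] = (uint32_t)c; c >>= 32;
  c += h[1];              g[1] = (uint32_t)c; c >>= 32;
  c += h[2];              g[2] = (uint32_t)c; c >>= 32;
  c += h[3];              g[3] = (uint32_t)c; c >>= 32;
  g4 = h4 + (uint32_t)c;

  /* all-ones when h + 5 reaches 2^130, i.e. when h mod p is h + 5 - 2^130 */
  mask = (uint32_t)0 - (g4 >> 2);

  /* constant-time select (the same idea as the f/notf masking above),
     then add the pad s = k[16..31] modulo 2^128 and emit the tag */
  c = 0;
  for (int i = 0; i < 4; ++i) {
    uint32_t hi = (h[i] & ~mask) | (g[i] & mask);
    c += (uint64_t)hi + ld32(k + 16 + 4*i);
    st32(out + 4*i, (uint32_t)c);
    c >>= 32;
  }
}

The two candidate values differ only when h lies in [2^130 - 5, 2^130), which is exactly when h mod (2^130 - 5) equals h + 5 - 2^130.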
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/constants.s b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/constants.s
new file mode 100644
index 00000000..1bfb0be9
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/constants.s
@@ -0,0 +1,85 @@
+# version 20080913
+# D. J. Bernstein
+# Public domain.
+
+.data
+.section .rodata
+.p2align 5
+
+.globl _crypto_onetimeauth_poly1305_amd64_constants
+.globl crypto_onetimeauth_poly1305_amd64_constants
+.globl crypto_onetimeauth_poly1305_amd64_scale
+.globl crypto_onetimeauth_poly1305_amd64_two32
+.globl crypto_onetimeauth_poly1305_amd64_two64
+.globl crypto_onetimeauth_poly1305_amd64_two96
+.globl crypto_onetimeauth_poly1305_amd64_alpha32
+.globl crypto_onetimeauth_poly1305_amd64_alpha64
+.globl crypto_onetimeauth_poly1305_amd64_alpha96
+.globl crypto_onetimeauth_poly1305_amd64_alpha130
+.globl crypto_onetimeauth_poly1305_amd64_doffset0
+.globl crypto_onetimeauth_poly1305_amd64_doffset1
+.globl crypto_onetimeauth_poly1305_amd64_doffset2
+.globl crypto_onetimeauth_poly1305_amd64_doffset3
+.globl crypto_onetimeauth_poly1305_amd64_doffset3minustwo128
+.globl crypto_onetimeauth_poly1305_amd64_hoffset0
+.globl crypto_onetimeauth_poly1305_amd64_hoffset1
+.globl crypto_onetimeauth_poly1305_amd64_hoffset2
+.globl crypto_onetimeauth_poly1305_amd64_hoffset3
+.globl crypto_onetimeauth_poly1305_amd64_rounding
+
+_crypto_onetimeauth_poly1305_amd64_constants:
+crypto_onetimeauth_poly1305_amd64_constants:
+crypto_onetimeauth_poly1305_amd64_scale:
+.long 0x0,0x37f40000
+
+crypto_onetimeauth_poly1305_amd64_two32:
+.long 0x0,0x41f00000
+
+crypto_onetimeauth_poly1305_amd64_two64:
+.long 0x0,0x43f00000
+
+crypto_onetimeauth_poly1305_amd64_two96:
+.long 0x0,0x45f00000
+
+crypto_onetimeauth_poly1305_amd64_alpha32:
+.long 0x0,0x45e80000
+
+crypto_onetimeauth_poly1305_amd64_alpha64:
+.long 0x0,0x47e80000
+
+crypto_onetimeauth_poly1305_amd64_alpha96:
+.long 0x0,0x49e80000
+
+crypto_onetimeauth_poly1305_amd64_alpha130:
+.long 0x0,0x4c080000
+
+crypto_onetimeauth_poly1305_amd64_doffset0:
+.long 0x0,0x43300000
+
+crypto_onetimeauth_poly1305_amd64_doffset1:
+.long 0x0,0x45300000
+
+crypto_onetimeauth_poly1305_amd64_doffset2:
+.long 0x0,0x47300000
+
+crypto_onetimeauth_poly1305_amd64_doffset3:
+.long 0x0,0x49300000
+
+crypto_onetimeauth_poly1305_amd64_doffset3minustwo128:
+.long 0x0,0x492ffffe
+
+crypto_onetimeauth_poly1305_amd64_hoffset0:
+.long 0xfffffffb,0x43300001
+
+crypto_onetimeauth_poly1305_amd64_hoffset1:
+.long 0xfffffffe,0x45300001
+
+crypto_onetimeauth_poly1305_amd64_hoffset2:
+.long 0xfffffffe,0x47300001
+
+crypto_onetimeauth_poly1305_amd64_hoffset3:
+.long 0xfffffffe,0x49300003
+
+crypto_onetimeauth_poly1305_amd64_rounding:
+.byte 0x7f
+.byte 0x13
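
Each pair of .long values above is the low and high word of an IEEE-754 double. Decoded, crypto_onetimeauth_poly1305_amd64_scale is 5 * 2^-130 (the reduction constant for 2^130 - 5), two32/two64/two96 are 2^32, 2^64 and 2^96, the alpha constants are 1.5 * 2^(63+k) values whose unit in the last place forces rounding at bit k of the extended-precision accumulator, the doffset constants are the 2^(52+32i) biases used to turn 32-bit words into doubles, and the two trailing bytes encode the x87 control word 0x137f (round to nearest, 64-bit precision, all exceptions masked). A small self-check of a few of these decodings, assuming only an IEEE-754 host:

#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

/* Reassemble a double from the {low, high} .long pair used in constants.s. */
static double from_longs(uint32_t lo, uint32_t hi) {
  uint64_t bits = ((uint64_t)hi << 32) | lo;
  double d;
  memcpy(&d, &bits, sizeof d);
  return d;
}

int main(void) {
  /* scale = 5 * 2^-130, matching reduction modulo 2^130 - 5 */
  assert(from_longs(0x0, 0x37f40000) == ldexp(5.0, -130));
  /* two32 = 2^32, two64 = 2^64, two96 = 2^96 */
  assert(from_longs(0x0, 0x41f00000) == ldexp(1.0, 32));
  assert(from_longs(0x0, 0x43f00000) == ldexp(1.0, 64));
  assert(from_longs(0x0, 0x45f00000) == ldexp(1.0, 96));
  /* alpha32 = 1.5 * 2^95: adding and subtracting it rounds a value to a
     multiple of 2^32 in 64-bit precision; alpha130 = 1.5 * 2^193 likewise
     rounds to a multiple of 2^130 */
  assert(from_longs(0x0, 0x45e80000) == ldexp(1.5, 95));
  assert(from_longs(0x0, 0x4c080000) == ldexp(1.5, 193));
  /* doffset0 = 2^52: a 32-bit word placed in the low mantissa bits of a
     double with this exponent reads back as that integer after the bias
     is subtracted */
  assert(from_longs(0x0, 0x43300000) == ldexp(1.0, 52));
  return 0;
}

The remaining entries decode the same way; for example doffset3minustwo128 works out to 2^148 - 2^128, as its name suggests.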
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/verify.c b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/verify.c
new file mode 100644
index 00000000..c7e063f1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/amd64/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_16.h"
+#include "crypto_onetimeauth.h"
+
+int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[16];
+  crypto_onetimeauth(correct,in,inlen,k);
+  return crypto_verify_16(h,correct);
+}
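
verify.c builds tag checking from the MAC itself plus crypto_verify_16, so the comparison is constant time rather than a memcmp. A minimal caller-side sketch (the key and message below are placeholders, and a real one-time key must never be reused across messages):

#include "crypto_onetimeauth.h"

/* Illustrative only: produce a 16-byte tag, then verify it.
   crypto_onetimeauth_verify returns 0 when the tag matches. */
int authenticate_and_check(void)
{
  unsigned char k[32] = {0};           /* placeholder one-time key */
  const unsigned char m[] = "hello";   /* placeholder message */
  unsigned char a[16];

  crypto_onetimeauth(a, m, sizeof m - 1, k);
  return crypto_onetimeauth_verify(a, m, sizeof m - 1, k);
}

The same verify.c appears again below for the ref implementation; both route the comparison through crypto_verify_16, so the cost of the check does not depend on where the tags first differ.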
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/checksum b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/checksum
new file mode 100644
index 00000000..a713ea40
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/checksum
@@ -0,0 +1 @@
+e836d5ca58cf673fca2b4910f23f3990
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/api.h b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/api.h
new file mode 100644
index 00000000..acc133ed
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 16
+#define CRYPTO_KEYBYTES 32
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/auth.c b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/auth.c
new file mode 100644
index 00000000..06cf115d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/auth.c
@@ -0,0 +1,104 @@
+/*
+20080912
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_onetimeauth.h"
+
+static void add(unsigned int h[17],const unsigned int c[17])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 17;++j) { u += h[j] + c[j]; h[j] = u & 255; u >>= 8; }
+}
+
+static void squeeze(unsigned int h[17])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 16;++j) { u += h[j]; h[j] = u & 255; u >>= 8; }
+  u += h[16]; h[16] = u & 3;
+  u = 5 * (u >> 2);
+  for (j = 0;j < 16;++j) { u += h[j]; h[j] = u & 255; u >>= 8; }
+  u += h[16]; h[16] = u;
+}
+
+static const unsigned int minusp[17] = {
+  5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252
+} ;
+
+static void freeze(unsigned int h[17])
+{
+  unsigned int horig[17];
+  unsigned int j;
+  unsigned int negative;
+  for (j = 0;j < 17;++j) horig[j] = h[j];
+  add(h,minusp);
+  negative = -(h[16] >> 7);
+  for (j = 0;j < 17;++j) h[j] ^= negative & (horig[j] ^ h[j]);
+}
+
+static void mulmod(unsigned int h[17],const unsigned int r[17])
+{
+  unsigned int hr[17];
+  unsigned int i;
+  unsigned int j;
+  unsigned int u;
+
+  for (i = 0;i < 17;++i) {
+    u = 0;
+    for (j = 0;j <= i;++j) u += h[j] * r[i - j];
+    for (j = i + 1;j < 17;++j) u += 320 * h[j] * r[i + 17 - j];
+    hr[i] = u;
+  }
+  for (i = 0;i < 17;++i) h[i] = hr[i];
+  squeeze(h);
+}
+
+int crypto_onetimeauth(unsigned char *out,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned int j;
+  unsigned int r[17];
+  unsigned int h[17];
+  unsigned int c[17];
+
+  r[0] = k[0];
+  r[1] = k[1];
+  r[2] = k[2];
+  r[3] = k[3] & 15;
+  r[4] = k[4] & 252;
+  r[5] = k[5];
+  r[6] = k[6];
+  r[7] = k[7] & 15;
+  r[8] = k[8] & 252;
+  r[9] = k[9];
+  r[10] = k[10];
+  r[11] = k[11] & 15;
+  r[12] = k[12] & 252;
+  r[13] = k[13];
+  r[14] = k[14];
+  r[15] = k[15] & 15;
+  r[16] = 0;
+
+  for (j = 0;j < 17;++j) h[j] = 0;
+
+  while (inlen > 0) {
+    for (j = 0;j < 17;++j) c[j] = 0;
+    for (j = 0;(j < 16) && (j < inlen);++j) c[j] = in[j];
+    c[j] = 1;
+    in += j; inlen -= j;
+    add(h,c);
+    mulmod(h,r);
+  }
+
+  freeze(h);
+
+  for (j = 0;j < 16;++j) c[j] = k[j + 16];
+  c[16] = 0;
+  add(h,c);
+  for (j = 0;j < 16;++j) out[j] = h[j];
+  return 0;
+}
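
The factor 320 in mulmod above follows from the radix: h and r are held in 17 base-256 limbs, so a product term landing at limb position i + 17 carries weight 2^136, and

  2^136 = 64 * (2^130 - 5) + 64 * 5 = 64 * (2^130 - 5) + 320,

so modulo 2^130 - 5 such a term is worth 320 times a term at position i. squeeze() then folds the resulting carries, and its u >> 2 step, scaled by 5, handles whatever remains above 2^130 in the same way.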
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/verify.c b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/verify.c
new file mode 100644
index 00000000..c7e063f1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/ref/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_16.h"
+#include "crypto_onetimeauth.h"
+
+int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[16];
+  crypto_onetimeauth(correct,in,inlen,k);
+  return crypto_verify_16(h,correct);
+}
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/selected b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/used b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/api.h b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/api.h
new file mode 100644
index 00000000..acc133ed
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 16
+#define CRYPTO_KEYBYTES 32
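
The x86 auth.s that follows clamps the four 32-bit words of r with the masks 0x0fffffff and 0x0ffffffc, which is the word-level form of the byte masks (k[3] & 15, k[4] & 252, and so on) used in ref/auth.c. A self-contained sketch of that clamping, with a hypothetical little-endian load helper:

#include <stdint.h>

static uint32_t ld32le(const unsigned char *p) {
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8)
       | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/* Clamp r from the first 16 key bytes: clear the top four bits of every
   word and the low two bits of words 1..3, matching both the byte-level
   masking in ref/auth.c and the word masks applied in the assembly. */
static void clamp_r(uint32_t r[4], const unsigned char k[16]) {
  r[0] = ld32le(k + 0)  & 0x0fffffff;
  r[1] = ld32le(k + 4)  & 0x0ffffffc;
  r[2] = ld32le(k + 8)  & 0x0ffffffc;
  r[3] = ld32le(k + 12) & 0x0ffffffc;
}

Clamping bounds r so that the per-limb products stay within the precision the implementations rely on.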
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/auth.s b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/auth.s
new file mode 100644
index 00000000..acb8c51c
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/auth.s
@@ -0,0 +1,2779 @@
+
+# qhasm: stack32 arg_out
+
+# qhasm: stack32 arg_m
+
+# qhasm: stack32 arg_l
+
+# qhasm: stack32 arg_ltop
+
+# qhasm: stack32 arg_k
+
+# qhasm: input arg_out
+
+# qhasm: input arg_m
+
+# qhasm: input arg_l
+
+# qhasm: input arg_ltop
+
+# qhasm: input arg_k
+
+# qhasm: int32 eax
+
+# qhasm: int32 ebx
+
+# qhasm: int32 esi
+
+# qhasm: int32 edi
+
+# qhasm: int32 ebp
+
+# qhasm: caller eax
+
+# qhasm: caller ebx
+
+# qhasm: caller esi
+
+# qhasm: caller edi
+
+# qhasm: caller ebp
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int32 out
+
+# qhasm: stack32 out_stack
+
+# qhasm: int32 k
+
+# qhasm: stack32 k_stack
+
+# qhasm: int32 m
+
+# qhasm: int32 l
+
+# qhasm: int32 m0
+
+# qhasm: int32 m1
+
+# qhasm: int32 m2
+
+# qhasm: int32 m3
+
+# qhasm: float80 a0
+
+# qhasm: float80 a1
+
+# qhasm: float80 a2
+
+# qhasm: float80 a3
+
+# qhasm: float80 h0
+
+# qhasm: float80 h1
+
+# qhasm: float80 h2
+
+# qhasm: float80 h3
+
+# qhasm: float80 x0
+
+# qhasm: float80 x1
+
+# qhasm: float80 x2
+
+# qhasm: float80 x3
+
+# qhasm: float80 y0
+
+# qhasm: float80 y1
+
+# qhasm: float80 y2
+
+# qhasm: float80 y3
+
+# qhasm: float80 r0x0
+
+# qhasm: float80 r1x0
+
+# qhasm: float80 r2x0
+
+# qhasm: float80 r3x0
+
+# qhasm: float80 r0x1
+
+# qhasm: float80 r1x1
+
+# qhasm: float80 r2x1
+
+# qhasm: float80 sr3x1
+
+# qhasm: float80 r0x2
+
+# qhasm: float80 r1x2
+
+# qhasm: float80 sr2x2
+
+# qhasm: float80 sr3x2
+
+# qhasm: float80 r0x3
+
+# qhasm: float80 sr1x3
+
+# qhasm: float80 sr2x3
+
+# qhasm: float80 sr3x3
+
+# qhasm: stack64 d0
+
+# qhasm: stack64 d1
+
+# qhasm: stack64 d2
+
+# qhasm: stack64 d3
+
+# qhasm: stack64 r0
+
+# qhasm: stack64 r1
+
+# qhasm: stack64 r2
+
+# qhasm: stack64 r3
+
+# qhasm: stack64 sr1
+
+# qhasm: stack64 sr2
+
+# qhasm: stack64 sr3
+
+# qhasm: enter crypto_onetimeauth_poly1305_x86 stackaligned4096 crypto_onetimeauth_poly1305_x86_constants
+.text
+.p2align 5
+.globl _crypto_onetimeauth_poly1305_x86
+.globl crypto_onetimeauth_poly1305_x86
+_crypto_onetimeauth_poly1305_x86:
+crypto_onetimeauth_poly1305_x86:
+mov %esp,%eax
+sub $crypto_onetimeauth_poly1305_x86_constants,%eax
+and $4095,%eax
+add $192,%eax
+sub %eax,%esp
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm:   round *(uint16 *) &crypto_onetimeauth_poly1305_x86_rounding
+fldcw crypto_onetimeauth_poly1305_x86_rounding
+
+# qhasm:   k = arg_k
+# asm 1: movl <arg_k=stack32#-5,>k=int32#3
+# asm 2: movl <arg_k=20(%esp,%eax),>k=%edx
+movl 20(%esp,%eax),%edx
+
+# qhasm:   m0 = *(uint32 *) (k + 0)
+# asm 1: movl 0(<k=int32#3),>m0=int32#2
+# asm 2: movl 0(<k=%edx),>m0=%ecx
+movl 0(%edx),%ecx
+
+# qhasm:   m1 = *(uint32 *) (k + 4)
+# asm 1: movl 4(<k=int32#3),>m1=int32#4
+# asm 2: movl 4(<k=%edx),>m1=%ebx
+movl 4(%edx),%ebx
+
+# qhasm:   m2 = *(uint32 *) (k + 8)
+# asm 1: movl 8(<k=int32#3),>m2=int32#5
+# asm 2: movl 8(<k=%edx),>m2=%esi
+movl 8(%edx),%esi
+
+# qhasm:   m3 = *(uint32 *) (k + 12)
+# asm 1: movl 12(<k=int32#3),>m3=int32#6
+# asm 2: movl 12(<k=%edx),>m3=%edi
+movl 12(%edx),%edi
+
+# qhasm:   d0 top = 0x43300000
+# asm 1: movl  $0x43300000,>d0=stack64#1
+# asm 2: movl  $0x43300000,>d0=100(%esp)
+movl  $0x43300000,100(%esp)
+
+# qhasm:   d1 top = 0x45300000
+# asm 1: movl  $0x45300000,>d1=stack64#2
+# asm 2: movl  $0x45300000,>d1=108(%esp)
+movl  $0x45300000,108(%esp)
+
+# qhasm:   d2 top = 0x47300000
+# asm 1: movl  $0x47300000,>d2=stack64#3
+# asm 2: movl  $0x47300000,>d2=116(%esp)
+movl  $0x47300000,116(%esp)
+
+# qhasm:   d3 top = 0x49300000
+# asm 1: movl  $0x49300000,>d3=stack64#4
+# asm 2: movl  $0x49300000,>d3=124(%esp)
+movl  $0x49300000,124(%esp)
+
+# qhasm:   m0 &= 0x0fffffff
+# asm 1: and  $0x0fffffff,<m0=int32#2
+# asm 2: and  $0x0fffffff,<m0=%ecx
+and  $0x0fffffff,%ecx
+
+# qhasm:   m1 &= 0x0ffffffc
+# asm 1: and  $0x0ffffffc,<m1=int32#4
+# asm 2: and  $0x0ffffffc,<m1=%ebx
+and  $0x0ffffffc,%ebx
+
+# qhasm:   m2 &= 0x0ffffffc
+# asm 1: and  $0x0ffffffc,<m2=int32#5
+# asm 2: and  $0x0ffffffc,<m2=%esi
+and  $0x0ffffffc,%esi
+
+# qhasm:   m3 &= 0x0ffffffc
+# asm 1: and  $0x0ffffffc,<m3=int32#6
+# asm 2: and  $0x0ffffffc,<m3=%edi
+and  $0x0ffffffc,%edi
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int32#2,<d0=stack64#1
+# asm 2: movl <m0=%ecx,<d0=96(%esp)
+movl %ecx,96(%esp)
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int32#4,<d1=stack64#2
+# asm 2: movl <m1=%ebx,<d1=104(%esp)
+movl %ebx,104(%esp)
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int32#5,<d2=stack64#3
+# asm 2: movl <m2=%esi,<d2=112(%esp)
+movl %esi,112(%esp)
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int32#6,<d3=stack64#4
+# asm 2: movl <m3=%edi,<d3=120(%esp)
+movl %edi,120(%esp)
+
+# qhasm:   a0 = *(float64 *) &d0
+# asm 1: fldl <d0=stack64#1
+# asm 2: fldl <d0=96(%esp)
+fldl 96(%esp)
+# comment:fpstackfrombottom:<a0#24:
+
+# qhasm:   a0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset0
+fsubl crypto_onetimeauth_poly1305_x86_doffset0
+# comment:fpstackfrombottom:<a0#24:
+
+# qhasm:   a1 = *(float64 *) &d1
+# asm 1: fldl <d1=stack64#2
+# asm 2: fldl <d1=104(%esp)
+fldl 104(%esp)
+# comment:fpstackfrombottom:<a0#24:<a1#25:
+
+# qhasm:   a1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset1
+fsubl crypto_onetimeauth_poly1305_x86_doffset1
+# comment:fpstackfrombottom:<a0#24:<a1#25:
+
+# qhasm:   a2 = *(float64 *) &d2
+# asm 1: fldl <d2=stack64#3
+# asm 2: fldl <d2=112(%esp)
+fldl 112(%esp)
+# comment:fpstackfrombottom:<a0#24:<a1#25:<a2#26:
+
+# qhasm:   a2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset2
+fsubl crypto_onetimeauth_poly1305_x86_doffset2
+# comment:fpstackfrombottom:<a0#24:<a1#25:<a2#26:
+
+# qhasm:   a3 = *(float64 *) &d3
+# asm 1: fldl <d3=stack64#4
+# asm 2: fldl <d3=120(%esp)
+fldl 120(%esp)
+# comment:fpstackfrombottom:<a0#24:<a1#25:<a2#26:<a3#27:
+
+# qhasm:   a3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset3
+fsubl crypto_onetimeauth_poly1305_x86_doffset3
+# comment:fpstackfrombottom:<a0#24:<a1#25:<a2#26:<a3#27:
+
+# qhasm: internal stacktop a0
+# asm 1: fxch <a0=float80#4
+# asm 2: fxch <a0=%st(3)
+fxch %st(3)
+
+# qhasm:   *(float64 *) &r0 = a0
+# asm 1: fstpl >r0=stack64#5
+# asm 2: fstpl >r0=128(%esp)
+fstpl 128(%esp)
+# comment:fpstackfrombottom:<a3#27:<a1#25:<a2#26:
+
+# qhasm: internal stacktop a1
+# asm 1: fxch <a1=float80#2
+# asm 2: fxch <a1=%st(1)
+fxch %st(1)
+
+# qhasm:   *(float64 *) &r1 = a1
+# asm 1: fstl >r1=stack64#6
+# asm 2: fstl >r1=136(%esp)
+fstl 136(%esp)
+# comment:fpstackfrombottom:<a3#27:<a2#26:<a1#25:
+
+# qhasm:   a1 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<a3#27:<a2#26:<a1#25:
+
+# qhasm:   *(float64 *) &sr1 = a1
+# asm 1: fstpl >sr1=stack64#7
+# asm 2: fstpl >sr1=144(%esp)
+fstpl 144(%esp)
+# comment:fpstackfrombottom:<a3#27:<a2#26:
+
+# qhasm:   *(float64 *) &r2 = a2
+# asm 1: fstl >r2=stack64#8
+# asm 2: fstl >r2=152(%esp)
+fstl 152(%esp)
+# comment:fpstackfrombottom:<a3#27:<a2#26:
+
+# qhasm:   a2 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<a3#27:<a2#26:
+
+# qhasm:   *(float64 *) &sr2 = a2
+# asm 1: fstpl >sr2=stack64#9
+# asm 2: fstpl >sr2=160(%esp)
+fstpl 160(%esp)
+# comment:fpstackfrombottom:<a3#27:
+
+# qhasm:   *(float64 *) &r3 = a3
+# asm 1: fstl >r3=stack64#10
+# asm 2: fstl >r3=168(%esp)
+fstl 168(%esp)
+# comment:fpstackfrombottom:<a3#27:
+
+# qhasm:   a3 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<a3#27:
+
+# qhasm:   *(float64 *) &sr3 = a3
+# asm 1: fstpl >sr3=stack64#11
+# asm 2: fstpl >sr3=176(%esp)
+fstpl 176(%esp)
+# comment:fpstackfrombottom:
+
+# qhasm:   out = arg_out
+# asm 1: movl <arg_out=stack32#-1,>out=int32#4
+# asm 2: movl <arg_out=4(%esp,%eax),>out=%ebx
+movl 4(%esp,%eax),%ebx
+
+# qhasm:   m = arg_m
+# asm 1: movl <arg_m=stack32#-2,>m=int32#5
+# asm 2: movl <arg_m=8(%esp,%eax),>m=%esi
+movl 8(%esp,%eax),%esi
+
+# qhasm:   l = arg_l
+# asm 1: movl <arg_l=stack32#-3,>l=int32#2
+# asm 2: movl <arg_l=12(%esp,%eax),>l=%ecx
+movl 12(%esp,%eax),%ecx
+
+# qhasm:   h3 = 0
+fldz
+# comment:fpstackfrombottom:<h3#38:
+
+# qhasm:   h2 = 0
+fldz
+# comment:fpstackfrombottom:<h3#38:<h2#39:
+
+# qhasm:   h1 = 0
+fldz
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h0 = 0
+fldz
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   k_stack = k
+# asm 1: movl <k=int32#3,>k_stack=stack32#6
+# asm 2: movl <k=%edx,>k_stack=20(%esp)
+movl %edx,20(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#4,>out_stack=stack32#7
+# asm 2: movl <out=%ebx,>out_stack=24(%esp)
+movl %ebx,24(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:                          unsigned<? l - 16
+# asm 1: cmp  $16,<l=int32#2
+# asm 2: cmp  $16,<l=%ecx
+cmp  $16,%ecx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: goto addatmost15bytes if unsigned<
+jb ._addatmost15bytes
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: initialatleast16bytes:
+._initialatleast16bytes:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m3 = *(uint32 *) (m + 12)
+# asm 1: movl 12(<m=int32#5),>m3=int32#1
+# asm 2: movl 12(<m=%esi),>m3=%eax
+movl 12(%esi),%eax
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m2 = *(uint32 *) (m + 8)
+# asm 1: movl 8(<m=int32#5),>m2=int32#3
+# asm 2: movl 8(<m=%esi),>m2=%edx
+movl 8(%esi),%edx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m1 = *(uint32 *) (m + 4)
+# asm 1: movl 4(<m=int32#5),>m1=int32#4
+# asm 2: movl 4(<m=%esi),>m1=%ebx
+movl 4(%esi),%ebx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m0 = *(uint32 *) (m + 0)
+# asm 1: movl 0(<m=int32#5),>m0=int32#6
+# asm 2: movl 0(<m=%esi),>m0=%edi
+movl 0(%esi),%edi
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int32#1,<d3=stack64#4
+# asm 2: movl <m3=%eax,<d3=120(%esp)
+movl %eax,120(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int32#3,<d2=stack64#3
+# asm 2: movl <m2=%edx,<d2=112(%esp)
+movl %edx,112(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int32#4,<d1=stack64#2
+# asm 2: movl <m1=%ebx,<d1=104(%esp)
+movl %ebx,104(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int32#6,<d0=stack64#1
+# asm 2: movl <m0=%edi,<d0=96(%esp)
+movl %edi,96(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m += 16
+# asm 1: add  $16,<m=int32#5
+# asm 2: add  $16,<m=%esi
+add  $16,%esi
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   l -= 16
+# asm 1: sub  $16,<l=int32#2
+# asm 2: sub  $16,<l=%ecx
+sub  $16,%ecx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   h3 += *(float64 *) &d3
+# asm 1: faddl <d3=stack64#4
+# asm 2: faddl <d3=120(%esp)
+faddl 120(%esp)
+# comment:fpstackfrombottom:<h0#41:<h2#39:<h1#40:<h3#38:
+
+# qhasm:   h3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset3minustwo128
+fsubl crypto_onetimeauth_poly1305_x86_doffset3minustwo128
+# comment:fpstackfrombottom:<h0#41:<h2#39:<h1#40:<h3#38:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+
+# qhasm:   h1 += *(float64 *) &d1
+# asm 1: faddl <d1=stack64#2
+# asm 2: faddl <d1=104(%esp)
+faddl 104(%esp)
+# comment:fpstackfrombottom:<h0#41:<h2#39:<h3#38:<h1#40:
+
+# qhasm:   h1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset1
+fsubl crypto_onetimeauth_poly1305_x86_doffset1
+# comment:fpstackfrombottom:<h0#41:<h2#39:<h3#38:<h1#40:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm:   h2 += *(float64 *) &d2
+# asm 1: faddl <d2=stack64#3
+# asm 2: faddl <d2=112(%esp)
+faddl 112(%esp)
+# comment:fpstackfrombottom:<h0#41:<h1#40:<h3#38:<h2#39:
+
+# qhasm:   h2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset2
+fsubl crypto_onetimeauth_poly1305_x86_doffset2
+# comment:fpstackfrombottom:<h0#41:<h1#40:<h3#38:<h2#39:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += *(float64 *) &d0
+# asm 1: faddl <d0=stack64#1
+# asm 2: faddl <d0=96(%esp)
+faddl 96(%esp)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   h0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset0
+fsubl crypto_onetimeauth_poly1305_x86_doffset0
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:                                  unsigned<? l - 16
+# asm 1: cmp  $16,<l=int32#2
+# asm 2: cmp  $16,<l=%ecx
+cmp  $16,%ecx
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm: goto multiplyaddatmost15bytes if unsigned<
+jb ._multiplyaddatmost15bytes
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm: multiplyaddatleast16bytes:
+._multiplyaddatleast16bytes:
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   m3 = *(uint32 *) (m + 12)
+# asm 1: movl 12(<m=int32#5),>m3=int32#1
+# asm 2: movl 12(<m=%esi),>m3=%eax
+movl 12(%esi),%eax
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   m2 = *(uint32 *) (m + 8)
+# asm 1: movl 8(<m=int32#5),>m2=int32#3
+# asm 2: movl 8(<m=%esi),>m2=%edx
+movl 8(%esi),%edx
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   m1 = *(uint32 *) (m + 4)
+# asm 1: movl 4(<m=int32#5),>m1=int32#4
+# asm 2: movl 4(<m=%esi),>m1=%ebx
+movl 4(%esi),%ebx
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   m0 = *(uint32 *) (m + 0)
+# asm 1: movl 0(<m=int32#5),>m0=int32#6
+# asm 2: movl 0(<m=%esi),>m0=%edi
+movl 0(%esi),%edi
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int32#1,<d3=stack64#4
+# asm 2: movl <m3=%eax,<d3=120(%esp)
+movl %eax,120(%esp)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int32#3,<d2=stack64#3
+# asm 2: movl <m2=%edx,<d2=112(%esp)
+movl %edx,112(%esp)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int32#4,<d1=stack64#2
+# asm 2: movl <m1=%ebx,<d1=104(%esp)
+movl %ebx,104(%esp)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int32#6,<d0=stack64#1
+# asm 2: movl <m0=%edi,<d0=96(%esp)
+movl %edi,96(%esp)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   m += 16
+# asm 1: add  $16,<m=int32#5
+# asm 2: add  $16,<m=%esi
+add  $16,%esi
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   l -= 16
+# asm 1: sub  $16,<l=int32#2
+# asm 2: sub  $16,<l=%ecx
+sub  $16,%ecx
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fldl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#3,<x0=float80#1
+# asm 2: fadd <h3=%st(2),<x0=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fsubl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#3
+# asm 2: fsubr <x0=%st(0),<h3=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fldl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:<x1#55:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:<x1#55:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fsubl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:<x1#55:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#54:<x1#55:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#2
+# asm 2: faddp <h0=%st(0),<x0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fldl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#5,<x2=float80#1
+# asm 2: fadd <h1=%st(4),<x2=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fsubl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#5
+# asm 2: fsubr <x2=%st(0),<h1=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fldl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:<x3#57:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#7,<x3=float80#1
+# asm 2: fadd <h2=%st(6),<x3=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:<x3#57:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fsubl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:<x3#57:
+
+# qhasm:   h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#7
+# asm 2: fsubr <x3=%st(0),<h2=%st(6)
+fsubr %st(0),%st(6)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:<x3#57:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#7
+# asm 2: fxch <h2=%st(6)
+fxch %st(6)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#2
+# asm 2: faddp <h2=%st(0),<x2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#57:<h1#40:<h3#38:<x1#55:<x0#54:<x2#56:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#6
+# asm 2: faddp <h3=%st(0),<x3=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#57:<h1#40:<x2#56:<x1#55:<x0#54:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#4
+# asm 2: fxch <h1=%st(3)
+fxch %st(3)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:
+
+# qhasm:   h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#10
+# asm 2: fldl <r3=168(%esp)
+fldl 168(%esp)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:
+
+# qhasm:   h3 *= x0
+# asm 1: fmul <x0=float80#4,<h3=float80#1
+# asm 2: fmul <x0=%st(3),<h3=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:
+
+# qhasm:   h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#8
+# asm 2: fldl <r2=152(%esp)
+fldl 152(%esp)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:<h2#39:
+
+# qhasm:   h2 *= x0
+# asm 1: fmul <x0=float80#5,<h2=float80#1
+# asm 2: fmul <x0=%st(4),<h2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:<h2#39:
+
+# qhasm:   h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h1 *= x0
+# asm 1: fmul <x0=float80#6,<h1=float80#1
+# asm 2: fmul <x0=%st(5),<h1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#57:<x0#54:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#7
+# asm 2: fmulp <x0=%st(0),<h0=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#8
+# asm 2: fldl <r2=152(%esp)
+fldl 152(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<r2x1#58:
+
+# qhasm:   r2x1 *= x1
+# asm 1: fmul <x1=float80#5,<r2x1=float80#1
+# asm 2: fmul <x1=%st(4),<r2x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<r2x1#58:
+
+# qhasm:   h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<r1x1#59:
+
+# qhasm:   r1x1 *= x1
+# asm 1: fmul <x1=float80#5,<r1x1=float80#1
+# asm 2: fmul <x1=%st(4),<r1x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<r1x1#59:
+
+# qhasm:   h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<r0x1#60:
+
+# qhasm:   r0x1 *= x1
+# asm 1: fmul <x1=float80#5,<r0x1=float80#1
+# asm 2: fmul <x1=%st(4),<r0x1=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<r0x1#60:
+
+# qhasm:   h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<x1#55:<h3#38:<h2#39:<h1#40:<sr3x1#61:
+
+# qhasm:   sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#5
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<sr3x1#61:<h3#38:<h2#39:<h1#40:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#4
+# asm 2: fxch <sr3x1=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#6
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:
+
+# qhasm:   r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<r1x2#62:
+
+# qhasm:   r1x2 *= x2
+# asm 1: fmul <x2=float80#5,<r1x2=float80#1
+# asm 2: fmul <x2=%st(4),<r1x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<r1x2#62:
+
+# qhasm:   h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:
+
+# qhasm:   r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<r0x2#63:
+
+# qhasm:   r0x2 *= x2
+# asm 1: fmul <x2=float80#5,<r0x2=float80#1
+# asm 2: fmul <x2=%st(4),<r0x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<r0x2#63:
+
+# qhasm:   h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:
+
+# qhasm:   sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<sr3x2#64:
+
+# qhasm:   sr3x2 *= x2
+# asm 1: fmul <x2=float80#5,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(4),<sr3x2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<sr3x2#64:
+
+# qhasm:   h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#4
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:
+
+# qhasm:   sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#9
+# asm 2: fldl <sr2=160(%esp)
+fldl 160(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<x2#56:<h1#40:<h3#38:<h2#39:<sr2x2#65:
+
+# qhasm:   sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#5
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<sr2x2#65:<h1#40:<h3#38:<h2#39:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#4
+# asm 2: fxch <sr2x2=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#5
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<h2#39:<h1#40:<h3#38:
+
+# qhasm:   r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<h2#39:<h1#40:<h3#38:<r0x3#66:
+
+# qhasm:   r0x3 *= x3
+# asm 1: fmul <x3=float80#6,<r0x3=float80#1
+# asm 2: fmul <x3=%st(5),<r0x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<h2#39:<h1#40:<h3#38:<r0x3#66:
+
+# qhasm:   h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#57:<h0#41:<h2#39:<h1#40:<h3#38:
+
+# qhasm:   stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h1#40:<h0#41:<sr3x3#67:
+
+# qhasm:   sr3x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr3x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h1#40:<h0#41:<sr3x3#67:
+
+# qhasm:   h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#4
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h0#41:<h1#40:
+
+# qhasm:   sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#9
+# asm 2: fldl <sr2=160(%esp)
+fldl 160(%esp)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h0#41:<h1#40:<sr2x3#68:
+
+# qhasm:   sr2x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr2x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h0#41:<h1#40:<sr2x3#68:
+
+# qhasm:   h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#2
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h0#41:<h1#40:
+
+# qhasm:   sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#7
+# asm 2: fldl <sr1=144(%esp)
+fldl 144(%esp)
+# comment:fpstackfrombottom:<x3#57:<h3#38:<h2#39:<h0#41:<h1#40:<sr1x3#69:
+
+# qhasm:   sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#6
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<sr1x3#69:<h3#38:<h2#39:<h0#41:<h1#40:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#5
+# asm 2: fxch <sr1x3=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#2
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:
+
+# qhasm:                                    unsigned<? l - 16
+# asm 1: cmp  $16,<l=int32#2
+# asm 2: cmp  $16,<l=%ecx
+cmp  $16,%ecx
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:
+
+# qhasm:   stacktop h3
+# asm 1: fxch <h3=float80#3
+# asm 2: fxch <h3=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h2#39:<h3#38:
+
+# qhasm:   y3 = *(float64 *) &d3
+# asm 1: fldl <d3=stack64#4
+# asm 2: fldl <d3=120(%esp)
+fldl 120(%esp)
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h2#39:<h3#38:<y3#71:
+
+# qhasm:   y3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset3minustwo128
+fsubl crypto_onetimeauth_poly1305_x86_doffset3minustwo128
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h2#39:<h3#38:<y3#71:
+
+# qhasm:   h3 += y3
+# asm 1: faddp <y3=float80#1,<h3=float80#2
+# asm 2: faddp <y3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h2#39:<h3#38:
+
+# qhasm:   stacktop h2
+# asm 1: fxch <h2=float80#2
+# asm 2: fxch <h2=%st(1)
+fxch %st(1)
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm:   y2 = *(float64 *) &d2
+# asm 1: fldl <d2=stack64#3
+# asm 2: fldl <d2=112(%esp)
+fldl 112(%esp)
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h3#38:<h2#39:<y2#72:
+
+# qhasm:   y2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset2
+fsubl crypto_onetimeauth_poly1305_x86_doffset2
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h3#38:<h2#39:<y2#72:
+
+# qhasm:   h2 += y2
+# asm 1: faddp <y2=float80#1,<h2=float80#2
+# asm 2: faddp <y2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm:   stacktop h1
+# asm 1: fxch <h1=float80#4
+# asm 2: fxch <h1=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h3#38:<h1#40:
+
+# qhasm:   y1 = *(float64 *) &d1
+# asm 1: fldl <d1=stack64#2
+# asm 2: fldl <d1=104(%esp)
+fldl 104(%esp)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h3#38:<h1#40:<y1#73:
+
+# qhasm:   y1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset1
+fsubl crypto_onetimeauth_poly1305_x86_doffset1
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h3#38:<h1#40:<y1#73:
+
+# qhasm:   h1 += y1
+# asm 1: faddp <y1=float80#1,<h1=float80#2
+# asm 2: faddp <y1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h3#38:<h1#40:
+
+# qhasm:   stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   y0 = *(float64 *) &d0
+# asm 1: fldl <d0=stack64#1
+# asm 2: fldl <d0=96(%esp)
+fldl 96(%esp)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<y0#74:
+
+# qhasm:   y0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset0
+fsubl crypto_onetimeauth_poly1305_x86_doffset0
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<y0#74:
+
+# qhasm:   h0 += y0
+# asm 1: faddp <y0=float80#1,<h0=float80#2
+# asm 2: faddp <y0=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm: goto multiplyaddatleast16bytes if !unsigned<
+jae ._multiplyaddatleast16bytes
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+# comment:fp stack unchanged by fallthrough
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm: multiplyaddatmost15bytes:
+._multiplyaddatmost15bytes:
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fldl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#3,<x0=float80#1
+# asm 2: fadd <h3=%st(2),<x0=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fsubl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#3
+# asm 2: fsubr <x0=%st(0),<h3=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fldl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fsubl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fldl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#6,<x2=float80#1
+# asm 2: fadd <h1=%st(5),<x2=%st(0)
+fadd %st(5),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fsubl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#6
+# asm 2: fsubr <x2=%st(0),<h1=%st(5)
+fsubr %st(0),%st(5)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fldl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:<x3#78:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#8,<x3=float80#1
+# asm 2: fadd <h2=%st(7),<x3=%st(0)
+fadd %st(7),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:<x3#78:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fsubl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:<x3#78:
+
+# qhasm:   h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#8
+# asm 2: fsubr <x3=%st(0),<h2=%st(7)
+fsubr %st(0),%st(7)
+# comment:fpstackfrombottom:<h2#39:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:<x3#78:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#8
+# asm 2: fxch <h2=%st(7)
+fxch %st(7)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#2
+# asm 2: faddp <h2=%st(0),<x2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#78:<h1#40:<h3#38:<h0#41:<x0#75:<x1#76:<x2#77:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#6
+# asm 2: fxch <h1=%st(5)
+fxch %st(5)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h3#38:<h0#41:<x0#75:<x1#76:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#6
+# asm 2: faddp <h3=%st(0),<x3=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<x0#75:
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#2
+# asm 2: faddp <h0=%st(0),<x0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:
+
+# qhasm:   h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#10
+# asm 2: fldl <r3=168(%esp)
+fldl 168(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:
+
+# qhasm:   h3 *= x0
+# asm 1: fmul <x0=float80#2,<h3=float80#1
+# asm 2: fmul <x0=%st(1),<h3=%st(0)
+fmul %st(1),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:
+
+# qhasm:   h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#8
+# asm 2: fldl <r2=152(%esp)
+fldl 152(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:<h2#39:
+
+# qhasm:   h2 *= x0
+# asm 1: fmul <x0=float80#3,<h2=float80#1
+# asm 2: fmul <x0=%st(2),<h2=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:<h2#39:
+
+# qhasm:   h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h1 *= x0
+# asm 1: fmul <x0=float80#4,<h1=float80#1
+# asm 2: fmul <x0=%st(3),<h1=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<x0#75:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#5
+# asm 2: fmulp <x0=%st(0),<h0=%st(4)
+fmulp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#8
+# asm 2: fldl <r2=152(%esp)
+fldl 152(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<r2x1#79:
+
+# qhasm:   r2x1 *= x1
+# asm 1: fmul <x1=float80#6,<r2x1=float80#1
+# asm 2: fmul <x1=%st(5),<r2x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<r2x1#79:
+
+# qhasm:   h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<r1x1#80:
+
+# qhasm:   r1x1 *= x1
+# asm 1: fmul <x1=float80#6,<r1x1=float80#1
+# asm 2: fmul <x1=%st(5),<r1x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<r1x1#80:
+
+# qhasm:   h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<r0x1#81:
+
+# qhasm:   r0x1 *= x1
+# asm 1: fmul <x1=float80#6,<r0x1=float80#1
+# asm 2: fmul <x1=%st(5),<r0x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<r0x1#81:
+
+# qhasm:   h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<x1#76:<h0#41:<h3#38:<h2#39:<h1#40:<sr3x1#82:
+
+# qhasm:   sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#6
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<sr3x1#82:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#5
+# asm 2: fxch <sr3x1=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#4
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm:   r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<r1x2#83:
+
+# qhasm:   r1x2 *= x2
+# asm 1: fmul <x2=float80#6,<r1x2=float80#1
+# asm 2: fmul <x2=%st(5),<r1x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<r1x2#83:
+
+# qhasm:   h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm:   r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<r0x2#84:
+
+# qhasm:   r0x2 *= x2
+# asm 1: fmul <x2=float80#6,<r0x2=float80#1
+# asm 2: fmul <x2=%st(5),<r0x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<r0x2#84:
+
+# qhasm:   h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm:   sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<sr3x2#85:
+
+# qhasm:   sr3x2 *= x2
+# asm 1: fmul <x2=float80#6,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(5),<sr3x2=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<sr3x2#85:
+
+# qhasm:   h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#5
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm:   sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#9
+# asm 2: fldl <sr2=160(%esp)
+fldl 160(%esp)
+# comment:fpstackfrombottom:<x3#78:<x2#77:<h1#40:<h0#41:<h3#38:<h2#39:<sr2x2#86:
+
+# qhasm:   sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#6
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x3#78:<sr2x2#86:<h1#40:<h0#41:<h3#38:<h2#39:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#5
+# asm 2: fxch <sr2x2=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#3
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:
+
+# qhasm:   r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<r0x3#87:
+
+# qhasm:   r0x3 *= x3
+# asm 1: fmul <x3=float80#6,<r0x3=float80#1
+# asm 2: fmul <x3=%st(5),<r0x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<r0x3#87:
+
+# qhasm:   h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:
+
+# qhasm:   sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<sr3x3#88:
+
+# qhasm:   sr3x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr3x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<sr3x3#88:
+
+# qhasm:   h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#5
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:
+
+# qhasm:   sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#9
+# asm 2: fldl <sr2=160(%esp)
+fldl 160(%esp)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<sr2x3#89:
+
+# qhasm:   sr2x3 *= x3
+# asm 1: fmul <x3=float80#6,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(5),<sr2x3=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<sr2x3#89:
+
+# qhasm:   h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#4
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:
+
+# qhasm:   sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#7
+# asm 2: fldl <sr1=144(%esp)
+fldl 144(%esp)
+# comment:fpstackfrombottom:<x3#78:<h2#39:<h1#40:<h0#41:<h3#38:<sr1x3#90:
+
+# qhasm:   sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#6
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<sr1x3#90:<h2#39:<h1#40:<h0#41:<h3#38:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#5
+# asm 2: fxch <sr1x3=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#2
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fp stack unchanged by fallthrough
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: addatmost15bytes:
+._addatmost15bytes:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:                     =? l - 0
+# asm 1: cmp  $0,<l=int32#2
+# asm 2: cmp  $0,<l=%ecx
+cmp  $0,%ecx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fp stack unchanged by jump
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: goto nomorebytes if =
+je ._nomorebytes
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: stack128 lastchunk
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: int32 destination
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   ((uint32 *)&lastchunk)[0] = 0
+# asm 1: movl $0,>lastchunk=stack128#1
+# asm 2: movl $0,>lastchunk=64(%esp)
+movl $0,64(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   ((uint32 *)&lastchunk)[1] = 0
+# asm 1: movl $0,4+<lastchunk=stack128#1
+# asm 2: movl $0,4+<lastchunk=64(%esp)
+movl $0,4+64(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   ((uint32 *)&lastchunk)[2] = 0
+# asm 1: movl $0,8+<lastchunk=stack128#1
+# asm 2: movl $0,8+<lastchunk=64(%esp)
+movl $0,8+64(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   ((uint32 *)&lastchunk)[3] = 0
+# asm 1: movl $0,12+<lastchunk=stack128#1
+# asm 2: movl $0,12+<lastchunk=64(%esp)
+movl $0,12+64(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   destination = &lastchunk
+# asm 1: leal <lastchunk=stack128#1,>destination=int32#6
+# asm 2: leal <lastchunk=64(%esp),>destination=%edi
+leal 64(%esp),%edi
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   while (l) { *destination++ = *m++; --l }
+rep movsb
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   *(uint8 *) (destination + 0) = 1
+# asm 1: movb $1,0(<destination=int32#6)
+# asm 2: movb $1,0(<destination=%edi)
+movb $1,0(%edi)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m3 = ((uint32 *)&lastchunk)[3]
+# asm 1: movl 12+<lastchunk=stack128#1,>m3=int32#1
+# asm 2: movl 12+<lastchunk=64(%esp),>m3=%eax
+movl 12+64(%esp),%eax
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m2 = ((uint32 *)&lastchunk)[2]
+# asm 1: movl 8+<lastchunk=stack128#1,>m2=int32#2
+# asm 2: movl 8+<lastchunk=64(%esp),>m2=%ecx
+movl 8+64(%esp),%ecx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m1 = ((uint32 *)&lastchunk)[1]
+# asm 1: movl 4+<lastchunk=stack128#1,>m1=int32#3
+# asm 2: movl 4+<lastchunk=64(%esp),>m1=%edx
+movl 4+64(%esp),%edx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   m0 = ((uint32 *)&lastchunk)[0]
+# asm 1: movl <lastchunk=stack128#1,>m0=int32#4
+# asm 2: movl <lastchunk=64(%esp),>m0=%ebx
+movl 64(%esp),%ebx
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d3 bottom = m3
+# asm 1: movl <m3=int32#1,<d3=stack64#4
+# asm 2: movl <m3=%eax,<d3=120(%esp)
+movl %eax,120(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d2 bottom = m2
+# asm 1: movl <m2=int32#2,<d2=stack64#3
+# asm 2: movl <m2=%ecx,<d2=112(%esp)
+movl %ecx,112(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d1 bottom = m1
+# asm 1: movl <m1=int32#3,<d1=stack64#2
+# asm 2: movl <m1=%edx,<d1=104(%esp)
+movl %edx,104(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   inplace d0 bottom = m0
+# asm 1: movl <m0=int32#4,<d0=stack64#1
+# asm 2: movl <m0=%ebx,<d0=96(%esp)
+movl %ebx,96(%esp)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   h3 += *(float64 *) &d3
+# asm 1: faddl <d3=stack64#4
+# asm 2: faddl <d3=120(%esp)
+faddl 120(%esp)
+# comment:fpstackfrombottom:<h0#41:<h2#39:<h1#40:<h3#38:
+
+# qhasm:   h3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset3
+fsubl crypto_onetimeauth_poly1305_x86_doffset3
+# comment:fpstackfrombottom:<h0#41:<h2#39:<h1#40:<h3#38:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm:   h2 += *(float64 *) &d2
+# asm 1: faddl <d2=stack64#3
+# asm 2: faddl <d2=112(%esp)
+faddl 112(%esp)
+# comment:fpstackfrombottom:<h0#41:<h3#38:<h1#40:<h2#39:
+
+# qhasm:   h2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset2
+fsubl crypto_onetimeauth_poly1305_x86_doffset2
+# comment:fpstackfrombottom:<h0#41:<h3#38:<h1#40:<h2#39:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#2
+# asm 2: fxch <h1=%st(1)
+fxch %st(1)
+
+# qhasm:   h1 += *(float64 *) &d1
+# asm 1: faddl <d1=stack64#2
+# asm 2: faddl <d1=104(%esp)
+faddl 104(%esp)
+# comment:fpstackfrombottom:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset1
+fsubl crypto_onetimeauth_poly1305_x86_doffset1
+# comment:fpstackfrombottom:<h0#41:<h3#38:<h2#39:<h1#40:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#4
+# asm 2: fxch <h0=%st(3)
+fxch %st(3)
+
+# qhasm:   h0 += *(float64 *) &d0
+# asm 1: faddl <d0=stack64#1
+# asm 2: faddl <d0=96(%esp)
+faddl 96(%esp)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:
+
+# qhasm:   h0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_doffset0
+fsubl crypto_onetimeauth_poly1305_x86_doffset0
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fldl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#4,<x0=float80#1
+# asm 2: fadd <h3=%st(3),<x0=%st(0)
+fadd %st(3),%st(0)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fsubl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#4
+# asm 2: fsubr <x0=%st(0),<h3=%st(3)
+fsubr %st(0),%st(3)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fldl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fsubl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fldl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#7,<x2=float80#1
+# asm 2: fadd <h1=%st(6),<x2=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fsubl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#7
+# asm 2: fsubr <x2=%st(0),<h1=%st(6)
+fsubr %st(0),%st(6)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fldl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#6,<x3=float80#1
+# asm 2: fadd <h2=%st(5),<x3=%st(0)
+fadd %st(5),%st(0)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fsubl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm:   h2 -= x3
+# asm 1: fsubr <x3=float80#1,<h2=float80#6
+# asm 2: fsubr <x3=%st(0),<h2=%st(5)
+fsubr %st(0),%st(5)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<h0#41:<x0#98:<x1#99:<x2#100:<x3#101:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#5
+# asm 2: fxch <h0=%st(4)
+fxch %st(4)
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#4
+# asm 2: faddp <h0=%st(0),<x0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h1#40:<h3#38:<h2#39:<x3#101:<x0#98:<x1#99:<x2#100:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#7
+# asm 2: fxch <h1=%st(6)
+fxch %st(6)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h3#38:<h2#39:<x3#101:<x0#98:<x1#99:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#4
+# asm 2: fxch <h2=%st(3)
+fxch %st(3)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#6
+# asm 2: faddp <h2=%st(0),<x2=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h3#38:<x1#99:<x3#101:<x0#98:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#4
+# asm 2: fxch <h3=%st(3)
+fxch %st(3)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#2
+# asm 2: faddp <h3=%st(0),<x3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:
+
+# qhasm:   h3 = *(float64 *) &r3
+# asm 1: fldl <r3=stack64#10
+# asm 2: fldl <r3=168(%esp)
+fldl 168(%esp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:
+
+# qhasm:   h3 *= x0
+# asm 1: fmul <x0=float80#4,<h3=float80#1
+# asm 2: fmul <x0=%st(3),<h3=%st(0)
+fmul %st(3),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:
+
+# qhasm:   h2 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#8
+# asm 2: fldl <r2=152(%esp)
+fldl 152(%esp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:<h2#39:
+
+# qhasm:   h2 *= x0
+# asm 1: fmul <x0=float80#5,<h2=float80#1
+# asm 2: fmul <x0=%st(4),<h2=%st(0)
+fmul %st(4),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:<h2#39:
+
+# qhasm:   h1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h1 *= x0
+# asm 1: fmul <x0=float80#6,<h1=float80#1
+# asm 2: fmul <x0=%st(5),<h1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   h0 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x2#100:<x0#98:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   h0 *= x0
+# asm 1: fmulp <x0=float80#1,<h0=float80#7
+# asm 2: fmulp <x0=%st(0),<h0=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r2x1 = *(float64 *) &r2
+# asm 1: fldl <r2=stack64#8
+# asm 2: fldl <r2=152(%esp)
+fldl 152(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<r2x1#102:
+
+# qhasm:   r2x1 *= x1
+# asm 1: fmul <x1=float80#6,<r2x1=float80#1
+# asm 2: fmul <x1=%st(5),<r2x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<r2x1#102:
+
+# qhasm:   h3 += r2x1
+# asm 1: faddp <r2x1=float80#1,<h3=float80#4
+# asm 2: faddp <r2x1=%st(0),<h3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r1x1 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<r1x1#103:
+
+# qhasm:   r1x1 *= x1
+# asm 1: fmul <x1=float80#6,<r1x1=float80#1
+# asm 2: fmul <x1=%st(5),<r1x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<r1x1#103:
+
+# qhasm:   h2 += r1x1
+# asm 1: faddp <r1x1=float80#1,<h2=float80#3
+# asm 2: faddp <r1x1=%st(0),<h2=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   r0x1 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<r0x1#104:
+
+# qhasm:   r0x1 *= x1
+# asm 1: fmul <x1=float80#6,<r0x1=float80#1
+# asm 2: fmul <x1=%st(5),<r0x1=%st(0)
+fmul %st(5),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<r0x1#104:
+
+# qhasm:   h1 += r0x1
+# asm 1: faddp <r0x1=float80#1,<h1=float80#2
+# asm 2: faddp <r0x1=%st(0),<h1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm:   sr3x1 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<x1#99:<x3#101:<h3#38:<h2#39:<h1#40:<sr3x1#105:
+
+# qhasm:   sr3x1 *= x1
+# asm 1: fmulp <x1=float80#1,<sr3x1=float80#6
+# asm 2: fmulp <x1=%st(0),<sr3x1=%st(5)
+fmulp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<sr3x1#105:<x3#101:<h3#38:<h2#39:<h1#40:
+
+# qhasm: internal stacktop sr3x1
+# asm 1: fxch <sr3x1=float80#5
+# asm 2: fxch <sr3x1=%st(4)
+fxch %st(4)
+
+# qhasm:   h0 += sr3x1
+# asm 1: faddp <sr3x1=float80#1,<h0=float80#6
+# asm 2: faddp <sr3x1=%st(0),<h0=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:
+
+# qhasm:   r1x2 = *(float64 *) &r1
+# asm 1: fldl <r1=stack64#6
+# asm 2: fldl <r1=136(%esp)
+fldl 136(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<r1x2#106:
+
+# qhasm:   r1x2 *= x2
+# asm 1: fmul <x2=float80#7,<r1x2=float80#1
+# asm 2: fmul <x2=%st(6),<r1x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<r1x2#106:
+
+# qhasm:   h3 += r1x2
+# asm 1: faddp <r1x2=float80#1,<h3=float80#3
+# asm 2: faddp <r1x2=%st(0),<h3=%st(2)
+faddp %st(0),%st(2)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:
+
+# qhasm:   r0x2 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<r0x2#107:
+
+# qhasm:   r0x2 *= x2
+# asm 1: fmul <x2=float80#7,<r0x2=float80#1
+# asm 2: fmul <x2=%st(6),<r0x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<r0x2#107:
+
+# qhasm:   h2 += r0x2
+# asm 1: faddp <r0x2=float80#1,<h2=float80#2
+# asm 2: faddp <r0x2=%st(0),<h2=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:
+
+# qhasm:   sr3x2 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<sr3x2#108:
+
+# qhasm:   sr3x2 *= x2
+# asm 1: fmul <x2=float80#7,<sr3x2=float80#1
+# asm 2: fmul <x2=%st(6),<sr3x2=%st(0)
+fmul %st(6),%st(0)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<sr3x2#108:
+
+# qhasm:   h1 += sr3x2
+# asm 1: faddp <sr3x2=float80#1,<h1=float80#5
+# asm 2: faddp <sr3x2=%st(0),<h1=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:
+
+# qhasm:   sr2x2 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#9
+# asm 2: fldl <sr2=160(%esp)
+fldl 160(%esp)
+# comment:fpstackfrombottom:<x2#100:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:<sr2x2#109:
+
+# qhasm:   sr2x2 *= x2
+# asm 1: fmulp <x2=float80#1,<sr2x2=float80#7
+# asm 2: fmulp <x2=%st(0),<sr2x2=%st(6)
+fmulp %st(0),%st(6)
+# comment:fpstackfrombottom:<sr2x2#109:<h0#41:<h1#40:<x3#101:<h3#38:<h2#39:
+
+# qhasm: internal stacktop sr2x2
+# asm 1: fxch <sr2x2=float80#6
+# asm 2: fxch <sr2x2=%st(5)
+fxch %st(5)
+
+# qhasm:   h0 += sr2x2
+# asm 1: faddp <sr2x2=float80#1,<h0=float80#5
+# asm 2: faddp <sr2x2=%st(0),<h0=%st(4)
+faddp %st(0),%st(4)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:
+
+# qhasm:   r0x3 = *(float64 *) &r0
+# asm 1: fldl <r0=stack64#5
+# asm 2: fldl <r0=128(%esp)
+fldl 128(%esp)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<r0x3#110:
+
+# qhasm:   r0x3 *= x3
+# asm 1: fmul <x3=float80#3,<r0x3=float80#1
+# asm 2: fmul <x3=%st(2),<r0x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<r0x3#110:
+
+# qhasm:   h3 += r0x3
+# asm 1: faddp <r0x3=float80#1,<h3=float80#2
+# asm 2: faddp <r0x3=%st(0),<h3=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:
+
+# qhasm:   sr3x3 = *(float64 *) &sr3
+# asm 1: fldl <sr3=stack64#11
+# asm 2: fldl <sr3=176(%esp)
+fldl 176(%esp)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<sr3x3#111:
+
+# qhasm:   sr3x3 *= x3
+# asm 1: fmul <x3=float80#3,<sr3x3=float80#1
+# asm 2: fmul <x3=%st(2),<sr3x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<sr3x3#111:
+
+# qhasm:   h2 += sr3x3
+# asm 1: faddp <sr3x3=float80#1,<h2=float80#6
+# asm 2: faddp <sr3x3=%st(0),<h2=%st(5)
+faddp %st(0),%st(5)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:
+
+# qhasm:   sr2x3 = *(float64 *) &sr2
+# asm 1: fldl <sr2=stack64#9
+# asm 2: fldl <sr2=160(%esp)
+fldl 160(%esp)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<sr2x3#112:
+
+# qhasm:   sr2x3 *= x3
+# asm 1: fmul <x3=float80#3,<sr2x3=float80#1
+# asm 2: fmul <x3=%st(2),<sr2x3=%st(0)
+fmul %st(2),%st(0)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<sr2x3#112:
+
+# qhasm:   h1 += sr2x3
+# asm 1: faddp <sr2x3=float80#1,<h1=float80#4
+# asm 2: faddp <sr2x3=%st(0),<h1=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:
+
+# qhasm:   sr1x3 = *(float64 *) &sr1
+# asm 1: fldl <sr1=stack64#7
+# asm 2: fldl <sr1=144(%esp)
+fldl 144(%esp)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<x3#101:<h3#38:<sr1x3#113:
+
+# qhasm:   sr1x3 *= x3
+# asm 1: fmulp <x3=float80#1,<sr1x3=float80#3
+# asm 2: fmulp <x3=%st(0),<sr1x3=%st(2)
+fmulp %st(0),%st(2)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<sr1x3#113:<h3#38:
+
+# qhasm: internal stacktop sr1x3
+# asm 1: fxch <sr1x3=float80#2
+# asm 2: fxch <sr1x3=%st(1)
+fxch %st(1)
+
+# qhasm:   h0 += sr1x3
+# asm 1: faddp <sr1x3=float80#1,<h0=float80#4
+# asm 2: faddp <sr1x3=%st(0),<h0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h2#39:<h0#41:<h1#40:<h3#38:
+# comment:automatically reorganizing fp stack for fallthrough
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#4
+# asm 2: fxch <h2=%st(3)
+fxch %st(3)
+# comment:fpstackfrombottom:<h3#38:<h0#41:<h1#40:<h2#39:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#3
+# asm 2: fxch <h0=%st(2)
+fxch %st(2)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm: nomorebytes:
+._nomorebytes:
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:
+
+# qhasm:   x0 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fldl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:
+
+# qhasm:   x0 += h3
+# asm 1: fadd <h3=float80#5,<x0=float80#1
+# asm 2: fadd <h3=%st(4),<x0=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:
+
+# qhasm:   x0 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha130
+fsubl crypto_onetimeauth_poly1305_x86_alpha130
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:
+
+# qhasm:   h3 -= x0
+# asm 1: fsubr <x0=float80#1,<h3=float80#5
+# asm 2: fsubr <x0=%st(0),<h3=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:
+
+# qhasm:   x0 *= *(float64 *) &crypto_onetimeauth_poly1305_x86_scale
+fmull crypto_onetimeauth_poly1305_x86_scale
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:
+
+# qhasm:   x1 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fldl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:
+
+# qhasm:   x1 += h0
+# asm 1: fadd <h0=float80#3,<x1=float80#1
+# asm 2: fadd <h0=%st(2),<x1=%st(0)
+fadd %st(2),%st(0)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:
+
+# qhasm:   x1 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha32
+fsubl crypto_onetimeauth_poly1305_x86_alpha32
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:
+
+# qhasm:   h0 -= x1
+# asm 1: fsubr <x1=float80#1,<h0=float80#3
+# asm 2: fsubr <x1=%st(0),<h0=%st(2)
+fsubr %st(0),%st(2)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:
+
+# qhasm:   x2 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fldl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   x2 += h1
+# asm 1: fadd <h1=float80#5,<x2=float80#1
+# asm 2: fadd <h1=%st(4),<x2=%st(0)
+fadd %st(4),%st(0)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   x2 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha64
+fsubl crypto_onetimeauth_poly1305_x86_alpha64
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   h1 -= x2
+# asm 1: fsubr <x2=float80#1,<h1=float80#5
+# asm 2: fsubr <x2=%st(0),<h1=%st(4)
+fsubr %st(0),%st(4)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:
+
+# qhasm:   x3 = *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fldl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm:   x3 += h2
+# asm 1: fadd <h2=float80#7,<x3=float80#1
+# asm 2: fadd <h2=%st(6),<x3=%st(0)
+fadd %st(6),%st(0)
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm:   x3 -= *(float64 *) &crypto_onetimeauth_poly1305_x86_alpha96
+fsubl crypto_onetimeauth_poly1305_x86_alpha96
+# comment:fpstackfrombottom:<h3#38:<h2#39:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:<x3#117:
+
+# qhasm:   stacktop h2
+# asm 1: fxch <h2=float80#7
+# asm 2: fxch <h2=%st(6)
+fxch %st(6)
+# comment:fpstackfrombottom:<h3#38:<x3#117:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:<h2#39:
+
+# qhasm:   h2 -= x3
+# asm 1: fsub <x3=float80#7,<h2=float80#1
+# asm 2: fsub <x3=%st(6),<h2=%st(0)
+fsub %st(6),%st(0)
+# comment:fpstackfrombottom:<h3#38:<x3#117:<h1#40:<h0#41:<x0#114:<x1#115:<x2#116:<h2#39:
+
+# qhasm: internal stacktop h0
+# asm 1: fxch <h0=float80#5
+# asm 2: fxch <h0=%st(4)
+fxch %st(4)
+
+# qhasm:   x0 += h0
+# asm 1: faddp <h0=float80#1,<x0=float80#4
+# asm 2: faddp <h0=%st(0),<x0=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h3#38:<x3#117:<h1#40:<h2#39:<x0#114:<x1#115:<x2#116:
+
+# qhasm: internal stacktop h1
+# asm 1: fxch <h1=float80#5
+# asm 2: fxch <h1=%st(4)
+fxch %st(4)
+
+# qhasm:   x1 += h1
+# asm 1: faddp <h1=float80#1,<x1=float80#2
+# asm 2: faddp <h1=%st(0),<x1=%st(1)
+faddp %st(0),%st(1)
+# comment:fpstackfrombottom:<h3#38:<x3#117:<x2#116:<h2#39:<x0#114:<x1#115:
+
+# qhasm: internal stacktop h2
+# asm 1: fxch <h2=float80#3
+# asm 2: fxch <h2=%st(2)
+fxch %st(2)
+
+# qhasm:   x2 += h2
+# asm 1: faddp <h2=float80#1,<x2=float80#4
+# asm 2: faddp <h2=%st(0),<x2=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<h3#38:<x3#117:<x2#116:<x1#115:<x0#114:
+
+# qhasm: internal stacktop h3
+# asm 1: fxch <h3=float80#5
+# asm 2: fxch <h3=%st(4)
+fxch %st(4)
+
+# qhasm:   x3 += h3
+# asm 1: faddp <h3=float80#1,<x3=float80#4
+# asm 2: faddp <h3=%st(0),<x3=%st(3)
+faddp %st(0),%st(3)
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x2#116:<x1#115:
+
+# qhasm: internal stacktop x0
+# asm 1: fxch <x0=float80#4
+# asm 2: fxch <x0=%st(3)
+fxch %st(3)
+
+# qhasm:   x0 += *(float64 *) &crypto_onetimeauth_poly1305_x86_hoffset0
+faddl crypto_onetimeauth_poly1305_x86_hoffset0
+# comment:fpstackfrombottom:<x1#115:<x3#117:<x2#116:<x0#114:
+
+# qhasm: internal stacktop x1
+# asm 1: fxch <x1=float80#4
+# asm 2: fxch <x1=%st(3)
+fxch %st(3)
+
+# qhasm:   x1 += *(float64 *) &crypto_onetimeauth_poly1305_x86_hoffset1
+faddl crypto_onetimeauth_poly1305_x86_hoffset1
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x2#116:<x1#115:
+
+# qhasm: internal stacktop x2
+# asm 1: fxch <x2=float80#2
+# asm 2: fxch <x2=%st(1)
+fxch %st(1)
+
+# qhasm:   x2 += *(float64 *) &crypto_onetimeauth_poly1305_x86_hoffset2
+faddl crypto_onetimeauth_poly1305_x86_hoffset2
+# comment:fpstackfrombottom:<x0#114:<x3#117:<x1#115:<x2#116:
+
+# qhasm: internal stacktop x3
+# asm 1: fxch <x3=float80#3
+# asm 2: fxch <x3=%st(2)
+fxch %st(2)
+
+# qhasm:   x3 += *(float64 *) &crypto_onetimeauth_poly1305_x86_hoffset3
+faddl crypto_onetimeauth_poly1305_x86_hoffset3
+# comment:fpstackfrombottom:<x0#114:<x2#116:<x1#115:<x3#117:
+
+# qhasm: internal stacktop x0
+# asm 1: fxch <x0=float80#4
+# asm 2: fxch <x0=%st(3)
+fxch %st(3)
+
+# qhasm:   *(float64 *) &d0 = x0
+# asm 1: fstpl >d0=stack64#1
+# asm 2: fstpl >d0=96(%esp)
+fstpl 96(%esp)
+# comment:fpstackfrombottom:<x3#117:<x2#116:<x1#115:
+
+# qhasm:   *(float64 *) &d1 = x1
+# asm 1: fstpl >d1=stack64#2
+# asm 2: fstpl >d1=104(%esp)
+fstpl 104(%esp)
+# comment:fpstackfrombottom:<x3#117:<x2#116:
+
+# qhasm:   *(float64 *) &d2 = x2
+# asm 1: fstpl >d2=stack64#3
+# asm 2: fstpl >d2=112(%esp)
+fstpl 112(%esp)
+# comment:fpstackfrombottom:<x3#117:
+
+# qhasm:   *(float64 *) &d3 = x3
+# asm 1: fstpl >d3=stack64#4
+# asm 2: fstpl >d3=120(%esp)
+fstpl 120(%esp)
+# comment:fpstackfrombottom:
+
+# qhasm: int32 f0
+
+# qhasm: int32 f1
+
+# qhasm: int32 f2
+
+# qhasm: int32 f3
+
+# qhasm: int32 f4
+
+# qhasm: int32 g0
+
+# qhasm: int32 g1
+
+# qhasm: int32 g2
+
+# qhasm: int32 g3
+
+# qhasm: int32 f
+
+# qhasm: int32 notf
+
+# qhasm: stack32 f1_stack
+
+# qhasm: stack32 f2_stack
+
+# qhasm: stack32 f3_stack
+
+# qhasm: stack32 f4_stack
+
+# qhasm: stack32 g0_stack
+
+# qhasm: stack32 g1_stack
+
+# qhasm: stack32 g2_stack
+
+# qhasm: stack32 g3_stack
+
+# qhasm:   g0 = top d0
+# asm 1: movl <d0=stack64#1,>g0=int32#1
+# asm 2: movl <d0=100(%esp),>g0=%eax
+movl 100(%esp),%eax
+
+# qhasm:   g0 &= 63
+# asm 1: and  $63,<g0=int32#1
+# asm 2: and  $63,<g0=%eax
+and  $63,%eax
+
+# qhasm:   g1 = top d1
+# asm 1: movl <d1=stack64#2,>g1=int32#2
+# asm 2: movl <d1=108(%esp),>g1=%ecx
+movl 108(%esp),%ecx
+
+# qhasm:   g1 &= 63
+# asm 1: and  $63,<g1=int32#2
+# asm 2: and  $63,<g1=%ecx
+and  $63,%ecx
+
+# qhasm:   g2 = top d2
+# asm 1: movl <d2=stack64#3,>g2=int32#3
+# asm 2: movl <d2=116(%esp),>g2=%edx
+movl 116(%esp),%edx
+
+# qhasm:   g2 &= 63
+# asm 1: and  $63,<g2=int32#3
+# asm 2: and  $63,<g2=%edx
+and  $63,%edx
+
+# qhasm:   g3 = top d3
+# asm 1: movl <d3=stack64#4,>g3=int32#4
+# asm 2: movl <d3=124(%esp),>g3=%ebx
+movl 124(%esp),%ebx
+
+# qhasm:   g3 &= 63
+# asm 1: and  $63,<g3=int32#4
+# asm 2: and  $63,<g3=%ebx
+and  $63,%ebx
+
+# qhasm:   f1 = bottom d1
+# asm 1: movl <d1=stack64#2,>f1=int32#5
+# asm 2: movl <d1=104(%esp),>f1=%esi
+movl 104(%esp),%esi
+
+# qhasm:   carry? f1 += g0
+# asm 1: addl <g0=int32#1,<f1=int32#5
+# asm 2: addl <g0=%eax,<f1=%esi
+addl %eax,%esi
+
+# qhasm:   f1_stack = f1
+# asm 1: movl <f1=int32#5,>f1_stack=stack32#8
+# asm 2: movl <f1=%esi,>f1_stack=28(%esp)
+movl %esi,28(%esp)
+
+# qhasm:   f2 = bottom d2
+# asm 1: movl <d2=stack64#3,>f2=int32#1
+# asm 2: movl <d2=112(%esp),>f2=%eax
+movl 112(%esp),%eax
+
+# qhasm:   carry? f2 += g1 + carry
+# asm 1: adcl <g1=int32#2,<f2=int32#1
+# asm 2: adcl <g1=%ecx,<f2=%eax
+adcl %ecx,%eax
+
+# qhasm:   f2_stack = f2
+# asm 1: movl <f2=int32#1,>f2_stack=stack32#9
+# asm 2: movl <f2=%eax,>f2_stack=32(%esp)
+movl %eax,32(%esp)
+
+# qhasm:   f3 = bottom d3
+# asm 1: movl <d3=stack64#4,>f3=int32#1
+# asm 2: movl <d3=120(%esp),>f3=%eax
+movl 120(%esp),%eax
+
+# qhasm:   carry? f3 += g2 + carry
+# asm 1: adcl <g2=int32#3,<f3=int32#1
+# asm 2: adcl <g2=%edx,<f3=%eax
+adcl %edx,%eax
+
+# qhasm:   f3_stack = f3
+# asm 1: movl <f3=int32#1,>f3_stack=stack32#10
+# asm 2: movl <f3=%eax,>f3_stack=36(%esp)
+movl %eax,36(%esp)
+
+# qhasm:   f4 = 0
+# asm 1: mov  $0,>f4=int32#1
+# asm 2: mov  $0,>f4=%eax
+mov  $0,%eax
+
+# qhasm:   carry? f4 += g3 + carry
+# asm 1: adcl <g3=int32#4,<f4=int32#1
+# asm 2: adcl <g3=%ebx,<f4=%eax
+adcl %ebx,%eax
+
+# qhasm:   f4_stack = f4
+# asm 1: movl <f4=int32#1,>f4_stack=stack32#11
+# asm 2: movl <f4=%eax,>f4_stack=40(%esp)
+movl %eax,40(%esp)
+
+# qhasm:   g0 = 5
+# asm 1: mov  $5,>g0=int32#1
+# asm 2: mov  $5,>g0=%eax
+mov  $5,%eax
+
+# qhasm:   f0 = bottom d0
+# asm 1: movl <d0=stack64#1,>f0=int32#2
+# asm 2: movl <d0=96(%esp),>f0=%ecx
+movl 96(%esp),%ecx
+
+# qhasm:   carry? g0 += f0
+# asm 1: addl <f0=int32#2,<g0=int32#1
+# asm 2: addl <f0=%ecx,<g0=%eax
+addl %ecx,%eax
+
+# qhasm:   g0_stack = g0
+# asm 1: movl <g0=int32#1,>g0_stack=stack32#12
+# asm 2: movl <g0=%eax,>g0_stack=44(%esp)
+movl %eax,44(%esp)
+
+# qhasm:   g1 = 0
+# asm 1: mov  $0,>g1=int32#1
+# asm 2: mov  $0,>g1=%eax
+mov  $0,%eax
+
+# qhasm:   f1 = f1_stack
+# asm 1: movl <f1_stack=stack32#8,>f1=int32#3
+# asm 2: movl <f1_stack=28(%esp),>f1=%edx
+movl 28(%esp),%edx
+
+# qhasm:   carry? g1 += f1 + carry
+# asm 1: adcl <f1=int32#3,<g1=int32#1
+# asm 2: adcl <f1=%edx,<g1=%eax
+adcl %edx,%eax
+
+# qhasm:   g1_stack = g1
+# asm 1: movl <g1=int32#1,>g1_stack=stack32#8
+# asm 2: movl <g1=%eax,>g1_stack=28(%esp)
+movl %eax,28(%esp)
+
+# qhasm:   g2 = 0
+# asm 1: mov  $0,>g2=int32#1
+# asm 2: mov  $0,>g2=%eax
+mov  $0,%eax
+
+# qhasm:   f2 = f2_stack
+# asm 1: movl <f2_stack=stack32#9,>f2=int32#4
+# asm 2: movl <f2_stack=32(%esp),>f2=%ebx
+movl 32(%esp),%ebx
+
+# qhasm:   carry? g2 += f2 + carry
+# asm 1: adcl <f2=int32#4,<g2=int32#1
+# asm 2: adcl <f2=%ebx,<g2=%eax
+adcl %ebx,%eax
+
+# qhasm:   g2_stack = g2
+# asm 1: movl <g2=int32#1,>g2_stack=stack32#9
+# asm 2: movl <g2=%eax,>g2_stack=32(%esp)
+movl %eax,32(%esp)
+
+# qhasm:   g3 = 0
+# asm 1: mov  $0,>g3=int32#1
+# asm 2: mov  $0,>g3=%eax
+mov  $0,%eax
+
+# qhasm:   f3 = f3_stack
+# asm 1: movl <f3_stack=stack32#10,>f3=int32#5
+# asm 2: movl <f3_stack=36(%esp),>f3=%esi
+movl 36(%esp),%esi
+
+# qhasm:   carry? g3 += f3 + carry
+# asm 1: adcl <f3=int32#5,<g3=int32#1
+# asm 2: adcl <f3=%esi,<g3=%eax
+adcl %esi,%eax
+
+# qhasm:   g3_stack = g3
+# asm 1: movl <g3=int32#1,>g3_stack=stack32#10
+# asm 2: movl <g3=%eax,>g3_stack=36(%esp)
+movl %eax,36(%esp)
+
+# qhasm:   f = 0xfffffffc
+# asm 1: mov  $0xfffffffc,>f=int32#1
+# asm 2: mov  $0xfffffffc,>f=%eax
+mov  $0xfffffffc,%eax
+
+# qhasm:   f4 = f4_stack
+# asm 1: movl <f4_stack=stack32#11,>f4=int32#6
+# asm 2: movl <f4_stack=40(%esp),>f4=%edi
+movl 40(%esp),%edi
+
+# qhasm:   carry? f += f4 + carry
+# asm 1: adcl <f4=int32#6,<f=int32#1
+# asm 2: adcl <f4=%edi,<f=%eax
+adcl %edi,%eax
+
+# qhasm:   (int32) f >>= 16
+# asm 1: sar  $16,<f=int32#1
+# asm 2: sar  $16,<f=%eax
+sar  $16,%eax
+
+# qhasm:   notf = f
+# asm 1: mov  <f=int32#1,>notf=int32#6
+# asm 2: mov  <f=%eax,>notf=%edi
+mov  %eax,%edi
+
+# qhasm:   notf ^= 0xffffffff
+# asm 1: xor  $0xffffffff,<notf=int32#6
+# asm 2: xor  $0xffffffff,<notf=%edi
+xor  $0xffffffff,%edi
+
+# qhasm:   f0 &= f
+# asm 1: andl <f=int32#1,<f0=int32#2
+# asm 2: andl <f=%eax,<f0=%ecx
+andl %eax,%ecx
+
+# qhasm:   g0 = g0_stack
+# asm 1: movl <g0_stack=stack32#12,>g0=int32#7
+# asm 2: movl <g0_stack=44(%esp),>g0=%ebp
+movl 44(%esp),%ebp
+
+# qhasm:   g0 &= notf
+# asm 1: andl <notf=int32#6,<g0=int32#7
+# asm 2: andl <notf=%edi,<g0=%ebp
+andl %edi,%ebp
+
+# qhasm:   f0 |= g0
+# asm 1: orl  <g0=int32#7,<f0=int32#2
+# asm 2: orl  <g0=%ebp,<f0=%ecx
+orl  %ebp,%ecx
+
+# qhasm:   f1 &= f
+# asm 1: andl <f=int32#1,<f1=int32#3
+# asm 2: andl <f=%eax,<f1=%edx
+andl %eax,%edx
+
+# qhasm:   g1 = g1_stack
+# asm 1: movl <g1_stack=stack32#8,>g1=int32#7
+# asm 2: movl <g1_stack=28(%esp),>g1=%ebp
+movl 28(%esp),%ebp
+
+# qhasm:   g1 &= notf
+# asm 1: andl <notf=int32#6,<g1=int32#7
+# asm 2: andl <notf=%edi,<g1=%ebp
+andl %edi,%ebp
+
+# qhasm:   f1 |= g1
+# asm 1: orl  <g1=int32#7,<f1=int32#3
+# asm 2: orl  <g1=%ebp,<f1=%edx
+orl  %ebp,%edx
+
+# qhasm:   f2 &= f
+# asm 1: andl <f=int32#1,<f2=int32#4
+# asm 2: andl <f=%eax,<f2=%ebx
+andl %eax,%ebx
+
+# qhasm:   g2 = g2_stack
+# asm 1: movl <g2_stack=stack32#9,>g2=int32#7
+# asm 2: movl <g2_stack=32(%esp),>g2=%ebp
+movl 32(%esp),%ebp
+
+# qhasm:   g2 &= notf
+# asm 1: andl <notf=int32#6,<g2=int32#7
+# asm 2: andl <notf=%edi,<g2=%ebp
+andl %edi,%ebp
+
+# qhasm:   f2 |= g2
+# asm 1: orl  <g2=int32#7,<f2=int32#4
+# asm 2: orl  <g2=%ebp,<f2=%ebx
+orl  %ebp,%ebx
+
+# qhasm:   f3 &= f
+# asm 1: andl <f=int32#1,<f3=int32#5
+# asm 2: andl <f=%eax,<f3=%esi
+andl %eax,%esi
+
+# qhasm:   g3 = g3_stack
+# asm 1: movl <g3_stack=stack32#10,>g3=int32#1
+# asm 2: movl <g3_stack=36(%esp),>g3=%eax
+movl 36(%esp),%eax
+
+# qhasm:   g3 &= notf
+# asm 1: andl <notf=int32#6,<g3=int32#1
+# asm 2: andl <notf=%edi,<g3=%eax
+andl %edi,%eax
+
+# qhasm:   f3 |= g3
+# asm 1: orl  <g3=int32#1,<f3=int32#5
+# asm 2: orl  <g3=%eax,<f3=%esi
+orl  %eax,%esi
+
+# qhasm:   k = k_stack
+# asm 1: movl <k_stack=stack32#6,>k=int32#1
+# asm 2: movl <k_stack=20(%esp),>k=%eax
+movl 20(%esp),%eax
+
+# qhasm:   carry? f0 += *(uint32 *) (k + 16)
+# asm 1: addl 16(<k=int32#1),<f0=int32#2
+# asm 2: addl 16(<k=%eax),<f0=%ecx
+addl 16(%eax),%ecx
+
+# qhasm:   carry? f1 += *(uint32 *) (k + 20) + carry
+# asm 1: adcl 20(<k=int32#1),<f1=int32#3
+# asm 2: adcl 20(<k=%eax),<f1=%edx
+adcl 20(%eax),%edx
+
+# qhasm:   carry? f2 += *(uint32 *) (k + 24) + carry
+# asm 1: adcl 24(<k=int32#1),<f2=int32#4
+# asm 2: adcl 24(<k=%eax),<f2=%ebx
+adcl 24(%eax),%ebx
+
+# qhasm:   carry? f3 += *(uint32 *) (k + 28) + carry
+# asm 1: adcl 28(<k=int32#1),<f3=int32#5
+# asm 2: adcl 28(<k=%eax),<f3=%esi
+adcl 28(%eax),%esi
+
+# qhasm:   out = out_stack
+# asm 1: movl <out_stack=stack32#7,>out=int32#1
+# asm 2: movl <out_stack=24(%esp),>out=%eax
+movl 24(%esp),%eax
+
+# qhasm:   *(uint32 *) (out + 0) = f0
+# asm 1: movl <f0=int32#2,0(<out=int32#1)
+# asm 2: movl <f0=%ecx,0(<out=%eax)
+movl %ecx,0(%eax)
+
+# qhasm:   *(uint32 *) (out + 4) = f1
+# asm 1: movl <f1=int32#3,4(<out=int32#1)
+# asm 2: movl <f1=%edx,4(<out=%eax)
+movl %edx,4(%eax)
+
+# qhasm:   *(uint32 *) (out + 8) = f2
+# asm 1: movl <f2=int32#4,8(<out=int32#1)
+# asm 2: movl <f2=%ebx,8(<out=%eax)
+movl %ebx,8(%eax)
+
+# qhasm:   *(uint32 *) (out + 12) = f3
+# asm 1: movl <f3=int32#5,12(<out=int32#1)
+# asm 2: movl <f3=%esi,12(<out=%eax)
+movl %esi,12(%eax)
+
+# qhasm: eax = eax_stack
+# asm 1: movl <eax_stack=stack32#1,>eax=int32#1
+# asm 2: movl <eax_stack=0(%esp),>eax=%eax
+movl 0(%esp),%eax
+
+# qhasm: ebx = ebx_stack
+# asm 1: movl <ebx_stack=stack32#2,>ebx=int32#4
+# asm 2: movl <ebx_stack=4(%esp),>ebx=%ebx
+movl 4(%esp),%ebx
+
+# qhasm: esi = esi_stack
+# asm 1: movl <esi_stack=stack32#3,>esi=int32#5
+# asm 2: movl <esi_stack=8(%esp),>esi=%esi
+movl 8(%esp),%esi
+
+# qhasm: edi = edi_stack
+# asm 1: movl <edi_stack=stack32#4,>edi=int32#6
+# asm 2: movl <edi_stack=12(%esp),>edi=%edi
+movl 12(%esp),%edi
+
+# qhasm: ebp = ebp_stack
+# asm 1: movl <ebp_stack=stack32#5,>ebp=int32#7
+# asm 2: movl <ebp_stack=16(%esp),>ebp=%ebp
+movl 16(%esp),%ebp
+
+# qhasm: leave
+add %eax,%esp
+xor %eax,%eax
+ret
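
The epilogue above, from the "g0 = top d0" extraction down to the four movl stores through out, is Poly1305's final freeze: the double accumulators are turned back into 32-bit words, h and h + 5 are both computed, one of the two is chosen with an all-ones or all-zeros mask derived from whether h + 5 reaches 2^130 (the sar $16 applied to 0xfffffffc + f4 + carry), the last 16 key bytes are added, and the 16-byte tag is written. The sketch below is a rough C rendering of just that freeze-and-pad step, not code from NaCl; it assumes the accumulator has already been recombined into words h[0..3] plus a small top word h4, assumes a little-endian host as on x86, and replaces the sar-based mask with an equivalent expression that is well defined in C.

#include <stdint.h>
#include <string.h>

/* Hypothetical helper, not part of NaCl: reduce h = h4*2^128 + h[3..0]
   modulo 2^130 - 5 without branching, add the pad s = k[16..31], and
   store the 16-byte tag, mirroring the assembly epilogue above. */
static void poly1305_freeze_and_pad(uint8_t out[16],
                                    const uint32_t h[4], uint32_t h4,
                                    const uint8_t k[32])
{
  uint32_t g[4], f[4], s, w, gtop, mask;
  uint64_t c = 5;
  int i;

  /* g = h + 5; gtop holds the resulting bits at and above 2^128 */
  for (i = 0; i < 4; i++) { c += h[i]; g[i] = (uint32_t)c; c >>= 32; }
  gtop = h4 + (uint32_t)c;

  /* mask is all ones when h + 5 < 2^130 (keep h), all zeros otherwise (keep g) */
  mask = (uint32_t)(((uint64_t)gtop - 4) >> 32);

  for (i = 0; i < 4; i++)
    f[i] = (h[i] & mask) | (g[i] & ~mask);

  /* tag = (f + pad) mod 2^128, little-endian; the pad is the last 16 key bytes */
  c = 0;
  for (i = 0; i < 4; i++) {
    memcpy(&s, k + 16 + 4*i, 4);       /* little-endian load, as the x86 code does */
    c += (uint64_t)f[i] + s;
    w = (uint32_t)c;
    memcpy(out + 4*i, &w, 4);
    c >>= 32;
  }
}
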
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/constants.s b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/constants.s
new file mode 100644
index 00000000..ab2456c3
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/constants.s
@@ -0,0 +1,85 @@
+# version 20080912
+# D. J. Bernstein
+# Public domain.
+
+.data
+.section .rodata
+.p2align 5
+
+.globl _crypto_onetimeauth_poly1305_x86_constants
+.globl crypto_onetimeauth_poly1305_x86_constants
+.globl crypto_onetimeauth_poly1305_x86_scale
+.globl crypto_onetimeauth_poly1305_x86_two32
+.globl crypto_onetimeauth_poly1305_x86_two64
+.globl crypto_onetimeauth_poly1305_x86_two96
+.globl crypto_onetimeauth_poly1305_x86_alpha32
+.globl crypto_onetimeauth_poly1305_x86_alpha64
+.globl crypto_onetimeauth_poly1305_x86_alpha96
+.globl crypto_onetimeauth_poly1305_x86_alpha130
+.globl crypto_onetimeauth_poly1305_x86_doffset0
+.globl crypto_onetimeauth_poly1305_x86_doffset1
+.globl crypto_onetimeauth_poly1305_x86_doffset2
+.globl crypto_onetimeauth_poly1305_x86_doffset3
+.globl crypto_onetimeauth_poly1305_x86_doffset3minustwo128
+.globl crypto_onetimeauth_poly1305_x86_hoffset0
+.globl crypto_onetimeauth_poly1305_x86_hoffset1
+.globl crypto_onetimeauth_poly1305_x86_hoffset2
+.globl crypto_onetimeauth_poly1305_x86_hoffset3
+.globl crypto_onetimeauth_poly1305_x86_rounding
+
+_crypto_onetimeauth_poly1305_x86_constants:
+crypto_onetimeauth_poly1305_x86_constants:
+crypto_onetimeauth_poly1305_x86_scale:
+.long 0x0,0x37f40000
+
+crypto_onetimeauth_poly1305_x86_two32:
+.long 0x0,0x41f00000
+
+crypto_onetimeauth_poly1305_x86_two64:
+.long 0x0,0x43f00000
+
+crypto_onetimeauth_poly1305_x86_two96:
+.long 0x0,0x45f00000
+
+crypto_onetimeauth_poly1305_x86_alpha32:
+.long 0x0,0x45e80000
+
+crypto_onetimeauth_poly1305_x86_alpha64:
+.long 0x0,0x47e80000
+
+crypto_onetimeauth_poly1305_x86_alpha96:
+.long 0x0,0x49e80000
+
+crypto_onetimeauth_poly1305_x86_alpha130:
+.long 0x0,0x4c080000
+
+crypto_onetimeauth_poly1305_x86_doffset0:
+.long 0x0,0x43300000
+
+crypto_onetimeauth_poly1305_x86_doffset1:
+.long 0x0,0x45300000
+
+crypto_onetimeauth_poly1305_x86_doffset2:
+.long 0x0,0x47300000
+
+crypto_onetimeauth_poly1305_x86_doffset3:
+.long 0x0,0x49300000
+
+crypto_onetimeauth_poly1305_x86_doffset3minustwo128:
+.long 0x0,0x492ffffe
+
+crypto_onetimeauth_poly1305_x86_hoffset0:
+.long 0xfffffffb,0x43300001
+
+crypto_onetimeauth_poly1305_x86_hoffset1:
+.long 0xfffffffe,0x45300001
+
+crypto_onetimeauth_poly1305_x86_hoffset2:
+.long 0xfffffffe,0x47300001
+
+crypto_onetimeauth_poly1305_x86_hoffset3:
+.long 0xfffffffe,0x49300003
+
+crypto_onetimeauth_poly1305_x86_rounding:
+.byte 0x7f
+.byte 0x13
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/verify.c b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/verify.c
new file mode 100644
index 00000000..c7e063f1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/poly1305/x86/verify.c
@@ -0,0 +1,9 @@
+#include "crypto_verify_16.h"
+#include "crypto_onetimeauth.h"
+
+int crypto_onetimeauth_verify(const unsigned char *h,const unsigned char *in,unsigned long long inlen,const unsigned char *k)
+{
+  unsigned char correct[16];
+  crypto_onetimeauth(correct,in,inlen,k);
+  return crypto_verify_16(h,correct);
+}
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/try.c b/nacl/nacl-20110221/crypto_onetimeauth/try.c
new file mode 100644
index 00000000..54f4396d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/try.c
@@ -0,0 +1,119 @@
+/*
+ * crypto_onetimeauth/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include "crypto_hash_sha256.h"
+#include "crypto_onetimeauth.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_onetimeauth_IMPLEMENTATION;
+
+#define MAXTEST_BYTES 10000
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *h;
+static unsigned char *m;
+static unsigned char *k;
+static unsigned char *h2;
+static unsigned char *m2;
+static unsigned char *k2;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  h = alignedcalloc(crypto_onetimeauth_BYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  k = alignedcalloc(crypto_onetimeauth_KEYBYTES);
+  h2 = alignedcalloc(crypto_onetimeauth_BYTES);
+  m2 = alignedcalloc(MAXTEST_BYTES + crypto_onetimeauth_BYTES);
+  k2 = alignedcalloc(crypto_onetimeauth_KEYBYTES + crypto_onetimeauth_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_onetimeauth(h,m,TUNE_BYTES,k);
+  crypto_onetimeauth_verify(h,m,TUNE_BYTES,k);
+}
+
+char checksum[crypto_onetimeauth_BYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long mlen = i;
+    long long klen = crypto_onetimeauth_KEYBYTES;
+    long long hlen = crypto_onetimeauth_BYTES;
+
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+
+    if (crypto_onetimeauth(h,m,mlen,k) != 0) return "crypto_onetimeauth returns nonzero";
+
+    for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_onetimeauth overwrites k";
+    for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_onetimeauth overwrites m";
+    for (j = -16;j < 0;++j) if (h[j] != h2[j]) return "crypto_onetimeauth writes before output";
+    for (j = hlen;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_onetimeauth writes after output";
+
+    for (j = -16;j < 0;++j) h[j] = random();
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = hlen;j < hlen + 16;++j) h[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < hlen + 16;++j) h2[j] = h[j];
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+
+    if (crypto_onetimeauth(m2,m2,mlen,k) != 0) return "crypto_onetimeauth returns nonzero";
+    for (j = 0;j < hlen;++j) if (m2[j] != h[j]) return "crypto_onetimeauth does not handle m overlap";
+    for (j = 0;j < hlen;++j) m2[j] = m[j];
+    if (crypto_onetimeauth(k2,m2,mlen,k2) != 0) return "crypto_onetimeauth returns nonzero";
+    for (j = 0;j < hlen;++j) if (k2[j] != h[j]) return "crypto_onetimeauth does not handle k overlap";
+    for (j = 0;j < hlen;++j) k2[j] = k[j];
+
+    if (crypto_onetimeauth_verify(h,m,mlen,k) != 0) return "crypto_onetimeauth_verify returns nonzero";
+
+    for (j = -16;j < hlen + 16;++j) if (h[j] != h2[j]) return "crypto_onetimeauth overwrites h";
+    for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_onetimeauth overwrites k";
+    for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_onetimeauth overwrites m";
+
+    crypto_hash_sha256(h2,h,hlen);
+    for (j = 0;j < klen;++j) k[j] ^= h2[j % 32];
+    if (crypto_onetimeauth(h,m,mlen,k) != 0) return "crypto_onetimeauth returns nonzero";
+    if (crypto_onetimeauth_verify(h,m,mlen,k) != 0) return "crypto_onetimeauth_verify returns nonzero";
+    
+    crypto_hash_sha256(h2,h,hlen);
+    for (j = 0;j < mlen;++j) m[j] ^= h2[j % 32];
+    m[mlen] = h2[0];
+  }
+  if (crypto_onetimeauth(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_onetimeauth returns nonzero";
+  if (crypto_onetimeauth_verify(h,m,CHECKSUM_BYTES,k) != 0) return "crypto_onetimeauth_verify returns nonzero";
+
+  for (i = 0;i < crypto_onetimeauth_BYTES;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (h[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & h[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/wrapper-auth.cpp b/nacl/nacl-20110221/crypto_onetimeauth/wrapper-auth.cpp
new file mode 100644
index 00000000..f4279607
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/wrapper-auth.cpp
@@ -0,0 +1,11 @@
+#include <string>
+using std::string;
+#include "crypto_onetimeauth.h"
+
+string crypto_onetimeauth(const string &m,const string &k)
+{
+  if (k.size() != crypto_onetimeauth_KEYBYTES) throw "incorrect key length";
+  unsigned char a[crypto_onetimeauth_BYTES];
+  crypto_onetimeauth(a,(const unsigned char *) m.c_str(),m.size(),(const unsigned char *) k.c_str());
+  return string((char *) a,crypto_onetimeauth_BYTES);
+}
diff --git a/nacl/nacl-20110221/crypto_onetimeauth/wrapper-verify.cpp b/nacl/nacl-20110221/crypto_onetimeauth/wrapper-verify.cpp
new file mode 100644
index 00000000..248239ee
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_onetimeauth/wrapper-verify.cpp
@@ -0,0 +1,14 @@
+#include <string>
+using std::string;
+#include "crypto_onetimeauth.h"
+
+void crypto_onetimeauth_verify(const string &a,const string &m,const string &k)
+{
+  if (k.size() != crypto_onetimeauth_KEYBYTES) throw "incorrect key length";
+  if (a.size() != crypto_onetimeauth_BYTES) throw "incorrect authenticator length";
+  if (crypto_onetimeauth_verify(
+       (const unsigned char *) a.c_str(),
+       (const unsigned char *) m.c_str(),m.size(),
+       (const unsigned char *) k.c_str()) == 0) return;
+  throw "invalid authenticator";
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/api.h b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/api.h
new file mode 100644
index 00000000..60339596
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_SCALARBYTES 32
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/base.c b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/base.c
new file mode 100644
index 00000000..dde929ec
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/base.c
@@ -0,0 +1,8 @@
+#include "crypto_scalarmult.h"
+
+static char basepoint[32] = {9};
+
+int crypto_scalarmult_base(unsigned char *q,const unsigned char *n)
+{
+  return crypto_scalarmult(q,n,basepoint);
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/const.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/const.s
new file mode 100644
index 00000000..9042c2fb
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/const.s
@@ -0,0 +1,114 @@
+.data
+.section .rodata
+.p2align 5
+
+.globl crypto_scalarmult_curve25519_athlon_scale
+.globl crypto_scalarmult_curve25519_athlon_121665
+.globl crypto_scalarmult_curve25519_athlon_alpha26
+.globl crypto_scalarmult_curve25519_athlon_alpha51
+.globl crypto_scalarmult_curve25519_athlon_alpha77
+.globl crypto_scalarmult_curve25519_athlon_alpha102
+.globl crypto_scalarmult_curve25519_athlon_alpha128
+.globl crypto_scalarmult_curve25519_athlon_alpha153
+.globl crypto_scalarmult_curve25519_athlon_alpha179
+.globl crypto_scalarmult_curve25519_athlon_alpha204
+.globl crypto_scalarmult_curve25519_athlon_alpha230
+.globl crypto_scalarmult_curve25519_athlon_alpha255
+.globl crypto_scalarmult_curve25519_athlon_in0offset
+.globl crypto_scalarmult_curve25519_athlon_in1offset
+.globl crypto_scalarmult_curve25519_athlon_in2offset
+.globl crypto_scalarmult_curve25519_athlon_in3offset
+.globl crypto_scalarmult_curve25519_athlon_in4offset
+.globl crypto_scalarmult_curve25519_athlon_in5offset
+.globl crypto_scalarmult_curve25519_athlon_in6offset
+.globl crypto_scalarmult_curve25519_athlon_in7offset
+.globl crypto_scalarmult_curve25519_athlon_in8offset
+.globl crypto_scalarmult_curve25519_athlon_in9offset
+.globl crypto_scalarmult_curve25519_athlon_out0offset
+.globl crypto_scalarmult_curve25519_athlon_out1offset
+.globl crypto_scalarmult_curve25519_athlon_out2offset
+.globl crypto_scalarmult_curve25519_athlon_out3offset
+.globl crypto_scalarmult_curve25519_athlon_out4offset
+.globl crypto_scalarmult_curve25519_athlon_out5offset
+.globl crypto_scalarmult_curve25519_athlon_out6offset
+.globl crypto_scalarmult_curve25519_athlon_out7offset
+.globl crypto_scalarmult_curve25519_athlon_out8offset
+.globl crypto_scalarmult_curve25519_athlon_out9offset
+.globl crypto_scalarmult_curve25519_athlon_two0
+.globl crypto_scalarmult_curve25519_athlon_two1
+.globl crypto_scalarmult_curve25519_athlon_zero
+.globl crypto_scalarmult_curve25519_athlon_rounding
+
+crypto_scalarmult_curve25519_athlon_scale:
+	.long 0x0,0x30430000
+crypto_scalarmult_curve25519_athlon_121665:
+	.long 0x0,0x40fdb410
+crypto_scalarmult_curve25519_athlon_in0offset:
+	.long 0x0,0x43300000
+crypto_scalarmult_curve25519_athlon_in1offset:
+	.long 0x0,0x45300000
+crypto_scalarmult_curve25519_athlon_in2offset:
+	.long 0x0,0x46b00000
+crypto_scalarmult_curve25519_athlon_in3offset:
+	.long 0x0,0x48300000
+crypto_scalarmult_curve25519_athlon_in4offset:
+	.long 0x0,0x49b00000
+crypto_scalarmult_curve25519_athlon_in5offset:
+	.long 0x0,0x4b300000
+crypto_scalarmult_curve25519_athlon_in6offset:
+	.long 0x0,0x4d300000
+crypto_scalarmult_curve25519_athlon_in7offset:
+	.long 0x0,0x4eb00000
+crypto_scalarmult_curve25519_athlon_in8offset:
+	.long 0x0,0x50300000
+crypto_scalarmult_curve25519_athlon_in9offset:
+	.long 0x0,0x51b00000
+crypto_scalarmult_curve25519_athlon_alpha26:
+	.long 0x0,0x45880000
+crypto_scalarmult_curve25519_athlon_alpha51:
+	.long 0x0,0x47180000
+crypto_scalarmult_curve25519_athlon_alpha77:
+	.long 0x0,0x48b80000
+crypto_scalarmult_curve25519_athlon_alpha102:
+	.long 0x0,0x4a480000
+crypto_scalarmult_curve25519_athlon_alpha128:
+	.long 0x0,0x4be80000
+crypto_scalarmult_curve25519_athlon_alpha153:
+	.long 0x0,0x4d780000
+crypto_scalarmult_curve25519_athlon_alpha179:
+	.long 0x0,0x4f180000
+crypto_scalarmult_curve25519_athlon_alpha204:
+	.long 0x0,0x50a80000
+crypto_scalarmult_curve25519_athlon_alpha230:
+	.long 0x0,0x52480000
+crypto_scalarmult_curve25519_athlon_alpha255:
+	.long 0x0,0x53d80000
+crypto_scalarmult_curve25519_athlon_two0:
+	.long 0x0,0x3ff00000
+crypto_scalarmult_curve25519_athlon_two1:
+	.long 0x0,0x40000000
+crypto_scalarmult_curve25519_athlon_zero:
+	.long 0x0,0x0
+crypto_scalarmult_curve25519_athlon_out0offset:
+	.long 0x1fffffed,0x43380000
+crypto_scalarmult_curve25519_athlon_out1offset:
+	.long 0xffffff8,0x44d80000
+crypto_scalarmult_curve25519_athlon_out2offset:
+	.long 0x1ffffff8,0x46680000
+crypto_scalarmult_curve25519_athlon_out3offset:
+	.long 0xffffff8,0x48080000
+crypto_scalarmult_curve25519_athlon_out4offset:
+	.long 0x1ffffff8,0x49980000
+crypto_scalarmult_curve25519_athlon_out5offset:
+	.long 0xffffff8,0x4b380000
+crypto_scalarmult_curve25519_athlon_out6offset:
+	.long 0x1ffffff8,0x4cc80000
+crypto_scalarmult_curve25519_athlon_out7offset:
+	.long 0xffffff8,0x4e680000
+crypto_scalarmult_curve25519_athlon_out8offset:
+	.long 0x1ffffff8,0x4ff80000
+crypto_scalarmult_curve25519_athlon_out9offset:
+	.long 0x1fffff8,0x51980000
+crypto_scalarmult_curve25519_athlon_rounding:
+	.byte 0x7f
+	.byte 0x13
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/fromdouble.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/fromdouble.s
new file mode 100644
index 00000000..221ca35f
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/fromdouble.s
@@ -0,0 +1,195 @@
+.text
+.p2align 5
+.globl _crypto_scalarmult_curve25519_athlon_fromdouble
+.globl crypto_scalarmult_curve25519_athlon_fromdouble
+_crypto_scalarmult_curve25519_athlon_fromdouble:
+crypto_scalarmult_curve25519_athlon_fromdouble:
+mov %esp,%eax
+and $31,%eax
+add $192,%eax
+sub %eax,%esp
+movl %ebp,0(%esp)
+movl 8(%esp,%eax),%ecx
+fldl 0(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out0offset
+fstpl 96(%esp)
+fldl 8(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out1offset
+fstpl 104(%esp)
+fldl 16(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out2offset
+fstpl 112(%esp)
+fldl 24(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out3offset
+fstpl 120(%esp)
+fldl 32(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out4offset
+fstpl 128(%esp)
+fldl 40(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out5offset
+fstpl 136(%esp)
+fldl 48(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out6offset
+fstpl 144(%esp)
+fldl 56(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out7offset
+fstpl 152(%esp)
+fldl 64(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out8offset
+fstpl 160(%esp)
+fldl 72(%ecx)
+faddl crypto_scalarmult_curve25519_athlon_out9offset
+fstpl 168(%esp)
+movl 96(%esp),%ecx
+movl %ecx,4(%esp)
+movl 104(%esp),%ecx
+shl  $26,%ecx
+movl %ecx,40(%esp)
+movl 104(%esp),%ecx
+shr  $6,%ecx
+movl %ecx,8(%esp)
+movl 112(%esp),%ecx
+shl  $19,%ecx
+movl %ecx,44(%esp)
+movl 112(%esp),%ecx
+shr  $13,%ecx
+movl %ecx,12(%esp)
+movl 120(%esp),%ecx
+shl  $13,%ecx
+movl %ecx,48(%esp)
+movl 120(%esp),%ecx
+shr  $19,%ecx
+movl %ecx,16(%esp)
+movl 128(%esp),%ecx
+shl  $6,%ecx
+movl %ecx,52(%esp)
+movl 128(%esp),%ecx
+shr  $26,%ecx
+movl 136(%esp),%edx
+add  %edx,%ecx
+movl %ecx,20(%esp)
+movl 144(%esp),%ecx
+shl  $25,%ecx
+movl %ecx,56(%esp)
+movl 144(%esp),%ecx
+shr  $7,%ecx
+movl %ecx,24(%esp)
+movl 152(%esp),%ecx
+shl  $19,%ecx
+movl %ecx,60(%esp)
+movl 152(%esp),%ecx
+shr  $13,%ecx
+movl %ecx,28(%esp)
+movl 160(%esp),%ecx
+shl  $12,%ecx
+movl %ecx,64(%esp)
+movl 160(%esp),%ecx
+shr  $20,%ecx
+movl %ecx,32(%esp)
+movl 168(%esp),%ecx
+shl  $6,%ecx
+movl %ecx,68(%esp)
+movl 168(%esp),%ecx
+shr  $26,%ecx
+movl %ecx,36(%esp)
+mov  $0,%ecx
+movl %ecx,72(%esp)
+movl 4(%esp),%ecx
+addl 40(%esp),%ecx
+movl %ecx,4(%esp)
+movl 8(%esp),%ecx
+adcl 44(%esp),%ecx
+movl %ecx,8(%esp)
+movl 12(%esp),%ecx
+adcl 48(%esp),%ecx
+movl %ecx,12(%esp)
+movl 16(%esp),%ecx
+adcl 52(%esp),%ecx
+movl %ecx,16(%esp)
+movl 20(%esp),%ecx
+adcl 56(%esp),%ecx
+movl %ecx,20(%esp)
+movl 24(%esp),%ecx
+adcl 60(%esp),%ecx
+movl %ecx,24(%esp)
+movl 28(%esp),%ecx
+adcl 64(%esp),%ecx
+movl %ecx,28(%esp)
+movl 32(%esp),%ecx
+adcl 68(%esp),%ecx
+movl %ecx,32(%esp)
+movl 36(%esp),%ecx
+adcl 72(%esp),%ecx
+movl %ecx,36(%esp)
+movl 4(%esp),%ecx
+adc  $0x13,%ecx
+movl %ecx,40(%esp)
+movl 8(%esp),%ecx
+adc  $0,%ecx
+movl %ecx,44(%esp)
+movl 12(%esp),%ecx
+adc  $0,%ecx
+movl %ecx,48(%esp)
+movl 16(%esp),%ecx
+adc  $0,%ecx
+movl %ecx,52(%esp)
+movl 20(%esp),%ecx
+adc  $0,%ecx
+movl %ecx,56(%esp)
+movl 24(%esp),%ecx
+adc  $0,%ecx
+movl %ecx,60(%esp)
+movl 28(%esp),%ecx
+adc  $0,%ecx
+movl %ecx,64(%esp)
+movl 32(%esp),%ecx
+adc  $0x80000000,%ecx
+movl %ecx,68(%esp)
+movl 36(%esp),%ebp
+adc  $0xffffffff,%ebp
+and  $0x80000000,%ebp
+sar  $31,%ebp
+movl 4(%esp,%eax),%ecx
+movl 4(%esp),%edx
+xorl 40(%esp),%edx
+and  %ebp,%edx
+xorl 40(%esp),%edx
+movl %edx,0(%ecx)
+movl 8(%esp),%edx
+xorl 44(%esp),%edx
+and  %ebp,%edx
+xorl 44(%esp),%edx
+movl %edx,4(%ecx)
+movl 12(%esp),%edx
+xorl 48(%esp),%edx
+and  %ebp,%edx
+xorl 48(%esp),%edx
+movl %edx,8(%ecx)
+movl 16(%esp),%edx
+xorl 52(%esp),%edx
+and  %ebp,%edx
+xorl 52(%esp),%edx
+movl %edx,12(%ecx)
+movl 20(%esp),%edx
+xorl 56(%esp),%edx
+and  %ebp,%edx
+xorl 56(%esp),%edx
+movl %edx,16(%ecx)
+movl 24(%esp),%edx
+xorl 60(%esp),%edx
+and  %ebp,%edx
+xorl 60(%esp),%edx
+movl %edx,20(%ecx)
+movl 28(%esp),%edx
+xorl 64(%esp),%edx
+and  %ebp,%edx
+xorl 64(%esp),%edx
+movl %edx,24(%ecx)
+movl 32(%esp),%edx
+xorl 68(%esp),%edx
+and  %ebp,%edx
+xorl 68(%esp),%edx
+movl %edx,28(%ecx)
+movl 0(%esp),%ebp
+add %eax,%esp
+ret
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/implementors b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/init.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/init.s
new file mode 100644
index 00000000..edd3c589
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/init.s
@@ -0,0 +1,13 @@
+.text
+.p2align 5
+.globl _crypto_scalarmult_curve25519_athlon_init
+.globl crypto_scalarmult_curve25519_athlon_init
+_crypto_scalarmult_curve25519_athlon_init:
+crypto_scalarmult_curve25519_athlon_init:
+mov %esp,%eax
+and $31,%eax
+add $0,%eax
+sub %eax,%esp
+fldcw crypto_scalarmult_curve25519_athlon_rounding
+add %eax,%esp
+ret
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/mainloop.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/mainloop.s
new file mode 100644
index 00000000..47412905
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/mainloop.s
@@ -0,0 +1,3990 @@
+.text
+.p2align 5
+.globl _crypto_scalarmult_curve25519_athlon_mainloop
+.globl crypto_scalarmult_curve25519_athlon_mainloop
+_crypto_scalarmult_curve25519_athlon_mainloop:
+crypto_scalarmult_curve25519_athlon_mainloop:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
+lea 256(%esp),%edx
+lea 512(%esp),%ecx
+fldl crypto_scalarmult_curve25519_athlon_two0
+fldl crypto_scalarmult_curve25519_athlon_zero
+movl %eax,160(%ecx)
+movl %ebx,164(%ecx)
+movl %esi,168(%ecx)
+movl %edi,172(%ecx)
+movl %ebp,176(%ecx)
+movl 4(%esp,%eax),%ebx
+fxch %st(1)
+fstl 0(%esp)
+fxch %st(1)
+fstl 8(%esp)
+fstl 16(%esp)
+fstl 24(%esp)
+fstl 32(%esp)
+fstl 40(%esp)
+fstl 48(%esp)
+fstl -120(%edx)
+fstl -112(%edx)
+fstl -104(%edx)
+fstl -96(%edx)
+fstl -88(%edx)
+fstl -80(%edx)
+fstl -72(%edx)
+fstl -64(%edx)
+fstl -56(%edx)
+fstl -48(%edx)
+fstl -40(%edx)
+fstl -32(%edx)
+fstl -24(%edx)
+fxch %st(1)
+fstpl 64(%edx)
+fstl 72(%edx)
+fstl 80(%edx)
+fstl 88(%edx)
+fstl 96(%edx)
+fstl 104(%edx)
+fstl 112(%edx)
+fstl 120(%edx)
+fstl -128(%ecx)
+fstpl -120(%ecx)
+fldl 0(%ebx)
+fldl 8(%ebx)
+fldl 16(%ebx)
+fldl 24(%ebx)
+fxch %st(3)
+fstl -16(%edx)
+fstpl 56(%esp)
+fldl 32(%ebx)
+fxch %st(2)
+fstl -8(%edx)
+fstpl 64(%esp)
+fldl 40(%ebx)
+fxch %st(1)
+fstl 0(%edx)
+fstpl 72(%esp)
+fldl 48(%ebx)
+fxch %st(3)
+fstl 8(%edx)
+fstpl 80(%esp)
+fldl 56(%ebx)
+fxch %st(2)
+fstl 16(%edx)
+fstpl 88(%esp)
+fldl 64(%ebx)
+fxch %st(1)
+fstl 24(%edx)
+fstpl 96(%esp)
+fldl 72(%ebx)
+fxch %st(3)
+fstl 32(%edx)
+fstpl 104(%esp)
+fxch %st(1)
+fstl 40(%edx)
+fstpl 112(%esp)
+fstl 48(%edx)
+fstpl 120(%esp)
+fstl 56(%edx)
+fstpl -128(%edx)
+movl 8(%esp,%eax),%ebx
+mov  $28,%edi
+mov  $31,%ebp
+movl 28(%ebx),%esi
+rol  $1,%esi
+._morebytes:
+movl %edi,188(%ecx)
+._morebits:
+rol  $1,%esi
+movl %esi,180(%ecx)
+movl %ebp,184(%ecx)
+and  $1,%esi
+movl  $0x43300000,-108(%ecx)
+movl %esi,-112(%ecx)
+fldl -96(%edx)
+fldl 0(%esp)
+fadd %st(0),%st(1)
+fsubl -96(%edx)
+fldl 64(%edx)
+fldl -16(%edx)
+fadd %st(0),%st(1)
+fsubl 64(%edx)
+fldl -88(%edx)
+fldl 8(%esp)
+fadd %st(0),%st(1)
+fsubl -88(%edx)
+fxch %st(5)
+fstpl 0(%esp)
+fxch %st(3)
+fstpl -96(%edx)
+fldl 72(%edx)
+fldl -8(%edx)
+fadd %st(0),%st(1)
+fsubl 72(%edx)
+fxch %st(3)
+fstpl -16(%edx)
+fxch %st(1)
+fstpl 64(%edx)
+fldl -80(%edx)
+fldl 16(%esp)
+fadd %st(0),%st(1)
+fsubl -80(%edx)
+fxch %st(4)
+fstpl 8(%esp)
+fxch %st(4)
+fstpl -88(%edx)
+fldl 80(%edx)
+fldl 0(%edx)
+fadd %st(0),%st(1)
+fsubl 80(%edx)
+fxch %st(2)
+fstpl -8(%edx)
+fxch %st(2)
+fstpl 72(%edx)
+fldl -72(%edx)
+fldl 24(%esp)
+fadd %st(0),%st(1)
+fsubl -72(%edx)
+fxch %st(5)
+fstpl 16(%esp)
+fxch %st(3)
+fstpl -80(%edx)
+fldl 88(%edx)
+fldl 8(%edx)
+fadd %st(0),%st(1)
+fsubl 88(%edx)
+fxch %st(3)
+fstpl 0(%edx)
+fxch %st(1)
+fstpl 80(%edx)
+fldl -64(%edx)
+fldl 32(%esp)
+fadd %st(0),%st(1)
+fsubl -64(%edx)
+fxch %st(4)
+fstpl 24(%esp)
+fxch %st(4)
+fstpl -72(%edx)
+fldl 96(%edx)
+fldl 16(%edx)
+fadd %st(0),%st(1)
+fsubl 96(%edx)
+fxch %st(2)
+fstpl 8(%edx)
+fxch %st(2)
+fstpl 88(%edx)
+fldl -56(%edx)
+fldl 40(%esp)
+fadd %st(0),%st(1)
+fsubl -56(%edx)
+fxch %st(5)
+fstpl 32(%esp)
+fxch %st(3)
+fstpl -64(%edx)
+fldl 104(%edx)
+fldl 24(%edx)
+fadd %st(0),%st(1)
+fsubl 104(%edx)
+fxch %st(3)
+fstpl 16(%edx)
+fxch %st(1)
+fstpl 96(%edx)
+fldl -48(%edx)
+fldl 48(%esp)
+fadd %st(0),%st(1)
+fsubl -48(%edx)
+fxch %st(4)
+fstpl 40(%esp)
+fxch %st(4)
+fstpl -56(%edx)
+fldl 112(%edx)
+fldl 32(%edx)
+fadd %st(0),%st(1)
+fsubl 112(%edx)
+fxch %st(2)
+fstpl 24(%edx)
+fxch %st(2)
+fstpl 104(%edx)
+fldl -40(%edx)
+fldl -120(%edx)
+fadd %st(0),%st(1)
+fsubl -40(%edx)
+fxch %st(5)
+fstpl 48(%esp)
+fxch %st(3)
+fstpl -48(%edx)
+fldl 120(%edx)
+fldl 40(%edx)
+fadd %st(0),%st(1)
+fsubl 120(%edx)
+fxch %st(3)
+fstpl 32(%edx)
+fxch %st(1)
+fstpl 112(%edx)
+fldl -32(%edx)
+fldl -112(%edx)
+fadd %st(0),%st(1)
+fsubl -32(%edx)
+fxch %st(4)
+fstpl -120(%edx)
+fxch %st(4)
+fstpl -40(%edx)
+fldl -128(%ecx)
+fldl 48(%edx)
+fadd %st(0),%st(1)
+fsubl -128(%ecx)
+fxch %st(2)
+fstpl 40(%edx)
+fxch %st(2)
+fstpl 120(%edx)
+fldl -24(%edx)
+fldl -104(%edx)
+fadd %st(0),%st(1)
+fsubl -24(%edx)
+fxch %st(5)
+fstpl -112(%edx)
+fxch %st(3)
+fstpl -32(%edx)
+fldl -120(%ecx)
+fldl 56(%edx)
+fadd %st(0),%st(1)
+fsubl -120(%ecx)
+fxch %st(3)
+fstpl 48(%edx)
+fxch %st(1)
+fstpl -128(%ecx)
+fldl -112(%ecx)
+fsubl crypto_scalarmult_curve25519_athlon_in0offset
+fldl crypto_scalarmult_curve25519_athlon_two0
+fsub %st(1),%st(0)
+fxch %st(4)
+fstpl -104(%edx)
+fxch %st(4)
+fstpl -24(%edx)
+fstpl 56(%edx)
+fstpl -120(%ecx)
+fxch %st(1)
+fstl 136(%ecx)
+fldl 0(%esp)
+fmul %st(2),%st(0)
+fldl -16(%edx)
+fmul %st(2),%st(0)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmul %st(3),%st(0)
+fldl -8(%edx)
+fmul %st(3),%st(0)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmul %st(4),%st(0)
+fldl 0(%edx)
+fmul %st(4),%st(0)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmul %st(5),%st(0)
+fldl 8(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -112(%ecx)
+fldl 32(%esp)
+fmul %st(5),%st(0)
+fldl 16(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fstpl -104(%ecx)
+fldl 40(%esp)
+fmul %st(5),%st(0)
+fldl 24(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(1)
+fstpl -96(%ecx)
+fldl 48(%esp)
+fmul %st(5),%st(0)
+fldl 32(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -88(%ecx)
+fldl -120(%edx)
+fmul %st(5),%st(0)
+fldl 40(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fstpl -80(%ecx)
+fldl -112(%edx)
+fmul %st(5),%st(0)
+fldl 48(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(1)
+fstpl -72(%ecx)
+fldl -104(%edx)
+fmul %st(5),%st(0)
+fldl 56(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -64(%ecx)
+fldl -96(%edx)
+fmul %st(5),%st(0)
+fldl 64(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fstpl -56(%ecx)
+fldl -88(%edx)
+fmul %st(5),%st(0)
+fldl 72(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(1)
+fstpl -48(%ecx)
+fldl -80(%edx)
+fmul %st(5),%st(0)
+fldl 80(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -40(%ecx)
+fldl -72(%edx)
+fmul %st(5),%st(0)
+fldl 88(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fstpl -32(%ecx)
+fldl -64(%edx)
+fmul %st(5),%st(0)
+fldl 96(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(1)
+fstpl -24(%ecx)
+fldl -56(%edx)
+fmul %st(5),%st(0)
+fldl 104(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -16(%ecx)
+fldl -48(%edx)
+fmul %st(5),%st(0)
+fldl 112(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fstpl -8(%ecx)
+fldl -40(%edx)
+fmul %st(5),%st(0)
+fldl 120(%edx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(1)
+fstpl 0(%ecx)
+fldl -32(%edx)
+fmul %st(5),%st(0)
+fldl -128(%ecx)
+fmul %st(5),%st(0)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl 8(%ecx)
+fldl -24(%edx)
+fmulp %st(0),%st(5)
+fldl -120(%ecx)
+fmulp %st(0),%st(4)
+fxch %st(3)
+faddp %st(0),%st(4)
+fstpl 16(%ecx)
+fxch %st(1)
+fstpl 24(%ecx)
+fstpl 32(%ecx)
+fstpl 40(%ecx)
+fldl -24(%edx)
+fmull 56(%edx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(1)
+fldl -88(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl -96(%edx)
+fmull 56(%edx)
+fldl -80(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(2)
+fldl -88(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(1)
+fldl -72(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(2)
+fldl -80(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl -64(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(2)
+fldl -72(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(1)
+fldl -88(%edx)
+fmull 56(%edx)
+fldl -56(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(3)
+fldl -64(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(2)
+fldl -80(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(1)
+fldl -48(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(3)
+fldl -56(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(2)
+fldl -72(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl -40(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(3)
+fldl -48(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(2)
+fldl -64(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(1)
+fldl -32(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(3)
+fldl -40(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(2)
+fldl -56(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl -80(%edx)
+fmull 56(%edx)
+fldl -48(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(2)
+fldl -32(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(4),%st(0)
+fldl -72(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(2)
+fldl -40(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(3)
+fldl -24(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl -64(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(2)
+fldl -72(%edx)
+fmull 56(%edx)
+fldl -32(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fldl -56(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(3)
+fldl -64(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(2)
+fsubrp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(4),%st(0)
+fldl -48(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(3)
+fldl -56(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(2)
+fldl -24(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fldl -40(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(3)
+fldl -64(%edx)
+fmull 56(%edx)
+fldl -48(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(3)
+fldl -32(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(5)
+fxch %st(5)
+fstpl 64(%ecx)
+fldl -56(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(5)
+fldl -40(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl -24(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -48(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(5)
+fldl -32(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(3)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(2)
+fxch %st(3)
+fstpl 72(%ecx)
+fldl -56(%edx)
+fmull 56(%edx)
+fldl -40(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(5)
+fldl -24(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(3)
+fldl -96(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fldl -48(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(2)
+fldl -32(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -88(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(5)
+fxch %st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl -40(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(2)
+fldl -24(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(6)
+fldl -96(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(2)
+fldl -48(%edx)
+fmull 56(%edx)
+fldl -32(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(2)
+fxch %st(5)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -88(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(5),%st(0)
+fldl -40(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(7)
+fldl -24(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(3)
+fldl -96(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(2)
+fldl -80(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(5)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fxch %st(3)
+fstpl 48(%ecx)
+fldl -32(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(6)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -88(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fadd %st(0),%st(3)
+fsubrp %st(0),%st(4)
+fldl -40(%edx)
+fmull 56(%edx)
+fldl -24(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(6)
+fldl -96(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(3)
+fldl -80(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(4),%st(0)
+fldl -32(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(2)
+fxch %st(6)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -88(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(4)
+fldl -72(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(3)
+fxch %st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fxch %st(5)
+fstpl 56(%ecx)
+fldl -24(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl -96(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(6)
+fldl -80(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(3)
+fxch %st(4)
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(3)
+fldl -32(%edx)
+fmull 56(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -88(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(6)
+fldl -72(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fldl -24(%edx)
+fmull 48(%edx)
+faddp %st(0),%st(6)
+fldl -96(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(2)
+fldl -80(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(7)
+fldl -64(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fxch %st(4)
+fstpl -24(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -88(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(5)
+fldl -72(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(1)
+fldl -96(%edx)
+fmull 40(%edx)
+faddp %st(0),%st(3)
+fldl -80(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(4)
+fldl -64(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(2),%st(0)
+fldl -88(%edx)
+fmull 32(%edx)
+faddp %st(0),%st(4)
+fldl -72(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(5)
+fldl -56(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fxch %st(1)
+fstpl -96(%edx)
+fldl -80(%edx)
+fmull 24(%edx)
+faddp %st(0),%st(3)
+fldl -64(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(1)
+fstpl -88(%edx)
+fldl -72(%edx)
+fmull 16(%edx)
+faddp %st(0),%st(1)
+fldl -56(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(3),%st(0)
+fldl -64(%edx)
+fmull 8(%edx)
+faddp %st(0),%st(2)
+fldl -48(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl -56(%edx)
+fmull 0(%edx)
+faddp %st(0),%st(2)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(3)
+fxch %st(2)
+fstpl -80(%edx)
+fldl -48(%edx)
+fmull -8(%edx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fldl -40(%edx)
+fmull -16(%edx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 64(%ecx)
+fldl 72(%ecx)
+fxch %st(2)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(4),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+faddp %st(0),%st(2)
+fxch %st(2)
+fstpl -72(%edx)
+fxch %st(2)
+fstpl -64(%edx)
+fstpl -56(%edx)
+fstpl -48(%edx)
+fldl -104(%edx)
+fmull -120(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull -120(%ecx)
+fldl 16(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(2)
+fldl 8(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(2)
+fldl 16(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(1)
+fldl 32(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(2)
+fldl 24(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -120(%ecx)
+fldl 40(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(2)
+fldl 16(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(1)
+fldl 48(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(2)
+fldl 24(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(1)
+fldl -120(%edx)
+fmull 72(%edx)
+faddp %st(0),%st(3)
+fldl 48(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(2)
+fldl 32(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(1)
+fldl -112(%edx)
+fmull 64(%edx)
+faddp %st(0),%st(3)
+fldl -120(%edx)
+fmull 80(%edx)
+faddp %st(0),%st(2)
+fldl 40(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -120(%ecx)
+fldl 48(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(2)
+fldl -112(%edx)
+fmull 72(%edx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(4),%st(0)
+fldl 24(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(2)
+fldl -120(%edx)
+fmull 88(%edx)
+faddp %st(0),%st(3)
+fldl -104(%edx)
+fmull 64(%edx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl 32(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(2)
+fldl 24(%esp)
+fmull -120(%ecx)
+fldl -112(%edx)
+fmull 80(%edx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fldl 40(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(2)
+fsubrp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(4),%st(0)
+fldl 48(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(2)
+fldl -104(%edx)
+fmull 72(%edx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fldl -120(%edx)
+fmull 96(%edx)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmull -120(%ecx)
+fldl 48(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(3)
+fldl -112(%edx)
+fmull 88(%edx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(5)
+fxch %st(5)
+fstpl 8(%edx)
+fldl 40(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(5)
+fldl -120(%edx)
+fmull 104(%edx)
+faddp %st(0),%st(1)
+fldl -104(%edx)
+fmull 80(%edx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 48(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(5)
+fldl -112(%edx)
+fmull 96(%edx)
+faddp %st(0),%st(3)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(2)
+fxch %st(3)
+fstpl 16(%edx)
+fldl 40(%esp)
+fmull -120(%ecx)
+fldl -120(%edx)
+fmull 112(%edx)
+faddp %st(0),%st(5)
+fldl -104(%edx)
+fmull 88(%edx)
+faddp %st(0),%st(3)
+fldl 0(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fldl 48(%esp)
+fmull -128(%ecx)
+faddp %st(0),%st(2)
+fldl -112(%edx)
+fmull 104(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(5)
+fxch %st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl -120(%edx)
+fmull 120(%edx)
+faddp %st(0),%st(2)
+fldl -104(%edx)
+fmull 96(%edx)
+faddp %st(0),%st(6)
+fldl 0(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(2)
+fldl 48(%esp)
+fmull -120(%ecx)
+fldl -112(%edx)
+fmull 112(%edx)
+faddp %st(0),%st(2)
+fxch %st(5)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(5),%st(0)
+fldl -120(%edx)
+fmull -128(%ecx)
+faddp %st(0),%st(7)
+fldl -104(%edx)
+fmull 104(%edx)
+faddp %st(0),%st(3)
+fldl 0(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(2)
+fldl 16(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(5)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fxch %st(3)
+fstpl -40(%edx)
+fldl -112(%edx)
+fmull 120(%edx)
+faddp %st(0),%st(6)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fadd %st(0),%st(3)
+fsubrp %st(0),%st(4)
+fldl -120(%edx)
+fmull -120(%ecx)
+fldl -104(%edx)
+fmull 112(%edx)
+faddp %st(0),%st(6)
+fldl 0(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(3)
+fldl 16(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(4),%st(0)
+fldl -112(%edx)
+fmull -128(%ecx)
+faddp %st(0),%st(2)
+fxch %st(6)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(4)
+fldl 24(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(3)
+fxch %st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fxch %st(5)
+fstpl -32(%edx)
+fldl -104(%edx)
+fmull 120(%edx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(6)
+fldl 16(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(3)
+fxch %st(4)
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(3)
+fldl -112(%edx)
+fmull -120(%ecx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(6)
+fldl 24(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fldl -104(%edx)
+fmull -128(%ecx)
+faddp %st(0),%st(6)
+fldl 0(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(2)
+fldl 16(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(7)
+fldl 32(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fxch %st(4)
+fstpl -104(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(5)
+fldl 24(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(1)
+fldl 0(%esp)
+fmull 120(%edx)
+faddp %st(0),%st(3)
+fldl 16(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(4)
+fldl 32(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(2),%st(0)
+fldl 8(%esp)
+fmull 112(%edx)
+faddp %st(0),%st(4)
+fldl 24(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(5)
+fldl 40(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fxch %st(1)
+fstpl -16(%edx)
+fldl 16(%esp)
+fmull 104(%edx)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(1)
+fstpl -8(%edx)
+fldl 24(%esp)
+fmull 96(%edx)
+faddp %st(0),%st(1)
+fldl 40(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(3),%st(0)
+fldl 32(%esp)
+fmull 88(%edx)
+faddp %st(0),%st(2)
+fldl 48(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl 40(%esp)
+fmull 80(%edx)
+faddp %st(0),%st(2)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(3)
+fxch %st(2)
+fstpl 0(%edx)
+fldl 48(%esp)
+fmull 72(%edx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fldl -120(%edx)
+fmull 64(%edx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 8(%edx)
+fldl 16(%edx)
+fxch %st(2)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(4),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+faddp %st(0),%st(2)
+fxch %st(2)
+fstpl 8(%edx)
+fxch %st(2)
+fstpl 16(%edx)
+fstpl 24(%edx)
+fstpl 32(%edx)
+fldl -40(%ecx)
+fmul %st(0),%st(0)
+fldl -112(%ecx)
+fadd %st(0),%st(0)
+fldl -104(%ecx)
+fadd %st(0),%st(0)
+fldl -96(%ecx)
+fadd %st(0),%st(0)
+fldl -56(%ecx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -40(%ecx)
+fmul %st(4),%st(0)
+fldl -48(%ecx)
+fmul %st(4),%st(0)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstl 0(%esp)
+fxch %st(3)
+fstl 8(%esp)
+fxch %st(3)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl -64(%ecx)
+fxch %st(5)
+fmul %st(0),%st(3)
+fxch %st(3)
+faddp %st(0),%st(1)
+fxch %st(2)
+fadd %st(0),%st(0)
+fldl -56(%ecx)
+fmul %st(2),%st(0)
+faddp %st(0),%st(4)
+fxch %st(1)
+fstl 16(%esp)
+fldl -72(%ecx)
+fxch %st(5)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fadd %st(0),%st(0)
+fstpl 48(%esp)
+fldl -88(%ecx)
+fadd %st(0),%st(0)
+fstl 24(%esp)
+fldl -64(%ecx)
+fmul %st(1),%st(0)
+faddp %st(0),%st(4)
+fmul %st(4),%st(0)
+faddp %st(0),%st(2)
+fxch %st(3)
+fadd %st(0),%st(0)
+fstpl 40(%esp)
+fldl -80(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+fldl 8(%esp)
+fldl -40(%ecx)
+fmul %st(0),%st(1)
+fldl 16(%esp)
+fmul %st(0),%st(1)
+fldl -48(%ecx)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fldl 24(%esp)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(2)
+fldl -80(%ecx)
+fadd %st(0),%st(0)
+fstl 32(%esp)
+fmull -72(%ecx)
+faddp %st(0),%st(6)
+fxch %st(3)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(5),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(5)
+fldl -56(%ecx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(2)
+fldl -64(%ecx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fxch %st(3)
+fmull 40(%esp)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -120(%edx)
+fldl -72(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fxch %st(3)
+fstpl -112(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 24(%esp)
+fmull -40(%ecx)
+fldl -112(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fldl 32(%esp)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull -104(%ecx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fsubr %st(0),%st(2)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fsubr %st(0),%st(3)
+fldl -64(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -96(%ecx)
+faddp %st(0),%st(1)
+fldl -104(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 64(%edx)
+fldl 32(%esp)
+fmull -40(%ecx)
+fldl 40(%esp)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl 48(%esp)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -88(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -96(%ecx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fsubr %st(0),%st(2)
+fxch %st(3)
+fstpl 72(%edx)
+fldl 40(%esp)
+fmull -40(%ecx)
+fldl 48(%esp)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl -56(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -80(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -88(%ecx)
+faddp %st(0),%st(1)
+fldl -96(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fsubr %st(0),%st(3)
+fxch %st(1)
+fstpl 80(%edx)
+fldl 48(%esp)
+fldl -40(%ecx)
+fmul %st(0),%st(1)
+fmul %st(5),%st(0)
+fxch %st(5)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -72(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -80(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -88(%ecx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 88(%edx)
+fldl -48(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(4)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -64(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -72(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -80(%ecx)
+faddp %st(0),%st(1)
+fldl -88(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fsubr %st(0),%st(1)
+fldl -48(%ecx)
+fadd %st(0),%st(0)
+fmull -40(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -64(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -72(%ecx)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmull -80(%ecx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fsubr %st(0),%st(1)
+fldl -120(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fldl -112(%edx)
+fxch %st(1)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(2)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstpl 96(%edx)
+fxch %st(4)
+fstpl 104(%edx)
+fxch %st(1)
+fstpl 112(%edx)
+fstpl 120(%edx)
+fxch %st(1)
+fstpl -128(%ecx)
+fstpl -120(%ecx)
+fldl 40(%ecx)
+fmul %st(0),%st(0)
+fldl -32(%ecx)
+fadd %st(0),%st(0)
+fldl -24(%ecx)
+fadd %st(0),%st(0)
+fldl -16(%ecx)
+fadd %st(0),%st(0)
+fldl 24(%ecx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 40(%ecx)
+fmul %st(4),%st(0)
+fldl 32(%ecx)
+fmul %st(4),%st(0)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstl 0(%esp)
+fxch %st(3)
+fstl 8(%esp)
+fxch %st(3)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%ecx)
+fxch %st(5)
+fmul %st(0),%st(3)
+fxch %st(3)
+faddp %st(0),%st(1)
+fxch %st(2)
+fadd %st(0),%st(0)
+fldl 24(%ecx)
+fmul %st(2),%st(0)
+faddp %st(0),%st(4)
+fxch %st(1)
+fstl 16(%esp)
+fldl 8(%ecx)
+fxch %st(5)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fadd %st(0),%st(0)
+fstpl 48(%esp)
+fldl -8(%ecx)
+fadd %st(0),%st(0)
+fstl 24(%esp)
+fldl 16(%ecx)
+fmul %st(1),%st(0)
+faddp %st(0),%st(4)
+fmul %st(4),%st(0)
+faddp %st(0),%st(2)
+fxch %st(3)
+fadd %st(0),%st(0)
+fstpl 40(%esp)
+fldl 0(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+fldl 8(%esp)
+fldl 40(%ecx)
+fmul %st(0),%st(1)
+fldl 16(%esp)
+fmul %st(0),%st(1)
+fldl 32(%ecx)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fldl 24(%esp)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(2)
+fldl 0(%ecx)
+fadd %st(0),%st(0)
+fstl 32(%esp)
+fmull 8(%ecx)
+faddp %st(0),%st(6)
+fxch %st(3)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(5),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(5)
+fldl 24(%ecx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(2)
+fldl 16(%ecx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fxch %st(3)
+fmull 40(%esp)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -120(%edx)
+fldl 8(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fxch %st(3)
+fstpl -112(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 24(%esp)
+fmull 40(%ecx)
+fldl -32(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fldl 32(%esp)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull -24(%ecx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fsubr %st(0),%st(2)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fsubr %st(0),%st(3)
+fldl 16(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -16(%ecx)
+faddp %st(0),%st(1)
+fldl -24(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl -112(%ecx)
+fldl 32(%esp)
+fmull 40(%ecx)
+fldl 40(%esp)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl 48(%esp)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -8(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -16(%ecx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fsubr %st(0),%st(2)
+fxch %st(3)
+fstpl -104(%ecx)
+fldl 40(%esp)
+fmull 40(%ecx)
+fldl 48(%esp)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl 24(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 0(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -8(%ecx)
+faddp %st(0),%st(1)
+fldl -16(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fsubr %st(0),%st(3)
+fxch %st(1)
+fstpl -96(%ecx)
+fldl 48(%esp)
+fldl 40(%ecx)
+fmul %st(0),%st(1)
+fmul %st(5),%st(0)
+fxch %st(5)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 8(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 0(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -8(%ecx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl -88(%ecx)
+fldl 32(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(4)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 16(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 8(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull 0(%ecx)
+faddp %st(0),%st(1)
+fldl -8(%ecx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fsubr %st(0),%st(1)
+fldl 32(%ecx)
+fadd %st(0),%st(0)
+fmull 40(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 16(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull 8(%ecx)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmull 0(%ecx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fsubr %st(0),%st(1)
+fldl -120(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fldl -112(%edx)
+fxch %st(1)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(2)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstpl -80(%ecx)
+fxch %st(4)
+fstpl -72(%ecx)
+fxch %st(1)
+fstpl -64(%ecx)
+fstpl -56(%ecx)
+fxch %st(1)
+fstpl -48(%ecx)
+fstpl -40(%ecx)
+fldl -40(%edx)
+fldl 48(%ecx)
+fadd %st(0),%st(1)
+fsubl -40(%edx)
+fxch %st(1)
+fstpl -120(%edx)
+fstpl -40(%edx)
+fldl -32(%edx)
+fldl 56(%ecx)
+fadd %st(0),%st(1)
+fsubl -32(%edx)
+fxch %st(1)
+fstpl -112(%edx)
+fstpl -32(%edx)
+fldl -104(%edx)
+fldl -24(%edx)
+fadd %st(0),%st(1)
+fsubl -104(%edx)
+fxch %st(1)
+fstpl -104(%edx)
+fstpl -24(%edx)
+fldl -16(%edx)
+fldl -96(%edx)
+fadd %st(0),%st(1)
+fsubl -16(%edx)
+fxch %st(1)
+fstpl -96(%edx)
+fstpl -16(%edx)
+fldl -8(%edx)
+fldl -88(%edx)
+fadd %st(0),%st(1)
+fsubl -8(%edx)
+fxch %st(1)
+fstpl -88(%edx)
+fstpl -8(%edx)
+fldl 0(%edx)
+fldl -80(%edx)
+fadd %st(0),%st(1)
+fsubl 0(%edx)
+fxch %st(1)
+fstpl -80(%edx)
+fstpl 0(%edx)
+fldl 8(%edx)
+fldl -72(%edx)
+fadd %st(0),%st(1)
+fsubl 8(%edx)
+fxch %st(1)
+fstpl -72(%edx)
+fstpl 8(%edx)
+fldl 16(%edx)
+fldl -64(%edx)
+fadd %st(0),%st(1)
+fsubl 16(%edx)
+fxch %st(1)
+fstpl -64(%edx)
+fstpl 16(%edx)
+fldl 24(%edx)
+fldl -56(%edx)
+fadd %st(0),%st(1)
+fsubl 24(%edx)
+fxch %st(1)
+fstpl -56(%edx)
+fstpl 24(%edx)
+fldl 32(%edx)
+fldl -48(%edx)
+fadd %st(0),%st(1)
+fsubl 32(%edx)
+fxch %st(1)
+fstpl -48(%edx)
+fstpl 32(%edx)
+fldl 64(%edx)
+fsubl -112(%ecx)
+fstpl -32(%ecx)
+fldl 72(%edx)
+fsubl -104(%ecx)
+fstpl -24(%ecx)
+fldl 80(%edx)
+fsubl -96(%ecx)
+fstpl -16(%ecx)
+fldl 88(%edx)
+fsubl -88(%ecx)
+fstpl -8(%ecx)
+fldl 96(%edx)
+fsubl -80(%ecx)
+fstpl 0(%ecx)
+fldl 104(%edx)
+fsubl -72(%ecx)
+fstpl 8(%ecx)
+fldl 112(%edx)
+fsubl -64(%ecx)
+fstpl 16(%ecx)
+fldl 120(%edx)
+fsubl -56(%ecx)
+fstpl 24(%ecx)
+fldl -128(%ecx)
+fsubl -48(%ecx)
+fstpl 32(%ecx)
+fldl -120(%ecx)
+fsubl -40(%ecx)
+fstpl 40(%ecx)
+fldl -48(%edx)
+fmul %st(0),%st(0)
+fldl -120(%edx)
+fadd %st(0),%st(0)
+fldl -112(%edx)
+fadd %st(0),%st(0)
+fldl -104(%edx)
+fadd %st(0),%st(0)
+fldl -64(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -48(%edx)
+fmul %st(4),%st(0)
+fldl -56(%edx)
+fmul %st(4),%st(0)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstl 0(%esp)
+fxch %st(3)
+fstl 8(%esp)
+fxch %st(3)
+fmull -56(%edx)
+faddp %st(0),%st(1)
+fldl -72(%edx)
+fxch %st(5)
+fmul %st(0),%st(3)
+fxch %st(3)
+faddp %st(0),%st(1)
+fxch %st(2)
+fadd %st(0),%st(0)
+fldl -64(%edx)
+fmul %st(2),%st(0)
+faddp %st(0),%st(4)
+fxch %st(1)
+fstl 16(%esp)
+fldl -80(%edx)
+fxch %st(5)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fadd %st(0),%st(0)
+fstpl 48(%esp)
+fldl -96(%edx)
+fadd %st(0),%st(0)
+fstl 24(%esp)
+fldl -72(%edx)
+fmul %st(1),%st(0)
+faddp %st(0),%st(4)
+fmul %st(4),%st(0)
+faddp %st(0),%st(2)
+fxch %st(3)
+fadd %st(0),%st(0)
+fstpl 40(%esp)
+fldl -88(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+fldl 8(%esp)
+fldl -48(%edx)
+fmul %st(0),%st(1)
+fldl 16(%esp)
+fmul %st(0),%st(1)
+fldl -56(%edx)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fldl 24(%esp)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(2)
+fldl -88(%edx)
+fadd %st(0),%st(0)
+fstl 32(%esp)
+fmull -80(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(5),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(5)
+fldl -64(%edx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(2)
+fldl -72(%edx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fxch %st(3)
+fmull 40(%esp)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl 48(%edx)
+fldl -80(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fxch %st(3)
+fstpl 56(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 24(%esp)
+fmull -48(%edx)
+fldl -120(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fldl 32(%esp)
+fmull -56(%edx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull -112(%edx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull -64(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fsubr %st(0),%st(2)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fsubr %st(0),%st(3)
+fldl -72(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -104(%edx)
+faddp %st(0),%st(1)
+fldl -112(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl -120(%edx)
+fldl 32(%esp)
+fmull -48(%edx)
+fldl 40(%esp)
+fmull -56(%edx)
+faddp %st(0),%st(1)
+fldl 48(%esp)
+fmull -64(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -96(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -104(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fsubr %st(0),%st(2)
+fxch %st(3)
+fstpl -112(%edx)
+fldl 40(%esp)
+fmull -48(%edx)
+fldl 48(%esp)
+fmull -56(%edx)
+faddp %st(0),%st(1)
+fldl -64(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -88(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -96(%edx)
+faddp %st(0),%st(1)
+fldl -104(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fsubr %st(0),%st(3)
+fxch %st(1)
+fstpl -104(%edx)
+fldl 48(%esp)
+fldl -48(%edx)
+fmul %st(0),%st(1)
+fmul %st(5),%st(0)
+fxch %st(5)
+fmull -56(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -80(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -88(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -96(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 40(%edx)
+fldl -56(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(4)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -72(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -80(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -88(%edx)
+faddp %st(0),%st(1)
+fldl -96(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fsubr %st(0),%st(1)
+fldl -56(%edx)
+fadd %st(0),%st(0)
+fmull -48(%edx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -64(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -72(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -80(%edx)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmull -88(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fsubr %st(0),%st(1)
+fldl 48(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fldl 56(%edx)
+fxch %st(1)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(2)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstpl -96(%edx)
+fxch %st(4)
+fstpl -88(%edx)
+fxch %st(1)
+fstpl -80(%edx)
+fstpl -72(%edx)
+fxch %st(1)
+fstpl -64(%edx)
+fstpl -56(%edx)
+fldl 32(%edx)
+fmul %st(0),%st(0)
+fldl -40(%edx)
+fadd %st(0),%st(0)
+fldl -32(%edx)
+fadd %st(0),%st(0)
+fldl -24(%edx)
+fadd %st(0),%st(0)
+fldl 16(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 32(%edx)
+fmul %st(4),%st(0)
+fldl 24(%edx)
+fmul %st(4),%st(0)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstl 0(%esp)
+fxch %st(3)
+fstl 8(%esp)
+fxch %st(3)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl 8(%edx)
+fxch %st(5)
+fmul %st(0),%st(3)
+fxch %st(3)
+faddp %st(0),%st(1)
+fxch %st(2)
+fadd %st(0),%st(0)
+fldl 16(%edx)
+fmul %st(2),%st(0)
+faddp %st(0),%st(4)
+fxch %st(1)
+fstl 16(%esp)
+fldl 0(%edx)
+fxch %st(5)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fadd %st(0),%st(0)
+fstpl 48(%esp)
+fldl -16(%edx)
+fadd %st(0),%st(0)
+fstl 24(%esp)
+fldl 8(%edx)
+fmul %st(1),%st(0)
+faddp %st(0),%st(4)
+fmul %st(4),%st(0)
+faddp %st(0),%st(2)
+fxch %st(3)
+fadd %st(0),%st(0)
+fstpl 40(%esp)
+fldl -8(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+fldl 8(%esp)
+fldl 32(%edx)
+fmul %st(0),%st(1)
+fldl 16(%esp)
+fmul %st(0),%st(1)
+fldl 24(%edx)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fldl 24(%esp)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(2)
+fldl -8(%edx)
+fadd %st(0),%st(0)
+fstl 32(%esp)
+fmull 0(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(5),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(5)
+fldl 16(%edx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(2)
+fldl 8(%edx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fxch %st(3)
+fmull 40(%esp)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl -48(%edx)
+fldl 0(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fxch %st(3)
+fstpl 48(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 24(%esp)
+fmull 32(%edx)
+fldl -40(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fldl 32(%esp)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull -32(%edx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull 16(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fsubr %st(0),%st(2)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fsubr %st(0),%st(3)
+fldl 8(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -24(%edx)
+faddp %st(0),%st(1)
+fldl -32(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 56(%ecx)
+fldl 32(%esp)
+fmull 32(%edx)
+fldl 40(%esp)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl 48(%esp)
+fmull 16(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -16(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -24(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fsubr %st(0),%st(2)
+fxch %st(3)
+fstpl 64(%ecx)
+fldl 40(%esp)
+fmull 32(%edx)
+fldl 48(%esp)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl 16(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull -8(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -16(%edx)
+faddp %st(0),%st(1)
+fldl -24(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fsubr %st(0),%st(3)
+fxch %st(1)
+fstpl 72(%ecx)
+fldl 48(%esp)
+fldl 32(%edx)
+fmul %st(0),%st(1)
+fmul %st(5),%st(0)
+fxch %st(5)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 0(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull -8(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -16(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 80(%ecx)
+fldl 24(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(4)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 8(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 0(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull -8(%edx)
+faddp %st(0),%st(1)
+fldl -16(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fsubr %st(0),%st(1)
+fldl 24(%edx)
+fadd %st(0),%st(0)
+fmull 32(%edx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 16(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 8(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull 0(%edx)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmull -8(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fsubr %st(0),%st(1)
+fldl -48(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fldl 48(%edx)
+fxch %st(1)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(2)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstpl 88(%ecx)
+fxch %st(4)
+fstpl 96(%ecx)
+fxch %st(1)
+fstpl 104(%ecx)
+fstpl 112(%ecx)
+fxch %st(1)
+fstpl 120(%ecx)
+fstpl 128(%ecx)
+fldl 32(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl 40(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl 0(%esp)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fxch %st(1)
+fstpl 8(%esp)
+fldl -32(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl -24(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl -48(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fldl -16(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl -40(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fldl -8(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl -32(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fldl 0(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl -24(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fldl 8(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl -16(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl 16(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl -8(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 24(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_121665
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl 0(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fldl 0(%esp)
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl 8(%edx)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl 8(%esp)
+fadd %st(1),%st(0)
+fxch %st(1)
+fsubrp %st(0),%st(2)
+fxch %st(1)
+fstpl 16(%edx)
+fstpl 48(%ecx)
+fldl -120(%ecx)
+fmull -40(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl 72(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fldl 64(%edx)
+fmull -40(%ecx)
+fldl 80(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(2)
+fldl 72(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl 88(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(2)
+fldl 80(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fldl 96(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(2)
+fldl 88(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(1)
+fldl 72(%edx)
+fmull -40(%ecx)
+fldl 104(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(3)
+fldl 96(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(2)
+fldl 80(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(1)
+fldl 112(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(3)
+fldl 104(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(2)
+fldl 88(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fldl 120(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(3)
+fldl 112(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(2)
+fldl 96(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(1)
+fldl -128(%ecx)
+fmull -112(%ecx)
+faddp %st(0),%st(3)
+fldl 120(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(2)
+fldl 104(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(1)
+fldl 80(%edx)
+fmull -40(%ecx)
+fldl 112(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(2)
+fldl -128(%ecx)
+fmull -104(%ecx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(4),%st(0)
+fldl 88(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(2)
+fldl 120(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(3)
+fldl -120(%ecx)
+fmull -112(%ecx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl 96(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(2)
+fldl 88(%edx)
+fmull -40(%ecx)
+fldl -128(%ecx)
+fmull -96(%ecx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fldl 104(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(3)
+fldl 96(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(2)
+fsubrp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(4),%st(0)
+fldl 112(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(3)
+fldl 104(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(2)
+fldl -120(%ecx)
+fmull -104(%ecx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fldl 120(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(3)
+fldl 96(%edx)
+fmull -40(%ecx)
+fldl 112(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(3)
+fldl -128(%ecx)
+fmull -88(%ecx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(5)
+fxch %st(5)
+fstpl 0(%esp)
+fldl 104(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(5)
+fldl 120(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(1)
+fldl -120(%ecx)
+fmull -96(%ecx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 112(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(5)
+fldl -128(%ecx)
+fmull -80(%ecx)
+faddp %st(0),%st(3)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(2)
+fxch %st(3)
+fstpl 8(%esp)
+fldl 104(%edx)
+fmull -40(%ecx)
+fldl 120(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(5)
+fldl -120(%ecx)
+fmull -88(%ecx)
+faddp %st(0),%st(3)
+fldl 64(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fldl 112(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(2)
+fldl -128(%ecx)
+fmull -72(%ecx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(5)
+fxch %st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl 120(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(2)
+fldl -120(%ecx)
+fmull -80(%ecx)
+faddp %st(0),%st(6)
+fldl 64(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(2)
+fldl 112(%edx)
+fmull -40(%ecx)
+fldl -128(%ecx)
+fmull -64(%ecx)
+faddp %st(0),%st(2)
+fxch %st(5)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(5),%st(0)
+fldl 120(%edx)
+fmull -48(%ecx)
+faddp %st(0),%st(7)
+fldl -120(%ecx)
+fmull -72(%ecx)
+faddp %st(0),%st(3)
+fldl 64(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(2)
+fldl 80(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(5)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fxch %st(3)
+fstpl 16(%esp)
+fldl -128(%ecx)
+fmull -56(%ecx)
+faddp %st(0),%st(6)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fadd %st(0),%st(3)
+fsubrp %st(0),%st(4)
+fldl 120(%edx)
+fmull -40(%ecx)
+fldl -120(%ecx)
+fmull -64(%ecx)
+faddp %st(0),%st(6)
+fldl 64(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(3)
+fldl 80(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(4),%st(0)
+fldl -128(%ecx)
+fmull -48(%ecx)
+faddp %st(0),%st(2)
+fxch %st(6)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(4)
+fldl 88(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(3)
+fxch %st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fxch %st(5)
+fstpl 24(%esp)
+fldl -120(%ecx)
+fmull -56(%ecx)
+faddp %st(0),%st(1)
+fldl 64(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(6)
+fldl 80(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(3)
+fxch %st(4)
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(3)
+fldl -128(%ecx)
+fmull -40(%ecx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(6)
+fldl 88(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fldl -120(%ecx)
+fmull -48(%ecx)
+faddp %st(0),%st(6)
+fldl 64(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(2)
+fldl 80(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(7)
+fldl 96(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fxch %st(4)
+fstpl 32(%esp)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(5)
+fldl 88(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(1)
+fldl 64(%edx)
+fmull -56(%ecx)
+faddp %st(0),%st(3)
+fldl 80(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(4)
+fldl 96(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(2),%st(0)
+fldl 72(%edx)
+fmull -64(%ecx)
+faddp %st(0),%st(4)
+fldl 88(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(5)
+fldl 104(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fxch %st(1)
+fstpl 40(%esp)
+fldl 80(%edx)
+fmull -72(%ecx)
+faddp %st(0),%st(3)
+fldl 96(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(1)
+fstpl 48(%esp)
+fldl 88(%edx)
+fmull -80(%ecx)
+faddp %st(0),%st(1)
+fldl 104(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(3),%st(0)
+fldl 96(%edx)
+fmull -88(%ecx)
+faddp %st(0),%st(2)
+fldl 112(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl 104(%edx)
+fmull -96(%ecx)
+faddp %st(0),%st(2)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(3)
+fxch %st(2)
+fstpl 24(%edx)
+fldl 112(%edx)
+fmull -104(%ecx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fldl 120(%edx)
+fmull -112(%ecx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 0(%esp)
+fldl 8(%esp)
+fxch %st(2)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(4),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+faddp %st(0),%st(2)
+fxch %st(2)
+fstpl 32(%edx)
+fxch %st(2)
+fstpl 48(%edx)
+fstpl 56(%edx)
+fstpl -112(%ecx)
+fldl -48(%edx)
+faddl 64(%edx)
+fstpl -104(%ecx)
+fldl -40(%edx)
+faddl 72(%edx)
+fstpl -96(%ecx)
+fldl -32(%edx)
+faddl 80(%edx)
+fstpl -88(%ecx)
+fldl -24(%edx)
+faddl 88(%edx)
+fstpl -80(%ecx)
+fldl -16(%edx)
+faddl 96(%edx)
+fstpl -16(%edx)
+fldl -8(%edx)
+faddl 104(%edx)
+fstpl -8(%edx)
+fldl 0(%edx)
+faddl 112(%edx)
+fstpl 0(%edx)
+fldl 8(%edx)
+faddl 120(%edx)
+fstpl 8(%edx)
+fldl 16(%edx)
+faddl -128(%ecx)
+fstpl 16(%edx)
+fldl 48(%ecx)
+faddl -120(%ecx)
+fstpl 80(%edx)
+fldl 128(%ecx)
+fmull -128(%edx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 56(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(1)
+fldl 64(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(1)
+fldl 56(%ecx)
+fmull -128(%edx)
+fldl 72(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(2)
+fldl 64(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(1)
+fldl 80(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(2)
+fldl 72(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(1)
+fldl 88(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(2)
+fldl 80(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(1)
+fldl 64(%ecx)
+fmull -128(%edx)
+fldl 96(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(3)
+fldl 88(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(2)
+fldl 72(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(1)
+fldl 104(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(3)
+fldl 96(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(2)
+fldl 80(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(1)
+fldl 112(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(3)
+fldl 104(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(2)
+fldl 88(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(1)
+fldl 120(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(3)
+fldl 112(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(2)
+fldl 96(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(1)
+fldl 72(%ecx)
+fmull -128(%edx)
+fldl 104(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(2)
+fldl 120(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(4),%st(0)
+fldl 80(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(2)
+fldl 112(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(3)
+fldl 128(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl 88(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(2)
+fldl 80(%ecx)
+fmull -128(%edx)
+fldl 120(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fldl 96(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(3)
+fldl 88(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(2)
+fsubrp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(4),%st(0)
+fldl 104(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(3)
+fldl 96(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(2)
+fldl 128(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fldl 112(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(3)
+fldl 88(%ecx)
+fmull -128(%edx)
+fldl 104(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(3)
+fldl 120(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(5)
+fxch %st(5)
+fstpl 0(%esp)
+fldl 96(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(5)
+fldl 112(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(1)
+fldl 128(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(2)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 104(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(5)
+fldl 120(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(3)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 56(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(2)
+fxch %st(3)
+fstpl 8(%esp)
+fldl 96(%ecx)
+fmull -128(%edx)
+fldl 112(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(5)
+fldl 128(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(3)
+fldl 56(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fldl 104(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(2)
+fldl 120(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(6)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(5)
+fxch %st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl 112(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(2)
+fldl 128(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(6)
+fldl 56(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(2)
+fldl 104(%ecx)
+fmull -128(%edx)
+fldl 120(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(2)
+fxch %st(5)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(5),%st(0)
+fldl 112(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(7)
+fldl 128(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(3)
+fldl 56(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(2)
+fldl 72(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(5)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fxch %st(3)
+fstpl -48(%edx)
+fldl 120(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(6)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(2)
+fxch %st(2)
+fadd %st(0),%st(3)
+fsubrp %st(0),%st(4)
+fldl 112(%ecx)
+fmull -128(%edx)
+fldl 128(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(6)
+fldl 56(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(3)
+fldl 72(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(4),%st(0)
+fldl 120(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(2)
+fxch %st(6)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(4)
+fldl 80(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(3)
+fxch %st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fxch %st(5)
+fstpl -40(%edx)
+fldl 128(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(1)
+fldl 56(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(6)
+fldl 72(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(3)
+fxch %st(4)
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(3)
+fldl 120(%ecx)
+fmull -128(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(6)
+fldl 80(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fldl 128(%ecx)
+fmull 120(%esp)
+faddp %st(0),%st(6)
+fldl 56(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(2)
+fldl 72(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(7)
+fldl 88(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fxch %st(4)
+fstpl -32(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 64(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(5)
+fldl 80(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(6)
+fxch %st(3)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(1)
+fldl 56(%ecx)
+fmull 112(%esp)
+faddp %st(0),%st(3)
+fldl 72(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(4)
+fldl 88(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(2),%st(0)
+fldl 64(%ecx)
+fmull 104(%esp)
+faddp %st(0),%st(4)
+fldl 80(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(5)
+fldl 96(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fxch %st(1)
+fstpl -24(%edx)
+fldl 72(%ecx)
+fmull 96(%esp)
+faddp %st(0),%st(3)
+fldl 88(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(1)
+fstpl 96(%edx)
+fldl 80(%ecx)
+fmull 88(%esp)
+faddp %st(0),%st(1)
+fldl 96(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(3),%st(0)
+fldl 88(%ecx)
+fmull 80(%esp)
+faddp %st(0),%st(2)
+fldl 104(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl 96(%ecx)
+fmull 72(%esp)
+faddp %st(0),%st(2)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(3)
+fxch %st(2)
+fstpl 104(%edx)
+fldl 104(%ecx)
+fmull 64(%esp)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fldl 112(%ecx)
+fmull 56(%esp)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 0(%esp)
+fldl 8(%esp)
+fxch %st(2)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(4),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+faddp %st(0),%st(2)
+fxch %st(2)
+fstpl 112(%edx)
+fxch %st(2)
+fstpl 120(%edx)
+fstpl -128(%ecx)
+fstpl -120(%ecx)
+fldl 80(%edx)
+fmull 40(%ecx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -104(%ecx)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl -96(%ecx)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fldl -104(%ecx)
+fmull 40(%ecx)
+fldl -88(%ecx)
+fmull 16(%ecx)
+faddp %st(0),%st(2)
+fldl -96(%ecx)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl -80(%ecx)
+fmull 8(%ecx)
+faddp %st(0),%st(2)
+fldl -88(%ecx)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fldl -16(%edx)
+fmull 0(%ecx)
+faddp %st(0),%st(2)
+fldl -80(%ecx)
+fmull 16(%ecx)
+faddp %st(0),%st(1)
+fldl -96(%ecx)
+fmull 40(%ecx)
+fldl -8(%edx)
+fmull -8(%ecx)
+faddp %st(0),%st(3)
+fldl -16(%edx)
+fmull 8(%ecx)
+faddp %st(0),%st(2)
+fldl -88(%ecx)
+fmull 32(%ecx)
+faddp %st(0),%st(1)
+fldl 0(%edx)
+fmull -16(%ecx)
+faddp %st(0),%st(3)
+fldl -8(%edx)
+fmull 0(%ecx)
+faddp %st(0),%st(2)
+fldl -80(%ecx)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fldl 8(%edx)
+fmull -24(%ecx)
+faddp %st(0),%st(3)
+fldl 0(%edx)
+fmull -8(%ecx)
+faddp %st(0),%st(2)
+fldl -16(%edx)
+fmull 16(%ecx)
+faddp %st(0),%st(1)
+fldl 16(%edx)
+fmull -32(%ecx)
+faddp %st(0),%st(3)
+fldl 8(%edx)
+fmull -16(%ecx)
+faddp %st(0),%st(2)
+fldl -8(%edx)
+fmull 8(%ecx)
+faddp %st(0),%st(1)
+fldl -88(%ecx)
+fmull 40(%ecx)
+fldl 0(%edx)
+fmull 0(%ecx)
+faddp %st(0),%st(2)
+fldl 16(%edx)
+fmull -24(%ecx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(4),%st(0)
+fldl -80(%ecx)
+fmull 32(%ecx)
+faddp %st(0),%st(2)
+fldl 8(%edx)
+fmull -8(%ecx)
+faddp %st(0),%st(3)
+fldl 80(%edx)
+fmull -32(%ecx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl -16(%edx)
+fmull 24(%ecx)
+faddp %st(0),%st(2)
+fldl -80(%ecx)
+fmull 40(%ecx)
+fldl 16(%edx)
+fmull -16(%ecx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fldl -8(%edx)
+fmull 16(%ecx)
+faddp %st(0),%st(3)
+fldl -16(%edx)
+fmull 32(%ecx)
+faddp %st(0),%st(2)
+fsubrp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(4),%st(0)
+fldl 0(%edx)
+fmull 8(%ecx)
+faddp %st(0),%st(3)
+fldl -8(%edx)
+fmull 24(%ecx)
+faddp %st(0),%st(2)
+fldl 80(%edx)
+fmull -24(%ecx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fldl 8(%edx)
+fmull 0(%ecx)
+faddp %st(0),%st(3)
+fldl -16(%edx)
+fmull 40(%ecx)
+fldl 0(%edx)
+fmull 16(%ecx)
+faddp %st(0),%st(3)
+fldl 16(%edx)
+fmull -8(%ecx)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(5)
+fxch %st(5)
+fstpl 0(%esp)
+fldl -8(%edx)
+fmull 32(%ecx)
+faddp %st(0),%st(5)
+fldl 8(%edx)
+fmull 8(%ecx)
+faddp %st(0),%st(1)
+fldl 80(%edx)
+fmull -16(%ecx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%edx)
+fmull 24(%ecx)
+faddp %st(0),%st(5)
+fldl 16(%edx)
+fmull 0(%ecx)
+faddp %st(0),%st(3)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -104(%ecx)
+fmull -32(%ecx)
+faddp %st(0),%st(2)
+fxch %st(3)
+fstpl 8(%esp)
+fldl -8(%edx)
+fmull 40(%ecx)
+fldl 8(%edx)
+fmull 16(%ecx)
+faddp %st(0),%st(5)
+fldl 80(%edx)
+fmull -8(%ecx)
+faddp %st(0),%st(3)
+fldl -104(%ecx)
+fmull -24(%ecx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fldl 0(%edx)
+fmull 32(%ecx)
+faddp %st(0),%st(2)
+fldl 16(%edx)
+fmull 8(%ecx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%ecx)
+fmull -32(%ecx)
+faddp %st(0),%st(5)
+fxch %st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl 8(%edx)
+fmull 24(%ecx)
+faddp %st(0),%st(2)
+fldl 80(%edx)
+fmull 0(%ecx)
+faddp %st(0),%st(6)
+fldl -104(%ecx)
+fmull -16(%ecx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(2)
+fldl 0(%edx)
+fmull 40(%ecx)
+fldl 16(%edx)
+fmull 16(%ecx)
+faddp %st(0),%st(2)
+fxch %st(5)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%ecx)
+fmull -24(%ecx)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(5),%st(0)
+fldl 8(%edx)
+fmull 32(%ecx)
+faddp %st(0),%st(7)
+fldl 80(%edx)
+fmull 8(%ecx)
+faddp %st(0),%st(3)
+fldl -104(%ecx)
+fmull -8(%ecx)
+faddp %st(0),%st(2)
+fldl -88(%ecx)
+fmull -32(%ecx)
+faddp %st(0),%st(5)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fxch %st(3)
+fstpl 64(%edx)
+fldl 16(%edx)
+fmull 24(%ecx)
+faddp %st(0),%st(6)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%ecx)
+fmull -16(%ecx)
+faddp %st(0),%st(2)
+fxch %st(2)
+fadd %st(0),%st(3)
+fsubrp %st(0),%st(4)
+fldl 8(%edx)
+fmull 40(%ecx)
+fldl 80(%edx)
+fmull 16(%ecx)
+faddp %st(0),%st(6)
+fldl -104(%ecx)
+fmull 0(%ecx)
+faddp %st(0),%st(3)
+fldl -88(%ecx)
+fmull -24(%ecx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(4),%st(0)
+fldl 16(%edx)
+fmull 32(%ecx)
+faddp %st(0),%st(2)
+fxch %st(6)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%ecx)
+fmull -8(%ecx)
+faddp %st(0),%st(4)
+fldl -80(%ecx)
+fmull -32(%ecx)
+faddp %st(0),%st(3)
+fxch %st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fxch %st(5)
+fstpl 72(%edx)
+fldl 80(%edx)
+fmull 24(%ecx)
+faddp %st(0),%st(1)
+fldl -104(%ecx)
+fmull 8(%ecx)
+faddp %st(0),%st(6)
+fldl -88(%ecx)
+fmull -16(%ecx)
+faddp %st(0),%st(3)
+fxch %st(4)
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(3)
+fldl 16(%edx)
+fmull 40(%ecx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%ecx)
+fmull 0(%ecx)
+faddp %st(0),%st(6)
+fldl -80(%ecx)
+fmull -24(%ecx)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fldl 80(%edx)
+fmull 32(%ecx)
+faddp %st(0),%st(6)
+fldl -104(%ecx)
+fmull 16(%ecx)
+faddp %st(0),%st(2)
+fldl -88(%ecx)
+fmull -8(%ecx)
+faddp %st(0),%st(7)
+fldl -16(%edx)
+fmull -32(%ecx)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fxch %st(4)
+fstpl 80(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl -96(%ecx)
+fmull 8(%ecx)
+faddp %st(0),%st(5)
+fldl -80(%ecx)
+fmull -16(%ecx)
+faddp %st(0),%st(6)
+fxch %st(3)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(1)
+fldl -104(%ecx)
+fmull 24(%ecx)
+faddp %st(0),%st(3)
+fldl -88(%ecx)
+fmull 0(%ecx)
+faddp %st(0),%st(4)
+fldl -16(%edx)
+fmull -24(%ecx)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(2),%st(0)
+fldl -96(%ecx)
+fmull 16(%ecx)
+faddp %st(0),%st(4)
+fldl -80(%ecx)
+fmull -8(%ecx)
+faddp %st(0),%st(5)
+fldl -8(%edx)
+fmull -32(%ecx)
+faddp %st(0),%st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fxch %st(1)
+fstpl 88(%edx)
+fldl -88(%ecx)
+fmull 8(%ecx)
+faddp %st(0),%st(3)
+fldl -16(%edx)
+fmull -16(%ecx)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(1)
+fstpl -104(%ecx)
+fldl -80(%ecx)
+fmull 0(%ecx)
+faddp %st(0),%st(1)
+fldl -8(%edx)
+fmull -24(%ecx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(3),%st(0)
+fldl -16(%edx)
+fmull -8(%ecx)
+faddp %st(0),%st(2)
+fldl 0(%edx)
+fmull -32(%ecx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl -8(%edx)
+fmull -16(%ecx)
+faddp %st(0),%st(2)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(3)
+fxch %st(2)
+fstpl -96(%ecx)
+fldl 0(%edx)
+fmull -24(%ecx)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fldl 8(%edx)
+fmull -32(%ecx)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 0(%esp)
+fldl 8(%esp)
+fxch %st(2)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(4),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+faddp %st(0),%st(2)
+fxch %st(2)
+fstpl -88(%ecx)
+fxch %st(2)
+fstpl -80(%ecx)
+fstpl -72(%ecx)
+fstpl -64(%ecx)
+fldl 136(%ecx)
+fldl -120(%edx)
+fldl 16(%esp)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 16(%esp)
+fxch %st(1)
+fstpl -16(%edx)
+fstpl 0(%esp)
+fldl -112(%edx)
+fldl 24(%esp)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 24(%esp)
+fxch %st(1)
+fstpl -8(%edx)
+fstpl 8(%esp)
+fldl -104(%edx)
+fldl 32(%esp)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 32(%esp)
+fxch %st(1)
+fstpl 0(%edx)
+fstpl 16(%esp)
+fldl 40(%edx)
+fldl 40(%esp)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 40(%esp)
+fxch %st(1)
+fstpl 8(%edx)
+fstpl 24(%esp)
+fldl -96(%edx)
+fldl 48(%esp)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 48(%esp)
+fxch %st(1)
+fstpl 16(%edx)
+fstpl 32(%esp)
+fldl -88(%edx)
+fldl 24(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 24(%edx)
+fxch %st(1)
+fstpl 24(%edx)
+fstpl 40(%esp)
+fldl -80(%edx)
+fldl 32(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 32(%edx)
+fxch %st(1)
+fstpl 32(%edx)
+fstpl 48(%esp)
+fldl -72(%edx)
+fldl 48(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 48(%edx)
+fxch %st(1)
+fstpl 40(%edx)
+fstpl -120(%edx)
+fldl -64(%edx)
+fldl 56(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 56(%edx)
+fxch %st(1)
+fstpl 48(%edx)
+fstpl -112(%edx)
+fldl -56(%edx)
+fldl -112(%ecx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl -112(%ecx)
+fxch %st(1)
+fstpl 56(%edx)
+fstpl -104(%edx)
+fldl -48(%edx)
+fldl 64(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 64(%edx)
+fxch %st(1)
+fstpl 64(%edx)
+fstpl -96(%edx)
+fldl -40(%edx)
+fldl 72(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 72(%edx)
+fxch %st(1)
+fstpl 72(%edx)
+fstpl -88(%edx)
+fldl -32(%edx)
+fldl 80(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 80(%edx)
+fxch %st(1)
+fstpl 80(%edx)
+fstpl -80(%edx)
+fldl -24(%edx)
+fldl 88(%edx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl 88(%edx)
+fxch %st(1)
+fstpl 88(%edx)
+fstpl -72(%edx)
+fldl 96(%edx)
+fldl -104(%ecx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl -104(%ecx)
+fxch %st(1)
+fstpl 96(%edx)
+fstpl -64(%edx)
+fldl 104(%edx)
+fldl -96(%ecx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl -96(%ecx)
+fxch %st(1)
+fstpl 104(%edx)
+fstpl -56(%edx)
+fldl 112(%edx)
+fldl -88(%ecx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl -88(%ecx)
+fxch %st(1)
+fstpl 112(%edx)
+fstpl -48(%edx)
+fldl 120(%edx)
+fldl -80(%ecx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl -80(%ecx)
+fxch %st(1)
+fstpl 120(%edx)
+fstpl -40(%edx)
+fldl -128(%ecx)
+fldl -72(%ecx)
+fsubr %st(1),%st(0)
+fmul %st(2),%st(0)
+fsubr %st(0),%st(1)
+faddl -72(%ecx)
+fxch %st(1)
+fstpl -128(%ecx)
+fstpl -32(%edx)
+fldl -120(%ecx)
+fldl -64(%ecx)
+fsubr %st(1),%st(0)
+fmulp %st(0),%st(2)
+fsub %st(1),%st(0)
+fxch %st(1)
+faddl -64(%ecx)
+fxch %st(1)
+fstpl -120(%ecx)
+fstpl -24(%edx)
+movl 180(%ecx),%esi
+movl 184(%ecx),%ebp
+sub  $1,%ebp
+ja ._morebits
+movl 188(%ecx),%edi
+sub  $4,%edi
+jb ._done
+movl (%ebx,%edi),%esi
+mov  $32,%ebp
+jmp ._morebytes
+._done:
+movl 4(%esp,%eax),%eax
+fldl 0(%esp)
+fstpl 0(%eax)
+fldl 8(%esp)
+fstpl 8(%eax)
+fldl 16(%esp)
+fstpl 16(%eax)
+fldl 24(%esp)
+fstpl 24(%eax)
+fldl 32(%esp)
+fstpl 32(%eax)
+fldl 40(%esp)
+fstpl 40(%eax)
+fldl 48(%esp)
+fstpl 48(%eax)
+fldl -120(%edx)
+fstpl 56(%eax)
+fldl -112(%edx)
+fstpl 64(%eax)
+fldl -104(%edx)
+fstpl 72(%eax)
+fldl -96(%edx)
+fstpl 80(%eax)
+fldl -88(%edx)
+fstpl 88(%eax)
+fldl -80(%edx)
+fstpl 96(%eax)
+fldl -72(%edx)
+fstpl 104(%eax)
+fldl -64(%edx)
+fstpl 112(%eax)
+fldl -56(%edx)
+fstpl 120(%eax)
+fldl -48(%edx)
+fstpl 128(%eax)
+fldl -40(%edx)
+fstpl 136(%eax)
+fldl -32(%edx)
+fstpl 144(%eax)
+fldl -24(%edx)
+fstpl 152(%eax)
+movl 160(%ecx),%eax
+movl 164(%ecx),%ebx
+movl 168(%ecx),%esi
+movl 172(%ecx),%edi
+movl 176(%ecx),%ebp
+add %eax,%esp
+ret
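
The pattern that recurs throughout the block above, loading one of the crypto_scalarmult_curve25519_athlon_alpha* constants, adding it to a running sum, subtracting it again, and then taking the difference from the original sum, is the standard floating-point carry trick: adding and subtracting a large constant forces round-to-nearest to snap the sum to the nearest multiple of 2^k, that multiple is carried into the next limb, and the leftover stays in the current one. The constant names give the limb boundaries (bits 26, 51, 77, 102, 128, 153, 179, 204, 230, 255), i.e. ten limbs of alternating 26 and 25 bits for integers mod 2^255 - 19. The sketch below is a minimal standalone illustration with ordinary 64-bit doubles and k = 26; the constant 3*2^(51+26) and the variable names are assumptions of this sketch only, since the assembly works in x87 registers and its actual alpha values (defined elsewhere in this patch) depend on the FPU precision configured by the init routine.

/* Standalone sketch of the add/subtract carry trick used above, for k = 26.
   Not NaCl code: alpha26 = 3*2^(51+26) is the textbook constant for rounding
   a 53-bit-mantissa double to a multiple of 2^26 in round-to-nearest mode;
   it assumes doubles are evaluated in true 64-bit precision (e.g. SSE2). */
#include <stdio.h>

int main(void)
{
  double alpha26 = 3.0 * 0x1p77;                 /* 3 * 2^(51+26) */
  double x = 123456789.0 * 0x1p20 + 12345.0;     /* some limb-0 coefficient */
  volatile double t = x + alpha26;               /* rounds x to a multiple of 2^26 */
  double carry = t - alpha26;                    /* the part carried into limb 1 */
  double low = x - carry;                        /* the part kept in limb 0, |low| <= 2^25 */
  printf("carry/2^26 = %.0f  low = %.0f  recombined: %d\n",
         carry * 0x1p-26, low, carry + low == x);
  return 0;
}

The payoff of carrying this way is that the reduction never leaves the FPU, so the whole ladder stays in floating point until fromdouble converts back to bytes at the end.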
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/mult.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/mult.s
new file mode 100644
index 00000000..16f0e908
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/mult.s
@@ -0,0 +1,410 @@
+.text
+.p2align 5
+.globl _crypto_scalarmult_curve25519_athlon_mult
+.globl crypto_scalarmult_curve25519_athlon_mult
+_crypto_scalarmult_curve25519_athlon_mult:
+crypto_scalarmult_curve25519_athlon_mult:
+mov %esp,%eax
+and $31,%eax
+add $32,%eax
+sub %eax,%esp
+movl %ebp,0(%esp)
+movl 4(%esp,%eax),%ecx
+movl 8(%esp,%eax),%edx
+movl 12(%esp,%eax),%ebp
+fldl 72(%edx)
+fmull 72(%ebp)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(1)
+fldl 8(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(1)
+fldl 0(%edx)
+fmull 72(%ebp)
+fldl 16(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(2)
+fldl 8(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(1)
+fldl 24(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(2)
+fldl 16(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(1)
+fldl 32(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(2)
+fldl 24(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(1)
+fldl 8(%edx)
+fmull 72(%ebp)
+fldl 40(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(3)
+fldl 32(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(2)
+fldl 16(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(1)
+fldl 48(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(3)
+fldl 40(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(2)
+fldl 24(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(1)
+fldl 56(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(3)
+fldl 48(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(2)
+fldl 32(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(1)
+fldl 64(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(3)
+fldl 56(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(2)
+fldl 40(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(1)
+fldl 16(%edx)
+fmull 72(%ebp)
+fldl 48(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(2)
+fldl 64(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(4),%st(0)
+fldl 24(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(2)
+fldl 56(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(3)
+fldl 72(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fldl 32(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(2)
+fldl 24(%edx)
+fmull 72(%ebp)
+fldl 64(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fldl 40(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(3)
+fldl 32(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(2)
+fsubrp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(4),%st(0)
+fldl 48(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(3)
+fldl 40(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(2)
+fldl 72(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fldl 56(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(3)
+fldl 32(%edx)
+fmull 72(%ebp)
+fldl 48(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(3)
+fldl 64(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(4)
+fxch %st(1)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(5)
+fxch %st(5)
+fstpl 64(%ecx)
+fldl 40(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(5)
+fldl 56(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(1)
+fldl 72(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(2)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 48(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(5)
+fldl 64(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(3)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(2)
+fxch %st(3)
+fstpl 72(%ecx)
+fldl 40(%edx)
+fmull 72(%ebp)
+fldl 56(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(5)
+fldl 72(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(3)
+fldl 0(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fldl 48(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(2)
+fldl 64(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(6)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(5)
+fxch %st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fldl 56(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(2)
+fldl 72(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(6)
+fldl 0(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(2)
+fldl 48(%edx)
+fmull 72(%ebp)
+fldl 64(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(2)
+fxch %st(5)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(5),%st(0)
+fldl 56(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(7)
+fldl 72(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(3)
+fldl 0(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(2)
+fldl 16(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(5)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fxch %st(3)
+fstpl 0(%ecx)
+fldl 64(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(6)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(2)
+fxch %st(2)
+fadd %st(0),%st(3)
+fsubrp %st(0),%st(4)
+fldl 56(%edx)
+fmull 72(%ebp)
+fldl 72(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(6)
+fldl 0(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(3)
+fldl 16(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(4),%st(0)
+fldl 64(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(2)
+fxch %st(6)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(4)
+fldl 24(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(3)
+fxch %st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fxch %st(5)
+fstpl 8(%ecx)
+fldl 72(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(1)
+fldl 0(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(6)
+fldl 16(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(3)
+fxch %st(4)
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(3)
+fldl 64(%edx)
+fmull 72(%ebp)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(6)
+fldl 24(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fldl 72(%edx)
+fmull 64(%ebp)
+faddp %st(0),%st(6)
+fldl 0(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(2)
+fldl 16(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(7)
+fldl 32(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(4)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fxch %st(4)
+fstpl 16(%ecx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 8(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(5)
+fldl 24(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(6)
+fxch %st(3)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(1)
+fldl 0(%edx)
+fmull 56(%ebp)
+faddp %st(0),%st(3)
+fldl 16(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(4)
+fldl 32(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(2),%st(0)
+fldl 8(%edx)
+fmull 48(%ebp)
+faddp %st(0),%st(4)
+fldl 24(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(5)
+fldl 40(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(6)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fxch %st(1)
+fstpl 24(%ecx)
+fldl 16(%edx)
+fmull 40(%ebp)
+faddp %st(0),%st(3)
+fldl 32(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(4)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(1)
+fstpl 32(%ecx)
+fldl 24(%edx)
+fmull 32(%ebp)
+faddp %st(0),%st(1)
+fldl 40(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(3),%st(0)
+fldl 32(%edx)
+fmull 24(%ebp)
+faddp %st(0),%st(2)
+fldl 48(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fldl 40(%edx)
+fmull 16(%ebp)
+faddp %st(0),%st(2)
+fadd %st(0),%st(2)
+fsubrp %st(0),%st(3)
+fxch %st(2)
+fstpl 40(%ecx)
+fldl 48(%edx)
+fmull 8(%ebp)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fldl 56(%edx)
+fmull 0(%ebp)
+faddp %st(0),%st(3)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fldl 64(%ecx)
+fldl 72(%ecx)
+fxch %st(2)
+fadd %st(0),%st(4)
+fsubrp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(4),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(0),%st(1)
+fsubrp %st(0),%st(4)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+faddp %st(0),%st(2)
+fxch %st(2)
+fstpl 48(%ecx)
+fxch %st(2)
+fstpl 56(%ecx)
+fstpl 64(%ecx)
+fstpl 72(%ecx)
+movl 0(%esp),%ebp
+add %eax,%esp
+ret
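
mult.s multiplies two field elements held as ten doubles each (operands at 0(%edx)..72(%edx) and 0(%ebp)..72(%ebp), result at 0(%ecx)..72(%ecx)) and folds every partial product whose weight reaches 2^255 back into the low limbs through crypto_scalarmult_curve25519_athlon_scale, using the identity 2^255 ≡ 19 (mod 2^255 - 19). The toy program below shows the same folding on a deliberately small modulus, 2^26 - 5, with two 13-bit limbs; the modulus, the limb split and all names are assumptions made only for this illustration, and the real scale constant (defined elsewhere in this patch) differs because the athlon limbs carry their absolute binary weights.

/* Toy model of the "scale" reduction in mult.s: for p = 2^26 - 5, a partial
   product of weight 2^26 folds down by a factor of 5, just as weight-2^255
   products fold by 19 for p = 2^255 - 19.  Not NaCl code; two 13-bit limbs
   stand in for the ten ~25.5-bit limbs of the real representation. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
  const uint32_t p = (1u << 26) - 5;
  const double scale = 5.0;                    /* 2^26 = 5 modulo p */
  uint32_t a = 12345678 % p, b = 23456789 % p;

  double a0 = a & 0x1fff, a1 = a >> 13;        /* a = a0 + a1*2^13 */
  double b0 = b & 0x1fff, b1 = b >> 13;

  /* schoolbook product; the weight-2^26 term a1*b1 is folded by "scale" */
  double r0 = a0 * b0 + scale * (a1 * b1);     /* weight 2^0 */
  double r1 = a0 * b1 + a1 * b0;               /* weight 2^13 */

  uint64_t got  = ((uint64_t)r0 + ((uint64_t)r1 << 13)) % p;
  uint64_t want = ((uint64_t)a * (uint64_t)b) % p;
  printf("folded: %llu  direct: %llu  %s\n",
         (unsigned long long)got, (unsigned long long)want,
         got == want ? "match" : "MISMATCH");
  return 0;
}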
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/smult.c b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/smult.c
new file mode 100644
index 00000000..157f1e6c
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/smult.c
@@ -0,0 +1,91 @@
+#include "crypto_scalarmult.h"
+
+#define mult crypto_scalarmult_curve25519_athlon_mult
+#define square crypto_scalarmult_curve25519_athlon_square
+
+void crypto_scalarmult_curve25519_athlon_recip(double out[10],const double z[10])
+{
+  double z2[10];
+  double z9[10];
+  double z11[10];
+  double z2_5_0[10];
+  double z2_10_0[10];
+  double z2_20_0[10];
+  double z2_50_0[10];
+  double z2_100_0[10];
+  double t0[10];
+  double t1[10];
+  int i;
+
+  /* 2 */ square(z2,z);
+  /* 4 */ square(t1,z2);
+  /* 8 */ square(t0,t1);
+  /* 9 */ mult(z9,t0,z);
+  /* 11 */ mult(z11,z9,z2);
+  /* 22 */ square(t0,z11);
+  /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ square(t0,z2_5_0);
+  /* 2^7 - 2^2 */ square(t1,t0);
+  /* 2^8 - 2^3 */ square(t0,t1);
+  /* 2^9 - 2^4 */ square(t1,t0);
+  /* 2^10 - 2^5 */ square(t0,t1);
+  /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ square(t0,z2_10_0);
+  /* 2^12 - 2^2 */ square(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ square(t0,z2_20_0);
+  /* 2^22 - 2^2 */ square(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ square(t1,t0);
+  /* 2^42 - 2^2 */ square(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
+  /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ square(t0,z2_50_0);
+  /* 2^52 - 2^2 */ square(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ square(t1,z2_100_0);
+  /* 2^102 - 2^2 */ square(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
+  /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ square(t0,t1);
+  /* 2^202 - 2^2 */ square(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ square(t1,t0);
+  /* 2^252 - 2^2 */ square(t0,t1);
+  /* 2^253 - 2^3 */ square(t1,t0);
+  /* 2^254 - 2^4 */ square(t0,t1);
+  /* 2^255 - 2^5 */ square(t1,t0);
+  /* 2^255 - 21 */ mult(out,t1,z11);
+}
+
+int crypto_scalarmult(unsigned char *q,
+  const unsigned char *n,
+  const unsigned char *p)
+{
+  double work[30];
+  unsigned char e[32];
+  int i;
+  for (i = 0;i < 32;++i) e[i] = n[i];
+  e[0] &= 248;
+  e[31] &= 127;
+  e[31] |= 64;
+  crypto_scalarmult_curve25519_athlon_init();
+  crypto_scalarmult_curve25519_athlon_todouble(work,p);
+  crypto_scalarmult_curve25519_athlon_mainloop(work,e);
+  crypto_scalarmult_curve25519_athlon_recip(work + 10,work + 10);
+  mult(work + 20,work,work + 10);
+  crypto_scalarmult_curve25519_athlon_fromdouble(q,work + 20);
+  return 0;
+}
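
crypto_scalarmult_curve25519_athlon_recip above inverts z by raising it to p - 2 = 2^255 - 21, and its comments track the exponent reached at each step: square doubles the exponent, mult adds the two exponents. The standalone check below replays that chain on exponents reduced modulo the Mersenne prime 2^61 - 1 and confirms it lands on 2^255 - 21; the helpers sq and mul, the modulus M and the variable names mirror the C above but are conveniences of this sketch, not part of NaCl.

/* Replays the recip() addition chain on exponents: square() doubles the
   exponent of z, mult() adds exponents.  Exponents are tracked modulo
   M = 2^61 - 1 so they fit in 64 bits, and compared against
   (2^255 - 21) mod M.  Not NaCl code; a standalone sanity check. */
#include <stdio.h>
#include <stdint.h>

#define M (((uint64_t)1 << 61) - 1)

static uint64_t sq(uint64_t e) { return (2 * e) % M; }
static uint64_t mul(uint64_t e,uint64_t f) { return (e + f) % M; }

int main(void)
{
  uint64_t z = 1, z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0;
  uint64_t t0, t1, out, want;
  int i;

  z2 = sq(z); t1 = sq(z2); t0 = sq(t1);
  z9 = mul(t0,z); z11 = mul(z9,z2);
  t0 = sq(z11); z2_5_0 = mul(t0,z9);

  t0 = sq(z2_5_0); t1 = sq(t0); t0 = sq(t1); t1 = sq(t0); t0 = sq(t1);
  z2_10_0 = mul(t0,z2_5_0);

  t0 = sq(z2_10_0); t1 = sq(t0);
  for (i = 2;i < 10;i += 2) { t0 = sq(t1); t1 = sq(t0); }
  z2_20_0 = mul(t1,z2_10_0);

  t0 = sq(z2_20_0); t1 = sq(t0);
  for (i = 2;i < 20;i += 2) { t0 = sq(t1); t1 = sq(t0); }
  t0 = mul(t1,z2_20_0);

  t1 = sq(t0); t0 = sq(t1);
  for (i = 2;i < 10;i += 2) { t1 = sq(t0); t0 = sq(t1); }
  z2_50_0 = mul(t0,z2_10_0);

  t0 = sq(z2_50_0); t1 = sq(t0);
  for (i = 2;i < 50;i += 2) { t0 = sq(t1); t1 = sq(t0); }
  z2_100_0 = mul(t1,z2_50_0);

  t1 = sq(z2_100_0); t0 = sq(t1);
  for (i = 2;i < 100;i += 2) { t1 = sq(t0); t0 = sq(t1); }
  t1 = mul(t0,z2_100_0);

  t0 = sq(t1); t1 = sq(t0);
  for (i = 2;i < 50;i += 2) { t0 = sq(t1); t1 = sq(t0); }
  t0 = mul(t1,z2_50_0);

  t1 = sq(t0); t0 = sq(t1); t1 = sq(t0); t0 = sq(t1); t1 = sq(t0);
  out = mul(t1,z11);

  want = 1;
  for (i = 0;i < 255;++i) want = (2 * want) % M;   /* 2^255 mod M */
  want = (want + M - 21) % M;                      /* 2^255 - 21 mod M */

  printf("chain: %llu  target: %llu  %s\n",
         (unsigned long long)out, (unsigned long long)want,
         out == want ? "match" : "MISMATCH");
  return 0;
}

Counting the calls in the C above, the chain costs 254 squarings and 11 multiplications per inversion, and it is performed once per scalar multiplication, right before fromdouble.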
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/square.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/square.s
new file mode 100644
index 00000000..754def78
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/square.s
@@ -0,0 +1,298 @@
+.text
+.p2align 5
+.globl _crypto_scalarmult_curve25519_athlon_square
+.globl crypto_scalarmult_curve25519_athlon_square
+_crypto_scalarmult_curve25519_athlon_square:
+crypto_scalarmult_curve25519_athlon_square:
+mov %esp,%eax
+and $31,%eax
+add $64,%eax
+sub %eax,%esp
+movl 8(%esp,%eax),%edx
+movl 4(%esp,%eax),%ecx
+fldl 72(%edx)
+fmul %st(0),%st(0)
+fldl 0(%edx)
+fadd %st(0),%st(0)
+fldl 8(%edx)
+fadd %st(0),%st(0)
+fldl 16(%edx)
+fadd %st(0),%st(0)
+fldl 56(%edx)
+fxch %st(4)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 72(%edx)
+fmul %st(4),%st(0)
+fldl 64(%edx)
+fmul %st(4),%st(0)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstl 0(%esp)
+fxch %st(3)
+fstl 8(%esp)
+fxch %st(3)
+fmull 64(%edx)
+faddp %st(0),%st(1)
+fldl 48(%edx)
+fxch %st(5)
+fmul %st(0),%st(3)
+fxch %st(3)
+faddp %st(0),%st(1)
+fxch %st(2)
+fadd %st(0),%st(0)
+fldl 56(%edx)
+fmul %st(2),%st(0)
+faddp %st(0),%st(4)
+fxch %st(1)
+fstl 16(%esp)
+fldl 40(%edx)
+fxch %st(5)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fadd %st(0),%st(0)
+fstpl 48(%esp)
+fldl 24(%edx)
+fadd %st(0),%st(0)
+fstl 24(%esp)
+fldl 48(%edx)
+fmul %st(1),%st(0)
+faddp %st(0),%st(4)
+fmul %st(4),%st(0)
+faddp %st(0),%st(2)
+fxch %st(3)
+fadd %st(0),%st(0)
+fstpl 40(%esp)
+fldl 32(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+fldl 8(%esp)
+fldl 72(%edx)
+fmul %st(0),%st(1)
+fldl 16(%esp)
+fmul %st(0),%st(1)
+fldl 64(%edx)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(3)
+fldl 24(%esp)
+fmul %st(0),%st(1)
+fxch %st(1)
+faddp %st(0),%st(2)
+fldl 32(%edx)
+fadd %st(0),%st(0)
+fstl 32(%esp)
+fmull 40(%edx)
+faddp %st(0),%st(6)
+fxch %st(3)
+faddp %st(0),%st(5)
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(5),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(5)
+fldl 56(%edx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fldl 32(%esp)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(2)
+fldl 48(%edx)
+fmul %st(0),%st(4)
+fxch %st(4)
+faddp %st(0),%st(3)
+fxch %st(3)
+fmull 40(%esp)
+faddp %st(0),%st(1)
+fxch %st(3)
+fstpl 64(%ecx)
+fldl 40(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fxch %st(2)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fxch %st(3)
+fstpl 72(%ecx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 24(%esp)
+fmull 72(%edx)
+fldl 0(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fldl 32(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(1)
+fldl 0(%esp)
+fmull 8(%edx)
+faddp %st(0),%st(3)
+fldl 40(%esp)
+fmull 56(%edx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fsubr %st(0),%st(2)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fsubr %st(0),%st(3)
+fldl 48(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(2)
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 16(%edx)
+faddp %st(0),%st(1)
+fldl 8(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 0(%ecx)
+fldl 32(%esp)
+fmull 72(%edx)
+fldl 40(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(1)
+fldl 48(%esp)
+fmull 56(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 16(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(2)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(2),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fsubr %st(0),%st(2)
+fxch %st(3)
+fstpl 8(%ecx)
+fldl 40(%esp)
+fmull 72(%edx)
+fldl 48(%esp)
+fmull 64(%edx)
+faddp %st(0),%st(1)
+fldl 56(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 32(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+fldl 16(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(3)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(3),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fsubr %st(0),%st(3)
+fxch %st(1)
+fstpl 16(%ecx)
+fldl 48(%esp)
+fldl 72(%edx)
+fmul %st(0),%st(1)
+fmul %st(5),%st(0)
+fxch %st(5)
+fmull 64(%edx)
+faddp %st(0),%st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 32(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull 24(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fsubr %st(0),%st(1)
+fxch %st(2)
+fstpl 24(%ecx)
+fldl 64(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(4)
+fxch %st(3)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 48(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull 32(%edx)
+faddp %st(0),%st(1)
+fldl 24(%edx)
+fmul %st(0),%st(0)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fsubr %st(0),%st(1)
+fldl 64(%edx)
+fadd %st(0),%st(0)
+fmull 72(%edx)
+fmull crypto_scalarmult_curve25519_athlon_scale
+fldl 0(%esp)
+fmull 56(%edx)
+faddp %st(0),%st(1)
+fldl 8(%esp)
+fmull 48(%edx)
+faddp %st(0),%st(1)
+fldl 16(%esp)
+fmull 40(%edx)
+faddp %st(0),%st(1)
+fldl 24(%esp)
+fmull 32(%edx)
+faddp %st(0),%st(1)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fsubr %st(0),%st(1)
+fldl 64(%ecx)
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fldl 72(%ecx)
+fxch %st(1)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(2)
+faddp %st(0),%st(1)
+fxch %st(4)
+fstpl 32(%ecx)
+fxch %st(4)
+fstpl 40(%ecx)
+fxch %st(1)
+fstpl 48(%ecx)
+fstpl 56(%ecx)
+fxch %st(1)
+fstpl 64(%ecx)
+fstpl 72(%ecx)
+add %eax,%esp
+ret
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/todouble.s b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/todouble.s
new file mode 100644
index 00000000..c37aa447
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/athlon/todouble.s
@@ -0,0 +1,144 @@
+.text
+.p2align 5
+.globl _crypto_scalarmult_curve25519_athlon_todouble
+.globl crypto_scalarmult_curve25519_athlon_todouble
+_crypto_scalarmult_curve25519_athlon_todouble:
+crypto_scalarmult_curve25519_athlon_todouble:
+mov %esp,%eax
+and $31,%eax
+add $96,%eax
+sub %eax,%esp
+movl 8(%esp,%eax),%ecx
+movl 0(%ecx),%edx
+movl  $0x43300000,4(%esp)
+movl %edx,0(%esp)
+movl 4(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x45300000,12(%esp)
+movl %edx,8(%esp)
+movl 7(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x46b00000,20(%esp)
+movl %edx,16(%esp)
+movl 10(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x48300000,28(%esp)
+movl %edx,24(%esp)
+movl 13(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x49b00000,36(%esp)
+movl %edx,32(%esp)
+movl 16(%ecx),%edx
+movl  $0x4b300000,44(%esp)
+movl %edx,40(%esp)
+movl 20(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x4d300000,52(%esp)
+movl %edx,48(%esp)
+movl 23(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x4eb00000,60(%esp)
+movl %edx,56(%esp)
+movl 26(%ecx),%edx
+and  $0xffffff,%edx
+movl  $0x50300000,68(%esp)
+movl %edx,64(%esp)
+movl 28(%ecx),%ecx
+shr  $8,%ecx
+and  $0x7fffff,%ecx
+movl  $0x51b00000,76(%esp)
+movl %ecx,72(%esp)
+movl 4(%esp,%eax),%ecx
+fldl 72(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in9offset
+fldl crypto_scalarmult_curve25519_athlon_alpha255
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha255
+fsubr %st(0),%st(1)
+fldl 0(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in0offset
+fxch %st(1)
+fmull crypto_scalarmult_curve25519_athlon_scale
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha26
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha26
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 0(%ecx)
+fldl 8(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in1offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha51
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha51
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 8(%ecx)
+fldl 16(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in2offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha77
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha77
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 16(%ecx)
+fldl 24(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in3offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha102
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha102
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 24(%ecx)
+fldl 32(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in4offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha128
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha128
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 32(%ecx)
+fldl 40(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in5offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha153
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha153
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 40(%ecx)
+fldl 48(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in6offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha179
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha179
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 48(%ecx)
+fldl 56(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in7offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha204
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha204
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 56(%ecx)
+fldl 64(%esp)
+fsubl crypto_scalarmult_curve25519_athlon_in8offset
+faddp %st(0),%st(1)
+fldl crypto_scalarmult_curve25519_athlon_alpha230
+fadd %st(1),%st(0)
+fsubl crypto_scalarmult_curve25519_athlon_alpha230
+fsubr %st(0),%st(1)
+fxch %st(1)
+fstpl 64(%ecx)
+faddp %st(0),%st(1)
+fstpl 72(%ecx)
+add %eax,%esp
+ret
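
todouble.s converts the packed 32-byte little-endian input into double-precision limbs without any integer-to-floating-point conversion instruction: each chunk of the input is written as the low word of an IEEE-754 double whose high word is a fixed exponent pattern (0x43300000, 0x45300000, ...), and a matching offset constant (crypto_scalarmult_curve25519_athlon_in0offset and friends, defined in a separate constants file) is then subtracted with fsubl. A minimal C illustration of the underlying exponent-bias trick, not part of the patch, assuming a little-endian x86 target like the rest of this directory:

/* illustration only -- not part of the NaCl sources above */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
  uint32_t n = 0x00abcdef;                  /* one chunk of the input */
  uint32_t words[2] = { n, 0x43300000 };    /* low word = n, high word = exponent pattern */
  double d;
  memcpy(&d, words, sizeof d);              /* reinterpret the bit pattern as a double: 2^52 + n */
  printf("%.1f\n", d - 4503599627370496.0); /* subtract 2^52: prints n exactly (11259375.0) */
  return 0;
}
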
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/checksum b/nacl/nacl-20110221/crypto_scalarmult/curve25519/checksum
new file mode 100644
index 00000000..ce2d395b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/checksum
@@ -0,0 +1 @@
+dacdae4a0f12353dfc66757f2fd1fff538fe6616115dace9afb8016a55be2a52
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/api.h b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/api.h
new file mode 100644
index 00000000..60339596
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_SCALARBYTES 32
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/base.c b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/base.c
new file mode 100644
index 00000000..f33419e8
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/base.c
@@ -0,0 +1,8 @@
+#include "crypto_scalarmult.h"
+
+static const unsigned char basepoint[32] = {9};
+
+int crypto_scalarmult_base(unsigned char *q,const unsigned char *n)
+{
+  return crypto_scalarmult(q, n, basepoint);
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/implementors b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/implementors
new file mode 100644
index 00000000..0ce43280
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/implementors
@@ -0,0 +1 @@
+Adam Langley (Google)
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/smult.c b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/smult.c
new file mode 100644
index 00000000..6d26956b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/donna_c64/smult.c
@@ -0,0 +1,477 @@
+/* Copyright 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Code released into the public domain.
+ *
+ * curve25519-donna: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ *   http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+
+#include <string.h>
+#include <stdint.h>
+#include "crypto_scalarmult.h"
+
+typedef uint8_t u8;
+typedef uint64_t felem;
+// This is a special gcc mode for 128-bit integers. It's implemented on 64-bit
+// platforms only as far as I know.
+typedef unsigned uint128_t __attribute__((mode(TI)));
+
+/* Sum two numbers: output += in */
+static void fsum(felem *output, const felem *in) {
+  unsigned i;
+  for (i = 0; i < 5; ++i) output[i] += in[i];
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ */
+static void fdifference_backwards(felem *ioutput, const felem *iin) {
+  static const int64_t twotothe51 = (1l << 51);
+  const int64_t *in = (const int64_t *) iin;
+  int64_t *out = (int64_t *) ioutput;
+
+  out[0] = in[0] - out[0];
+  out[1] = in[1] - out[1];
+  out[2] = in[2] - out[2];
+  out[3] = in[3] - out[3];
+  out[4] = in[4] - out[4];
+
+  // An arithmetic shift right by 63 places turns a non-negative number into 0
+  // and a negative number into all 1s. This gives us a bitmask that lets us
+  // avoid side-channel-prone branches.
+  int64_t t;
+
+#define NEGCHAIN(a,b) \
+  t = out[a] >> 63; \
+  out[a] += twotothe51 & t; \
+  out[b] -= 1 & t;
+
+#define NEGCHAIN19(a,b) \
+  t = out[a] >> 63; \
+  out[a] += twotothe51 & t; \
+  out[b] -= 19 & t;
+
+  NEGCHAIN(0, 1);
+  NEGCHAIN(1, 2);
+  NEGCHAIN(2, 3);
+  NEGCHAIN(3, 4);
+  NEGCHAIN19(4, 0);
+  NEGCHAIN(0, 1);
+  NEGCHAIN(1, 2);
+  NEGCHAIN(2, 3);
+  NEGCHAIN(3, 4);
+}
+
+/* Multiply a number by a scalar: output = in * scalar */
+static void fscalar_product(felem *output, const felem *in, const felem scalar) {
+  uint128_t a;
+
+  a = ((uint128_t) in[0]) * scalar;
+  output[0] = a & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[1]) * scalar + (a >> 51);
+  output[1] = a & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[2]) * scalar + (a >> 51);
+  output[2] = a & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[3]) * scalar + (a >> 51);
+  output[3] = a & 0x7ffffffffffff;
+
+  a = ((uint128_t) in[4]) * scalar + (a >> 51);
+  output[4] = a & 0x7ffffffffffff;
+
+  output[0] += (a >> 51) * 19;
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * output must be distinct from both inputs. The inputs are in reduced
+ * coefficient form; the output is not.
+ */
+static void fmul(felem *output, const felem *in2, const felem *in) {
+  uint128_t t[9];
+
+  t[0] = ((uint128_t) in[0]) * in2[0];
+  t[1] = ((uint128_t) in[0]) * in2[1] +
+         ((uint128_t) in[1]) * in2[0];
+  t[2] = ((uint128_t) in[0]) * in2[2] +
+         ((uint128_t) in[2]) * in2[0] +
+         ((uint128_t) in[1]) * in2[1];
+  t[3] = ((uint128_t) in[0]) * in2[3] +
+         ((uint128_t) in[3]) * in2[0] +
+         ((uint128_t) in[1]) * in2[2] +
+         ((uint128_t) in[2]) * in2[1];
+  t[4] = ((uint128_t) in[0]) * in2[4] +
+         ((uint128_t) in[4]) * in2[0] +
+         ((uint128_t) in[3]) * in2[1] +
+         ((uint128_t) in[1]) * in2[3] +
+         ((uint128_t) in[2]) * in2[2];
+  t[5] = ((uint128_t) in[4]) * in2[1] +
+         ((uint128_t) in[1]) * in2[4] +
+         ((uint128_t) in[2]) * in2[3] +
+         ((uint128_t) in[3]) * in2[2];
+  t[6] = ((uint128_t) in[4]) * in2[2] +
+         ((uint128_t) in[2]) * in2[4] +
+         ((uint128_t) in[3]) * in2[3];
+  t[7] = ((uint128_t) in[3]) * in2[4] +
+         ((uint128_t) in[4]) * in2[3];
+  t[8] = ((uint128_t) in[4]) * in2[4];
+
+  t[0] += t[5] * 19;
+  t[1] += t[6] * 19;
+  t[2] += t[7] * 19;
+  t[3] += t[8] * 19;
+
+  t[1] += t[0] >> 51;
+  t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51;
+  t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51;
+  t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51;
+  t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51);
+  t[4] &= 0x7ffffffffffff;
+  t[1] += t[0] >> 51;
+  t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51;
+  t[1] &= 0x7ffffffffffff;
+
+  output[0] = t[0];
+  output[1] = t[1];
+  output[2] = t[2];
+  output[3] = t[3];
+  output[4] = t[4];
+}
+
+static void
+fsquare(felem *output, const felem *in) {
+  uint128_t t[9];
+
+  t[0] = ((uint128_t) in[0]) * in[0];
+  t[1] = ((uint128_t) in[0]) * in[1] * 2;
+  t[2] = ((uint128_t) in[0]) * in[2] * 2 +
+         ((uint128_t) in[1]) * in[1];
+  t[3] = ((uint128_t) in[0]) * in[3] * 2 +
+         ((uint128_t) in[1]) * in[2] * 2;
+  t[4] = ((uint128_t) in[0]) * in[4] * 2 +
+         ((uint128_t) in[3]) * in[1] * 2 +
+         ((uint128_t) in[2]) * in[2];
+  t[5] = ((uint128_t) in[4]) * in[1] * 2 +
+         ((uint128_t) in[2]) * in[3] * 2;
+  t[6] = ((uint128_t) in[4]) * in[2] * 2 +
+         ((uint128_t) in[3]) * in[3];
+  t[7] = ((uint128_t) in[3]) * in[4] * 2;
+  t[8] = ((uint128_t) in[4]) * in[4];
+
+  t[0] += t[5] * 19;
+  t[1] += t[6] * 19;
+  t[2] += t[7] * 19;
+  t[3] += t[8] * 19;
+
+  t[1] += t[0] >> 51;
+  t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51;
+  t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51;
+  t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51;
+  t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51);
+  t[4] &= 0x7ffffffffffff;
+  t[1] += t[0] >> 51;
+  t[0] &= 0x7ffffffffffff;
+
+  output[0] = t[0];
+  output[1] = t[1];
+  output[2] = t[2];
+  output[3] = t[3];
+  output[4] = t[4];
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form */
+static void
+fexpand(felem *output, const u8 *in) {
+  output[0] = *((const uint64_t *)(in)) & 0x7ffffffffffff;
+  output[1] = (*((const uint64_t *)(in+6)) >> 3) & 0x7ffffffffffff;
+  output[2] = (*((const uint64_t *)(in+12)) >> 6) & 0x7ffffffffffff;
+  output[3] = (*((const uint64_t *)(in+19)) >> 1) & 0x7ffffffffffff;
+  output[4] = (*((const uint64_t *)(in+25)) >> 4) & 0x7ffffffffffff;
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array
+ */
+static void
+fcontract(u8 *output, const felem *input) {
+  uint128_t t[5];
+
+  t[0] = input[0];
+  t[1] = input[1];
+  t[2] = input[2];
+  t[3] = input[3];
+  t[4] = input[4];
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+  /* now t is between 0 and 2^255-1, properly carried. */
+  /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+  t[0] += 19;
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[0] += 19 * (t[4] >> 51); t[4] &= 0x7ffffffffffff;
+
+  /* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+  t[0] += 0x8000000000000 - 19;
+  t[1] += 0x8000000000000 - 1;
+  t[2] += 0x8000000000000 - 1;
+  t[3] += 0x8000000000000 - 1;
+  t[4] += 0x8000000000000 - 1;
+
+  /* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+  t[1] += t[0] >> 51; t[0] &= 0x7ffffffffffff;
+  t[2] += t[1] >> 51; t[1] &= 0x7ffffffffffff;
+  t[3] += t[2] >> 51; t[2] &= 0x7ffffffffffff;
+  t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
+  t[4] &= 0x7ffffffffffff;
+
+  *((uint64_t *)(output)) = t[0] | (t[1] << 51);
+  *((uint64_t *)(output+8)) = (t[1] >> 13) | (t[2] << 38);
+  *((uint64_t *)(output+16)) = (t[2] >> 26) | (t[3] << 25);
+  *((uint64_t *)(output+24)) = (t[3] >> 39) | (t[4] << 12);
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z2: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ */
+static void
+fmonty(felem *x2, felem *z2, /* output 2Q */
+       felem *x3, felem *z3, /* output Q + Q' */
+       felem *x, felem *z,   /* input Q */
+       felem *xprime, felem *zprime, /* input Q' */
+       const felem *qmqp /* input Q - Q' */) {
+  felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5],
+        zzprime[5], zzzprime[5];
+
+  memcpy(origx, x, 5 * sizeof(felem));
+  fsum(x, z);
+  fdifference_backwards(z, origx);  // does x - z
+
+  memcpy(origxprime, xprime, sizeof(felem) * 5);
+  fsum(xprime, zprime);
+  fdifference_backwards(zprime, origxprime);
+  fmul(xxprime, xprime, z);
+  fmul(zzprime, x, zprime);
+  memcpy(origxprime, xxprime, sizeof(felem) * 5);
+  fsum(xxprime, zzprime);
+  fdifference_backwards(zzprime, origxprime);
+  fsquare(x3, xxprime);
+  fsquare(zzzprime, zzprime);
+  fmul(z3, zzzprime, qmqp);
+
+  fsquare(xx, x);
+  fsquare(zz, z);
+  fmul(x2, xx, zz);
+  fdifference_backwards(zz, xx);  // does zz = xx - zz
+  fscalar_product(zzz, zz, 121665);
+  fsum(zzz, xx);
+  fmul(z2, zz, zzz);
+}
+
+// -----------------------------------------------------------------------------
+// Maybe swap the contents of two felem arrays (@a and @b), each @len elements
+// long. Perform the swap iff @iswap is 1; @iswap must be 0 or 1.
+//
+// This function performs the swap without leaking any side-channel
+// information.
+// -----------------------------------------------------------------------------
+static void
+swap_conditional(felem *a, felem *b, unsigned len, felem iswap) {
+  unsigned i;
+  const felem swap = -iswap;
+
+  for (i = 0; i < len; ++i) {
+    const felem x = swap & (a[i] ^ b[i]);
+    a[i] ^= x;
+    b[i] ^= x;
+  }
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ */
+static void
+cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) {
+  felem a[5] = {0}, b[5] = {1}, c[5] = {1}, d[5] = {0};
+  felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+  felem e[5] = {0}, f[5] = {1}, g[5] = {0}, h[5] = {1};
+  felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+  unsigned i, j;
+
+  memcpy(nqpqx, q, sizeof(felem) * 5);
+
+  for (i = 0; i < 32; ++i) {
+    u8 byte = n[31 - i];
+    for (j = 0; j < 8; ++j) {
+      const felem bit = byte >> 7;
+
+      swap_conditional(nqx, nqpqx, 5, bit);
+      swap_conditional(nqz, nqpqz, 5, bit);
+      fmonty(nqx2, nqz2,
+             nqpqx2, nqpqz2,
+             nqx, nqz,
+             nqpqx, nqpqz,
+             q);
+      swap_conditional(nqx2, nqpqx2, 5, bit);
+      swap_conditional(nqz2, nqpqz2, 5, bit);
+
+      t = nqx;
+      nqx = nqx2;
+      nqx2 = t;
+      t = nqz;
+      nqz = nqz2;
+      nqz2 = t;
+      t = nqpqx;
+      nqpqx = nqpqx2;
+      nqpqx2 = t;
+      t = nqpqz;
+      nqpqz = nqpqz2;
+      nqpqz2 = t;
+
+      byte <<= 1;
+    }
+  }
+
+  memcpy(resultx, nqx, sizeof(felem) * 5);
+  memcpy(resultz, nqz, sizeof(felem) * 5);
+}
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+static void
+crecip(felem *out, const felem *z) {
+  felem z2[5];
+  felem z9[5];
+  felem z11[5];
+  felem z2_5_0[5];
+  felem z2_10_0[5];
+  felem z2_20_0[5];
+  felem z2_50_0[5];
+  felem z2_100_0[5];
+  felem t0[5];
+  felem t1[5];
+  int i;
+
+  /* 2 */ fsquare(z2,z);
+  /* 4 */ fsquare(t1,z2);
+  /* 8 */ fsquare(t0,t1);
+  /* 9 */ fmul(z9,t0,z);
+  /* 11 */ fmul(z11,z9,z2);
+  /* 22 */ fsquare(t0,z11);
+  /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
+  /* 2^7 - 2^2 */ fsquare(t1,t0);
+  /* 2^8 - 2^3 */ fsquare(t0,t1);
+  /* 2^9 - 2^4 */ fsquare(t1,t0);
+  /* 2^10 - 2^5 */ fsquare(t0,t1);
+  /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
+  /* 2^12 - 2^2 */ fsquare(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
+  /* 2^22 - 2^2 */ fsquare(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ fsquare(t1,t0);
+  /* 2^42 - 2^2 */ fsquare(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
+  /* 2^52 - 2^2 */ fsquare(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
+  /* 2^102 - 2^2 */ fsquare(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ fsquare(t0,t1);
+  /* 2^202 - 2^2 */ fsquare(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ fsquare(t1,t0);
+  /* 2^252 - 2^2 */ fsquare(t0,t1);
+  /* 2^253 - 2^3 */ fsquare(t1,t0);
+  /* 2^254 - 2^4 */ fsquare(t0,t1);
+  /* 2^255 - 2^5 */ fsquare(t1,t0);
+  /* 2^255 - 21 */ fmul(out,t1,z11);
+}
+
+int
+crypto_scalarmult(u8 *mypublic, const u8 *secret, const u8 *basepoint) {
+  felem bp[5], x[5], z[5], zmone[5];
+  unsigned char e[32];
+  int i;
+  for (i = 0;i < 32;++i) e[i] = secret[i];
+  e[0] &= 248;
+  e[31] &= 127;
+  e[31] |= 64;
+  fexpand(bp, basepoint);
+  cmult(x, z, e, bp);
+  crecip(zmone, z);
+  fmul(z, x, zmone);
+  fcontract(mypublic, z);
+  return 0;
+}
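
Both fdifference_backwards() and swap_conditional() above avoid secret-dependent branches by expanding a 0/1 flag (or a sign bit) into an all-zeros/all-ones mask and applying it with XOR/AND. A minimal standalone sketch of the conditional-swap idiom, not part of the patch, assuming only C99 and <stdint.h>:

/* illustration only -- not part of the NaCl sources above */
#include <stdint.h>
#include <stdio.h>

static void cswap(uint64_t *a, uint64_t *b, uint64_t iswap)
{
  uint64_t mask = -iswap;            /* iswap=0 -> 0x000...0, iswap=1 -> 0xfff...f */
  uint64_t x = mask & (*a ^ *b);     /* either 0 or a^b */
  *a ^= x;                           /* same instructions execute in both cases */
  *b ^= x;
}

int main(void)
{
  uint64_t a = 5, b = 9;
  cswap(&a, &b, 1);                  /* swaps: a=9, b=5 */
  cswap(&a, &b, 0);                  /* no-op: a=9, b=5 */
  printf("%llu %llu\n", (unsigned long long) a, (unsigned long long) b);
  return 0;
}

cmult() uses this pattern limb by limb to select between the two Montgomery-ladder states according to each secret key bit without branching.
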
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/api.h b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/api.h
new file mode 100644
index 00000000..60339596
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_BYTES 32
+#define CRYPTO_SCALARBYTES 32
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/base.c b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/base.c
new file mode 100644
index 00000000..ac2d7eb4
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/base.c
@@ -0,0 +1,16 @@
+/*
+version 20081011
+Matthew Dempsky
+Public domain.
+Derived from public domain code by D. J. Bernstein.
+*/
+
+#include "crypto_scalarmult.h"
+
+const unsigned char base[32] = {9};
+
+int crypto_scalarmult_base(unsigned char *q,
+  const unsigned char *n)
+{
+  return crypto_scalarmult(q,n,base);
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/implementors b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/implementors
new file mode 100644
index 00000000..aa551790
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/implementors
@@ -0,0 +1 @@
+Matthew Dempsky (Mochi Media)
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/smult.c b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/smult.c
new file mode 100644
index 00000000..6a479558
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/curve25519/ref/smult.c
@@ -0,0 +1,265 @@
+/*
+version 20081011
+Matthew Dempsky
+Public domain.
+Derived from public domain code by D. J. Bernstein.
+*/
+
+#include "crypto_scalarmult.h"
+
+static void add(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 31;++j) { u += a[j] + b[j]; out[j] = u & 255; u >>= 8; }
+  u += a[31] + b[31]; out[31] = u;
+}
+
+static void sub(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 218;
+  for (j = 0;j < 31;++j) {
+    u += a[j] + 65280 - b[j];
+    out[j] = u & 255;
+    u >>= 8;
+  }
+  u += a[31] - b[31];
+  out[31] = u;
+}
+
+static void squeeze(unsigned int a[32])
+{
+  unsigned int j;
+  unsigned int u;
+  u = 0;
+  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
+  u += a[31]; a[31] = u & 127;
+  u = 19 * (u >> 7);
+  for (j = 0;j < 31;++j) { u += a[j]; a[j] = u & 255; u >>= 8; }
+  u += a[31]; a[31] = u;
+}
+
+static const unsigned int minusp[32] = {
+ 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
+} ;
+
+static void freeze(unsigned int a[32])
+{
+  unsigned int aorig[32];
+  unsigned int j;
+  unsigned int negative;
+
+  for (j = 0;j < 32;++j) aorig[j] = a[j];
+  add(a,a,minusp);
+  negative = -((a[31] >> 7) & 1);
+  for (j = 0;j < 32;++j) a[j] ^= negative & (aorig[j] ^ a[j]);
+}
+
+static void mult(unsigned int out[32],const unsigned int a[32],const unsigned int b[32])
+{
+  unsigned int i;
+  unsigned int j;
+  unsigned int u;
+
+  for (i = 0;i < 32;++i) {
+    u = 0;
+    for (j = 0;j <= i;++j) u += a[j] * b[i - j];
+    for (j = i + 1;j < 32;++j) u += 38 * a[j] * b[i + 32 - j];
+    out[i] = u;
+  }
+  squeeze(out);
+}
+
+static void mult121665(unsigned int out[32],const unsigned int a[32])
+{
+  unsigned int j;
+  unsigned int u;
+
+  u = 0;
+  for (j = 0;j < 31;++j) { u += 121665 * a[j]; out[j] = u & 255; u >>= 8; }
+  u += 121665 * a[31]; out[31] = u & 127;
+  u = 19 * (u >> 7);
+  for (j = 0;j < 31;++j) { u += out[j]; out[j] = u & 255; u >>= 8; }
+  u += out[j]; out[j] = u;
+}
+
+static void square(unsigned int out[32],const unsigned int a[32])
+{
+  unsigned int i;
+  unsigned int j;
+  unsigned int u;
+
+  for (i = 0;i < 32;++i) {
+    u = 0;
+    for (j = 0;j < i - j;++j) u += a[j] * a[i - j];
+    for (j = i + 1;j < i + 32 - j;++j) u += 38 * a[j] * a[i + 32 - j];
+    u *= 2;
+    if ((i & 1) == 0) {
+      u += a[i / 2] * a[i / 2];
+      u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
+    }
+    out[i] = u;
+  }
+  squeeze(out);
+}
+
+static void select(unsigned int p[64],unsigned int q[64],const unsigned int r[64],const unsigned int s[64],unsigned int b)
+{
+  unsigned int j;
+  unsigned int t;
+  unsigned int bminus1;
+
+  bminus1 = b - 1;
+  for (j = 0;j < 64;++j) {
+    t = bminus1 & (r[j] ^ s[j]);
+    p[j] = s[j] ^ t;
+    q[j] = r[j] ^ t;
+  }
+}
+
+static void mainloop(unsigned int work[64],const unsigned char e[32])
+{
+  unsigned int xzm1[64];
+  unsigned int xzm[64];
+  unsigned int xzmb[64];
+  unsigned int xzm1b[64];
+  unsigned int xznb[64];
+  unsigned int xzn1b[64];
+  unsigned int a0[64];
+  unsigned int a1[64];
+  unsigned int b0[64];
+  unsigned int b1[64];
+  unsigned int c1[64];
+  unsigned int r[32];
+  unsigned int s[32];
+  unsigned int t[32];
+  unsigned int u[32];
+  unsigned int i;
+  unsigned int j;
+  unsigned int b;
+  int pos;
+
+  for (j = 0;j < 32;++j) xzm1[j] = work[j];
+  xzm1[32] = 1;
+  for (j = 33;j < 64;++j) xzm1[j] = 0;
+
+  xzm[0] = 1;
+  for (j = 1;j < 64;++j) xzm[j] = 0;
+
+  for (pos = 254;pos >= 0;--pos) {
+    b = e[pos / 8] >> (pos & 7);
+    b &= 1;
+    select(xzmb,xzm1b,xzm,xzm1,b);
+    add(a0,xzmb,xzmb + 32);
+    sub(a0 + 32,xzmb,xzmb + 32);
+    add(a1,xzm1b,xzm1b + 32);
+    sub(a1 + 32,xzm1b,xzm1b + 32);
+    square(b0,a0);
+    square(b0 + 32,a0 + 32);
+    mult(b1,a1,a0 + 32);
+    mult(b1 + 32,a1 + 32,a0);
+    add(c1,b1,b1 + 32);
+    sub(c1 + 32,b1,b1 + 32);
+    square(r,c1 + 32);
+    sub(s,b0,b0 + 32);
+    mult121665(t,s);
+    add(u,t,b0);
+    mult(xznb,b0,b0 + 32);
+    mult(xznb + 32,s,u);
+    square(xzn1b,c1);
+    mult(xzn1b + 32,r,work);
+    select(xzm,xzm1,xznb,xzn1b,b);
+  }
+
+  for (j = 0;j < 64;++j) work[j] = xzm[j];
+}
+
+static void recip(unsigned int out[32],const unsigned int z[32])
+{
+  unsigned int z2[32];
+  unsigned int z9[32];
+  unsigned int z11[32];
+  unsigned int z2_5_0[32];
+  unsigned int z2_10_0[32];
+  unsigned int z2_20_0[32];
+  unsigned int z2_50_0[32];
+  unsigned int z2_100_0[32];
+  unsigned int t0[32];
+  unsigned int t1[32];
+  int i;
+
+  /* 2 */ square(z2,z);
+  /* 4 */ square(t1,z2);
+  /* 8 */ square(t0,t1);
+  /* 9 */ mult(z9,t0,z);
+  /* 11 */ mult(z11,z9,z2);
+  /* 22 */ square(t0,z11);
+  /* 2^5 - 2^0 = 31 */ mult(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ square(t0,z2_5_0);
+  /* 2^7 - 2^2 */ square(t1,t0);
+  /* 2^8 - 2^3 */ square(t0,t1);
+  /* 2^9 - 2^4 */ square(t1,t0);
+  /* 2^10 - 2^5 */ square(t0,t1);
+  /* 2^10 - 2^0 */ mult(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ square(t0,z2_10_0);
+  /* 2^12 - 2^2 */ square(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^20 - 2^0 */ mult(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ square(t0,z2_20_0);
+  /* 2^22 - 2^2 */ square(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^40 - 2^0 */ mult(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ square(t1,t0);
+  /* 2^42 - 2^2 */ square(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { square(t1,t0); square(t0,t1); }
+  /* 2^50 - 2^0 */ mult(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ square(t0,z2_50_0);
+  /* 2^52 - 2^2 */ square(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^100 - 2^0 */ mult(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ square(t1,z2_100_0);
+  /* 2^102 - 2^2 */ square(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { square(t1,t0); square(t0,t1); }
+  /* 2^200 - 2^0 */ mult(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ square(t0,t1);
+  /* 2^202 - 2^2 */ square(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { square(t0,t1); square(t1,t0); }
+  /* 2^250 - 2^0 */ mult(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ square(t1,t0);
+  /* 2^252 - 2^2 */ square(t0,t1);
+  /* 2^253 - 2^3 */ square(t1,t0);
+  /* 2^254 - 2^4 */ square(t0,t1);
+  /* 2^255 - 2^5 */ square(t1,t0);
+  /* 2^255 - 21 */ mult(out,t1,z11);
+}
+
+int crypto_scalarmult(unsigned char *q,
+  const unsigned char *n,
+  const unsigned char *p)
+{
+  unsigned int work[96];
+  unsigned char e[32];
+  unsigned int i;
+  for (i = 0;i < 32;++i) e[i] = n[i];
+  e[0] &= 248;
+  e[31] &= 127;
+  e[31] |= 64;
+  for (i = 0;i < 32;++i) work[i] = p[i];
+  mainloop(work,e);
+  recip(work + 32,work + 32);
+  mult(work + 64,work,work + 32);
+  freeze(work + 64);
+  for (i = 0;i < 32;++i) q[i] = work[64 + i];
+  return 0;
+}
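
Both the donna_c64 and ref implementations expose the same two entry points, and both invert z by raising it to p-2 = 2^255-21 (Fermat's little theorem), which is what the recip()/crecip() addition chains compute. A hedged usage sketch of the public API for a Diffie-Hellman exchange, not part of the patch; randombytes() is assumed to be supplied by the surrounding NaCl build, as in measure.c below:

/* usage sketch only -- not part of the NaCl sources above */
#include "crypto_scalarmult.h"
#include "randombytes.h"

void example_dh(unsigned char shared_a[crypto_scalarmult_BYTES],
                unsigned char shared_b[crypto_scalarmult_BYTES])
{
  unsigned char alice_sk[crypto_scalarmult_SCALARBYTES];
  unsigned char alice_pk[crypto_scalarmult_BYTES];
  unsigned char bob_sk[crypto_scalarmult_SCALARBYTES];
  unsigned char bob_pk[crypto_scalarmult_BYTES];

  randombytes(alice_sk,sizeof alice_sk);
  randombytes(bob_sk,sizeof bob_sk);
  crypto_scalarmult_base(alice_pk,alice_sk);      /* A = a*basepoint */
  crypto_scalarmult_base(bob_pk,bob_sk);          /* B = b*basepoint */
  crypto_scalarmult(shared_a,alice_sk,bob_pk);    /* a*B */
  crypto_scalarmult(shared_b,bob_sk,alice_pk);    /* b*A, equal to a*B */
}

The scalar clamping (e[0] &= 248; e[31] &= 127; e[31] |= 64) is performed inside crypto_scalarmult(), so callers can pass 32 uniformly random bytes directly.
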
diff --git a/nacl/nacl-20110221/crypto_scalarmult/curve25519/used b/nacl/nacl-20110221/crypto_scalarmult/curve25519/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_scalarmult/measure.c b/nacl/nacl-20110221/crypto_scalarmult/measure.c
new file mode 100644
index 00000000..0c7265d5
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/measure.c
@@ -0,0 +1,61 @@
+#include <stdlib.h>
+#include "randombytes.h"
+#include "cpucycles.h"
+#include "crypto_scalarmult.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_scalarmult_IMPLEMENTATION;
+const char *implementationversion = crypto_scalarmult_VERSION;
+const char *sizenames[] = { "outputbytes", "scalarbytes", 0 };
+const long long sizes[] = { crypto_scalarmult_BYTES, crypto_scalarmult_SCALARBYTES };
+
+static unsigned char *m;
+static unsigned char *n;
+static unsigned char *p;
+static unsigned char *q;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  m = alignedcalloc(crypto_scalarmult_SCALARBYTES);
+  n = alignedcalloc(crypto_scalarmult_SCALARBYTES);
+  p = alignedcalloc(crypto_scalarmult_BYTES);
+  q = alignedcalloc(crypto_scalarmult_BYTES);
+}
+
+#define TIMINGS 63
+static long long cycles[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    randombytes(m,crypto_scalarmult_SCALARBYTES);
+    randombytes(n,crypto_scalarmult_SCALARBYTES);
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_scalarmult_base(p,m);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"base_cycles",cycles,TIMINGS);
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_scalarmult(q,n,p);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"cycles",cycles,TIMINGS);
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/try.c b/nacl/nacl-20110221/crypto_scalarmult/try.c
new file mode 100644
index 00000000..560ce493
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/try.c
@@ -0,0 +1,126 @@
+/*
+ * crypto_scalarmult/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "crypto_scalarmult.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_scalarmult_IMPLEMENTATION;
+
+#define mlen crypto_scalarmult_SCALARBYTES
+#define nlen crypto_scalarmult_SCALARBYTES
+#define plen crypto_scalarmult_BYTES
+#define qlen crypto_scalarmult_BYTES
+#define rlen crypto_scalarmult_BYTES
+
+static unsigned char *m;
+static unsigned char *n;
+static unsigned char *p;
+static unsigned char *q;
+static unsigned char *r;
+
+static unsigned char *m2;
+static unsigned char *n2;
+static unsigned char *p2;
+static unsigned char *q2;
+static unsigned char *r2;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  m = alignedcalloc(mlen);
+  n = alignedcalloc(nlen);
+  p = alignedcalloc(plen);
+  q = alignedcalloc(qlen);
+  r = alignedcalloc(rlen);
+  m2 = alignedcalloc(mlen + crypto_scalarmult_BYTES);
+  n2 = alignedcalloc(nlen + crypto_scalarmult_BYTES);
+  p2 = alignedcalloc(plen + crypto_scalarmult_BYTES);
+  q2 = alignedcalloc(qlen + crypto_scalarmult_BYTES);
+  r2 = alignedcalloc(rlen + crypto_scalarmult_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_scalarmult(q,n,p);
+  crypto_scalarmult_base(r,n);
+}
+
+char checksum[crypto_scalarmult_BYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+  long long tests;
+
+  for (i = 0;i < mlen;++i) m[i] = i;
+  for (i = 0;i < nlen;++i) n[i] = i + 1;
+  for (i = 0;i < plen;++i) p[i] = i + 2;
+  for (i = 0;i < qlen;++i) q[i] = i + 3;
+  for (i = 0;i < rlen;++i) r[i] = i + 4;
+
+  for (i = -16;i < 0;++i) p[i] = random();
+  for (i = -16;i < 0;++i) n[i] = random();
+  for (i = plen;i < plen + 16;++i) p[i] = random();
+  for (i = nlen;i < nlen + 16;++i) n[i] = random();
+  for (i = -16;i < plen + 16;++i) p2[i] = p[i];
+  for (i = -16;i < nlen + 16;++i) n2[i] = n[i];
+
+  if (crypto_scalarmult_base(p,n) != 0) return "crypto_scalarmult_base returns nonzero";
+
+  for (i = -16;i < nlen + 16;++i) if (n2[i] != n[i]) return "crypto_scalarmult_base overwrites input";
+  for (i = -16;i < 0;++i) if (p2[i] != p[i]) return "crypto_scalarmult_base writes before output";
+  for (i = plen;i < plen + 16;++i) if (p2[i] != p[i]) return "crypto_scalarmult_base writes after output";
+
+  for (tests = 0;tests < 100;++tests) {
+    for (i = -16;i < 0;++i) q[i] = random();
+    for (i = -16;i < 0;++i) p[i] = random();
+    for (i = -16;i < 0;++i) m[i] = random();
+    for (i = qlen;i < qlen + 16;++i) q[i] = random();
+    for (i = plen;i < plen + 16;++i) p[i] = random();
+    for (i = mlen;i < mlen + 16;++i) m[i] = random();
+    for (i = -16;i < qlen + 16;++i) q2[i] = q[i];
+    for (i = -16;i < plen + 16;++i) p2[i] = p[i];
+    for (i = -16;i < mlen + 16;++i) m2[i] = m[i];
+
+    if (crypto_scalarmult(q,m,p) != 0) return "crypto_scalarmult returns nonzero";
+
+    for (i = -16;i < mlen + 16;++i) if (m2[i] != m[i]) return "crypto_scalarmult overwrites n input";
+    for (i = -16;i < plen + 16;++i) if (p2[i] != p[i]) return "crypto_scalarmult overwrites p input";
+    for (i = -16;i < 0;++i) if (q2[i] != q[i]) return "crypto_scalarmult writes before output";
+    for (i = qlen;i < qlen + 16;++i) if (q2[i] != q[i]) return "crypto_scalarmult writes after output";
+
+    if (crypto_scalarmult(m2,m2,p) != 0) return "crypto_scalarmult returns nonzero";
+    for (i = 0;i < qlen;++i) if (q[i] != m2[i]) return "crypto_scalarmult does not handle n overlap";
+    for (i = 0;i < qlen;++i) m2[i] = m[i];
+
+    if (crypto_scalarmult(p2,m2,p2) != 0) return "crypto_scalarmult returns nonzero";
+    for (i = 0;i < qlen;++i) if (q[i] != p2[i]) return "crypto_scalarmult does not handle p overlap";
+
+    if (crypto_scalarmult(r,n,q) != 0) return "crypto_scalarmult returns nonzero";
+    if (crypto_scalarmult(q,n,p) != 0) return "crypto_scalarmult returns nonzero";
+    if (crypto_scalarmult(p,m,q) != 0) return "crypto_scalarmult returns nonzero";
+    for (j = 0;j < plen;++j) if (p[j] != r[j]) return "crypto_scalarmult not associative";
+    for (j = 0;j < mlen;++j) m[j] ^= q[j % qlen];
+    for (j = 0;j < nlen;++j) n[j] ^= p[j % plen];
+  }
+
+  for (i = 0;i < crypto_scalarmult_BYTES;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (p[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & p[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/wrapper-base.cpp b/nacl/nacl-20110221/crypto_scalarmult/wrapper-base.cpp
new file mode 100644
index 00000000..f71ce19a
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/wrapper-base.cpp
@@ -0,0 +1,11 @@
+#include <string>
+using std::string;
+#include "crypto_scalarmult.h"
+
+string crypto_scalarmult_base(const string &n)
+{
+  unsigned char q[crypto_scalarmult_BYTES];
+  if (n.size() != crypto_scalarmult_SCALARBYTES) throw "incorrect scalar length";
+  crypto_scalarmult_base(q,(const unsigned char *) n.c_str());
+  return string((char *) q,sizeof q);
+}
diff --git a/nacl/nacl-20110221/crypto_scalarmult/wrapper-mult.cpp b/nacl/nacl-20110221/crypto_scalarmult/wrapper-mult.cpp
new file mode 100644
index 00000000..fc693cf0
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_scalarmult/wrapper-mult.cpp
@@ -0,0 +1,12 @@
+#include <string>
+using std::string;
+#include "crypto_scalarmult.h"
+
+string crypto_scalarmult(const string &n,const string &p)
+{
+  unsigned char q[crypto_scalarmult_BYTES];
+  if (n.size() != crypto_scalarmult_SCALARBYTES) throw "incorrect scalar length";
+  if (p.size() != crypto_scalarmult_BYTES) throw "incorrect element length";
+  crypto_scalarmult(q,(const unsigned char *) n.c_str(),(const unsigned char *) p.c_str());
+  return string((char *) q,sizeof q);
+}
diff --git a/nacl/nacl-20110221/crypto_secretbox/measure.c b/nacl/nacl-20110221/crypto_secretbox/measure.c
new file mode 100644
index 00000000..6cb0692f
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/measure.c
@@ -0,0 +1,75 @@
+#include <stdlib.h>
+#include "randombytes.h"
+#include "cpucycles.h"
+#include "crypto_secretbox.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_secretbox_IMPLEMENTATION;
+const char *implementationversion = crypto_secretbox_VERSION;
+const char *sizenames[] = { "keybytes", "noncebytes", "zerobytes", "boxzerobytes", 0 };
+const long long sizes[] = { crypto_secretbox_KEYBYTES, crypto_secretbox_NONCEBYTES, crypto_secretbox_ZEROBYTES, crypto_secretbox_BOXZEROBYTES };
+
+#define MAXTEST_BYTES 4096
+
+static unsigned char *k;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  k = alignedcalloc(crypto_secretbox_KEYBYTES);
+  n = alignedcalloc(crypto_secretbox_NONCEBYTES);
+  m = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+  c = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+}
+
+#define TIMINGS 15
+static long long cycles[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+  int mlen;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / 8) {
+      randombytes(k,crypto_secretbox_KEYBYTES);
+      randombytes(n,crypto_secretbox_NONCEBYTES);
+      randombytes(m + crypto_secretbox_ZEROBYTES,mlen);
+      randombytes(c,mlen + crypto_secretbox_ZEROBYTES);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_secretbox(c,m,mlen + crypto_secretbox_ZEROBYTES,n,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"cycles",cycles,TIMINGS);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_secretbox_open(m,c,mlen + crypto_secretbox_ZEROBYTES,n,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"open_cycles",cycles,TIMINGS);
+      ++c[crypto_secretbox_ZEROBYTES];
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_secretbox_open(m,c,mlen + crypto_secretbox_ZEROBYTES,n,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"forgery_open_cycles",cycles,TIMINGS);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_secretbox/try.c b/nacl/nacl-20110221/crypto_secretbox/try.c
new file mode 100644
index 00000000..eda091e9
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/try.c
@@ -0,0 +1,129 @@
+/*
+ * crypto_secretbox/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include "crypto_secretbox.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_secretbox_IMPLEMENTATION;
+
+#define MAXTEST_BYTES 10000
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *k;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+static unsigned char *t;
+static unsigned char *k2;
+static unsigned char *n2;
+static unsigned char *m2;
+static unsigned char *c2;
+static unsigned char *t2;
+
+#define klen crypto_secretbox_KEYBYTES
+#define nlen crypto_secretbox_NONCEBYTES
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  k = alignedcalloc(klen);
+  n = alignedcalloc(nlen);
+  m = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+  c = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+  t = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+  k2 = alignedcalloc(klen);
+  n2 = alignedcalloc(nlen);
+  m2 = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+  c2 = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+  t2 = alignedcalloc(MAXTEST_BYTES + crypto_secretbox_ZEROBYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_secretbox(c,m,TUNE_BYTES + crypto_secretbox_ZEROBYTES,n,k);
+  crypto_secretbox_open(t,c,TUNE_BYTES + crypto_secretbox_ZEROBYTES,n,k);
+}
+
+char checksum[klen * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (j = 0;j < crypto_secretbox_ZEROBYTES;++j) m[j] = 0;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long mlen = i + crypto_secretbox_ZEROBYTES;
+    long long tlen = i + crypto_secretbox_ZEROBYTES;
+    long long clen = i + crypto_secretbox_ZEROBYTES;
+
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = -16;j < 0;++j) n[j] = random();
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = nlen;j < nlen + 16;++j) n[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+    for (j = -16;j < nlen + 16;++j) n2[j] = n[j];
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j] = random();
+
+    if (crypto_secretbox(c,m,mlen,n,k) != 0) return "crypto_secretbox returns nonzero";
+
+    for (j = -16;j < mlen + 16;++j) if (m2[j] != m[j]) return "crypto_secretbox overwrites m";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_secretbox overwrites n";
+    for (j = -16;j < klen + 16;++j) if (k2[j] != k[j]) return "crypto_secretbox overwrites k";
+    for (j = -16;j < 0;++j) if (c2[j] != c[j]) return "crypto_secretbox writes before output";
+    for (j = clen;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_secretbox writes after output";
+    for (j = 0;j < crypto_secretbox_BOXZEROBYTES;++j)
+      if (c[j] != 0) return "crypto_secretbox does not clear extra bytes";
+
+    for (j = -16;j < 0;++j) c[j] = random();
+    for (j = clen;j < clen + 16;++j) c[j] = random();
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+    for (j = -16;j < tlen + 16;++j) t2[j] = t[j] = random();
+
+    if (crypto_secretbox_open(t,c,clen,n,k) != 0) return "crypto_secretbox_open returns nonzero";
+
+    for (j = -16;j < clen + 16;++j) if (c2[j] != c[j]) return "crypto_secretbox_open overwrites c";
+    for (j = -16;j < nlen + 16;++j) if (n2[j] != n[j]) return "crypto_secretbox_open overwrites n";
+    for (j = -16;j < klen + 16;++j) if (k2[j] != k[j]) return "crypto_secretbox_open overwrites k";
+    for (j = -16;j < 0;++j) if (t2[j] != t[j]) return "crypto_secretbox_open writes before output";
+    for (j = tlen;j < tlen + 16;++j) if (t2[j] != t[j]) return "crypto_secretbox_open writes after output";
+    for (j = 0;j < crypto_secretbox_ZEROBYTES;++j)
+      if (t[j] != 0) return "crypto_secretbox_open does not clear extra bytes";
+    
+    for (j = 0;j < i;++j) if (t[j] != m[j]) return "plaintext does not match";
+    
+    for (j = 0;j < i;++j)
+      k[j % klen] ^= c[j + crypto_secretbox_BOXZEROBYTES];
+    crypto_secretbox(c,m,mlen,n,k);
+    for (j = 0;j < i;++j)
+      n[j % nlen] ^= c[j + crypto_secretbox_BOXZEROBYTES];
+    crypto_secretbox(c,m,mlen,n,k);
+    if (i == 0) m[crypto_secretbox_ZEROBYTES + 0] = 0;
+    m[crypto_secretbox_ZEROBYTES + i] = m[crypto_secretbox_ZEROBYTES + 0];
+    for (j = 0;j < i;++j)
+      m[j + crypto_secretbox_ZEROBYTES] ^= c[j + crypto_secretbox_BOXZEROBYTES];
+  }
+
+  for (i = 0;i < klen;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (k[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & k[i]];
+  }
+  checksum[2 * i] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_secretbox/wrapper-box.cpp b/nacl/nacl-20110221/crypto_secretbox/wrapper-box.cpp
new file mode 100644
index 00000000..fb8b1784
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/wrapper-box.cpp
@@ -0,0 +1,19 @@
+#include <string>
+using std::string;
+#include "crypto_secretbox.h"
+
+string crypto_secretbox(const string &m,const string &n,const string &k)
+{
+  if (k.size() != crypto_secretbox_KEYBYTES) throw "incorrect key length";
+  if (n.size() != crypto_secretbox_NONCEBYTES) throw "incorrect nonce length";
+  size_t mlen = m.size() + crypto_secretbox_ZEROBYTES;
+  unsigned char mpad[mlen];
+  for (int i = 0;i < crypto_secretbox_ZEROBYTES;++i) mpad[i] = 0;
+  for (int i = crypto_secretbox_ZEROBYTES;i < mlen;++i) mpad[i] = m[i - crypto_secretbox_ZEROBYTES];
+  unsigned char cpad[mlen];
+  crypto_secretbox(cpad,mpad,mlen,(const unsigned char *) n.c_str(),(const unsigned char *) k.c_str());
+  return string(
+    (char *) cpad + crypto_secretbox_BOXZEROBYTES,
+    mlen - crypto_secretbox_BOXZEROBYTES
+  );
+}
diff --git a/nacl/nacl-20110221/crypto_secretbox/wrapper-open.cpp b/nacl/nacl-20110221/crypto_secretbox/wrapper-open.cpp
new file mode 100644
index 00000000..07989813
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/wrapper-open.cpp
@@ -0,0 +1,22 @@
+#include <string>
+using std::string;
+#include "crypto_secretbox.h"
+
+string crypto_secretbox_open(const string &c,const string &n,const string &k)
+{
+  if (k.size() != crypto_secretbox_KEYBYTES) throw "incorrect key length";
+  if (n.size() != crypto_secretbox_NONCEBYTES) throw "incorrect nonce length";
+  size_t clen = c.size() + crypto_secretbox_BOXZEROBYTES;
+  unsigned char cpad[clen];
+  for (int i = 0;i < crypto_secretbox_BOXZEROBYTES;++i) cpad[i] = 0;
+  for (int i = crypto_secretbox_BOXZEROBYTES;i < clen;++i) cpad[i] = c[i - crypto_secretbox_BOXZEROBYTES];
+  unsigned char mpad[clen];
+  if (crypto_secretbox_open(mpad,cpad,clen,(const unsigned char *) n.c_str(),(const unsigned char *) k.c_str()) != 0)
+    throw "ciphertext fails verification";
+  if (clen < crypto_secretbox_ZEROBYTES)
+    throw "ciphertext too short"; // should have been caught by _open
+  return string(
+    (char *) mpad + crypto_secretbox_ZEROBYTES,
+    clen - crypto_secretbox_ZEROBYTES
+  );
+}
diff --git a/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/checksum b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/checksum
new file mode 100644
index 00000000..af3c6897
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/checksum
@@ -0,0 +1 @@
+df372f95dd87381b7c9ceb6f340ccaa03d19bed5d9e4ab004d99d847675a9658
diff --git a/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/ref/api.h b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/ref/api.h
new file mode 100644
index 00000000..f5aeb356
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/ref/api.h
@@ -0,0 +1,4 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 24
+#define CRYPTO_ZEROBYTES 32
+#define CRYPTO_BOXZEROBYTES 16
diff --git a/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/ref/box.c b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/ref/box.c
new file mode 100644
index 00000000..f1abb06f
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/ref/box.c
@@ -0,0 +1,35 @@
+#include "crypto_onetimeauth_poly1305.h"
+#include "crypto_stream_xsalsa20.h"
+#include "crypto_secretbox.h"
+
+int crypto_secretbox(
+  unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  int i;
+  if (mlen < 32) return -1;
+  crypto_stream_xsalsa20_xor(c,m,mlen,n,k);
+  crypto_onetimeauth_poly1305(c + 16,c + 32,mlen - 32,c);
+  for (i = 0;i < 16;++i) c[i] = 0;
+  return 0;
+}
+
+int crypto_secretbox_open(
+  unsigned char *m,
+  const unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  int i;
+  unsigned char subkey[32];
+  if (clen < 32) return -1;
+  crypto_stream_xsalsa20(subkey,32,n,k);
+  if (crypto_onetimeauth_poly1305_verify(c + 16,c + 32,clen - 32,subkey) != 0) return -1;
+  crypto_stream_xsalsa20_xor(m,c,clen,n,k);
+  for (i = 0;i < 32;++i) m[i] = 0;
+  return 0;
+}
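
crypto_secretbox() above requires the caller to place crypto_secretbox_ZEROBYTES zero bytes in front of the plaintext, and it leaves crypto_secretbox_BOXZEROBYTES zero bytes in front of the ciphertext (in the padded ciphertext, bytes 16..31 hold the Poly1305 authenticator and the encrypted message follows); the C++ wrappers earlier in this patch add and strip that padding. A hedged sketch of a round trip through the C interface under that convention, not part of the patch:

/* usage sketch only -- not part of the NaCl sources above */
#include <string.h>
#include "crypto_secretbox.h"

int secretbox_roundtrip(const unsigned char n[crypto_secretbox_NONCEBYTES],
                        const unsigned char k[crypto_secretbox_KEYBYTES])
{
  const unsigned char msg[4] = { 1, 2, 3, 4 };
  unsigned char mpad[crypto_secretbox_ZEROBYTES + sizeof msg];
  unsigned char cpad[sizeof mpad];
  unsigned char tpad[sizeof mpad];

  memset(mpad,0,crypto_secretbox_ZEROBYTES);                /* required zero padding */
  memcpy(mpad + crypto_secretbox_ZEROBYTES,msg,sizeof msg);
  if (crypto_secretbox(cpad,mpad,sizeof mpad,n,k) != 0) return -1;
  /* cpad[0..crypto_secretbox_BOXZEROBYTES-1] are zero; the transmitted ciphertext
     starts at cpad + crypto_secretbox_BOXZEROBYTES */
  if (crypto_secretbox_open(tpad,cpad,sizeof cpad,n,k) != 0) return -1;  /* authentication failed */
  return memcmp(tpad + crypto_secretbox_ZEROBYTES,msg,sizeof msg);       /* 0 on match */
}
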
diff --git a/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/selected b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/used b/nacl/nacl-20110221/crypto_secretbox/xsalsa20poly1305/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/api.h b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/api.h
new file mode 100644
index 00000000..352240c0
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/api.h
@@ -0,0 +1,3 @@
+#define CRYPTO_SECRETKEYBYTES 64
+#define CRYPTO_PUBLICKEYBYTES 32
+#define CRYPTO_BYTES 64
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/fe25519.c b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/fe25519.c
new file mode 100644
index 00000000..a9f806d2
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/fe25519.c
@@ -0,0 +1,345 @@
+#include "fe25519.h"
+
+#define WINDOWSIZE 4 /* Should be 1,2, or 4 */
+#define WINDOWMASK ((1<<WINDOWSIZE)-1)
+
+static void reduce_add_sub(fe25519 *r)
+{
+  crypto_uint32 t;
+  int i,rep;
+
+  for(rep=0;rep<4;rep++)
+  {
+    t = r->v[31] >> 7;
+    r->v[31] &= 127;
+    t *= 19;
+    r->v[0] += t;
+    for(i=0;i<31;i++)
+    {
+      t = r->v[i] >> 8;
+      r->v[i+1] += t;
+      r->v[i] &= 255;
+    }
+  }
+}
+
+static void reduce_mul(fe25519 *r)
+{
+  crypto_uint32 t;
+  int i,rep;
+
+  for(rep=0;rep<2;rep++)
+  {
+    t = r->v[31] >> 7;
+    r->v[31] &= 127;
+    t *= 19;
+    r->v[0] += t;
+    for(i=0;i<31;i++)
+    {
+      t = r->v[i] >> 8;
+      r->v[i+1] += t;
+      r->v[i] &= 255;
+    }
+  }
+}
+
+/* reduction modulo 2^255-19 */
+static void freeze(fe25519 *r) 
+{
+  int i;
+  unsigned int m = (r->v[31] == 127);
+  for(i=30;i>1;i--)
+    m *= (r->v[i] == 255);
+  m *= (r->v[0] >= 237);
+
+  r->v[31] -= m*127;
+  for(i=30;i>0;i--)
+    r->v[i] -= m*255;
+  r->v[0] -= m*237;
+}
+
+/*freeze input before calling isone*/
+static int isone(const fe25519 *x)
+{
+  int i;
+  int r = (x->v[0] == 1);
+  for(i=1;i<32;i++) 
+    r *= (x->v[i] == 0);
+  return r;
+}
+
+/*freeze input before calling iszero*/
+static int iszero(const fe25519 *x)
+{
+  int i;
+  int r = (x->v[0] == 0);
+  for(i=1;i<32;i++) 
+    r *= (x->v[i] == 0);
+  return r;
+}
+
+
+static int issquare(const fe25519 *x)
+{
+  unsigned char e[32] = {0xf6,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x3f}; /* (p-1)/2 */
+  fe25519 t;
+
+  fe25519_pow(&t,x,e);
+  freeze(&t);
+  return isone(&t) || iszero(&t);
+}
+
+void fe25519_unpack(fe25519 *r, const unsigned char x[32])
+{
+  int i;
+  for(i=0;i<32;i++) r->v[i] = x[i];
+  r->v[31] &= 127;
+}
+
+/* Assumes input x being reduced mod 2^255 */
+void fe25519_pack(unsigned char r[32], const fe25519 *x)
+{
+  int i;
+  for(i=0;i<32;i++) 
+    r[i] = x->v[i];
+  
+  /* freeze byte array */
+  unsigned int m = (r[31] == 127); /* XXX: some compilers might use branches; fix */
+  for(i=30;i>1;i--)
+    m *= (r[i] == 255);
+  m *= (r[0] >= 237);
+  r[31] -= m*127;
+  for(i=30;i>0;i--)
+    r[i] -= m*255;
+  r[0] -= m*237;
+}
+
+void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b)
+{
+  unsigned char nb = 1-b;
+  int i;
+  for(i=0;i<32;i++) r->v[i] = nb * r->v[i] + b * x->v[i];
+}
+
+unsigned char fe25519_getparity(const fe25519 *x)
+{
+  fe25519 t;
+  int i;
+  for(i=0;i<32;i++) t.v[i] = x->v[i];
+  freeze(&t);
+  return t.v[0] & 1;
+}
+
+void fe25519_setone(fe25519 *r)
+{
+  int i;
+  r->v[0] = 1;
+  for(i=1;i<32;i++) r->v[i]=0;
+}
+
+void fe25519_setzero(fe25519 *r)
+{
+  int i;
+  for(i=0;i<32;i++) r->v[i]=0;
+}
+
+void fe25519_neg(fe25519 *r, const fe25519 *x)
+{
+  fe25519 t;
+  int i;
+  for(i=0;i<32;i++) t.v[i]=x->v[i];
+  fe25519_setzero(r);
+  fe25519_sub(r, r, &t);
+}
+
+void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y)
+{
+  int i;
+  for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i];
+  reduce_add_sub(r);
+}
+
+void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y)
+{
+  int i;
+  crypto_uint32 t[32];
+  t[0] = x->v[0] + 0x1da;
+  t[31] = x->v[31] + 0xfe;
+  for(i=1;i<31;i++) t[i] = x->v[i] + 0x1fe;
+  for(i=0;i<32;i++) r->v[i] = t[i] - y->v[i];
+  reduce_add_sub(r);
+}
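+
+/* The per-limb constants added above (0x1da, thirty limbs of 0x1fe, 0xfe)
+ * sum to exactly 2p = 2^256 - 38, so r = x + 2p - y is computed without any
+ * limb going negative (assuming y's limbs are in the reduced range);
+ * reduce_add_sub then folds the excess back modulo p. */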
+
+void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y)
+{
+  int i,j;
+  crypto_uint32 t[63];
+  for(i=0;i<63;i++)t[i] = 0;
+
+  for(i=0;i<32;i++)
+    for(j=0;j<32;j++)
+      t[i+j] += x->v[i] * y->v[j];
+
+  for(i=32;i<63;i++)
+    r->v[i-32] = t[i-32] + 38*t[i]; 
+  r->v[31] = t[31]; /* result now in r[0]...r[31] */
+
+  reduce_mul(r);
+}
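+
+/* Schoolbook product of the 32 8-bit limbs gives the 63 partial sums
+ * t[0..62]; the upper half is folded into the lower half with the factor 38,
+ * using 256^32 = 2*2^255 = 2*19 = 38 (mod 2^255-19), and reduce_mul then
+ * sweeps the carries. */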
+
+void fe25519_square(fe25519 *r, const fe25519 *x)
+{
+  fe25519_mul(r, x, x);
+}
+
+/*XXX: Make constant time! */
+void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e)
+{
+  /*
+  fe25519 g;
+  fe25519_setone(&g);
+  int i;
+  unsigned char j;
+  for(i=32;i>0;i--)
+  {
+    for(j=128;j>0;j>>=1)
+    {
+      fe25519_square(&g,&g);
+      if(e[i-1] & j) 
+        fe25519_mul(&g,&g,x);
+    }
+  }
+  for(i=0;i<32;i++) r->v[i] = g.v[i];
+  */
+  fe25519 g;
+  fe25519_setone(&g);
+  int i,j,k;
+  fe25519 pre[(1 << WINDOWSIZE)];
+  fe25519 t;
+  unsigned char w;
+
+  // Precomputation
+  fe25519_setone(pre);
+  pre[1] = *x;
+  for(i=2;i<(1<<WINDOWSIZE);i+=2)
+  {
+    fe25519_square(pre+i, pre+i/2);
+    fe25519_mul(pre+i+1, pre+i, pre+1);
+  }
+
+  // Fixed-window scalar multiplication
+  for(i=32;i>0;i--)
+  {
+    for(j=8-WINDOWSIZE;j>=0;j-=WINDOWSIZE)
+    {
+      for(k=0;k<WINDOWSIZE;k++)
+        fe25519_square(&g, &g);
+      // Cache-timing resistant loading of precomputed value:
+      w = (e[i-1]>>j) & WINDOWMASK;
+      t = pre[0];
+      for(k=1;k<(1<<WINDOWSIZE);k++)
+        fe25519_cmov(&t, &pre[k], k==w);
+      fe25519_mul(&g, &g, &t);
+    }
+  }
+  *r = g;
+}
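+
+/* Fixed-window exponentiation: pre[k] = x^k for k < 2^WINDOWSIZE, then the
+ * exponent bytes are scanned from most to least significant in
+ * WINDOWSIZE-bit windows, squaring WINDOWSIZE times per window and
+ * multiplying by the table entry, which is loaded with fe25519_cmov so that
+ * the window value is not leaked through the memory access pattern. */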
+
+/* Return 0 on success, -1 if x is not a square */
+int fe25519_sqrt_vartime(fe25519 *r, const fe25519 *x, unsigned char parity)
+{
+  /* See HAC, Alg. 3.37 */
+  if (!issquare(x)) return -1;
+  unsigned char e[32] = {0xfb,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x1f}; /* (p-1)/4 */
+  unsigned char e2[32] = {0xfe,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x0f}; /* (p+3)/8 */
+  unsigned char e3[32] = {0xfd,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x0f}; /* (p-5)/8 */
+  fe25519 p = {{0}};
+  fe25519 d;
+  int i;
+  fe25519_pow(&d,x,e);
+  freeze(&d);
+  if(isone(&d))
+    fe25519_pow(r,x,e2);
+  else
+  {
+    for(i=0;i<32;i++)
+      d.v[i] = 4*x->v[i];
+    fe25519_pow(&d,&d,e3);
+    for(i=0;i<32;i++)
+      r->v[i] = 2*x->v[i];
+    fe25519_mul(r,r,&d);
+  }
+  freeze(r);
+  if((r->v[0] & 1) != (parity & 1))
+  {
+    fe25519_sub(r,&p,r);
+  }
+  return 0;
+}
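+
+/* Square roots mod p = 2^255-19, which is 5 mod 8 (HAC Alg. 3.37): if
+ * x^((p-1)/4) == 1 the root is x^((p+3)/8), otherwise it is
+ * 2x * (4x)^((p-5)/8); the parity argument then picks either that root or
+ * its negative (the fe25519_sub from the all-zero element). Variable time,
+ * as the name says, because of the issquare() test and the branch on d. */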
+
+void fe25519_invert(fe25519 *r, const fe25519 *x)
+{
+	fe25519 z2;
+	fe25519 z9;
+	fe25519 z11;
+	fe25519 z2_5_0;
+	fe25519 z2_10_0;
+	fe25519 z2_20_0;
+	fe25519 z2_50_0;
+	fe25519 z2_100_0;
+	fe25519 t0;
+	fe25519 t1;
+	int i;
+	
+	/* 2 */ fe25519_square(&z2,x);
+	/* 4 */ fe25519_square(&t1,&z2);
+	/* 8 */ fe25519_square(&t0,&t1);
+	/* 9 */ fe25519_mul(&z9,&t0,x);
+	/* 11 */ fe25519_mul(&z11,&z9,&z2);
+	/* 22 */ fe25519_square(&t0,&z11);
+	/* 2^5 - 2^0 = 31 */ fe25519_mul(&z2_5_0,&t0,&z9);
+
+	/* 2^6 - 2^1 */ fe25519_square(&t0,&z2_5_0);
+	/* 2^7 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^8 - 2^3 */ fe25519_square(&t0,&t1);
+	/* 2^9 - 2^4 */ fe25519_square(&t1,&t0);
+	/* 2^10 - 2^5 */ fe25519_square(&t0,&t1);
+	/* 2^10 - 2^0 */ fe25519_mul(&z2_10_0,&t0,&z2_5_0);
+
+	/* 2^11 - 2^1 */ fe25519_square(&t0,&z2_10_0);
+	/* 2^12 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^20 - 2^0 */ fe25519_mul(&z2_20_0,&t1,&z2_10_0);
+
+	/* 2^21 - 2^1 */ fe25519_square(&t0,&z2_20_0);
+	/* 2^22 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^40 - 2^0 */ fe25519_mul(&t0,&t1,&z2_20_0);
+
+	/* 2^41 - 2^1 */ fe25519_square(&t1,&t0);
+	/* 2^42 - 2^2 */ fe25519_square(&t0,&t1);
+	/* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); }
+	/* 2^50 - 2^0 */ fe25519_mul(&z2_50_0,&t0,&z2_10_0);
+
+	/* 2^51 - 2^1 */ fe25519_square(&t0,&z2_50_0);
+	/* 2^52 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^100 - 2^0 */ fe25519_mul(&z2_100_0,&t1,&z2_50_0);
+
+	/* 2^101 - 2^1 */ fe25519_square(&t1,&z2_100_0);
+	/* 2^102 - 2^2 */ fe25519_square(&t0,&t1);
+	/* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fe25519_square(&t1,&t0); fe25519_square(&t0,&t1); }
+	/* 2^200 - 2^0 */ fe25519_mul(&t1,&t0,&z2_100_0);
+
+	/* 2^201 - 2^1 */ fe25519_square(&t0,&t1);
+	/* 2^202 - 2^2 */ fe25519_square(&t1,&t0);
+	/* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fe25519_square(&t0,&t1); fe25519_square(&t1,&t0); }
+	/* 2^250 - 2^0 */ fe25519_mul(&t0,&t1,&z2_50_0);
+
+	/* 2^251 - 2^1 */ fe25519_square(&t1,&t0);
+	/* 2^252 - 2^2 */ fe25519_square(&t0,&t1);
+	/* 2^253 - 2^3 */ fe25519_square(&t1,&t0);
+	/* 2^254 - 2^4 */ fe25519_square(&t0,&t1);
+	/* 2^255 - 2^5 */ fe25519_square(&t1,&t0);
+	/* 2^255 - 21 */ fe25519_mul(r,&t1,&z11);
+}
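+
+/* Inversion by Fermat's little theorem: r = x^(p-2) = x^(2^255 - 21),
+ * computed with the fixed square-and-multiply chain annotated above (the
+ * comments track the exponent accumulated so far), so the sequence of field
+ * operations does not depend on x. */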
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/fe25519.h b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/fe25519.h
new file mode 100644
index 00000000..e07ddba7
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/fe25519.h
@@ -0,0 +1,54 @@
+#ifndef FE25519_H
+#define FE25519_H
+
+#define fe25519 crypto_sign_edwards25519sha512batch_fe25519
+#define fe25519_unpack crypto_sign_edwards25519sha512batch_fe25519_unpack
+#define fe25519_pack crypto_sign_edwards25519sha512batch_fe25519_pack
+#define fe25519_cmov crypto_sign_edwards25519sha512batch_fe25519_cmov
+#define fe25519_setone crypto_sign_edwards25519sha512batch_fe25519_setone
+#define fe25519_setzero crypto_sign_edwards25519sha512batch_fe25519_setzero
+#define fe25519_neg crypto_sign_edwards25519sha512batch_fe25519_neg
+#define fe25519_getparity crypto_sign_edwards25519sha512batch_fe25519_getparity
+#define fe25519_add crypto_sign_edwards25519sha512batch_fe25519_add
+#define fe25519_sub crypto_sign_edwards25519sha512batch_fe25519_sub
+#define fe25519_mul crypto_sign_edwards25519sha512batch_fe25519_mul
+#define fe25519_square crypto_sign_edwards25519sha512batch_fe25519_square
+#define fe25519_pow crypto_sign_edwards25519sha512batch_fe25519_pow
+#define fe25519_sqrt_vartime crypto_sign_edwards25519sha512batch_fe25519_sqrt_vartime
+#define fe25519_invert crypto_sign_edwards25519sha512batch_fe25519_invert
+
+#include "crypto_uint32.h"
+
+typedef struct {
+  crypto_uint32 v[32]; 
+} fe25519;
+
+void fe25519_unpack(fe25519 *r, const unsigned char x[32]);
+
+void fe25519_pack(unsigned char r[32], const fe25519 *x);
+
+void fe25519_cmov(fe25519 *r, const fe25519 *x, unsigned char b);
+
+void fe25519_setone(fe25519 *r);
+
+void fe25519_setzero(fe25519 *r);
+
+void fe25519_neg(fe25519 *r, const fe25519 *x);
+
+unsigned char fe25519_getparity(const fe25519 *x);
+
+void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y);
+
+void fe25519_sub(fe25519 *r, const fe25519 *x, const fe25519 *y);
+
+void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y);
+
+void fe25519_square(fe25519 *r, const fe25519 *x);
+
+void fe25519_pow(fe25519 *r, const fe25519 *x, const unsigned char *e);
+
+int fe25519_sqrt_vartime(fe25519 *r, const fe25519 *x, unsigned char parity);
+
+void fe25519_invert(fe25519 *r, const fe25519 *x);
+
+#endif
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/ge25519.c b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/ge25519.c
new file mode 100644
index 00000000..a57b8f3c
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/ge25519.c
@@ -0,0 +1,227 @@
+#include "fe25519.h"
+#include "sc25519.h"
+#include "ge25519.h"
+
+/* 
+ * Arithmetic on the twisted Edwards curve -x^2 + y^2 = 1 + dx^2y^2 
+ * with d = -(121665/121666) = 37095705934669439343138083508754565189542113879843219016388785533085940283555
+ * Base point: (15112221349535400772501151409588531511454012693041857206046113283949847762202,46316835694926478169428394003475163141307993866256225615783033603165251855960);
+ */
+
+typedef struct
+{
+  fe25519 x;
+  fe25519 z;
+  fe25519 y;
+  fe25519 t;
+} ge25519_p1p1;
+
+typedef struct
+{
+  fe25519 x;
+  fe25519 y;
+  fe25519 z;
+} ge25519_p2;
+
+#define ge25519_p3 ge25519
+
+/* Windowsize for fixed-window scalar multiplication */
+#define WINDOWSIZE 2                      /* Should be 1, 2, or 4 */
+#define WINDOWMASK ((1<<WINDOWSIZE)-1)
+
+/* packed parameter d in the Edwards curve equation */
+static const unsigned char ecd[32] = {0xA3, 0x78, 0x59, 0x13, 0xCA, 0x4D, 0xEB, 0x75, 0xAB, 0xD8, 0x41, 0x41, 0x4D, 0x0A, 0x70, 0x00, 
+                                      0x98, 0xE8, 0x79, 0x77, 0x79, 0x40, 0xC7, 0x8C, 0x73, 0xFE, 0x6F, 0x2B, 0xEE, 0x6C, 0x03, 0x52};
+
+/* Packed coordinates of the base point */
+static const unsigned char ge25519_base_x[32] = {0x1A, 0xD5, 0x25, 0x8F, 0x60, 0x2D, 0x56, 0xC9, 0xB2, 0xA7, 0x25, 0x95, 0x60, 0xC7, 0x2C, 0x69, 
+                                                 0x5C, 0xDC, 0xD6, 0xFD, 0x31, 0xE2, 0xA4, 0xC0, 0xFE, 0x53, 0x6E, 0xCD, 0xD3, 0x36, 0x69, 0x21};
+static const unsigned char ge25519_base_y[32] = {0x58, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 
+                                                 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66};
+static const unsigned char ge25519_base_z[32] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const unsigned char ge25519_base_t[32] = {0xA3, 0xDD, 0xB7, 0xA5, 0xB3, 0x8A, 0xDE, 0x6D, 0xF5, 0x52, 0x51, 0x77, 0x80, 0x9F, 0xF0, 0x20, 
+                                                 0x7D, 0xE3, 0xAB, 0x64, 0x8E, 0x4E, 0xEA, 0x66, 0x65, 0x76, 0x8B, 0xD7, 0x0F, 0x5F, 0x87, 0x67};
+
+/* Packed coordinates of the neutral element */
+static const unsigned char ge25519_neutral_x[32] = {0};
+static const unsigned char ge25519_neutral_y[32] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const unsigned char ge25519_neutral_z[32] = {1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+static const unsigned char ge25519_neutral_t[32] = {0};
+
+static void p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p)
+{
+  fe25519_mul(&r->x, &p->x, &p->t);
+  fe25519_mul(&r->y, &p->y, &p->z);
+  fe25519_mul(&r->z, &p->z, &p->t);
+}
+
+static void p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p)
+{
+  p1p1_to_p2((ge25519_p2 *)r, p);
+  fe25519_mul(&r->t, &p->x, &p->y);
+}
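+
+/* Coordinate systems: ge25519_p3 holds extended coordinates (X:Y:Z:T) with
+ * x = X/Z, y = Y/Z and T*Z = X*Y; ge25519_p2 is the same without T. The
+ * ge25519_p1p1 ("completed") form produced by the formulas below represents
+ * the point (X/Z, Y/T); p1p1_to_p2 and p1p1_to_p3 convert it back using
+ * three resp. four field multiplications. */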
+
+/* Constant-time version of: if(b) r = p */
+static void cmov_p3(ge25519_p3 *r, const ge25519_p3 *p, unsigned char b)
+{
+  fe25519_cmov(&r->x, &p->x, b);
+  fe25519_cmov(&r->y, &p->y, b);
+  fe25519_cmov(&r->z, &p->z, b);
+  fe25519_cmov(&r->t, &p->t, b);
+}
+
+/* See http://www.hyperelliptic.org/EFD/g1p/auto-twisted-extended-1.html#doubling-dbl-2008-hwcd */
+static void dbl_p1p1(ge25519_p1p1 *r, const ge25519_p2 *p)
+{
+  fe25519 a,b,c,d;
+  fe25519_square(&a, &p->x);
+  fe25519_square(&b, &p->y);
+  fe25519_square(&c, &p->z);
+  fe25519_add(&c, &c, &c);
+  fe25519_neg(&d, &a);
+
+  fe25519_add(&r->x, &p->x, &p->y);
+  fe25519_square(&r->x, &r->x);
+  fe25519_sub(&r->x, &r->x, &a);
+  fe25519_sub(&r->x, &r->x, &b);
+  fe25519_add(&r->z, &d, &b);
+  fe25519_sub(&r->t, &r->z, &c);
+  fe25519_sub(&r->y, &d, &b);
+}
+
+static void add_p1p1(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_p3 *q)
+{
+  fe25519 a, b, c, d, t, fd;
+  fe25519_unpack(&fd, ecd);
+  
+  fe25519_sub(&a, &p->y, &p->x); // A = (Y1-X1)*(Y2-X2)
+  fe25519_sub(&t, &q->y, &q->x);
+  fe25519_mul(&a, &a, &t);
+  fe25519_add(&b, &p->x, &p->y); // B = (Y1+X1)*(Y2+X2)
+  fe25519_add(&t, &q->x, &q->y);
+  fe25519_mul(&b, &b, &t);
+  fe25519_mul(&c, &p->t, &q->t); //C = T1*k*T2
+  fe25519_mul(&c, &c, &fd);
+  fe25519_add(&c, &c, &c);       //XXX: Can save this addition by precomputing 2*ecd
+  fe25519_mul(&d, &p->z, &q->z); //D = Z1*2*Z2
+  fe25519_add(&d, &d, &d);
+  fe25519_sub(&r->x, &b, &a); // E = B-A
+  fe25519_sub(&r->t, &d, &c); // F = D-C
+  fe25519_add(&r->z, &d, &c); // G = D+C
+  fe25519_add(&r->y, &b, &a); // H = B+A
+}
+
+/* ********************************************************************
+ *                    EXPORTED FUNCTIONS
+ ******************************************************************** */
+
+/* return 0 on success, -1 otherwise */
+int ge25519_unpack_vartime(ge25519_p3 *r, const unsigned char p[32])
+{
+  int ret;
+  fe25519 t, fd;
+  fe25519_setone(&r->z);
+  fe25519_unpack(&fd, ecd);
+  unsigned char par = p[31] >> 7;
+  fe25519_unpack(&r->y, p);
+  fe25519_square(&r->x, &r->y);
+  fe25519_mul(&t, &r->x, &fd);
+  fe25519_sub(&r->x, &r->x, &r->z);
+  fe25519_add(&t, &r->z, &t);
+  fe25519_invert(&t, &t);
+  fe25519_mul(&r->x, &r->x, &t);
+  ret = fe25519_sqrt_vartime(&r->x, &r->x, par);
+  fe25519_mul(&r->t, &r->x, &r->y);
+  return ret;
+}
+
+void ge25519_pack(unsigned char r[32], const ge25519_p3 *p)
+{
+  fe25519 tx, ty, zi;
+  fe25519_invert(&zi, &p->z); 
+  fe25519_mul(&tx, &p->x, &zi);
+  fe25519_mul(&ty, &p->y, &zi);
+  fe25519_pack(r, &ty);
+  r[31] ^= fe25519_getparity(&tx) << 7;
+}
+
+void ge25519_add(ge25519_p3 *r, const ge25519_p3 *p, const ge25519_p3 *q)
+{
+  ge25519_p1p1 grp1p1;
+  add_p1p1(&grp1p1, p, q);
+  p1p1_to_p3(r, &grp1p1);
+}
+
+void ge25519_double(ge25519_p3 *r, const ge25519_p3 *p)
+{
+  ge25519_p1p1 grp1p1;
+  dbl_p1p1(&grp1p1, (ge25519_p2 *)p);
+  p1p1_to_p3(r, &grp1p1);
+}
+
+void ge25519_scalarmult(ge25519_p3 *r, const ge25519_p3 *p, const sc25519 *s)
+{
+  int i,j,k;
+  ge25519_p3 g;  
+  fe25519_unpack(&g.x, ge25519_neutral_x);
+  fe25519_unpack(&g.y, ge25519_neutral_y);
+  fe25519_unpack(&g.z, ge25519_neutral_z);
+  fe25519_unpack(&g.t, ge25519_neutral_t);
+
+  ge25519_p3 pre[(1 << WINDOWSIZE)];
+  ge25519_p3 t;
+  ge25519_p1p1 tp1p1;
+  unsigned char w;
+  unsigned char sb[32];
+  sc25519_to32bytes(sb, s);
+
+  // Precomputation
+  pre[0] = g;
+  pre[1] = *p;
+  for(i=2;i<(1<<WINDOWSIZE);i+=2)
+  {
+    dbl_p1p1(&tp1p1, (ge25519_p2 *)(pre+i/2));
+    p1p1_to_p3(pre+i, &tp1p1);
+    add_p1p1(&tp1p1, pre+i, pre+1);
+    p1p1_to_p3(pre+i+1, &tp1p1);
+  }
+
+  // Fixed-window scalar multiplication
+  for(i=32;i>0;i--)
+  {
+    for(j=8-WINDOWSIZE;j>=0;j-=WINDOWSIZE)
+    {
+      for(k=0;k<WINDOWSIZE-1;k++)
+      {
+        dbl_p1p1(&tp1p1, (ge25519_p2 *)&g);
+        p1p1_to_p2((ge25519_p2 *)&g, &tp1p1);
+      }
+      dbl_p1p1(&tp1p1, (ge25519_p2 *)&g);
+      p1p1_to_p3(&g, &tp1p1);
+      // Cache-timing resistant loading of precomputed value:
+      w = (sb[i-1]>>j) & WINDOWMASK;
+      t = pre[0];
+      for(k=1;k<(1<<WINDOWSIZE);k++)
+        cmov_p3(&t, &pre[k], k==w);
+
+      add_p1p1(&tp1p1, &g, &t);
+      if(j != 0) p1p1_to_p2((ge25519_p2 *)&g, &tp1p1);
+      else p1p1_to_p3(&g, &tp1p1); /* convert to p3 representation at the end */
+    }
+  }
+  r->x = g.x;
+  r->y = g.y;
+  r->z = g.z;
+  r->t = g.t;
+}
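+
+/* Fixed-window scalar multiplication, mirroring fe25519_pow: pre[k] = k*P
+ * for k < 2^WINDOWSIZE, the scalar bytes are processed most-significant
+ * first in WINDOWSIZE-bit windows, and each window costs WINDOWSIZE
+ * doublings plus one addition of a table entry selected with cmov_p3 (no
+ * secret-dependent memory addresses). The doublings stay in p2 form except
+ * the last one before each addition, which goes to p3 because add_p1p1
+ * expects extended coordinates. */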
+
+void ge25519_scalarmult_base(ge25519_p3 *r, const sc25519 *s)
+{
+  /* XXX: Better algorithm for known-base-point scalar multiplication */
+  ge25519_p3 t;
+  fe25519_unpack(&t.x, ge25519_base_x);
+  fe25519_unpack(&t.y, ge25519_base_y);
+  fe25519_unpack(&t.z, ge25519_base_z);
+  fe25519_unpack(&t.t, ge25519_base_t);
+  ge25519_scalarmult(r, &t, s);          
+}
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/ge25519.h b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/ge25519.h
new file mode 100644
index 00000000..49ad163a
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/ge25519.h
@@ -0,0 +1,34 @@
+#ifndef GE25519_H
+#define GE25519_H
+
+#include "fe25519.h"
+#include "sc25519.h"
+
+#define ge25519 crypto_sign_edwards25519sha512batch_ge25519
+#define ge25519_unpack_vartime crypto_sign_edwards25519sha512batch_ge25519_unpack_vartime
+#define ge25519_pack crypto_sign_edwards25519sha512batch_ge25519_pack
+#define ge25519_add crypto_sign_edwards25519sha512batch_ge25519_add
+#define ge25519_double crypto_sign_edwards25519sha512batch_ge25519_double
+#define ge25519_scalarmult crypto_sign_edwards25519sha512batch_ge25519_scalarmult
+#define ge25519_scalarmult_base crypto_sign_edwards25519sha512batch_ge25519_scalarmult_base
+
+typedef struct {
+  fe25519 x;
+  fe25519 y;
+  fe25519 z;
+  fe25519 t;
+} ge25519;
+
+int ge25519_unpack_vartime(ge25519 *r, const unsigned char p[32]);
+
+void ge25519_pack(unsigned char r[32], const ge25519 *p);
+
+void ge25519_add(ge25519 *r, const ge25519 *p, const ge25519 *q);
+
+void ge25519_double(ge25519 *r, const ge25519 *p);
+
+void ge25519_scalarmult(ge25519 *r, const ge25519 *p, const sc25519 *s);
+
+void ge25519_scalarmult_base(ge25519 *r, const sc25519 *s);
+
+#endif
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sc25519.c b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sc25519.c
new file mode 100644
index 00000000..5f27eb1b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sc25519.c
@@ -0,0 +1,146 @@
+#include "sc25519.h"
+
+/* Arithmetic modulo the group order n = 2^252 + 27742317777372353535851937790883648493 = 7237005577332262213973186563042994240857116359379907606001950938285454250989 */
+
+static const crypto_uint32 m[32] = {0xED, 0xD3, 0xF5, 0x5C, 0x1A, 0x63, 0x12, 0x58, 0xD6, 0x9C, 0xF7, 0xA2, 0xDE, 0xF9, 0xDE, 0x14, 
+                                    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10};
+
+static const crypto_uint32 mu[33] = {0x1B, 0x13, 0x2C, 0x0A, 0xA3, 0xE5, 0x9C, 0xED, 0xA7, 0x29, 0x63, 0x08, 0x5D, 0x21, 0x06, 0x21, 
+                                     0xEB, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0F};
+
+/* Reduce coefficients of r before calling reduce_add_sub */
+static void reduce_add_sub(sc25519 *r)
+{
+  int i, b, pb=0, nb;
+  unsigned char t[32];
+
+  for(i=0;i<32;i++) 
+  {
+    b = (r->v[i]<pb+m[i]);
+    t[i] = r->v[i]-pb-m[i]+b*256;
+    pb = b;
+  }
+  nb = 1-b;
+  for(i=0;i<32;i++) 
+    r->v[i] = r->v[i]*b + t[i]*nb;
+}
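+
+/* One branch-free conditional subtraction of the group order m: t = r - m is
+ * computed with a byte-wise borrow chain, the final borrow b is 1 exactly
+ * when r < m, and the last loop keeps r in that case and takes t otherwise,
+ * without branching on the value. */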
+
+/* Reduce coefficients of x before calling barrett_reduce */
+static void barrett_reduce(sc25519 *r, const crypto_uint32 x[64])
+{
+  /* See HAC, Alg. 14.42 */
+  int i,j;
+  crypto_uint32 q2[66] = {0};
+  crypto_uint32 *q3 = q2 + 33;
+  crypto_uint32 r1[33];
+  crypto_uint32 r2[33] = {0};
+  crypto_uint32 carry;
+  int b, pb=0;
+
+  for(i=0;i<33;i++)
+    for(j=0;j<33;j++)
+      if(i+j >= 31) q2[i+j] += mu[i]*x[j+31];
+  carry = q2[31] >> 8;
+  q2[32] += carry;
+  carry = q2[32] >> 8;
+  q2[33] += carry;
+
+  for(i=0;i<33;i++)r1[i] = x[i];
+  for(i=0;i<32;i++)
+    for(j=0;j<33;j++)
+      if(i+j < 33) r2[i+j] += m[i]*q3[j];
+
+  for(i=0;i<32;i++)
+  {
+    carry = r2[i] >> 8;
+    r2[i+1] += carry;
+    r2[i] &= 0xff;
+  }
+
+  for(i=0;i<32;i++) 
+  {
+    b = (r1[i]<pb+r2[i]);
+    r->v[i] = r1[i]-pb-r2[i]+b*256;
+    pb = b;
+  }
+
+  /* XXX: Can it really happen that r < 0? See HAC, Alg. 14.42, Step 3.
+   * If so, handle it here!
+   */
+
+  reduce_add_sub(r);
+  reduce_add_sub(r);
+}
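+
+/* Barrett reduction following HAC Alg. 14.42 with b = 256 and k = 32: mu
+ * above is the precomputed quotient floor(b^(2k)/m). The code forms
+ * q3 = floor(floor(x/b^(k-1)) * mu / b^(k+1)) as an estimate of x/m, then
+ * r = (x mod b^(k+1)) - (q3*m mod b^(k+1)); the estimate is off by at most a
+ * small multiple of m, which the two reduce_add_sub calls remove. */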
+
+/*
+static int iszero(const sc25519 *x)
+{
+  // Implement
+  return 0;
+}
+*/
+
+void sc25519_from32bytes(sc25519 *r, const unsigned char x[32])
+{
+  int i;
+  crypto_uint32 t[64] = {0};
+  for(i=0;i<32;i++) t[i] = x[i];
+  barrett_reduce(r, t);
+}
+
+void sc25519_from64bytes(sc25519 *r, const unsigned char x[64])
+{
+  int i;
+  crypto_uint32 t[64] = {0};
+  for(i=0;i<64;i++) t[i] = x[i];
+  barrett_reduce(r, t);
+}
+
+/* XXX: What we actually want for crypto_group is probably just something like
+ * void sc25519_frombytes(sc25519 *r, const unsigned char *x, size_t xlen)
+ */
+
+void sc25519_to32bytes(unsigned char r[32], const sc25519 *x)
+{
+  int i;
+  for(i=0;i<32;i++) r[i] = x->v[i];
+}
+
+void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y)
+{
+  int i, carry;
+  for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i];
+  for(i=0;i<31;i++)
+  {
+    carry = r->v[i] >> 8;
+    r->v[i+1] += carry;
+    r->v[i] &= 0xff;
+  }
+  reduce_add_sub(r);
+}
+
+void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y)
+{
+  int i,j,carry;
+  crypto_uint32 t[64];
+  for(i=0;i<64;i++)t[i] = 0;
+
+  for(i=0;i<32;i++)
+    for(j=0;j<32;j++)
+      t[i+j] += x->v[i] * y->v[j];
+
+  /* Reduce coefficients */
+  for(i=0;i<63;i++)
+  {
+    carry = t[i] >> 8;
+    t[i+1] += carry;
+    t[i] &= 0xff;
+  }
+
+  barrett_reduce(r, t);
+}
+
+void sc25519_square(sc25519 *r, const sc25519 *x)
+{
+  sc25519_mul(r, x, x);
+}
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sc25519.h b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sc25519.h
new file mode 100644
index 00000000..48584a85
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sc25519.h
@@ -0,0 +1,51 @@
+#ifndef SC25519_H
+#define SC25519_H
+
+#define sc25519 crypto_sign_edwards25519sha512batch_sc25519
+#define sc25519_from32bytes crypto_sign_edwards25519sha512batch_sc25519_from32bytes
+#define sc25519_from64bytes crypto_sign_edwards25519sha512batch_sc25519_from64bytes
+#define sc25519_to32bytes crypto_sign_edwards25519sha512batch_sc25519_to32bytes
+#define sc25519_pack crypto_sign_edwards25519sha512batch_sc25519_pack
+#define sc25519_getparity crypto_sign_edwards25519sha512batch_sc25519_getparity
+#define sc25519_setone crypto_sign_edwards25519sha512batch_sc25519_setone
+#define sc25519_setzero crypto_sign_edwards25519sha512batch_sc25519_setzero
+#define sc25519_neg crypto_sign_edwards25519sha512batch_sc25519_neg
+#define sc25519_add crypto_sign_edwards25519sha512batch_sc25519_add
+#define sc25519_sub crypto_sign_edwards25519sha512batch_sc25519_sub
+#define sc25519_mul crypto_sign_edwards25519sha512batch_sc25519_mul
+#define sc25519_square crypto_sign_edwards25519sha512batch_sc25519_square
+#define sc25519_invert crypto_sign_edwards25519sha512batch_sc25519_invert
+
+#include "crypto_uint32.h"
+
+typedef struct {
+  crypto_uint32 v[32]; 
+} sc25519;
+
+void sc25519_from32bytes(sc25519 *r, const unsigned char x[32]);
+
+void sc25519_from64bytes(sc25519 *r, const unsigned char x[64]);
+
+void sc25519_to32bytes(unsigned char r[32], const sc25519 *x);
+
+void sc25519_pack(unsigned char r[32], const sc25519 *x);
+
+unsigned char sc25519_getparity(const sc25519 *x);
+
+void sc25519_setone(sc25519 *r);
+
+void sc25519_setzero(sc25519 *r);
+
+void sc25519_neg(sc25519 *r, const sc25519 *x);
+
+void sc25519_add(sc25519 *r, const sc25519 *x, const sc25519 *y);
+
+void sc25519_sub(sc25519 *r, const sc25519 *x, const sc25519 *y);
+
+void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y);
+
+void sc25519_square(sc25519 *r, const sc25519 *x);
+
+void sc25519_invert(sc25519 *r, const sc25519 *x);
+
+#endif
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sign.c b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sign.c
new file mode 100644
index 00000000..f40e548b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/ref/sign.c
@@ -0,0 +1,103 @@
+#include "api.h"
+#include "crypto_sign.h"
+#include "crypto_hash_sha512.h"
+#include "randombytes.h"
+#include "crypto_verify_32.h"
+
+#include "ge25519.h"
+
+int crypto_sign_keypair(
+    unsigned char *pk,
+    unsigned char *sk
+    )
+{
+  sc25519 scsk;
+  ge25519 gepk;
+
+  randombytes(sk, 32);
+  crypto_hash_sha512(sk, sk, 32);
+  sk[0] &= 248;
+  sk[31] &= 127;
+  sk[31] |= 64;
+
+  sc25519_from32bytes(&scsk,sk);
+  
+  ge25519_scalarmult_base(&gepk, &scsk);
+  ge25519_pack(pk, &gepk);
+  return 0;
+}
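+
+/* Key generation: 32 random bytes are expanded by SHA-512 into the 64-byte
+ * secret key. The low half is clamped (clear the 3 low bits, clear bit 255,
+ * set bit 254), reduced mod the group order and used as the secret scalar a;
+ * the high half sk[32..63] stays as the per-message nonce key used by
+ * crypto_sign below. The public key is the packed point A = a*B. */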
+
+int crypto_sign(
+    unsigned char *sm,unsigned long long *smlen,
+    const unsigned char *m,unsigned long long mlen,
+    const unsigned char *sk
+    )
+{
+  sc25519 sck, scs, scsk;
+  ge25519 ger;
+  unsigned char r[32];
+  unsigned char s[32];
+  unsigned long long i;
+  unsigned char hmg[crypto_hash_sha512_BYTES];
+  unsigned char hmr[crypto_hash_sha512_BYTES];
+
+  *smlen = mlen+64;
+  for(i=0;i<mlen;i++)
+    sm[32 + i] = m[i];
+  for(i=0;i<32;i++)
+    sm[i] = sk[32+i];
+  crypto_hash_sha512(hmg, sm, mlen+32); /* Generate k as h(sk[32],...,sk[63],m) */
+
+  sc25519_from64bytes(&sck, hmg);
+  ge25519_scalarmult_base(&ger, &sck);
+  ge25519_pack(r, &ger);
+  
+  for(i=0;i<32;i++)
+    sm[i] = r[i];
+
+  crypto_hash_sha512(hmr, sm, mlen+32); /* Compute h(r,m) */
+  sc25519_from64bytes(&scs, hmr);
+  sc25519_mul(&scs, &scs, &sck);
+  
+  sc25519_from32bytes(&scsk, sk);
+  sc25519_add(&scs, &scs, &scsk);
+
+  sc25519_to32bytes(s,&scs); /* cat s */
+  for(i=0;i<32;i++)
+    sm[mlen+32+i] = s[i]; 
+
+  return 0;
+}
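+
+/* Signed-message layout: sm = R (32 bytes) || m || s (32 bytes), so
+ * smlen = mlen + 64. The nonce scalar is k = H(sk[32..63] || m) mod n,
+ * R = k*B, and s = H(R || m)*k + a (mod n), with a the secret scalar from
+ * crypto_sign_keypair. */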
+
+int crypto_sign_open(
+    unsigned char *m,unsigned long long *mlen,
+    const unsigned char *sm,unsigned long long smlen,
+    const unsigned char *pk
+    )
+{
+  int i;
+  unsigned char t1[32], t2[32];
+  ge25519 get1, get2, gepk;
+  sc25519 schmr, scs;
+  unsigned char hmr[crypto_hash_sha512_BYTES];
+
+  if (ge25519_unpack_vartime(&get1, sm)) return -1;
+  if (ge25519_unpack_vartime(&gepk, pk)) return -1;
+
+  crypto_hash_sha512(hmr,sm,smlen-32);
+
+  sc25519_from64bytes(&schmr, hmr);
+  ge25519_scalarmult(&get1, &get1, &schmr);
+  ge25519_add(&get1, &get1, &gepk);
+  ge25519_pack(t1, &get1);
+
+  sc25519_from32bytes(&scs, &sm[smlen-32]);
+  ge25519_scalarmult_base(&get2, &scs);
+  ge25519_pack(t2, &get2);
+
+  for(i=0;i<smlen-64;i++)
+    m[i] = sm[i + 32];
+  *mlen = smlen-64;
+
+  return crypto_verify_32(t1, t2);
+}
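+
+/* Verification checks s*B == H(R || m)*R + A by packing both sides and
+ * comparing them with crypto_verify_32; with s = H(R||m)*k + a, R = k*B and
+ * A = a*B the two sides agree for honestly generated signatures. */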
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/selected b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/used b/nacl/nacl-20110221/crypto_sign/edwards25519sha512batch/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_sign/measure.c b/nacl/nacl-20110221/crypto_sign/measure.c
new file mode 100644
index 00000000..8d8495a8
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/measure.c
@@ -0,0 +1,83 @@
+#include <stdlib.h>
+#include "randombytes.h"
+#include "cpucycles.h"
+#include "crypto_sign.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_sign_IMPLEMENTATION;
+const char *implementationversion = crypto_sign_VERSION;
+const char *sizenames[] = { "outputbytes", "publickeybytes", "secretkeybytes", 0 };
+const long long sizes[] = { crypto_sign_BYTES, crypto_sign_PUBLICKEYBYTES, crypto_sign_SECRETKEYBYTES };
+
+#define MAXTEST_BYTES 100000
+
+static unsigned char *pk;
+static unsigned char *sk;
+static unsigned char *m; unsigned long long mlen;
+static unsigned char *sm; unsigned long long smlen;
+static unsigned char *t; unsigned long long tlen;
+
+void preallocate(void)
+{
+#ifdef RAND_R_PRNG_NOT_SEEDED
+  RAND_status();
+#endif
+}
+
+void allocate(void)
+{
+  pk = alignedcalloc(crypto_sign_PUBLICKEYBYTES);
+  sk = alignedcalloc(crypto_sign_SECRETKEYBYTES);
+  m = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES);
+  sm = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES);
+  t = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES);
+}
+
+#define TIMINGS 31
+static long long cycles[TIMINGS + 1];
+static long long bytes[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (i = 0;i <= TIMINGS;++i) {
+      cycles[i] = cpucycles();
+      crypto_sign_keypair(pk,sk);
+    }
+    for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+    printentry(-1,"keypair_cycles",cycles,TIMINGS);
+
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / 4) {
+      randombytes(m,mlen);
+
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+        bytes[i] = crypto_sign(sm,&smlen,m,mlen,sk);
+        if (bytes[i] == 0) bytes[i] = smlen;
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"cycles",cycles,TIMINGS);
+      printentry(mlen,"bytes",bytes,TIMINGS);
+
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+        bytes[i] = crypto_sign_open(t,&tlen,sm,smlen,pk);
+        if (bytes[i] == 0) bytes[i] = tlen;
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"open_cycles",cycles,TIMINGS);
+      printentry(mlen,"open_bytes",bytes,TIMINGS);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_sign/try.c b/nacl/nacl-20110221/crypto_sign/try.c
new file mode 100644
index 00000000..fc553416
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/try.c
@@ -0,0 +1,86 @@
+/*
+ * crypto_sign/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "randombytes.h"
+#include "crypto_sign.h"
+
+#define MAXTEST_BYTES 10000
+#define TUNE_BYTES 1536
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_sign_IMPLEMENTATION;
+
+static unsigned char *pk;
+static unsigned char *sk;
+static unsigned char *m; unsigned long long mlen;
+static unsigned char *sm; unsigned long long smlen;
+static unsigned char *t; unsigned long long tlen;
+
+void preallocate(void)
+{
+#ifdef RAND_R_PRNG_NOT_SEEDED
+  RAND_status();
+#endif
+}
+
+void allocate(void)
+{
+  pk = alignedcalloc(crypto_sign_PUBLICKEYBYTES);
+  sk = alignedcalloc(crypto_sign_SECRETKEYBYTES);
+  m = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES);
+  sm = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES);
+  t = alignedcalloc(MAXTEST_BYTES + crypto_sign_BYTES);
+}
+
+void predoit(void)
+{
+  crypto_sign_keypair(pk,sk);
+  mlen = TUNE_BYTES;
+  smlen = 0;
+  randombytes(m,mlen);
+  crypto_sign(sm,&smlen,m,mlen,sk);
+}
+
+void doit(void)
+{
+  crypto_sign_open(t,&tlen,sm,smlen,pk);
+}
+
+char checksum[crypto_sign_BYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long mlen;
+  long long i;
+  long long j;
+
+  if (crypto_sign_keypair(pk,sk) != 0) return "crypto_sign_keypair returns nonzero";
+  for (mlen = 0;mlen < MAXTEST_BYTES;mlen += 1 + (mlen / 16)) {
+    if (crypto_sign(sm,&smlen,m,mlen,sk) != 0) return "crypto_sign returns nonzero";
+    if (crypto_sign_open(t,&tlen,sm,smlen,pk) != 0) return "crypto_sign_open returns nonzero";
+    if (tlen != mlen) return "crypto_sign_open does not match length";
+    for (i = 0;i < tlen;++i)
+      if (t[i] != m[i])
+        return "crypto_sign_open does not match contents";
+
+    j = random() % smlen;
+    sm[j] ^= 1;
+    if (crypto_sign_open(t,&tlen,sm,smlen,pk) == 0) {
+      if (tlen != mlen) return "crypto_sign_open allows trivial forgery of length";
+      for (i = 0;i < tlen;++i)
+        if (t[i] != m[i])
+          return "crypto_sign_open allows trivial forgery of contents";
+    }
+    sm[j] ^= 1;
+
+  }
+
+  /* do some long-term checksum */
+  checksum[0] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_sign/wrapper-keypair.cpp b/nacl/nacl-20110221/crypto_sign/wrapper-keypair.cpp
new file mode 100644
index 00000000..3687465d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/wrapper-keypair.cpp
@@ -0,0 +1,12 @@
+#include <string>
+using std::string;
+#include "crypto_sign.h"
+
+string crypto_sign_keypair(string *sk_string)
+{
+  unsigned char pk[crypto_sign_PUBLICKEYBYTES];
+  unsigned char sk[crypto_sign_SECRETKEYBYTES];
+  crypto_sign_keypair(pk,sk);
+  *sk_string = string((char *) sk,sizeof sk);
+  return string((char *) pk,sizeof pk);
+}
diff --git a/nacl/nacl-20110221/crypto_sign/wrapper-sign-open.cpp b/nacl/nacl-20110221/crypto_sign/wrapper-sign-open.cpp
new file mode 100644
index 00000000..346e9400
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/wrapper-sign-open.cpp
@@ -0,0 +1,24 @@
+#include <string>
+using std::string;
+#include "crypto_sign.h"
+
+string crypto_sign_open(const string &sm_string, const string &pk_string)
+{
+  if (pk_string.size() != crypto_sign_PUBLICKEYBYTES) throw "incorrect public-key length";
+  size_t smlen = sm_string.size();
+  unsigned char m[smlen];
+  unsigned long long mlen;
+  for (int i = 0;i < smlen;++i) m[i] = sm_string[i];
+  if (crypto_sign_open(
+        m,
+        &mlen,
+        m,
+        smlen,
+        (const unsigned char *) pk_string.c_str()
+        ) != 0)
+    throw "signature fails verification";
+  return string(
+    (char *) m,
+    mlen
+  );
+}
diff --git a/nacl/nacl-20110221/crypto_sign/wrapper-sign.cpp b/nacl/nacl-20110221/crypto_sign/wrapper-sign.cpp
new file mode 100644
index 00000000..f0624b76
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_sign/wrapper-sign.cpp
@@ -0,0 +1,23 @@
+#include <string>
+using std::string;
+#include "crypto_sign.h"
+
+string crypto_sign(const string &m_string, const string &sk_string)
+{
+  if (sk_string.size() != crypto_sign_SECRETKEYBYTES) throw "incorrect secret-key length";
+  size_t mlen = m_string.size();
+  unsigned char m[mlen+crypto_sign_BYTES];
+  unsigned long long smlen;
+  for (int i = 0;i < mlen;++i) m[i] = m_string[i];
+  crypto_sign(
+      m, 
+      &smlen, 
+      m, 
+      mlen, 
+      (const unsigned char *) sk_string.c_str()
+      );
+  return string(
+      (char *) m,
+      smlen
+  );
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/checksum b/nacl/nacl-20110221/crypto_stream/aes128ctr/checksum
new file mode 100644
index 00000000..92865436
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/checksum
@@ -0,0 +1 @@
+6e9966897837aae181e93261ae88fdf0
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/afternm.s b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/afternm.s
new file mode 100644
index 00000000..c1ba79ef
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/afternm.s
@@ -0,0 +1,12308 @@
+# Author: Emilia Käsper and Peter Schwabe
+# Date: 2009-03-19
+# +2010.01.31: minor namespace modifications
+# Public domain
+
+.data
+.p2align 6
+
+RCON: .int 0x00000000, 0x00000000, 0x00000000, 0xffffffff
+ROTB: .int 0x0c000000, 0x00000000, 0x04000000, 0x08000000
+EXPB0: .int 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f
+CTRINC1: .int 0x00000001, 0x00000000, 0x00000000, 0x00000000
+CTRINC2: .int 0x00000002, 0x00000000, 0x00000000, 0x00000000
+CTRINC3: .int 0x00000003, 0x00000000, 0x00000000, 0x00000000
+CTRINC4: .int 0x00000004, 0x00000000, 0x00000000, 0x00000000
+CTRINC5: .int 0x00000005, 0x00000000, 0x00000000, 0x00000000
+CTRINC6: .int 0x00000006, 0x00000000, 0x00000000, 0x00000000
+CTRINC7: .int 0x00000007, 0x00000000, 0x00000000, 0x00000000
+RCTRINC1: .int 0x00000000, 0x00000000, 0x00000000, 0x00000001
+RCTRINC2: .int 0x00000000, 0x00000000, 0x00000000, 0x00000002
+RCTRINC3: .int 0x00000000, 0x00000000, 0x00000000, 0x00000003
+RCTRINC4: .int 0x00000000, 0x00000000, 0x00000000, 0x00000004
+RCTRINC5: .int 0x00000000, 0x00000000, 0x00000000, 0x00000005
+RCTRINC6: .int 0x00000000, 0x00000000, 0x00000000, 0x00000006
+RCTRINC7: .int 0x00000000, 0x00000000, 0x00000000, 0x00000007
+
+SWAP32: .int 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+M0SWAP: .quad 0x0105090d0004080c , 0x03070b0f02060a0e
+
+BS0: .quad 0x5555555555555555, 0x5555555555555555
+BS1: .quad 0x3333333333333333, 0x3333333333333333
+BS2: .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+ONE: .quad 0xffffffffffffffff, 0xffffffffffffffff
+M0:  .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+SRM0:	.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
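+
+# The constants above serve the bitsliced AES-128-CTR code below:
+# CTRINC1..7/RCTRINC1..7 are the increments added to seven copies of the
+# counter block so that eight blocks are processed per iteration,
+# SWAP32/M0/M0SWAP/SR/SRM0 are pshufb byte permutations (byte-order fixups
+# and, presumably, ShiftRows), and BS0/BS1/BS2 are the 0x55../0x33../0x0f..
+# masks used by the bit-matrix transpose that converts the blocks into
+# bitsliced form.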
+
+# qhasm: int64 outp
+
+# qhasm: int64 len
+
+# qhasm: int64 np
+
+# qhasm: int64 c
+
+# qhasm: input outp
+
+# qhasm: input len
+
+# qhasm: input np
+
+# qhasm: input c
+
+# qhasm: int64 lensav
+
+# qhasm: int6464 xmm0
+
+# qhasm: int6464 xmm1
+
+# qhasm: int6464 xmm2
+
+# qhasm: int6464 xmm3
+
+# qhasm: int6464 xmm4
+
+# qhasm: int6464 xmm5
+
+# qhasm: int6464 xmm6
+
+# qhasm: int6464 xmm7
+
+# qhasm: int6464 xmm8
+
+# qhasm: int6464 xmm9
+
+# qhasm: int6464 xmm10
+
+# qhasm: int6464 xmm11
+
+# qhasm: int6464 xmm12
+
+# qhasm: int6464 xmm13
+
+# qhasm: int6464 xmm14
+
+# qhasm: int6464 xmm15
+
+# qhasm: int6464 t
+
+# qhasm: stack1024 bl
+
+# qhasm: stack128 nonce_stack
+
+# qhasm: int64 blp
+
+# qhasm: int64 b
+
+# qhasm: int64 tmp
+
+# qhasm: enter crypto_stream_aes128ctr_core2_afternm
+.text
+.p2align 5
+.globl _crypto_stream_aes128ctr_core2_afternm
+.globl crypto_stream_aes128ctr_core2_afternm
+_crypto_stream_aes128ctr_core2_afternm:
+crypto_stream_aes128ctr_core2_afternm:
+mov %rsp,%r11
+and $31,%r11
+add $160,%r11
+sub %r11,%rsp
+
+# qhasm: xmm0 = *(int128 *) (np + 0)
+# asm 1: movdqa 0(<np=int64#3),>xmm0=int6464#1
+# asm 2: movdqa 0(<np=%rdx),>xmm0=%xmm0
+movdqa 0(%rdx),%xmm0
+
+# qhasm: nonce_stack = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>nonce_stack=stack128#1
+# asm 2: movdqa <xmm0=%xmm0,>nonce_stack=0(%rsp)
+movdqa %xmm0,0(%rsp)
+
+# qhasm: np = &nonce_stack
+# asm 1: leaq <nonce_stack=stack128#1,>np=int64#3
+# asm 2: leaq <nonce_stack=0(%rsp),>np=%rdx
+leaq 0(%rsp),%rdx
+
+# qhasm: enc_block:
+._enc_block:
+
+# qhasm: xmm0 = *(int128 *) (np + 0)
+# asm 1: movdqa 0(<np=int64#3),>xmm0=int6464#1
+# asm 2: movdqa 0(<np=%rdx),>xmm0=%xmm0
+movdqa 0(%rdx),%xmm0
+
+# qhasm: xmm1 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm1=int6464#2
+# asm 2: movdqa <xmm0=%xmm0,>xmm1=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: shuffle bytes of xmm1 by SWAP32
+# asm 1: pshufb SWAP32,<xmm1=int6464#2
+# asm 2: pshufb SWAP32,<xmm1=%xmm1
+pshufb SWAP32,%xmm1
+
+# qhasm: xmm2 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm2=int6464#3
+# asm 2: movdqa <xmm1=%xmm1,>xmm2=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: xmm3 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm3=int6464#4
+# asm 2: movdqa <xmm1=%xmm1,>xmm3=%xmm3
+movdqa %xmm1,%xmm3
+
+# qhasm: xmm4 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm4=int6464#5
+# asm 2: movdqa <xmm1=%xmm1,>xmm4=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm5=int6464#6
+# asm 2: movdqa <xmm1=%xmm1,>xmm5=%xmm5
+movdqa %xmm1,%xmm5
+
+# qhasm: xmm6 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm6=int6464#7
+# asm 2: movdqa <xmm1=%xmm1,>xmm6=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: xmm7 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm7=int6464#8
+# asm 2: movdqa <xmm1=%xmm1,>xmm7=%xmm7
+movdqa %xmm1,%xmm7
+
+# qhasm: int32323232 xmm1 += RCTRINC1
+# asm 1: paddd  RCTRINC1,<xmm1=int6464#2
+# asm 2: paddd  RCTRINC1,<xmm1=%xmm1
+paddd  RCTRINC1,%xmm1
+
+# qhasm: int32323232 xmm2 += RCTRINC2
+# asm 1: paddd  RCTRINC2,<xmm2=int6464#3
+# asm 2: paddd  RCTRINC2,<xmm2=%xmm2
+paddd  RCTRINC2,%xmm2
+
+# qhasm: int32323232 xmm3 += RCTRINC3
+# asm 1: paddd  RCTRINC3,<xmm3=int6464#4
+# asm 2: paddd  RCTRINC3,<xmm3=%xmm3
+paddd  RCTRINC3,%xmm3
+
+# qhasm: int32323232 xmm4 += RCTRINC4
+# asm 1: paddd  RCTRINC4,<xmm4=int6464#5
+# asm 2: paddd  RCTRINC4,<xmm4=%xmm4
+paddd  RCTRINC4,%xmm4
+
+# qhasm: int32323232 xmm5 += RCTRINC5
+# asm 1: paddd  RCTRINC5,<xmm5=int6464#6
+# asm 2: paddd  RCTRINC5,<xmm5=%xmm5
+paddd  RCTRINC5,%xmm5
+
+# qhasm: int32323232 xmm6 += RCTRINC6
+# asm 1: paddd  RCTRINC6,<xmm6=int6464#7
+# asm 2: paddd  RCTRINC6,<xmm6=%xmm6
+paddd  RCTRINC6,%xmm6
+
+# qhasm: int32323232 xmm7 += RCTRINC7
+# asm 1: paddd  RCTRINC7,<xmm7=int6464#8
+# asm 2: paddd  RCTRINC7,<xmm7=%xmm7
+paddd  RCTRINC7,%xmm7
+
+# qhasm: shuffle bytes of xmm0 by M0
+# asm 1: pshufb M0,<xmm0=int6464#1
+# asm 2: pshufb M0,<xmm0=%xmm0
+pshufb M0,%xmm0
+
+# qhasm: shuffle bytes of xmm1 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm1=int6464#2
+# asm 2: pshufb M0SWAP,<xmm1=%xmm1
+pshufb M0SWAP,%xmm1
+
+# qhasm: shuffle bytes of xmm2 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm2=int6464#3
+# asm 2: pshufb M0SWAP,<xmm2=%xmm2
+pshufb M0SWAP,%xmm2
+
+# qhasm: shuffle bytes of xmm3 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm3=int6464#4
+# asm 2: pshufb M0SWAP,<xmm3=%xmm3
+pshufb M0SWAP,%xmm3
+
+# qhasm: shuffle bytes of xmm4 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm4=int6464#5
+# asm 2: pshufb M0SWAP,<xmm4=%xmm4
+pshufb M0SWAP,%xmm4
+
+# qhasm: shuffle bytes of xmm5 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm5=int6464#6
+# asm 2: pshufb M0SWAP,<xmm5=%xmm5
+pshufb M0SWAP,%xmm5
+
+# qhasm: shuffle bytes of xmm6 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm6=int6464#7
+# asm 2: pshufb M0SWAP,<xmm6=%xmm6
+pshufb M0SWAP,%xmm6
+
+# qhasm: shuffle bytes of xmm7 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm7=int6464#8
+# asm 2: pshufb M0SWAP,<xmm7=%xmm7
+pshufb M0SWAP,%xmm7
+
+# qhasm:     xmm8 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm8=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm8=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm8=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<xmm8=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:     xmm8 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm8=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>xmm8=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:     xmm8 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm8=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm8=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:     xmm8 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>xmm8=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm8=int6464#9
+# asm 2: pxor  <xmm1=%xmm1,<xmm8=%xmm8
+pxor  %xmm1,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm8=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<xmm8=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:     xmm8 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm8=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>xmm8=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:     xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#9
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm8 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>xmm8=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm8 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm8=int6464#9
+# asm 2: movdqa <xmm3=%xmm3,>xmm8=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm8=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<xmm8=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:     xmm8 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm8=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm8=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:     xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#9
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm8 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>xmm8=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
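+# At this point the eight counter blocks in xmm0..xmm7 have been transposed
+# into bitsliced form by the psrlq/psllq + pand BS0/BS1/BS2 swaps above; the
+# pxor with the 128 bytes at c below appears to mix in the first bitsliced
+# round key, and the pshufb by SR presumably implements the ShiftRows step.
+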
+# qhasm:     xmm0 ^= *(int128 *)(c + 0)
+# asm 1: pxor 0(<c=int64#4),<xmm0=int6464#1
+# asm 2: pxor 0(<c=%rcx),<xmm0=%xmm0
+pxor 0(%rcx),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 16)
+# asm 1: pxor 16(<c=int64#4),<xmm1=int6464#2
+# asm 2: pxor 16(<c=%rcx),<xmm1=%xmm1
+pxor 16(%rcx),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 32)
+# asm 1: pxor 32(<c=int64#4),<xmm2=int6464#3
+# asm 2: pxor 32(<c=%rcx),<xmm2=%xmm2
+pxor 32(%rcx),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 48)
+# asm 1: pxor 48(<c=int64#4),<xmm3=int6464#4
+# asm 2: pxor 48(<c=%rcx),<xmm3=%xmm3
+pxor 48(%rcx),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 64)
+# asm 1: pxor 64(<c=int64#4),<xmm4=int6464#5
+# asm 2: pxor 64(<c=%rcx),<xmm4=%xmm4
+pxor 64(%rcx),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 80)
+# asm 1: pxor 80(<c=int64#4),<xmm5=int6464#6
+# asm 2: pxor 80(<c=%rcx),<xmm5=%xmm5
+pxor 80(%rcx),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 96)
+# asm 1: pxor 96(<c=int64#4),<xmm6=int6464#7
+# asm 2: pxor 96(<c=%rcx),<xmm6=%xmm6
+pxor 96(%rcx),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 112)
+# asm 1: pxor 112(<c=int64#4),<xmm7=int6464#8
+# asm 2: pxor 112(<c=%rcx),<xmm7=%xmm7
+pxor 112(%rcx),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 128)
+# asm 1: pxor 128(<c=int64#4),<xmm8=int6464#9
+# asm 2: pxor 128(<c=%rcx),<xmm8=%xmm8
+pxor 128(%rcx),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 144)
+# asm 1: pxor 144(<c=int64#4),<xmm9=int6464#10
+# asm 2: pxor 144(<c=%rcx),<xmm9=%xmm9
+pxor 144(%rcx),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 160)
+# asm 1: pxor 160(<c=int64#4),<xmm10=int6464#11
+# asm 2: pxor 160(<c=%rcx),<xmm10=%xmm10
+pxor 160(%rcx),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 176)
+# asm 1: pxor 176(<c=int64#4),<xmm11=int6464#12
+# asm 2: pxor 176(<c=%rcx),<xmm11=%xmm11
+pxor 176(%rcx),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 192)
+# asm 1: pxor 192(<c=int64#4),<xmm12=int6464#13
+# asm 2: pxor 192(<c=%rcx),<xmm12=%xmm12
+pxor 192(%rcx),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 208)
+# asm 1: pxor 208(<c=int64#4),<xmm13=int6464#14
+# asm 2: pxor 208(<c=%rcx),<xmm13=%xmm13
+pxor 208(%rcx),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 224)
+# asm 1: pxor 224(<c=int64#4),<xmm14=int6464#15
+# asm 2: pxor 224(<c=%rcx),<xmm14=%xmm14
+pxor 224(%rcx),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 240)
+# asm 1: pxor 240(<c=int64#4),<xmm15=int6464#16
+# asm 2: pxor 240(<c=%rcx),<xmm15=%xmm15
+pxor 240(%rcx),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 256)
+# asm 1: pxor 256(<c=int64#4),<xmm0=int6464#1
+# asm 2: pxor 256(<c=%rcx),<xmm0=%xmm0
+pxor 256(%rcx),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 272)
+# asm 1: pxor 272(<c=int64#4),<xmm1=int6464#2
+# asm 2: pxor 272(<c=%rcx),<xmm1=%xmm1
+pxor 272(%rcx),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 288)
+# asm 1: pxor 288(<c=int64#4),<xmm2=int6464#3
+# asm 2: pxor 288(<c=%rcx),<xmm2=%xmm2
+pxor 288(%rcx),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 304)
+# asm 1: pxor 304(<c=int64#4),<xmm3=int6464#4
+# asm 2: pxor 304(<c=%rcx),<xmm3=%xmm3
+pxor 304(%rcx),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 320)
+# asm 1: pxor 320(<c=int64#4),<xmm4=int6464#5
+# asm 2: pxor 320(<c=%rcx),<xmm4=%xmm4
+pxor 320(%rcx),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 336)
+# asm 1: pxor 336(<c=int64#4),<xmm5=int6464#6
+# asm 2: pxor 336(<c=%rcx),<xmm5=%xmm5
+pxor 336(%rcx),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 352)
+# asm 1: pxor 352(<c=int64#4),<xmm6=int6464#7
+# asm 2: pxor 352(<c=%rcx),<xmm6=%xmm6
+pxor 352(%rcx),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 368)
+# asm 1: pxor 368(<c=int64#4),<xmm7=int6464#8
+# asm 2: pxor 368(<c=%rcx),<xmm7=%xmm7
+pxor 368(%rcx),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 384)
+# asm 1: pxor 384(<c=int64#4),<xmm8=int6464#9
+# asm 2: pxor 384(<c=%rcx),<xmm8=%xmm8
+pxor 384(%rcx),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 400)
+# asm 1: pxor 400(<c=int64#4),<xmm9=int6464#10
+# asm 2: pxor 400(<c=%rcx),<xmm9=%xmm9
+pxor 400(%rcx),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 416)
+# asm 1: pxor 416(<c=int64#4),<xmm10=int6464#11
+# asm 2: pxor 416(<c=%rcx),<xmm10=%xmm10
+pxor 416(%rcx),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 432)
+# asm 1: pxor 432(<c=int64#4),<xmm11=int6464#12
+# asm 2: pxor 432(<c=%rcx),<xmm11=%xmm11
+pxor 432(%rcx),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 448)
+# asm 1: pxor 448(<c=int64#4),<xmm12=int6464#13
+# asm 2: pxor 448(<c=%rcx),<xmm12=%xmm12
+pxor 448(%rcx),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 464)
+# asm 1: pxor 464(<c=int64#4),<xmm13=int6464#14
+# asm 2: pxor 464(<c=%rcx),<xmm13=%xmm13
+pxor 464(%rcx),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 480)
+# asm 1: pxor 480(<c=int64#4),<xmm14=int6464#15
+# asm 2: pxor 480(<c=%rcx),<xmm14=%xmm14
+pxor 480(%rcx),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 496)
+# asm 1: pxor 496(<c=int64#4),<xmm15=int6464#16
+# asm 2: pxor 496(<c=%rcx),<xmm15=%xmm15
+pxor 496(%rcx),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 512)
+# asm 1: pxor 512(<c=int64#4),<xmm0=int6464#1
+# asm 2: pxor 512(<c=%rcx),<xmm0=%xmm0
+pxor 512(%rcx),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 528)
+# asm 1: pxor 528(<c=int64#4),<xmm1=int6464#2
+# asm 2: pxor 528(<c=%rcx),<xmm1=%xmm1
+pxor 528(%rcx),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 544)
+# asm 1: pxor 544(<c=int64#4),<xmm2=int6464#3
+# asm 2: pxor 544(<c=%rcx),<xmm2=%xmm2
+pxor 544(%rcx),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 560)
+# asm 1: pxor 560(<c=int64#4),<xmm3=int6464#4
+# asm 2: pxor 560(<c=%rcx),<xmm3=%xmm3
+pxor 560(%rcx),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 576)
+# asm 1: pxor 576(<c=int64#4),<xmm4=int6464#5
+# asm 2: pxor 576(<c=%rcx),<xmm4=%xmm4
+pxor 576(%rcx),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 592)
+# asm 1: pxor 592(<c=int64#4),<xmm5=int6464#6
+# asm 2: pxor 592(<c=%rcx),<xmm5=%xmm5
+pxor 592(%rcx),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 608)
+# asm 1: pxor 608(<c=int64#4),<xmm6=int6464#7
+# asm 2: pxor 608(<c=%rcx),<xmm6=%xmm6
+pxor 608(%rcx),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 624)
+# asm 1: pxor 624(<c=int64#4),<xmm7=int6464#8
+# asm 2: pxor 624(<c=%rcx),<xmm7=%xmm7
+pxor 624(%rcx),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 640)
+# asm 1: pxor 640(<c=int64#4),<xmm8=int6464#9
+# asm 2: pxor 640(<c=%rcx),<xmm8=%xmm8
+pxor 640(%rcx),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 656)
+# asm 1: pxor 656(<c=int64#4),<xmm9=int6464#10
+# asm 2: pxor 656(<c=%rcx),<xmm9=%xmm9
+pxor 656(%rcx),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 672)
+# asm 1: pxor 672(<c=int64#4),<xmm10=int6464#11
+# asm 2: pxor 672(<c=%rcx),<xmm10=%xmm10
+pxor 672(%rcx),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 688)
+# asm 1: pxor 688(<c=int64#4),<xmm11=int6464#12
+# asm 2: pxor 688(<c=%rcx),<xmm11=%xmm11
+pxor 688(%rcx),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 704)
+# asm 1: pxor 704(<c=int64#4),<xmm12=int6464#13
+# asm 2: pxor 704(<c=%rcx),<xmm12=%xmm12
+pxor 704(%rcx),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 720)
+# asm 1: pxor 720(<c=int64#4),<xmm13=int6464#14
+# asm 2: pxor 720(<c=%rcx),<xmm13=%xmm13
+pxor 720(%rcx),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 736)
+# asm 1: pxor 736(<c=int64#4),<xmm14=int6464#15
+# asm 2: pxor 736(<c=%rcx),<xmm14=%xmm14
+pxor 736(%rcx),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 752)
+# asm 1: pxor 752(<c=int64#4),<xmm15=int6464#16
+# asm 2: pxor 752(<c=%rcx),<xmm15=%xmm15
+pxor 752(%rcx),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
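+# note: the pshufd $0x93 / $0x4E dword rotations and xor chains that follow
+# appear to implement the MixColumns step of this bitsliced AES round,
+# rotating the 32-bit words of each 128-bit state register and folding the
+# rotated copies back into the state.
+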
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
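+# note: the next block xors the eight state registers with the 128-byte key
+# block at offsets 768..880 from c (%rcx), i.e. the next round key of the
+# expanded key schedule, and shuffles each register by SR, presumably the
+# ShiftRows byte-permutation constant defined earlier in this file.
+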
+# qhasm:     xmm0 ^= *(int128 *)(c + 768)
+# asm 1: pxor 768(<c=int64#4),<xmm0=int6464#1
+# asm 2: pxor 768(<c=%rcx),<xmm0=%xmm0
+pxor 768(%rcx),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 784)
+# asm 1: pxor 784(<c=int64#4),<xmm1=int6464#2
+# asm 2: pxor 784(<c=%rcx),<xmm1=%xmm1
+pxor 784(%rcx),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 800)
+# asm 1: pxor 800(<c=int64#4),<xmm2=int6464#3
+# asm 2: pxor 800(<c=%rcx),<xmm2=%xmm2
+pxor 800(%rcx),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 816)
+# asm 1: pxor 816(<c=int64#4),<xmm3=int6464#4
+# asm 2: pxor 816(<c=%rcx),<xmm3=%xmm3
+pxor 816(%rcx),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 832)
+# asm 1: pxor 832(<c=int64#4),<xmm4=int6464#5
+# asm 2: pxor 832(<c=%rcx),<xmm4=%xmm4
+pxor 832(%rcx),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 848)
+# asm 1: pxor 848(<c=int64#4),<xmm5=int6464#6
+# asm 2: pxor 848(<c=%rcx),<xmm5=%xmm5
+pxor 848(%rcx),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 864)
+# asm 1: pxor 864(<c=int64#4),<xmm6=int6464#7
+# asm 2: pxor 864(<c=%rcx),<xmm6=%xmm6
+pxor 864(%rcx),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 880)
+# asm 1: pxor 880(<c=int64#4),<xmm7=int6464#8
+# asm 2: pxor 880(<c=%rcx),<xmm7=%xmm7
+pxor 880(%rcx),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
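+# note: the long pxor/pand/por sequence that follows appears to be the
+# bitsliced AES S-box (SubBytes) for this round, evaluated as a Boolean
+# circuit over the eight state registers, starting with its input linear
+# transformation.
+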
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
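+# note: as above, the pshufd $0x93 / $0x4E shuffles and xors below appear to
+# be the MixColumns step for this round of the bitsliced AES state.
+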
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
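+# note: the next block xors the state with the 128-byte key block at offsets
+# 896..1008 from c (%rcx), the following round key of the expanded key
+# schedule, and again applies the SR byte shuffle (presumably ShiftRows).
+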
+# qhasm:     xmm8 ^= *(int128 *)(c + 896)
+# asm 1: pxor 896(<c=int64#4),<xmm8=int6464#9
+# asm 2: pxor 896(<c=%rcx),<xmm8=%xmm8
+pxor 896(%rcx),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 912)
+# asm 1: pxor 912(<c=int64#4),<xmm9=int6464#10
+# asm 2: pxor 912(<c=%rcx),<xmm9=%xmm9
+pxor 912(%rcx),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 928)
+# asm 1: pxor 928(<c=int64#4),<xmm10=int6464#11
+# asm 2: pxor 928(<c=%rcx),<xmm10=%xmm10
+pxor 928(%rcx),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 944)
+# asm 1: pxor 944(<c=int64#4),<xmm11=int6464#12
+# asm 2: pxor 944(<c=%rcx),<xmm11=%xmm11
+pxor 944(%rcx),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 960)
+# asm 1: pxor 960(<c=int64#4),<xmm12=int6464#13
+# asm 2: pxor 960(<c=%rcx),<xmm12=%xmm12
+pxor 960(%rcx),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 976)
+# asm 1: pxor 976(<c=int64#4),<xmm13=int6464#14
+# asm 2: pxor 976(<c=%rcx),<xmm13=%xmm13
+pxor 976(%rcx),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 992)
+# asm 1: pxor 992(<c=int64#4),<xmm14=int6464#15
+# asm 2: pxor 992(<c=%rcx),<xmm14=%xmm14
+pxor 992(%rcx),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 1008)
+# asm 1: pxor 1008(<c=int64#4),<xmm15=int6464#16
+# asm 2: pxor 1008(<c=%rcx),<xmm15=%xmm15
+pxor 1008(%rcx),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
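+# note: another bitsliced S-box (SubBytes) evaluation appears to start here,
+# with the same structure as in the previous round.
+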
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 1024)
+# asm 1: pxor 1024(<c=int64#4),<xmm0=int6464#1
+# asm 2: pxor 1024(<c=%rcx),<xmm0=%xmm0
+pxor 1024(%rcx),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 1040)
+# asm 1: pxor 1040(<c=int64#4),<xmm1=int6464#2
+# asm 2: pxor 1040(<c=%rcx),<xmm1=%xmm1
+pxor 1040(%rcx),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 1056)
+# asm 1: pxor 1056(<c=int64#4),<xmm2=int6464#3
+# asm 2: pxor 1056(<c=%rcx),<xmm2=%xmm2
+pxor 1056(%rcx),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 1072)
+# asm 1: pxor 1072(<c=int64#4),<xmm3=int6464#4
+# asm 2: pxor 1072(<c=%rcx),<xmm3=%xmm3
+pxor 1072(%rcx),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 1088)
+# asm 1: pxor 1088(<c=int64#4),<xmm4=int6464#5
+# asm 2: pxor 1088(<c=%rcx),<xmm4=%xmm4
+pxor 1088(%rcx),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 1104)
+# asm 1: pxor 1104(<c=int64#4),<xmm5=int6464#6
+# asm 2: pxor 1104(<c=%rcx),<xmm5=%xmm5
+pxor 1104(%rcx),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 1120)
+# asm 1: pxor 1120(<c=int64#4),<xmm6=int6464#7
+# asm 2: pxor 1120(<c=%rcx),<xmm6=%xmm6
+pxor 1120(%rcx),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 1136)
+# asm 1: pxor 1136(<c=int64#4),<xmm7=int6464#8
+# asm 2: pxor 1136(<c=%rcx),<xmm7=%xmm7
+pxor 1136(%rcx),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
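+# Add the round key stored at c+1152..c+1264 (one 16-byte word per slice)
+# and shuffle the bytes of every slice with SRM0, a ShiftRows-style
+# byte-permutation mask.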
+# qhasm:     xmm8 ^= *(int128 *)(c + 1152)
+# asm 1: pxor 1152(<c=int64#4),<xmm8=int6464#9
+# asm 2: pxor 1152(<c=%rcx),<xmm8=%xmm8
+pxor 1152(%rcx),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SRM0
+# asm 1: pshufb SRM0,<xmm8=int6464#9
+# asm 2: pshufb SRM0,<xmm8=%xmm8
+pshufb SRM0,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 1168)
+# asm 1: pxor 1168(<c=int64#4),<xmm9=int6464#10
+# asm 2: pxor 1168(<c=%rcx),<xmm9=%xmm9
+pxor 1168(%rcx),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SRM0
+# asm 1: pshufb SRM0,<xmm9=int6464#10
+# asm 2: pshufb SRM0,<xmm9=%xmm9
+pshufb SRM0,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 1184)
+# asm 1: pxor 1184(<c=int64#4),<xmm10=int6464#11
+# asm 2: pxor 1184(<c=%rcx),<xmm10=%xmm10
+pxor 1184(%rcx),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SRM0
+# asm 1: pshufb SRM0,<xmm10=int6464#11
+# asm 2: pshufb SRM0,<xmm10=%xmm10
+pshufb SRM0,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 1200)
+# asm 1: pxor 1200(<c=int64#4),<xmm11=int6464#12
+# asm 2: pxor 1200(<c=%rcx),<xmm11=%xmm11
+pxor 1200(%rcx),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SRM0
+# asm 1: pshufb SRM0,<xmm11=int6464#12
+# asm 2: pshufb SRM0,<xmm11=%xmm11
+pshufb SRM0,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 1216)
+# asm 1: pxor 1216(<c=int64#4),<xmm12=int6464#13
+# asm 2: pxor 1216(<c=%rcx),<xmm12=%xmm12
+pxor 1216(%rcx),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SRM0
+# asm 1: pshufb SRM0,<xmm12=int6464#13
+# asm 2: pshufb SRM0,<xmm12=%xmm12
+pshufb SRM0,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 1232)
+# asm 1: pxor 1232(<c=int64#4),<xmm13=int6464#14
+# asm 2: pxor 1232(<c=%rcx),<xmm13=%xmm13
+pxor 1232(%rcx),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SRM0
+# asm 1: pshufb SRM0,<xmm13=int6464#14
+# asm 2: pshufb SRM0,<xmm13=%xmm13
+pshufb SRM0,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 1248)
+# asm 1: pxor 1248(<c=int64#4),<xmm14=int6464#15
+# asm 2: pxor 1248(<c=%rcx),<xmm14=%xmm14
+pxor 1248(%rcx),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SRM0
+# asm 1: pshufb SRM0,<xmm14=int6464#15
+# asm 2: pshufb SRM0,<xmm14=%xmm14
+pshufb SRM0,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 1264)
+# asm 1: pxor 1264(<c=int64#4),<xmm15=int6464#16
+# asm 2: pxor 1264(<c=%rcx),<xmm15=%xmm15
+pxor 1264(%rcx),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SRM0
+# asm 1: pshufb SRM0,<xmm15=int6464#16
+# asm 2: pshufb SRM0,<xmm15=%xmm15
+pshufb SRM0,%xmm15
+
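+# The pxor/pand/por network below is the bitsliced AES S-box: a Boolean
+# circuit that applies SubBytes to every byte of all blocks in parallel.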
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:   xmm8 ^= *(int128 *)(c + 1280)
+# asm 1: pxor 1280(<c=int64#4),<xmm8=int6464#9
+# asm 2: pxor 1280(<c=%rcx),<xmm8=%xmm8
+pxor 1280(%rcx),%xmm8
+
+# qhasm:   xmm9 ^= *(int128 *)(c + 1296)
+# asm 1: pxor 1296(<c=int64#4),<xmm9=int6464#10
+# asm 2: pxor 1296(<c=%rcx),<xmm9=%xmm9
+pxor 1296(%rcx),%xmm9
+
+# qhasm:   xmm12 ^= *(int128 *)(c + 1312)
+# asm 1: pxor 1312(<c=int64#4),<xmm12=int6464#13
+# asm 2: pxor 1312(<c=%rcx),<xmm12=%xmm12
+pxor 1312(%rcx),%xmm12
+
+# qhasm:   xmm14 ^= *(int128 *)(c + 1328)
+# asm 1: pxor 1328(<c=int64#4),<xmm14=int6464#15
+# asm 2: pxor 1328(<c=%rcx),<xmm14=%xmm14
+pxor 1328(%rcx),%xmm14
+
+# qhasm:   xmm11 ^= *(int128 *)(c + 1344)
+# asm 1: pxor 1344(<c=int64#4),<xmm11=int6464#12
+# asm 2: pxor 1344(<c=%rcx),<xmm11=%xmm11
+pxor 1344(%rcx),%xmm11
+
+# qhasm:   xmm15 ^= *(int128 *)(c + 1360)
+# asm 1: pxor 1360(<c=int64#4),<xmm15=int6464#16
+# asm 2: pxor 1360(<c=%rcx),<xmm15=%xmm15
+pxor 1360(%rcx),%xmm15
+
+# qhasm:   xmm10 ^= *(int128 *)(c + 1376)
+# asm 1: pxor 1376(<c=int64#4),<xmm10=int6464#11
+# asm 2: pxor 1376(<c=%rcx),<xmm10=%xmm10
+pxor 1376(%rcx),%xmm10
+
+# qhasm:   xmm13 ^= *(int128 *)(c + 1392)
+# asm 1: pxor 1392(<c=int64#4),<xmm13=int6464#14
+# asm 2: pxor 1392(<c=%rcx),<xmm13=%xmm13
+pxor 1392(%rcx),%xmm13
+
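+# Undo the bitslice interleaving: each copy/shift/xor/mask/xor/shift/xor
+# group below is a "swapmove" exchanging bit groups between a register pair
+# (1-bit groups via BS0, 2-bit via BS1, 4-bit via BS2), so xmm8..xmm15 end
+# up holding byte-ordered output blocks again instead of bit planes.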
+# qhasm:     xmm0 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm0=int6464#1
+# asm 2: movdqa <xmm10=%xmm10,>xmm0=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm0,<xmm13=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm10=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:     xmm0 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm0=int6464#1
+# asm 2: movdqa <xmm11=%xmm11,>xmm0=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm0=int6464#1
+# asm 2: pxor  <xmm15=%xmm15,<xmm0=%xmm0
+pxor  %xmm15,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm15 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm0=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm0,<xmm11=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm:     xmm0 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm0=int6464#1
+# asm 2: movdqa <xmm12=%xmm12,>xmm0=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm0=int6464#1
+# asm 2: pxor  <xmm14=%xmm14,<xmm0=%xmm0
+pxor  %xmm14,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm14 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm0=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:     xmm0 = xmm8
+# asm 1: movdqa <xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: movdqa <xmm8=%xmm8,>xmm0=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm0=int6464#1
+# asm 2: pxor  <xmm9=%xmm9,<xmm0=%xmm0
+pxor  %xmm9,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm0 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm0=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm0=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm0,<xmm13=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm15 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm0=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:     xmm0 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm0=int6464#1
+# asm 2: movdqa <xmm11=%xmm11,>xmm0=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#1
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm0
+pxor  %xmm10,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm10=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm0,<xmm11=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm:     xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#1
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm0=int6464#1
+# asm 2: pxor  <xmm14=%xmm14,<xmm0=%xmm0
+pxor  %xmm14,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm14 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm0=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm0 = xmm8
+# asm 1: movdqa <xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: movdqa <xmm8=%xmm8,>xmm0=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm0=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm0 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm0=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm0=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm0,<xmm13=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm14 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm0=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:     xmm0 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm0=int6464#1
+# asm 2: movdqa <xmm12=%xmm12,>xmm0=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#1
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm0
+pxor  %xmm10,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm10=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:     xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#1
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm0=int6464#1
+# asm 2: pxor  <xmm15=%xmm15,<xmm0=%xmm0
+pxor  %xmm15,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm15 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm0=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm0 = xmm8
+# asm 1: movdqa <xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: movdqa <xmm8=%xmm8,>xmm0=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#1
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm0
+pxor  %xmm11,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm0,<xmm11=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
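+# xmm8..xmm15 now hold 128 bytes of keystream (eight counter-mode blocks);
+# branch on whether more than, exactly, or fewer than 128 output bytes remain.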
+# qhasm: unsigned<? =? len-128
+# asm 1: cmp  $128,<len=int64#2
+# asm 2: cmp  $128,<len=%rsi
+cmp  $128,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto partial if unsigned<
+jb ._partial
+# comment:fp stack unchanged by jump
+
+# qhasm: goto full if =
+je ._full
+
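+# More than 128 bytes remain: bump the 32-bit big-endian counter in the last
+# word of the nonce block (np + 12) by 8, the number of blocks just produced.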
+# qhasm: tmp = *(uint32 *)(np + 12)
+# asm 1: movl   12(<np=int64#3),>tmp=int64#5d
+# asm 2: movl   12(<np=%rdx),>tmp=%r8d
+movl   12(%rdx),%r8d
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#5d
+# asm 2: bswap <tmp=%r8d
+bswap %r8d
+
+# qhasm: tmp += 8
+# asm 1: add  $8,<tmp=int64#5
+# asm 2: add  $8,<tmp=%r8
+add  $8,%r8
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#5d
+# asm 2: bswap <tmp=%r8d
+bswap %r8d
+
+# qhasm: *(uint32 *)(np + 12) = tmp
+# asm 1: movl   <tmp=int64#5d,12(<np=int64#3)
+# asm 2: movl   <tmp=%r8d,12(<np=%rdx)
+movl   %r8d,12(%rdx)
+
+# qhasm: *(int128 *) (outp + 0) = xmm8
+# asm 1: movdqa <xmm8=int6464#9,0(<outp=int64#1)
+# asm 2: movdqa <xmm8=%xmm8,0(<outp=%rdi)
+movdqa %xmm8,0(%rdi)
+
+# qhasm: *(int128 *) (outp + 16) = xmm9
+# asm 1: movdqa <xmm9=int6464#10,16(<outp=int64#1)
+# asm 2: movdqa <xmm9=%xmm9,16(<outp=%rdi)
+movdqa %xmm9,16(%rdi)
+
+# qhasm: *(int128 *) (outp + 32) = xmm12
+# asm 1: movdqa <xmm12=int6464#13,32(<outp=int64#1)
+# asm 2: movdqa <xmm12=%xmm12,32(<outp=%rdi)
+movdqa %xmm12,32(%rdi)
+
+# qhasm: *(int128 *) (outp + 48) = xmm14
+# asm 1: movdqa <xmm14=int6464#15,48(<outp=int64#1)
+# asm 2: movdqa <xmm14=%xmm14,48(<outp=%rdi)
+movdqa %xmm14,48(%rdi)
+
+# qhasm: *(int128 *) (outp + 64) = xmm11
+# asm 1: movdqa <xmm11=int6464#12,64(<outp=int64#1)
+# asm 2: movdqa <xmm11=%xmm11,64(<outp=%rdi)
+movdqa %xmm11,64(%rdi)
+
+# qhasm: *(int128 *) (outp + 80) = xmm15
+# asm 1: movdqa <xmm15=int6464#16,80(<outp=int64#1)
+# asm 2: movdqa <xmm15=%xmm15,80(<outp=%rdi)
+movdqa %xmm15,80(%rdi)
+
+# qhasm: *(int128 *) (outp + 96) = xmm10
+# asm 1: movdqa <xmm10=int6464#11,96(<outp=int64#1)
+# asm 2: movdqa <xmm10=%xmm10,96(<outp=%rdi)
+movdqa %xmm10,96(%rdi)
+
+# qhasm: *(int128 *) (outp + 112) = xmm13
+# asm 1: movdqa <xmm13=int6464#14,112(<outp=int64#1)
+# asm 2: movdqa <xmm13=%xmm13,112(<outp=%rdi)
+movdqa %xmm13,112(%rdi)
+
+# qhasm: len -= 128
+# asm 1: sub  $128,<len=int64#2
+# asm 2: sub  $128,<len=%rsi
+sub  $128,%rsi
+
+# qhasm: outp += 128
+# asm 1: add  $128,<outp=int64#1
+# asm 2: add  $128,<outp=%rdi
+add  $128,%rdi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto enc_block
+jmp ._enc_block
+
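+# Fewer than 128 bytes remain: advance the counter by len/16 (the whole
+# 16-byte blocks covered by the tail), spill the eight keystream blocks to
+# the stack buffer bl, then copy the leftover bytes to outp one at a time.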
+# qhasm: partial:
+._partial:
+
+# qhasm: lensav = len
+# asm 1: mov  <len=int64#2,>lensav=int64#4
+# asm 2: mov  <len=%rsi,>lensav=%rcx
+mov  %rsi,%rcx
+
+# qhasm: (uint32) len >>= 4
+# asm 1: shr  $4,<len=int64#2d
+# asm 2: shr  $4,<len=%esi
+shr  $4,%esi
+
+# qhasm: tmp = *(uint32 *)(np + 12)
+# asm 1: movl   12(<np=int64#3),>tmp=int64#5d
+# asm 2: movl   12(<np=%rdx),>tmp=%r8d
+movl   12(%rdx),%r8d
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#5d
+# asm 2: bswap <tmp=%r8d
+bswap %r8d
+
+# qhasm: tmp += len
+# asm 1: add  <len=int64#2,<tmp=int64#5
+# asm 2: add  <len=%rsi,<tmp=%r8
+add  %rsi,%r8
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#5d
+# asm 2: bswap <tmp=%r8d
+bswap %r8d
+
+# qhasm: *(uint32 *)(np + 12) = tmp
+# asm 1: movl   <tmp=int64#5d,12(<np=int64#3)
+# asm 2: movl   <tmp=%r8d,12(<np=%rdx)
+movl   %r8d,12(%rdx)
+
+# qhasm: blp = &bl
+# asm 1: leaq <bl=stack1024#1,>blp=int64#2
+# asm 2: leaq <bl=32(%rsp),>blp=%rsi
+leaq 32(%rsp),%rsi
+
+# qhasm: *(int128 *)(blp + 0) = xmm8
+# asm 1: movdqa <xmm8=int6464#9,0(<blp=int64#2)
+# asm 2: movdqa <xmm8=%xmm8,0(<blp=%rsi)
+movdqa %xmm8,0(%rsi)
+
+# qhasm: *(int128 *)(blp + 16) = xmm9
+# asm 1: movdqa <xmm9=int6464#10,16(<blp=int64#2)
+# asm 2: movdqa <xmm9=%xmm9,16(<blp=%rsi)
+movdqa %xmm9,16(%rsi)
+
+# qhasm: *(int128 *)(blp + 32) = xmm12
+# asm 1: movdqa <xmm12=int6464#13,32(<blp=int64#2)
+# asm 2: movdqa <xmm12=%xmm12,32(<blp=%rsi)
+movdqa %xmm12,32(%rsi)
+
+# qhasm: *(int128 *)(blp + 48) = xmm14
+# asm 1: movdqa <xmm14=int6464#15,48(<blp=int64#2)
+# asm 2: movdqa <xmm14=%xmm14,48(<blp=%rsi)
+movdqa %xmm14,48(%rsi)
+
+# qhasm: *(int128 *)(blp + 64) = xmm11
+# asm 1: movdqa <xmm11=int6464#12,64(<blp=int64#2)
+# asm 2: movdqa <xmm11=%xmm11,64(<blp=%rsi)
+movdqa %xmm11,64(%rsi)
+
+# qhasm: *(int128 *)(blp + 80) = xmm15
+# asm 1: movdqa <xmm15=int6464#16,80(<blp=int64#2)
+# asm 2: movdqa <xmm15=%xmm15,80(<blp=%rsi)
+movdqa %xmm15,80(%rsi)
+
+# qhasm: *(int128 *)(blp + 96) = xmm10
+# asm 1: movdqa <xmm10=int6464#11,96(<blp=int64#2)
+# asm 2: movdqa <xmm10=%xmm10,96(<blp=%rsi)
+movdqa %xmm10,96(%rsi)
+
+# qhasm: *(int128 *)(blp + 112) = xmm13
+# asm 1: movdqa <xmm13=int6464#14,112(<blp=int64#2)
+# asm 2: movdqa <xmm13=%xmm13,112(<blp=%rsi)
+movdqa %xmm13,112(%rsi)
+
+# qhasm: bytes:
+._bytes:
+
+# qhasm: =? lensav-0
+# asm 1: cmp  $0,<lensav=int64#4
+# asm 2: cmp  $0,<lensav=%rcx
+cmp  $0,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto end if =
+je ._end
+
+# qhasm: b = *(uint8 *)(blp + 0)
+# asm 1: movzbq 0(<blp=int64#2),>b=int64#3
+# asm 2: movzbq 0(<blp=%rsi),>b=%rdx
+movzbq 0(%rsi),%rdx
+
+# qhasm: *(uint8 *)(outp + 0) = b
+# asm 1: movb   <b=int64#3b,0(<outp=int64#1)
+# asm 2: movb   <b=%dl,0(<outp=%rdi)
+movb   %dl,0(%rdi)
+
+# qhasm: blp += 1
+# asm 1: add  $1,<blp=int64#2
+# asm 2: add  $1,<blp=%rsi
+add  $1,%rsi
+
+# qhasm: outp +=1
+# asm 1: add  $1,<outp=int64#1
+# asm 2: add  $1,<outp=%rdi
+add  $1,%rdi
+
+# qhasm: lensav -= 1
+# asm 1: sub  $1,<lensav=int64#4
+# asm 2: sub  $1,<lensav=%rcx
+sub  $1,%rcx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytes
+jmp ._bytes
+
+# qhasm: full:
+._full:
+
+# qhasm: tmp = *(uint32 *)(np + 12)
+# asm 1: movl   12(<np=int64#3),>tmp=int64#4d
+# asm 2: movl   12(<np=%rdx),>tmp=%ecx
+movl   12(%rdx),%ecx
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#4d
+# asm 2: bswap <tmp=%ecx
+bswap %ecx
+
+# qhasm: tmp += len
+# asm 1: add  <len=int64#2,<tmp=int64#4
+# asm 2: add  <len=%rsi,<tmp=%rcx
+add  %rsi,%rcx
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#4d
+# asm 2: bswap <tmp=%ecx
+bswap %ecx
+
+# qhasm: *(uint32 *)(np + 12) = tmp
+# asm 1: movl   <tmp=int64#4d,12(<np=int64#3)
+# asm 2: movl   <tmp=%ecx,12(<np=%rdx)
+movl   %ecx,12(%rdx)
+
+# qhasm: *(int128 *) (outp + 0) = xmm8
+# asm 1: movdqa <xmm8=int6464#9,0(<outp=int64#1)
+# asm 2: movdqa <xmm8=%xmm8,0(<outp=%rdi)
+movdqa %xmm8,0(%rdi)
+
+# qhasm: *(int128 *) (outp + 16) = xmm9
+# asm 1: movdqa <xmm9=int6464#10,16(<outp=int64#1)
+# asm 2: movdqa <xmm9=%xmm9,16(<outp=%rdi)
+movdqa %xmm9,16(%rdi)
+
+# qhasm: *(int128 *) (outp + 32) = xmm12
+# asm 1: movdqa <xmm12=int6464#13,32(<outp=int64#1)
+# asm 2: movdqa <xmm12=%xmm12,32(<outp=%rdi)
+movdqa %xmm12,32(%rdi)
+
+# qhasm: *(int128 *) (outp + 48) = xmm14
+# asm 1: movdqa <xmm14=int6464#15,48(<outp=int64#1)
+# asm 2: movdqa <xmm14=%xmm14,48(<outp=%rdi)
+movdqa %xmm14,48(%rdi)
+
+# qhasm: *(int128 *) (outp + 64) = xmm11
+# asm 1: movdqa <xmm11=int6464#12,64(<outp=int64#1)
+# asm 2: movdqa <xmm11=%xmm11,64(<outp=%rdi)
+movdqa %xmm11,64(%rdi)
+
+# qhasm: *(int128 *) (outp + 80) = xmm15
+# asm 1: movdqa <xmm15=int6464#16,80(<outp=int64#1)
+# asm 2: movdqa <xmm15=%xmm15,80(<outp=%rdi)
+movdqa %xmm15,80(%rdi)
+
+# qhasm: *(int128 *) (outp + 96) = xmm10
+# asm 1: movdqa <xmm10=int6464#11,96(<outp=int64#1)
+# asm 2: movdqa <xmm10=%xmm10,96(<outp=%rdi)
+movdqa %xmm10,96(%rdi)
+
+# qhasm: *(int128 *) (outp + 112) = xmm13
+# asm 1: movdqa <xmm13=int6464#14,112(<outp=int64#1)
+# asm 2: movdqa <xmm13=%xmm13,112(<outp=%rdi)
+movdqa %xmm13,112(%rdi)
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: end:
+._end:
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+xor %rax,%rax
+ret
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/api.h b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/api.h
new file mode 100644
index 00000000..62fc8d88
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/api.h
@@ -0,0 +1,3 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NONCEBYTES 16
+#define CRYPTO_BEFORENMBYTES 1408
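+/* 1408 = 11 round keys x 128 bytes: beforenm stores each AES-128 round key
+   as eight 16-byte bitsliced words (offsets 0 through 1392). */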
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/beforenm.s b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/beforenm.s
new file mode 100644
index 00000000..689ad8c3
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/beforenm.s
@@ -0,0 +1,13694 @@
+# Author: Emilia Käsper and Peter Schwabe
+# Date: 2009-03-19
+# +2010.01.31: minor namespace modifications
+# Public domain
+
+.data
+.p2align 6
+
+RCON: .int 0x00000000, 0x00000000, 0x00000000, 0xffffffff
+ROTB: .int 0x0c000000, 0x00000000, 0x04000000, 0x08000000
+EXPB0: .int 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f
+CTRINC1: .int 0x00000001, 0x00000000, 0x00000000, 0x00000000
+CTRINC2: .int 0x00000002, 0x00000000, 0x00000000, 0x00000000
+CTRINC3: .int 0x00000003, 0x00000000, 0x00000000, 0x00000000
+CTRINC4: .int 0x00000004, 0x00000000, 0x00000000, 0x00000000
+CTRINC5: .int 0x00000005, 0x00000000, 0x00000000, 0x00000000
+CTRINC6: .int 0x00000006, 0x00000000, 0x00000000, 0x00000000
+CTRINC7: .int 0x00000007, 0x00000000, 0x00000000, 0x00000000
+RCTRINC1: .int 0x00000000, 0x00000000, 0x00000000, 0x00000001
+RCTRINC2: .int 0x00000000, 0x00000000, 0x00000000, 0x00000002
+RCTRINC3: .int 0x00000000, 0x00000000, 0x00000000, 0x00000003
+RCTRINC4: .int 0x00000000, 0x00000000, 0x00000000, 0x00000004
+RCTRINC5: .int 0x00000000, 0x00000000, 0x00000000, 0x00000005
+RCTRINC6: .int 0x00000000, 0x00000000, 0x00000000, 0x00000006
+RCTRINC7: .int 0x00000000, 0x00000000, 0x00000000, 0x00000007
+
+SWAP32: .int 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+M0SWAP: .quad 0x0105090d0004080c , 0x03070b0f02060a0e
+
+BS0: .quad 0x5555555555555555, 0x5555555555555555
+BS1: .quad 0x3333333333333333, 0x3333333333333333
+BS2: .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+ONE: .quad 0xffffffffffffffff, 0xffffffffffffffff
+M0:  .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+SRM0:	.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
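+# BS0/BS1/BS2 are the 1-, 2- and 4-bit interleaving masks for the swapmove
+# bitslice conversion below; M0, M0SWAP, SR and SRM0 are pshufb byte-shuffle
+# masks (input reordering and ShiftRows); CTRINC*/RCTRINC* are the counter
+# offsets for the parallel blocks, and RCON/ROTB/EXPB0 serve the key schedule.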
+
+# qhasm: int64 arg1
+
+# qhasm: int64 arg2
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: int64 sboxp
+
+# qhasm: int64 c
+
+# qhasm: int64 k
+
+# qhasm: int64 x0
+
+# qhasm: int64 x1
+
+# qhasm: int64 x2
+
+# qhasm: int64 x3
+
+# qhasm: int64 e
+
+# qhasm: int64 q0
+
+# qhasm: int64 q1
+
+# qhasm: int64 q2
+
+# qhasm: int64 q3
+
+# qhasm: int6464 xmm0
+
+# qhasm: int6464 xmm1
+
+# qhasm: int6464 xmm2
+
+# qhasm: int6464 xmm3
+
+# qhasm: int6464 xmm4
+
+# qhasm: int6464 xmm5
+
+# qhasm: int6464 xmm6
+
+# qhasm: int6464 xmm7
+
+# qhasm: int6464 xmm8
+
+# qhasm: int6464 xmm9
+
+# qhasm: int6464 xmm10
+
+# qhasm: int6464 xmm11
+
+# qhasm: int6464 xmm12
+
+# qhasm: int6464 xmm13
+
+# qhasm: int6464 xmm14
+
+# qhasm: int6464 xmm15
+
+# qhasm: int6464 t
+
+# qhasm: enter crypto_stream_aes128ctr_core2_beforenm
+.text
+.p2align 5
+.globl _crypto_stream_aes128ctr_core2_beforenm
+.globl crypto_stream_aes128ctr_core2_beforenm
+_crypto_stream_aes128ctr_core2_beforenm:
+crypto_stream_aes128ctr_core2_beforenm:
+mov %rsp,%r11
+and $31,%r11
+add $0,%r11
+sub %r11,%rsp
+
+# qhasm: c = arg1
+# asm 1: mov  <arg1=int64#1,>c=int64#1
+# asm 2: mov  <arg1=%rdi,>c=%rdi
+mov  %rdi,%rdi
+
+# qhasm: k = arg2
+# asm 1: mov  <arg2=int64#2,>k=int64#2
+# asm 2: mov  <arg2=%rsi,>k=%rsi
+mov  %rsi,%rsi
+
+# qhasm:   xmm0 = *(int128 *) (k + 0)
+# asm 1: movdqa 0(<k=int64#2),>xmm0=int6464#1
+# asm 2: movdqa 0(<k=%rsi),>xmm0=%xmm0
+movdqa 0(%rsi),%xmm0
+
+# qhasm:   shuffle bytes of xmm0 by M0
+# asm 1: pshufb M0,<xmm0=int6464#1
+# asm 2: pshufb M0,<xmm0=%xmm0
+pshufb M0,%xmm0
+
+# qhasm:   xmm1 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm1=int6464#2
+# asm 2: movdqa <xmm0=%xmm0,>xmm1=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm:   xmm2 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm2=int6464#3
+# asm 2: movdqa <xmm0=%xmm0,>xmm2=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:   xmm3 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm3=int6464#4
+# asm 2: movdqa <xmm0=%xmm0,>xmm3=%xmm3
+movdqa %xmm0,%xmm3
+
+# qhasm:   xmm4 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm4=int6464#5
+# asm 2: movdqa <xmm0=%xmm0,>xmm4=%xmm4
+movdqa %xmm0,%xmm4
+
+# qhasm:   xmm5 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm5=int6464#6
+# asm 2: movdqa <xmm0=%xmm0,>xmm5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:   xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm6=int6464#7
+# asm 2: movdqa <xmm0=%xmm0,>xmm6=%xmm6
+movdqa %xmm0,%xmm6
+
+# qhasm:   xmm7 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm0=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
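+# Transpose the eight key copies into bitsliced form: each of the
+# shift/xor/mask/xor/shift/xor groups below is a swapmove step (BS0 first,
+# then the wider BS1/BS2 masks), after which xmm0..xmm7 each hold one bit
+# position of every key byte.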
+# qhasm:       t = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>t=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>t=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:       uint6464 t >>= 1
+# asm 1: psrlq $1,<t=int6464#9
+# asm 2: psrlq $1,<t=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:       t ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<t=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<t=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:       t &= BS0
+# asm 1: pand  BS0,<t=int6464#9
+# asm 2: pand  BS0,<t=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:       xmm7 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <t=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:       uint6464 t <<= 1
+# asm 1: psllq $1,<t=int6464#9
+# asm 2: psllq $1,<t=%xmm8
+psllq $1,%xmm8
+
+# qhasm:       xmm6 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <t=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:       t = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>t=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>t=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:       uint6464 t >>= 1
+# asm 1: psrlq $1,<t=int6464#9
+# asm 2: psrlq $1,<t=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:       t ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<t=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<t=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:       t &= BS0
+# asm 1: pand  BS0,<t=int6464#9
+# asm 2: pand  BS0,<t=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:       xmm5 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <t=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:       uint6464 t <<= 1
+# asm 1: psllq $1,<t=int6464#9
+# asm 2: psllq $1,<t=%xmm8
+psllq $1,%xmm8
+
+# qhasm:       xmm4 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <t=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       t = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>t=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>t=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:       uint6464 t >>= 1
+# asm 1: psrlq $1,<t=int6464#9
+# asm 2: psrlq $1,<t=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:       t ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<t=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<t=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:       t &= BS0
+# asm 1: pand  BS0,<t=int6464#9
+# asm 2: pand  BS0,<t=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:       xmm3 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <t=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:       uint6464 t <<= 1
+# asm 1: psllq $1,<t=int6464#9
+# asm 2: psllq $1,<t=%xmm8
+psllq $1,%xmm8
+
+# qhasm:       xmm2 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <t=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       t = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>t=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>t=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:       uint6464 t >>= 1
+# asm 1: psrlq $1,<t=int6464#9
+# asm 2: psrlq $1,<t=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:       t ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<t=int6464#9
+# asm 2: pxor  <xmm1=%xmm1,<t=%xmm8
+pxor  %xmm1,%xmm8
+
+# qhasm:       t &= BS0
+# asm 1: pand  BS0,<t=int6464#9
+# asm 2: pand  BS0,<t=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:       xmm1 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <t=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:       uint6464 t <<= 1
+# asm 1: psllq $1,<t=int6464#9
+# asm 2: psllq $1,<t=%xmm8
+psllq $1,%xmm8
+
+# qhasm:       xmm0 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <t=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:       t = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>t=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>t=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:       uint6464 t >>= 2
+# asm 1: psrlq $2,<t=int6464#9
+# asm 2: psrlq $2,<t=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:       t ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<t=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<t=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:       t &= BS1
+# asm 1: pand  BS1,<t=int6464#9
+# asm 2: pand  BS1,<t=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:       xmm7 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <t=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:       uint6464 t <<= 2
+# asm 1: psllq $2,<t=int6464#9
+# asm 2: psllq $2,<t=%xmm8
+psllq $2,%xmm8
+
+# qhasm:       xmm5 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <t=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:       t = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>t=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>t=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:       uint6464 t >>= 2
+# asm 1: psrlq $2,<t=int6464#9
+# asm 2: psrlq $2,<t=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:       t ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<t=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<t=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:       t &= BS1
+# asm 1: pand  BS1,<t=int6464#9
+# asm 2: pand  BS1,<t=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:       xmm6 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <t=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:       uint6464 t <<= 2
+# asm 1: psllq $2,<t=int6464#9
+# asm 2: psllq $2,<t=%xmm8
+psllq $2,%xmm8
+
+# qhasm:       xmm4 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <t=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       t = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>t=int6464#9
+# asm 2: movdqa <xmm1=%xmm1,>t=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm:       uint6464 t >>= 2
+# asm 1: psrlq $2,<t=int6464#9
+# asm 2: psrlq $2,<t=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:       t ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<t=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<t=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:       t &= BS1
+# asm 1: pand  BS1,<t=int6464#9
+# asm 2: pand  BS1,<t=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:       xmm3 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <t=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:       uint6464 t <<= 2
+# asm 1: psllq $2,<t=int6464#9
+# asm 2: psllq $2,<t=%xmm8
+psllq $2,%xmm8
+
+# qhasm:       xmm1 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <t=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:       t = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>t=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>t=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:       uint6464 t >>= 2
+# asm 1: psrlq $2,<t=int6464#9
+# asm 2: psrlq $2,<t=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:       t ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<t=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<t=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:       t &= BS1
+# asm 1: pand  BS1,<t=int6464#9
+# asm 2: pand  BS1,<t=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:       xmm2 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <t=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       uint6464 t <<= 2
+# asm 1: psllq $2,<t=int6464#9
+# asm 2: psllq $2,<t=%xmm8
+psllq $2,%xmm8
+
+# qhasm:       xmm0 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <t=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:       t = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>t=int6464#9
+# asm 2: movdqa <xmm3=%xmm3,>t=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm:       uint6464 t >>= 4
+# asm 1: psrlq $4,<t=int6464#9
+# asm 2: psrlq $4,<t=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:       t ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<t=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<t=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:       t &= BS2
+# asm 1: pand  BS2,<t=int6464#9
+# asm 2: pand  BS2,<t=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:       xmm7 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <t=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:       uint6464 t <<= 4
+# asm 1: psllq $4,<t=int6464#9
+# asm 2: psllq $4,<t=%xmm8
+psllq $4,%xmm8
+
+# qhasm:       xmm3 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <t=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:       t = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>t=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>t=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:       uint6464 t >>= 4
+# asm 1: psrlq $4,<t=int6464#9
+# asm 2: psrlq $4,<t=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:       t ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<t=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<t=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:       t &= BS2
+# asm 1: pand  BS2,<t=int6464#9
+# asm 2: pand  BS2,<t=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:       xmm6 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <t=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:       uint6464 t <<= 4
+# asm 1: psllq $4,<t=int6464#9
+# asm 2: psllq $4,<t=%xmm8
+psllq $4,%xmm8
+
+# qhasm:       xmm2 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <t=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       t = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>t=int6464#9
+# asm 2: movdqa <xmm1=%xmm1,>t=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm:       uint6464 t >>= 4
+# asm 1: psrlq $4,<t=int6464#9
+# asm 2: psrlq $4,<t=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:       t ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<t=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<t=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:       t &= BS2
+# asm 1: pand  BS2,<t=int6464#9
+# asm 2: pand  BS2,<t=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:       xmm5 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <t=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:       uint6464 t <<= 4
+# asm 1: psllq $4,<t=int6464#9
+# asm 2: psllq $4,<t=%xmm8
+psllq $4,%xmm8
+
+# qhasm:       xmm1 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <t=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:       t = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>t=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>t=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:       uint6464 t >>= 4
+# asm 1: psrlq $4,<t=int6464#9
+# asm 2: psrlq $4,<t=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:       t ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<t=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<t=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       t &= BS2
+# asm 1: pand  BS2,<t=int6464#9
+# asm 2: pand  BS2,<t=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:       xmm4 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <t=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       uint6464 t <<= 4
+# asm 1: psllq $4,<t=int6464#9
+# asm 2: psllq $4,<t=%xmm8
+psllq $4,%xmm8
+
+# qhasm:       xmm0 ^= t
+# asm 1: pxor  <t=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <t=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
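+
+# editor's note (assumption): each movdqa/psrlq/pxor/pand/psllq/pxor group
+# above is a standard "swapmove" bit exchange,
+#     t = (a >> n) ^ b;  t &= mask;  b ^= t;  a ^= t << n;
+# run with n = 1, 2 and 4 against masks BS0, BS1 and BS2, which turns the
+# eight copies of the byte-shuffled key in xmm0..xmm7 into bit-sliced form.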
+
+# qhasm:   *(int128 *) (c + 0) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,0(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,0(<c=%rdi)
+movdqa %xmm0,0(%rdi)
+
+# qhasm:   *(int128 *) (c + 16) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,16(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,16(<c=%rdi)
+movdqa %xmm1,16(%rdi)
+
+# qhasm:   *(int128 *) (c + 32) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,32(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,32(<c=%rdi)
+movdqa %xmm2,32(%rdi)
+
+# qhasm:   *(int128 *) (c + 48) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,48(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,48(<c=%rdi)
+movdqa %xmm3,48(%rdi)
+
+# qhasm:   *(int128 *) (c + 64) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,64(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,64(<c=%rdi)
+movdqa %xmm4,64(%rdi)
+
+# qhasm:   *(int128 *) (c + 80) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,80(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,80(<c=%rdi)
+movdqa %xmm5,80(%rdi)
+
+# qhasm:   *(int128 *) (c + 96) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,96(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,96(<c=%rdi)
+movdqa %xmm6,96(%rdi)
+
+# qhasm:   *(int128 *) (c + 112) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,112(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,112(<c=%rdi)
+movdqa %xmm7,112(%rdi)
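+
+# editor's note: the eight slices stored at c + 0 .. c + 112 above make up
+# the bit-sliced round key 0 (each round key occupies 128 bytes in this
+# representation).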
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
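+# editor's note (assumption): the ROTB shuffle above appears to perform the
+# key-schedule word rotation (RotWord), and the long pxor/pand/por/movdqa
+# sequence that follows evaluates the AES S-box as a Boolean circuit over
+# the eight bit slices, using xmm8..xmm15 as scratch registers.
+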
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:   xmm0 ^= RCON
+# asm 1: pxor  RCON,<xmm0=int6464#1
+# asm 2: pxor  RCON,<xmm0=%xmm0
+pxor  RCON,%xmm0
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
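+
+# editor's note (assumption): RCON injected the round constant and EXPB0
+# rearranged the substituted bytes for key expansion; the previous round
+# key is reloaded from c + 0 .. c + 112 below and the repeated
+# "psrld $8 / pxor" ladder presumably realizes the w[i] = w[i-1] ^ w[i-4]
+# recurrence that builds the next round key column by column.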
+
+# qhasm:   xmm8 = *(int128 *)(c + 0)
+# asm 1: movdqa 0(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 0(<c=%rdi),>xmm8=%xmm8
+movdqa 0(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 16)
+# asm 1: movdqa 16(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 16(<c=%rdi),>xmm9=%xmm9
+movdqa 16(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 32)
+# asm 1: movdqa 32(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 32(<c=%rdi),>xmm10=%xmm10
+movdqa 32(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 48)
+# asm 1: movdqa 48(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 48(<c=%rdi),>xmm11=%xmm11
+movdqa 48(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 64)
+# asm 1: movdqa 64(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 64(<c=%rdi),>xmm12=%xmm12
+movdqa 64(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 80)
+# asm 1: movdqa 80(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 80(<c=%rdi),>xmm13=%xmm13
+movdqa 80(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 96)
+# asm 1: movdqa 96(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 96(<c=%rdi),>xmm14=%xmm14
+movdqa 96(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 112)
+# asm 1: movdqa 112(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 112(<c=%rdi),>xmm15=%xmm15
+movdqa 112(%rdi),%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   *(int128 *)(c + 128) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,128(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,128(<c=%rdi)
+movdqa %xmm0,128(%rdi)
+
+# qhasm:   *(int128 *)(c + 144) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,144(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,144(<c=%rdi)
+movdqa %xmm1,144(%rdi)
+
+# qhasm:   *(int128 *)(c + 160) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,160(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,160(<c=%rdi)
+movdqa %xmm4,160(%rdi)
+
+# qhasm:   *(int128 *)(c + 176) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,176(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,176(<c=%rdi)
+movdqa %xmm6,176(%rdi)
+
+# qhasm:   *(int128 *)(c + 192) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,192(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,192(<c=%rdi)
+movdqa %xmm3,192(%rdi)
+
+# qhasm:   *(int128 *)(c + 208) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,208(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,208(<c=%rdi)
+movdqa %xmm7,208(%rdi)
+
+# qhasm:   *(int128 *)(c + 224) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,224(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,224(<c=%rdi)
+movdqa %xmm2,224(%rdi)
+
+# qhasm:   *(int128 *)(c + 240) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,240(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,240(<c=%rdi)
+movdqa %xmm5,240(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm7 ^= ONE
+# asm 1: pxor  ONE,<xmm7=int6464#8
+# asm 2: pxor  ONE,<xmm7=%xmm7
+pxor  ONE,%xmm7
+
+# qhasm:   xmm2 ^= ONE
+# asm 1: pxor  ONE,<xmm2=int6464#3
+# asm 2: pxor  ONE,<xmm2=%xmm2
+pxor  ONE,%xmm2
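+
+# editor's note (assumption): round key 1 now sits at c + 128 .. c + 240;
+# the four "^= ONE" lines above flip the slices corresponding to the set
+# bits of 0x63, presumably compensating for the S-box affine constant that
+# the Boolean circuit omits, before the next key-schedule round starts.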
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm7=int6464#8
+# asm 2: pxor  <xmm2=%xmm2,<xmm7=%xmm7
+pxor  %xmm2,%xmm7
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm1,<xmm4=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm2 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm4=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm5 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm5=int6464#6
+# asm 2: pxor  <xmm7=%xmm7,<xmm5=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:       xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm6=int6464#7
+# asm 2: pxor  <xmm1=%xmm1,<xmm6=%xmm6
+pxor  %xmm1,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm11 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm11=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm11=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm9=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm9=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#13
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm:       xmm11 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm11=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm11=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:       xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:       xmm9 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm9=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm9=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:       xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#12
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm11
+pxor  %xmm3,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#11
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm10
+movdqa %xmm6,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm13=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm13=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#12
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm11
+movdqa %xmm7,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#11
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm10
+movdqa %xmm4,%xmm10
+
+# qhasm:       xmm13 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm13=int6464#12
+# asm 2: movdqa <xmm3=%xmm3,>xmm13=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: movdqa <xmm5=%xmm5,>xmm15=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm:       xmm12 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm12=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm12=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm14=int6464#14
+# asm 2: pand  <xmm7=%xmm7,<xmm14=%xmm13
+pand  %xmm7,%xmm13
+
+# qhasm:       xmm15 |= xmm2
+# asm 1: por   <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: por   <xmm2=%xmm2,<xmm15=%xmm15
+por   %xmm2,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:         xmm8 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm8=int6464#10
+# asm 2: movdqa <xmm7=%xmm7,>xmm8=%xmm9
+movdqa %xmm7,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm10=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:           xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm11,<xmm2=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:           xmm7 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm7=int6464#8
+# asm 2: pand  <xmm15=%xmm13,<xmm7=%xmm7
+pand  %xmm13,%xmm7
+
+# qhasm:           xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:           xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm0=int6464#1
+# asm 2: pxor  <xmm6=%xmm6,<xmm0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm6=int6464#7
+# asm 2: pand  <xmm13=%xmm15,<xmm6=%xmm6
+pand  %xmm15,%xmm6
+
+# qhasm:           xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm0=int6464#1
+# asm 2: pxor  <xmm6=%xmm6,<xmm0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:           xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:         xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm9,<xmm7=%xmm7
+pxor  %xmm9,%xmm7
+
+# qhasm:         xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm9,<xmm6=%xmm6
+pxor  %xmm9,%xmm6
+
+# qhasm:         xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm10=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm10=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:           xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm3=int6464#4
+# asm 2: pand  <xmm9=%xmm12,<xmm3=%xmm3
+pand  %xmm12,%xmm3
+
+# qhasm:           xmm4 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm4=int6464#5
+# asm 2: pand  <xmm13=%xmm15,<xmm4=%xmm4
+pand  %xmm15,%xmm4
+
+# qhasm:           xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm11=int6464#11
+# asm 2: pand  <xmm5=%xmm5,<xmm11=%xmm10
+pand  %xmm5,%xmm10
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm5 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm5=int6464#6
+# asm 2: pand  <xmm14=%xmm11,<xmm5=%xmm5
+pand  %xmm11,%xmm5
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm12=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:         xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm9,<xmm4=%xmm4
+pxor  %xmm9,%xmm4
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm1=int6464#2
+# asm 2: pxor  <xmm2=%xmm2,<xmm1=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm2 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm2=int6464#3
+# asm 2: pxor  <xmm0=%xmm0,<xmm2=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm7 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pxor  <xmm4=%xmm4,<xmm7=%xmm7
+pxor  %xmm4,%xmm7
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm4 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm4=int6464#5
+# asm 2: pxor  <xmm6=%xmm6,<xmm4=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:       xmm6 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm6=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm6=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:   xmm1 ^= RCON
+# asm 1: pxor  RCON,<xmm1=int6464#2
+# asm 2: pxor  RCON,<xmm1=%xmm1
+pxor  RCON,%xmm1
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   xmm8 = *(int128 *)(c + 128)
+# asm 1: movdqa 128(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 128(<c=%rdi),>xmm8=%xmm8
+movdqa 128(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 144)
+# asm 1: movdqa 144(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 144(<c=%rdi),>xmm9=%xmm9
+movdqa 144(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 160)
+# asm 1: movdqa 160(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 160(<c=%rdi),>xmm10=%xmm10
+movdqa 160(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 176)
+# asm 1: movdqa 176(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 176(<c=%rdi),>xmm11=%xmm11
+movdqa 176(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 192)
+# asm 1: movdqa 192(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 192(<c=%rdi),>xmm12=%xmm12
+movdqa 192(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 208)
+# asm 1: movdqa 208(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 208(<c=%rdi),>xmm13=%xmm13
+movdqa 208(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 224)
+# asm 1: movdqa 224(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 224(<c=%rdi),>xmm14=%xmm14
+movdqa 224(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 240)
+# asm 1: movdqa 240(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 240(<c=%rdi),>xmm15=%xmm15
+movdqa 240(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   *(int128 *)(c + 256) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,256(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,256(<c=%rdi)
+movdqa %xmm0,256(%rdi)
+
+# qhasm:   *(int128 *)(c + 272) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,272(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,272(<c=%rdi)
+movdqa %xmm1,272(%rdi)
+
+# qhasm:   *(int128 *)(c + 288) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,288(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,288(<c=%rdi)
+movdqa %xmm3,288(%rdi)
+
+# qhasm:   *(int128 *)(c + 304) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,304(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,304(<c=%rdi)
+movdqa %xmm2,304(%rdi)
+
+# qhasm:   *(int128 *)(c + 320) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,320(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,320(<c=%rdi)
+movdqa %xmm6,320(%rdi)
+
+# qhasm:   *(int128 *)(c + 336) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,336(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,336(<c=%rdi)
+movdqa %xmm5,336(%rdi)
+
+# qhasm:   *(int128 *)(c + 352) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,352(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,352(<c=%rdi)
+movdqa %xmm4,352(%rdi)
+
+# qhasm:   *(int128 *)(c + 368) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,368(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,368(<c=%rdi)
+movdqa %xmm7,368(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm5 ^= ONE
+# asm 1: pxor  ONE,<xmm5=int6464#6
+# asm 2: pxor  ONE,<xmm5=%xmm5
+pxor  ONE,%xmm5
+
+# qhasm:   xmm4 ^= ONE
+# asm 1: pxor  ONE,<xmm4=int6464#5
+# asm 2: pxor  ONE,<xmm4=%xmm4
+pxor  ONE,%xmm4
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:       xmm5 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm5=int6464#6
+# asm 2: pxor  <xmm4=%xmm4,<xmm5=%xmm5
+pxor  %xmm4,%xmm5
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm4=int6464#5
+# asm 2: pxor  <xmm3=%xmm3,<xmm4=%xmm4
+pxor  %xmm3,%xmm4
+
+# qhasm:       xmm2 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm2=int6464#3
+# asm 2: pxor  <xmm0=%xmm0,<xmm2=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm13=int6464#12
+# asm 2: movdqa <xmm3=%xmm3,>xmm13=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#13
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm:       xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:       xmm10 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm10=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm10=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:       xmm9 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm9=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm9=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:       xmm13 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm13=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm13=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm13 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm13=int6464#12
+# asm 2: movdqa <xmm6=%xmm6,>xmm13=%xmm11
+movdqa %xmm6,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm12=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm12=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm15=int6464#16
+# asm 2: por   <xmm4=%xmm4,<xmm15=%xmm15
+por   %xmm4,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:           xmm4 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm4=int6464#5
+# asm 2: pand  <xmm14=%xmm11,<xmm4=%xmm4
+pand  %xmm11,%xmm4
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm0=int6464#1
+# asm 2: pxor  <xmm2=%xmm2,<xmm0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm0 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm0=int6464#1
+# asm 2: pxor  <xmm2=%xmm2,<xmm0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:           xmm6 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm6=int6464#7
+# asm 2: pand  <xmm9=%xmm12,<xmm6=%xmm6
+pand  %xmm12,%xmm6
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm1=int6464#2
+# asm 2: pxor  <xmm4=%xmm4,<xmm1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:       xmm6 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm6=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm6=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm4=int6464#5
+# asm 2: pxor  <xmm0=%xmm0,<xmm4=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm5=int6464#6
+# asm 2: pxor  <xmm3=%xmm3,<xmm5=%xmm5
+pxor  %xmm3,%xmm5
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm3=int6464#4
+# asm 2: pxor  <xmm2=%xmm2,<xmm3=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm2=int6464#3
+# asm 2: pxor  <xmm5=%xmm5,<xmm2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:   xmm6 ^= RCON
+# asm 1: pxor  RCON,<xmm6=int6464#7
+# asm 2: pxor  RCON,<xmm6=%xmm6
+pxor  RCON,%xmm6
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   xmm8 = *(int128 *)(c + 256)
+# asm 1: movdqa 256(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 256(<c=%rdi),>xmm8=%xmm8
+movdqa 256(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 272)
+# asm 1: movdqa 272(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 272(<c=%rdi),>xmm9=%xmm9
+movdqa 272(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 288)
+# asm 1: movdqa 288(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 288(<c=%rdi),>xmm10=%xmm10
+movdqa 288(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 304)
+# asm 1: movdqa 304(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 304(<c=%rdi),>xmm11=%xmm11
+movdqa 304(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 320)
+# asm 1: movdqa 320(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 320(<c=%rdi),>xmm12=%xmm12
+movdqa 320(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 336)
+# asm 1: movdqa 336(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 336(<c=%rdi),>xmm13=%xmm13
+movdqa 336(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 352)
+# asm 1: movdqa 352(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 352(<c=%rdi),>xmm14=%xmm14
+movdqa 352(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 368)
+# asm 1: movdqa 368(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 368(<c=%rdi),>xmm15=%xmm15
+movdqa 368(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   *(int128 *)(c + 384) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,384(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,384(<c=%rdi)
+movdqa %xmm0,384(%rdi)
+
+# qhasm:   *(int128 *)(c + 400) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,400(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,400(<c=%rdi)
+movdqa %xmm1,400(%rdi)
+
+# qhasm:   *(int128 *)(c + 416) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,416(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,416(<c=%rdi)
+movdqa %xmm6,416(%rdi)
+
+# qhasm:   *(int128 *)(c + 432) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,432(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,432(<c=%rdi)
+movdqa %xmm4,432(%rdi)
+
+# qhasm:   *(int128 *)(c + 448) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,448(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,448(<c=%rdi)
+movdqa %xmm2,448(%rdi)
+
+# qhasm:   *(int128 *)(c + 464) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,464(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,464(<c=%rdi)
+movdqa %xmm7,464(%rdi)
+
+# qhasm:   *(int128 *)(c + 480) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,480(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,480(<c=%rdi)
+movdqa %xmm3,480(%rdi)
+
+# qhasm:   *(int128 *)(c + 496) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,496(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,496(<c=%rdi)
+movdqa %xmm5,496(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm7 ^= ONE
+# asm 1: pxor  ONE,<xmm7=int6464#8
+# asm 2: pxor  ONE,<xmm7=%xmm7
+pxor  ONE,%xmm7
+
+# qhasm:   xmm3 ^= ONE
+# asm 1: pxor  ONE,<xmm3=int6464#4
+# asm 2: pxor  ONE,<xmm3=%xmm3
+pxor  ONE,%xmm3
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:       xmm7 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm7=int6464#8
+# asm 2: pxor  <xmm3=%xmm3,<xmm7=%xmm7
+pxor  %xmm3,%xmm7
+
+# qhasm:       xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm6=int6464#7
+# asm 2: pxor  <xmm1=%xmm1,<xmm6=%xmm6
+pxor  %xmm1,%xmm6
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm3=int6464#4
+# asm 2: pxor  <xmm6=%xmm6,<xmm3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:       xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm4=int6464#5
+# asm 2: pxor  <xmm0=%xmm0,<xmm4=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm5 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm5=int6464#6
+# asm 2: pxor  <xmm7=%xmm7,<xmm5=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm1,<xmm4=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm11 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm11=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm11=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm9=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm9=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm13=int6464#12
+# asm 2: movdqa <xmm6=%xmm6,>xmm13=%xmm11
+movdqa %xmm6,%xmm11
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm12
+movdqa %xmm3,%xmm12
+
+# qhasm:       xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:       xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:       xmm9 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm9=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm9=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:       xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#11
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm10
+movdqa %xmm4,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm13=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm13=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#12
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm11
+movdqa %xmm7,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#12
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm11
+pxor  %xmm3,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#11
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm10
+movdqa %xmm6,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: movdqa <xmm5=%xmm5,>xmm15=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm:       xmm12 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm12=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm12=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm14=int6464#14
+# asm 2: pand  <xmm7=%xmm7,<xmm14=%xmm13
+pand  %xmm7,%xmm13
+
+# qhasm:       xmm15 |= xmm3
+# asm 1: por   <xmm3=int6464#4,<xmm15=int6464#16
+# asm 2: por   <xmm3=%xmm3,<xmm15=%xmm15
+por   %xmm3,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#9
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm:         xmm8 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm8=int6464#10
+# asm 2: movdqa <xmm7=%xmm7,>xmm8=%xmm9
+movdqa %xmm7,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm10=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm10=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:           xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:           xmm3 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm3=int6464#4
+# asm 2: pand  <xmm14=%xmm11,<xmm3=%xmm3
+pand  %xmm11,%xmm3
+
+# qhasm:           xmm7 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm7=int6464#8
+# asm 2: pand  <xmm15=%xmm13,<xmm7=%xmm7
+pand  %xmm13,%xmm7
+
+# qhasm:           xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:           xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm0=int6464#1
+# asm 2: pxor  <xmm4=%xmm4,<xmm0=%xmm0
+pxor  %xmm4,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm4 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm4=int6464#5
+# asm 2: pand  <xmm13=%xmm15,<xmm4=%xmm4
+pand  %xmm15,%xmm4
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm0=int6464#1
+# asm 2: pxor  <xmm4=%xmm4,<xmm0=%xmm0
+pxor  %xmm4,%xmm0
+
+# qhasm:           xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:         xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm9,<xmm7=%xmm7
+pxor  %xmm9,%xmm7
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm9,<xmm4=%xmm4
+pxor  %xmm9,%xmm4
+
+# qhasm:         xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:         xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm10=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:           xmm2 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm9=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm6=int6464#7
+# asm 2: pand  <xmm13=%xmm15,<xmm6=%xmm6
+pand  %xmm15,%xmm6
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:           xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm11=int6464#11
+# asm 2: pand  <xmm5=%xmm5,<xmm11=%xmm10
+pand  %xmm5,%xmm10
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm5 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm5=int6464#6
+# asm 2: pand  <xmm14=%xmm11,<xmm5=%xmm5
+pand  %xmm11,%xmm5
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm12=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:         xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm9,<xmm6=%xmm6
+pxor  %xmm9,%xmm6
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm1 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm1=int6464#2
+# asm 2: pxor  <xmm3=%xmm3,<xmm1=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm2=int6464#3
+# asm 2: pxor  <xmm5=%xmm5,<xmm2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm7 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm6=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm6=int6464#7
+# asm 2: pxor  <xmm4=%xmm4,<xmm6=%xmm6
+pxor  %xmm4,%xmm6
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:   xmm3 ^= RCON
+# asm 1: pxor  RCON,<xmm3=int6464#4
+# asm 2: pxor  RCON,<xmm3=%xmm3
+pxor  RCON,%xmm3
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   xmm8 = *(int128 *)(c + 384)
+# asm 1: movdqa 384(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 384(<c=%rdi),>xmm8=%xmm8
+movdqa 384(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 400)
+# asm 1: movdqa 400(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 400(<c=%rdi),>xmm9=%xmm9
+movdqa 400(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 416)
+# asm 1: movdqa 416(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 416(<c=%rdi),>xmm10=%xmm10
+movdqa 416(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 432)
+# asm 1: movdqa 432(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 432(<c=%rdi),>xmm11=%xmm11
+movdqa 432(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 448)
+# asm 1: movdqa 448(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 448(<c=%rdi),>xmm12=%xmm12
+movdqa 448(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 464)
+# asm 1: movdqa 464(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 464(<c=%rdi),>xmm13=%xmm13
+movdqa 464(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 480)
+# asm 1: movdqa 480(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 480(<c=%rdi),>xmm14=%xmm14
+movdqa 480(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 496)
+# asm 1: movdqa 496(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 496(<c=%rdi),>xmm15=%xmm15
+movdqa 496(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   *(int128 *)(c + 512) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,512(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,512(<c=%rdi)
+movdqa %xmm0,512(%rdi)
+
+# qhasm:   *(int128 *)(c + 528) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,528(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,528(<c=%rdi)
+movdqa %xmm1,528(%rdi)
+
+# qhasm:   *(int128 *)(c + 544) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,544(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,544(<c=%rdi)
+movdqa %xmm2,544(%rdi)
+
+# qhasm:   *(int128 *)(c + 560) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,560(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,560(<c=%rdi)
+movdqa %xmm3,560(%rdi)
+
+# qhasm:   *(int128 *)(c + 576) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,576(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,576(<c=%rdi)
+movdqa %xmm4,576(%rdi)
+
+# qhasm:   *(int128 *)(c + 592) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,592(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,592(<c=%rdi)
+movdqa %xmm5,592(%rdi)
+
+# qhasm:   *(int128 *)(c + 608) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,608(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,608(<c=%rdi)
+movdqa %xmm6,608(%rdi)
+
+# qhasm:   *(int128 *)(c + 624) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,624(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,624(<c=%rdi)
+movdqa %xmm7,624(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm5 ^= ONE
+# asm 1: pxor  ONE,<xmm5=int6464#6
+# asm 2: pxor  ONE,<xmm5=%xmm5
+pxor  ONE,%xmm5
+
+# qhasm:   xmm6 ^= ONE
+# asm 1: pxor  ONE,<xmm6=int6464#7
+# asm 2: pxor  ONE,<xmm6=%xmm6
+pxor  ONE,%xmm6
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:   xmm3 ^= RCON
+# asm 1: pxor  RCON,<xmm3=int6464#4
+# asm 2: pxor  RCON,<xmm3=%xmm3
+pxor  RCON,%xmm3
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   xmm8 = *(int128 *)(c + 512)
+# asm 1: movdqa 512(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 512(<c=%rdi),>xmm8=%xmm8
+movdqa 512(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 528)
+# asm 1: movdqa 528(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 528(<c=%rdi),>xmm9=%xmm9
+movdqa 528(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 544)
+# asm 1: movdqa 544(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 544(<c=%rdi),>xmm10=%xmm10
+movdqa 544(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 560)
+# asm 1: movdqa 560(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 560(<c=%rdi),>xmm11=%xmm11
+movdqa 560(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 576)
+# asm 1: movdqa 576(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 576(<c=%rdi),>xmm12=%xmm12
+movdqa 576(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 592)
+# asm 1: movdqa 592(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 592(<c=%rdi),>xmm13=%xmm13
+movdqa 592(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 608)
+# asm 1: movdqa 608(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 608(<c=%rdi),>xmm14=%xmm14
+movdqa 608(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 624)
+# asm 1: movdqa 624(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 624(<c=%rdi),>xmm15=%xmm15
+movdqa 624(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   *(int128 *)(c + 640) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,640(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,640(<c=%rdi)
+movdqa %xmm0,640(%rdi)
+
+# qhasm:   *(int128 *)(c + 656) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,656(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,656(<c=%rdi)
+movdqa %xmm1,656(%rdi)
+
+# qhasm:   *(int128 *)(c + 672) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,672(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,672(<c=%rdi)
+movdqa %xmm4,672(%rdi)
+
+# qhasm:   *(int128 *)(c + 688) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,688(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,688(<c=%rdi)
+movdqa %xmm6,688(%rdi)
+
+# qhasm:   *(int128 *)(c + 704) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,704(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,704(<c=%rdi)
+movdqa %xmm3,704(%rdi)
+
+# qhasm:   *(int128 *)(c + 720) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,720(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,720(<c=%rdi)
+movdqa %xmm7,720(%rdi)
+
+# qhasm:   *(int128 *)(c + 736) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,736(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,736(<c=%rdi)
+movdqa %xmm2,736(%rdi)
+
+# qhasm:   *(int128 *)(c + 752) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,752(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,752(<c=%rdi)
+movdqa %xmm5,752(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm7 ^= ONE
+# asm 1: pxor  ONE,<xmm7=int6464#8
+# asm 2: pxor  ONE,<xmm7=%xmm7
+pxor  ONE,%xmm7
+
+# qhasm:   xmm2 ^= ONE
+# asm 1: pxor  ONE,<xmm2=int6464#3
+# asm 2: pxor  ONE,<xmm2=%xmm2
+pxor  ONE,%xmm2
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm7=int6464#8
+# asm 2: pxor  <xmm2=%xmm2,<xmm7=%xmm7
+pxor  %xmm2,%xmm7
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm1,<xmm4=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm2 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm4=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm5 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm5=int6464#6
+# asm 2: pxor  <xmm7=%xmm7,<xmm5=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:       xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm6=int6464#7
+# asm 2: pxor  <xmm1=%xmm1,<xmm6=%xmm6
+pxor  %xmm1,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm11 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm11=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm11=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm9=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm9=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#13
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm:       xmm11 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm11=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm11=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:       xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:       xmm9 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm9=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm9=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:       xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#12
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm11
+pxor  %xmm3,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#11
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm10
+movdqa %xmm6,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm13=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm13=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#12
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm11
+movdqa %xmm7,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#11
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm10
+movdqa %xmm4,%xmm10
+
+# qhasm:       xmm13 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm13=int6464#12
+# asm 2: movdqa <xmm3=%xmm3,>xmm13=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: movdqa <xmm5=%xmm5,>xmm15=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm:       xmm12 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm12=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm12=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm14=int6464#14
+# asm 2: pand  <xmm7=%xmm7,<xmm14=%xmm13
+pand  %xmm7,%xmm13
+
+# qhasm:       xmm15 |= xmm2
+# asm 1: por   <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: por   <xmm2=%xmm2,<xmm15=%xmm15
+por   %xmm2,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:         xmm8 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm8=int6464#10
+# asm 2: movdqa <xmm7=%xmm7,>xmm8=%xmm9
+movdqa %xmm7,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm10=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:           xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm11,<xmm2=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:           xmm7 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm7=int6464#8
+# asm 2: pand  <xmm15=%xmm13,<xmm7=%xmm7
+pand  %xmm13,%xmm7
+
+# qhasm:           xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:           xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm0=int6464#1
+# asm 2: pxor  <xmm6=%xmm6,<xmm0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm6=int6464#7
+# asm 2: pand  <xmm13=%xmm15,<xmm6=%xmm6
+pand  %xmm15,%xmm6
+
+# qhasm:           xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm0=int6464#1
+# asm 2: pxor  <xmm6=%xmm6,<xmm0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:           xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:         xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm9,<xmm7=%xmm7
+pxor  %xmm9,%xmm7
+
+# qhasm:         xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm9,<xmm6=%xmm6
+pxor  %xmm9,%xmm6
+
+# qhasm:         xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm10=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm10=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:           xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm3=int6464#4
+# asm 2: pand  <xmm9=%xmm12,<xmm3=%xmm3
+pand  %xmm12,%xmm3
+
+# qhasm:           xmm4 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm4=int6464#5
+# asm 2: pand  <xmm13=%xmm15,<xmm4=%xmm4
+pand  %xmm15,%xmm4
+
+# qhasm:           xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm11=int6464#11
+# asm 2: pand  <xmm5=%xmm5,<xmm11=%xmm10
+pand  %xmm5,%xmm10
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm5 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm5=int6464#6
+# asm 2: pand  <xmm14=%xmm11,<xmm5=%xmm5
+pand  %xmm11,%xmm5
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm12=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:         xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm9,<xmm4=%xmm4
+pxor  %xmm9,%xmm4
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm1=int6464#2
+# asm 2: pxor  <xmm2=%xmm2,<xmm1=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm2 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm2=int6464#3
+# asm 2: pxor  <xmm0=%xmm0,<xmm2=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm7 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pxor  <xmm4=%xmm4,<xmm7=%xmm7
+pxor  %xmm4,%xmm7
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm4 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm4=int6464#5
+# asm 2: pxor  <xmm6=%xmm6,<xmm4=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:       xmm6 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm6=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm6=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:   xmm5 ^= RCON
+# asm 1: pxor  RCON,<xmm5=int6464#6
+# asm 2: pxor  RCON,<xmm5=%xmm5
+pxor  RCON,%xmm5
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   xmm8 = *(int128 *)(c + 640)
+# asm 1: movdqa 640(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 640(<c=%rdi),>xmm8=%xmm8
+movdqa 640(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 656)
+# asm 1: movdqa 656(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 656(<c=%rdi),>xmm9=%xmm9
+movdqa 656(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 672)
+# asm 1: movdqa 672(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 672(<c=%rdi),>xmm10=%xmm10
+movdqa 672(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 688)
+# asm 1: movdqa 688(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 688(<c=%rdi),>xmm11=%xmm11
+movdqa 688(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 704)
+# asm 1: movdqa 704(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 704(<c=%rdi),>xmm12=%xmm12
+movdqa 704(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 720)
+# asm 1: movdqa 720(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 720(<c=%rdi),>xmm13=%xmm13
+movdqa 720(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 736)
+# asm 1: movdqa 736(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 736(<c=%rdi),>xmm14=%xmm14
+movdqa 736(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 752)
+# asm 1: movdqa 752(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 752(<c=%rdi),>xmm15=%xmm15
+movdqa 752(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   *(int128 *)(c + 768) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,768(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,768(<c=%rdi)
+movdqa %xmm0,768(%rdi)
+
+# qhasm:   *(int128 *)(c + 784) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,784(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,784(<c=%rdi)
+movdqa %xmm1,784(%rdi)
+
+# qhasm:   *(int128 *)(c + 800) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,800(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,800(<c=%rdi)
+movdqa %xmm3,800(%rdi)
+
+# qhasm:   *(int128 *)(c + 816) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,816(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,816(<c=%rdi)
+movdqa %xmm2,816(%rdi)
+
+# qhasm:   *(int128 *)(c + 832) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,832(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,832(<c=%rdi)
+movdqa %xmm6,832(%rdi)
+
+# qhasm:   *(int128 *)(c + 848) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,848(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,848(<c=%rdi)
+movdqa %xmm5,848(%rdi)
+
+# qhasm:   *(int128 *)(c + 864) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,864(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,864(<c=%rdi)
+movdqa %xmm4,864(%rdi)
+
+# qhasm:   *(int128 *)(c + 880) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,880(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,880(<c=%rdi)
+movdqa %xmm7,880(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm5 ^= ONE
+# asm 1: pxor  ONE,<xmm5=int6464#6
+# asm 2: pxor  ONE,<xmm5=%xmm5
+pxor  ONE,%xmm5
+
+# qhasm:   xmm4 ^= ONE
+# asm 1: pxor  ONE,<xmm4=int6464#5
+# asm 2: pxor  ONE,<xmm4=%xmm4
+pxor  ONE,%xmm4
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:       xmm5 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm5=int6464#6
+# asm 2: pxor  <xmm4=%xmm4,<xmm5=%xmm5
+pxor  %xmm4,%xmm5
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm4=int6464#5
+# asm 2: pxor  <xmm3=%xmm3,<xmm4=%xmm4
+pxor  %xmm3,%xmm4
+
+# qhasm:       xmm2 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm2=int6464#3
+# asm 2: pxor  <xmm0=%xmm0,<xmm2=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm13=int6464#12
+# asm 2: movdqa <xmm3=%xmm3,>xmm13=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#13
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm12
+movdqa %xmm4,%xmm12
+
+# qhasm:       xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:       xmm10 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm10=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm10=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:       xmm9 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm9=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm9=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:       xmm13 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm13=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm13=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm13 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm13=int6464#12
+# asm 2: movdqa <xmm6=%xmm6,>xmm13=%xmm11
+movdqa %xmm6,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm12=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm12=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm15=int6464#16
+# asm 2: por   <xmm4=%xmm4,<xmm15=%xmm15
+por   %xmm4,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:           xmm4 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm4=int6464#5
+# asm 2: pand  <xmm14=%xmm11,<xmm4=%xmm4
+pand  %xmm11,%xmm4
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm0=int6464#1
+# asm 2: pxor  <xmm2=%xmm2,<xmm0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm0 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm0=int6464#1
+# asm 2: pxor  <xmm2=%xmm2,<xmm0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:           xmm6 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm6=int6464#7
+# asm 2: pand  <xmm9=%xmm12,<xmm6=%xmm6
+pand  %xmm12,%xmm6
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm1=int6464#2
+# asm 2: pxor  <xmm4=%xmm4,<xmm1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:       xmm6 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm6=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm6=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm4=int6464#5
+# asm 2: pxor  <xmm0=%xmm0,<xmm4=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm5=int6464#6
+# asm 2: pxor  <xmm3=%xmm3,<xmm5=%xmm5
+pxor  %xmm3,%xmm5
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm3=int6464#4
+# asm 2: pxor  <xmm2=%xmm2,<xmm3=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm2=int6464#3
+# asm 2: pxor  <xmm5=%xmm5,<xmm2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:   xmm3 ^= RCON
+# asm 1: pxor  RCON,<xmm3=int6464#4
+# asm 2: pxor  RCON,<xmm3=%xmm3
+pxor  RCON,%xmm3
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   xmm8 = *(int128 *)(c + 768)
+# asm 1: movdqa 768(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 768(<c=%rdi),>xmm8=%xmm8
+movdqa 768(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 784)
+# asm 1: movdqa 784(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 784(<c=%rdi),>xmm9=%xmm9
+movdqa 784(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 800)
+# asm 1: movdqa 800(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 800(<c=%rdi),>xmm10=%xmm10
+movdqa 800(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 816)
+# asm 1: movdqa 816(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 816(<c=%rdi),>xmm11=%xmm11
+movdqa 816(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 832)
+# asm 1: movdqa 832(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 832(<c=%rdi),>xmm12=%xmm12
+movdqa 832(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 848)
+# asm 1: movdqa 848(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 848(<c=%rdi),>xmm13=%xmm13
+movdqa 848(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 864)
+# asm 1: movdqa 864(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 864(<c=%rdi),>xmm14=%xmm14
+movdqa 864(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 880)
+# asm 1: movdqa 880(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 880(<c=%rdi),>xmm15=%xmm15
+movdqa 880(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:   xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:   xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   *(int128 *)(c + 896) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,896(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,896(<c=%rdi)
+movdqa %xmm0,896(%rdi)
+
+# qhasm:   *(int128 *)(c + 912) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,912(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,912(<c=%rdi)
+movdqa %xmm1,912(%rdi)
+
+# qhasm:   *(int128 *)(c + 928) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,928(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,928(<c=%rdi)
+movdqa %xmm6,928(%rdi)
+
+# qhasm:   *(int128 *)(c + 944) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,944(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,944(<c=%rdi)
+movdqa %xmm4,944(%rdi)
+
+# qhasm:   *(int128 *)(c + 960) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,960(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,960(<c=%rdi)
+movdqa %xmm2,960(%rdi)
+
+# qhasm:   *(int128 *)(c + 976) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,976(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,976(<c=%rdi)
+movdqa %xmm7,976(%rdi)
+
+# qhasm:   *(int128 *)(c + 992) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,992(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,992(<c=%rdi)
+movdqa %xmm3,992(%rdi)
+
+# qhasm:   *(int128 *)(c + 1008) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,1008(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,1008(<c=%rdi)
+movdqa %xmm5,1008(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm7 ^= ONE
+# asm 1: pxor  ONE,<xmm7=int6464#8
+# asm 2: pxor  ONE,<xmm7=%xmm7
+pxor  ONE,%xmm7
+
+# qhasm:   xmm3 ^= ONE
+# asm 1: pxor  ONE,<xmm3=int6464#4
+# asm 2: pxor  ONE,<xmm3=%xmm3
+pxor  ONE,%xmm3
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:       xmm7 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm7=int6464#8
+# asm 2: pxor  <xmm3=%xmm3,<xmm7=%xmm7
+pxor  %xmm3,%xmm7
+
+# qhasm:       xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm6=int6464#7
+# asm 2: pxor  <xmm1=%xmm1,<xmm6=%xmm6
+pxor  %xmm1,%xmm6
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm3=int6464#4
+# asm 2: pxor  <xmm6=%xmm6,<xmm3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:       xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm4=int6464#5
+# asm 2: pxor  <xmm0=%xmm0,<xmm4=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm5 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm5=int6464#6
+# asm 2: pxor  <xmm7=%xmm7,<xmm5=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm1,<xmm4=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm11 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm11=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm11=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm9=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm9=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm13=int6464#12
+# asm 2: movdqa <xmm6=%xmm6,>xmm13=%xmm11
+movdqa %xmm6,%xmm11
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm12
+movdqa %xmm3,%xmm12
+
+# qhasm:       xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:       xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:       xmm9 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm9=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm9=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:       xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#11
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm10
+movdqa %xmm4,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm13=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm13=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#12
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm11
+movdqa %xmm7,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#12
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm11
+pxor  %xmm3,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#11
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm10
+movdqa %xmm6,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: movdqa <xmm5=%xmm5,>xmm15=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm:       xmm12 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm12=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm12=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm14=int6464#14
+# asm 2: pand  <xmm7=%xmm7,<xmm14=%xmm13
+pand  %xmm7,%xmm13
+
+# qhasm:       xmm15 |= xmm3
+# asm 1: por   <xmm3=int6464#4,<xmm15=int6464#16
+# asm 2: por   <xmm3=%xmm3,<xmm15=%xmm15
+por   %xmm3,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#9
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm:         xmm8 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm8=int6464#10
+# asm 2: movdqa <xmm7=%xmm7,>xmm8=%xmm9
+movdqa %xmm7,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm10=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm10=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:           xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:           xmm3 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm3=int6464#4
+# asm 2: pand  <xmm14=%xmm11,<xmm3=%xmm3
+pand  %xmm11,%xmm3
+
+# qhasm:           xmm7 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm7=int6464#8
+# asm 2: pand  <xmm15=%xmm13,<xmm7=%xmm7
+pand  %xmm13,%xmm7
+
+# qhasm:           xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:           xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm0=int6464#1
+# asm 2: pxor  <xmm4=%xmm4,<xmm0=%xmm0
+pxor  %xmm4,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm4 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm4=int6464#5
+# asm 2: pand  <xmm13=%xmm15,<xmm4=%xmm4
+pand  %xmm15,%xmm4
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm0=int6464#1
+# asm 2: pxor  <xmm4=%xmm4,<xmm0=%xmm0
+pxor  %xmm4,%xmm0
+
+# qhasm:           xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:         xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm9,<xmm7=%xmm7
+pxor  %xmm9,%xmm7
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm9,<xmm4=%xmm4
+pxor  %xmm9,%xmm4
+
+# qhasm:         xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:         xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm10=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:           xmm2 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm9=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm6=int6464#7
+# asm 2: pand  <xmm13=%xmm15,<xmm6=%xmm6
+pand  %xmm15,%xmm6
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:           xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm11=int6464#11
+# asm 2: pand  <xmm5=%xmm5,<xmm11=%xmm10
+pand  %xmm5,%xmm10
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm5 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm5=int6464#6
+# asm 2: pand  <xmm14=%xmm11,<xmm5=%xmm5
+pand  %xmm11,%xmm5
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm12=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:         xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm9,<xmm6=%xmm6
+pxor  %xmm9,%xmm6
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm1 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm1=int6464#2
+# asm 2: pxor  <xmm3=%xmm3,<xmm1=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm2=int6464#3
+# asm 2: pxor  <xmm5=%xmm5,<xmm2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm7 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm6=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm6=int6464#7
+# asm 2: pxor  <xmm4=%xmm4,<xmm6=%xmm6
+pxor  %xmm4,%xmm6
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:   xmm7 ^= RCON
+# asm 1: pxor  RCON,<xmm7=int6464#8
+# asm 2: pxor  RCON,<xmm7=%xmm7
+pxor  RCON,%xmm7
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   xmm8 = *(int128 *)(c + 896)
+# asm 1: movdqa 896(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 896(<c=%rdi),>xmm8=%xmm8
+movdqa 896(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 912)
+# asm 1: movdqa 912(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 912(<c=%rdi),>xmm9=%xmm9
+movdqa 912(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 928)
+# asm 1: movdqa 928(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 928(<c=%rdi),>xmm10=%xmm10
+movdqa 928(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 944)
+# asm 1: movdqa 944(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 944(<c=%rdi),>xmm11=%xmm11
+movdqa 944(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 960)
+# asm 1: movdqa 960(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 960(<c=%rdi),>xmm12=%xmm12
+movdqa 960(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 976)
+# asm 1: movdqa 976(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 976(<c=%rdi),>xmm13=%xmm13
+movdqa 976(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 992)
+# asm 1: movdqa 992(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 992(<c=%rdi),>xmm14=%xmm14
+movdqa 992(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 1008)
+# asm 1: movdqa 1008(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 1008(<c=%rdi),>xmm15=%xmm15
+movdqa 1008(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:   xmm3 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm3=int6464#4
+# asm 2: pxor  <xmm11=%xmm11,<xmm3=%xmm3
+pxor  %xmm11,%xmm3
+
+# qhasm:   xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm6 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm6=int6464#7
+# asm 2: pxor  <xmm14=%xmm14,<xmm6=%xmm6
+pxor  %xmm14,%xmm6
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   *(int128 *)(c + 1024) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,1024(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,1024(<c=%rdi)
+movdqa %xmm0,1024(%rdi)
+
+# qhasm:   *(int128 *)(c + 1040) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,1040(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,1040(<c=%rdi)
+movdqa %xmm1,1040(%rdi)
+
+# qhasm:   *(int128 *)(c + 1056) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,1056(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,1056(<c=%rdi)
+movdqa %xmm2,1056(%rdi)
+
+# qhasm:   *(int128 *)(c + 1072) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,1072(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,1072(<c=%rdi)
+movdqa %xmm3,1072(%rdi)
+
+# qhasm:   *(int128 *)(c + 1088) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,1088(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,1088(<c=%rdi)
+movdqa %xmm4,1088(%rdi)
+
+# qhasm:   *(int128 *)(c + 1104) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,1104(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,1104(<c=%rdi)
+movdqa %xmm5,1104(%rdi)
+
+# qhasm:   *(int128 *)(c + 1120) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,1120(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,1120(<c=%rdi)
+movdqa %xmm6,1120(%rdi)
+
+# qhasm:   *(int128 *)(c + 1136) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,1136(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,1136(<c=%rdi)
+movdqa %xmm7,1136(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm5 ^= ONE
+# asm 1: pxor  ONE,<xmm5=int6464#6
+# asm 2: pxor  ONE,<xmm5=%xmm5
+pxor  ONE,%xmm5
+
+# qhasm:   xmm6 ^= ONE
+# asm 1: pxor  ONE,<xmm6=int6464#7
+# asm 2: pxor  ONE,<xmm6=%xmm6
+pxor  ONE,%xmm6
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:   xmm0 ^= RCON
+# asm 1: pxor  RCON,<xmm0=int6464#1
+# asm 2: pxor  RCON,<xmm0=%xmm0
+pxor  RCON,%xmm0
+
+# qhasm:   xmm1 ^= RCON
+# asm 1: pxor  RCON,<xmm1=int6464#2
+# asm 2: pxor  RCON,<xmm1=%xmm1
+pxor  RCON,%xmm1
+
+# qhasm:   xmm6 ^= RCON
+# asm 1: pxor  RCON,<xmm6=int6464#7
+# asm 2: pxor  RCON,<xmm6=%xmm6
+pxor  RCON,%xmm6
+
+# qhasm:   xmm3 ^= RCON
+# asm 1: pxor  RCON,<xmm3=int6464#4
+# asm 2: pxor  RCON,<xmm3=%xmm3
+pxor  RCON,%xmm3
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   xmm8 = *(int128 *)(c + 1024)
+# asm 1: movdqa 1024(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 1024(<c=%rdi),>xmm8=%xmm8
+movdqa 1024(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 1040)
+# asm 1: movdqa 1040(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 1040(<c=%rdi),>xmm9=%xmm9
+movdqa 1040(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 1056)
+# asm 1: movdqa 1056(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 1056(<c=%rdi),>xmm10=%xmm10
+movdqa 1056(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 1072)
+# asm 1: movdqa 1072(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 1072(<c=%rdi),>xmm11=%xmm11
+movdqa 1072(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 1088)
+# asm 1: movdqa 1088(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 1088(<c=%rdi),>xmm12=%xmm12
+movdqa 1088(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 1104)
+# asm 1: movdqa 1104(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 1104(<c=%rdi),>xmm13=%xmm13
+movdqa 1104(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 1120)
+# asm 1: movdqa 1120(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 1120(<c=%rdi),>xmm14=%xmm14
+movdqa 1120(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 1136)
+# asm 1: movdqa 1136(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 1136(<c=%rdi),>xmm15=%xmm15
+movdqa 1136(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:   xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:   xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:   xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:   xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:   xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:   *(int128 *)(c + 1152) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,1152(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,1152(<c=%rdi)
+movdqa %xmm0,1152(%rdi)
+
+# qhasm:   *(int128 *)(c + 1168) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,1168(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,1168(<c=%rdi)
+movdqa %xmm1,1168(%rdi)
+
+# qhasm:   *(int128 *)(c + 1184) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,1184(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,1184(<c=%rdi)
+movdqa %xmm4,1184(%rdi)
+
+# qhasm:   *(int128 *)(c + 1200) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,1200(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,1200(<c=%rdi)
+movdqa %xmm6,1200(%rdi)
+
+# qhasm:   *(int128 *)(c + 1216) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,1216(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,1216(<c=%rdi)
+movdqa %xmm3,1216(%rdi)
+
+# qhasm:   *(int128 *)(c + 1232) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,1232(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,1232(<c=%rdi)
+movdqa %xmm7,1232(%rdi)
+
+# qhasm:   *(int128 *)(c + 1248) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,1248(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,1248(<c=%rdi)
+movdqa %xmm2,1248(%rdi)
+
+# qhasm:   *(int128 *)(c + 1264) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,1264(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,1264(<c=%rdi)
+movdqa %xmm5,1264(%rdi)
+
+# qhasm:   xmm0 ^= ONE
+# asm 1: pxor  ONE,<xmm0=int6464#1
+# asm 2: pxor  ONE,<xmm0=%xmm0
+pxor  ONE,%xmm0
+
+# qhasm:   xmm1 ^= ONE
+# asm 1: pxor  ONE,<xmm1=int6464#2
+# asm 2: pxor  ONE,<xmm1=%xmm1
+pxor  ONE,%xmm1
+
+# qhasm:   xmm7 ^= ONE
+# asm 1: pxor  ONE,<xmm7=int6464#8
+# asm 2: pxor  ONE,<xmm7=%xmm7
+pxor  ONE,%xmm7
+
+# qhasm:   xmm2 ^= ONE
+# asm 1: pxor  ONE,<xmm2=int6464#3
+# asm 2: pxor  ONE,<xmm2=%xmm2
+pxor  ONE,%xmm2
+
+# qhasm:     shuffle bytes of xmm0 by ROTB
+# asm 1: pshufb ROTB,<xmm0=int6464#1
+# asm 2: pshufb ROTB,<xmm0=%xmm0
+pshufb ROTB,%xmm0
+
+# qhasm:     shuffle bytes of xmm1 by ROTB
+# asm 1: pshufb ROTB,<xmm1=int6464#2
+# asm 2: pshufb ROTB,<xmm1=%xmm1
+pshufb ROTB,%xmm1
+
+# qhasm:     shuffle bytes of xmm4 by ROTB
+# asm 1: pshufb ROTB,<xmm4=int6464#5
+# asm 2: pshufb ROTB,<xmm4=%xmm4
+pshufb ROTB,%xmm4
+
+# qhasm:     shuffle bytes of xmm6 by ROTB
+# asm 1: pshufb ROTB,<xmm6=int6464#7
+# asm 2: pshufb ROTB,<xmm6=%xmm6
+pshufb ROTB,%xmm6
+
+# qhasm:     shuffle bytes of xmm3 by ROTB
+# asm 1: pshufb ROTB,<xmm3=int6464#4
+# asm 2: pshufb ROTB,<xmm3=%xmm3
+pshufb ROTB,%xmm3
+
+# qhasm:     shuffle bytes of xmm7 by ROTB
+# asm 1: pshufb ROTB,<xmm7=int6464#8
+# asm 2: pshufb ROTB,<xmm7=%xmm7
+pshufb ROTB,%xmm7
+
+# qhasm:     shuffle bytes of xmm2 by ROTB
+# asm 1: pshufb ROTB,<xmm2=int6464#3
+# asm 2: pshufb ROTB,<xmm2=%xmm2
+pshufb ROTB,%xmm2
+
+# qhasm:     shuffle bytes of xmm5 by ROTB
+# asm 1: pshufb ROTB,<xmm5=int6464#6
+# asm 2: pshufb ROTB,<xmm5=%xmm5
+pshufb ROTB,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm7=int6464#8
+# asm 2: pxor  <xmm2=%xmm2,<xmm7=%xmm7
+pxor  %xmm2,%xmm7
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm1,<xmm4=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm2 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm4=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:       xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm5 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm5=int6464#6
+# asm 2: pxor  <xmm7=%xmm7,<xmm5=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:       xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm6=int6464#7
+# asm 2: pxor  <xmm1=%xmm1,<xmm6=%xmm6
+pxor  %xmm1,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm11 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm11=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm11=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm9=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm9=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#13
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm:       xmm11 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm11=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm11=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:       xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:       xmm9 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm9=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm9=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:       xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#12
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm11
+pxor  %xmm3,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#11
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm10
+movdqa %xmm6,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm13=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm13=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#12
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm11
+movdqa %xmm7,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm12=int6464#11
+# asm 2: movdqa <xmm4=%xmm4,>xmm12=%xmm10
+movdqa %xmm4,%xmm10
+
+# qhasm:       xmm13 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm13=int6464#12
+# asm 2: movdqa <xmm3=%xmm3,>xmm13=%xmm11
+movdqa %xmm3,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: movdqa <xmm5=%xmm5,>xmm15=%xmm15
+movdqa %xmm5,%xmm15
+
+# qhasm:       xmm12 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm12=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm12=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm14=int6464#14
+# asm 2: pand  <xmm7=%xmm7,<xmm14=%xmm13
+pand  %xmm7,%xmm13
+
+# qhasm:       xmm15 |= xmm2
+# asm 1: por   <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: por   <xmm2=%xmm2,<xmm15=%xmm15
+por   %xmm2,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:         xmm8 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm8=int6464#10
+# asm 2: movdqa <xmm7=%xmm7,>xmm8=%xmm9
+movdqa %xmm7,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm2
+# asm 1: pand  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pand  <xmm2=%xmm2,<xmm10=%xmm10
+pand  %xmm2,%xmm10
+
+# qhasm:           xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm11,<xmm2=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:           xmm7 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm7=int6464#8
+# asm 2: pand  <xmm15=%xmm13,<xmm7=%xmm7
+pand  %xmm13,%xmm7
+
+# qhasm:           xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:           xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#10
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm9
+pxor  %xmm6,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm0=int6464#1
+# asm 2: pxor  <xmm6=%xmm6,<xmm0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm6=int6464#7
+# asm 2: pand  <xmm13=%xmm15,<xmm6=%xmm6
+pand  %xmm15,%xmm6
+
+# qhasm:           xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm0=int6464#1
+# asm 2: pxor  <xmm6=%xmm6,<xmm0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:           xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:         xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm9,<xmm7=%xmm7
+pxor  %xmm9,%xmm7
+
+# qhasm:         xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm9,<xmm6=%xmm6
+pxor  %xmm9,%xmm6
+
+# qhasm:         xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#10
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm9
+pxor  %xmm4,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm10=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm10=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:           xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm3=int6464#4
+# asm 2: pand  <xmm9=%xmm12,<xmm3=%xmm3
+pand  %xmm12,%xmm3
+
+# qhasm:           xmm4 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm4=int6464#5
+# asm 2: pand  <xmm13=%xmm15,<xmm4=%xmm4
+pand  %xmm15,%xmm4
+
+# qhasm:           xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm11=int6464#11
+# asm 2: pand  <xmm5=%xmm5,<xmm11=%xmm10
+pand  %xmm5,%xmm10
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm5 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm5=int6464#6
+# asm 2: pand  <xmm14=%xmm11,<xmm5=%xmm5
+pand  %xmm11,%xmm5
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm5 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm5=int6464#6
+# asm 2: pxor  <xmm1=%xmm1,<xmm5=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm12=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:         xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm9,<xmm4=%xmm4
+pxor  %xmm9,%xmm4
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm1=int6464#2
+# asm 2: pxor  <xmm2=%xmm2,<xmm1=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm2 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm2=int6464#3
+# asm 2: pxor  <xmm0=%xmm0,<xmm2=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#2
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm1
+pxor  %xmm7,%xmm1
+
+# qhasm:       xmm7 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pxor  <xmm4=%xmm4,<xmm7=%xmm7
+pxor  %xmm4,%xmm7
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm4 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm4=int6464#5
+# asm 2: pxor  <xmm6=%xmm6,<xmm4=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:       xmm6 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm6=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm6=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm6,<xmm2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm:   xmm1 ^= RCON
+# asm 1: pxor  RCON,<xmm1=int6464#2
+# asm 2: pxor  RCON,<xmm1=%xmm1
+pxor  RCON,%xmm1
+
+# qhasm:   xmm3 ^= RCON
+# asm 1: pxor  RCON,<xmm3=int6464#4
+# asm 2: pxor  RCON,<xmm3=%xmm3
+pxor  RCON,%xmm3
+
+# qhasm:   xmm6 ^= RCON
+# asm 1: pxor  RCON,<xmm6=int6464#7
+# asm 2: pxor  RCON,<xmm6=%xmm6
+pxor  RCON,%xmm6
+
+# qhasm:   xmm5 ^= RCON
+# asm 1: pxor  RCON,<xmm5=int6464#6
+# asm 2: pxor  RCON,<xmm5=%xmm5
+pxor  RCON,%xmm5
+
+# qhasm:   shuffle bytes of xmm0 by EXPB0
+# asm 1: pshufb EXPB0,<xmm0=int6464#1
+# asm 2: pshufb EXPB0,<xmm0=%xmm0
+pshufb EXPB0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by EXPB0
+# asm 1: pshufb EXPB0,<xmm1=int6464#2
+# asm 2: pshufb EXPB0,<xmm1=%xmm1
+pshufb EXPB0,%xmm1
+
+# qhasm:   shuffle bytes of xmm3 by EXPB0
+# asm 1: pshufb EXPB0,<xmm3=int6464#4
+# asm 2: pshufb EXPB0,<xmm3=%xmm3
+pshufb EXPB0,%xmm3
+
+# qhasm:   shuffle bytes of xmm2 by EXPB0
+# asm 1: pshufb EXPB0,<xmm2=int6464#3
+# asm 2: pshufb EXPB0,<xmm2=%xmm2
+pshufb EXPB0,%xmm2
+
+# qhasm:   shuffle bytes of xmm6 by EXPB0
+# asm 1: pshufb EXPB0,<xmm6=int6464#7
+# asm 2: pshufb EXPB0,<xmm6=%xmm6
+pshufb EXPB0,%xmm6
+
+# qhasm:   shuffle bytes of xmm5 by EXPB0
+# asm 1: pshufb EXPB0,<xmm5=int6464#6
+# asm 2: pshufb EXPB0,<xmm5=%xmm5
+pshufb EXPB0,%xmm5
+
+# qhasm:   shuffle bytes of xmm4 by EXPB0
+# asm 1: pshufb EXPB0,<xmm4=int6464#5
+# asm 2: pshufb EXPB0,<xmm4=%xmm4
+pshufb EXPB0,%xmm4
+
+# qhasm:   shuffle bytes of xmm7 by EXPB0
+# asm 1: pshufb EXPB0,<xmm7=int6464#8
+# asm 2: pshufb EXPB0,<xmm7=%xmm7
+pshufb EXPB0,%xmm7
+
+# qhasm:   xmm8 = *(int128 *)(c + 1152)
+# asm 1: movdqa 1152(<c=int64#1),>xmm8=int6464#9
+# asm 2: movdqa 1152(<c=%rdi),>xmm8=%xmm8
+movdqa 1152(%rdi),%xmm8
+
+# qhasm:   xmm9 = *(int128 *)(c + 1168)
+# asm 1: movdqa 1168(<c=int64#1),>xmm9=int6464#10
+# asm 2: movdqa 1168(<c=%rdi),>xmm9=%xmm9
+movdqa 1168(%rdi),%xmm9
+
+# qhasm:   xmm10 = *(int128 *)(c + 1184)
+# asm 1: movdqa 1184(<c=int64#1),>xmm10=int6464#11
+# asm 2: movdqa 1184(<c=%rdi),>xmm10=%xmm10
+movdqa 1184(%rdi),%xmm10
+
+# qhasm:   xmm11 = *(int128 *)(c + 1200)
+# asm 1: movdqa 1200(<c=int64#1),>xmm11=int6464#12
+# asm 2: movdqa 1200(<c=%rdi),>xmm11=%xmm11
+movdqa 1200(%rdi),%xmm11
+
+# qhasm:   xmm12 = *(int128 *)(c + 1216)
+# asm 1: movdqa 1216(<c=int64#1),>xmm12=int6464#13
+# asm 2: movdqa 1216(<c=%rdi),>xmm12=%xmm12
+movdqa 1216(%rdi),%xmm12
+
+# qhasm:   xmm13 = *(int128 *)(c + 1232)
+# asm 1: movdqa 1232(<c=int64#1),>xmm13=int6464#14
+# asm 2: movdqa 1232(<c=%rdi),>xmm13=%xmm13
+movdqa 1232(%rdi),%xmm13
+
+# qhasm:   xmm14 = *(int128 *)(c + 1248)
+# asm 1: movdqa 1248(<c=int64#1),>xmm14=int6464#15
+# asm 2: movdqa 1248(<c=%rdi),>xmm14=%xmm14
+movdqa 1248(%rdi),%xmm14
+
+# qhasm:   xmm15 = *(int128 *)(c + 1264)
+# asm 1: movdqa 1264(<c=int64#1),>xmm15=int6464#16
+# asm 2: movdqa 1264(<c=%rdi),>xmm15=%xmm15
+movdqa 1264(%rdi),%xmm15
+
+# qhasm:   xmm8 ^= ONE
+# asm 1: pxor  ONE,<xmm8=int6464#9
+# asm 2: pxor  ONE,<xmm8=%xmm8
+pxor  ONE,%xmm8
+
+# qhasm:   xmm9 ^= ONE
+# asm 1: pxor  ONE,<xmm9=int6464#10
+# asm 2: pxor  ONE,<xmm9=%xmm9
+pxor  ONE,%xmm9
+
+# qhasm:   xmm13 ^= ONE
+# asm 1: pxor  ONE,<xmm13=int6464#14
+# asm 2: pxor  ONE,<xmm13=%xmm13
+pxor  ONE,%xmm13
+
+# qhasm:   xmm14 ^= ONE
+# asm 1: pxor  ONE,<xmm14=int6464#15
+# asm 2: pxor  ONE,<xmm14=%xmm14
+pxor  ONE,%xmm14
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   uint32323232 xmm8 >>= 8
+# asm 1: psrld $8,<xmm8=int6464#9
+# asm 2: psrld $8,<xmm8=%xmm8
+psrld $8,%xmm8
+
+# qhasm:   uint32323232 xmm9 >>= 8
+# asm 1: psrld $8,<xmm9=int6464#10
+# asm 2: psrld $8,<xmm9=%xmm9
+psrld $8,%xmm9
+
+# qhasm:   uint32323232 xmm10 >>= 8
+# asm 1: psrld $8,<xmm10=int6464#11
+# asm 2: psrld $8,<xmm10=%xmm10
+psrld $8,%xmm10
+
+# qhasm:   uint32323232 xmm11 >>= 8
+# asm 1: psrld $8,<xmm11=int6464#12
+# asm 2: psrld $8,<xmm11=%xmm11
+psrld $8,%xmm11
+
+# qhasm:   uint32323232 xmm12 >>= 8
+# asm 1: psrld $8,<xmm12=int6464#13
+# asm 2: psrld $8,<xmm12=%xmm12
+psrld $8,%xmm12
+
+# qhasm:   uint32323232 xmm13 >>= 8
+# asm 1: psrld $8,<xmm13=int6464#14
+# asm 2: psrld $8,<xmm13=%xmm13
+psrld $8,%xmm13
+
+# qhasm:   uint32323232 xmm14 >>= 8
+# asm 1: psrld $8,<xmm14=int6464#15
+# asm 2: psrld $8,<xmm14=%xmm14
+psrld $8,%xmm14
+
+# qhasm:   uint32323232 xmm15 >>= 8
+# asm 1: psrld $8,<xmm15=int6464#16
+# asm 2: psrld $8,<xmm15=%xmm15
+psrld $8,%xmm15
+
+# qhasm:   xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:   xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:   xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:   xmm2 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm2=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm2=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:   xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm12,<xmm6=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm:   xmm5 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm5=int6464#6
+# asm 2: pxor  <xmm13=%xmm13,<xmm5=%xmm5
+pxor  %xmm13,%xmm5
+
+# qhasm:   xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:   xmm7 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm7=int6464#8
+# asm 2: pxor  <xmm15=%xmm15,<xmm7=%xmm7
+pxor  %xmm15,%xmm7
+
+# qhasm:   shuffle bytes of xmm0 by M0
+# asm 1: pshufb M0,<xmm0=int6464#1
+# asm 2: pshufb M0,<xmm0=%xmm0
+pshufb M0,%xmm0
+
+# qhasm:   shuffle bytes of xmm1 by M0
+# asm 1: pshufb M0,<xmm1=int6464#2
+# asm 2: pshufb M0,<xmm1=%xmm1
+pshufb M0,%xmm1
+
+# qhasm:   shuffle bytes of xmm4 by M0
+# asm 1: pshufb M0,<xmm4=int6464#5
+# asm 2: pshufb M0,<xmm4=%xmm4
+pshufb M0,%xmm4
+
+# qhasm:   shuffle bytes of xmm6 by M0
+# asm 1: pshufb M0,<xmm6=int6464#7
+# asm 2: pshufb M0,<xmm6=%xmm6
+pshufb M0,%xmm6
+
+# qhasm:   shuffle bytes of xmm3 by M0
+# asm 1: pshufb M0,<xmm3=int6464#4
+# asm 2: pshufb M0,<xmm3=%xmm3
+pshufb M0,%xmm3
+
+# qhasm:   shuffle bytes of xmm7 by M0
+# asm 1: pshufb M0,<xmm7=int6464#8
+# asm 2: pshufb M0,<xmm7=%xmm7
+pshufb M0,%xmm7
+
+# qhasm:   shuffle bytes of xmm2 by M0
+# asm 1: pshufb M0,<xmm2=int6464#3
+# asm 2: pshufb M0,<xmm2=%xmm2
+pshufb M0,%xmm2
+
+# qhasm:   shuffle bytes of xmm5 by M0
+# asm 1: pshufb M0,<xmm5=int6464#6
+# asm 2: pshufb M0,<xmm5=%xmm5
+pshufb M0,%xmm5
+
+# qhasm:   *(int128 *)(c + 1280) = xmm0
+# asm 1: movdqa <xmm0=int6464#1,1280(<c=int64#1)
+# asm 2: movdqa <xmm0=%xmm0,1280(<c=%rdi)
+movdqa %xmm0,1280(%rdi)
+
+# qhasm:   *(int128 *)(c + 1296) = xmm1
+# asm 1: movdqa <xmm1=int6464#2,1296(<c=int64#1)
+# asm 2: movdqa <xmm1=%xmm1,1296(<c=%rdi)
+movdqa %xmm1,1296(%rdi)
+
+# qhasm:   *(int128 *)(c + 1312) = xmm3
+# asm 1: movdqa <xmm3=int6464#4,1312(<c=int64#1)
+# asm 2: movdqa <xmm3=%xmm3,1312(<c=%rdi)
+movdqa %xmm3,1312(%rdi)
+
+# qhasm:   *(int128 *)(c + 1328) = xmm2
+# asm 1: movdqa <xmm2=int6464#3,1328(<c=int64#1)
+# asm 2: movdqa <xmm2=%xmm2,1328(<c=%rdi)
+movdqa %xmm2,1328(%rdi)
+
+# qhasm:   *(int128 *)(c + 1344) = xmm6
+# asm 1: movdqa <xmm6=int6464#7,1344(<c=int64#1)
+# asm 2: movdqa <xmm6=%xmm6,1344(<c=%rdi)
+movdqa %xmm6,1344(%rdi)
+
+# qhasm:   *(int128 *)(c + 1360) = xmm5
+# asm 1: movdqa <xmm5=int6464#6,1360(<c=int64#1)
+# asm 2: movdqa <xmm5=%xmm5,1360(<c=%rdi)
+movdqa %xmm5,1360(%rdi)
+
+# qhasm:   *(int128 *)(c + 1376) = xmm4
+# asm 1: movdqa <xmm4=int6464#5,1376(<c=int64#1)
+# asm 2: movdqa <xmm4=%xmm4,1376(<c=%rdi)
+movdqa %xmm4,1376(%rdi)
+
+# qhasm:   *(int128 *)(c + 1392) = xmm7
+# asm 1: movdqa <xmm7=int6464#8,1392(<c=int64#1)
+# asm 2: movdqa <xmm7=%xmm7,1392(<c=%rdi)
+movdqa %xmm7,1392(%rdi)
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+xor %rax,%rax
+ret
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/stream.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/stream.c
new file mode 100644
index 00000000..53524a62
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/stream.c
@@ -0,0 +1,14 @@
+#include "crypto_stream.h"
+
+int crypto_stream(
+        unsigned char *out,
+        unsigned long long outlen,
+        const unsigned char *n,
+        const unsigned char *k
+        )
+{
+    unsigned char d[crypto_stream_BEFORENMBYTES];
+    crypto_stream_beforenm(d, k);
+    crypto_stream_afternm(out, outlen, n, d);
+    return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/xor.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/xor.c
new file mode 100644
index 00000000..825088cc
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/xor.c
@@ -0,0 +1,15 @@
+#include "crypto_stream.h"
+
+int crypto_stream_xor(
+        unsigned char *out,
+        const unsigned char *in,
+        unsigned long long inlen,
+        const unsigned char *n,
+        const unsigned char *k
+        )
+{
+    unsigned char d[crypto_stream_BEFORENMBYTES];
+    crypto_stream_beforenm(d, k);
+    crypto_stream_xor_afternm(out, in, inlen, n, d);
+    return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/xor_afternm.s b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/xor_afternm.s
new file mode 100644
index 00000000..022691a2
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/core2/xor_afternm.s
@@ -0,0 +1,12407 @@
+# Author: Emilia Käsper and Peter Schwabe
+# Date: 2009-03-19
+# +2010.01.31: minor namespace modifications
+# Public domain
+
+.data
+.p2align 6
+
+RCON: .int 0x00000000, 0x00000000, 0x00000000, 0xffffffff
+ROTB: .int 0x0c000000, 0x00000000, 0x04000000, 0x08000000
+EXPB0: .int 0x03030303, 0x07070707, 0x0b0b0b0b, 0x0f0f0f0f
+CTRINC1: .int 0x00000001, 0x00000000, 0x00000000, 0x00000000
+CTRINC2: .int 0x00000002, 0x00000000, 0x00000000, 0x00000000
+CTRINC3: .int 0x00000003, 0x00000000, 0x00000000, 0x00000000
+CTRINC4: .int 0x00000004, 0x00000000, 0x00000000, 0x00000000
+CTRINC5: .int 0x00000005, 0x00000000, 0x00000000, 0x00000000
+CTRINC6: .int 0x00000006, 0x00000000, 0x00000000, 0x00000000
+CTRINC7: .int 0x00000007, 0x00000000, 0x00000000, 0x00000000
+RCTRINC1: .int 0x00000000, 0x00000000, 0x00000000, 0x00000001
+RCTRINC2: .int 0x00000000, 0x00000000, 0x00000000, 0x00000002
+RCTRINC3: .int 0x00000000, 0x00000000, 0x00000000, 0x00000003
+RCTRINC4: .int 0x00000000, 0x00000000, 0x00000000, 0x00000004
+RCTRINC5: .int 0x00000000, 0x00000000, 0x00000000, 0x00000005
+RCTRINC6: .int 0x00000000, 0x00000000, 0x00000000, 0x00000006
+RCTRINC7: .int 0x00000000, 0x00000000, 0x00000000, 0x00000007
+
+SWAP32: .int 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+M0SWAP: .quad 0x0105090d0004080c, 0x03070b0f02060a0e
+
+BS0: .quad 0x5555555555555555, 0x5555555555555555
+BS1: .quad 0x3333333333333333, 0x3333333333333333
+BS2: .quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+ONE: .quad 0xffffffffffffffff, 0xffffffffffffffff
+M0:  .quad 0x02060a0e03070b0f, 0x0004080c0105090d
+SRM0:	.quad 0x0304090e00050a0f, 0x01060b0c0207080d
+SR: .quad 0x0504070600030201, 0x0f0e0d0c0a09080b
+
+# qhasm: int64 outp
+
+# qhasm: int64 inp
+
+# qhasm: int64 len
+
+# qhasm: int64 np
+
+# qhasm: int64 c
+
+# qhasm: input outp
+
+# qhasm: input inp
+
+# qhasm: input len
+
+# qhasm: input np
+
+# qhasm: input c
+
+# qhasm: int64 lensav
+
+# qhasm: int64 tmp
+
+# qhasm: int6464 xmm0
+
+# qhasm: int6464 xmm1
+
+# qhasm: int6464 xmm2
+
+# qhasm: int6464 xmm3
+
+# qhasm: int6464 xmm4
+
+# qhasm: int6464 xmm5
+
+# qhasm: int6464 xmm6
+
+# qhasm: int6464 xmm7
+
+# qhasm: int6464 xmm8
+
+# qhasm: int6464 xmm9
+
+# qhasm: int6464 xmm10
+
+# qhasm: int6464 xmm11
+
+# qhasm: int6464 xmm12
+
+# qhasm: int6464 xmm13
+
+# qhasm: int6464 xmm14
+
+# qhasm: int6464 xmm15
+
+# qhasm: int6464 t
+
+# qhasm: stack1024 bl
+
+# qhasm: stack128 nonce_stack
+
+# qhasm: int64 blp
+
+# qhasm: int64 b
+
+# qhasm: enter crypto_stream_aes128ctr_core2_xor_afternm
+.text
+.p2align 5
+.globl _crypto_stream_aes128ctr_core2_xor_afternm
+.globl crypto_stream_aes128ctr_core2_xor_afternm
+_crypto_stream_aes128ctr_core2_xor_afternm:
+crypto_stream_aes128ctr_core2_xor_afternm:
+mov %rsp,%r11
+and $31,%r11
+add $160,%r11
+sub %r11,%rsp
+
+# qhasm: xmm0 = *(int128 *) (np + 0)
+# asm 1: movdqa 0(<np=int64#4),>xmm0=int6464#1
+# asm 2: movdqa 0(<np=%rcx),>xmm0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: nonce_stack = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>nonce_stack=stack128#1
+# asm 2: movdqa <xmm0=%xmm0,>nonce_stack=0(%rsp)
+movdqa %xmm0,0(%rsp)
+
+# qhasm: np = &nonce_stack
+# asm 1: leaq <nonce_stack=stack128#1,>np=int64#4
+# asm 2: leaq <nonce_stack=0(%rsp),>np=%rcx
+leaq 0(%rsp),%rcx
+
+# qhasm: enc_block:
+._enc_block:
+
+# qhasm: xmm0 = *(int128 *) (np + 0)
+# asm 1: movdqa 0(<np=int64#4),>xmm0=int6464#1
+# asm 2: movdqa 0(<np=%rcx),>xmm0=%xmm0
+movdqa 0(%rcx),%xmm0
+
+# qhasm: xmm1 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm1=int6464#2
+# asm 2: movdqa <xmm0=%xmm0,>xmm1=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: shuffle bytes of xmm1 by SWAP32
+# asm 1: pshufb SWAP32,<xmm1=int6464#2
+# asm 2: pshufb SWAP32,<xmm1=%xmm1
+pshufb SWAP32,%xmm1
+
+# qhasm: xmm2 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm2=int6464#3
+# asm 2: movdqa <xmm1=%xmm1,>xmm2=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: xmm3 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm3=int6464#4
+# asm 2: movdqa <xmm1=%xmm1,>xmm3=%xmm3
+movdqa %xmm1,%xmm3
+
+# qhasm: xmm4 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm4=int6464#5
+# asm 2: movdqa <xmm1=%xmm1,>xmm4=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm5=int6464#6
+# asm 2: movdqa <xmm1=%xmm1,>xmm5=%xmm5
+movdqa %xmm1,%xmm5
+
+# qhasm: xmm6 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm6=int6464#7
+# asm 2: movdqa <xmm1=%xmm1,>xmm6=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: xmm7 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm7=int6464#8
+# asm 2: movdqa <xmm1=%xmm1,>xmm7=%xmm7
+movdqa %xmm1,%xmm7
+
+# qhasm: int32323232 xmm1 += RCTRINC1
+# asm 1: paddd  RCTRINC1,<xmm1=int6464#2
+# asm 2: paddd  RCTRINC1,<xmm1=%xmm1
+paddd  RCTRINC1,%xmm1
+
+# qhasm: int32323232 xmm2 += RCTRINC2
+# asm 1: paddd  RCTRINC2,<xmm2=int6464#3
+# asm 2: paddd  RCTRINC2,<xmm2=%xmm2
+paddd  RCTRINC2,%xmm2
+
+# qhasm: int32323232 xmm3 += RCTRINC3
+# asm 1: paddd  RCTRINC3,<xmm3=int6464#4
+# asm 2: paddd  RCTRINC3,<xmm3=%xmm3
+paddd  RCTRINC3,%xmm3
+
+# qhasm: int32323232 xmm4 += RCTRINC4
+# asm 1: paddd  RCTRINC4,<xmm4=int6464#5
+# asm 2: paddd  RCTRINC4,<xmm4=%xmm4
+paddd  RCTRINC4,%xmm4
+
+# qhasm: int32323232 xmm5 += RCTRINC5
+# asm 1: paddd  RCTRINC5,<xmm5=int6464#6
+# asm 2: paddd  RCTRINC5,<xmm5=%xmm5
+paddd  RCTRINC5,%xmm5
+
+# qhasm: int32323232 xmm6 += RCTRINC6
+# asm 1: paddd  RCTRINC6,<xmm6=int6464#7
+# asm 2: paddd  RCTRINC6,<xmm6=%xmm6
+paddd  RCTRINC6,%xmm6
+
+# qhasm: int32323232 xmm7 += RCTRINC7
+# asm 1: paddd  RCTRINC7,<xmm7=int6464#8
+# asm 2: paddd  RCTRINC7,<xmm7=%xmm7
+paddd  RCTRINC7,%xmm7
+
+# qhasm: shuffle bytes of xmm0 by M0
+# asm 1: pshufb M0,<xmm0=int6464#1
+# asm 2: pshufb M0,<xmm0=%xmm0
+pshufb M0,%xmm0
+
+# qhasm: shuffle bytes of xmm1 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm1=int6464#2
+# asm 2: pshufb M0SWAP,<xmm1=%xmm1
+pshufb M0SWAP,%xmm1
+
+# qhasm: shuffle bytes of xmm2 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm2=int6464#3
+# asm 2: pshufb M0SWAP,<xmm2=%xmm2
+pshufb M0SWAP,%xmm2
+
+# qhasm: shuffle bytes of xmm3 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm3=int6464#4
+# asm 2: pshufb M0SWAP,<xmm3=%xmm3
+pshufb M0SWAP,%xmm3
+
+# qhasm: shuffle bytes of xmm4 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm4=int6464#5
+# asm 2: pshufb M0SWAP,<xmm4=%xmm4
+pshufb M0SWAP,%xmm4
+
+# qhasm: shuffle bytes of xmm5 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm5=int6464#6
+# asm 2: pshufb M0SWAP,<xmm5=%xmm5
+pshufb M0SWAP,%xmm5
+
+# qhasm: shuffle bytes of xmm6 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm6=int6464#7
+# asm 2: pshufb M0SWAP,<xmm6=%xmm6
+pshufb M0SWAP,%xmm6
+
+# qhasm: shuffle bytes of xmm7 by M0SWAP
+# asm 1: pshufb M0SWAP,<xmm7=int6464#8
+# asm 2: pshufb M0SWAP,<xmm7=%xmm7
+pshufb M0SWAP,%xmm7
+
+# qhasm:     xmm8 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm8=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm8=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm8=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<xmm8=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:     xmm8 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm8=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>xmm8=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:     xmm8 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm8=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm8=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:     xmm8 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>xmm8=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 1
+# asm 1: psrlq $1,<xmm8=int6464#9
+# asm 2: psrlq $1,<xmm8=%xmm8
+psrlq $1,%xmm8
+
+# qhasm:     xmm8 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm8=int6464#9
+# asm 2: pxor  <xmm1=%xmm1,<xmm8=%xmm8
+pxor  %xmm1,%xmm8
+
+# qhasm:     xmm8 &= BS0
+# asm 1: pand  BS0,<xmm8=int6464#9
+# asm 2: pand  BS0,<xmm8=%xmm8
+pand  BS0,%xmm8
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     uint6464 xmm8 <<= 1
+# asm 1: psllq $1,<xmm8=int6464#9
+# asm 2: psllq $1,<xmm8=%xmm8
+psllq $1,%xmm8
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#9
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm8
+movdqa %xmm5,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm8=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<xmm8=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:     xmm8 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm8=int6464#9
+# asm 2: movdqa <xmm4=%xmm4,>xmm8=%xmm8
+movdqa %xmm4,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:     xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#9
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#9
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm8
+pxor  %xmm3,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm8 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>xmm8=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 2
+# asm 1: psrlq $2,<xmm8=int6464#9
+# asm 2: psrlq $2,<xmm8=%xmm8
+psrlq $2,%xmm8
+
+# qhasm:     xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#9
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm:     xmm8 &= BS1
+# asm 1: pand  BS1,<xmm8=int6464#9
+# asm 2: pand  BS1,<xmm8=%xmm8
+pand  BS1,%xmm8
+
+# qhasm:     xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:     uint6464 xmm8 <<= 2
+# asm 1: psllq $2,<xmm8=int6464#9
+# asm 2: psllq $2,<xmm8=%xmm8
+psllq $2,%xmm8
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm8 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm8=int6464#9
+# asm 2: movdqa <xmm3=%xmm3,>xmm8=%xmm8
+movdqa %xmm3,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm8=int6464#9
+# asm 2: pxor  <xmm7=%xmm7,<xmm8=%xmm8
+pxor  %xmm7,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm7 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm8=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm8,<xmm3=%xmm3
+pxor  %xmm8,%xmm3
+
+# qhasm:     xmm8 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm8=int6464#9
+# asm 2: movdqa <xmm2=%xmm2,>xmm8=%xmm8
+movdqa %xmm2,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm8=int6464#9
+# asm 2: pxor  <xmm6=%xmm6,<xmm8=%xmm8
+pxor  %xmm6,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm6 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm8=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm2=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:     xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#9
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm8
+movdqa %xmm1,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm8,<xmm5=%xmm5
+pxor  %xmm8,%xmm5
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm8 = xmm0
+# asm 1: movdqa <xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: movdqa <xmm0=%xmm0,>xmm8=%xmm8
+movdqa %xmm0,%xmm8
+
+# qhasm:     uint6464 xmm8 >>= 4
+# asm 1: psrlq $4,<xmm8=int6464#9
+# asm 2: psrlq $4,<xmm8=%xmm8
+psrlq $4,%xmm8
+
+# qhasm:     xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm8=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:     xmm8 &= BS2
+# asm 1: pand  BS2,<xmm8=int6464#9
+# asm 2: pand  BS2,<xmm8=%xmm8
+pand  BS2,%xmm8
+
+# qhasm:     xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:     uint6464 xmm8 <<= 4
+# asm 1: psllq $4,<xmm8=int6464#9
+# asm 2: psllq $4,<xmm8=%xmm8
+psllq $4,%xmm8
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 0)
+# asm 1: pxor 0(<c=int64#5),<xmm0=int6464#1
+# asm 2: pxor 0(<c=%r8),<xmm0=%xmm0
+pxor 0(%r8),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 16)
+# asm 1: pxor 16(<c=int64#5),<xmm1=int6464#2
+# asm 2: pxor 16(<c=%r8),<xmm1=%xmm1
+pxor 16(%r8),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 32)
+# asm 1: pxor 32(<c=int64#5),<xmm2=int6464#3
+# asm 2: pxor 32(<c=%r8),<xmm2=%xmm2
+pxor 32(%r8),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 48)
+# asm 1: pxor 48(<c=int64#5),<xmm3=int6464#4
+# asm 2: pxor 48(<c=%r8),<xmm3=%xmm3
+pxor 48(%r8),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 64)
+# asm 1: pxor 64(<c=int64#5),<xmm4=int6464#5
+# asm 2: pxor 64(<c=%r8),<xmm4=%xmm4
+pxor 64(%r8),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 80)
+# asm 1: pxor 80(<c=int64#5),<xmm5=int6464#6
+# asm 2: pxor 80(<c=%r8),<xmm5=%xmm5
+pxor 80(%r8),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 96)
+# asm 1: pxor 96(<c=int64#5),<xmm6=int6464#7
+# asm 2: pxor 96(<c=%r8),<xmm6=%xmm6
+pxor 96(%r8),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 112)
+# asm 1: pxor 112(<c=int64#5),<xmm7=int6464#8
+# asm 2: pxor 112(<c=%r8),<xmm7=%xmm7
+pxor 112(%r8),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 128)
+# asm 1: pxor 128(<c=int64#5),<xmm8=int6464#9
+# asm 2: pxor 128(<c=%r8),<xmm8=%xmm8
+pxor 128(%r8),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 144)
+# asm 1: pxor 144(<c=int64#5),<xmm9=int6464#10
+# asm 2: pxor 144(<c=%r8),<xmm9=%xmm9
+pxor 144(%r8),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 160)
+# asm 1: pxor 160(<c=int64#5),<xmm10=int6464#11
+# asm 2: pxor 160(<c=%r8),<xmm10=%xmm10
+pxor 160(%r8),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 176)
+# asm 1: pxor 176(<c=int64#5),<xmm11=int6464#12
+# asm 2: pxor 176(<c=%r8),<xmm11=%xmm11
+pxor 176(%r8),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 192)
+# asm 1: pxor 192(<c=int64#5),<xmm12=int6464#13
+# asm 2: pxor 192(<c=%r8),<xmm12=%xmm12
+pxor 192(%r8),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 208)
+# asm 1: pxor 208(<c=int64#5),<xmm13=int6464#14
+# asm 2: pxor 208(<c=%r8),<xmm13=%xmm13
+pxor 208(%r8),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 224)
+# asm 1: pxor 224(<c=int64#5),<xmm14=int6464#15
+# asm 2: pxor 224(<c=%r8),<xmm14=%xmm14
+pxor 224(%r8),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 240)
+# asm 1: pxor 240(<c=int64#5),<xmm15=int6464#16
+# asm 2: pxor 240(<c=%r8),<xmm15=%xmm15
+pxor 240(%r8),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 256)
+# asm 1: pxor 256(<c=int64#5),<xmm0=int6464#1
+# asm 2: pxor 256(<c=%r8),<xmm0=%xmm0
+pxor 256(%r8),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 272)
+# asm 1: pxor 272(<c=int64#5),<xmm1=int6464#2
+# asm 2: pxor 272(<c=%r8),<xmm1=%xmm1
+pxor 272(%r8),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 288)
+# asm 1: pxor 288(<c=int64#5),<xmm2=int6464#3
+# asm 2: pxor 288(<c=%r8),<xmm2=%xmm2
+pxor 288(%r8),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 304)
+# asm 1: pxor 304(<c=int64#5),<xmm3=int6464#4
+# asm 2: pxor 304(<c=%r8),<xmm3=%xmm3
+pxor 304(%r8),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 320)
+# asm 1: pxor 320(<c=int64#5),<xmm4=int6464#5
+# asm 2: pxor 320(<c=%r8),<xmm4=%xmm4
+pxor 320(%r8),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 336)
+# asm 1: pxor 336(<c=int64#5),<xmm5=int6464#6
+# asm 2: pxor 336(<c=%r8),<xmm5=%xmm5
+pxor 336(%r8),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 352)
+# asm 1: pxor 352(<c=int64#5),<xmm6=int6464#7
+# asm 2: pxor 352(<c=%r8),<xmm6=%xmm6
+pxor 352(%r8),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 368)
+# asm 1: pxor 368(<c=int64#5),<xmm7=int6464#8
+# asm 2: pxor 368(<c=%r8),<xmm7=%xmm7
+pxor 368(%r8),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 384)
+# asm 1: pxor 384(<c=int64#5),<xmm8=int6464#9
+# asm 2: pxor 384(<c=%r8),<xmm8=%xmm8
+pxor 384(%r8),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 400)
+# asm 1: pxor 400(<c=int64#5),<xmm9=int6464#10
+# asm 2: pxor 400(<c=%r8),<xmm9=%xmm9
+pxor 400(%r8),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 416)
+# asm 1: pxor 416(<c=int64#5),<xmm10=int6464#11
+# asm 2: pxor 416(<c=%r8),<xmm10=%xmm10
+pxor 416(%r8),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 432)
+# asm 1: pxor 432(<c=int64#5),<xmm11=int6464#12
+# asm 2: pxor 432(<c=%r8),<xmm11=%xmm11
+pxor 432(%r8),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 448)
+# asm 1: pxor 448(<c=int64#5),<xmm12=int6464#13
+# asm 2: pxor 448(<c=%r8),<xmm12=%xmm12
+pxor 448(%r8),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 464)
+# asm 1: pxor 464(<c=int64#5),<xmm13=int6464#14
+# asm 2: pxor 464(<c=%r8),<xmm13=%xmm13
+pxor 464(%r8),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 480)
+# asm 1: pxor 480(<c=int64#5),<xmm14=int6464#15
+# asm 2: pxor 480(<c=%r8),<xmm14=%xmm14
+pxor 480(%r8),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 496)
+# asm 1: pxor 496(<c=int64#5),<xmm15=int6464#16
+# asm 2: pxor 496(<c=%r8),<xmm15=%xmm15
+pxor 496(%r8),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 512)
+# asm 1: pxor 512(<c=int64#5),<xmm0=int6464#1
+# asm 2: pxor 512(<c=%r8),<xmm0=%xmm0
+pxor 512(%r8),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 528)
+# asm 1: pxor 528(<c=int64#5),<xmm1=int6464#2
+# asm 2: pxor 528(<c=%r8),<xmm1=%xmm1
+pxor 528(%r8),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 544)
+# asm 1: pxor 544(<c=int64#5),<xmm2=int6464#3
+# asm 2: pxor 544(<c=%r8),<xmm2=%xmm2
+pxor 544(%r8),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 560)
+# asm 1: pxor 560(<c=int64#5),<xmm3=int6464#4
+# asm 2: pxor 560(<c=%r8),<xmm3=%xmm3
+pxor 560(%r8),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 576)
+# asm 1: pxor 576(<c=int64#5),<xmm4=int6464#5
+# asm 2: pxor 576(<c=%r8),<xmm4=%xmm4
+pxor 576(%r8),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 592)
+# asm 1: pxor 592(<c=int64#5),<xmm5=int6464#6
+# asm 2: pxor 592(<c=%r8),<xmm5=%xmm5
+pxor 592(%r8),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 608)
+# asm 1: pxor 608(<c=int64#5),<xmm6=int6464#7
+# asm 2: pxor 608(<c=%r8),<xmm6=%xmm6
+pxor 608(%r8),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 624)
+# asm 1: pxor 624(<c=int64#5),<xmm7=int6464#8
+# asm 2: pxor 624(<c=%r8),<xmm7=%xmm7
+pxor 624(%r8),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 640)
+# asm 1: pxor 640(<c=int64#5),<xmm8=int6464#9
+# asm 2: pxor 640(<c=%r8),<xmm8=%xmm8
+pxor 640(%r8),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 656)
+# asm 1: pxor 656(<c=int64#5),<xmm9=int6464#10
+# asm 2: pxor 656(<c=%r8),<xmm9=%xmm9
+pxor 656(%r8),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 672)
+# asm 1: pxor 672(<c=int64#5),<xmm10=int6464#11
+# asm 2: pxor 672(<c=%r8),<xmm10=%xmm10
+pxor 672(%r8),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 688)
+# asm 1: pxor 688(<c=int64#5),<xmm11=int6464#12
+# asm 2: pxor 688(<c=%r8),<xmm11=%xmm11
+pxor 688(%r8),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 704)
+# asm 1: pxor 704(<c=int64#5),<xmm12=int6464#13
+# asm 2: pxor 704(<c=%r8),<xmm12=%xmm12
+pxor 704(%r8),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 720)
+# asm 1: pxor 720(<c=int64#5),<xmm13=int6464#14
+# asm 2: pxor 720(<c=%r8),<xmm13=%xmm13
+pxor 720(%r8),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 736)
+# asm 1: pxor 736(<c=int64#5),<xmm14=int6464#15
+# asm 2: pxor 736(<c=%r8),<xmm14=%xmm14
+pxor 736(%r8),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 752)
+# asm 1: pxor 752(<c=int64#5),<xmm15=int6464#16
+# asm 2: pxor 752(<c=%r8),<xmm15=%xmm15
+pxor 752(%r8),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
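+# The dword shuffles below (pshufd 0x93 rotates the four 32-bit words
+# of a register by one position), together with the xors that follow,
+# look like the linear MixColumns-style layer of the round.
+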
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
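+# pshufd 0x4E swaps the two 64-bit halves of each slice register; the
+# subsequent xors presumably finish the linear layer for this round.
+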
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
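+# The eight 16-byte values at c+768..c+880 are xored into the slice
+# registers (round-key addition, presumably), and the pshufb by the SR
+# constant appears to apply ShiftRows within each slice.
+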
+# qhasm:     xmm0 ^= *(int128 *)(c + 768)
+# asm 1: pxor 768(<c=int64#5),<xmm0=int6464#1
+# asm 2: pxor 768(<c=%r8),<xmm0=%xmm0
+pxor 768(%r8),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 784)
+# asm 1: pxor 784(<c=int64#5),<xmm1=int6464#2
+# asm 2: pxor 784(<c=%r8),<xmm1=%xmm1
+pxor 784(%r8),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 800)
+# asm 1: pxor 800(<c=int64#5),<xmm2=int6464#3
+# asm 2: pxor 800(<c=%r8),<xmm2=%xmm2
+pxor 800(%r8),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 816)
+# asm 1: pxor 816(<c=int64#5),<xmm3=int6464#4
+# asm 2: pxor 816(<c=%r8),<xmm3=%xmm3
+pxor 816(%r8),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 832)
+# asm 1: pxor 832(<c=int64#5),<xmm4=int6464#5
+# asm 2: pxor 832(<c=%r8),<xmm4=%xmm4
+pxor 832(%r8),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 848)
+# asm 1: pxor 848(<c=int64#5),<xmm5=int6464#6
+# asm 2: pxor 848(<c=%r8),<xmm5=%xmm5
+pxor 848(%r8),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 864)
+# asm 1: pxor 864(<c=int64#5),<xmm6=int6464#7
+# asm 2: pxor 864(<c=%r8),<xmm6=%xmm6
+pxor 864(%r8),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 880)
+# asm 1: pxor 880(<c=int64#5),<xmm7=int6464#8
+# asm 2: pxor 880(<c=%r8),<xmm7=%xmm7
+pxor 880(%r8),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
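+# Same S-box circuit as above, but with the slices now held in
+# %xmm0..%xmm7 and %xmm8..%xmm15 serving as temporaries.
+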
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
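+# Linear layer again, this time applied to the slices in %xmm0..%xmm7:
+# pshufd 0x93 rotations into %xmm8..%xmm15 followed by xors.
+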
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm8 ^= *(int128 *)(c + 896)
+# asm 1: pxor 896(<c=int64#5),<xmm8=int6464#9
+# asm 2: pxor 896(<c=%r8),<xmm8=%xmm8
+pxor 896(%r8),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SR
+# asm 1: pshufb SR,<xmm8=int6464#9
+# asm 2: pshufb SR,<xmm8=%xmm8
+pshufb SR,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 912)
+# asm 1: pxor 912(<c=int64#5),<xmm9=int6464#10
+# asm 2: pxor 912(<c=%r8),<xmm9=%xmm9
+pxor 912(%r8),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SR
+# asm 1: pshufb SR,<xmm9=int6464#10
+# asm 2: pshufb SR,<xmm9=%xmm9
+pshufb SR,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 928)
+# asm 1: pxor 928(<c=int64#5),<xmm10=int6464#11
+# asm 2: pxor 928(<c=%r8),<xmm10=%xmm10
+pxor 928(%r8),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SR
+# asm 1: pshufb SR,<xmm10=int6464#11
+# asm 2: pshufb SR,<xmm10=%xmm10
+pshufb SR,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 944)
+# asm 1: pxor 944(<c=int64#5),<xmm11=int6464#12
+# asm 2: pxor 944(<c=%r8),<xmm11=%xmm11
+pxor 944(%r8),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SR
+# asm 1: pshufb SR,<xmm11=int6464#12
+# asm 2: pshufb SR,<xmm11=%xmm11
+pshufb SR,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 960)
+# asm 1: pxor 960(<c=int64#5),<xmm12=int6464#13
+# asm 2: pxor 960(<c=%r8),<xmm12=%xmm12
+pxor 960(%r8),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SR
+# asm 1: pshufb SR,<xmm12=int6464#13
+# asm 2: pshufb SR,<xmm12=%xmm12
+pshufb SR,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 976)
+# asm 1: pxor 976(<c=int64#5),<xmm13=int6464#14
+# asm 2: pxor 976(<c=%r8),<xmm13=%xmm13
+pxor 976(%r8),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SR
+# asm 1: pshufb SR,<xmm13=int6464#14
+# asm 2: pshufb SR,<xmm13=%xmm13
+pshufb SR,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 992)
+# asm 1: pxor 992(<c=int64#5),<xmm14=int6464#15
+# asm 2: pxor 992(<c=%r8),<xmm14=%xmm14
+pxor 992(%r8),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SR
+# asm 1: pshufb SR,<xmm14=int6464#15
+# asm 2: pshufb SR,<xmm14=%xmm14
+pshufb SR,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 1008)
+# asm 1: pxor 1008(<c=int64#5),<xmm15=int6464#16
+# asm 2: pxor 1008(<c=%r8),<xmm15=%xmm15
+pxor 1008(%r8),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SR
+# asm 1: pshufb SR,<xmm15=int6464#16
+# asm 2: pshufb SR,<xmm15=%xmm15
+pshufb SR,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:     xmm0 = shuffle dwords of xmm8 by 0x93
+# asm 1: pshufd $0x93,<xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: pshufd $0x93,<xmm8=%xmm8,>xmm0=%xmm0
+pshufd $0x93,%xmm8,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm9 by 0x93
+# asm 1: pshufd $0x93,<xmm9=int6464#10,>xmm1=int6464#2
+# asm 2: pshufd $0x93,<xmm9=%xmm9,>xmm1=%xmm1
+pshufd $0x93,%xmm9,%xmm1
+
+# qhasm:     xmm2 = shuffle dwords of xmm12 by 0x93
+# asm 1: pshufd $0x93,<xmm12=int6464#13,>xmm2=int6464#3
+# asm 2: pshufd $0x93,<xmm12=%xmm12,>xmm2=%xmm2
+pshufd $0x93,%xmm12,%xmm2
+
+# qhasm:     xmm3 = shuffle dwords of xmm14 by 0x93
+# asm 1: pshufd $0x93,<xmm14=int6464#15,>xmm3=int6464#4
+# asm 2: pshufd $0x93,<xmm14=%xmm14,>xmm3=%xmm3
+pshufd $0x93,%xmm14,%xmm3
+
+# qhasm:     xmm4 = shuffle dwords of xmm11 by 0x93
+# asm 1: pshufd $0x93,<xmm11=int6464#12,>xmm4=int6464#5
+# asm 2: pshufd $0x93,<xmm11=%xmm11,>xmm4=%xmm4
+pshufd $0x93,%xmm11,%xmm4
+
+# qhasm:     xmm5 = shuffle dwords of xmm15 by 0x93
+# asm 1: pshufd $0x93,<xmm15=int6464#16,>xmm5=int6464#6
+# asm 2: pshufd $0x93,<xmm15=%xmm15,>xmm5=%xmm5
+pshufd $0x93,%xmm15,%xmm5
+
+# qhasm:     xmm6 = shuffle dwords of xmm10 by 0x93
+# asm 1: pshufd $0x93,<xmm10=int6464#11,>xmm6=int6464#7
+# asm 2: pshufd $0x93,<xmm10=%xmm10,>xmm6=%xmm6
+pshufd $0x93,%xmm10,%xmm6
+
+# qhasm:     xmm7 = shuffle dwords of xmm13 by 0x93
+# asm 1: pshufd $0x93,<xmm13=int6464#14,>xmm7=int6464#8
+# asm 2: pshufd $0x93,<xmm13=%xmm13,>xmm7=%xmm7
+pshufd $0x93,%xmm13,%xmm7
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm12 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm12=int6464#13
+# asm 2: pxor  <xmm2=%xmm2,<xmm12=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm:     xmm14 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm14=int6464#15
+# asm 2: pxor  <xmm3=%xmm3,<xmm14=%xmm14
+pxor  %xmm3,%xmm14
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
+# qhasm:     xmm10 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pxor  <xmm6=%xmm6,<xmm10=%xmm10
+pxor  %xmm6,%xmm10
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm8,<xmm1=%xmm1
+pxor  %xmm8,%xmm1
+
+# qhasm:     xmm2 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:     xmm1 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm1=int6464#2
+# asm 2: pxor  <xmm13=%xmm13,<xmm1=%xmm1
+pxor  %xmm13,%xmm1
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#5
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm4
+pxor  %xmm14,%xmm4
+
+# qhasm:     xmm5 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm5=int6464#6
+# asm 2: pxor  <xmm11=%xmm11,<xmm5=%xmm5
+pxor  %xmm11,%xmm5
+
+# qhasm:     xmm3 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm3=int6464#4
+# asm 2: pxor  <xmm13=%xmm13,<xmm3=%xmm3
+pxor  %xmm13,%xmm3
+
+# qhasm:     xmm6 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm6=int6464#7
+# asm 2: pxor  <xmm15=%xmm15,<xmm6=%xmm6
+pxor  %xmm15,%xmm6
+
+# qhasm:     xmm7 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm7=int6464#8
+# asm 2: pxor  <xmm10=%xmm10,<xmm7=%xmm7
+pxor  %xmm10,%xmm7
+
+# qhasm:     xmm4 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm4=int6464#5
+# asm 2: pxor  <xmm13=%xmm13,<xmm4=%xmm4
+pxor  %xmm13,%xmm4
+
+# qhasm:     xmm8 = shuffle dwords of xmm8 by 0x4E
+# asm 1: pshufd $0x4E,<xmm8=int6464#9,>xmm8=int6464#9
+# asm 2: pshufd $0x4E,<xmm8=%xmm8,>xmm8=%xmm8
+pshufd $0x4E,%xmm8,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm9 by 0x4E
+# asm 1: pshufd $0x4E,<xmm9=int6464#10,>xmm9=int6464#10
+# asm 2: pshufd $0x4E,<xmm9=%xmm9,>xmm9=%xmm9
+pshufd $0x4E,%xmm9,%xmm9
+
+# qhasm:     xmm12 = shuffle dwords of xmm12 by 0x4E
+# asm 1: pshufd $0x4E,<xmm12=int6464#13,>xmm12=int6464#13
+# asm 2: pshufd $0x4E,<xmm12=%xmm12,>xmm12=%xmm12
+pshufd $0x4E,%xmm12,%xmm12
+
+# qhasm:     xmm14 = shuffle dwords of xmm14 by 0x4E
+# asm 1: pshufd $0x4E,<xmm14=int6464#15,>xmm14=int6464#15
+# asm 2: pshufd $0x4E,<xmm14=%xmm14,>xmm14=%xmm14
+pshufd $0x4E,%xmm14,%xmm14
+
+# qhasm:     xmm11 = shuffle dwords of xmm11 by 0x4E
+# asm 1: pshufd $0x4E,<xmm11=int6464#12,>xmm11=int6464#12
+# asm 2: pshufd $0x4E,<xmm11=%xmm11,>xmm11=%xmm11
+pshufd $0x4E,%xmm11,%xmm11
+
+# qhasm:     xmm15 = shuffle dwords of xmm15 by 0x4E
+# asm 1: pshufd $0x4E,<xmm15=int6464#16,>xmm15=int6464#16
+# asm 2: pshufd $0x4E,<xmm15=%xmm15,>xmm15=%xmm15
+pshufd $0x4E,%xmm15,%xmm15
+
+# qhasm:     xmm10 = shuffle dwords of xmm10 by 0x4E
+# asm 1: pshufd $0x4E,<xmm10=int6464#11,>xmm10=int6464#11
+# asm 2: pshufd $0x4E,<xmm10=%xmm10,>xmm10=%xmm10
+pshufd $0x4E,%xmm10,%xmm10
+
+# qhasm:     xmm13 = shuffle dwords of xmm13 by 0x4E
+# asm 1: pshufd $0x4E,<xmm13=int6464#14,>xmm13=int6464#14
+# asm 2: pshufd $0x4E,<xmm13=%xmm13,>xmm13=%xmm13
+pshufd $0x4E,%xmm13,%xmm13
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm2 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pxor  <xmm12=%xmm12,<xmm2=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm:     xmm3 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm3=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm3=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:     xmm4 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm4=int6464#5
+# asm 2: pxor  <xmm11=%xmm11,<xmm4=%xmm4
+pxor  %xmm11,%xmm4
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm6 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm6=int6464#7
+# asm 2: pxor  <xmm10=%xmm10,<xmm6=%xmm6
+pxor  %xmm10,%xmm6
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm0 ^= *(int128 *)(c + 1024)
+# asm 1: pxor 1024(<c=int64#5),<xmm0=int6464#1
+# asm 2: pxor 1024(<c=%r8),<xmm0=%xmm0
+pxor 1024(%r8),%xmm0
+
+# qhasm:     shuffle bytes of xmm0 by SR
+# asm 1: pshufb SR,<xmm0=int6464#1
+# asm 2: pshufb SR,<xmm0=%xmm0
+pshufb SR,%xmm0
+
+# qhasm:     xmm1 ^= *(int128 *)(c + 1040)
+# asm 1: pxor 1040(<c=int64#5),<xmm1=int6464#2
+# asm 2: pxor 1040(<c=%r8),<xmm1=%xmm1
+pxor 1040(%r8),%xmm1
+
+# qhasm:     shuffle bytes of xmm1 by SR
+# asm 1: pshufb SR,<xmm1=int6464#2
+# asm 2: pshufb SR,<xmm1=%xmm1
+pshufb SR,%xmm1
+
+# qhasm:     xmm2 ^= *(int128 *)(c + 1056)
+# asm 1: pxor 1056(<c=int64#5),<xmm2=int6464#3
+# asm 2: pxor 1056(<c=%r8),<xmm2=%xmm2
+pxor 1056(%r8),%xmm2
+
+# qhasm:     shuffle bytes of xmm2 by SR
+# asm 1: pshufb SR,<xmm2=int6464#3
+# asm 2: pshufb SR,<xmm2=%xmm2
+pshufb SR,%xmm2
+
+# qhasm:     xmm3 ^= *(int128 *)(c + 1072)
+# asm 1: pxor 1072(<c=int64#5),<xmm3=int6464#4
+# asm 2: pxor 1072(<c=%r8),<xmm3=%xmm3
+pxor 1072(%r8),%xmm3
+
+# qhasm:     shuffle bytes of xmm3 by SR
+# asm 1: pshufb SR,<xmm3=int6464#4
+# asm 2: pshufb SR,<xmm3=%xmm3
+pshufb SR,%xmm3
+
+# qhasm:     xmm4 ^= *(int128 *)(c + 1088)
+# asm 1: pxor 1088(<c=int64#5),<xmm4=int6464#5
+# asm 2: pxor 1088(<c=%r8),<xmm4=%xmm4
+pxor 1088(%r8),%xmm4
+
+# qhasm:     shuffle bytes of xmm4 by SR
+# asm 1: pshufb SR,<xmm4=int6464#5
+# asm 2: pshufb SR,<xmm4=%xmm4
+pshufb SR,%xmm4
+
+# qhasm:     xmm5 ^= *(int128 *)(c + 1104)
+# asm 1: pxor 1104(<c=int64#5),<xmm5=int6464#6
+# asm 2: pxor 1104(<c=%r8),<xmm5=%xmm5
+pxor 1104(%r8),%xmm5
+
+# qhasm:     shuffle bytes of xmm5 by SR
+# asm 1: pshufb SR,<xmm5=int6464#6
+# asm 2: pshufb SR,<xmm5=%xmm5
+pshufb SR,%xmm5
+
+# qhasm:     xmm6 ^= *(int128 *)(c + 1120)
+# asm 1: pxor 1120(<c=int64#5),<xmm6=int6464#7
+# asm 2: pxor 1120(<c=%r8),<xmm6=%xmm6
+pxor 1120(%r8),%xmm6
+
+# qhasm:     shuffle bytes of xmm6 by SR
+# asm 1: pshufb SR,<xmm6=int6464#7
+# asm 2: pshufb SR,<xmm6=%xmm6
+pshufb SR,%xmm6
+
+# qhasm:     xmm7 ^= *(int128 *)(c + 1136)
+# asm 1: pxor 1136(<c=int64#5),<xmm7=int6464#8
+# asm 2: pxor 1136(<c=%r8),<xmm7=%xmm7
+pxor 1136(%r8),%xmm7
+
+# qhasm:     shuffle bytes of xmm7 by SR
+# asm 1: pshufb SR,<xmm7=int6464#8
+# asm 2: pshufb SR,<xmm7=%xmm7
+pshufb SR,%xmm7
+
+# qhasm:       xmm5 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm5=int6464#6
+# asm 2: pxor  <xmm6=%xmm6,<xmm5=%xmm5
+pxor  %xmm6,%xmm5
+
+# qhasm:       xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm1,<xmm2=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm5=int6464#6
+# asm 2: pxor  <xmm0=%xmm0,<xmm5=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm6=int6464#7
+# asm 2: pxor  <xmm2=%xmm2,<xmm6=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm3=int6464#4
+# asm 2: pxor  <xmm0=%xmm0,<xmm3=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#4
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm3
+pxor  %xmm7,%xmm3
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm3=int6464#4
+# asm 2: pxor  <xmm4=%xmm4,<xmm3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:       xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm7=int6464#8
+# asm 2: pxor  <xmm5=%xmm5,<xmm7=%xmm7
+pxor  %xmm5,%xmm7
+
+# qhasm:       xmm3 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm3=int6464#4
+# asm 2: pxor  <xmm1=%xmm1,<xmm3=%xmm3
+pxor  %xmm1,%xmm3
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm2=int6464#3
+# asm 2: pxor  <xmm7=%xmm7,<xmm2=%xmm2
+pxor  %xmm7,%xmm2
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm11 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm11=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm11=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:       xmm10 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm10=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm10=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:       xmm9 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm9=int6464#11
+# asm 2: movdqa <xmm5=%xmm5,>xmm9=%xmm10
+movdqa %xmm5,%xmm10
+
+# qhasm:       xmm13 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm13=int6464#12
+# asm 2: movdqa <xmm2=%xmm2,>xmm13=%xmm11
+movdqa %xmm2,%xmm11
+
+# qhasm:       xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#13
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm12
+movdqa %xmm6,%xmm12
+
+# qhasm:       xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:       xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:       xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm9=int6464#11
+# asm 2: pxor  <xmm3=%xmm3,<xmm9=%xmm10
+pxor  %xmm3,%xmm10
+
+# qhasm:       xmm13 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm13=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm13=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:       xmm14 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm14=int6464#14
+# asm 2: movdqa <xmm11=%xmm8,>xmm14=%xmm13
+movdqa %xmm8,%xmm13
+
+# qhasm:       xmm8 = xmm10
+# asm 1: movdqa <xmm10=int6464#10,>xmm8=int6464#15
+# asm 2: movdqa <xmm10=%xmm9,>xmm8=%xmm14
+movdqa %xmm9,%xmm14
+
+# qhasm:       xmm15 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm15=int6464#16
+# asm 2: movdqa <xmm11=%xmm8,>xmm15=%xmm15
+movdqa %xmm8,%xmm15
+
+# qhasm:       xmm10 |= xmm9
+# asm 1: por   <xmm9=int6464#11,<xmm10=int6464#10
+# asm 2: por   <xmm9=%xmm10,<xmm10=%xmm9
+por   %xmm10,%xmm9
+
+# qhasm:       xmm11 |= xmm12
+# asm 1: por   <xmm12=int6464#13,<xmm11=int6464#9
+# asm 2: por   <xmm12=%xmm12,<xmm11=%xmm8
+por   %xmm12,%xmm8
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm15=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm14 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm14=int6464#14
+# asm 2: pand  <xmm12=%xmm12,<xmm14=%xmm13
+pand  %xmm12,%xmm13
+
+# qhasm:       xmm8 &= xmm9
+# asm 1: pand  <xmm9=int6464#11,<xmm8=int6464#15
+# asm 2: pand  <xmm9=%xmm10,<xmm8=%xmm14
+pand  %xmm10,%xmm14
+
+# qhasm:       xmm12 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm9=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:       xmm15 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm15=int6464#16
+# asm 2: pand  <xmm12=%xmm12,<xmm15=%xmm15
+pand  %xmm12,%xmm15
+
+# qhasm:       xmm12 = xmm3
+# asm 1: movdqa <xmm3=int6464#4,>xmm12=int6464#11
+# asm 2: movdqa <xmm3=%xmm3,>xmm12=%xmm10
+movdqa %xmm3,%xmm10
+
+# qhasm:       xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#11,<xmm13=int6464#12
+# asm 2: pand  <xmm12=%xmm10,<xmm13=%xmm11
+pand  %xmm10,%xmm11
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm11=int6464#9
+# asm 2: pxor  <xmm13=%xmm11,<xmm11=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm13 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm13=int6464#11
+# asm 2: movdqa <xmm7=%xmm7,>xmm13=%xmm10
+movdqa %xmm7,%xmm10
+
+# qhasm:       xmm13 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm13=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm13=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm12 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm12=int6464#12
+# asm 2: movdqa <xmm5=%xmm5,>xmm12=%xmm11
+movdqa %xmm5,%xmm11
+
+# qhasm:       xmm9 = xmm13
+# asm 1: movdqa <xmm13=int6464#11,>xmm9=int6464#13
+# asm 2: movdqa <xmm13=%xmm10,>xmm9=%xmm12
+movdqa %xmm10,%xmm12
+
+# qhasm:       xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:       xmm9 |= xmm12
+# asm 1: por   <xmm12=int6464#12,<xmm9=int6464#13
+# asm 2: por   <xmm12=%xmm11,<xmm9=%xmm12
+por   %xmm11,%xmm12
+
+# qhasm:       xmm13 &= xmm12
+# asm 1: pand  <xmm12=int6464#12,<xmm13=int6464#11
+# asm 2: pand  <xmm12=%xmm11,<xmm13=%xmm10
+pand  %xmm11,%xmm10
+
+# qhasm:       xmm8 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#11,<xmm8=int6464#15
+# asm 2: pxor  <xmm13=%xmm10,<xmm8=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#9
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm10=int6464#10
+# asm 2: pxor  <xmm14=%xmm13,<xmm10=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm9 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm8 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm8=int6464#15
+# asm 2: pxor  <xmm14=%xmm13,<xmm8=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm12 = xmm2
+# asm 1: movdqa <xmm2=int6464#3,>xmm12=int6464#11
+# asm 2: movdqa <xmm2=%xmm2,>xmm12=%xmm10
+movdqa %xmm2,%xmm10
+
+# qhasm:       xmm13 = xmm4
+# asm 1: movdqa <xmm4=int6464#5,>xmm13=int6464#12
+# asm 2: movdqa <xmm4=%xmm4,>xmm13=%xmm11
+movdqa %xmm4,%xmm11
+
+# qhasm:       xmm14 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm14=int6464#14
+# asm 2: movdqa <xmm1=%xmm1,>xmm14=%xmm13
+movdqa %xmm1,%xmm13
+
+# qhasm:       xmm15 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm15=int6464#16
+# asm 2: movdqa <xmm7=%xmm7,>xmm15=%xmm15
+movdqa %xmm7,%xmm15
+
+# qhasm:       xmm12 &= xmm3
+# asm 1: pand  <xmm3=int6464#4,<xmm12=int6464#11
+# asm 2: pand  <xmm3=%xmm3,<xmm12=%xmm10
+pand  %xmm3,%xmm10
+
+# qhasm:       xmm13 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm13=int6464#12
+# asm 2: pand  <xmm0=%xmm0,<xmm13=%xmm11
+pand  %xmm0,%xmm11
+
+# qhasm:       xmm14 &= xmm5
+# asm 1: pand  <xmm5=int6464#6,<xmm14=int6464#14
+# asm 2: pand  <xmm5=%xmm5,<xmm14=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:       xmm15 |= xmm6
+# asm 1: por   <xmm6=int6464#7,<xmm15=int6464#16
+# asm 2: por   <xmm6=%xmm6,<xmm15=%xmm15
+por   %xmm6,%xmm15
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm11=int6464#9
+# asm 2: pxor  <xmm12=%xmm10,<xmm11=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#12,<xmm10=int6464#10
+# asm 2: pxor  <xmm13=%xmm11,<xmm10=%xmm9
+pxor  %xmm11,%xmm9
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#14,<xmm9=int6464#13
+# asm 2: pxor  <xmm14=%xmm13,<xmm9=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm8 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm8=int6464#15
+# asm 2: pxor  <xmm15=%xmm15,<xmm8=%xmm14
+pxor  %xmm15,%xmm14
+
+# qhasm:       xmm12 = xmm11
+# asm 1: movdqa <xmm11=int6464#9,>xmm12=int6464#11
+# asm 2: movdqa <xmm11=%xmm8,>xmm12=%xmm10
+movdqa %xmm8,%xmm10
+
+# qhasm:       xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm12=int6464#11
+# asm 2: pxor  <xmm10=%xmm9,<xmm12=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm11 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm11=int6464#9
+# asm 2: pand  <xmm9=%xmm12,<xmm11=%xmm8
+pand  %xmm12,%xmm8
+
+# qhasm:       xmm14 = xmm8
+# asm 1: movdqa <xmm8=int6464#15,>xmm14=int6464#12
+# asm 2: movdqa <xmm8=%xmm14,>xmm14=%xmm11
+movdqa %xmm14,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm11=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm15 = xmm12
+# asm 1: movdqa <xmm12=int6464#11,>xmm15=int6464#14
+# asm 2: movdqa <xmm12=%xmm10,>xmm15=%xmm13
+movdqa %xmm10,%xmm13
+
+# qhasm:       xmm15 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm15=int6464#14
+# asm 2: pand  <xmm14=%xmm11,<xmm15=%xmm13
+pand  %xmm11,%xmm13
+
+# qhasm:       xmm15 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm15=int6464#14
+# asm 2: pxor  <xmm10=%xmm9,<xmm15=%xmm13
+pxor  %xmm9,%xmm13
+
+# qhasm:       xmm13 = xmm9
+# asm 1: movdqa <xmm9=int6464#13,>xmm13=int6464#16
+# asm 2: movdqa <xmm9=%xmm12,>xmm13=%xmm15
+movdqa %xmm12,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm11 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#10,<xmm11=int6464#9
+# asm 2: pxor  <xmm10=%xmm9,<xmm11=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm13 &= xmm11
+# asm 1: pand  <xmm11=int6464#9,<xmm13=int6464#16
+# asm 2: pand  <xmm11=%xmm8,<xmm13=%xmm15
+pand  %xmm8,%xmm15
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#15,<xmm13=int6464#16
+# asm 2: pxor  <xmm8=%xmm14,<xmm13=%xmm15
+pxor  %xmm14,%xmm15
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm9=int6464#13
+# asm 2: pxor  <xmm13=%xmm15,<xmm9=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm10 = xmm14
+# asm 1: movdqa <xmm14=int6464#12,>xmm10=int6464#9
+# asm 2: movdqa <xmm14=%xmm11,>xmm10=%xmm8
+movdqa %xmm11,%xmm8
+
+# qhasm:       xmm10 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm10=int6464#9
+# asm 2: pxor  <xmm13=%xmm15,<xmm10=%xmm8
+pxor  %xmm15,%xmm8
+
+# qhasm:       xmm10 &= xmm8
+# asm 1: pand  <xmm8=int6464#15,<xmm10=int6464#9
+# asm 2: pand  <xmm8=%xmm14,<xmm10=%xmm8
+pand  %xmm14,%xmm8
+
+# qhasm:       xmm9 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm9=int6464#13
+# asm 2: pxor  <xmm10=%xmm8,<xmm9=%xmm12
+pxor  %xmm8,%xmm12
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#9,<xmm14=int6464#12
+# asm 2: pxor  <xmm10=%xmm8,<xmm14=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm14=int6464#12
+# asm 2: pand  <xmm15=%xmm13,<xmm14=%xmm11
+pand  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#11,<xmm14=int6464#12
+# asm 2: pxor  <xmm12=%xmm10,<xmm14=%xmm11
+pxor  %xmm10,%xmm11
+
+# qhasm:         xmm12 = xmm6
+# asm 1: movdqa <xmm6=int6464#7,>xmm12=int6464#9
+# asm 2: movdqa <xmm6=%xmm6,>xmm12=%xmm8
+movdqa %xmm6,%xmm8
+
+# qhasm:         xmm8 = xmm5
+# asm 1: movdqa <xmm5=int6464#6,>xmm8=int6464#10
+# asm 2: movdqa <xmm5=%xmm5,>xmm8=%xmm9
+movdqa %xmm5,%xmm9
+
+# qhasm:           xmm10 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm10=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm10=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm10 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm10 &= xmm6
+# asm 1: pand  <xmm6=int6464#7,<xmm10=int6464#11
+# asm 2: pand  <xmm6=%xmm6,<xmm10=%xmm10
+pand  %xmm6,%xmm10
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm6 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm6=int6464#7
+# asm 2: pand  <xmm14=%xmm11,<xmm6=%xmm6
+pand  %xmm11,%xmm6
+
+# qhasm:           xmm5 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm5=int6464#6
+# asm 2: pand  <xmm15=%xmm13,<xmm5=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:           xmm6 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm6=int6464#7
+# asm 2: pxor  <xmm5=%xmm5,<xmm6=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:           xmm5 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm5=int6464#6
+# asm 2: pxor  <xmm10=%xmm10,<xmm5=%xmm5
+pxor  %xmm10,%xmm5
+
+# qhasm:         xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm8 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm8=int6464#10
+# asm 2: pxor  <xmm3=%xmm3,<xmm8=%xmm9
+pxor  %xmm3,%xmm9
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm0
+# asm 1: pand  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pand  <xmm0=%xmm0,<xmm10=%xmm10
+pand  %xmm0,%xmm10
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm0=int6464#1
+# asm 2: pand  <xmm9=%xmm12,<xmm0=%xmm0
+pand  %xmm12,%xmm0
+
+# qhasm:           xmm3 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm3=int6464#4
+# asm 2: pand  <xmm13=%xmm15,<xmm3=%xmm3
+pand  %xmm15,%xmm3
+
+# qhasm:           xmm0 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm0=int6464#1
+# asm 2: pxor  <xmm3=%xmm3,<xmm0=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:           xmm3 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm3=int6464#4
+# asm 2: pxor  <xmm10=%xmm10,<xmm3=%xmm3
+pxor  %xmm10,%xmm3
+
+# qhasm:         xmm6 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm6=int6464#7
+# asm 2: pxor  <xmm12=%xmm8,<xmm6=%xmm6
+pxor  %xmm8,%xmm6
+
+# qhasm:         xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm5 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm5=int6464#6
+# asm 2: pxor  <xmm8=%xmm9,<xmm5=%xmm5
+pxor  %xmm9,%xmm5
+
+# qhasm:         xmm3 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm3=int6464#4
+# asm 2: pxor  <xmm8=%xmm9,<xmm3=%xmm3
+pxor  %xmm9,%xmm3
+
+# qhasm:         xmm12 = xmm7
+# asm 1: movdqa <xmm7=int6464#8,>xmm12=int6464#9
+# asm 2: movdqa <xmm7=%xmm7,>xmm12=%xmm8
+movdqa %xmm7,%xmm8
+
+# qhasm:         xmm8 = xmm1
+# asm 1: movdqa <xmm1=int6464#2,>xmm8=int6464#10
+# asm 2: movdqa <xmm1=%xmm1,>xmm8=%xmm9
+movdqa %xmm1,%xmm9
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm12=int6464#9
+# asm 2: pxor  <xmm4=%xmm4,<xmm12=%xmm8
+pxor  %xmm4,%xmm8
+
+# qhasm:         xmm8 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm8=int6464#10
+# asm 2: pxor  <xmm2=%xmm2,<xmm8=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm12
+# asm 1: pand  <xmm12=int6464#9,<xmm11=int6464#11
+# asm 2: pand  <xmm12=%xmm8,<xmm11=%xmm10
+pand  %xmm8,%xmm10
+
+# qhasm:           xmm12 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm12=int6464#9
+# asm 2: pxor  <xmm8=%xmm9,<xmm12=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:           xmm12 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm12=int6464#9
+# asm 2: pand  <xmm14=%xmm11,<xmm12=%xmm8
+pand  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm8=int6464#10
+# asm 2: pand  <xmm15=%xmm13,<xmm8=%xmm9
+pand  %xmm13,%xmm9
+
+# qhasm:           xmm8 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm8=int6464#10
+# asm 2: pxor  <xmm12=%xmm8,<xmm8=%xmm9
+pxor  %xmm8,%xmm9
+
+# qhasm:           xmm12 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm12=int6464#9
+# asm 2: pxor  <xmm11=%xmm10,<xmm12=%xmm8
+pxor  %xmm10,%xmm8
+
+# qhasm:           xmm10 = xmm13
+# asm 1: movdqa <xmm13=int6464#16,>xmm10=int6464#11
+# asm 2: movdqa <xmm13=%xmm15,>xmm10=%xmm10
+movdqa %xmm15,%xmm10
+
+# qhasm:           xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm12,<xmm10=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm:           xmm10 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pand  <xmm4=%xmm4,<xmm10=%xmm10
+pand  %xmm4,%xmm10
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm4 &= xmm9
+# asm 1: pand  <xmm9=int6464#13,<xmm4=int6464#5
+# asm 2: pand  <xmm9=%xmm12,<xmm4=%xmm4
+pand  %xmm12,%xmm4
+
+# qhasm:           xmm2 &= xmm13
+# asm 1: pand  <xmm13=int6464#16,<xmm2=int6464#3
+# asm 2: pand  <xmm13=%xmm15,<xmm2=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm2=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:           xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#3
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm2
+pxor  %xmm10,%xmm2
+
+# qhasm:         xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#16,<xmm15=int6464#14
+# asm 2: pxor  <xmm13=%xmm15,<xmm15=%xmm13
+pxor  %xmm15,%xmm13
+
+# qhasm:         xmm14 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#13,<xmm14=int6464#12
+# asm 2: pxor  <xmm9=%xmm12,<xmm14=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:           xmm11 = xmm15
+# asm 1: movdqa <xmm15=int6464#14,>xmm11=int6464#11
+# asm 2: movdqa <xmm15=%xmm13,>xmm11=%xmm10
+movdqa %xmm13,%xmm10
+
+# qhasm:           xmm11 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#12,<xmm11=int6464#11
+# asm 2: pxor  <xmm14=%xmm11,<xmm11=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:           xmm11 &= xmm7
+# asm 1: pand  <xmm7=int6464#8,<xmm11=int6464#11
+# asm 2: pand  <xmm7=%xmm7,<xmm11=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm7 &= xmm14
+# asm 1: pand  <xmm14=int6464#12,<xmm7=int6464#8
+# asm 2: pand  <xmm14=%xmm11,<xmm7=%xmm7
+pand  %xmm11,%xmm7
+
+# qhasm:           xmm1 &= xmm15
+# asm 1: pand  <xmm15=int6464#14,<xmm1=int6464#2
+# asm 2: pand  <xmm15=%xmm13,<xmm1=%xmm1
+pand  %xmm13,%xmm1
+
+# qhasm:           xmm7 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm7=int6464#8
+# asm 2: pxor  <xmm1=%xmm1,<xmm7=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm:           xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#11,<xmm1=int6464#2
+# asm 2: pxor  <xmm11=%xmm10,<xmm1=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:         xmm7 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm7=int6464#8
+# asm 2: pxor  <xmm12=%xmm8,<xmm7=%xmm7
+pxor  %xmm8,%xmm7
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm12=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:         xmm1 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm8=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:         xmm2 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#10,<xmm2=int6464#3
+# asm 2: pxor  <xmm8=%xmm9,<xmm2=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm0,<xmm7=%xmm7
+pxor  %xmm0,%xmm7
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm1=int6464#2
+# asm 2: pxor  <xmm6=%xmm6,<xmm1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm:       xmm4 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm4=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm4=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm6 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm6=int6464#7
+# asm 2: pxor  <xmm0=%xmm0,<xmm6=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:       xmm0 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm0=int6464#1
+# asm 2: pxor  <xmm1=%xmm1,<xmm0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm1=int6464#2
+# asm 2: pxor  <xmm5=%xmm5,<xmm1=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm5 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm5=int6464#6
+# asm 2: pxor  <xmm2=%xmm2,<xmm5=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm:       xmm4 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm4=int6464#5
+# asm 2: pxor  <xmm5=%xmm5,<xmm4=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm2 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm3=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm3=int6464#4
+# asm 2: pxor  <xmm5=%xmm5,<xmm3=%xmm3
+pxor  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm6=int6464#7
+# asm 2: pxor  <xmm3=%xmm3,<xmm6=%xmm6
+pxor  %xmm3,%xmm6
+
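+# comment:the pshufd-by-0x93/0x4E rotations and xors below appear to implement the MixColumns step on the bitsliced state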
+# qhasm:     xmm8 = shuffle dwords of xmm0 by 0x93
+# asm 1: pshufd $0x93,<xmm0=int6464#1,>xmm8=int6464#9
+# asm 2: pshufd $0x93,<xmm0=%xmm0,>xmm8=%xmm8
+pshufd $0x93,%xmm0,%xmm8
+
+# qhasm:     xmm9 = shuffle dwords of xmm1 by 0x93
+# asm 1: pshufd $0x93,<xmm1=int6464#2,>xmm9=int6464#10
+# asm 2: pshufd $0x93,<xmm1=%xmm1,>xmm9=%xmm9
+pshufd $0x93,%xmm1,%xmm9
+
+# qhasm:     xmm10 = shuffle dwords of xmm4 by 0x93
+# asm 1: pshufd $0x93,<xmm4=int6464#5,>xmm10=int6464#11
+# asm 2: pshufd $0x93,<xmm4=%xmm4,>xmm10=%xmm10
+pshufd $0x93,%xmm4,%xmm10
+
+# qhasm:     xmm11 = shuffle dwords of xmm6 by 0x93
+# asm 1: pshufd $0x93,<xmm6=int6464#7,>xmm11=int6464#12
+# asm 2: pshufd $0x93,<xmm6=%xmm6,>xmm11=%xmm11
+pshufd $0x93,%xmm6,%xmm11
+
+# qhasm:     xmm12 = shuffle dwords of xmm3 by 0x93
+# asm 1: pshufd $0x93,<xmm3=int6464#4,>xmm12=int6464#13
+# asm 2: pshufd $0x93,<xmm3=%xmm3,>xmm12=%xmm12
+pshufd $0x93,%xmm3,%xmm12
+
+# qhasm:     xmm13 = shuffle dwords of xmm7 by 0x93
+# asm 1: pshufd $0x93,<xmm7=int6464#8,>xmm13=int6464#14
+# asm 2: pshufd $0x93,<xmm7=%xmm7,>xmm13=%xmm13
+pshufd $0x93,%xmm7,%xmm13
+
+# qhasm:     xmm14 = shuffle dwords of xmm2 by 0x93
+# asm 1: pshufd $0x93,<xmm2=int6464#3,>xmm14=int6464#15
+# asm 2: pshufd $0x93,<xmm2=%xmm2,>xmm14=%xmm14
+pshufd $0x93,%xmm2,%xmm14
+
+# qhasm:     xmm15 = shuffle dwords of xmm5 by 0x93
+# asm 1: pshufd $0x93,<xmm5=int6464#6,>xmm15=int6464#16
+# asm 2: pshufd $0x93,<xmm5=%xmm5,>xmm15=%xmm15
+pshufd $0x93,%xmm5,%xmm15
+
+# qhasm:     xmm0 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm0=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm0=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:     xmm1 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm1=int6464#2
+# asm 2: pxor  <xmm9=%xmm9,<xmm1=%xmm1
+pxor  %xmm9,%xmm1
+
+# qhasm:     xmm4 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm4=int6464#5
+# asm 2: pxor  <xmm10=%xmm10,<xmm4=%xmm4
+pxor  %xmm10,%xmm4
+
+# qhasm:     xmm6 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm6=int6464#7
+# asm 2: pxor  <xmm11=%xmm11,<xmm6=%xmm6
+pxor  %xmm11,%xmm6
+
+# qhasm:     xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:     xmm7 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm7=int6464#8
+# asm 2: pxor  <xmm13=%xmm13,<xmm7=%xmm7
+pxor  %xmm13,%xmm7
+
+# qhasm:     xmm2 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pxor  <xmm14=%xmm14,<xmm2=%xmm2
+pxor  %xmm14,%xmm2
+
+# qhasm:     xmm5 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm5=int6464#6
+# asm 2: pxor  <xmm15=%xmm15,<xmm5=%xmm5
+pxor  %xmm15,%xmm5
+
+# qhasm:     xmm8 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm8=int6464#9
+# asm 2: pxor  <xmm5=%xmm5,<xmm8=%xmm8
+pxor  %xmm5,%xmm8
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm10 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm1=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:     xmm9 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm9=int6464#10
+# asm 2: pxor  <xmm5=%xmm5,<xmm9=%xmm9
+pxor  %xmm5,%xmm9
+
+# qhasm:     xmm11 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm11=int6464#12
+# asm 2: pxor  <xmm4=%xmm4,<xmm11=%xmm11
+pxor  %xmm4,%xmm11
+
+# qhasm:     xmm12 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm12=int6464#13
+# asm 2: pxor  <xmm6=%xmm6,<xmm12=%xmm12
+pxor  %xmm6,%xmm12
+
+# qhasm:     xmm13 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm13=int6464#14
+# asm 2: pxor  <xmm3=%xmm3,<xmm13=%xmm13
+pxor  %xmm3,%xmm13
+
+# qhasm:     xmm11 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm11=int6464#12
+# asm 2: pxor  <xmm5=%xmm5,<xmm11=%xmm11
+pxor  %xmm5,%xmm11
+
+# qhasm:     xmm14 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm14=int6464#15
+# asm 2: pxor  <xmm7=%xmm7,<xmm14=%xmm14
+pxor  %xmm7,%xmm14
+
+# qhasm:     xmm15 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm15=int6464#16
+# asm 2: pxor  <xmm2=%xmm2,<xmm15=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm:     xmm12 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm12=int6464#13
+# asm 2: pxor  <xmm5=%xmm5,<xmm12=%xmm12
+pxor  %xmm5,%xmm12
+
+# qhasm:     xmm0 = shuffle dwords of xmm0 by 0x4E
+# asm 1: pshufd $0x4E,<xmm0=int6464#1,>xmm0=int6464#1
+# asm 2: pshufd $0x4E,<xmm0=%xmm0,>xmm0=%xmm0
+pshufd $0x4E,%xmm0,%xmm0
+
+# qhasm:     xmm1 = shuffle dwords of xmm1 by 0x4E
+# asm 1: pshufd $0x4E,<xmm1=int6464#2,>xmm1=int6464#2
+# asm 2: pshufd $0x4E,<xmm1=%xmm1,>xmm1=%xmm1
+pshufd $0x4E,%xmm1,%xmm1
+
+# qhasm:     xmm4 = shuffle dwords of xmm4 by 0x4E
+# asm 1: pshufd $0x4E,<xmm4=int6464#5,>xmm4=int6464#5
+# asm 2: pshufd $0x4E,<xmm4=%xmm4,>xmm4=%xmm4
+pshufd $0x4E,%xmm4,%xmm4
+
+# qhasm:     xmm6 = shuffle dwords of xmm6 by 0x4E
+# asm 1: pshufd $0x4E,<xmm6=int6464#7,>xmm6=int6464#7
+# asm 2: pshufd $0x4E,<xmm6=%xmm6,>xmm6=%xmm6
+pshufd $0x4E,%xmm6,%xmm6
+
+# qhasm:     xmm3 = shuffle dwords of xmm3 by 0x4E
+# asm 1: pshufd $0x4E,<xmm3=int6464#4,>xmm3=int6464#4
+# asm 2: pshufd $0x4E,<xmm3=%xmm3,>xmm3=%xmm3
+pshufd $0x4E,%xmm3,%xmm3
+
+# qhasm:     xmm7 = shuffle dwords of xmm7 by 0x4E
+# asm 1: pshufd $0x4E,<xmm7=int6464#8,>xmm7=int6464#8
+# asm 2: pshufd $0x4E,<xmm7=%xmm7,>xmm7=%xmm7
+pshufd $0x4E,%xmm7,%xmm7
+
+# qhasm:     xmm2 = shuffle dwords of xmm2 by 0x4E
+# asm 1: pshufd $0x4E,<xmm2=int6464#3,>xmm2=int6464#3
+# asm 2: pshufd $0x4E,<xmm2=%xmm2,>xmm2=%xmm2
+pshufd $0x4E,%xmm2,%xmm2
+
+# qhasm:     xmm5 = shuffle dwords of xmm5 by 0x4E
+# asm 1: pshufd $0x4E,<xmm5=int6464#6,>xmm5=int6464#6
+# asm 2: pshufd $0x4E,<xmm5=%xmm5,>xmm5=%xmm5
+pshufd $0x4E,%xmm5,%xmm5
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm9 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm1=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:     xmm10 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#5,<xmm10=int6464#11
+# asm 2: pxor  <xmm4=%xmm4,<xmm10=%xmm10
+pxor  %xmm4,%xmm10
+
+# qhasm:     xmm11 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#7,<xmm11=int6464#12
+# asm 2: pxor  <xmm6=%xmm6,<xmm11=%xmm11
+pxor  %xmm6,%xmm11
+
+# qhasm:     xmm12 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#4,<xmm12=int6464#13
+# asm 2: pxor  <xmm3=%xmm3,<xmm12=%xmm12
+pxor  %xmm3,%xmm12
+
+# qhasm:     xmm13 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm13=int6464#14
+# asm 2: pxor  <xmm7=%xmm7,<xmm13=%xmm13
+pxor  %xmm7,%xmm13
+
+# qhasm:     xmm14 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm14=int6464#15
+# asm 2: pxor  <xmm2=%xmm2,<xmm14=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm:     xmm15 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#6,<xmm15=int6464#16
+# asm 2: pxor  <xmm5=%xmm5,<xmm15=%xmm15
+pxor  %xmm5,%xmm15
+
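+# comment:xor the eight 16-byte round-key words at c+1152..c+1264 into the state, then byte-shuffle each register by SRM0 (presumably a ShiftRows-style permutation)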
+# qhasm:     xmm8 ^= *(int128 *)(c + 1152)
+# asm 1: pxor 1152(<c=int64#5),<xmm8=int6464#9
+# asm 2: pxor 1152(<c=%r8),<xmm8=%xmm8
+pxor 1152(%r8),%xmm8
+
+# qhasm:     shuffle bytes of xmm8 by SRM0
+# asm 1: pshufb SRM0,<xmm8=int6464#9
+# asm 2: pshufb SRM0,<xmm8=%xmm8
+pshufb SRM0,%xmm8
+
+# qhasm:     xmm9 ^= *(int128 *)(c + 1168)
+# asm 1: pxor 1168(<c=int64#5),<xmm9=int6464#10
+# asm 2: pxor 1168(<c=%r8),<xmm9=%xmm9
+pxor 1168(%r8),%xmm9
+
+# qhasm:     shuffle bytes of xmm9 by SRM0
+# asm 1: pshufb SRM0,<xmm9=int6464#10
+# asm 2: pshufb SRM0,<xmm9=%xmm9
+pshufb SRM0,%xmm9
+
+# qhasm:     xmm10 ^= *(int128 *)(c + 1184)
+# asm 1: pxor 1184(<c=int64#5),<xmm10=int6464#11
+# asm 2: pxor 1184(<c=%r8),<xmm10=%xmm10
+pxor 1184(%r8),%xmm10
+
+# qhasm:     shuffle bytes of xmm10 by SRM0
+# asm 1: pshufb SRM0,<xmm10=int6464#11
+# asm 2: pshufb SRM0,<xmm10=%xmm10
+pshufb SRM0,%xmm10
+
+# qhasm:     xmm11 ^= *(int128 *)(c + 1200)
+# asm 1: pxor 1200(<c=int64#5),<xmm11=int6464#12
+# asm 2: pxor 1200(<c=%r8),<xmm11=%xmm11
+pxor 1200(%r8),%xmm11
+
+# qhasm:     shuffle bytes of xmm11 by SRM0
+# asm 1: pshufb SRM0,<xmm11=int6464#12
+# asm 2: pshufb SRM0,<xmm11=%xmm11
+pshufb SRM0,%xmm11
+
+# qhasm:     xmm12 ^= *(int128 *)(c + 1216)
+# asm 1: pxor 1216(<c=int64#5),<xmm12=int6464#13
+# asm 2: pxor 1216(<c=%r8),<xmm12=%xmm12
+pxor 1216(%r8),%xmm12
+
+# qhasm:     shuffle bytes of xmm12 by SRM0
+# asm 1: pshufb SRM0,<xmm12=int6464#13
+# asm 2: pshufb SRM0,<xmm12=%xmm12
+pshufb SRM0,%xmm12
+
+# qhasm:     xmm13 ^= *(int128 *)(c + 1232)
+# asm 1: pxor 1232(<c=int64#5),<xmm13=int6464#14
+# asm 2: pxor 1232(<c=%r8),<xmm13=%xmm13
+pxor 1232(%r8),%xmm13
+
+# qhasm:     shuffle bytes of xmm13 by SRM0
+# asm 1: pshufb SRM0,<xmm13=int6464#14
+# asm 2: pshufb SRM0,<xmm13=%xmm13
+pshufb SRM0,%xmm13
+
+# qhasm:     xmm14 ^= *(int128 *)(c + 1248)
+# asm 1: pxor 1248(<c=int64#5),<xmm14=int6464#15
+# asm 2: pxor 1248(<c=%r8),<xmm14=%xmm14
+pxor 1248(%r8),%xmm14
+
+# qhasm:     shuffle bytes of xmm14 by SRM0
+# asm 1: pshufb SRM0,<xmm14=int6464#15
+# asm 2: pshufb SRM0,<xmm14=%xmm14
+pshufb SRM0,%xmm14
+
+# qhasm:     xmm15 ^= *(int128 *)(c + 1264)
+# asm 1: pxor 1264(<c=int64#5),<xmm15=int6464#16
+# asm 2: pxor 1264(<c=%r8),<xmm15=%xmm15
+pxor 1264(%r8),%xmm15
+
+# qhasm:     shuffle bytes of xmm15 by SRM0
+# asm 1: pshufb SRM0,<xmm15=int6464#16
+# asm 2: pshufb SRM0,<xmm15=%xmm15
+pshufb SRM0,%xmm15
+
+# qhasm:       xmm13 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm13=int6464#14
+# asm 2: pxor  <xmm14=%xmm14,<xmm13=%xmm13
+pxor  %xmm14,%xmm13
+
+# qhasm:       xmm10 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm10=int6464#11
+# asm 2: pxor  <xmm9=%xmm9,<xmm10=%xmm10
+pxor  %xmm9,%xmm10
+
+# qhasm:       xmm13 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm13=int6464#14
+# asm 2: pxor  <xmm8=%xmm8,<xmm13=%xmm13
+pxor  %xmm8,%xmm13
+
+# qhasm:       xmm14 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm14=int6464#15
+# asm 2: pxor  <xmm10=%xmm10,<xmm14=%xmm14
+pxor  %xmm10,%xmm14
+
+# qhasm:       xmm11 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm11=int6464#12
+# asm 2: pxor  <xmm8=%xmm8,<xmm11=%xmm11
+pxor  %xmm8,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
+# qhasm:       xmm11 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm11=int6464#12
+# asm 2: pxor  <xmm15=%xmm15,<xmm11=%xmm11
+pxor  %xmm15,%xmm11
+
+# qhasm:       xmm11 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm11=int6464#12
+# asm 2: pxor  <xmm12=%xmm12,<xmm11=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm:       xmm15 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm15=int6464#16
+# asm 2: pxor  <xmm13=%xmm13,<xmm15=%xmm15
+pxor  %xmm13,%xmm15
+
+# qhasm:       xmm11 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm11=int6464#12
+# asm 2: pxor  <xmm9=%xmm9,<xmm11=%xmm11
+pxor  %xmm9,%xmm11
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm10=int6464#11
+# asm 2: pxor  <xmm15=%xmm15,<xmm10=%xmm10
+pxor  %xmm15,%xmm10
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm3 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm3=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm3=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:       xmm2 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm2=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm2=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:       xmm1 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm1=int6464#3
+# asm 2: movdqa <xmm13=%xmm13,>xmm1=%xmm2
+movdqa %xmm13,%xmm2
+
+# qhasm:       xmm5 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm5=int6464#4
+# asm 2: movdqa <xmm10=%xmm10,>xmm5=%xmm3
+movdqa %xmm10,%xmm3
+
+# qhasm:       xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#5
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm4
+movdqa %xmm14,%xmm4
+
+# qhasm:       xmm3 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm3=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm3=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:       xmm2 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm2=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm2=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:       xmm1 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm1=int6464#3
+# asm 2: pxor  <xmm11=%xmm11,<xmm1=%xmm2
+pxor  %xmm11,%xmm2
+
+# qhasm:       xmm5 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm5=int6464#4
+# asm 2: pxor  <xmm12=%xmm12,<xmm5=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#5
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm4
+pxor  %xmm8,%xmm4
+
+# qhasm:       xmm6 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm6=int6464#6
+# asm 2: movdqa <xmm3=%xmm0,>xmm6=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:       xmm0 = xmm2
+# asm 1: movdqa <xmm2=int6464#2,>xmm0=int6464#7
+# asm 2: movdqa <xmm2=%xmm1,>xmm0=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm:       xmm7 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm7=int6464#8
+# asm 2: movdqa <xmm3=%xmm0,>xmm7=%xmm7
+movdqa %xmm0,%xmm7
+
+# qhasm:       xmm2 |= xmm1
+# asm 1: por   <xmm1=int6464#3,<xmm2=int6464#2
+# asm 2: por   <xmm1=%xmm2,<xmm2=%xmm1
+por   %xmm2,%xmm1
+
+# qhasm:       xmm3 |= xmm4
+# asm 1: por   <xmm4=int6464#5,<xmm3=int6464#1
+# asm 2: por   <xmm4=%xmm4,<xmm3=%xmm0
+por   %xmm4,%xmm0
+
+# qhasm:       xmm7 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm7=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm7=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm6 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm6=int6464#6
+# asm 2: pand  <xmm4=%xmm4,<xmm6=%xmm5
+pand  %xmm4,%xmm5
+
+# qhasm:       xmm0 &= xmm1
+# asm 1: pand  <xmm1=int6464#3,<xmm0=int6464#7
+# asm 2: pand  <xmm1=%xmm2,<xmm0=%xmm6
+pand  %xmm2,%xmm6
+
+# qhasm:       xmm4 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#3,<xmm4=int6464#5
+# asm 2: pxor  <xmm1=%xmm2,<xmm4=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm:       xmm7 &= xmm4
+# asm 1: pand  <xmm4=int6464#5,<xmm7=int6464#8
+# asm 2: pand  <xmm4=%xmm4,<xmm7=%xmm7
+pand  %xmm4,%xmm7
+
+# qhasm:       xmm4 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm4=int6464#3
+# asm 2: movdqa <xmm11=%xmm11,>xmm4=%xmm2
+movdqa %xmm11,%xmm2
+
+# qhasm:       xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#3
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm2
+pxor  %xmm8,%xmm2
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#3,<xmm5=int6464#4
+# asm 2: pand  <xmm4=%xmm2,<xmm5=%xmm3
+pand  %xmm2,%xmm3
+
+# qhasm:       xmm3 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm3=int6464#1
+# asm 2: pxor  <xmm5=%xmm3,<xmm3=%xmm0
+pxor  %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm5 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm5=int6464#3
+# asm 2: movdqa <xmm15=%xmm15,>xmm5=%xmm2
+movdqa %xmm15,%xmm2
+
+# qhasm:       xmm5 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm5=int6464#3
+# asm 2: pxor  <xmm9=%xmm9,<xmm5=%xmm2
+pxor  %xmm9,%xmm2
+
+# qhasm:       xmm4 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm4=int6464#4
+# asm 2: movdqa <xmm13=%xmm13,>xmm4=%xmm3
+movdqa %xmm13,%xmm3
+
+# qhasm:       xmm1 = xmm5
+# asm 1: movdqa <xmm5=int6464#3,>xmm1=int6464#5
+# asm 2: movdqa <xmm5=%xmm2,>xmm1=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm:       xmm4 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm4=int6464#4
+# asm 2: pxor  <xmm14=%xmm14,<xmm4=%xmm3
+pxor  %xmm14,%xmm3
+
+# qhasm:       xmm1 |= xmm4
+# asm 1: por   <xmm4=int6464#4,<xmm1=int6464#5
+# asm 2: por   <xmm4=%xmm3,<xmm1=%xmm4
+por   %xmm3,%xmm4
+
+# qhasm:       xmm5 &= xmm4
+# asm 1: pand  <xmm4=int6464#4,<xmm5=int6464#3
+# asm 2: pand  <xmm4=%xmm3,<xmm5=%xmm2
+pand  %xmm3,%xmm2
+
+# qhasm:       xmm0 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#3,<xmm0=int6464#7
+# asm 2: pxor  <xmm5=%xmm2,<xmm0=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:       xmm3 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm3=int6464#1
+# asm 2: pxor  <xmm7=%xmm7,<xmm3=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm2=int6464#2
+# asm 2: pxor  <xmm6=%xmm5,<xmm2=%xmm1
+pxor  %xmm5,%xmm1
+
+# qhasm:       xmm1 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm7=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm0 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm0=int6464#7
+# asm 2: pxor  <xmm6=%xmm5,<xmm0=%xmm6
+pxor  %xmm5,%xmm6
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm4 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm4=int6464#3
+# asm 2: movdqa <xmm10=%xmm10,>xmm4=%xmm2
+movdqa %xmm10,%xmm2
+
+# qhasm:       xmm5 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm5=int6464#4
+# asm 2: movdqa <xmm12=%xmm12,>xmm5=%xmm3
+movdqa %xmm12,%xmm3
+
+# qhasm:       xmm6 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm6=int6464#6
+# asm 2: movdqa <xmm9=%xmm9,>xmm6=%xmm5
+movdqa %xmm9,%xmm5
+
+# qhasm:       xmm7 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm7=int6464#8
+# asm 2: movdqa <xmm15=%xmm15,>xmm7=%xmm7
+movdqa %xmm15,%xmm7
+
+# qhasm:       xmm4 &= xmm11
+# asm 1: pand  <xmm11=int6464#12,<xmm4=int6464#3
+# asm 2: pand  <xmm11=%xmm11,<xmm4=%xmm2
+pand  %xmm11,%xmm2
+
+# qhasm:       xmm5 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm5=int6464#4
+# asm 2: pand  <xmm8=%xmm8,<xmm5=%xmm3
+pand  %xmm8,%xmm3
+
+# qhasm:       xmm6 &= xmm13
+# asm 1: pand  <xmm13=int6464#14,<xmm6=int6464#6
+# asm 2: pand  <xmm13=%xmm13,<xmm6=%xmm5
+pand  %xmm13,%xmm5
+
+# qhasm:       xmm7 |= xmm14
+# asm 1: por   <xmm14=int6464#15,<xmm7=int6464#8
+# asm 2: por   <xmm14=%xmm14,<xmm7=%xmm7
+por   %xmm14,%xmm7
+
+# qhasm:       xmm3 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm3=int6464#1
+# asm 2: pxor  <xmm4=%xmm2,<xmm3=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#4,<xmm2=int6464#2
+# asm 2: pxor  <xmm5=%xmm3,<xmm2=%xmm1
+pxor  %xmm3,%xmm1
+
+# qhasm:       xmm1 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#6,<xmm1=int6464#5
+# asm 2: pxor  <xmm6=%xmm5,<xmm1=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm:       xmm0 ^= xmm7
+# asm 1: pxor  <xmm7=int6464#8,<xmm0=int6464#7
+# asm 2: pxor  <xmm7=%xmm7,<xmm0=%xmm6
+pxor  %xmm7,%xmm6
+
+# qhasm:       xmm4 = xmm3
+# asm 1: movdqa <xmm3=int6464#1,>xmm4=int6464#3
+# asm 2: movdqa <xmm3=%xmm0,>xmm4=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm:       xmm4 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm4=int6464#3
+# asm 2: pxor  <xmm2=%xmm1,<xmm4=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm:       xmm3 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm3=int6464#1
+# asm 2: pand  <xmm1=%xmm4,<xmm3=%xmm0
+pand  %xmm4,%xmm0
+
+# qhasm:       xmm6 = xmm0
+# asm 1: movdqa <xmm0=int6464#7,>xmm6=int6464#4
+# asm 2: movdqa <xmm0=%xmm6,>xmm6=%xmm3
+movdqa %xmm6,%xmm3
+
+# qhasm:       xmm6 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm3=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm7 = xmm4
+# asm 1: movdqa <xmm4=int6464#3,>xmm7=int6464#6
+# asm 2: movdqa <xmm4=%xmm2,>xmm7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:       xmm7 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm7=int6464#6
+# asm 2: pand  <xmm6=%xmm3,<xmm7=%xmm5
+pand  %xmm3,%xmm5
+
+# qhasm:       xmm7 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm7=int6464#6
+# asm 2: pxor  <xmm2=%xmm1,<xmm7=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:       xmm5 = xmm1
+# asm 1: movdqa <xmm1=int6464#5,>xmm5=int6464#8
+# asm 2: movdqa <xmm1=%xmm4,>xmm5=%xmm7
+movdqa %xmm4,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm3 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#2,<xmm3=int6464#1
+# asm 2: pxor  <xmm2=%xmm1,<xmm3=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:       xmm5 &= xmm3
+# asm 1: pand  <xmm3=int6464#1,<xmm5=int6464#8
+# asm 2: pand  <xmm3=%xmm0,<xmm5=%xmm7
+pand  %xmm0,%xmm7
+
+# qhasm:       xmm5 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#7,<xmm5=int6464#8
+# asm 2: pxor  <xmm0=%xmm6,<xmm5=%xmm7
+pxor  %xmm6,%xmm7
+
+# qhasm:       xmm1 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm1=int6464#5
+# asm 2: pxor  <xmm5=%xmm7,<xmm1=%xmm4
+pxor  %xmm7,%xmm4
+
+# qhasm:       xmm2 = xmm6
+# asm 1: movdqa <xmm6=int6464#4,>xmm2=int6464#1
+# asm 2: movdqa <xmm6=%xmm3,>xmm2=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm:       xmm2 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm2=int6464#1
+# asm 2: pxor  <xmm5=%xmm7,<xmm2=%xmm0
+pxor  %xmm7,%xmm0
+
+# qhasm:       xmm2 &= xmm0
+# asm 1: pand  <xmm0=int6464#7,<xmm2=int6464#1
+# asm 2: pand  <xmm0=%xmm6,<xmm2=%xmm0
+pand  %xmm6,%xmm0
+
+# qhasm:       xmm1 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm1=int6464#5
+# asm 2: pxor  <xmm2=%xmm0,<xmm1=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm:       xmm6 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#1,<xmm6=int6464#4
+# asm 2: pxor  <xmm2=%xmm0,<xmm6=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:       xmm6 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm6=int6464#4
+# asm 2: pand  <xmm7=%xmm5,<xmm6=%xmm3
+pand  %xmm5,%xmm3
+
+# qhasm:       xmm6 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#3,<xmm6=int6464#4
+# asm 2: pxor  <xmm4=%xmm2,<xmm6=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm:         xmm4 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm4=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm4=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:         xmm0 = xmm13
+# asm 1: movdqa <xmm13=int6464#14,>xmm0=int6464#2
+# asm 2: movdqa <xmm13=%xmm13,>xmm0=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm:           xmm2 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm2=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm2=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm2 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm2=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm2=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm2 &= xmm14
+# asm 1: pand  <xmm14=int6464#15,<xmm2=int6464#3
+# asm 2: pand  <xmm14=%xmm14,<xmm2=%xmm2
+pand  %xmm14,%xmm2
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm14 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm14=int6464#15
+# asm 2: pand  <xmm6=%xmm3,<xmm14=%xmm14
+pand  %xmm3,%xmm14
+
+# qhasm:           xmm13 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm13=int6464#14
+# asm 2: pand  <xmm7=%xmm5,<xmm13=%xmm13
+pand  %xmm5,%xmm13
+
+# qhasm:           xmm14 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm14=int6464#15
+# asm 2: pxor  <xmm13=%xmm13,<xmm14=%xmm14
+pxor  %xmm13,%xmm14
+
+# qhasm:           xmm13 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm13=int6464#14
+# asm 2: pxor  <xmm2=%xmm2,<xmm13=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm:         xmm4 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm4=int6464#1
+# asm 2: pxor  <xmm8=%xmm8,<xmm4=%xmm0
+pxor  %xmm8,%xmm0
+
+# qhasm:         xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#2
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm1
+pxor  %xmm11,%xmm1
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm8
+# asm 1: pand  <xmm8=int6464#9,<xmm2=int6464#3
+# asm 2: pand  <xmm8=%xmm8,<xmm2=%xmm2
+pand  %xmm8,%xmm2
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm8 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm8=int6464#9
+# asm 2: pand  <xmm1=%xmm4,<xmm8=%xmm8
+pand  %xmm4,%xmm8
+
+# qhasm:           xmm11 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm11=int6464#12
+# asm 2: pand  <xmm5=%xmm7,<xmm11=%xmm11
+pand  %xmm7,%xmm11
+
+# qhasm:           xmm8 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm8=int6464#9
+# asm 2: pxor  <xmm11=%xmm11,<xmm8=%xmm8
+pxor  %xmm11,%xmm8
+
+# qhasm:           xmm11 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm11=int6464#12
+# asm 2: pxor  <xmm2=%xmm2,<xmm11=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm:         xmm14 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm4=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:         xmm8 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm4=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:         xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm1,<xmm13=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm:         xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm1,<xmm11=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm:         xmm4 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm4=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm4=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:         xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#2
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm1
+movdqa %xmm9,%xmm1
+
+# qhasm:         xmm4 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm4=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm4=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:         xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#2
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm1
+pxor  %xmm10,%xmm1
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm4
+# asm 1: pand  <xmm4=int6464#1,<xmm3=int6464#3
+# asm 2: pand  <xmm4=%xmm0,<xmm3=%xmm2
+pand  %xmm0,%xmm2
+
+# qhasm:           xmm4 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm4=int6464#1
+# asm 2: pxor  <xmm0=%xmm1,<xmm4=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm:           xmm4 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm4=int6464#1
+# asm 2: pand  <xmm6=%xmm3,<xmm4=%xmm0
+pand  %xmm3,%xmm0
+
+# qhasm:           xmm0 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm0=int6464#2
+# asm 2: pand  <xmm7=%xmm5,<xmm0=%xmm1
+pand  %xmm5,%xmm1
+
+# qhasm:           xmm0 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm0=int6464#2
+# asm 2: pxor  <xmm4=%xmm0,<xmm0=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm:           xmm4 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm4=int6464#1
+# asm 2: pxor  <xmm3=%xmm2,<xmm4=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm:           xmm2 = xmm5
+# asm 1: movdqa <xmm5=int6464#8,>xmm2=int6464#3
+# asm 2: movdqa <xmm5=%xmm7,>xmm2=%xmm2
+movdqa %xmm7,%xmm2
+
+# qhasm:           xmm2 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm2=int6464#3
+# asm 2: pxor  <xmm1=%xmm4,<xmm2=%xmm2
+pxor  %xmm4,%xmm2
+
+# qhasm:           xmm2 &= xmm12
+# asm 1: pand  <xmm12=int6464#13,<xmm2=int6464#3
+# asm 2: pand  <xmm12=%xmm12,<xmm2=%xmm2
+pand  %xmm12,%xmm2
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm12 &= xmm1
+# asm 1: pand  <xmm1=int6464#5,<xmm12=int6464#13
+# asm 2: pand  <xmm1=%xmm4,<xmm12=%xmm12
+pand  %xmm4,%xmm12
+
+# qhasm:           xmm10 &= xmm5
+# asm 1: pand  <xmm5=int6464#8,<xmm10=int6464#11
+# asm 2: pand  <xmm5=%xmm7,<xmm10=%xmm10
+pand  %xmm7,%xmm10
+
+# qhasm:           xmm12 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm12=int6464#13
+# asm 2: pxor  <xmm10=%xmm10,<xmm12=%xmm12
+pxor  %xmm10,%xmm12
+
+# qhasm:           xmm10 ^= xmm2
+# asm 1: pxor  <xmm2=int6464#3,<xmm10=int6464#11
+# asm 2: pxor  <xmm2=%xmm2,<xmm10=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm:         xmm7 ^= xmm5
+# asm 1: pxor  <xmm5=int6464#8,<xmm7=int6464#6
+# asm 2: pxor  <xmm5=%xmm7,<xmm7=%xmm5
+pxor  %xmm7,%xmm5
+
+# qhasm:         xmm6 ^= xmm1
+# asm 1: pxor  <xmm1=int6464#5,<xmm6=int6464#4
+# asm 2: pxor  <xmm1=%xmm4,<xmm6=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:           xmm3 = xmm7
+# asm 1: movdqa <xmm7=int6464#6,>xmm3=int6464#3
+# asm 2: movdqa <xmm7=%xmm5,>xmm3=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm:           xmm3 ^= xmm6
+# asm 1: pxor  <xmm6=int6464#4,<xmm3=int6464#3
+# asm 2: pxor  <xmm6=%xmm3,<xmm3=%xmm2
+pxor  %xmm3,%xmm2
+
+# qhasm:           xmm3 &= xmm15
+# asm 1: pand  <xmm15=int6464#16,<xmm3=int6464#3
+# asm 2: pand  <xmm15=%xmm15,<xmm3=%xmm2
+pand  %xmm15,%xmm2
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm15 &= xmm6
+# asm 1: pand  <xmm6=int6464#4,<xmm15=int6464#16
+# asm 2: pand  <xmm6=%xmm3,<xmm15=%xmm15
+pand  %xmm3,%xmm15
+
+# qhasm:           xmm9 &= xmm7
+# asm 1: pand  <xmm7=int6464#6,<xmm9=int6464#10
+# asm 2: pand  <xmm7=%xmm5,<xmm9=%xmm9
+pand  %xmm5,%xmm9
+
+# qhasm:           xmm15 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm15=int6464#16
+# asm 2: pxor  <xmm9=%xmm9,<xmm15=%xmm15
+pxor  %xmm9,%xmm15
+
+# qhasm:           xmm9 ^= xmm3
+# asm 1: pxor  <xmm3=int6464#3,<xmm9=int6464#10
+# asm 2: pxor  <xmm3=%xmm2,<xmm9=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm:         xmm15 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm4=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:         xmm12 ^= xmm4
+# asm 1: pxor  <xmm4=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm4=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:         xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm1,<xmm9=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm:         xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#2,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm1,<xmm10=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm:       xmm15 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm15=int6464#16
+# asm 2: pxor  <xmm8=%xmm8,<xmm15=%xmm15
+pxor  %xmm8,%xmm15
+
+# qhasm:       xmm9 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm9=int6464#10
+# asm 2: pxor  <xmm14=%xmm14,<xmm9=%xmm9
+pxor  %xmm14,%xmm9
+
+# qhasm:       xmm12 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm12=int6464#13
+# asm 2: pxor  <xmm15=%xmm15,<xmm12=%xmm12
+pxor  %xmm15,%xmm12
+
+# qhasm:       xmm14 ^= xmm8
+# asm 1: pxor  <xmm8=int6464#9,<xmm14=int6464#15
+# asm 2: pxor  <xmm8=%xmm8,<xmm14=%xmm14
+pxor  %xmm8,%xmm14
+
+# qhasm:       xmm8 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm8=int6464#9
+# asm 2: pxor  <xmm9=%xmm9,<xmm8=%xmm8
+pxor  %xmm9,%xmm8
+
+# qhasm:       xmm9 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm9=int6464#10
+# asm 2: pxor  <xmm13=%xmm13,<xmm9=%xmm9
+pxor  %xmm13,%xmm9
+
+# qhasm:       xmm13 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm13=int6464#14
+# asm 2: pxor  <xmm10=%xmm10,<xmm13=%xmm13
+pxor  %xmm10,%xmm13
+
+# qhasm:       xmm12 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm12=int6464#13
+# asm 2: pxor  <xmm13=%xmm13,<xmm12=%xmm12
+pxor  %xmm13,%xmm12
+
+# qhasm:       xmm10 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm10=int6464#11
+# asm 2: pxor  <xmm11=%xmm11,<xmm10=%xmm10
+pxor  %xmm11,%xmm10
+
+# qhasm:       xmm11 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm11=int6464#12
+# asm 2: pxor  <xmm13=%xmm13,<xmm11=%xmm11
+pxor  %xmm13,%xmm11
+
+# qhasm:       xmm14 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm14=int6464#15
+# asm 2: pxor  <xmm11=%xmm11,<xmm14=%xmm14
+pxor  %xmm11,%xmm14
+
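+# comment:xor in the eight round-key words at c+1280..c+1392, presumably the final AddRoundKey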
+# qhasm:   xmm8 ^= *(int128 *)(c + 1280)
+# asm 1: pxor 1280(<c=int64#5),<xmm8=int6464#9
+# asm 2: pxor 1280(<c=%r8),<xmm8=%xmm8
+pxor 1280(%r8),%xmm8
+
+# qhasm:   xmm9 ^= *(int128 *)(c + 1296)
+# asm 1: pxor 1296(<c=int64#5),<xmm9=int6464#10
+# asm 2: pxor 1296(<c=%r8),<xmm9=%xmm9
+pxor 1296(%r8),%xmm9
+
+# qhasm:   xmm12 ^= *(int128 *)(c + 1312)
+# asm 1: pxor 1312(<c=int64#5),<xmm12=int6464#13
+# asm 2: pxor 1312(<c=%r8),<xmm12=%xmm12
+pxor 1312(%r8),%xmm12
+
+# qhasm:   xmm14 ^= *(int128 *)(c + 1328)
+# asm 1: pxor 1328(<c=int64#5),<xmm14=int6464#15
+# asm 2: pxor 1328(<c=%r8),<xmm14=%xmm14
+pxor 1328(%r8),%xmm14
+
+# qhasm:   xmm11 ^= *(int128 *)(c + 1344)
+# asm 1: pxor 1344(<c=int64#5),<xmm11=int6464#12
+# asm 2: pxor 1344(<c=%r8),<xmm11=%xmm11
+pxor 1344(%r8),%xmm11
+
+# qhasm:   xmm15 ^= *(int128 *)(c + 1360)
+# asm 1: pxor 1360(<c=int64#5),<xmm15=int6464#16
+# asm 2: pxor 1360(<c=%r8),<xmm15=%xmm15
+pxor 1360(%r8),%xmm15
+
+# qhasm:   xmm10 ^= *(int128 *)(c + 1376)
+# asm 1: pxor 1376(<c=int64#5),<xmm10=int6464#11
+# asm 2: pxor 1376(<c=%r8),<xmm10=%xmm10
+pxor 1376(%r8),%xmm10
+
+# qhasm:   xmm13 ^= *(int128 *)(c + 1392)
+# asm 1: pxor 1392(<c=int64#5),<xmm13=int6464#14
+# asm 2: pxor 1392(<c=%r8),<xmm13=%xmm13
+pxor 1392(%r8),%xmm13
+
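+# comment:the shift/xor/mask/xor/shift (swapmove) passes below, using masks BS0/BS1/BS2 with shift counts 1, 2 and 4, appear to undo the bitslicing and restore byte order before the registers are used as keystream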
+# qhasm:     xmm0 = xmm10
+# asm 1: movdqa <xmm10=int6464#11,>xmm0=int6464#1
+# asm 2: movdqa <xmm10=%xmm10,>xmm0=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm0,<xmm13=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm10=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:     xmm0 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm0=int6464#1
+# asm 2: movdqa <xmm11=%xmm11,>xmm0=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm0=int6464#1
+# asm 2: pxor  <xmm15=%xmm15,<xmm0=%xmm0
+pxor  %xmm15,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm15 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm0=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm0,<xmm11=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm:     xmm0 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm0=int6464#1
+# asm 2: movdqa <xmm12=%xmm12,>xmm0=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm0=int6464#1
+# asm 2: pxor  <xmm14=%xmm14,<xmm0=%xmm0
+pxor  %xmm14,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm14 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm0=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:     xmm0 = xmm8
+# asm 1: movdqa <xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: movdqa <xmm8=%xmm8,>xmm0=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 1
+# asm 1: psrlq $1,<xmm0=int6464#1
+# asm 2: psrlq $1,<xmm0=%xmm0
+psrlq $1,%xmm0
+
+# qhasm:     xmm0 ^= xmm9
+# asm 1: pxor  <xmm9=int6464#10,<xmm0=int6464#1
+# asm 2: pxor  <xmm9=%xmm9,<xmm0=%xmm0
+pxor  %xmm9,%xmm0
+
+# qhasm:     xmm0 &= BS0
+# asm 1: pand  BS0,<xmm0=int6464#1
+# asm 2: pand  BS0,<xmm0=%xmm0
+pand  BS0,%xmm0
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     uint6464 xmm0 <<= 1
+# asm 1: psllq $1,<xmm0=int6464#1
+# asm 2: psllq $1,<xmm0=%xmm0
+psllq $1,%xmm0
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm0 = xmm15
+# asm 1: movdqa <xmm15=int6464#16,>xmm0=int6464#1
+# asm 2: movdqa <xmm15=%xmm15,>xmm0=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm0,<xmm13=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm15 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm0=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:     xmm0 = xmm11
+# asm 1: movdqa <xmm11=int6464#12,>xmm0=int6464#1
+# asm 2: movdqa <xmm11=%xmm11,>xmm0=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#1
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm0
+pxor  %xmm10,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm10=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm0,<xmm11=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm:     xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#1
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm14
+# asm 1: pxor  <xmm14=int6464#15,<xmm0=int6464#1
+# asm 2: pxor  <xmm14=%xmm14,<xmm0=%xmm0
+pxor  %xmm14,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm14 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm0=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm0 = xmm8
+# asm 1: movdqa <xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: movdqa <xmm8=%xmm8,>xmm0=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 2
+# asm 1: psrlq $2,<xmm0=int6464#1
+# asm 2: psrlq $2,<xmm0=%xmm0
+psrlq $2,%xmm0
+
+# qhasm:     xmm0 ^= xmm12
+# asm 1: pxor  <xmm12=int6464#13,<xmm0=int6464#1
+# asm 2: pxor  <xmm12=%xmm12,<xmm0=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm:     xmm0 &= BS1
+# asm 1: pand  BS1,<xmm0=int6464#1
+# asm 2: pand  BS1,<xmm0=%xmm0
+pand  BS1,%xmm0
+
+# qhasm:     xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:     uint6464 xmm0 <<= 2
+# asm 1: psllq $2,<xmm0=int6464#1
+# asm 2: psllq $2,<xmm0=%xmm0
+psllq $2,%xmm0
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm:     xmm0 = xmm14
+# asm 1: movdqa <xmm14=int6464#15,>xmm0=int6464#1
+# asm 2: movdqa <xmm14=%xmm14,>xmm0=%xmm0
+movdqa %xmm14,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm13
+# asm 1: pxor  <xmm13=int6464#14,<xmm0=int6464#1
+# asm 2: pxor  <xmm13=%xmm13,<xmm0=%xmm0
+pxor  %xmm13,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm13 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm13=int6464#14
+# asm 2: pxor  <xmm0=%xmm0,<xmm13=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm14 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm14=int6464#15
+# asm 2: pxor  <xmm0=%xmm0,<xmm14=%xmm14
+pxor  %xmm0,%xmm14
+
+# qhasm:     xmm0 = xmm12
+# asm 1: movdqa <xmm12=int6464#13,>xmm0=int6464#1
+# asm 2: movdqa <xmm12=%xmm12,>xmm0=%xmm0
+movdqa %xmm12,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm10
+# asm 1: pxor  <xmm10=int6464#11,<xmm0=int6464#1
+# asm 2: pxor  <xmm10=%xmm10,<xmm0=%xmm0
+pxor  %xmm10,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm10 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm10=int6464#11
+# asm 2: pxor  <xmm0=%xmm0,<xmm10=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm12 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm12=int6464#13
+# asm 2: pxor  <xmm0=%xmm0,<xmm12=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm:     xmm0 = xmm9
+# asm 1: movdqa <xmm9=int6464#10,>xmm0=int6464#1
+# asm 2: movdqa <xmm9=%xmm9,>xmm0=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm15
+# asm 1: pxor  <xmm15=int6464#16,<xmm0=int6464#1
+# asm 2: pxor  <xmm15=%xmm15,<xmm0=%xmm0
+pxor  %xmm15,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm15 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm15=int6464#16
+# asm 2: pxor  <xmm0=%xmm0,<xmm15=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm9 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm9=int6464#10
+# asm 2: pxor  <xmm0=%xmm0,<xmm9=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm:     xmm0 = xmm8
+# asm 1: movdqa <xmm8=int6464#9,>xmm0=int6464#1
+# asm 2: movdqa <xmm8=%xmm8,>xmm0=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm:     uint6464 xmm0 >>= 4
+# asm 1: psrlq $4,<xmm0=int6464#1
+# asm 2: psrlq $4,<xmm0=%xmm0
+psrlq $4,%xmm0
+
+# qhasm:     xmm0 ^= xmm11
+# asm 1: pxor  <xmm11=int6464#12,<xmm0=int6464#1
+# asm 2: pxor  <xmm11=%xmm11,<xmm0=%xmm0
+pxor  %xmm11,%xmm0
+
+# qhasm:     xmm0 &= BS2
+# asm 1: pand  BS2,<xmm0=int6464#1
+# asm 2: pand  BS2,<xmm0=%xmm0
+pand  BS2,%xmm0
+
+# qhasm:     xmm11 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm11=int6464#12
+# asm 2: pxor  <xmm0=%xmm0,<xmm11=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm:     uint6464 xmm0 <<= 4
+# asm 1: psllq $4,<xmm0=int6464#1
+# asm 2: psllq $4,<xmm0=%xmm0
+psllq $4,%xmm0
+
+# qhasm:     xmm8 ^= xmm0
+# asm 1: pxor  <xmm0=int6464#1,<xmm8=int6464#9
+# asm 2: pxor  <xmm0=%xmm0,<xmm8=%xmm8
+pxor  %xmm0,%xmm8
+
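+# comment:fewer than 128 bytes left goes to partial, exactly 128 goes to full, otherwise fall through, consume one 128-byte batch and loop back to enc_block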
+# qhasm: unsigned<? =? len-128
+# asm 1: cmp  $128,<len=int64#3
+# asm 2: cmp  $128,<len=%rdx
+cmp  $128,%rdx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto partial if unsigned<
+jb ._partial
+# comment:fp stack unchanged by jump
+
+# qhasm: goto full if =
+je ._full
+
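+# comment:bump the 32-bit big-endian block counter at np+12 by 8, one per 16-byte block produced in this batch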
+# qhasm: tmp = *(uint32 *)(np + 12)
+# asm 1: movl   12(<np=int64#4),>tmp=int64#6d
+# asm 2: movl   12(<np=%rcx),>tmp=%r9d
+movl   12(%rcx),%r9d
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#6d
+# asm 2: bswap <tmp=%r9d
+bswap %r9d
+
+# qhasm: tmp += 8
+# asm 1: add  $8,<tmp=int64#6
+# asm 2: add  $8,<tmp=%r9
+add  $8,%r9
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#6d
+# asm 2: bswap <tmp=%r9d
+bswap %r9d
+
+# qhasm: *(uint32 *)(np + 12) = tmp
+# asm 1: movl   <tmp=int64#6d,12(<np=int64#4)
+# asm 2: movl   <tmp=%r9d,12(<np=%rcx)
+movl   %r9d,12(%rcx)
+
+# qhasm: xmm8 ^= *(int128 *)(inp + 0)
+# asm 1: pxor 0(<inp=int64#2),<xmm8=int6464#9
+# asm 2: pxor 0(<inp=%rsi),<xmm8=%xmm8
+pxor 0(%rsi),%xmm8
+
+# qhasm: xmm9 ^= *(int128 *)(inp + 16)
+# asm 1: pxor 16(<inp=int64#2),<xmm9=int6464#10
+# asm 2: pxor 16(<inp=%rsi),<xmm9=%xmm9
+pxor 16(%rsi),%xmm9
+
+# qhasm: xmm12 ^= *(int128 *)(inp + 32)
+# asm 1: pxor 32(<inp=int64#2),<xmm12=int6464#13
+# asm 2: pxor 32(<inp=%rsi),<xmm12=%xmm12
+pxor 32(%rsi),%xmm12
+
+# qhasm: xmm14 ^= *(int128 *)(inp + 48)
+# asm 1: pxor 48(<inp=int64#2),<xmm14=int6464#15
+# asm 2: pxor 48(<inp=%rsi),<xmm14=%xmm14
+pxor 48(%rsi),%xmm14
+
+# qhasm: xmm11 ^= *(int128 *)(inp + 64)
+# asm 1: pxor 64(<inp=int64#2),<xmm11=int6464#12
+# asm 2: pxor 64(<inp=%rsi),<xmm11=%xmm11
+pxor 64(%rsi),%xmm11
+
+# qhasm: xmm15 ^= *(int128 *)(inp + 80)
+# asm 1: pxor 80(<inp=int64#2),<xmm15=int6464#16
+# asm 2: pxor 80(<inp=%rsi),<xmm15=%xmm15
+pxor 80(%rsi),%xmm15
+
+# qhasm: xmm10 ^= *(int128 *)(inp + 96)
+# asm 1: pxor 96(<inp=int64#2),<xmm10=int6464#11
+# asm 2: pxor 96(<inp=%rsi),<xmm10=%xmm10
+pxor 96(%rsi),%xmm10
+
+# qhasm: xmm13 ^= *(int128 *)(inp + 112)
+# asm 1: pxor 112(<inp=int64#2),<xmm13=int6464#14
+# asm 2: pxor 112(<inp=%rsi),<xmm13=%xmm13
+pxor 112(%rsi),%xmm13
+
+# qhasm: *(int128 *) (outp + 0) = xmm8
+# asm 1: movdqa <xmm8=int6464#9,0(<outp=int64#1)
+# asm 2: movdqa <xmm8=%xmm8,0(<outp=%rdi)
+movdqa %xmm8,0(%rdi)
+
+# qhasm: *(int128 *) (outp + 16) = xmm9
+# asm 1: movdqa <xmm9=int6464#10,16(<outp=int64#1)
+# asm 2: movdqa <xmm9=%xmm9,16(<outp=%rdi)
+movdqa %xmm9,16(%rdi)
+
+# qhasm: *(int128 *) (outp + 32) = xmm12
+# asm 1: movdqa <xmm12=int6464#13,32(<outp=int64#1)
+# asm 2: movdqa <xmm12=%xmm12,32(<outp=%rdi)
+movdqa %xmm12,32(%rdi)
+
+# qhasm: *(int128 *) (outp + 48) = xmm14
+# asm 1: movdqa <xmm14=int6464#15,48(<outp=int64#1)
+# asm 2: movdqa <xmm14=%xmm14,48(<outp=%rdi)
+movdqa %xmm14,48(%rdi)
+
+# qhasm: *(int128 *) (outp + 64) = xmm11
+# asm 1: movdqa <xmm11=int6464#12,64(<outp=int64#1)
+# asm 2: movdqa <xmm11=%xmm11,64(<outp=%rdi)
+movdqa %xmm11,64(%rdi)
+
+# qhasm: *(int128 *) (outp + 80) = xmm15
+# asm 1: movdqa <xmm15=int6464#16,80(<outp=int64#1)
+# asm 2: movdqa <xmm15=%xmm15,80(<outp=%rdi)
+movdqa %xmm15,80(%rdi)
+
+# qhasm: *(int128 *) (outp + 96) = xmm10
+# asm 1: movdqa <xmm10=int6464#11,96(<outp=int64#1)
+# asm 2: movdqa <xmm10=%xmm10,96(<outp=%rdi)
+movdqa %xmm10,96(%rdi)
+
+# qhasm: *(int128 *) (outp + 112) = xmm13
+# asm 1: movdqa <xmm13=int6464#14,112(<outp=int64#1)
+# asm 2: movdqa <xmm13=%xmm13,112(<outp=%rdi)
+movdqa %xmm13,112(%rdi)
+
+# qhasm: len -= 128
+# asm 1: sub  $128,<len=int64#3
+# asm 2: sub  $128,<len=%rdx
+sub  $128,%rdx
+
+# qhasm: inp += 128
+# asm 1: add  $128,<inp=int64#2
+# asm 2: add  $128,<inp=%rsi
+add  $128,%rsi
+
+# qhasm: outp += 128
+# asm 1: add  $128,<outp=int64#1
+# asm 2: add  $128,<outp=%rdi
+add  $128,%rdi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto enc_block
+jmp ._enc_block
+
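+# comment:partial path: advance the counter at np+12 by len>>4, spill the keystream registers to the stack buffer bl, then xor the remaining bytes into the output one at a time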
+# qhasm: partial:
+._partial:
+
+# qhasm: lensav = len
+# asm 1: mov  <len=int64#3,>lensav=int64#5
+# asm 2: mov  <len=%rdx,>lensav=%r8
+mov  %rdx,%r8
+
+# qhasm: (uint32) len >>= 4
+# asm 1: shr  $4,<len=int64#3d
+# asm 2: shr  $4,<len=%edx
+shr  $4,%edx
+
+# qhasm: tmp = *(uint32 *)(np + 12)
+# asm 1: movl   12(<np=int64#4),>tmp=int64#6d
+# asm 2: movl   12(<np=%rcx),>tmp=%r9d
+movl   12(%rcx),%r9d
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#6d
+# asm 2: bswap <tmp=%r9d
+bswap %r9d
+
+# qhasm: tmp += len
+# asm 1: add  <len=int64#3,<tmp=int64#6
+# asm 2: add  <len=%rdx,<tmp=%r9
+add  %rdx,%r9
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#6d
+# asm 2: bswap <tmp=%r9d
+bswap %r9d
+
+# qhasm: *(uint32 *)(np + 12) = tmp
+# asm 1: movl   <tmp=int64#6d,12(<np=int64#4)
+# asm 2: movl   <tmp=%r9d,12(<np=%rcx)
+movl   %r9d,12(%rcx)
+
+# qhasm: blp = &bl
+# asm 1: leaq <bl=stack1024#1,>blp=int64#3
+# asm 2: leaq <bl=32(%rsp),>blp=%rdx
+leaq 32(%rsp),%rdx
+
+# qhasm: *(int128 *)(blp + 0) = xmm8
+# asm 1: movdqa <xmm8=int6464#9,0(<blp=int64#3)
+# asm 2: movdqa <xmm8=%xmm8,0(<blp=%rdx)
+movdqa %xmm8,0(%rdx)
+
+# qhasm: *(int128 *)(blp + 16) = xmm9
+# asm 1: movdqa <xmm9=int6464#10,16(<blp=int64#3)
+# asm 2: movdqa <xmm9=%xmm9,16(<blp=%rdx)
+movdqa %xmm9,16(%rdx)
+
+# qhasm: *(int128 *)(blp + 32) = xmm12
+# asm 1: movdqa <xmm12=int6464#13,32(<blp=int64#3)
+# asm 2: movdqa <xmm12=%xmm12,32(<blp=%rdx)
+movdqa %xmm12,32(%rdx)
+
+# qhasm: *(int128 *)(blp + 48) = xmm14
+# asm 1: movdqa <xmm14=int6464#15,48(<blp=int64#3)
+# asm 2: movdqa <xmm14=%xmm14,48(<blp=%rdx)
+movdqa %xmm14,48(%rdx)
+
+# qhasm: *(int128 *)(blp + 64) = xmm11
+# asm 1: movdqa <xmm11=int6464#12,64(<blp=int64#3)
+# asm 2: movdqa <xmm11=%xmm11,64(<blp=%rdx)
+movdqa %xmm11,64(%rdx)
+
+# qhasm: *(int128 *)(blp + 80) = xmm15
+# asm 1: movdqa <xmm15=int6464#16,80(<blp=int64#3)
+# asm 2: movdqa <xmm15=%xmm15,80(<blp=%rdx)
+movdqa %xmm15,80(%rdx)
+
+# qhasm: *(int128 *)(blp + 96) = xmm10
+# asm 1: movdqa <xmm10=int6464#11,96(<blp=int64#3)
+# asm 2: movdqa <xmm10=%xmm10,96(<blp=%rdx)
+movdqa %xmm10,96(%rdx)
+
+# qhasm: *(int128 *)(blp + 112) = xmm13
+# asm 1: movdqa <xmm13=int6464#14,112(<blp=int64#3)
+# asm 2: movdqa <xmm13=%xmm13,112(<blp=%rdx)
+movdqa %xmm13,112(%rdx)
+
+# qhasm: bytes:
+._bytes:
+
+# qhasm: =? lensav-0
+# asm 1: cmp  $0,<lensav=int64#5
+# asm 2: cmp  $0,<lensav=%r8
+cmp  $0,%r8
+# comment:fp stack unchanged by jump
+
+# qhasm: goto end if =
+je ._end
+
+# qhasm: b = *(uint8 *)(blp + 0)
+# asm 1: movzbq 0(<blp=int64#3),>b=int64#4
+# asm 2: movzbq 0(<blp=%rdx),>b=%rcx
+movzbq 0(%rdx),%rcx
+
+# qhasm: (uint8) b ^= *(uint8 *)(inp + 0)
+# asm 1: xorb 0(<inp=int64#2),<b=int64#4b
+# asm 2: xorb 0(<inp=%rsi),<b=%cl
+xorb 0(%rsi),%cl
+
+# qhasm: *(uint8 *)(outp + 0) = b
+# asm 1: movb   <b=int64#4b,0(<outp=int64#1)
+# asm 2: movb   <b=%cl,0(<outp=%rdi)
+movb   %cl,0(%rdi)
+
+# qhasm: blp += 1
+# asm 1: add  $1,<blp=int64#3
+# asm 2: add  $1,<blp=%rdx
+add  $1,%rdx
+
+# qhasm: inp +=1
+# asm 1: add  $1,<inp=int64#2
+# asm 2: add  $1,<inp=%rsi
+add  $1,%rsi
+
+# qhasm: outp +=1
+# asm 1: add  $1,<outp=int64#1
+# asm 2: add  $1,<outp=%rdi
+add  $1,%rdi
+
+# qhasm: lensav -= 1
+# asm 1: sub  $1,<lensav=int64#5
+# asm 2: sub  $1,<lensav=%r8
+sub  $1,%r8
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytes
+jmp ._bytes
+
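+# comment:full path: exactly 128 bytes remain, so advance the counter by 8, xor the eight keystream blocks with the input, store the result and fall through to end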
+# qhasm: full:
+._full:
+
+# qhasm: tmp = *(uint32 *)(np + 12)
+# asm 1: movl   12(<np=int64#4),>tmp=int64#3d
+# asm 2: movl   12(<np=%rcx),>tmp=%edx
+movl   12(%rcx),%edx
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#3d
+# asm 2: bswap <tmp=%edx
+bswap %edx
+
+# qhasm: tmp += 8
+# asm 1: add  $8,<tmp=int64#3
+# asm 2: add  $8,<tmp=%rdx
+add  $8,%rdx
+
+# qhasm: (uint32) bswap tmp
+# asm 1: bswap <tmp=int64#3d
+# asm 2: bswap <tmp=%edx
+bswap %edx
+
+# qhasm: *(uint32 *)(np + 12) = tmp
+# asm 1: movl   <tmp=int64#3d,12(<np=int64#4)
+# asm 2: movl   <tmp=%edx,12(<np=%rcx)
+movl   %edx,12(%rcx)
+
+# qhasm: xmm8 ^= *(int128 *)(inp + 0)
+# asm 1: pxor 0(<inp=int64#2),<xmm8=int6464#9
+# asm 2: pxor 0(<inp=%rsi),<xmm8=%xmm8
+pxor 0(%rsi),%xmm8
+
+# qhasm: xmm9 ^= *(int128 *)(inp + 16)
+# asm 1: pxor 16(<inp=int64#2),<xmm9=int6464#10
+# asm 2: pxor 16(<inp=%rsi),<xmm9=%xmm9
+pxor 16(%rsi),%xmm9
+
+# qhasm: xmm12 ^= *(int128 *)(inp + 32)
+# asm 1: pxor 32(<inp=int64#2),<xmm12=int6464#13
+# asm 2: pxor 32(<inp=%rsi),<xmm12=%xmm12
+pxor 32(%rsi),%xmm12
+
+# qhasm: xmm14 ^= *(int128 *)(inp + 48)
+# asm 1: pxor 48(<inp=int64#2),<xmm14=int6464#15
+# asm 2: pxor 48(<inp=%rsi),<xmm14=%xmm14
+pxor 48(%rsi),%xmm14
+
+# qhasm: xmm11 ^= *(int128 *)(inp + 64)
+# asm 1: pxor 64(<inp=int64#2),<xmm11=int6464#12
+# asm 2: pxor 64(<inp=%rsi),<xmm11=%xmm11
+pxor 64(%rsi),%xmm11
+
+# qhasm: xmm15 ^= *(int128 *)(inp + 80)
+# asm 1: pxor 80(<inp=int64#2),<xmm15=int6464#16
+# asm 2: pxor 80(<inp=%rsi),<xmm15=%xmm15
+pxor 80(%rsi),%xmm15
+
+# qhasm: xmm10 ^= *(int128 *)(inp + 96)
+# asm 1: pxor 96(<inp=int64#2),<xmm10=int6464#11
+# asm 2: pxor 96(<inp=%rsi),<xmm10=%xmm10
+pxor 96(%rsi),%xmm10
+
+# qhasm: xmm13 ^= *(int128 *)(inp + 112)
+# asm 1: pxor 112(<inp=int64#2),<xmm13=int6464#14
+# asm 2: pxor 112(<inp=%rsi),<xmm13=%xmm13
+pxor 112(%rsi),%xmm13
+
+# qhasm: *(int128 *) (outp + 0) = xmm8
+# asm 1: movdqa <xmm8=int6464#9,0(<outp=int64#1)
+# asm 2: movdqa <xmm8=%xmm8,0(<outp=%rdi)
+movdqa %xmm8,0(%rdi)
+
+# qhasm: *(int128 *) (outp + 16) = xmm9
+# asm 1: movdqa <xmm9=int6464#10,16(<outp=int64#1)
+# asm 2: movdqa <xmm9=%xmm9,16(<outp=%rdi)
+movdqa %xmm9,16(%rdi)
+
+# qhasm: *(int128 *) (outp + 32) = xmm12
+# asm 1: movdqa <xmm12=int6464#13,32(<outp=int64#1)
+# asm 2: movdqa <xmm12=%xmm12,32(<outp=%rdi)
+movdqa %xmm12,32(%rdi)
+
+# qhasm: *(int128 *) (outp + 48) = xmm14
+# asm 1: movdqa <xmm14=int6464#15,48(<outp=int64#1)
+# asm 2: movdqa <xmm14=%xmm14,48(<outp=%rdi)
+movdqa %xmm14,48(%rdi)
+
+# qhasm: *(int128 *) (outp + 64) = xmm11
+# asm 1: movdqa <xmm11=int6464#12,64(<outp=int64#1)
+# asm 2: movdqa <xmm11=%xmm11,64(<outp=%rdi)
+movdqa %xmm11,64(%rdi)
+
+# qhasm: *(int128 *) (outp + 80) = xmm15
+# asm 1: movdqa <xmm15=int6464#16,80(<outp=int64#1)
+# asm 2: movdqa <xmm15=%xmm15,80(<outp=%rdi)
+movdqa %xmm15,80(%rdi)
+
+# qhasm: *(int128 *) (outp + 96) = xmm10
+# asm 1: movdqa <xmm10=int6464#11,96(<outp=int64#1)
+# asm 2: movdqa <xmm10=%xmm10,96(<outp=%rdi)
+movdqa %xmm10,96(%rdi)
+
+# qhasm: *(int128 *) (outp + 112) = xmm13
+# asm 1: movdqa <xmm13=int6464#14,112(<outp=int64#1)
+# asm 2: movdqa <xmm13=%xmm13,112(<outp=%rdi)
+movdqa %xmm13,112(%rdi)
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: end:
+._end:
+
+# qhasm: leave
+add %r11,%rsp
+mov %rdi,%rax
+mov %rsi,%rdx
+xor %rax,%rax
+ret
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/afternm.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/afternm.c
new file mode 100644
index 00000000..93c96e42
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/afternm.c
@@ -0,0 +1,158 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ * Date: 2009-03-19
+ * Public domain */
+
+#include "int128.h"
+#include "common.h"
+#include "consts.h"
+#include "crypto_stream.h"
+
+int crypto_stream_afternm(unsigned char *outp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
+{
+
+  int128 xmm0;
+  int128 xmm1;
+  int128 xmm2;
+  int128 xmm3;
+  int128 xmm4;
+  int128 xmm5;
+  int128 xmm6;
+  int128 xmm7;
+
+  int128 xmm8;
+  int128 xmm9;
+  int128 xmm10;
+  int128 xmm11;
+  int128 xmm12;
+  int128 xmm13;
+  int128 xmm14;
+  int128 xmm15;
+
+  int128 nonce_stack;
+  unsigned long long lensav;
+  unsigned char bl[128];
+  unsigned char *blp;
+  unsigned char b;
+
+  uint32 tmp;
+
+  /* Copy the nonce onto the stack */
+  copy2(&nonce_stack, (int128 *) (noncep + 0));
+  unsigned char *np = (unsigned char *)&nonce_stack;
+
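+  /* Note (added comment): each pass through enc_block below produces 128 bytes of keystream,
+     i.e. eight successive counter blocks encrypted with bitsliced AES-128. */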
+    enc_block:
+
+    xmm0 = *(int128 *) (np + 0);
+    copy2(&xmm1, &xmm0);
+    shufb(&xmm1, SWAP32);
+    copy2(&xmm2, &xmm1);
+    copy2(&xmm3, &xmm1);
+    copy2(&xmm4, &xmm1);
+    copy2(&xmm5, &xmm1);
+    copy2(&xmm6, &xmm1);
+    copy2(&xmm7, &xmm1);
+
+    add_uint32_big(&xmm1, 1);
+    add_uint32_big(&xmm2, 2);
+    add_uint32_big(&xmm3, 3);
+    add_uint32_big(&xmm4, 4);
+    add_uint32_big(&xmm5, 5);
+    add_uint32_big(&xmm6, 6);
+    add_uint32_big(&xmm7, 7);
+
+    shufb(&xmm0, M0);
+    shufb(&xmm1, M0SWAP);
+    shufb(&xmm2, M0SWAP);
+    shufb(&xmm3, M0SWAP);
+    shufb(&xmm4, M0SWAP);
+    shufb(&xmm5, M0SWAP);
+    shufb(&xmm6, M0SWAP);
+    shufb(&xmm7, M0SWAP);
+
+    bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
+
+    aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+
+    bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
+
+    if(len < 128) goto partial;
+    if(len == 128) goto full;
+
+    tmp = load32_bigendian(np + 12);
+    tmp += 8;
+    store32_bigendian(np + 12, tmp);
+
+    *(int128 *) (outp + 0) = xmm8;
+    *(int128 *) (outp + 16) = xmm9;
+    *(int128 *) (outp + 32) = xmm12;
+    *(int128 *) (outp + 48) = xmm14;
+    *(int128 *) (outp + 64) = xmm11;
+    *(int128 *) (outp + 80) = xmm15;
+    *(int128 *) (outp + 96) = xmm10;
+    *(int128 *) (outp + 112) = xmm13;
+
+    len -= 128;
+    outp += 128;
+
+    goto enc_block;
+
+    partial:
+
+    lensav = len;
+    len >>= 4;
+
+    tmp = load32_bigendian(np + 12);
+    tmp += len;
+    store32_bigendian(np + 12, tmp);
+
+    blp = bl;
+    *(int128 *)(blp + 0) = xmm8;
+    *(int128 *)(blp + 16) = xmm9;
+    *(int128 *)(blp + 32) = xmm12;
+    *(int128 *)(blp + 48) = xmm14;
+    *(int128 *)(blp + 64) = xmm11;
+    *(int128 *)(blp + 80) = xmm15;
+    *(int128 *)(blp + 96) = xmm10;
+    *(int128 *)(blp + 112) = xmm13;
+
+    bytes:
+
+    if(lensav == 0) goto end;
+
+    b = blp[0];
+    *(unsigned char *)(outp + 0) = b;
+
+    blp += 1;
+    outp +=1;
+    lensav -= 1;
+
+    goto bytes;
+
+    full:
+
+    tmp = load32_bigendian(np + 12);
+    tmp += 8;
+    store32_bigendian(np + 12, tmp);
+
+    *(int128 *) (outp + 0) = xmm8;
+    *(int128 *) (outp + 16) = xmm9;
+    *(int128 *) (outp + 32) = xmm12;
+    *(int128 *) (outp + 48) = xmm14;
+    *(int128 *) (outp + 64) = xmm11;
+    *(int128 *) (outp + 80) = xmm15;
+    *(int128 *) (outp + 96) = xmm10;
+    *(int128 *) (outp + 112) = xmm13;
+
+    end:
+    return 0;
+
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/api.h b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/api.h
new file mode 100644
index 00000000..62fc8d88
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/api.h
@@ -0,0 +1,3 @@
+#define CRYPTO_KEYBYTES 16
+#define CRYPTO_NONCEBYTES 16
+#define CRYPTO_BEFORENMBYTES 1408
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/beforenm.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/beforenm.c
new file mode 100644
index 00000000..8fa2673d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/beforenm.c
@@ -0,0 +1,59 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ * Date: 2009-03-19
+ * Public domain */
+
+#include "consts.h"
+#include "int128.h"
+#include "common.h"
+#include "crypto_stream.h"
+
+int crypto_stream_beforenm(unsigned char *c, const unsigned char *k)
+{
+
+  /*
+     int64 x0;
+     int64 x1;
+     int64 x2;
+     int64 x3;
+     int64 e;
+     int64 q0;
+     int64 q1;
+     int64 q2;
+     int64 q3;
+     */
+
+  int128 xmm0;
+  int128 xmm1;
+  int128 xmm2;
+  int128 xmm3;
+  int128 xmm4;
+  int128 xmm5;
+  int128 xmm6;
+  int128 xmm7;
+  int128 xmm8;
+  int128 xmm9;
+  int128 xmm10;
+  int128 xmm11;
+  int128 xmm12;
+  int128 xmm13;
+  int128 xmm14;
+  int128 xmm15;
+  int128 t;
+
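+  /* Note (added comment): key expansion derives 11 bitsliced round keys into c,
+     11 * 128 = 1408 bytes, matching CRYPTO_BEFORENMBYTES. */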
+  bitslicekey0(k, c)
+
+    keyexpbs1(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm1);, 2,c)
+    keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm6);, 3,c)
+    keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 4,c)
+
+    keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 5,c)
+    keyexpbs(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm5);, 6,c)
+    keyexpbs(xmm0, xmm1, xmm3, xmm2, xmm6, xmm5, xmm4, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm3);, 7,c)
+    keyexpbs(xmm0, xmm1, xmm6, xmm4, xmm2, xmm7, xmm3, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm7);, 8,c)
+
+    keyexpbs(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xor_rcon(&xmm0); xor_rcon(&xmm1); xor_rcon(&xmm6); xor_rcon(&xmm3);, 9,c)
+    keyexpbs10(xmm0, xmm1, xmm4, xmm6, xmm3, xmm7, xmm2, xmm5, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+
+    return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/common.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/common.c
new file mode 100644
index 00000000..14a28cc6
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/common.c
@@ -0,0 +1,64 @@
+#include "common.h"
+
+uint32 load32_bigendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[3])
+  | (((uint32) (x[2])) << 8)
+  | (((uint32) (x[1])) << 16)
+  | (((uint32) (x[0])) << 24)
+  ;
+}
+
+void store32_bigendian(unsigned char *x,uint32 u)
+{
+  x[3] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[0] = u;
+}
+
+uint32 load32_littleendian(const unsigned char *x)
+{
+  return
+      (uint32) (x[0])
+  | (((uint32) (x[1])) << 8)
+  | (((uint32) (x[2])) << 16)
+  | (((uint32) (x[3])) << 24)
+  ;
+}
+
+void store32_littleendian(unsigned char *x,uint32 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u;
+}
+
+
+uint64 load64_littleendian(const unsigned char *x)
+{
+  return
+      (uint64) (x[0])
+  | (((uint64) (x[1])) << 8)
+  | (((uint64) (x[2])) << 16)
+  | (((uint64) (x[3])) << 24)
+  | (((uint64) (x[4])) << 32)
+  | (((uint64) (x[5])) << 40)
+  | (((uint64) (x[6])) << 48)
+  | (((uint64) (x[7])) << 56)
+  ;
+}
+
+void store64_littleendian(unsigned char *x,uint64 u)
+{
+  x[0] = u; u >>= 8;
+  x[1] = u; u >>= 8;
+  x[2] = u; u >>= 8;
+  x[3] = u; u >>= 8;
+  x[4] = u; u >>= 8;
+  x[5] = u; u >>= 8;
+  x[6] = u; u >>= 8;
+  x[7] = u;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/common.h b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/common.h
new file mode 100644
index 00000000..0f723332
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/common.h
@@ -0,0 +1,788 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ Date: 2009-03-19
+ Public domain */
+#ifndef COMMON_H
+#define COMMON_H
+
+#include "types.h"
+
+#define load32_bigendian crypto_stream_aes128ctr_portable_load32_bigendian
+uint32 load32_bigendian(const unsigned char *x);
+
+#define store32_bigendian crypto_stream_aes128ctr_portable_store32_bigendian
+void store32_bigendian(unsigned char *x,uint32 u);
+
+#define load32_littleendian crypto_stream_aes128ctr_portable_load32_littleendian
+uint32 load32_littleendian(const unsigned char *x);
+
+#define store32_littleendian crypto_stream_aes128ctr_portable_store32_littleendian
+void store32_littleendian(unsigned char *x,uint32 u);
+
+#define load64_littleendian crypto_stream_aes128ctr_portable_load64_littleendian
+uint64 load64_littleendian(const unsigned char *x);
+
+#define store64_littleendian crypto_stream_aes128ctr_portable_store64_littleendian
+void store64_littleendian(unsigned char *x,uint64 u);
+
+/* Macros required only for key expansion */
+
+#define keyexpbs1(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
+  rotbyte(&b0);\
+  rotbyte(&b1);\
+  rotbyte(&b2);\
+  rotbyte(&b3);\
+  rotbyte(&b4);\
+  rotbyte(&b5);\
+  rotbyte(&b6);\
+  rotbyte(&b7);\
+  ;\
+  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+  ;\
+  xor_rcon(&b0);\
+  shufb(&b0, EXPB0);\
+  shufb(&b1, EXPB0);\
+  shufb(&b4, EXPB0);\
+  shufb(&b6, EXPB0);\
+  shufb(&b3, EXPB0);\
+  shufb(&b7, EXPB0);\
+  shufb(&b2, EXPB0);\
+  shufb(&b5, EXPB0);\
+  shufb(&b0, EXPB0);\
+  ;\
+  t0 = *(int128 *)(bskey + 0);\
+  t1 = *(int128 *)(bskey + 16);\
+  t2 = *(int128 *)(bskey + 32);\
+  t3 = *(int128 *)(bskey + 48);\
+  t4 = *(int128 *)(bskey + 64);\
+  t5 = *(int128 *)(bskey + 80);\
+  t6 = *(int128 *)(bskey + 96);\
+  t7 = *(int128 *)(bskey + 112);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  *(int128 *)(bskey + 128) = b0;\
+  *(int128 *)(bskey + 144) = b1;\
+  *(int128 *)(bskey + 160) = b4;\
+  *(int128 *)(bskey + 176) = b6;\
+  *(int128 *)(bskey + 192) = b3;\
+  *(int128 *)(bskey + 208) = b7;\
+  *(int128 *)(bskey + 224) = b2;\
+  *(int128 *)(bskey + 240) = b5;\
+
+#define keyexpbs10(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) ;\
+  toggle(&b0);\
+  toggle(&b1);\
+  toggle(&b5);\
+  toggle(&b6);\
+  rotbyte(&b0);\
+  rotbyte(&b1);\
+  rotbyte(&b2);\
+  rotbyte(&b3);\
+  rotbyte(&b4);\
+  rotbyte(&b5);\
+  rotbyte(&b6);\
+  rotbyte(&b7);\
+  ;\
+  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+  ;\
+  xor_rcon(&b1);\
+  xor_rcon(&b4);\
+  xor_rcon(&b3);\
+  xor_rcon(&b7);\
+  shufb(&b0, EXPB0);\
+  shufb(&b1, EXPB0);\
+  shufb(&b4, EXPB0);\
+  shufb(&b6, EXPB0);\
+  shufb(&b3, EXPB0);\
+  shufb(&b7, EXPB0);\
+  shufb(&b2, EXPB0);\
+  shufb(&b5, EXPB0);\
+  ;\
+  t0 = *(int128 *)(bskey + 9 * 128 +   0);\
+  t1 = *(int128 *)(bskey + 9 * 128 +  16);\
+  t2 = *(int128 *)(bskey + 9 * 128 +  32);\
+  t3 = *(int128 *)(bskey + 9 * 128 +  48);\
+  t4 = *(int128 *)(bskey + 9 * 128 +  64);\
+  t5 = *(int128 *)(bskey + 9 * 128 +  80);\
+  t6 = *(int128 *)(bskey + 9 * 128 +  96);\
+  t7 = *(int128 *)(bskey + 9 * 128 + 112);\
+  ;\
+  toggle(&t0);\
+  toggle(&t1);\
+  toggle(&t5);\
+  toggle(&t6);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  shufb(&b0, M0);\
+  shufb(&b1, M0);\
+  shufb(&b2, M0);\
+  shufb(&b3, M0);\
+  shufb(&b4, M0);\
+  shufb(&b5, M0);\
+  shufb(&b6, M0);\
+  shufb(&b7, M0);\
+  ;\
+  *(int128 *)(bskey + 1280) = b0;\
+  *(int128 *)(bskey + 1296) = b1;\
+  *(int128 *)(bskey + 1312) = b4;\
+  *(int128 *)(bskey + 1328) = b6;\
+  *(int128 *)(bskey + 1344) = b3;\
+  *(int128 *)(bskey + 1360) = b7;\
+  *(int128 *)(bskey + 1376) = b2;\
+  *(int128 *)(bskey + 1392) = b5;\
+
+
+#define keyexpbs(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, rcon, i, bskey) \
+  toggle(&b0);\
+  toggle(&b1);\
+  toggle(&b5);\
+  toggle(&b6);\
+  rotbyte(&b0);\
+  rotbyte(&b1);\
+  rotbyte(&b2);\
+  rotbyte(&b3);\
+  rotbyte(&b4);\
+  rotbyte(&b5);\
+  rotbyte(&b6);\
+  rotbyte(&b7);\
+  ;\
+  sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+  ;\
+  rcon;\
+  shufb(&b0, EXPB0);\
+  shufb(&b1, EXPB0);\
+  shufb(&b4, EXPB0);\
+  shufb(&b6, EXPB0);\
+  shufb(&b3, EXPB0);\
+  shufb(&b7, EXPB0);\
+  shufb(&b2, EXPB0);\
+  shufb(&b5, EXPB0);\
+  ;\
+  t0 = *(int128 *)(bskey + (i-1) * 128 +   0);\
+  t1 = *(int128 *)(bskey + (i-1) * 128 +  16);\
+  t2 = *(int128 *)(bskey + (i-1) * 128 +  32);\
+  t3 = *(int128 *)(bskey + (i-1) * 128 +  48);\
+  t4 = *(int128 *)(bskey + (i-1) * 128 +  64);\
+  t5 = *(int128 *)(bskey + (i-1) * 128 +  80);\
+  t6 = *(int128 *)(bskey + (i-1) * 128 +  96);\
+  t7 = *(int128 *)(bskey + (i-1) * 128 + 112);\
+  ;\
+  toggle(&t0);\
+  toggle(&t1);\
+  toggle(&t5);\
+  toggle(&t6);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  rshift32_littleendian(&t0, 8);\
+  rshift32_littleendian(&t1, 8);\
+  rshift32_littleendian(&t2, 8);\
+  rshift32_littleendian(&t3, 8);\
+  rshift32_littleendian(&t4, 8);\
+  rshift32_littleendian(&t5, 8);\
+  rshift32_littleendian(&t6, 8);\
+  rshift32_littleendian(&t7, 8);\
+  ;\
+  xor2(&b0, &t0);\
+  xor2(&b1, &t1);\
+  xor2(&b4, &t2);\
+  xor2(&b6, &t3);\
+  xor2(&b3, &t4);\
+  xor2(&b7, &t5);\
+  xor2(&b2, &t6);\
+  xor2(&b5, &t7);\
+  ;\
+  *(int128 *)(bskey + i*128 +   0) = b0;\
+  *(int128 *)(bskey + i*128 +  16) = b1;\
+  *(int128 *)(bskey + i*128 +  32) = b4;\
+  *(int128 *)(bskey + i*128 +  48) = b6;\
+  *(int128 *)(bskey + i*128 +  64) = b3;\
+  *(int128 *)(bskey + i*128 +  80) = b7;\
+  *(int128 *)(bskey + i*128 +  96) = b2;\
+  *(int128 *)(bskey + i*128 + 112) = b5;\
+
+/* Macros used in multiple contexts */
+
+#define bitslicekey0(key, bskey) \
+  xmm0 = *(int128 *) (key + 0);\
+  shufb(&xmm0, M0);\
+  copy2(&xmm1, &xmm0);\
+  copy2(&xmm2, &xmm0);\
+  copy2(&xmm3, &xmm0);\
+  copy2(&xmm4, &xmm0);\
+  copy2(&xmm5, &xmm0);\
+  copy2(&xmm6, &xmm0);\
+  copy2(&xmm7, &xmm0);\
+  ;\
+  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
+  ;\
+  *(int128 *) (bskey + 0) = xmm0;\
+  *(int128 *) (bskey + 16) = xmm1;\
+  *(int128 *) (bskey + 32) = xmm2;\
+  *(int128 *) (bskey + 48) = xmm3;\
+  *(int128 *) (bskey + 64) = xmm4;\
+  *(int128 *) (bskey + 80) = xmm5;\
+  *(int128 *) (bskey + 96) = xmm6;\
+  *(int128 *) (bskey + 112) = xmm7;\
+
+
+#define bitslicekey10(key, bskey) \
+  xmm0 = *(int128 *) (key + 0);\
+  copy2(xmm1, xmm0);\
+  copy2(xmm2, xmm0);\
+  copy2(xmm3, xmm0);\
+  copy2(xmm4, xmm0);\
+  copy2(xmm5, xmm0);\
+  copy2(xmm6, xmm0);\
+  copy2(xmm7, xmm0);\
+  ;\
+  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
+  ;\
+  toggle(&xmm6);\
+  toggle(&xmm5);\
+  toggle(&xmm1);\
+  toggle(&xmm0);\
+  ;\
+  *(int128 *) (bskey +   0 + 1280) = xmm0;\
+  *(int128 *) (bskey +  16 + 1280) = xmm1;\
+  *(int128 *) (bskey +  32 + 1280) = xmm2;\
+  *(int128 *) (bskey +  48 + 1280) = xmm3;\
+  *(int128 *) (bskey +  64 + 1280) = xmm4;\
+  *(int128 *) (bskey +  80 + 1280) = xmm5;\
+  *(int128 *) (bskey +  96 + 1280) = xmm6;\
+  *(int128 *) (bskey + 112 + 1280) = xmm7;\
+  
+
+#define bitslicekey(i,key,bskey) \
+  xmm0 = *(int128 *) (key + 0);\
+  shufb(&xmm0, M0);\
+  copy2(&xmm1, &xmm0);\
+  copy2(&xmm2, &xmm0);\
+  copy2(&xmm3, &xmm0);\
+  copy2(&xmm4, &xmm0);\
+  copy2(&xmm5, &xmm0);\
+  copy2(&xmm6, &xmm0);\
+  copy2(&xmm7, &xmm0);\
+  ;\
+  bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, t);\
+  ;\
+  toggle(&xmm6);\
+  toggle(&xmm5);\
+  toggle(&xmm1);\
+  toggle(&xmm0);\
+  ;\
+  *(int128 *) (bskey +   0 + 128*i) = xmm0;\
+  *(int128 *) (bskey +  16 + 128*i) = xmm1;\
+  *(int128 *) (bskey +  32 + 128*i) = xmm2;\
+  *(int128 *) (bskey +  48 + 128*i) = xmm3;\
+  *(int128 *) (bskey +  64 + 128*i) = xmm4;\
+  *(int128 *) (bskey +  80 + 128*i) = xmm5;\
+  *(int128 *) (bskey +  96 + 128*i) = xmm6;\
+  *(int128 *) (bskey + 112 + 128*i) = xmm7;\
+
+
+#define bitslice(x0, x1, x2, x3, x4, x5, x6, x7, t) \
+	swapmove(x0, x1, 1, BS0, t);\
+	swapmove(x2, x3, 1, BS0, t);\
+	swapmove(x4, x5, 1, BS0, t);\
+	swapmove(x6, x7, 1, BS0, t);\
+	;\
+	swapmove(x0, x2, 2, BS1, t);\
+	swapmove(x1, x3, 2, BS1, t);\
+	swapmove(x4, x6, 2, BS1, t);\
+	swapmove(x5, x7, 2, BS1, t);\
+	;\
+	swapmove(x0, x4, 4, BS2, t);\
+	swapmove(x1, x5, 4, BS2, t);\
+	swapmove(x2, x6, 4, BS2, t);\
+	swapmove(x3, x7, 4, BS2, t);\
+
+
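+/* Note (added comment): swapmove swaps the bits of a selected by mask m with the
+   corresponding bits of b shifted down by n; this is the standard transpose step
+   used by the bitslice macro above. */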
+#define swapmove(a, b, n, m, t) \
+	copy2(&t, &b);\
+  rshift64_littleendian(&t, n);\
+	xor2(&t, &a);\
+  and2(&t, &m);\
+  xor2(&a, &t);\
+  lshift64_littleendian(&t, n);\
+  xor2(&b, &t);
+
+#define rotbyte(x) \
+  shufb(x, ROTB) /* TODO: Make faster */
+
+
+/* Macros used for encryption (and decryption) */
+
+#define shiftrows(x0, x1, x2, x3, x4, x5, x6, x7, i, M, bskey) \
+	xor2(&x0, (int128 *)(bskey + 128*(i-1) + 0));\
+  shufb(&x0, M);\
+	xor2(&x1, (int128 *)(bskey + 128*(i-1) + 16));\
+  shufb(&x1, M);\
+	xor2(&x2, (int128 *)(bskey + 128*(i-1) + 32));\
+  shufb(&x2, M);\
+	xor2(&x3, (int128 *)(bskey + 128*(i-1) + 48));\
+  shufb(&x3, M);\
+	xor2(&x4, (int128 *)(bskey + 128*(i-1) + 64));\
+  shufb(&x4, M);\
+	xor2(&x5, (int128 *)(bskey + 128*(i-1) + 80));\
+  shufb(&x5, M);\
+	xor2(&x6, (int128 *)(bskey + 128*(i-1) + 96));\
+  shufb(&x6, M);\
+	xor2(&x7, (int128 *)(bskey + 128*(i-1) + 112));\
+  shufb(&x7, M);\
+
+
+#define mixcolumns(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, t7) \
+  shufd(&t0, &x0, 0x93);\
+  shufd(&t1, &x1, 0x93);\
+  shufd(&t2, &x2, 0x93);\
+  shufd(&t3, &x3, 0x93);\
+  shufd(&t4, &x4, 0x93);\
+  shufd(&t5, &x5, 0x93);\
+  shufd(&t6, &x6, 0x93);\
+  shufd(&t7, &x7, 0x93);\
+	;\
+	xor2(&x0, &t0);\
+	xor2(&x1, &t1);\
+	xor2(&x2, &t2);\
+	xor2(&x3, &t3);\
+	xor2(&x4, &t4);\
+	xor2(&x5, &t5);\
+	xor2(&x6, &t6);\
+	xor2(&x7, &t7);\
+	;\
+	xor2(&t0, &x7);\
+	xor2(&t1, &x0);\
+	xor2(&t2, &x1);\
+	xor2(&t1, &x7);\
+	xor2(&t3, &x2);\
+	xor2(&t4, &x3);\
+	xor2(&t5, &x4);\
+	xor2(&t3, &x7);\
+	xor2(&t6, &x5);\
+	xor2(&t7, &x6);\
+	xor2(&t4, &x7);\
+	;\
+  shufd(&x0, &x0, 0x4e);\
+  shufd(&x1, &x1, 0x4e);\
+  shufd(&x2, &x2, 0x4e);\
+  shufd(&x3, &x3, 0x4e);\
+  shufd(&x4, &x4, 0x4e);\
+  shufd(&x5, &x5, 0x4e);\
+  shufd(&x6, &x6, 0x4e);\
+  shufd(&x7, &x7, 0x4e);\
+	;\
+	xor2(&t0, &x0);\
+	xor2(&t1, &x1);\
+	xor2(&t2, &x2);\
+	xor2(&t3, &x3);\
+	xor2(&t4, &x4);\
+	xor2(&t5, &x5);\
+	xor2(&t6, &x6);\
+	xor2(&t7, &x7);\
+	
+
+#define aesround(i, b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
+	shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, i, SR, bskey);\
+	sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+	mixcolumns(b0, b1, b4, b6, b3, b7, b2, b5, t0, t1, t2, t3, t4, t5, t6, t7);\
+
+
+#define lastround(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7, bskey) \
+	shiftrows(b0, b1, b2, b3, b4, b5, b6, b7, 10, SRM0, bskey);\
+	sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, t4, t5, t6, t7);\
+	xor2(&b0,(int128 *)(bskey + 128*10));\
+	xor2(&b1,(int128 *)(bskey + 128*10+16));\
+	xor2(&b4,(int128 *)(bskey + 128*10+32));\
+	xor2(&b6,(int128 *)(bskey + 128*10+48));\
+	xor2(&b3,(int128 *)(bskey + 128*10+64));\
+	xor2(&b7,(int128 *)(bskey + 128*10+80));\
+	xor2(&b2,(int128 *)(bskey + 128*10+96));\
+	xor2(&b5,(int128 *)(bskey + 128*10+112));\
+
+
+#define sbox(b0, b1, b2, b3, b4, b5, b6, b7, t0, t1, t2, t3, s0, s1, s2, s3) \
+	InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7); \
+	Inv_GF256(b6, b5, b0, b3, b7, b1, b4, b2, t0, t1, t2, t3, s0, s1, s2, s3); \
+	OutBasisChange(b7, b1, b4, b2, b6, b5, b0, b3); \
+
+
+#define InBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \
+	xor2(&b5, &b6);\
+	xor2(&b2, &b1);\
+	xor2(&b5, &b0);\
+	xor2(&b6, &b2);\
+	xor2(&b3, &b0);\
+	;\
+	xor2(&b6, &b3);\
+	xor2(&b3, &b7);\
+	xor2(&b3, &b4);\
+	xor2(&b7, &b5);\
+	xor2(&b3, &b1);\
+	;\
+	xor2(&b4, &b5);\
+	xor2(&b2, &b7);\
+	xor2(&b1, &b5);\
+ 
+#define OutBasisChange(b0, b1, b2, b3, b4, b5, b6, b7) \
+	xor2(&b0, &b6);\
+	xor2(&b1, &b4);\
+	xor2(&b2, &b0);\
+	xor2(&b4, &b6);\
+	xor2(&b6, &b1);\
+	;\
+	xor2(&b1, &b5);\
+	xor2(&b5, &b3);\
+	xor2(&b2, &b5);\
+	xor2(&b3, &b7);\
+	xor2(&b7, &b5);\
+	;\
+	xor2(&b4, &b7);\
+	
+#define Mul_GF4(x0, x1, y0, y1, t0) \
+	copy2(&t0, &y0);\
+	xor2(&t0, &y1);\
+	and2(&t0, &x0);\
+	xor2(&x0, &x1);\
+	and2(&x0, &y1);\
+	and2(&x1, &y0);\
+	xor2(&x0, &x1);\
+	xor2(&x1, &t0);\
+	
+#define Mul_GF4_N(x0, x1, y0, y1, t0) \
+	copy2(&t0, &y0);\
+	xor2(&t0, &y1);\
+	and2(&t0, &x0);\
+	xor2(&x0, &x1);\
+	and2(&x0, &y1);\
+	and2(&x1, &y0);\
+	xor2(&x1, &x0);\
+	xor2(&x0, &t0);\
+	
+#define Mul_GF4_2(x0, x1, x2, x3, y0, y1, t0, t1) \
+	copy2(&t0, &y0);\
+	xor2(&t0, &y1);\
+	copy2(&t1, &t0);\
+	and2(&t0, &x0);\
+	and2(&t1, &x2);\
+	xor2(&x0, &x1);\
+	xor2(&x2, &x3);\
+	and2(&x0, &y1);\
+	and2(&x2, &y1);\
+	and2(&x1, &y0);\
+	and2(&x3, &y0);\
+	xor2(&x0, &x1);\
+	xor2(&x2, &x3);\
+	xor2(&x1, &t0);\
+	xor2(&x3, &t1);\
+	
+#define Mul_GF16(x0, x1, x2, x3, y0, y1, y2, y3, t0, t1, t2, t3) \
+	copy2(&t0, &x0);\
+	copy2(&t1, &x1);\
+	Mul_GF4(x0, x1, y0, y1, t2);\
+	xor2(&t0, &x2);\
+	xor2(&t1, &x3);\
+	xor2(&y0, &y2);\
+	xor2(&y1, &y3);\
+	Mul_GF4_N(t0, t1, y0, y1, t2);\
+	Mul_GF4(x2, x3, y2, y3, t3);\
+	;\
+	xor2(&x0, &t0);\
+	xor2(&x2, &t0);\
+	xor2(&x1, &t1);\
+	xor2(&x3, &t1);\
+			
+#define Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, t0, t1, t2, t3) \
+	copy2(&t0, &x0);\
+	copy2(&t1, &x1);\
+	Mul_GF4(x0, x1, y0, y1, t2);\
+	xor2(&t0, &x2);\
+	xor2(&t1, &x3);\
+	xor2(&y0, &y2);\
+	xor2(&y1, &y3);\
+	Mul_GF4_N(t0, t1, y0, y1, t3);\
+	Mul_GF4(x2, x3, y2, y3, t2);\
+	;\
+	xor2(&x0, &t0);\
+	xor2(&x2, &t0);\
+	xor2(&x1, &t1);\
+	xor2(&x3, &t1);\
+	;\
+	copy2(&t0, &x4);\
+	copy2(&t1, &x5);\
+	xor2(&t0, &x6);\
+	xor2(&t1, &x7);\
+	Mul_GF4_N(t0, t1, y0, y1, t3);\
+	Mul_GF4(x6, x7, y2, y3, t2);\
+	xor2(&y0, &y2);\
+	xor2(&y1, &y3);\
+	Mul_GF4(x4, x5, y0, y1, t3);\
+	;\
+	xor2(&x4, &t0);\
+	xor2(&x6, &t0);\
+	xor2(&x5, &t1);\
+	xor2(&x7, &t1);\
+	
+#define Inv_GF16(x0, x1, x2, x3, t0, t1, t2, t3) \
+	copy2(&t0, &x1);\
+	copy2(&t1, &x0);\
+	and2(&t0, &x3);\
+	or2(&t1, &x2);\
+	copy2(&t2, &x1);\
+	copy2(&t3, &x0);\
+	or2(&t2, &x2);\
+	or2(&t3, &x3);\
+	xor2(&t2, &t3);\
+	;\
+	xor2(&t0, &t2);\
+	xor2(&t1, &t2);\
+	;\
+	Mul_GF4_2(x0, x1, x2, x3, t1, t0, t2, t3);\
+
+	
+#define Inv_GF256(x0,  x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, s0, s1, s2, s3) \
+	copy2(&t3, &x4);\
+	copy2(&t2, &x5);\
+	copy2(&t1, &x1);\
+	copy2(&s1, &x7);\
+	copy2(&s0, &x0);\
+	;\
+	xor2(&t3, &x6);\
+	xor2(&t2, &x7);\
+	xor2(&t1, &x3);\
+	xor2(&s1, &x6);\
+	xor2(&s0, &x2);\
+	;\
+	copy2(&s2, &t3);\
+	copy2(&t0, &t2);\
+	copy2(&s3, &t3);\
+	;\
+	or2(&t2, &t1);\
+	or2(&t3, &s0);\
+	xor2(&s3, &t0);\
+	and2(&s2, &s0);\
+	and2(&t0, &t1);\
+	xor2(&s0, &t1);\
+	and2(&s3, &s0);\
+	copy2(&s0, &x3);\
+	xor2(&s0, &x2);\
+	and2(&s1, &s0);\
+	xor2(&t3, &s1);\
+	xor2(&t2, &s1);\
+	copy2(&s1, &x4);\
+	xor2(&s1, &x5);\
+	copy2(&s0, &x1);\
+	copy2(&t1, &s1);\
+	xor2(&s0, &x0);\
+	or2(&t1, &s0);\
+	and2(&s1, &s0);\
+	xor2(&t0, &s1);\
+	xor2(&t3, &s3);\
+	xor2(&t2, &s2);\
+	xor2(&t1, &s3);\
+	xor2(&t0, &s2);\
+	xor2(&t1, &s2);\
+	copy2(&s0, &x7);\
+	copy2(&s1, &x6);\
+	copy2(&s2, &x5);\
+	copy2(&s3, &x4);\
+	and2(&s0, &x3);\
+	and2(&s1, &x2);\
+	and2(&s2, &x1);\
+	or2(&s3, &x0);\
+	xor2(&t3, &s0);\
+	xor2(&t2, &s1);\
+	xor2(&t1, &s2);\
+	xor2(&t0, &s3);\
+  ;\
+  copy2(&s0, &t3);\
+  xor2(&s0, &t2);\
+  and2(&t3, &t1);\
+  copy2(&s2, &t0);\
+  xor2(&s2, &t3);\
+  copy2(&s3, &s0);\
+  and2(&s3, &s2);\
+  xor2(&s3, &t2);\
+  copy2(&s1, &t1);\
+  xor2(&s1, &t0);\
+  xor2(&t3, &t2);\
+  and2(&s1, &t3);\
+  xor2(&s1, &t0);\
+  xor2(&t1, &s1);\
+  copy2(&t2, &s2);\
+  xor2(&t2, &s1);\
+  and2(&t2, &t0);\
+  xor2(&t1, &t2);\
+  xor2(&s2, &t2);\
+  and2(&s2, &s3);\
+  xor2(&s2, &s0);\
+  ;\
+  Mul_GF16_2(x0, x1, x2, x3, x4, x5, x6, x7, s3, s2, s1, t1, s0, t0, t2, t3);\
+  	
+#endif
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/consts.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/consts.c
new file mode 100644
index 00000000..ed2835db
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/consts.c
@@ -0,0 +1,14 @@
+#include "consts.h"
+
+const unsigned char ROTB[16] = {0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x08};
+const unsigned char M0[16]   = {0x0f, 0x0b, 0x07, 0x03, 0x0e, 0x0a, 0x06, 0x02, 0x0d, 0x09, 0x05, 0x01, 0x0c, 0x08, 0x04, 0x00};
+const unsigned char EXPB0[16] = {0x03, 0x03, 0x03, 0x03, 0x07, 0x07, 0x07, 0x07, 0x0b, 0x0b, 0x0b, 0x0b, 0x0f, 0x0f, 0x0f, 0x0f};
+
+const unsigned char SWAP32[16] = {0x03, 0x02, 0x01, 0x00, 0x07, 0x06, 0x05, 0x04, 0x0b, 0x0a, 0x09, 0x08, 0x0f, 0x0e, 0x0d, 0x0c};
+const unsigned char M0SWAP[16] = {0x0c, 0x08, 0x04, 0x00, 0x0d, 0x09, 0x05, 0x01, 0x0e, 0x0a, 0x06, 0x02, 0x0f, 0x0b, 0x07, 0x03}; 
+const unsigned char SR[16] = {0x01, 0x02, 0x03, 0x00, 0x06, 0x07, 0x04, 0x05, 0x0b, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0e, 0x0f};
+const unsigned char SRM0[16] = {0x0f, 0x0a, 0x05, 0x00, 0x0e, 0x09, 0x04, 0x03, 0x0d, 0x08, 0x07, 0x02, 0x0c, 0x0b, 0x06, 0x01};
+
+const int128 BS0 = {0x5555555555555555ULL, 0x5555555555555555ULL};
+const int128 BS1 = {0x3333333333333333ULL, 0x3333333333333333ULL};
+const int128 BS2 = {0x0f0f0f0f0f0f0f0fULL, 0x0f0f0f0f0f0f0f0fULL};
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/consts.h b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/consts.h
new file mode 100644
index 00000000..4c50360b
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/consts.h
@@ -0,0 +1,28 @@
+#ifndef CONSTS_H
+#define CONSTS_H
+
+#include "int128.h"
+
+#define ROTB crypto_stream_aes128ctr_portable_ROTB
+#define M0 crypto_stream_aes128ctr_portable_M0
+#define EXPB0 crypto_stream_aes128ctr_portable_EXPB0
+#define SWAP32 crypto_stream_aes128ctr_portable_SWAP32
+#define M0SWAP crypto_stream_aes128ctr_portable_M0SWAP
+#define SR crypto_stream_aes128ctr_portable_SR
+#define SRM0 crypto_stream_aes128ctr_portable_SRM0
+#define BS0 crypto_stream_aes128ctr_portable_BS0
+#define BS1 crypto_stream_aes128ctr_portable_BS1
+#define BS2 crypto_stream_aes128ctr_portable_BS2
+
+extern const unsigned char ROTB[16];
+extern const unsigned char M0[16];
+extern const unsigned char EXPB0[16];
+extern const unsigned char SWAP32[16];
+extern const unsigned char M0SWAP[16];
+extern const unsigned char SR[16];
+extern const unsigned char SRM0[16];
+extern const int128 BS0;
+extern const int128 BS1;
+extern const int128 BS2;
+
+#endif
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/int128.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/int128.c
new file mode 100644
index 00000000..25894d42
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/int128.c
@@ -0,0 +1,128 @@
+#include "int128.h"
+#include "common.h"
+
+void xor2(int128 *r, const int128 *x)
+{
+  r->a ^= x->a;
+  r->b ^= x->b;
+}
+
+void and2(int128 *r, const int128 *x)
+{
+  r->a &= x->a;
+  r->b &= x->b;
+}
+
+void or2(int128 *r, const int128 *x)
+{
+  r->a |= x->a;
+  r->b |= x->b;
+}
+
+void copy2(int128 *r, const int128 *x)
+{
+  r->a = x->a;
+  r->b = x->b;
+}
+
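+/* Note (added comment): shufb is a byte permutation in the style of pshufb:
+   after the call, byte i of r holds the old byte l[i] of r. */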
+void shufb(int128 *r, const unsigned char *l)
+{
+  int128 t;
+  copy2(&t,r);
+  unsigned char *cr = (unsigned char *)r;
+  unsigned char *ct = (unsigned char *)&t;
+  cr[0] = ct[l[0]];
+  cr[1] = ct[l[1]];
+  cr[2] = ct[l[2]];
+  cr[3] = ct[l[3]];
+  cr[4] = ct[l[4]];
+  cr[5] = ct[l[5]];
+  cr[6] = ct[l[6]];
+  cr[7] = ct[l[7]];
+  cr[8] = ct[l[8]];
+  cr[9] = ct[l[9]];
+  cr[10] = ct[l[10]];
+  cr[11] = ct[l[11]];
+  cr[12] = ct[l[12]];
+  cr[13] = ct[l[13]];
+  cr[14] = ct[l[14]];
+  cr[15] = ct[l[15]];
+}
+
+void shufd(int128 *r, const int128 *x, const unsigned int c)
+{
+  int128 t;
+  uint32 *tp = (uint32 *)&t;
+  uint32 *xp = (uint32 *)x;
+  tp[0] = xp[c&3];
+  tp[1] = xp[(c>>2)&3];
+  tp[2] = xp[(c>>4)&3];
+  tp[3] = xp[(c>>6)&3];
+  copy2(r,&t);
+}
+
+void rshift32_littleendian(int128 *r, const unsigned int n)
+{
+  unsigned char *rp = (unsigned char *)r;
+  uint32 t;
+  t = load32_littleendian(rp);
+  t >>= n;
+  store32_littleendian(rp, t);
+  t = load32_littleendian(rp+4);
+  t >>= n;
+  store32_littleendian(rp+4, t);
+  t = load32_littleendian(rp+8);
+  t >>= n;
+  store32_littleendian(rp+8, t);
+  t = load32_littleendian(rp+12);
+  t >>= n;
+  store32_littleendian(rp+12, t);
+}
+
+void rshift64_littleendian(int128 *r, const unsigned int n)
+{
+  unsigned char *rp = (unsigned char *)r;
+  uint64 t;
+  t = load64_littleendian(rp);
+  t >>= n;
+  store64_littleendian(rp, t);
+  t = load64_littleendian(rp+8);
+  t >>= n;
+  store64_littleendian(rp+8, t);
+}
+
+void lshift64_littleendian(int128 *r, const unsigned int n)
+{
+  unsigned char *rp = (unsigned char *)r;
+  uint64 t;
+  t = load64_littleendian(rp);
+  t <<= n;
+  store64_littleendian(rp, t);
+  t = load64_littleendian(rp+8);
+  t <<= n;
+  store64_littleendian(rp+8, t);
+}
+
+void toggle(int128 *r)
+{
+  r->a ^= 0xffffffffffffffffULL;
+  r->b ^= 0xffffffffffffffffULL;
+}
+
+void xor_rcon(int128 *r)
+{
+  unsigned char *rp = (unsigned char *)r;
+  uint32 t;
+  t = load32_littleendian(rp+12);
+  t ^= 0xffffffff;
+  store32_littleendian(rp+12, t);
+}
+
+void add_uint32_big(int128 *r, uint32 x)
+{
+  unsigned char *rp = (unsigned char *)r;
+  uint32 t;
+  t = load32_littleendian(rp+12);
+  t += x;
+  store32_littleendian(rp+12, t);
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/int128.h b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/int128.h
new file mode 100644
index 00000000..7099e5b1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/int128.h
@@ -0,0 +1,47 @@
+#ifndef INT128_H
+#define INT128_H
+
+#include "common.h"
+
+typedef struct{
+  unsigned long long a;
+  unsigned long long b;
+} int128;
+
+#define xor2 crypto_stream_aes128ctr_portable_xor2
+void xor2(int128 *r, const int128 *x);
+
+#define and2 crypto_stream_aes128ctr_portable_and2
+void and2(int128 *r, const int128 *x);
+
+#define or2 crypto_stream_aes128ctr_portable_or2
+void or2(int128 *r, const int128 *x);
+
+#define copy2 crypto_stream_aes128ctr_portable_copy2
+void copy2(int128 *r, const int128 *x);
+
+#define shufb crypto_stream_aes128ctr_portable_shufb
+void shufb(int128 *r, const unsigned char *l);
+
+#define shufd crypto_stream_aes128ctr_portable_shufd
+void shufd(int128 *r, const int128 *x, const unsigned int c);
+
+#define rshift32_littleendian crypto_stream_aes128ctr_portable_rshift32_littleendian
+void rshift32_littleendian(int128 *r, const unsigned int n);
+
+#define rshift64_littleendian crypto_stream_aes128ctr_portable_rshift64_littleendian
+void rshift64_littleendian(int128 *r, const unsigned int n);
+
+#define lshift64_littleendian crypto_stream_aes128ctr_portable_lshift64_littleendian
+void lshift64_littleendian(int128 *r, const unsigned int n);
+
+#define toggle crypto_stream_aes128ctr_portable_toggle
+void toggle(int128 *r);
+
+#define xor_rcon crypto_stream_aes128ctr_portable_xor_rcon
+void xor_rcon(int128 *r);
+
+#define add_uint32_big crypto_stream_aes128ctr_portable_add_uint32_big
+void add_uint32_big(int128 *r, uint32 x);
+
+#endif
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/stream.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/stream.c
new file mode 100644
index 00000000..963fa8c1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/stream.c
@@ -0,0 +1,28 @@
+#include "crypto_stream.h"
+
+int crypto_stream(
+        unsigned char *out,
+        unsigned long long outlen,
+        const unsigned char *n,
+        const unsigned char *k
+        )
+{
+    unsigned char d[crypto_stream_BEFORENMBYTES];
+    crypto_stream_beforenm(d, k);
+    crypto_stream_afternm(out, outlen, n, d);
+    return 0;
+}
+
+int crypto_stream_xor(
+        unsigned char *out,
+        const unsigned char *in,
+        unsigned long long inlen,
+        const unsigned char *n,
+        const unsigned char *k
+        )
+{
+    unsigned char d[crypto_stream_BEFORENMBYTES];
+    crypto_stream_beforenm(d, k);
+    crypto_stream_xor_afternm(out, in, inlen, n, d);
+    return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/types.h b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/types.h
new file mode 100644
index 00000000..6aa502fc
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/types.h
@@ -0,0 +1,10 @@
+#ifndef TYPES_H
+#define TYPES_H
+
+#include "crypto_uint32.h"
+typedef crypto_uint32 uint32;
+
+#include "crypto_uint64.h"
+typedef crypto_uint64 uint64;
+
+#endif
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/xor_afternm.c b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/xor_afternm.c
new file mode 100644
index 00000000..f2ff8ff6
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/aes128ctr/portable/xor_afternm.c
@@ -0,0 +1,180 @@
+/* Author: Peter Schwabe, ported from an assembly implementation by Emilia Käsper
+ * Date: 2009-03-19
+ * Public domain */
+
+#include <stdio.h>
+#include "int128.h"
+#include "common.h"
+#include "consts.h"
+#include "crypto_stream.h"
+
+int crypto_stream_xor_afternm(unsigned char *outp, const unsigned char *inp, unsigned long long len, const unsigned char *noncep, const unsigned char *c)
+{
+
+  int128 xmm0;
+  int128 xmm1;
+  int128 xmm2;
+  int128 xmm3;
+  int128 xmm4;
+  int128 xmm5;
+  int128 xmm6;
+  int128 xmm7;
+
+  int128 xmm8;
+  int128 xmm9;
+  int128 xmm10;
+  int128 xmm11;
+  int128 xmm12;
+  int128 xmm13;
+  int128 xmm14;
+  int128 xmm15;
+
+  int128 nonce_stack;
+  unsigned long long lensav;
+  unsigned char bl[128];
+  unsigned char *blp;
+  unsigned char b;
+
+  uint32 tmp;
+
+  /* Copy the nonce onto the stack */
+  copy2(&nonce_stack, (int128 *) (noncep + 0));
+  unsigned char *np = (unsigned char *)&nonce_stack;
+
+    enc_block:
+
+    xmm0 = *(int128 *) (np + 0);
+    copy2(&xmm1, &xmm0);
+    shufb(&xmm1, SWAP32);
+    copy2(&xmm2, &xmm1);
+    copy2(&xmm3, &xmm1);
+    copy2(&xmm4, &xmm1);
+    copy2(&xmm5, &xmm1);
+    copy2(&xmm6, &xmm1);
+    copy2(&xmm7, &xmm1);
+
+    add_uint32_big(&xmm1, 1);
+    add_uint32_big(&xmm2, 2);
+    add_uint32_big(&xmm3, 3);
+    add_uint32_big(&xmm4, 4);
+    add_uint32_big(&xmm5, 5);
+    add_uint32_big(&xmm6, 6);
+    add_uint32_big(&xmm7, 7);
+
+    shufb(&xmm0, M0);
+    shufb(&xmm1, M0SWAP);
+    shufb(&xmm2, M0SWAP);
+    shufb(&xmm3, M0SWAP);
+    shufb(&xmm4, M0SWAP);
+    shufb(&xmm5, M0SWAP);
+    shufb(&xmm6, M0SWAP);
+    shufb(&xmm7, M0SWAP);
+
+    bitslice(xmm7, xmm6, xmm5, xmm4, xmm3, xmm2, xmm1, xmm0, xmm8)
+
+    aesround( 1, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 2, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 3, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 4, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 5, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 6, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 7, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    aesround( 8, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+    aesround( 9, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,c)
+    lastround(xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,c)
+
+    bitslice(xmm13, xmm10, xmm15, xmm11, xmm14, xmm12, xmm9, xmm8, xmm0)
+
+    if(len < 128) goto partial;
+    if(len == 128) goto full;
+
+    tmp = load32_bigendian(np + 12);
+    tmp += 8;
+    store32_bigendian(np + 12, tmp);
+
+    xor2(&xmm8, (int128 *)(inp + 0));
+    xor2(&xmm9, (int128 *)(inp + 16));
+    xor2(&xmm12, (int128 *)(inp + 32));
+    xor2(&xmm14, (int128 *)(inp + 48));
+    xor2(&xmm11, (int128 *)(inp + 64));
+    xor2(&xmm15, (int128 *)(inp + 80));
+    xor2(&xmm10, (int128 *)(inp + 96));
+    xor2(&xmm13, (int128 *)(inp + 112));
+
+    *(int128 *) (outp + 0) = xmm8;
+    *(int128 *) (outp + 16) = xmm9;
+    *(int128 *) (outp + 32) = xmm12;
+    *(int128 *) (outp + 48) = xmm14;
+    *(int128 *) (outp + 64) = xmm11;
+    *(int128 *) (outp + 80) = xmm15;
+    *(int128 *) (outp + 96) = xmm10;
+    *(int128 *) (outp + 112) = xmm13;
+
+    len -= 128;
+    inp += 128;
+    outp += 128;
+
+    goto enc_block;
+
+    partial:
+
+    lensav = len;
+    len >>= 4;
+
+    tmp = load32_bigendian(np + 12);
+    tmp += len;
+    store32_bigendian(np + 12, tmp);
+
+    blp = bl;
+    *(int128 *)(blp + 0) = xmm8;
+    *(int128 *)(blp + 16) = xmm9;
+    *(int128 *)(blp + 32) = xmm12;
+    *(int128 *)(blp + 48) = xmm14;
+    *(int128 *)(blp + 64) = xmm11;
+    *(int128 *)(blp + 80) = xmm15;
+    *(int128 *)(blp + 96) = xmm10;
+    *(int128 *)(blp + 112) = xmm13;
+
+    bytes:
+
+    if(lensav == 0) goto end;
+
+    b = blp[0];
+    b ^= *(unsigned char *)(inp + 0);
+    *(unsigned char *)(outp + 0) = b;
+
+    blp += 1;
+    inp +=1;
+    outp +=1;
+    lensav -= 1;
+
+    goto bytes;
+
+    full:
+
+    tmp = load32_bigendian(np + 12);
+    tmp += 8;
+    store32_bigendian(np + 12, tmp);
+
+    xor2(&xmm8, (int128 *)(inp + 0));
+    xor2(&xmm9, (int128 *)(inp + 16));
+    xor2(&xmm12, (int128 *)(inp + 32));
+    xor2(&xmm14, (int128 *)(inp + 48));
+    xor2(&xmm11, (int128 *)(inp + 64));
+    xor2(&xmm15, (int128 *)(inp + 80));
+    xor2(&xmm10, (int128 *)(inp + 96));
+    xor2(&xmm13, (int128 *)(inp + 112));
+
+    *(int128 *) (outp + 0) = xmm8;
+    *(int128 *) (outp + 16) = xmm9;
+    *(int128 *) (outp + 32) = xmm12;
+    *(int128 *) (outp + 48) = xmm14;
+    *(int128 *) (outp + 64) = xmm11;
+    *(int128 *) (outp + 80) = xmm15;
+    *(int128 *) (outp + 96) = xmm10;
+    *(int128 *) (outp + 112) = xmm13;
+
+    end:
+    return 0;
+
+}
diff --git a/nacl/nacl-20110221/crypto_stream/aes128ctr/used b/nacl/nacl-20110221/crypto_stream/aes128ctr/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_stream/measure.c b/nacl/nacl-20110221/crypto_stream/measure.c
new file mode 100644
index 00000000..ff3ab610
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/measure.c
@@ -0,0 +1,73 @@
+#include <stdlib.h>
+#include "randombytes.h"
+#include "cpucycles.h"
+#include "crypto_stream.h"
+
+extern void printentry(long long,const char *,long long *,long long);
+extern unsigned char *alignedcalloc(unsigned long long);
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void allocate(void);
+extern void measure(void);
+
+const char *primitiveimplementation = crypto_stream_IMPLEMENTATION;
+const char *implementationversion = crypto_stream_VERSION;
+const char *sizenames[] = { "keybytes", "noncebytes", 0 };
+const long long sizes[] = { crypto_stream_KEYBYTES, crypto_stream_NONCEBYTES };
+
+#define MAXTEST_BYTES 4096
+#ifdef SUPERCOP
+#define MGAP 8192
+#else
+#define MGAP 8
+#endif
+
+static unsigned char *k;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  k = alignedcalloc(crypto_stream_KEYBYTES);
+  n = alignedcalloc(crypto_stream_NONCEBYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  c = alignedcalloc(MAXTEST_BYTES);
+}
+
+#define TIMINGS 15
+static long long cycles[TIMINGS + 1];
+
+void measure(void)
+{
+  int i;
+  int loop;
+  int mlen;
+
+  for (loop = 0;loop < LOOPS;++loop) {
+    for (mlen = 0;mlen <= MAXTEST_BYTES;mlen += 1 + mlen / MGAP) {
+      randombytes(k,crypto_stream_KEYBYTES);
+      randombytes(n,crypto_stream_NONCEBYTES);
+      randombytes(m,mlen);
+      randombytes(c,mlen);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_stream(c,mlen,n,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"cycles",cycles,TIMINGS);
+      for (i = 0;i <= TIMINGS;++i) {
+        cycles[i] = cpucycles();
+	crypto_stream_xor(c,m,mlen,n,k);
+      }
+      for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+      printentry(mlen,"xor_cycles",cycles,TIMINGS);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/api.h b/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/implementors b/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/stream.s b/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/stream.s
new file mode 100644
index 00000000..82a897f7
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/amd64_xmm6/stream.s
@@ -0,0 +1,4823 @@
+
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: stack64 r11_stack
+
+# qhasm: stack64 r12_stack
+
+# qhasm: stack64 r13_stack
+
+# qhasm: stack64 r14_stack
+
+# qhasm: stack64 r15_stack
+
+# qhasm: stack64 rbx_stack
+
+# qhasm: stack64 rbp_stack
+
+# qhasm: int64 a
+
+# qhasm: int64 arg1
+
+# qhasm: int64 arg2
+
+# qhasm: int64 arg3
+
+# qhasm: int64 arg4
+
+# qhasm: int64 arg5
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: int64 k
+
+# qhasm: int64 kbits
+
+# qhasm: int64 iv
+
+# qhasm: int64 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int64 m
+
+# qhasm: int64 out
+
+# qhasm: int64 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: int6464 y0
+
+# qhasm: int6464 y1
+
+# qhasm: int6464 y2
+
+# qhasm: int6464 y3
+
+# qhasm: int6464 y4
+
+# qhasm: int6464 y5
+
+# qhasm: int6464 y6
+
+# qhasm: int6464 y7
+
+# qhasm: int6464 y8
+
+# qhasm: int6464 y9
+
+# qhasm: int6464 y10
+
+# qhasm: int6464 y11
+
+# qhasm: int6464 y12
+
+# qhasm: int6464 y13
+
+# qhasm: int6464 y14
+
+# qhasm: int6464 y15
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int64 in0
+
+# qhasm: int64 in1
+
+# qhasm: int64 in2
+
+# qhasm: int64 in3
+
+# qhasm: int64 in4
+
+# qhasm: int64 in5
+
+# qhasm: int64 in6
+
+# qhasm: int64 in7
+
+# qhasm: int64 in8
+
+# qhasm: int64 in9
+
+# qhasm: int64 in10
+
+# qhasm: int64 in11
+
+# qhasm: int64 in12
+
+# qhasm: int64 in13
+
+# qhasm: int64 in14
+
+# qhasm: int64 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: int64 ctarget
+
+# qhasm: stack64 bytes_backup
+
+# qhasm: enter crypto_stream_salsa20_amd64_xmm6
+.text
+.p2align 5
+.globl _crypto_stream_salsa20_amd64_xmm6
+.globl crypto_stream_salsa20_amd64_xmm6
+_crypto_stream_salsa20_amd64_xmm6:
+crypto_stream_salsa20_amd64_xmm6:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: bytes = arg2
+# asm 1: mov  <arg2=int64#2,>bytes=int64#6
+# asm 2: mov  <arg2=%rsi,>bytes=%r9
+mov  %rsi,%r9
+
+# qhasm: out = arg1
+# asm 1: mov  <arg1=int64#1,>out=int64#1
+# asm 2: mov  <arg1=%rdi,>out=%rdi
+mov  %rdi,%rdi
+
+# qhasm: m = out
+# asm 1: mov  <out=int64#1,>m=int64#2
+# asm 2: mov  <out=%rdi,>m=%rsi
+mov  %rdi,%rsi
+
+# qhasm: iv = arg3
+# asm 1: mov  <arg3=int64#3,>iv=int64#3
+# asm 2: mov  <arg3=%rdx,>iv=%rdx
+mov  %rdx,%rdx
+
+# qhasm: k = arg4
+# asm 1: mov  <arg4=int64#4,>k=int64#8
+# asm 2: mov  <arg4=%rcx,>k=%r10
+mov  %rcx,%r10
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov  $0,>a=int64#7
+# asm 2: mov  $0,>a=%rax
+mov  $0,%rax
+
+# qhasm: i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
+
+# qhasm: out -= bytes
+# asm 1: sub  <bytes=int64#6,<out=int64#1
+# asm 2: sub  <bytes=%r9,<out=%rdi
+sub  %r9,%rdi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
+
+# qhasm: enter crypto_stream_salsa20_amd64_xmm6_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa20_amd64_xmm6_xor
+.globl crypto_stream_salsa20_amd64_xmm6_xor
+_crypto_stream_salsa20_amd64_xmm6_xor:
+crypto_stream_salsa20_amd64_xmm6_xor:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: out = arg1
+# asm 1: mov  <arg1=int64#1,>out=int64#1
+# asm 2: mov  <arg1=%rdi,>out=%rdi
+mov  %rdi,%rdi
+
+# qhasm: m = arg2
+# asm 1: mov  <arg2=int64#2,>m=int64#2
+# asm 2: mov  <arg2=%rsi,>m=%rsi
+mov  %rsi,%rsi
+
+# qhasm: bytes = arg3
+# asm 1: mov  <arg3=int64#3,>bytes=int64#6
+# asm 2: mov  <arg3=%rdx,>bytes=%r9
+mov  %rdx,%r9
+
+# qhasm: iv = arg4
+# asm 1: mov  <arg4=int64#4,>iv=int64#3
+# asm 2: mov  <arg4=%rcx,>iv=%rdx
+mov  %rcx,%rdx
+
+# qhasm: k = arg5
+# asm 1: mov  <arg5=int64#5,>k=int64#8
+# asm 2: mov  <arg5=%r8,>k=%r10
+mov  %r8,%r10
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm:   in12 = *(uint32 *) (k + 20)
+# asm 1: movl   20(<k=int64#8),>in12=int64#4d
+# asm 2: movl   20(<k=%r10),>in12=%ecx
+movl   20(%r10),%ecx
+
+# qhasm:   in1 = *(uint32 *) (k + 0)
+# asm 1: movl   0(<k=int64#8),>in1=int64#5d
+# asm 2: movl   0(<k=%r10),>in1=%r8d
+movl   0(%r10),%r8d
+
+# qhasm:   in6 = *(uint32 *) (iv + 0)
+# asm 1: movl   0(<iv=int64#3),>in6=int64#7d
+# asm 2: movl   0(<iv=%rdx),>in6=%eax
+movl   0(%rdx),%eax
+
+# qhasm:   in11 = *(uint32 *) (k + 16)
+# asm 1: movl   16(<k=int64#8),>in11=int64#9d
+# asm 2: movl   16(<k=%r10),>in11=%r11d
+movl   16(%r10),%r11d
+
+# qhasm:   ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int64#4d,>x1=stack128#1
+# asm 2: movl <in12=%ecx,>x1=0(%rsp)
+movl %ecx,0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int64#5d,4+<x1=stack128#1
+# asm 2: movl <in1=%r8d,4+<x1=0(%rsp)
+movl %r8d,4+0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int64#7d,8+<x1=stack128#1
+# asm 2: movl <in6=%eax,8+<x1=0(%rsp)
+movl %eax,8+0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int64#9d,12+<x1=stack128#1
+# asm 2: movl <in11=%r11d,12+<x1=0(%rsp)
+movl %r11d,12+0(%rsp)
+
+# qhasm:   in8 = 0
+# asm 1: mov  $0,>in8=int64#4
+# asm 2: mov  $0,>in8=%rcx
+mov  $0,%rcx
+
+# qhasm:   in13 = *(uint32 *) (k + 24)
+# asm 1: movl   24(<k=int64#8),>in13=int64#5d
+# asm 2: movl   24(<k=%r10),>in13=%r8d
+movl   24(%r10),%r8d
+
+# qhasm:   in2 = *(uint32 *) (k + 4)
+# asm 1: movl   4(<k=int64#8),>in2=int64#7d
+# asm 2: movl   4(<k=%r10),>in2=%eax
+movl   4(%r10),%eax
+
+# qhasm:   in7 = *(uint32 *) (iv + 4)
+# asm 1: movl   4(<iv=int64#3),>in7=int64#3d
+# asm 2: movl   4(<iv=%rdx),>in7=%edx
+movl   4(%rdx),%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int64#5d,4+<x2=stack128#2
+# asm 2: movl <in13=%r8d,4+<x2=16(%rsp)
+movl %r8d,4+16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int64#7d,8+<x2=stack128#2
+# asm 2: movl <in2=%eax,8+<x2=16(%rsp)
+movl %eax,8+16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int64#3d,12+<x2=stack128#2
+# asm 2: movl <in7=%edx,12+<x2=16(%rsp)
+movl %edx,12+16(%rsp)
+
+# qhasm:   in4 = *(uint32 *) (k + 12)
+# asm 1: movl   12(<k=int64#8),>in4=int64#3d
+# asm 2: movl   12(<k=%r10),>in4=%edx
+movl   12(%r10),%edx
+
+# qhasm:   in9 = 0
+# asm 1: mov  $0,>in9=int64#4
+# asm 2: mov  $0,>in9=%rcx
+mov  $0,%rcx
+
+# qhasm:   in14 = *(uint32 *) (k + 28)
+# asm 1: movl   28(<k=int64#8),>in14=int64#5d
+# asm 2: movl   28(<k=%r10),>in14=%r8d
+movl   28(%r10),%r8d
+
+# qhasm:   in3 = *(uint32 *) (k + 8)
+# asm 1: movl   8(<k=int64#8),>in3=int64#7d
+# asm 2: movl   8(<k=%r10),>in3=%eax
+movl   8(%r10),%eax
+
+# qhasm:   ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int64#3d,>x3=stack128#3
+# asm 2: movl <in4=%edx,>x3=32(%rsp)
+movl %edx,32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int64#5d,8+<x3=stack128#3
+# asm 2: movl <in14=%r8d,8+<x3=32(%rsp)
+movl %r8d,8+32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int64#7d,12+<x3=stack128#3
+# asm 2: movl <in3=%eax,12+<x3=32(%rsp)
+movl %eax,12+32(%rsp)
+
+# qhasm:   in0 = 1634760805
+# asm 1: mov  $1634760805,>in0=int64#3
+# asm 2: mov  $1634760805,>in0=%rdx
+mov  $1634760805,%rdx
+
+# qhasm:   in5 = 857760878
+# asm 1: mov  $857760878,>in5=int64#4
+# asm 2: mov  $857760878,>in5=%rcx
+mov  $857760878,%rcx
+
+# qhasm:   in10 = 2036477234
+# asm 1: mov  $2036477234,>in10=int64#5
+# asm 2: mov  $2036477234,>in10=%r8
+mov  $2036477234,%r8
+
+# qhasm:   in15 = 1797285236
+# asm 1: mov  $1797285236,>in15=int64#7
+# asm 2: mov  $1797285236,>in15=%rax
+mov  $1797285236,%rax
+
+# qhasm:   ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int64#3d,>x0=stack128#4
+# asm 2: movl <in0=%edx,>x0=48(%rsp)
+movl %edx,48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int64#4d,4+<x0=stack128#4
+# asm 2: movl <in5=%ecx,4+<x0=48(%rsp)
+movl %ecx,4+48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int64#5d,8+<x0=stack128#4
+# asm 2: movl <in10=%r8d,8+<x0=48(%rsp)
+movl %r8d,8+48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int64#7d,12+<x0=stack128#4
+# asm 2: movl <in15=%eax,12+<x0=48(%rsp)
+movl %eax,12+48(%rsp)
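+
+# note: in0, in5, in10, in15 are the Salsa20 constants "expa", "nd 3", "2-by",
+# "te k"; x0..x3 now hold the 16 state words grouped along the diagonals of the
+# 4x4 matrix (x0 = 0,5,10,15; x1 = 12,1,6,11; x2 = 8,13,2,7; x3 = 4,9,14,3).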
+
+# qhasm:                               unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int64#6
+# asm 2: cmp  $256,<bytes=%r9
+cmp  $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
+
+# qhasm:   z0 = x0
+# asm 1: movdqa <x0=stack128#4,>z0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>z0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm:   z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm:   z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm:   z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm:   orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=64(%rsp)
+movdqa %xmm1,64(%rsp)
+
+# qhasm:   orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=80(%rsp)
+movdqa %xmm2,80(%rsp)
+
+# qhasm:   orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=96(%rsp)
+movdqa %xmm3,96(%rsp)
+
+# qhasm:   orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=112(%rsp)
+movdqa %xmm0,112(%rsp)
+
+# qhasm:   z1 = x1
+# asm 1: movdqa <x1=stack128#1,>z1=int6464#1
+# asm 2: movdqa <x1=0(%rsp),>z1=%xmm0
+movdqa 0(%rsp),%xmm0
+
+# qhasm:   z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm:   z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm:   z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm:   z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm:   orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=128(%rsp)
+movdqa %xmm1,128(%rsp)
+
+# qhasm:   orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=144(%rsp)
+movdqa %xmm2,144(%rsp)
+
+# qhasm:   orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=160(%rsp)
+movdqa %xmm3,160(%rsp)
+
+# qhasm:   orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=176(%rsp)
+movdqa %xmm0,176(%rsp)
+
+# qhasm:   z2 = x2
+# asm 1: movdqa <x2=stack128#2,>z2=int6464#1
+# asm 2: movdqa <x2=16(%rsp),>z2=%xmm0
+movdqa 16(%rsp),%xmm0
+
+# qhasm:   z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm:   z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm:   z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm:   orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=192(%rsp)
+movdqa %xmm1,192(%rsp)
+
+# qhasm:   orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=208(%rsp)
+movdqa %xmm2,208(%rsp)
+
+# qhasm:   orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=224(%rsp)
+movdqa %xmm0,224(%rsp)
+
+# qhasm:   z3 = x3
+# asm 1: movdqa <x3=stack128#3,>z3=int6464#1
+# asm 2: movdqa <x3=32(%rsp),>z3=%xmm0
+movdqa 32(%rsp),%xmm0
+
+# qhasm:   z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm:   z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm:   orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=240(%rsp)
+movdqa %xmm1,240(%rsp)
+
+# qhasm:   orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=256(%rsp)
+movdqa %xmm2,256(%rsp)
+
+# qhasm:   orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=272(%rsp)
+movdqa %xmm0,272(%rsp)
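+
+# note: every state word except the block counter (orig8/orig9, filled per
+# iteration below) has been broadcast into its own 128-bit slot, so the rounds
+# that follow process four consecutive 64-byte blocks in parallel, one per lane.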
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#3d
+# asm 2: movl <x2=16(%rsp),>in8=%edx
+movl 16(%rsp),%edx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#4d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%ecx
+movl 4+32(%rsp),%ecx
+
+# qhasm:   ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int64#3d,>orig8=stack128#19
+# asm 2: movl <in8=%edx,>orig8=288(%rsp)
+movl %edx,288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int64#4d,>orig9=stack128#20
+# asm 2: movl <in9=%ecx,>orig9=304(%rsp)
+movl %ecx,304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int64#3d,4+<orig8=stack128#19
+# asm 2: movl <in8=%edx,4+<orig8=288(%rsp)
+movl %edx,4+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,4+<orig9=304(%rsp)
+movl %ecx,4+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int64#3d,8+<orig8=stack128#19
+# asm 2: movl <in8=%edx,8+<orig8=288(%rsp)
+movl %edx,8+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int64#4d,8+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,8+<orig9=304(%rsp)
+movl %ecx,8+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int64#3d,12+<orig8=stack128#19
+# asm 2: movl <in8=%edx,12+<orig8=288(%rsp)
+movl %edx,12+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int64#4d,12+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,12+<orig9=304(%rsp)
+movl %ecx,12+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#3d,>x2=stack128#2
+# asm 2: movl <in8=%edx,>x2=16(%rsp)
+movl %edx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
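+
+# note: orig8/orig9 now hold the low/high halves of the 64-bit block counters
+# n, n+1, n+2, n+3 (one per lane), and x2[0]/x3[1] have been advanced to n+4
+# for the next 256-byte iteration.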
+
+# qhasm:   bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: i = 20
+# asm 1: mov  $20,>i=int64#3
+# asm 2: mov  $20,>i=%rdx
+mov  $20,%rdx
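+
+# note: i counts Salsa20 rounds; each pass through mainloop1 performs two rounds
+# (a column round and a row round), so the loop runs 10 times for 20 rounds.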
+
+# qhasm:   z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=64(%rsp),>z5=%xmm0
+movdqa 64(%rsp),%xmm0
+
+# qhasm:   z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=80(%rsp),>z10=%xmm1
+movdqa 80(%rsp),%xmm1
+
+# qhasm:   z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=96(%rsp),>z15=%xmm2
+movdqa 96(%rsp),%xmm2
+
+# qhasm:   z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=256(%rsp),>z14=%xmm3
+movdqa 256(%rsp),%xmm3
+
+# qhasm:   z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=272(%rsp),>z3=%xmm4
+movdqa 272(%rsp),%xmm4
+
+# qhasm:   z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=128(%rsp),>z6=%xmm5
+movdqa 128(%rsp),%xmm5
+
+# qhasm:   z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=144(%rsp),>z11=%xmm6
+movdqa 144(%rsp),%xmm6
+
+# qhasm:   z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=176(%rsp),>z1=%xmm7
+movdqa 176(%rsp),%xmm7
+
+# qhasm:   z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#9
+# asm 2: movdqa <orig7=192(%rsp),>z7=%xmm8
+movdqa 192(%rsp),%xmm8
+
+# qhasm:   z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#10
+# asm 2: movdqa <orig13=208(%rsp),>z13=%xmm9
+movdqa 208(%rsp),%xmm9
+
+# qhasm:   z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#11
+# asm 2: movdqa <orig2=224(%rsp),>z2=%xmm10
+movdqa 224(%rsp),%xmm10
+
+# qhasm:   z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#12
+# asm 2: movdqa <orig9=304(%rsp),>z9=%xmm11
+movdqa 304(%rsp),%xmm11
+
+# qhasm:   z0 = orig0
+# asm 1: movdqa <orig0=stack128#8,>z0=int6464#13
+# asm 2: movdqa <orig0=112(%rsp),>z0=%xmm12
+movdqa 112(%rsp),%xmm12
+
+# qhasm:   z12 = orig12
+# asm 1: movdqa <orig12=stack128#11,>z12=int6464#14
+# asm 2: movdqa <orig12=160(%rsp),>z12=%xmm13
+movdqa 160(%rsp),%xmm13
+
+# qhasm:   z4 = orig4
+# asm 1: movdqa <orig4=stack128#16,>z4=int6464#15
+# asm 2: movdqa <orig4=240(%rsp),>z4=%xmm14
+movdqa 240(%rsp),%xmm14
+
+# qhasm:   z8 = orig8
+# asm 1: movdqa <orig8=stack128#19,>z8=int6464#16
+# asm 2: movdqa <orig8=288(%rsp),>z8=%xmm15
+movdqa 288(%rsp),%xmm15
+
+# qhasm: mainloop1:
+._mainloop1:
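+
+# note: each paddd/pslld/psrld/pxor/pxor group below computes z ^= rotl32(a + b, c),
+# one Salsa20 quarter-round step; the 32-bit rotate is emulated with a paired
+# left/right shift because SSE2 has no packed rotate instruction.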
+
+# qhasm: 						z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: 								z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: 		y4 = z12
+# asm 1: movdqa <z12=int6464#14,>y4=int6464#2
+# asm 2: movdqa <z12=%xmm13,>y4=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: uint32323232	y4 += z0
+# asm 1: paddd <z0=int6464#13,<y4=int6464#2
+# asm 2: paddd <z0=%xmm12,<y4=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: 		r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y4 <<= 7
+# asm 1: pslld $7,<y4=int6464#2
+# asm 2: pslld $7,<y4=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 		z4 ^= y4
+# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
+pxor  %xmm1,%xmm14
+
+# qhasm: uint32323232	r4 >>= 25
+# asm 1: psrld $25,<r4=int6464#3
+# asm 2: psrld $25,<r4=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 		z4 ^= r4
+# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm: 				y9 = z1
+# asm 1: movdqa <z1=int6464#8,>y9=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y9=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232			y9 += z5
+# asm 1: paddd <z5=int6464#1,<y9=int6464#2
+# asm 2: paddd <z5=%xmm0,<y9=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: 				r9 = y9
+# asm 1: movdqa <y9=int6464#2,>r9=int6464#3
+# asm 2: movdqa <y9=%xmm1,>r9=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y9 <<= 7
+# asm 1: pslld $7,<y9=int6464#2
+# asm 2: pslld $7,<y9=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 				z9 ^= y9
+# asm 1: pxor  <y9=int6464#2,<z9=int6464#12
+# asm 2: pxor  <y9=%xmm1,<z9=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm: uint32323232			r9 >>= 25
+# asm 1: psrld $25,<r9=int6464#3
+# asm 2: psrld $25,<r9=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 				z9 ^= r9
+# asm 1: pxor  <r9=int6464#3,<z9=int6464#12
+# asm 2: pxor  <r9=%xmm2,<z9=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm: 		y8 = z0
+# asm 1: movdqa <z0=int6464#13,>y8=int6464#2
+# asm 2: movdqa <z0=%xmm12,>y8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232	y8 += z4
+# asm 1: paddd <z4=int6464#15,<y8=int6464#2
+# asm 2: paddd <z4=%xmm14,<y8=%xmm1
+paddd %xmm14,%xmm1
+
+# qhasm: 		r8 = y8
+# asm 1: movdqa <y8=int6464#2,>r8=int6464#3
+# asm 2: movdqa <y8=%xmm1,>r8=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#2
+# asm 2: pslld $9,<y8=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 		z8 ^= y8
+# asm 1: pxor  <y8=int6464#2,<z8=int6464#16
+# asm 2: pxor  <y8=%xmm1,<z8=%xmm15
+pxor  %xmm1,%xmm15
+
+# qhasm: uint32323232	r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#3
+# asm 2: psrld $23,<r8=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 		z8 ^= r8
+# asm 1: pxor  <r8=int6464#3,<z8=int6464#16
+# asm 2: pxor  <r8=%xmm2,<z8=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm: 				y13 = z5
+# asm 1: movdqa <z5=int6464#1,>y13=int6464#2
+# asm 2: movdqa <z5=%xmm0,>y13=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232			y13 += z9
+# asm 1: paddd <z9=int6464#12,<y13=int6464#2
+# asm 2: paddd <z9=%xmm11,<y13=%xmm1
+paddd %xmm11,%xmm1
+
+# qhasm: 				r13 = y13
+# asm 1: movdqa <y13=int6464#2,>r13=int6464#3
+# asm 2: movdqa <y13=%xmm1,>r13=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#2
+# asm 2: pslld $9,<y13=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 				z13 ^= y13
+# asm 1: pxor  <y13=int6464#2,<z13=int6464#10
+# asm 2: pxor  <y13=%xmm1,<z13=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm: uint32323232			r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#3
+# asm 2: psrld $23,<r13=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 				z13 ^= r13
+# asm 1: pxor  <r13=int6464#3,<z13=int6464#10
+# asm 2: pxor  <r13=%xmm2,<z13=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm: 		y12 = z4
+# asm 1: movdqa <z4=int6464#15,>y12=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y12=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232	y12 += z8
+# asm 1: paddd <z8=int6464#16,<y12=int6464#2
+# asm 2: paddd <z8=%xmm15,<y12=%xmm1
+paddd %xmm15,%xmm1
+
+# qhasm: 		r12 = y12
+# asm 1: movdqa <y12=int6464#2,>r12=int6464#3
+# asm 2: movdqa <y12=%xmm1,>r12=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y12 <<= 13
+# asm 1: pslld $13,<y12=int6464#2
+# asm 2: pslld $13,<y12=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 		z12 ^= y12
+# asm 1: pxor  <y12=int6464#2,<z12=int6464#14
+# asm 2: pxor  <y12=%xmm1,<z12=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm: uint32323232	r12 >>= 19
+# asm 1: psrld $19,<r12=int6464#3
+# asm 2: psrld $19,<r12=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 		z12 ^= r12
+# asm 1: pxor  <r12=int6464#3,<z12=int6464#14
+# asm 2: pxor  <r12=%xmm2,<z12=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm: 				y1 = z9
+# asm 1: movdqa <z9=int6464#12,>y1=int6464#2
+# asm 2: movdqa <z9=%xmm11,>y1=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: uint32323232			y1 += z13
+# asm 1: paddd <z13=int6464#10,<y1=int6464#2
+# asm 2: paddd <z13=%xmm9,<y1=%xmm1
+paddd %xmm9,%xmm1
+
+# qhasm: 				r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#3
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y1 <<= 13
+# asm 1: pslld $13,<y1=int6464#2
+# asm 2: pslld $13,<y1=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 				z1 ^= y1
+# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm: uint32323232			r1 >>= 19
+# asm 1: psrld $19,<r1=int6464#3
+# asm 2: psrld $19,<r1=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 				z1 ^= r1
+# asm 1: pxor  <r1=int6464#3,<z1=int6464#8
+# asm 2: pxor  <r1=%xmm2,<z1=%xmm7
+pxor  %xmm2,%xmm7
+
+# qhasm: 		y0 = z8
+# asm 1: movdqa <z8=int6464#16,>y0=int6464#2
+# asm 2: movdqa <z8=%xmm15,>y0=%xmm1
+movdqa %xmm15,%xmm1
+
+# qhasm: uint32323232	y0 += z12
+# asm 1: paddd <z12=int6464#14,<y0=int6464#2
+# asm 2: paddd <z12=%xmm13,<y0=%xmm1
+paddd %xmm13,%xmm1
+
+# qhasm: 		r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 		z0 ^= y0
+# asm 1: pxor  <y0=int6464#2,<z0=int6464#13
+# asm 2: pxor  <y0=%xmm1,<z0=%xmm12
+pxor  %xmm1,%xmm12
+
+# qhasm: uint32323232	r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 		z0 ^= r0
+# asm 1: pxor  <r0=int6464#3,<z0=int6464#13
+# asm 2: pxor  <r0=%xmm2,<z0=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm: 						z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: 		z0_stack = z0
+# asm 1: movdqa <z0=int6464#13,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm12,>z0_stack=320(%rsp)
+movdqa %xmm12,320(%rsp)
+
+# qhasm: 				y5 = z13
+# asm 1: movdqa <z13=int6464#10,>y5=int6464#3
+# asm 2: movdqa <z13=%xmm9,>y5=%xmm2
+movdqa %xmm9,%xmm2
+
+# qhasm: uint32323232			y5 += z1
+# asm 1: paddd <z1=int6464#8,<y5=int6464#3
+# asm 2: paddd <z1=%xmm7,<y5=%xmm2
+paddd %xmm7,%xmm2
+
+# qhasm: 				r5 = y5
+# asm 1: movdqa <y5=int6464#3,>r5=int6464#13
+# asm 2: movdqa <y5=%xmm2,>r5=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232			y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#3
+# asm 2: pslld $18,<y5=%xmm2
+pslld $18,%xmm2
+
+# qhasm: 				z5 ^= y5
+# asm 1: pxor  <y5=int6464#3,<z5=int6464#1
+# asm 2: pxor  <y5=%xmm2,<z5=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm: uint32323232			r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#13
+# asm 2: psrld $14,<r5=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 				z5 ^= r5
+# asm 1: pxor  <r5=int6464#13,<z5=int6464#1
+# asm 2: pxor  <r5=%xmm12,<z5=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm: 						y14 = z6
+# asm 1: movdqa <z6=int6464#6,>y14=int6464#3
+# asm 2: movdqa <z6=%xmm5,>y14=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm: uint32323232					y14 += z10
+# asm 1: paddd <z10=int6464#2,<y14=int6464#3
+# asm 2: paddd <z10=%xmm1,<y14=%xmm2
+paddd %xmm1,%xmm2
+
+# qhasm: 						r14 = y14
+# asm 1: movdqa <y14=int6464#3,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm2,>r14=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232					y14 <<= 7
+# asm 1: pslld $7,<y14=int6464#3
+# asm 2: pslld $7,<y14=%xmm2
+pslld $7,%xmm2
+
+# qhasm: 						z14 ^= y14
+# asm 1: pxor  <y14=int6464#3,<z14=int6464#4
+# asm 2: pxor  <y14=%xmm2,<z14=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232					r14 >>= 25
+# asm 1: psrld $25,<r14=int6464#13
+# asm 2: psrld $25,<r14=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 						z14 ^= r14
+# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm: 								z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: 				z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm0,>z5_stack=336(%rsp)
+movdqa %xmm0,336(%rsp)
+
+# qhasm: 								y3 = z11
+# asm 1: movdqa <z11=int6464#7,>y3=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y3=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232							y3 += z15
+# asm 1: paddd <z15=int6464#3,<y3=int6464#1
+# asm 2: paddd <z15=%xmm2,<y3=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: 								r3 = y3
+# asm 1: movdqa <y3=int6464#1,>r3=int6464#13
+# asm 2: movdqa <y3=%xmm0,>r3=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y3 <<= 7
+# asm 1: pslld $7,<y3=int6464#1
+# asm 2: pslld $7,<y3=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 								z3 ^= y3
+# asm 1: pxor  <y3=int6464#1,<z3=int6464#5
+# asm 2: pxor  <y3=%xmm0,<z3=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm: uint32323232							r3 >>= 25
+# asm 1: psrld $25,<r3=int6464#13
+# asm 2: psrld $25,<r3=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 								z3 ^= r3
+# asm 1: pxor  <r3=int6464#13,<z3=int6464#5
+# asm 2: pxor  <r3=%xmm12,<z3=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm: 						y2 = z10
+# asm 1: movdqa <z10=int6464#2,>y2=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y2=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232					y2 += z14
+# asm 1: paddd <z14=int6464#4,<y2=int6464#1
+# asm 2: paddd <z14=%xmm3,<y2=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: 						r2 = y2
+# asm 1: movdqa <y2=int6464#1,>r2=int6464#13
+# asm 2: movdqa <y2=%xmm0,>r2=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#1
+# asm 2: pslld $9,<y2=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 						z2 ^= y2
+# asm 1: pxor  <y2=int6464#1,<z2=int6464#11
+# asm 2: pxor  <y2=%xmm0,<z2=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm: uint32323232					r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#13
+# asm 2: psrld $23,<r2=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 						z2 ^= r2
+# asm 1: pxor  <r2=int6464#13,<z2=int6464#11
+# asm 2: pxor  <r2=%xmm12,<z2=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm: 								y7 = z15
+# asm 1: movdqa <z15=int6464#3,>y7=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y7=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232							y7 += z3
+# asm 1: paddd <z3=int6464#5,<y7=int6464#1
+# asm 2: paddd <z3=%xmm4,<y7=%xmm0
+paddd %xmm4,%xmm0
+
+# qhasm: 								r7 = y7
+# asm 1: movdqa <y7=int6464#1,>r7=int6464#13
+# asm 2: movdqa <y7=%xmm0,>r7=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#1
+# asm 2: pslld $9,<y7=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 								z7 ^= y7
+# asm 1: pxor  <y7=int6464#1,<z7=int6464#9
+# asm 2: pxor  <y7=%xmm0,<z7=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm: uint32323232							r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#13
+# asm 2: psrld $23,<r7=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 								z7 ^= r7
+# asm 1: pxor  <r7=int6464#13,<z7=int6464#9
+# asm 2: pxor  <r7=%xmm12,<z7=%xmm8
+pxor  %xmm12,%xmm8
+
+# qhasm: 						y6 = z14
+# asm 1: movdqa <z14=int6464#4,>y6=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y6=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232					y6 += z2
+# asm 1: paddd <z2=int6464#11,<y6=int6464#1
+# asm 2: paddd <z2=%xmm10,<y6=%xmm0
+paddd %xmm10,%xmm0
+
+# qhasm: 						r6 = y6
+# asm 1: movdqa <y6=int6464#1,>r6=int6464#13
+# asm 2: movdqa <y6=%xmm0,>r6=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y6 <<= 13
+# asm 1: pslld $13,<y6=int6464#1
+# asm 2: pslld $13,<y6=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 						z6 ^= y6
+# asm 1: pxor  <y6=int6464#1,<z6=int6464#6
+# asm 2: pxor  <y6=%xmm0,<z6=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232					r6 >>= 19
+# asm 1: psrld $19,<r6=int6464#13
+# asm 2: psrld $19,<r6=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 						z6 ^= r6
+# asm 1: pxor  <r6=int6464#13,<z6=int6464#6
+# asm 2: pxor  <r6=%xmm12,<z6=%xmm5
+pxor  %xmm12,%xmm5
+
+# qhasm: 								y11 = z3
+# asm 1: movdqa <z3=int6464#5,>y11=int6464#1
+# asm 2: movdqa <z3=%xmm4,>y11=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232							y11 += z7
+# asm 1: paddd <z7=int6464#9,<y11=int6464#1
+# asm 2: paddd <z7=%xmm8,<y11=%xmm0
+paddd %xmm8,%xmm0
+
+# qhasm: 								r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#13
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y11 <<= 13
+# asm 1: pslld $13,<y11=int6464#1
+# asm 2: pslld $13,<y11=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 								z11 ^= y11
+# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm: uint32323232							r11 >>= 19
+# asm 1: psrld $19,<r11=int6464#13
+# asm 2: psrld $19,<r11=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 								z11 ^= r11
+# asm 1: pxor  <r11=int6464#13,<z11=int6464#7
+# asm 2: pxor  <r11=%xmm12,<z11=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm: 						y10 = z2
+# asm 1: movdqa <z2=int6464#11,>y10=int6464#1
+# asm 2: movdqa <z2=%xmm10,>y10=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm: uint32323232					y10 += z6
+# asm 1: paddd <z6=int6464#6,<y10=int6464#1
+# asm 2: paddd <z6=%xmm5,<y10=%xmm0
+paddd %xmm5,%xmm0
+
+# qhasm: 						r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 						z10 ^= y10
+# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm: uint32323232					r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 						z10 ^= r10
+# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
+pxor  %xmm12,%xmm1
+
+# qhasm: 		z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#1
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm0
+movdqa 320(%rsp),%xmm0
+
+# qhasm: 						z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: 		y1 = z3
+# asm 1: movdqa <z3=int6464#5,>y1=int6464#2
+# asm 2: movdqa <z3=%xmm4,>y1=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: uint32323232	y1 += z0
+# asm 1: paddd <z0=int6464#1,<y1=int6464#2
+# asm 2: paddd <z0=%xmm0,<y1=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: 		r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#13
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232	y1 <<= 7
+# asm 1: pslld $7,<y1=int6464#2
+# asm 2: pslld $7,<y1=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 		z1 ^= y1
+# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm: uint32323232	r1 >>= 25
+# asm 1: psrld $25,<r1=int6464#13
+# asm 2: psrld $25,<r1=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 		z1 ^= r1
+# asm 1: pxor  <r1=int6464#13,<z1=int6464#8
+# asm 2: pxor  <r1=%xmm12,<z1=%xmm7
+pxor  %xmm12,%xmm7
+
+# qhasm: 								y15 = z7
+# asm 1: movdqa <z7=int6464#9,>y15=int6464#2
+# asm 2: movdqa <z7=%xmm8,>y15=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: uint32323232							y15 += z11
+# asm 1: paddd <z11=int6464#7,<y15=int6464#2
+# asm 2: paddd <z11=%xmm6,<y15=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm: 								r15 = y15
+# asm 1: movdqa <y15=int6464#2,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm1,>r15=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232							y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#2
+# asm 2: pslld $18,<y15=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 								z15 ^= y15
+# asm 1: pxor  <y15=int6464#2,<z15=int6464#3
+# asm 2: pxor  <y15=%xmm1,<z15=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm: uint32323232							r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 								z15 ^= r15
+# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm: 				z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#13
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm12
+movdqa 336(%rsp),%xmm12
+
+# qhasm: 								z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: 				y6 = z4
+# asm 1: movdqa <z4=int6464#15,>y6=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y6=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232			y6 += z5
+# asm 1: paddd <z5=int6464#13,<y6=int6464#2
+# asm 2: paddd <z5=%xmm12,<y6=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: 				r6 = y6
+# asm 1: movdqa <y6=int6464#2,>r6=int6464#3
+# asm 2: movdqa <y6=%xmm1,>r6=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y6 <<= 7
+# asm 1: pslld $7,<y6=int6464#2
+# asm 2: pslld $7,<y6=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 				z6 ^= y6
+# asm 1: pxor  <y6=int6464#2,<z6=int6464#6
+# asm 2: pxor  <y6=%xmm1,<z6=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm: uint32323232			r6 >>= 25
+# asm 1: psrld $25,<r6=int6464#3
+# asm 2: psrld $25,<r6=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 				z6 ^= r6
+# asm 1: pxor  <r6=int6464#3,<z6=int6464#6
+# asm 2: pxor  <r6=%xmm2,<z6=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm: 		y2 = z0
+# asm 1: movdqa <z0=int6464#1,>y2=int6464#2
+# asm 2: movdqa <z0=%xmm0,>y2=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232	y2 += z1
+# asm 1: paddd <z1=int6464#8,<y2=int6464#2
+# asm 2: paddd <z1=%xmm7,<y2=%xmm1
+paddd %xmm7,%xmm1
+
+# qhasm: 		r2 = y2
+# asm 1: movdqa <y2=int6464#2,>r2=int6464#3
+# asm 2: movdqa <y2=%xmm1,>r2=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#2
+# asm 2: pslld $9,<y2=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 		z2 ^= y2
+# asm 1: pxor  <y2=int6464#2,<z2=int6464#11
+# asm 2: pxor  <y2=%xmm1,<z2=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm: uint32323232	r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#3
+# asm 2: psrld $23,<r2=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 		z2 ^= r2
+# asm 1: pxor  <r2=int6464#3,<z2=int6464#11
+# asm 2: pxor  <r2=%xmm2,<z2=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm: 				y7 = z5
+# asm 1: movdqa <z5=int6464#13,>y7=int6464#2
+# asm 2: movdqa <z5=%xmm12,>y7=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232			y7 += z6
+# asm 1: paddd <z6=int6464#6,<y7=int6464#2
+# asm 2: paddd <z6=%xmm5,<y7=%xmm1
+paddd %xmm5,%xmm1
+
+# qhasm: 				r7 = y7
+# asm 1: movdqa <y7=int6464#2,>r7=int6464#3
+# asm 2: movdqa <y7=%xmm1,>r7=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#2
+# asm 2: pslld $9,<y7=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 				z7 ^= y7
+# asm 1: pxor  <y7=int6464#2,<z7=int6464#9
+# asm 2: pxor  <y7=%xmm1,<z7=%xmm8
+pxor  %xmm1,%xmm8
+
+# qhasm: uint32323232			r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#3
+# asm 2: psrld $23,<r7=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 				z7 ^= r7
+# asm 1: pxor  <r7=int6464#3,<z7=int6464#9
+# asm 2: pxor  <r7=%xmm2,<z7=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm: 		y3 = z1
+# asm 1: movdqa <z1=int6464#8,>y3=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y3=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232	y3 += z2
+# asm 1: paddd <z2=int6464#11,<y3=int6464#2
+# asm 2: paddd <z2=%xmm10,<y3=%xmm1
+paddd %xmm10,%xmm1
+
+# qhasm: 		r3 = y3
+# asm 1: movdqa <y3=int6464#2,>r3=int6464#3
+# asm 2: movdqa <y3=%xmm1,>r3=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y3 <<= 13
+# asm 1: pslld $13,<y3=int6464#2
+# asm 2: pslld $13,<y3=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 		z3 ^= y3
+# asm 1: pxor  <y3=int6464#2,<z3=int6464#5
+# asm 2: pxor  <y3=%xmm1,<z3=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm: uint32323232	r3 >>= 19
+# asm 1: psrld $19,<r3=int6464#3
+# asm 2: psrld $19,<r3=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 		z3 ^= r3
+# asm 1: pxor  <r3=int6464#3,<z3=int6464#5
+# asm 2: pxor  <r3=%xmm2,<z3=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm: 				y4 = z6
+# asm 1: movdqa <z6=int6464#6,>y4=int6464#2
+# asm 2: movdqa <z6=%xmm5,>y4=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: uint32323232			y4 += z7
+# asm 1: paddd <z7=int6464#9,<y4=int6464#2
+# asm 2: paddd <z7=%xmm8,<y4=%xmm1
+paddd %xmm8,%xmm1
+
+# qhasm: 				r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y4 <<= 13
+# asm 1: pslld $13,<y4=int6464#2
+# asm 2: pslld $13,<y4=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 				z4 ^= y4
+# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
+pxor  %xmm1,%xmm14
+
+# qhasm: uint32323232			r4 >>= 19
+# asm 1: psrld $19,<r4=int6464#3
+# asm 2: psrld $19,<r4=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 				z4 ^= r4
+# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm: 		y0 = z2
+# asm 1: movdqa <z2=int6464#11,>y0=int6464#2
+# asm 2: movdqa <z2=%xmm10,>y0=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: uint32323232	y0 += z3
+# asm 1: paddd <z3=int6464#5,<y0=int6464#2
+# asm 2: paddd <z3=%xmm4,<y0=%xmm1
+paddd %xmm4,%xmm1
+
+# qhasm: 		r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 		z0 ^= y0
+# asm 1: pxor  <y0=int6464#2,<z0=int6464#1
+# asm 2: pxor  <y0=%xmm1,<z0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232	r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 		z0 ^= r0
+# asm 1: pxor  <r0=int6464#3,<z0=int6464#1
+# asm 2: pxor  <r0=%xmm2,<z0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm: 						z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: 		z0_stack = z0
+# asm 1: movdqa <z0=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm0,>z0_stack=320(%rsp)
+movdqa %xmm0,320(%rsp)
+
+# qhasm: 				y5 = z7
+# asm 1: movdqa <z7=int6464#9,>y5=int6464#1
+# asm 2: movdqa <z7=%xmm8,>y5=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm: uint32323232			y5 += z4
+# asm 1: paddd <z4=int6464#15,<y5=int6464#1
+# asm 2: paddd <z4=%xmm14,<y5=%xmm0
+paddd %xmm14,%xmm0
+
+# qhasm: 				r5 = y5
+# asm 1: movdqa <y5=int6464#1,>r5=int6464#3
+# asm 2: movdqa <y5=%xmm0,>r5=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232			y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#1
+# asm 2: pslld $18,<y5=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 				z5 ^= y5
+# asm 1: pxor  <y5=int6464#1,<z5=int6464#13
+# asm 2: pxor  <y5=%xmm0,<z5=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm: uint32323232			r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#3
+# asm 2: psrld $14,<r5=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 				z5 ^= r5
+# asm 1: pxor  <r5=int6464#3,<z5=int6464#13
+# asm 2: pxor  <r5=%xmm2,<z5=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm: 						y11 = z9
+# asm 1: movdqa <z9=int6464#12,>y11=int6464#1
+# asm 2: movdqa <z9=%xmm11,>y11=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm: uint32323232					y11 += z10
+# asm 1: paddd <z10=int6464#2,<y11=int6464#1
+# asm 2: paddd <z10=%xmm1,<y11=%xmm0
+paddd %xmm1,%xmm0
+
+# qhasm: 						r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#3
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232					y11 <<= 7
+# asm 1: pslld $7,<y11=int6464#1
+# asm 2: pslld $7,<y11=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 						z11 ^= y11
+# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm: uint32323232					r11 >>= 25
+# asm 1: psrld $25,<r11=int6464#3
+# asm 2: psrld $25,<r11=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 						z11 ^= r11
+# asm 1: pxor  <r11=int6464#3,<z11=int6464#7
+# asm 2: pxor  <r11=%xmm2,<z11=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm: 								z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: 				z5_stack = z5
+# asm 1: movdqa <z5=int6464#13,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm12,>z5_stack=336(%rsp)
+movdqa %xmm12,336(%rsp)
+
+# qhasm: 								y12 = z14
+# asm 1: movdqa <z14=int6464#4,>y12=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y12=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232							y12 += z15
+# asm 1: paddd <z15=int6464#3,<y12=int6464#1
+# asm 2: paddd <z15=%xmm2,<y12=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: 								r12 = y12
+# asm 1: movdqa <y12=int6464#1,>r12=int6464#13
+# asm 2: movdqa <y12=%xmm0,>r12=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y12 <<= 7
+# asm 1: pslld $7,<y12=int6464#1
+# asm 2: pslld $7,<y12=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 								z12 ^= y12
+# asm 1: pxor  <y12=int6464#1,<z12=int6464#14
+# asm 2: pxor  <y12=%xmm0,<z12=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm: uint32323232							r12 >>= 25
+# asm 1: psrld $25,<r12=int6464#13
+# asm 2: psrld $25,<r12=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 								z12 ^= r12
+# asm 1: pxor  <r12=int6464#13,<z12=int6464#14
+# asm 2: pxor  <r12=%xmm12,<z12=%xmm13
+pxor  %xmm12,%xmm13
+
+# qhasm: 						y8 = z10
+# asm 1: movdqa <z10=int6464#2,>y8=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y8=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232					y8 += z11
+# asm 1: paddd <z11=int6464#7,<y8=int6464#1
+# asm 2: paddd <z11=%xmm6,<y8=%xmm0
+paddd %xmm6,%xmm0
+
+# qhasm: 						r8 = y8
+# asm 1: movdqa <y8=int6464#1,>r8=int6464#13
+# asm 2: movdqa <y8=%xmm0,>r8=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#1
+# asm 2: pslld $9,<y8=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 						z8 ^= y8
+# asm 1: pxor  <y8=int6464#1,<z8=int6464#16
+# asm 2: pxor  <y8=%xmm0,<z8=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm: uint32323232					r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#13
+# asm 2: psrld $23,<r8=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 						z8 ^= r8
+# asm 1: pxor  <r8=int6464#13,<z8=int6464#16
+# asm 2: pxor  <r8=%xmm12,<z8=%xmm15
+pxor  %xmm12,%xmm15
+
+# qhasm: 								y13 = z15
+# asm 1: movdqa <z15=int6464#3,>y13=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y13=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232							y13 += z12
+# asm 1: paddd <z12=int6464#14,<y13=int6464#1
+# asm 2: paddd <z12=%xmm13,<y13=%xmm0
+paddd %xmm13,%xmm0
+
+# qhasm: 								r13 = y13
+# asm 1: movdqa <y13=int6464#1,>r13=int6464#13
+# asm 2: movdqa <y13=%xmm0,>r13=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#1
+# asm 2: pslld $9,<y13=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 								z13 ^= y13
+# asm 1: pxor  <y13=int6464#1,<z13=int6464#10
+# asm 2: pxor  <y13=%xmm0,<z13=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm: uint32323232							r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#13
+# asm 2: psrld $23,<r13=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 								z13 ^= r13
+# asm 1: pxor  <r13=int6464#13,<z13=int6464#10
+# asm 2: pxor  <r13=%xmm12,<z13=%xmm9
+pxor  %xmm12,%xmm9
+
+# qhasm: 						y9 = z11
+# asm 1: movdqa <z11=int6464#7,>y9=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y9=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232					y9 += z8
+# asm 1: paddd <z8=int6464#16,<y9=int6464#1
+# asm 2: paddd <z8=%xmm15,<y9=%xmm0
+paddd %xmm15,%xmm0
+
+# qhasm: 						r9 = y9
+# asm 1: movdqa <y9=int6464#1,>r9=int6464#13
+# asm 2: movdqa <y9=%xmm0,>r9=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y9 <<= 13
+# asm 1: pslld $13,<y9=int6464#1
+# asm 2: pslld $13,<y9=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 						z9 ^= y9
+# asm 1: pxor  <y9=int6464#1,<z9=int6464#12
+# asm 2: pxor  <y9=%xmm0,<z9=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm: uint32323232					r9 >>= 19
+# asm 1: psrld $19,<r9=int6464#13
+# asm 2: psrld $19,<r9=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 						z9 ^= r9
+# asm 1: pxor  <r9=int6464#13,<z9=int6464#12
+# asm 2: pxor  <r9=%xmm12,<z9=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm: 								y14 = z12
+# asm 1: movdqa <z12=int6464#14,>y14=int6464#1
+# asm 2: movdqa <z12=%xmm13,>y14=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: uint32323232							y14 += z13
+# asm 1: paddd <z13=int6464#10,<y14=int6464#1
+# asm 2: paddd <z13=%xmm9,<y14=%xmm0
+paddd %xmm9,%xmm0
+
+# qhasm: 								r14 = y14
+# asm 1: movdqa <y14=int6464#1,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm0,>r14=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y14 <<= 13
+# asm 1: pslld $13,<y14=int6464#1
+# asm 2: pslld $13,<y14=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 								z14 ^= y14
+# asm 1: pxor  <y14=int6464#1,<z14=int6464#4
+# asm 2: pxor  <y14=%xmm0,<z14=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm: uint32323232							r14 >>= 19
+# asm 1: psrld $19,<r14=int6464#13
+# asm 2: psrld $19,<r14=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 								z14 ^= r14
+# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm: 						y10 = z8
+# asm 1: movdqa <z8=int6464#16,>y10=int6464#1
+# asm 2: movdqa <z8=%xmm15,>y10=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm: uint32323232					y10 += z9
+# asm 1: paddd <z9=int6464#12,<y10=int6464#1
+# asm 2: paddd <z9=%xmm11,<y10=%xmm0
+paddd %xmm11,%xmm0
+
+# qhasm: 						r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 						z10 ^= y10
+# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm: uint32323232					r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 						z10 ^= r10
+# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
+pxor  %xmm12,%xmm1
+
+# qhasm: 								y15 = z13
+# asm 1: movdqa <z13=int6464#10,>y15=int6464#1
+# asm 2: movdqa <z13=%xmm9,>y15=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm: uint32323232							y15 += z14
+# asm 1: paddd <z14=int6464#4,<y15=int6464#1
+# asm 2: paddd <z14=%xmm3,<y15=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: 								r15 = y15
+# asm 1: movdqa <y15=int6464#1,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm0,>r15=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#1
+# asm 2: pslld $18,<y15=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 								z15 ^= y15
+# asm 1: pxor  <y15=int6464#1,<z15=int6464#3
+# asm 2: pxor  <y15=%xmm0,<z15=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm: uint32323232							r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 								z15 ^= r15
+# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm: 		z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#13
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm12
+movdqa 320(%rsp),%xmm12
+
+# qhasm: 				z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#1
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm0
+movdqa 336(%rsp),%xmm0
+
+# qhasm:                   unsigned>? i -= 2
+# asm 1: sub  $2,<i=int64#3
+# asm 2: sub  $2,<i=%rdx
+sub  $2,%rdx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
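+
+# note: with the 20 rounds done, the saved input words are added back (z += orig)
+# and each 32-bit lane is extracted, xored with the message, and stored; lane j
+# of every register belongs to block j, i.e. to out+0, out+64, out+128, out+192.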
+
+# qhasm:   uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#13
+# asm 2: paddd <orig0=112(%rsp),<z0=%xmm12
+paddd 112(%rsp),%xmm12
+
+# qhasm:   uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#8
+# asm 2: paddd <orig1=176(%rsp),<z1=%xmm7
+paddd 176(%rsp),%xmm7
+
+# qhasm:   uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#11
+# asm 2: paddd <orig2=224(%rsp),<z2=%xmm10
+paddd 224(%rsp),%xmm10
+
+# qhasm:   uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#5
+# asm 2: paddd <orig3=272(%rsp),<z3=%xmm4
+paddd 272(%rsp),%xmm4
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
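+
+# note: pshufd $0x39 moves each register's former lane 1 into lane 0, so the
+# next movd extracts the word belonging to the next 64-byte block.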
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 0(<m=%rsi),<in0=%edx
+xorl 0(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 4(<m=%rsi),<in1=%ecx
+xorl 4(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 8(<m=%rsi),<in2=%r8d
+xorl 8(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 12(<m=%rsi),<in3=%r9d
+xorl 12(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 0) = in0
+# asm 1: movl   <in0=int64#3d,0(<out=int64#1)
+# asm 2: movl   <in0=%edx,0(<out=%rdi)
+movl   %edx,0(%rdi)
+
+# qhasm:   *(uint32 *) (out + 4) = in1
+# asm 1: movl   <in1=int64#4d,4(<out=int64#1)
+# asm 2: movl   <in1=%ecx,4(<out=%rdi)
+movl   %ecx,4(%rdi)
+
+# qhasm:   *(uint32 *) (out + 8) = in2
+# asm 1: movl   <in2=int64#5d,8(<out=int64#1)
+# asm 2: movl   <in2=%r8d,8(<out=%rdi)
+movl   %r8d,8(%rdi)
+
+# qhasm:   *(uint32 *) (out + 12) = in3
+# asm 1: movl   <in3=int64#6d,12(<out=int64#1)
+# asm 2: movl   <in3=%r9d,12(<out=%rdi)
+movl   %r9d,12(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 64(<m=%rsi),<in0=%edx
+xorl 64(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 68(<m=%rsi),<in1=%ecx
+xorl 68(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 72(<m=%rsi),<in2=%r8d
+xorl 72(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 76(<m=%rsi),<in3=%r9d
+xorl 76(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 64) = in0
+# asm 1: movl   <in0=int64#3d,64(<out=int64#1)
+# asm 2: movl   <in0=%edx,64(<out=%rdi)
+movl   %edx,64(%rdi)
+
+# qhasm:   *(uint32 *) (out + 68) = in1
+# asm 1: movl   <in1=int64#4d,68(<out=int64#1)
+# asm 2: movl   <in1=%ecx,68(<out=%rdi)
+movl   %ecx,68(%rdi)
+
+# qhasm:   *(uint32 *) (out + 72) = in2
+# asm 1: movl   <in2=int64#5d,72(<out=int64#1)
+# asm 2: movl   <in2=%r8d,72(<out=%rdi)
+movl   %r8d,72(%rdi)
+
+# qhasm:   *(uint32 *) (out + 76) = in3
+# asm 1: movl   <in3=int64#6d,76(<out=int64#1)
+# asm 2: movl   <in3=%r9d,76(<out=%rdi)
+movl   %r9d,76(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 128(<m=%rsi),<in0=%edx
+xorl 128(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 132(<m=%rsi),<in1=%ecx
+xorl 132(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 136(<m=%rsi),<in2=%r8d
+xorl 136(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 140(<m=%rsi),<in3=%r9d
+xorl 140(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 128) = in0
+# asm 1: movl   <in0=int64#3d,128(<out=int64#1)
+# asm 2: movl   <in0=%edx,128(<out=%rdi)
+movl   %edx,128(%rdi)
+
+# qhasm:   *(uint32 *) (out + 132) = in1
+# asm 1: movl   <in1=int64#4d,132(<out=int64#1)
+# asm 2: movl   <in1=%ecx,132(<out=%rdi)
+movl   %ecx,132(%rdi)
+
+# qhasm:   *(uint32 *) (out + 136) = in2
+# asm 1: movl   <in2=int64#5d,136(<out=int64#1)
+# asm 2: movl   <in2=%r8d,136(<out=%rdi)
+movl   %r8d,136(%rdi)
+
+# qhasm:   *(uint32 *) (out + 140) = in3
+# asm 1: movl   <in3=int64#6d,140(<out=int64#1)
+# asm 2: movl   <in3=%r9d,140(<out=%rdi)
+movl   %r9d,140(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 192(<m=%rsi),<in0=%edx
+xorl 192(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 196(<m=%rsi),<in1=%ecx
+xorl 196(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 200(<m=%rsi),<in2=%r8d
+xorl 200(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 204(<m=%rsi),<in3=%r9d
+xorl 204(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 192) = in0
+# asm 1: movl   <in0=int64#3d,192(<out=int64#1)
+# asm 2: movl   <in0=%edx,192(<out=%rdi)
+movl   %edx,192(%rdi)
+
+# qhasm:   *(uint32 *) (out + 196) = in1
+# asm 1: movl   <in1=int64#4d,196(<out=int64#1)
+# asm 2: movl   <in1=%ecx,196(<out=%rdi)
+movl   %ecx,196(%rdi)
+
+# qhasm:   *(uint32 *) (out + 200) = in2
+# asm 1: movl   <in2=int64#5d,200(<out=int64#1)
+# asm 2: movl   <in2=%r8d,200(<out=%rdi)
+movl   %r8d,200(%rdi)
+
+# qhasm:   *(uint32 *) (out + 204) = in3
+# asm 1: movl   <in3=int64#6d,204(<out=int64#1)
+# asm 2: movl   <in3=%r9d,204(<out=%rdi)
+movl   %r9d,204(%rdi)
+
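+# Output phase, continued: each z register holds one state word for four
+# consecutive 64-byte blocks. In every group below, the saved orig words are
+# added back, one 32-bit lane is extracted at a time (movd), XORed with the
+# message word at the matching offset, stored to out, and the lanes are then
+# rotated with pshufd $0x39 to move on to the next block's offset.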
+# qhasm:   uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#15
+# asm 2: paddd <orig4=240(%rsp),<z4=%xmm14
+paddd 240(%rsp),%xmm14
+
+# qhasm:   uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#1
+# asm 2: paddd <orig5=64(%rsp),<z5=%xmm0
+paddd 64(%rsp),%xmm0
+
+# qhasm:   uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#6
+# asm 2: paddd <orig6=128(%rsp),<z6=%xmm5
+paddd 128(%rsp),%xmm5
+
+# qhasm:   uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#9
+# asm 2: paddd <orig7=192(%rsp),<z7=%xmm8
+paddd 192(%rsp),%xmm8
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 16(<m=%rsi),<in4=%edx
+xorl 16(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 16) = in4
+# asm 1: movl   <in4=int64#3d,16(<out=int64#1)
+# asm 2: movl   <in4=%edx,16(<out=%rdi)
+movl   %edx,16(%rdi)
+
+# qhasm:   *(uint32 *) (out + 20) = in5
+# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl   <in5=%ecx,20(<out=%rdi)
+movl   %ecx,20(%rdi)
+
+# qhasm:   *(uint32 *) (out + 24) = in6
+# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl   <in6=%r8d,24(<out=%rdi)
+movl   %r8d,24(%rdi)
+
+# qhasm:   *(uint32 *) (out + 28) = in7
+# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl   <in7=%r9d,28(<out=%rdi)
+movl   %r9d,28(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 80(<m=%rsi),<in4=%edx
+xorl 80(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 84(<m=%rsi),<in5=%ecx
+xorl 84(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 88(<m=%rsi),<in6=%r8d
+xorl 88(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 92(<m=%rsi),<in7=%r9d
+xorl 92(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 80) = in4
+# asm 1: movl   <in4=int64#3d,80(<out=int64#1)
+# asm 2: movl   <in4=%edx,80(<out=%rdi)
+movl   %edx,80(%rdi)
+
+# qhasm:   *(uint32 *) (out + 84) = in5
+# asm 1: movl   <in5=int64#4d,84(<out=int64#1)
+# asm 2: movl   <in5=%ecx,84(<out=%rdi)
+movl   %ecx,84(%rdi)
+
+# qhasm:   *(uint32 *) (out + 88) = in6
+# asm 1: movl   <in6=int64#5d,88(<out=int64#1)
+# asm 2: movl   <in6=%r8d,88(<out=%rdi)
+movl   %r8d,88(%rdi)
+
+# qhasm:   *(uint32 *) (out + 92) = in7
+# asm 1: movl   <in7=int64#6d,92(<out=int64#1)
+# asm 2: movl   <in7=%r9d,92(<out=%rdi)
+movl   %r9d,92(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 144(<m=%rsi),<in4=%edx
+xorl 144(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 148(<m=%rsi),<in5=%ecx
+xorl 148(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 152(<m=%rsi),<in6=%r8d
+xorl 152(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 156(<m=%rsi),<in7=%r9d
+xorl 156(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 144) = in4
+# asm 1: movl   <in4=int64#3d,144(<out=int64#1)
+# asm 2: movl   <in4=%edx,144(<out=%rdi)
+movl   %edx,144(%rdi)
+
+# qhasm:   *(uint32 *) (out + 148) = in5
+# asm 1: movl   <in5=int64#4d,148(<out=int64#1)
+# asm 2: movl   <in5=%ecx,148(<out=%rdi)
+movl   %ecx,148(%rdi)
+
+# qhasm:   *(uint32 *) (out + 152) = in6
+# asm 1: movl   <in6=int64#5d,152(<out=int64#1)
+# asm 2: movl   <in6=%r8d,152(<out=%rdi)
+movl   %r8d,152(%rdi)
+
+# qhasm:   *(uint32 *) (out + 156) = in7
+# asm 1: movl   <in7=int64#6d,156(<out=int64#1)
+# asm 2: movl   <in7=%r9d,156(<out=%rdi)
+movl   %r9d,156(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 208(<m=%rsi),<in4=%edx
+xorl 208(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 212(<m=%rsi),<in5=%ecx
+xorl 212(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 216(<m=%rsi),<in6=%r8d
+xorl 216(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 220(<m=%rsi),<in7=%r9d
+xorl 220(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 208) = in4
+# asm 1: movl   <in4=int64#3d,208(<out=int64#1)
+# asm 2: movl   <in4=%edx,208(<out=%rdi)
+movl   %edx,208(%rdi)
+
+# qhasm:   *(uint32 *) (out + 212) = in5
+# asm 1: movl   <in5=int64#4d,212(<out=int64#1)
+# asm 2: movl   <in5=%ecx,212(<out=%rdi)
+movl   %ecx,212(%rdi)
+
+# qhasm:   *(uint32 *) (out + 216) = in6
+# asm 1: movl   <in6=int64#5d,216(<out=int64#1)
+# asm 2: movl   <in6=%r8d,216(<out=%rdi)
+movl   %r8d,216(%rdi)
+
+# qhasm:   *(uint32 *) (out + 220) = in7
+# asm 1: movl   <in7=int64#6d,220(<out=int64#1)
+# asm 2: movl   <in7=%r9d,220(<out=%rdi)
+movl   %r9d,220(%rdi)
+
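+# Same lane-by-lane extract/xor/store pattern for state words 8..11
+# (z8..z11), covering offsets 32..44 of each of the four 64-byte blocks.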
+# qhasm:   uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#16
+# asm 2: paddd <orig8=288(%rsp),<z8=%xmm15
+paddd 288(%rsp),%xmm15
+
+# qhasm:   uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#12
+# asm 2: paddd <orig9=304(%rsp),<z9=%xmm11
+paddd 304(%rsp),%xmm11
+
+# qhasm:   uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#2
+# asm 2: paddd <orig10=80(%rsp),<z10=%xmm1
+paddd 80(%rsp),%xmm1
+
+# qhasm:   uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#7
+# asm 2: paddd <orig11=144(%rsp),<z11=%xmm6
+paddd 144(%rsp),%xmm6
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 32(<m=%rsi),<in8=%edx
+xorl 32(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 36(<m=%rsi),<in9=%ecx
+xorl 36(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 40(<m=%rsi),<in10=%r8d
+xorl 40(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 44(<m=%rsi),<in11=%r9d
+xorl 44(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 32) = in8
+# asm 1: movl   <in8=int64#3d,32(<out=int64#1)
+# asm 2: movl   <in8=%edx,32(<out=%rdi)
+movl   %edx,32(%rdi)
+
+# qhasm:   *(uint32 *) (out + 36) = in9
+# asm 1: movl   <in9=int64#4d,36(<out=int64#1)
+# asm 2: movl   <in9=%ecx,36(<out=%rdi)
+movl   %ecx,36(%rdi)
+
+# qhasm:   *(uint32 *) (out + 40) = in10
+# asm 1: movl   <in10=int64#5d,40(<out=int64#1)
+# asm 2: movl   <in10=%r8d,40(<out=%rdi)
+movl   %r8d,40(%rdi)
+
+# qhasm:   *(uint32 *) (out + 44) = in11
+# asm 1: movl   <in11=int64#6d,44(<out=int64#1)
+# asm 2: movl   <in11=%r9d,44(<out=%rdi)
+movl   %r9d,44(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 96(<m=%rsi),<in8=%edx
+xorl 96(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 100(<m=%rsi),<in9=%ecx
+xorl 100(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 104(<m=%rsi),<in10=%r8d
+xorl 104(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 108(<m=%rsi),<in11=%r9d
+xorl 108(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 96) = in8
+# asm 1: movl   <in8=int64#3d,96(<out=int64#1)
+# asm 2: movl   <in8=%edx,96(<out=%rdi)
+movl   %edx,96(%rdi)
+
+# qhasm:   *(uint32 *) (out + 100) = in9
+# asm 1: movl   <in9=int64#4d,100(<out=int64#1)
+# asm 2: movl   <in9=%ecx,100(<out=%rdi)
+movl   %ecx,100(%rdi)
+
+# qhasm:   *(uint32 *) (out + 104) = in10
+# asm 1: movl   <in10=int64#5d,104(<out=int64#1)
+# asm 2: movl   <in10=%r8d,104(<out=%rdi)
+movl   %r8d,104(%rdi)
+
+# qhasm:   *(uint32 *) (out + 108) = in11
+# asm 1: movl   <in11=int64#6d,108(<out=int64#1)
+# asm 2: movl   <in11=%r9d,108(<out=%rdi)
+movl   %r9d,108(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 160(<m=%rsi),<in8=%edx
+xorl 160(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 164(<m=%rsi),<in9=%ecx
+xorl 164(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 168(<m=%rsi),<in10=%r8d
+xorl 168(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 172(<m=%rsi),<in11=%r9d
+xorl 172(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 160) = in8
+# asm 1: movl   <in8=int64#3d,160(<out=int64#1)
+# asm 2: movl   <in8=%edx,160(<out=%rdi)
+movl   %edx,160(%rdi)
+
+# qhasm:   *(uint32 *) (out + 164) = in9
+# asm 1: movl   <in9=int64#4d,164(<out=int64#1)
+# asm 2: movl   <in9=%ecx,164(<out=%rdi)
+movl   %ecx,164(%rdi)
+
+# qhasm:   *(uint32 *) (out + 168) = in10
+# asm 1: movl   <in10=int64#5d,168(<out=int64#1)
+# asm 2: movl   <in10=%r8d,168(<out=%rdi)
+movl   %r8d,168(%rdi)
+
+# qhasm:   *(uint32 *) (out + 172) = in11
+# asm 1: movl   <in11=int64#6d,172(<out=int64#1)
+# asm 2: movl   <in11=%r9d,172(<out=%rdi)
+movl   %r9d,172(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 224(<m=%rsi),<in8=%edx
+xorl 224(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 228(<m=%rsi),<in9=%ecx
+xorl 228(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 232(<m=%rsi),<in10=%r8d
+xorl 232(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 236(<m=%rsi),<in11=%r9d
+xorl 236(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 224) = in8
+# asm 1: movl   <in8=int64#3d,224(<out=int64#1)
+# asm 2: movl   <in8=%edx,224(<out=%rdi)
+movl   %edx,224(%rdi)
+
+# qhasm:   *(uint32 *) (out + 228) = in9
+# asm 1: movl   <in9=int64#4d,228(<out=int64#1)
+# asm 2: movl   <in9=%ecx,228(<out=%rdi)
+movl   %ecx,228(%rdi)
+
+# qhasm:   *(uint32 *) (out + 232) = in10
+# asm 1: movl   <in10=int64#5d,232(<out=int64#1)
+# asm 2: movl   <in10=%r8d,232(<out=%rdi)
+movl   %r8d,232(%rdi)
+
+# qhasm:   *(uint32 *) (out + 236) = in11
+# asm 1: movl   <in11=int64#6d,236(<out=int64#1)
+# asm 2: movl   <in11=%r9d,236(<out=%rdi)
+movl   %r9d,236(%rdi)
+
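+# Same pattern for the last row of state words, 12..15 (z12..z15),
+# covering offsets 48..60 of each of the four 64-byte blocks.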
+# qhasm:   uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#14
+# asm 2: paddd <orig12=160(%rsp),<z12=%xmm13
+paddd 160(%rsp),%xmm13
+
+# qhasm:   uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#10
+# asm 2: paddd <orig13=208(%rsp),<z13=%xmm9
+paddd 208(%rsp),%xmm9
+
+# qhasm:   uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#4
+# asm 2: paddd <orig14=256(%rsp),<z14=%xmm3
+paddd 256(%rsp),%xmm3
+
+# qhasm:   uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#3
+# asm 2: paddd <orig15=96(%rsp),<z15=%xmm2
+paddd 96(%rsp),%xmm2
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 48(<m=%rsi),<in12=%edx
+xorl 48(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 52(<m=%rsi),<in13=%ecx
+xorl 52(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 56(<m=%rsi),<in14=%r8d
+xorl 56(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 60(<m=%rsi),<in15=%r9d
+xorl 60(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 48) = in12
+# asm 1: movl   <in12=int64#3d,48(<out=int64#1)
+# asm 2: movl   <in12=%edx,48(<out=%rdi)
+movl   %edx,48(%rdi)
+
+# qhasm:   *(uint32 *) (out + 52) = in13
+# asm 1: movl   <in13=int64#4d,52(<out=int64#1)
+# asm 2: movl   <in13=%ecx,52(<out=%rdi)
+movl   %ecx,52(%rdi)
+
+# qhasm:   *(uint32 *) (out + 56) = in14
+# asm 1: movl   <in14=int64#5d,56(<out=int64#1)
+# asm 2: movl   <in14=%r8d,56(<out=%rdi)
+movl   %r8d,56(%rdi)
+
+# qhasm:   *(uint32 *) (out + 60) = in15
+# asm 1: movl   <in15=int64#6d,60(<out=int64#1)
+# asm 2: movl   <in15=%r9d,60(<out=%rdi)
+movl   %r9d,60(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 112(<m=%rsi),<in12=%edx
+xorl 112(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 116(<m=%rsi),<in13=%ecx
+xorl 116(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 120(<m=%rsi),<in14=%r8d
+xorl 120(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 124(<m=%rsi),<in15=%r9d
+xorl 124(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 112) = in12
+# asm 1: movl   <in12=int64#3d,112(<out=int64#1)
+# asm 2: movl   <in12=%edx,112(<out=%rdi)
+movl   %edx,112(%rdi)
+
+# qhasm:   *(uint32 *) (out + 116) = in13
+# asm 1: movl   <in13=int64#4d,116(<out=int64#1)
+# asm 2: movl   <in13=%ecx,116(<out=%rdi)
+movl   %ecx,116(%rdi)
+
+# qhasm:   *(uint32 *) (out + 120) = in14
+# asm 1: movl   <in14=int64#5d,120(<out=int64#1)
+# asm 2: movl   <in14=%r8d,120(<out=%rdi)
+movl   %r8d,120(%rdi)
+
+# qhasm:   *(uint32 *) (out + 124) = in15
+# asm 1: movl   <in15=int64#6d,124(<out=int64#1)
+# asm 2: movl   <in15=%r9d,124(<out=%rdi)
+movl   %r9d,124(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 176(<m=%rsi),<in12=%edx
+xorl 176(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 180(<m=%rsi),<in13=%ecx
+xorl 180(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 184(<m=%rsi),<in14=%r8d
+xorl 184(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 188(<m=%rsi),<in15=%r9d
+xorl 188(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 176) = in12
+# asm 1: movl   <in12=int64#3d,176(<out=int64#1)
+# asm 2: movl   <in12=%edx,176(<out=%rdi)
+movl   %edx,176(%rdi)
+
+# qhasm:   *(uint32 *) (out + 180) = in13
+# asm 1: movl   <in13=int64#4d,180(<out=int64#1)
+# asm 2: movl   <in13=%ecx,180(<out=%rdi)
+movl   %ecx,180(%rdi)
+
+# qhasm:   *(uint32 *) (out + 184) = in14
+# asm 1: movl   <in14=int64#5d,184(<out=int64#1)
+# asm 2: movl   <in14=%r8d,184(<out=%rdi)
+movl   %r8d,184(%rdi)
+
+# qhasm:   *(uint32 *) (out + 188) = in15
+# asm 1: movl   <in15=int64#6d,188(<out=int64#1)
+# asm 2: movl   <in15=%r9d,188(<out=%rdi)
+movl   %r9d,188(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 240(<m=%rsi),<in12=%edx
+xorl 240(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 244(<m=%rsi),<in13=%ecx
+xorl 244(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 248(<m=%rsi),<in14=%r8d
+xorl 248(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 252(<m=%rsi),<in15=%r9d
+xorl 252(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 240) = in12
+# asm 1: movl   <in12=int64#3d,240(<out=int64#1)
+# asm 2: movl   <in12=%edx,240(<out=%rdi)
+movl   %edx,240(%rdi)
+
+# qhasm:   *(uint32 *) (out + 244) = in13
+# asm 1: movl   <in13=int64#4d,244(<out=int64#1)
+# asm 2: movl   <in13=%ecx,244(<out=%rdi)
+movl   %ecx,244(%rdi)
+
+# qhasm:   *(uint32 *) (out + 248) = in14
+# asm 1: movl   <in14=int64#5d,248(<out=int64#1)
+# asm 2: movl   <in14=%r8d,248(<out=%rdi)
+movl   %r8d,248(%rdi)
+
+# qhasm:   *(uint32 *) (out + 252) = in15
+# asm 1: movl   <in15=int64#6d,252(<out=int64#1)
+# asm 2: movl   <in15=%r9d,252(<out=%rdi)
+movl   %r9d,252(%rdi)
+
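+# All 256 bytes of this iteration are now written. Restore the byte count,
+# advance m and out by 256, and loop back while at least 256 bytes remain.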
+# qhasm:   bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm:   bytes -= 256
+# asm 1: sub  $256,<bytes=int64#6
+# asm 2: sub  $256,<bytes=%r9
+sub  $256,%r9
+
+# qhasm:   m += 256
+# asm 1: add  $256,<m=int64#2
+# asm 2: add  $256,<m=%rsi
+add  $256,%rsi
+
+# qhasm:   out += 256
+# asm 1: add  $256,<out=int64#1
+# asm 2: add  $256,<out=%rdi
+add  $256,%rdi
+
+# qhasm:                            unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int64#6
+# asm 2: cmp  $256,<bytes=%r9
+cmp  $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm:                 unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
+
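+# Tail handling: fewer than 256 bytes remain. If fewer than 64 bytes are
+# left, copy them into the stack tmp buffer and point both out and m at tmp
+# (ctarget saves the original out pointer), so the single-block code below
+# can operate on a full 64-byte buffer.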
+# qhasm:                   unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int64#6
+# asm 2: cmp  $64,<bytes=%r9
+cmp  $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto nocopy if !unsigned<
+jae ._nocopy
+
+# qhasm:     ctarget = out
+# asm 1: mov  <out=int64#1,>ctarget=int64#3
+# asm 2: mov  <out=%rdi,>ctarget=%rdx
+mov  %rdi,%rdx
+
+# qhasm:     out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm:     i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm:     while (i) { *out++ = *m++; --i }
+rep movsb
+
+# qhasm:     out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm:     m = &tmp
+# asm 1: leaq <tmp=stack512#1,>m=int64#2
+# asm 2: leaq <tmp=416(%rsp),>m=%rsi
+leaq 416(%rsp),%rsi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:   nocopy:
+._nocopy:
+
+# qhasm:   bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
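+# Single-block path: load the 16-word input from the stack (x0..x3) into
+# four xmm registers, diag0..diag3, arranged so that each vectorized round
+# below operates on whole registers.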
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#4,>diag0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>diag0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#1,>diag1=int6464#2
+# asm 2: movdqa <x1=0(%rsp),>diag1=%xmm1
+movdqa 0(%rsp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#2,>diag2=int6464#3
+# asm 2: movdqa <x2=16(%rsp),>diag2=%xmm2
+movdqa 16(%rsp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#3,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%rsp),>diag3=%xmm3
+movdqa 32(%rsp),%xmm3
+
+# qhasm:                     a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 20
+# asm 1: mov  $20,>i=int64#4
+# asm 2: mov  $20,>i=%rcx
+mov  $20,%rcx
+
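+# Round loop: each pass performs four rounds (shift pairs 7/25, 9/23, 13/19
+# and 18/14 emulate 32-bit rotations by 7, 9, 13 and 18), with pshufd
+# realigning the diagonals in between. i counts down from 20 by 4, so the
+# body runs five times for 20 rounds in total.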
+# qhasm: mainloop2:
+._mainloop2:
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:                  unsigned>? i -= 4
+# asm 1: sub  $4,<i=int64#4
+# asm 2: sub  $4,<i=%rcx
+sub  $4,%rcx
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm:                 b0 = 0
+# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
+pxor   %xmm7,%xmm7
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
+
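+# Rounds finished: add the original input x0..x3 back into diag0..diag3,
+# then extract one lane at a time, XOR it with the message and store it.
+# The scattered store offsets (0, 48, 32, 16, then 20, 4, 52, 36, ...)
+# undo the diagonal arrangement of the state.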
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#4,<diag0=int6464#1
+# asm 2: paddd <x0=48(%rsp),<diag0=%xmm0
+paddd 48(%rsp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#1,<diag1=int6464#2
+# asm 2: paddd <x1=0(%rsp),<diag1=%xmm1
+paddd 0(%rsp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#2,<diag2=int6464#3
+# asm 2: paddd <x2=16(%rsp),<diag2=%xmm2
+paddd 16(%rsp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#3,<diag3=int6464#4
+# asm 2: paddd <x3=32(%rsp),<diag3=%xmm3
+paddd 32(%rsp),%xmm3
+
+# qhasm: in0 = diag0
+# asm 1: movd   <diag0=int6464#1,>in0=int64#4
+# asm 2: movd   <diag0=%xmm0,>in0=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in12 = diag1
+# asm 1: movd   <diag1=int6464#2,>in12=int64#5
+# asm 2: movd   <diag1=%xmm1,>in12=%r8
+movd   %xmm1,%r8
+
+# qhasm: in8 = diag2
+# asm 1: movd   <diag2=int6464#3,>in8=int64#6
+# asm 2: movd   <diag2=%xmm2,>in8=%r9
+movd   %xmm2,%r9
+
+# qhasm: in4 = diag3
+# asm 1: movd   <diag3=int6464#4,>in4=int64#7
+# asm 2: movd   <diag3=%xmm3,>in4=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#4d
+# asm 2: xorl 0(<m=%rsi),<in0=%ecx
+xorl 0(%rsi),%ecx
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#5d
+# asm 2: xorl 48(<m=%rsi),<in12=%r8d
+xorl 48(%rsi),%r8d
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#6d
+# asm 2: xorl 32(<m=%rsi),<in8=%r9d
+xorl 32(%rsi),%r9d
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#7d
+# asm 2: xorl 16(<m=%rsi),<in4=%eax
+xorl 16(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl   <in0=int64#4d,0(<out=int64#1)
+# asm 2: movl   <in0=%ecx,0(<out=%rdi)
+movl   %ecx,0(%rdi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl   <in12=int64#5d,48(<out=int64#1)
+# asm 2: movl   <in12=%r8d,48(<out=%rdi)
+movl   %r8d,48(%rdi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl   <in8=int64#6d,32(<out=int64#1)
+# asm 2: movl   <in8=%r9d,32(<out=%rdi)
+movl   %r9d,32(%rdi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl   <in4=int64#7d,16(<out=int64#1)
+# asm 2: movl   <in4=%eax,16(<out=%rdi)
+movl   %eax,16(%rdi)
+
+# qhasm: in5 = diag0
+# asm 1: movd   <diag0=int6464#1,>in5=int64#4
+# asm 2: movd   <diag0=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in1 = diag1
+# asm 1: movd   <diag1=int6464#2,>in1=int64#5
+# asm 2: movd   <diag1=%xmm1,>in1=%r8
+movd   %xmm1,%r8
+
+# qhasm: in13 = diag2
+# asm 1: movd   <diag2=int6464#3,>in13=int64#6
+# asm 2: movd   <diag2=%xmm2,>in13=%r9
+movd   %xmm2,%r9
+
+# qhasm: in9 = diag3
+# asm 1: movd   <diag3=int6464#4,>in9=int64#7
+# asm 2: movd   <diag3=%xmm3,>in9=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#5d
+# asm 2: xorl 4(<m=%rsi),<in1=%r8d
+xorl 4(%rsi),%r8d
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#6d
+# asm 2: xorl 52(<m=%rsi),<in13=%r9d
+xorl 52(%rsi),%r9d
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#7d
+# asm 2: xorl 36(<m=%rsi),<in9=%eax
+xorl 36(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl   <in5=%ecx,20(<out=%rdi)
+movl   %ecx,20(%rdi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl   <in1=int64#5d,4(<out=int64#1)
+# asm 2: movl   <in1=%r8d,4(<out=%rdi)
+movl   %r8d,4(%rdi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl   <in13=int64#6d,52(<out=int64#1)
+# asm 2: movl   <in13=%r9d,52(<out=%rdi)
+movl   %r9d,52(%rdi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl   <in9=int64#7d,36(<out=int64#1)
+# asm 2: movl   <in9=%eax,36(<out=%rdi)
+movl   %eax,36(%rdi)
+
+# qhasm: in10 = diag0
+# asm 1: movd   <diag0=int6464#1,>in10=int64#4
+# asm 2: movd   <diag0=%xmm0,>in10=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in6 = diag1
+# asm 1: movd   <diag1=int6464#2,>in6=int64#5
+# asm 2: movd   <diag1=%xmm1,>in6=%r8
+movd   %xmm1,%r8
+
+# qhasm: in2 = diag2
+# asm 1: movd   <diag2=int6464#3,>in2=int64#6
+# asm 2: movd   <diag2=%xmm2,>in2=%r9
+movd   %xmm2,%r9
+
+# qhasm: in14 = diag3
+# asm 1: movd   <diag3=int6464#4,>in14=int64#7
+# asm 2: movd   <diag3=%xmm3,>in14=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#4d
+# asm 2: xorl 40(<m=%rsi),<in10=%ecx
+xorl 40(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#6d
+# asm 2: xorl 8(<m=%rsi),<in2=%r9d
+xorl 8(%rsi),%r9d
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#7d
+# asm 2: xorl 56(<m=%rsi),<in14=%eax
+xorl 56(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl   <in10=int64#4d,40(<out=int64#1)
+# asm 2: movl   <in10=%ecx,40(<out=%rdi)
+movl   %ecx,40(%rdi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl   <in6=%r8d,24(<out=%rdi)
+movl   %r8d,24(%rdi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl   <in2=int64#6d,8(<out=int64#1)
+# asm 2: movl   <in2=%r9d,8(<out=%rdi)
+movl   %r9d,8(%rdi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl   <in14=int64#7d,56(<out=int64#1)
+# asm 2: movl   <in14=%eax,56(<out=%rdi)
+movl   %eax,56(%rdi)
+
+# qhasm: in15 = diag0
+# asm 1: movd   <diag0=int6464#1,>in15=int64#4
+# asm 2: movd   <diag0=%xmm0,>in15=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in11 = diag1
+# asm 1: movd   <diag1=int6464#2,>in11=int64#5
+# asm 2: movd   <diag1=%xmm1,>in11=%r8
+movd   %xmm1,%r8
+
+# qhasm: in7 = diag2
+# asm 1: movd   <diag2=int6464#3,>in7=int64#6
+# asm 2: movd   <diag2=%xmm2,>in7=%r9
+movd   %xmm2,%r9
+
+# qhasm: in3 = diag3
+# asm 1: movd   <diag3=int6464#4,>in3=int64#7
+# asm 2: movd   <diag3=%xmm3,>in3=%rax
+movd   %xmm3,%rax
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#4d
+# asm 2: xorl 60(<m=%rsi),<in15=%ecx
+xorl 60(%rsi),%ecx
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#5d
+# asm 2: xorl 44(<m=%rsi),<in11=%r8d
+xorl 44(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#7d
+# asm 2: xorl 12(<m=%rsi),<in3=%eax
+xorl 12(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl   <in15=int64#4d,60(<out=int64#1)
+# asm 2: movl   <in15=%ecx,60(<out=%rdi)
+movl   %ecx,60(%rdi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl   <in11=int64#5d,44(<out=int64#1)
+# asm 2: movl   <in11=%r8d,44(<out=%rdi)
+movl   %r8d,44(%rdi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl   <in7=%r9d,28(<out=%rdi)
+movl   %r9d,28(%rdi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl   <in3=int64#7d,12(<out=int64#1)
+# asm 2: movl   <in3=%eax,12(<out=%rdi)
+movl   %eax,12(%rdi)
+
+# qhasm:   bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#4d
+# asm 2: movl <x2=16(%rsp),>in8=%ecx
+movl 16(%rsp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#5d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%r8d
+movl 4+32(%rsp),%r8d
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#4
+# asm 2: add  $1,<in8=%rcx
+add  $1,%rcx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#5
+# asm 2: shl  $32,<in9=%r8
+shl  $32,%r8
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#5,<in8=int64#4
+# asm 2: add  <in9=%r8,<in8=%rcx
+add  %r8,%rcx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#4,>in9=int64#5
+# asm 2: mov  <in8=%rcx,>in9=%r8
+mov  %rcx,%r8
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#5
+# asm 2: shr  $32,<in9=%r8
+shr  $32,%r8
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#5d,4+<x3=stack128#3
+# asm 2: movl <in9=%r8d,4+<x3=32(%rsp)
+movl %r8d,4+32(%rsp)
+
+# qhasm:                          unsigned>? unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int64#6
+# asm 2: cmp  $64,<bytes=%r9
+cmp  $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm:     goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm:       m = out
+# asm 1: mov  <out=int64#1,>m=int64#2
+# asm 2: mov  <out=%rdi,>m=%rsi
+mov  %rdi,%rsi
+
+# qhasm:       out = ctarget
+# asm 1: mov  <ctarget=int64#3,>out=int64#1
+# asm 2: mov  <ctarget=%rdx,>out=%rdi
+mov  %rdx,%rdi
+
+# qhasm:       i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm:       while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     done:
+._done:
+
+# qhasm:     r11_caller = r11_stack
+# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
+# asm 2: movq <r11_stack=352(%rsp),>r11_caller=%r11
+movq 352(%rsp),%r11
+
+# qhasm:     r12_caller = r12_stack
+# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
+# asm 2: movq <r12_stack=360(%rsp),>r12_caller=%r12
+movq 360(%rsp),%r12
+
+# qhasm:     r13_caller = r13_stack
+# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
+# asm 2: movq <r13_stack=368(%rsp),>r13_caller=%r13
+movq 368(%rsp),%r13
+
+# qhasm:     r14_caller = r14_stack
+# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
+# asm 2: movq <r14_stack=376(%rsp),>r14_caller=%r14
+movq 376(%rsp),%r14
+
+# qhasm:     r15_caller = r15_stack
+# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
+# asm 2: movq <r15_stack=384(%rsp),>r15_caller=%r15
+movq 384(%rsp),%r15
+
+# qhasm:     rbx_caller = rbx_stack
+# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
+# asm 2: movq <rbx_stack=392(%rsp),>rbx_caller=%rbx
+movq 392(%rsp),%rbx
+
+# qhasm:     rbp_caller = rbp_stack
+# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
+# asm 2: movq <rbp_stack=400(%rsp),>rbp_caller=%rbp
+movq 400(%rsp),%rbp
+
+# qhasm:     leave
+add %r11,%rsp
+xor %rax,%rax
+xor %rdx,%rdx
+ret
+
+# qhasm:   bytesatleast65:
+._bytesatleast65:
+
+# qhasm:   bytes -= 64
+# asm 1: sub  $64,<bytes=int64#6
+# asm 2: sub  $64,<bytes=%r9
+sub  $64,%r9
+
+# qhasm:   out += 64
+# asm 1: add  $64,<out=int64#1
+# asm 2: add  $64,<out=%rdi
+add  $64,%rdi
+
+# qhasm:   m += 64
+# asm 1: add  $64,<m=int64#2
+# asm 2: add  $64,<m=%rsi
+add  $64,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/checksum b/nacl/nacl-20110221/crypto_stream/salsa20/checksum
new file mode 100644
index 00000000..78ff05f4
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/checksum
@@ -0,0 +1 @@
+44a3966eabcd3a2b13faca2150e38f2b7e6bac187d626618f50a9f875158ae78
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/ref/api.h b/nacl/nacl-20110221/crypto_stream/salsa20/ref/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/ref/implementors b/nacl/nacl-20110221/crypto_stream/salsa20/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/ref/stream.c b/nacl/nacl-20110221/crypto_stream/salsa20/ref/stream.c
new file mode 100644
index 00000000..2f0262eb
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/ref/stream.c
@@ -0,0 +1,49 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa20.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!clen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (clen >= 64) {
+    crypto_core_salsa20(c,in,k,sigma);
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa20(block,in,k,sigma);
+    for (i = 0;i < clen;++i) c[i] = block[i];
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/ref/xor.c b/nacl/nacl-20110221/crypto_stream/salsa20/ref/xor.c
new file mode 100644
index 00000000..11c7e9f0
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/ref/xor.c
@@ -0,0 +1,52 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa20.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!mlen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (mlen >= 64) {
+    crypto_core_salsa20(block,in,k,sigma);
+    for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa20(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/used b/nacl/nacl-20110221/crypto_stream/salsa20/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/api.h b/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/implementors b/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/stream.s b/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/stream.s
new file mode 100644
index 00000000..9e32ea43
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa20/x86_xmm5/stream.s
@@ -0,0 +1,5078 @@
+
+# qhasm: int32 a
+
+# qhasm: stack32 arg1
+
+# qhasm: stack32 arg2
+
+# qhasm: stack32 arg3
+
+# qhasm: stack32 arg4
+
+# qhasm: stack32 arg5
+
+# qhasm: stack32 arg6
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: input arg6
+
+# qhasm: int32 eax
+
+# qhasm: int32 ebx
+
+# qhasm: int32 esi
+
+# qhasm: int32 edi
+
+# qhasm: int32 ebp
+
+# qhasm: caller eax
+
+# qhasm: caller ebx
+
+# qhasm: caller esi
+
+# qhasm: caller edi
+
+# qhasm: caller ebp
+
+# qhasm: int32 k
+
+# qhasm: int32 kbits
+
+# qhasm: int32 iv
+
+# qhasm: int32 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int32 m
+
+# qhasm: stack32 out_stack
+
+# qhasm: int32 out
+
+# qhasm: stack32 bytes_stack
+
+# qhasm: int32 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int6464 p
+
+# qhasm: int6464 q
+
+# qhasm: int6464 r
+
+# qhasm: int6464 s
+
+# qhasm: int6464 t
+
+# qhasm: int6464 u
+
+# qhasm: int6464 v
+
+# qhasm: int6464 w
+
+# qhasm: int6464 mp
+
+# qhasm: int6464 mq
+
+# qhasm: int6464 mr
+
+# qhasm: int6464 ms
+
+# qhasm: int6464 mt
+
+# qhasm: int6464 mu
+
+# qhasm: int6464 mv
+
+# qhasm: int6464 mw
+
+# qhasm: int32 in0
+
+# qhasm: int32 in1
+
+# qhasm: int32 in2
+
+# qhasm: int32 in3
+
+# qhasm: int32 in4
+
+# qhasm: int32 in5
+
+# qhasm: int32 in6
+
+# qhasm: int32 in7
+
+# qhasm: int32 in8
+
+# qhasm: int32 in9
+
+# qhasm: int32 in10
+
+# qhasm: int32 in11
+
+# qhasm: int32 in12
+
+# qhasm: int32 in13
+
+# qhasm: int32 in14
+
+# qhasm: int32 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: stack32 ctarget
+
+# qhasm: enter crypto_stream_salsa20_x86_xmm5
+.text
+.p2align 5
+.globl _crypto_stream_salsa20_x86_xmm5
+.globl crypto_stream_salsa20_x86_xmm5
+_crypto_stream_salsa20_x86_xmm5:
+crypto_stream_salsa20_x86_xmm5:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm: bytes = arg2
+# asm 1: movl <arg2=stack32#-2,>bytes=int32#3
+# asm 2: movl <arg2=8(%esp,%eax),>bytes=%edx
+movl 8(%esp,%eax),%edx
+
+# qhasm: out = arg1
+# asm 1: movl <arg1=stack32#-1,>out=int32#6
+# asm 2: movl <arg1=4(%esp,%eax),>out=%edi
+movl 4(%esp,%eax),%edi
+
+# qhasm: m = out
+# asm 1: mov  <out=int32#6,>m=int32#5
+# asm 2: mov  <out=%edi,>m=%esi
+mov  %edi,%esi
+
+# qhasm: iv = arg4
+# asm 1: movl <arg4=stack32#-4,>iv=int32#4
+# asm 2: movl <arg4=16(%esp,%eax),>iv=%ebx
+movl 16(%esp,%eax),%ebx
+
+# qhasm: k = arg5
+# asm 1: movl <arg5=stack32#-5,>k=int32#7
+# asm 2: movl <arg5=20(%esp,%eax),>k=%ebp
+movl 20(%esp,%eax),%ebp
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#3
+# asm 2: cmp  $0,<bytes=%edx
+cmp  $0,%edx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov  $0,>a=int32#1
+# asm 2: mov  $0,>a=%eax
+mov  $0,%eax
+
+# qhasm: i = bytes
+# asm 1: mov  <bytes=int32#3,>i=int32#2
+# asm 2: mov  <bytes=%edx,>i=%ecx
+mov  %edx,%ecx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
+
+# qhasm: out -= bytes
+# asm 1: subl <bytes=int32#3,<out=int32#6
+# asm 2: subl <bytes=%edx,<out=%edi
+subl %edx,%edi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
+
+# qhasm: enter crypto_stream_salsa20_x86_xmm5_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa20_x86_xmm5_xor
+.globl crypto_stream_salsa20_x86_xmm5_xor
+_crypto_stream_salsa20_x86_xmm5_xor:
+crypto_stream_salsa20_x86_xmm5_xor:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm: out = arg1
+# asm 1: movl <arg1=stack32#-1,>out=int32#6
+# asm 2: movl <arg1=4(%esp,%eax),>out=%edi
+movl 4(%esp,%eax),%edi
+
+# qhasm: m = arg2
+# asm 1: movl <arg2=stack32#-2,>m=int32#5
+# asm 2: movl <arg2=8(%esp,%eax),>m=%esi
+movl 8(%esp,%eax),%esi
+
+# qhasm: bytes = arg3
+# asm 1: movl <arg3=stack32#-3,>bytes=int32#3
+# asm 2: movl <arg3=12(%esp,%eax),>bytes=%edx
+movl 12(%esp,%eax),%edx
+
+# qhasm: iv = arg5
+# asm 1: movl <arg5=stack32#-5,>iv=int32#4
+# asm 2: movl <arg5=20(%esp,%eax),>iv=%ebx
+movl 20(%esp,%eax),%ebx
+
+# qhasm: k = arg6
+# asm 1: movl <arg6=stack32#-6,>k=int32#7
+# asm 2: movl <arg6=24(%esp,%eax),>k=%ebp
+movl 24(%esp,%eax),%ebp
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#3
+# asm 2: cmp  $0,<bytes=%edx
+cmp  $0,%edx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#6,>out_stack=stack32#6
+# asm 2: movl <out=%edi,>out_stack=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#3,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%edx,>bytes_stack=24(%esp)
+movl %edx,24(%esp)
+
+# qhasm:   in4 = *(uint32 *) (k + 12)
+# asm 1: movl 12(<k=int32#7),>in4=int32#1
+# asm 2: movl 12(<k=%ebp),>in4=%eax
+movl 12(%ebp),%eax
+
+# qhasm:   in12 = *(uint32 *) (k + 20)
+# asm 1: movl 20(<k=int32#7),>in12=int32#2
+# asm 2: movl 20(<k=%ebp),>in12=%ecx
+movl 20(%ebp),%ecx
+
+# qhasm:   ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int32#1,>x3=stack128#1
+# asm 2: movl <in4=%eax,>x3=32(%esp)
+movl %eax,32(%esp)
+
+# qhasm:   ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int32#2,>x1=stack128#2
+# asm 2: movl <in12=%ecx,>x1=48(%esp)
+movl %ecx,48(%esp)
+
+# qhasm:   in0 = 1634760805
+# asm 1: mov  $1634760805,>in0=int32#1
+# asm 2: mov  $1634760805,>in0=%eax
+mov  $1634760805,%eax
+
+# qhasm:   in8 = 0
+# asm 1: mov  $0,>in8=int32#2
+# asm 2: mov  $0,>in8=%ecx
+mov  $0,%ecx
+
+# qhasm:   ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int32#1,>x0=stack128#3
+# asm 2: movl <in0=%eax,>x0=64(%esp)
+movl %eax,64(%esp)
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   in6 = *(uint32 *) (iv + 0)
+# asm 1: movl 0(<iv=int32#4),>in6=int32#1
+# asm 2: movl 0(<iv=%ebx),>in6=%eax
+movl 0(%ebx),%eax
+
+# qhasm:   in7 = *(uint32 *) (iv + 4)
+# asm 1: movl 4(<iv=int32#4),>in7=int32#2
+# asm 2: movl 4(<iv=%ebx),>in7=%ecx
+movl 4(%ebx),%ecx
+
+# qhasm:   ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int32#1,8+<x1=stack128#2
+# asm 2: movl <in6=%eax,8+<x1=48(%esp)
+movl %eax,8+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int32#2,12+<x2=stack128#4
+# asm 2: movl <in7=%ecx,12+<x2=80(%esp)
+movl %ecx,12+80(%esp)
+
+# qhasm:   in9 = 0
+# asm 1: mov  $0,>in9=int32#1
+# asm 2: mov  $0,>in9=%eax
+mov  $0,%eax
+
+# qhasm:   in10 = 2036477234
+# asm 1: mov  $2036477234,>in10=int32#2
+# asm 2: mov  $2036477234,>in10=%ecx
+mov  $2036477234,%ecx
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#1,4+<x3=stack128#1
+# asm 2: movl <in9=%eax,4+<x3=32(%esp)
+movl %eax,4+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int32#2,8+<x0=stack128#3
+# asm 2: movl <in10=%ecx,8+<x0=64(%esp)
+movl %ecx,8+64(%esp)
+
+# qhasm:   in1 = *(uint32 *) (k + 0)
+# asm 1: movl 0(<k=int32#7),>in1=int32#1
+# asm 2: movl 0(<k=%ebp),>in1=%eax
+movl 0(%ebp),%eax
+
+# qhasm:   in2 = *(uint32 *) (k + 4)
+# asm 1: movl 4(<k=int32#7),>in2=int32#2
+# asm 2: movl 4(<k=%ebp),>in2=%ecx
+movl 4(%ebp),%ecx
+
+# qhasm:   in3 = *(uint32 *) (k + 8)
+# asm 1: movl 8(<k=int32#7),>in3=int32#3
+# asm 2: movl 8(<k=%ebp),>in3=%edx
+movl 8(%ebp),%edx
+
+# qhasm:   in5 = 857760878
+# asm 1: mov  $857760878,>in5=int32#4
+# asm 2: mov  $857760878,>in5=%ebx
+mov  $857760878,%ebx
+
+# qhasm:   ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int32#1,4+<x1=stack128#2
+# asm 2: movl <in1=%eax,4+<x1=48(%esp)
+movl %eax,4+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int32#2,8+<x2=stack128#4
+# asm 2: movl <in2=%ecx,8+<x2=80(%esp)
+movl %ecx,8+80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int32#3,12+<x3=stack128#1
+# asm 2: movl <in3=%edx,12+<x3=32(%esp)
+movl %edx,12+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int32#4,4+<x0=stack128#3
+# asm 2: movl <in5=%ebx,4+<x0=64(%esp)
+movl %ebx,4+64(%esp)
+
+# qhasm:   in11 = *(uint32 *) (k + 16)
+# asm 1: movl 16(<k=int32#7),>in11=int32#1
+# asm 2: movl 16(<k=%ebp),>in11=%eax
+movl 16(%ebp),%eax
+
+# qhasm:   in13 = *(uint32 *) (k + 24)
+# asm 1: movl 24(<k=int32#7),>in13=int32#2
+# asm 2: movl 24(<k=%ebp),>in13=%ecx
+movl 24(%ebp),%ecx
+
+# qhasm:   in14 = *(uint32 *) (k + 28)
+# asm 1: movl 28(<k=int32#7),>in14=int32#3
+# asm 2: movl 28(<k=%ebp),>in14=%edx
+movl 28(%ebp),%edx
+
+# qhasm:   in15 = 1797285236
+# asm 1: mov  $1797285236,>in15=int32#4
+# asm 2: mov  $1797285236,>in15=%ebx
+mov  $1797285236,%ebx
+
+# qhasm:   ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int32#1,12+<x1=stack128#2
+# asm 2: movl <in11=%eax,12+<x1=48(%esp)
+movl %eax,12+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int32#2,4+<x2=stack128#4
+# asm 2: movl <in13=%ecx,4+<x2=80(%esp)
+movl %ecx,4+80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int32#3,8+<x3=stack128#1
+# asm 2: movl <in14=%edx,8+<x3=32(%esp)
+movl %edx,8+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int32#4,12+<x0=stack128#3
+# asm 2: movl <in15=%ebx,12+<x0=64(%esp)
+movl %ebx,12+64(%esp)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:                               unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int32#1
+# asm 2: cmp  $256,<bytes=%eax
+cmp  $256,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
+
+# qhasm:   z0 = x0
+# asm 1: movdqa <x0=stack128#3,>z0=int6464#1
+# asm 2: movdqa <x0=64(%esp),>z0=%xmm0
+movdqa 64(%esp),%xmm0
+
+# qhasm:   z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm:   z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm:   z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm:   orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=96(%esp)
+movdqa %xmm1,96(%esp)
+
+# qhasm:   orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=112(%esp)
+movdqa %xmm2,112(%esp)
+
+# qhasm:   orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=128(%esp)
+movdqa %xmm3,128(%esp)
+
+# qhasm:   orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=144(%esp)
+movdqa %xmm0,144(%esp)
+
+# qhasm:   z1 = x1
+# asm 1: movdqa <x1=stack128#2,>z1=int6464#1
+# asm 2: movdqa <x1=48(%esp),>z1=%xmm0
+movdqa 48(%esp),%xmm0
+
+# qhasm:   z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm:   z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm:   z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm:   z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm:   orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=160(%esp)
+movdqa %xmm1,160(%esp)
+
+# qhasm:   orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=176(%esp)
+movdqa %xmm2,176(%esp)
+
+# qhasm:   orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=192(%esp)
+movdqa %xmm3,192(%esp)
+
+# qhasm:   orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=208(%esp)
+movdqa %xmm0,208(%esp)
+
+# qhasm:   z2 = x2
+# asm 1: movdqa <x2=stack128#4,>z2=int6464#1
+# asm 2: movdqa <x2=80(%esp),>z2=%xmm0
+movdqa 80(%esp),%xmm0
+
+# qhasm:   z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm:   z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm:   z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm:   orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=224(%esp)
+movdqa %xmm1,224(%esp)
+
+# qhasm:   orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=240(%esp)
+movdqa %xmm2,240(%esp)
+
+# qhasm:   orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=256(%esp)
+movdqa %xmm0,256(%esp)
+
+# qhasm:   z3 = x3
+# asm 1: movdqa <x3=stack128#1,>z3=int6464#1
+# asm 2: movdqa <x3=32(%esp),>z3=%xmm0
+movdqa 32(%esp),%xmm0
+
+# qhasm:   z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm:   z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm:   orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=272(%esp)
+movdqa %xmm1,272(%esp)
+
+# qhasm:   orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=288(%esp)
+movdqa %xmm2,288(%esp)
+
+# qhasm:   orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=304(%esp)
+movdqa %xmm0,304(%esp)
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#4,>in8=int32#2
+# asm 2: movl <x2=80(%esp),>in8=%ecx
+movl 80(%esp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#1,>in9=int32#3
+# asm 2: movl 4+<x3=32(%esp),>in9=%edx
+movl 4+32(%esp),%edx
+
+# qhasm:   ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int32#2,>orig8=stack128#19
+# asm 2: movl <in8=%ecx,>orig8=320(%esp)
+movl %ecx,320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int32#3,>orig9=stack128#20
+# asm 2: movl <in9=%edx,>orig9=336(%esp)
+movl %edx,336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int32#2,4+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,4+<orig8=320(%esp)
+movl %ecx,4+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int32#3,4+<orig9=stack128#20
+# asm 2: movl <in9=%edx,4+<orig9=336(%esp)
+movl %edx,4+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int32#2,8+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,8+<orig8=320(%esp)
+movl %ecx,8+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int32#3,8+<orig9=stack128#20
+# asm 2: movl <in9=%edx,8+<orig9=336(%esp)
+movl %edx,8+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int32#2,12+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,12+<orig8=320(%esp)
+movl %ecx,12+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int32#3,12+<orig9=stack128#20
+# asm 2: movl <in9=%edx,12+<orig9=336(%esp)
+movl %edx,12+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#3,4+<x3=stack128#1
+# asm 2: movl <in9=%edx,4+<x3=32(%esp)
+movl %edx,4+32(%esp)
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#1,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%eax,>bytes_stack=24(%esp)
+movl %eax,24(%esp)
+
+# qhasm: i = 20
+# asm 1: mov  $20,>i=int32#1
+# asm 2: mov  $20,>i=%eax
+mov  $20,%eax
+
+# qhasm:   z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=96(%esp),>z5=%xmm0
+movdqa 96(%esp),%xmm0
+
+# qhasm:   z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=112(%esp),>z10=%xmm1
+movdqa 112(%esp),%xmm1
+
+# qhasm:   z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=128(%esp),>z15=%xmm2
+movdqa 128(%esp),%xmm2
+
+# qhasm:   z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=288(%esp),>z14=%xmm3
+movdqa 288(%esp),%xmm3
+
+# qhasm:   z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=304(%esp),>z3=%xmm4
+movdqa 304(%esp),%xmm4
+
+# qhasm:   z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=160(%esp),>z6=%xmm5
+movdqa 160(%esp),%xmm5
+
+# qhasm:   z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=176(%esp),>z11=%xmm6
+movdqa 176(%esp),%xmm6
+
+# qhasm:   z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=208(%esp),>z1=%xmm7
+movdqa 208(%esp),%xmm7
+
+# qhasm:   z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#21
+# asm 2: movdqa <z5=%xmm0,>z5_stack=352(%esp)
+movdqa %xmm0,352(%esp)
+
+# qhasm:   z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#22
+# asm 2: movdqa <z10=%xmm1,>z10_stack=368(%esp)
+movdqa %xmm1,368(%esp)
+
+# qhasm:   z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#23
+# asm 2: movdqa <z15=%xmm2,>z15_stack=384(%esp)
+movdqa %xmm2,384(%esp)
+
+# qhasm:   z14_stack = z14
+# asm 1: movdqa <z14=int6464#4,>z14_stack=stack128#24
+# asm 2: movdqa <z14=%xmm3,>z14_stack=400(%esp)
+movdqa %xmm3,400(%esp)
+
+# qhasm:   z3_stack = z3
+# asm 1: movdqa <z3=int6464#5,>z3_stack=stack128#25
+# asm 2: movdqa <z3=%xmm4,>z3_stack=416(%esp)
+movdqa %xmm4,416(%esp)
+
+# qhasm:   z6_stack = z6
+# asm 1: movdqa <z6=int6464#6,>z6_stack=stack128#26
+# asm 2: movdqa <z6=%xmm5,>z6_stack=432(%esp)
+movdqa %xmm5,432(%esp)
+
+# qhasm:   z11_stack = z11
+# asm 1: movdqa <z11=int6464#7,>z11_stack=stack128#27
+# asm 2: movdqa <z11=%xmm6,>z11_stack=448(%esp)
+movdqa %xmm6,448(%esp)
+
+# qhasm:   z1_stack = z1
+# asm 1: movdqa <z1=int6464#8,>z1_stack=stack128#28
+# asm 2: movdqa <z1=%xmm7,>z1_stack=464(%esp)
+movdqa %xmm7,464(%esp)
+
+# qhasm:   z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#5
+# asm 2: movdqa <orig7=224(%esp),>z7=%xmm4
+movdqa 224(%esp),%xmm4
+
+# qhasm:   z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#6
+# asm 2: movdqa <orig13=240(%esp),>z13=%xmm5
+movdqa 240(%esp),%xmm5
+
+# qhasm:   z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#7
+# asm 2: movdqa <orig2=256(%esp),>z2=%xmm6
+movdqa 256(%esp),%xmm6
+
+# qhasm:   z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#8
+# asm 2: movdqa <orig9=336(%esp),>z9=%xmm7
+movdqa 336(%esp),%xmm7
+
+# qhasm:                   p = orig0
+# asm 1: movdqa <orig0=stack128#8,>p=int6464#1
+# asm 2: movdqa <orig0=144(%esp),>p=%xmm0
+movdqa 144(%esp),%xmm0
+
+# qhasm:                   t = orig12
+# asm 1: movdqa <orig12=stack128#11,>t=int6464#3
+# asm 2: movdqa <orig12=192(%esp),>t=%xmm2
+movdqa 192(%esp),%xmm2
+
+# qhasm:                   q = orig4
+# asm 1: movdqa <orig4=stack128#16,>q=int6464#4
+# asm 2: movdqa <orig4=272(%esp),>q=%xmm3
+movdqa 272(%esp),%xmm3
+
+# qhasm:                   r = orig8
+# asm 1: movdqa <orig8=stack128#19,>r=int6464#2
+# asm 2: movdqa <orig8=320(%esp),>r=%xmm1
+movdqa 320(%esp),%xmm1
+
+# qhasm:   z7_stack = z7
+# asm 1: movdqa <z7=int6464#5,>z7_stack=stack128#29
+# asm 2: movdqa <z7=%xmm4,>z7_stack=480(%esp)
+movdqa %xmm4,480(%esp)
+
+# qhasm:   z13_stack = z13
+# asm 1: movdqa <z13=int6464#6,>z13_stack=stack128#30
+# asm 2: movdqa <z13=%xmm5,>z13_stack=496(%esp)
+movdqa %xmm5,496(%esp)
+
+# qhasm:   z2_stack = z2
+# asm 1: movdqa <z2=int6464#7,>z2_stack=stack128#31
+# asm 2: movdqa <z2=%xmm6,>z2_stack=512(%esp)
+movdqa %xmm6,512(%esp)
+
+# qhasm:   z9_stack = z9
+# asm 1: movdqa <z9=int6464#8,>z9_stack=stack128#32
+# asm 2: movdqa <z9=%xmm7,>z9_stack=528(%esp)
+movdqa %xmm7,528(%esp)
+
+# qhasm:   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#33
+# asm 2: movdqa <p=%xmm0,>z0_stack=544(%esp)
+movdqa %xmm0,544(%esp)
+
+# qhasm:   z12_stack = t
+# asm 1: movdqa <t=int6464#3,>z12_stack=stack128#34
+# asm 2: movdqa <t=%xmm2,>z12_stack=560(%esp)
+movdqa %xmm2,560(%esp)
+
+# qhasm:   z4_stack = q
+# asm 1: movdqa <q=int6464#4,>z4_stack=stack128#35
+# asm 2: movdqa <q=%xmm3,>z4_stack=576(%esp)
+movdqa %xmm3,576(%esp)
+
+# qhasm:   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#36
+# asm 2: movdqa <r=%xmm1,>z8_stack=592(%esp)
+movdqa %xmm1,592(%esp)
+
+# qhasm: mainloop1:
+._mainloop1:
+
+# qhasm:                   assign xmm0 to p
+
+# qhasm:                   assign xmm1 to r
+
+# qhasm:                   assign xmm2 to t
+
+# qhasm:                   assign xmm3 to q
+
+# qhasm:                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                   z4_stack = q
+# asm 1: movdqa <q=int6464#4,>z4_stack=stack128#33
+# asm 2: movdqa <q=%xmm3,>z4_stack=544(%esp)
+movdqa %xmm3,544(%esp)
+
+# qhasm:                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#34
+# asm 2: movdqa <r=%xmm1,>z8_stack=560(%esp)
+movdqa %xmm1,560(%esp)
+
+# qhasm: uint32323232      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                   mt = z1_stack
+# asm 1: movdqa <z1_stack=stack128#28,>mt=int6464#3
+# asm 2: movdqa <z1_stack=464(%esp),>mt=%xmm2
+movdqa 464(%esp),%xmm2
+
+# qhasm:                                   mp = z5_stack
+# asm 1: movdqa <z5_stack=stack128#21,>mp=int6464#5
+# asm 2: movdqa <z5_stack=352(%esp),>mp=%xmm4
+movdqa 352(%esp),%xmm4
+
+# qhasm:                                   mq = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>mq=int6464#4
+# asm 2: movdqa <z9_stack=528(%esp),>mq=%xmm3
+movdqa 528(%esp),%xmm3
+
+# qhasm:                                   mr = z13_stack
+# asm 1: movdqa <z13_stack=stack128#30,>mr=int6464#6
+# asm 2: movdqa <z13_stack=496(%esp),>mr=%xmm5
+movdqa 496(%esp),%xmm5
+
+# qhasm:                   z12_stack = s
+# asm 1: movdqa <s=int6464#7,>z12_stack=stack128#30
+# asm 2: movdqa <s=%xmm6,>z12_stack=496(%esp)
+movdqa %xmm6,496(%esp)
+
+# qhasm: uint32323232      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <p=%xmm0,>z0_stack=352(%esp)
+movdqa %xmm0,352(%esp)
+
+# qhasm:                                   assign xmm2 to mt
+
+# qhasm:                                   assign xmm3 to mq
+
+# qhasm:                                   assign xmm4 to mp
+
+# qhasm:                                   assign xmm5 to mr
+
+# qhasm:                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                   z9_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z9_stack=stack128#32
+# asm 2: movdqa <mq=%xmm3,>z9_stack=528(%esp)
+movdqa %xmm3,528(%esp)
+
+# qhasm:                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                   z13_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z13_stack=stack128#35
+# asm 2: movdqa <mr=%xmm5,>z13_stack=576(%esp)
+movdqa %xmm5,576(%esp)
+
+# qhasm: uint32323232                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                                                   t = z6_stack
+# asm 1: movdqa <z6_stack=stack128#26,>t=int6464#3
+# asm 2: movdqa <z6_stack=432(%esp),>t=%xmm2
+movdqa 432(%esp),%xmm2
+
+# qhasm:                                                   p = z10_stack
+# asm 1: movdqa <z10_stack=stack128#22,>p=int6464#1
+# asm 2: movdqa <z10_stack=368(%esp),>p=%xmm0
+movdqa 368(%esp),%xmm0
+
+# qhasm:                                                   q = z14_stack
+# asm 1: movdqa <z14_stack=stack128#24,>q=int6464#4
+# asm 2: movdqa <z14_stack=400(%esp),>q=%xmm3
+movdqa 400(%esp),%xmm3
+
+# qhasm:                                                   r = z2_stack
+# asm 1: movdqa <z2_stack=stack128#31,>r=int6464#2
+# asm 2: movdqa <z2_stack=512(%esp),>r=%xmm1
+movdqa 512(%esp),%xmm1
+
+# qhasm:                                   z1_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z1_stack=stack128#22
+# asm 2: movdqa <ms=%xmm6,>z1_stack=368(%esp)
+movdqa %xmm6,368(%esp)
+
+# qhasm: uint32323232                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                   z5_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z5_stack=stack128#24
+# asm 2: movdqa <mp=%xmm4,>z5_stack=400(%esp)
+movdqa %xmm4,400(%esp)
+
+# qhasm:                                                   assign xmm0 to p
+
+# qhasm:                                                   assign xmm1 to r
+
+# qhasm:                                                   assign xmm2 to t
+
+# qhasm:                                                   assign xmm3 to q
+
+# qhasm:                                                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                                                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                                   z14_stack = q
+# asm 1: movdqa <q=int6464#4,>z14_stack=stack128#36
+# asm 2: movdqa <q=%xmm3,>z14_stack=592(%esp)
+movdqa %xmm3,592(%esp)
+
+# qhasm:                                                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232                                      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                                                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232                                      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                                                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                                   z2_stack = r
+# asm 1: movdqa <r=int6464#2,>z2_stack=stack128#26
+# asm 2: movdqa <r=%xmm1,>z2_stack=432(%esp)
+movdqa %xmm1,432(%esp)
+
+# qhasm: uint32323232                                      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                                                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232                                      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                                                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                                                   mt = z11_stack
+# asm 1: movdqa <z11_stack=stack128#27,>mt=int6464#3
+# asm 2: movdqa <z11_stack=448(%esp),>mt=%xmm2
+movdqa 448(%esp),%xmm2
+
+# qhasm:                                                                   mp = z15_stack
+# asm 1: movdqa <z15_stack=stack128#23,>mp=int6464#5
+# asm 2: movdqa <z15_stack=384(%esp),>mp=%xmm4
+movdqa 384(%esp),%xmm4
+
+# qhasm:                                                                   mq = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>mq=int6464#4
+# asm 2: movdqa <z3_stack=416(%esp),>mq=%xmm3
+movdqa 416(%esp),%xmm3
+
+# qhasm:                                                                   mr = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>mr=int6464#6
+# asm 2: movdqa <z7_stack=480(%esp),>mr=%xmm5
+movdqa 480(%esp),%xmm5
+
+# qhasm:                                                   z6_stack = s
+# asm 1: movdqa <s=int6464#7,>z6_stack=stack128#23
+# asm 2: movdqa <s=%xmm6,>z6_stack=384(%esp)
+movdqa %xmm6,384(%esp)
+
+# qhasm: uint32323232                                      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                                                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232                                      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                                                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232                                      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                                                   z10_stack = p
+# asm 1: movdqa <p=int6464#1,>z10_stack=stack128#27
+# asm 2: movdqa <p=%xmm0,>z10_stack=448(%esp)
+movdqa %xmm0,448(%esp)
+
+# qhasm:                                                                   assign xmm2 to mt
+
+# qhasm:                                                                   assign xmm3 to mq
+
+# qhasm:                                                                   assign xmm4 to mp
+
+# qhasm:                                                                   assign xmm5 to mr
+
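+# The m-prefixed temporaries (mt, mp, mq, mr, ms, mu) below carry a second,
+# independent quarter-round -- here on the z11/z15/z3/z7 column -- interleaved
+# with the one above, presumably to hide SSE instruction latencies.
+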
+# qhasm:                                                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                                                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                                                   z3_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z3_stack=stack128#25
+# asm 2: movdqa <mq=%xmm3,>z3_stack=416(%esp)
+movdqa %xmm3,416(%esp)
+
+# qhasm:                                                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                                                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                                                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                                                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                                                   z7_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z7_stack=stack128#29
+# asm 2: movdqa <mr=%xmm5,>z7_stack=480(%esp)
+movdqa %xmm5,480(%esp)
+
+# qhasm: uint32323232                                                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                                                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
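+# End of the column-round half of this double round.  The loads below start
+# the row-round half: t/p/q/r are regrouped as z3/z0/z1/z2 (row 0), and the
+# remaining rows z4..z7, z8..z11 and z12..z15 follow in the same interleaved
+# fashion.
+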
+# qhasm:                   t = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>t=int6464#3
+# asm 2: movdqa <z3_stack=416(%esp),>t=%xmm2
+movdqa 416(%esp),%xmm2
+
+# qhasm:                   p = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>p=int6464#1
+# asm 2: movdqa <z0_stack=352(%esp),>p=%xmm0
+movdqa 352(%esp),%xmm0
+
+# qhasm:                   q = z1_stack
+# asm 1: movdqa <z1_stack=stack128#22,>q=int6464#4
+# asm 2: movdqa <z1_stack=368(%esp),>q=%xmm3
+movdqa 368(%esp),%xmm3
+
+# qhasm:                   r = z2_stack
+# asm 1: movdqa <z2_stack=stack128#26,>r=int6464#2
+# asm 2: movdqa <z2_stack=432(%esp),>r=%xmm1
+movdqa 432(%esp),%xmm1
+
+# qhasm:                                                                   z11_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z11_stack=stack128#21
+# asm 2: movdqa <ms=%xmm6,>z11_stack=352(%esp)
+movdqa %xmm6,352(%esp)
+
+# qhasm: uint32323232                                                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                                                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                                                   z15_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z15_stack=stack128#22
+# asm 2: movdqa <mp=%xmm4,>z15_stack=368(%esp)
+movdqa %xmm4,368(%esp)
+
+# qhasm:                   assign xmm0 to p
+
+# qhasm:                   assign xmm1 to r
+
+# qhasm:                   assign xmm2 to t
+
+# qhasm:                   assign xmm3 to q
+
+# qhasm:                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                   z1_stack = q
+# asm 1: movdqa <q=int6464#4,>z1_stack=stack128#28
+# asm 2: movdqa <q=%xmm3,>z1_stack=464(%esp)
+movdqa %xmm3,464(%esp)
+
+# qhasm:                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                   z2_stack = r
+# asm 1: movdqa <r=int6464#2,>z2_stack=stack128#31
+# asm 2: movdqa <r=%xmm1,>z2_stack=512(%esp)
+movdqa %xmm1,512(%esp)
+
+# qhasm: uint32323232      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                   mt = z4_stack
+# asm 1: movdqa <z4_stack=stack128#33,>mt=int6464#3
+# asm 2: movdqa <z4_stack=544(%esp),>mt=%xmm2
+movdqa 544(%esp),%xmm2
+
+# qhasm:                                   mp = z5_stack
+# asm 1: movdqa <z5_stack=stack128#24,>mp=int6464#5
+# asm 2: movdqa <z5_stack=400(%esp),>mp=%xmm4
+movdqa 400(%esp),%xmm4
+
+# qhasm:                                   mq = z6_stack
+# asm 1: movdqa <z6_stack=stack128#23,>mq=int6464#4
+# asm 2: movdqa <z6_stack=384(%esp),>mq=%xmm3
+movdqa 384(%esp),%xmm3
+
+# qhasm:                                   mr = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>mr=int6464#6
+# asm 2: movdqa <z7_stack=480(%esp),>mr=%xmm5
+movdqa 480(%esp),%xmm5
+
+# qhasm:                   z3_stack = s
+# asm 1: movdqa <s=int6464#7,>z3_stack=stack128#25
+# asm 2: movdqa <s=%xmm6,>z3_stack=416(%esp)
+movdqa %xmm6,416(%esp)
+
+# qhasm: uint32323232      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#33
+# asm 2: movdqa <p=%xmm0,>z0_stack=544(%esp)
+movdqa %xmm0,544(%esp)
+
+# qhasm:                                   assign xmm2 to mt
+
+# qhasm:                                   assign xmm3 to mq
+
+# qhasm:                                   assign xmm4 to mp
+
+# qhasm:                                   assign xmm5 to mr
+
+# qhasm:                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                   z6_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z6_stack=stack128#26
+# asm 2: movdqa <mq=%xmm3,>z6_stack=432(%esp)
+movdqa %xmm3,432(%esp)
+
+# qhasm:                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                   z7_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z7_stack=stack128#29
+# asm 2: movdqa <mr=%xmm5,>z7_stack=480(%esp)
+movdqa %xmm5,480(%esp)
+
+# qhasm: uint32323232                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                                                   t = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>t=int6464#3
+# asm 2: movdqa <z9_stack=528(%esp),>t=%xmm2
+movdqa 528(%esp),%xmm2
+
+# qhasm:                                                   p = z10_stack
+# asm 1: movdqa <z10_stack=stack128#27,>p=int6464#1
+# asm 2: movdqa <z10_stack=448(%esp),>p=%xmm0
+movdqa 448(%esp),%xmm0
+
+# qhasm:                                                   q = z11_stack
+# asm 1: movdqa <z11_stack=stack128#21,>q=int6464#4
+# asm 2: movdqa <z11_stack=352(%esp),>q=%xmm3
+movdqa 352(%esp),%xmm3
+
+# qhasm:                                                   r = z8_stack
+# asm 1: movdqa <z8_stack=stack128#34,>r=int6464#2
+# asm 2: movdqa <z8_stack=560(%esp),>r=%xmm1
+movdqa 560(%esp),%xmm1
+
+# qhasm:                                   z4_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z4_stack=stack128#34
+# asm 2: movdqa <ms=%xmm6,>z4_stack=560(%esp)
+movdqa %xmm6,560(%esp)
+
+# qhasm: uint32323232                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                   z5_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z5_stack=stack128#21
+# asm 2: movdqa <mp=%xmm4,>z5_stack=352(%esp)
+movdqa %xmm4,352(%esp)
+
+# qhasm:                                                   assign xmm0 to p
+
+# qhasm:                                                   assign xmm1 to r
+
+# qhasm:                                                   assign xmm2 to t
+
+# qhasm:                                                   assign xmm3 to q
+
+# qhasm:                                                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                                                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                                   z11_stack = q
+# asm 1: movdqa <q=int6464#4,>z11_stack=stack128#27
+# asm 2: movdqa <q=%xmm3,>z11_stack=448(%esp)
+movdqa %xmm3,448(%esp)
+
+# qhasm:                                                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232                                      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                                                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232                                      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                                                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                                   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#37
+# asm 2: movdqa <r=%xmm1,>z8_stack=608(%esp)
+movdqa %xmm1,608(%esp)
+
+# qhasm: uint32323232                                      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                                                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232                                      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                                                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                                                   mt = z14_stack
+# asm 1: movdqa <z14_stack=stack128#36,>mt=int6464#3
+# asm 2: movdqa <z14_stack=592(%esp),>mt=%xmm2
+movdqa 592(%esp),%xmm2
+
+# qhasm:                                                                   mp = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>mp=int6464#5
+# asm 2: movdqa <z15_stack=368(%esp),>mp=%xmm4
+movdqa 368(%esp),%xmm4
+
+# qhasm:                                                                   mq = z12_stack
+# asm 1: movdqa <z12_stack=stack128#30,>mq=int6464#4
+# asm 2: movdqa <z12_stack=496(%esp),>mq=%xmm3
+movdqa 496(%esp),%xmm3
+
+# qhasm:                                                                   mr = z13_stack
+# asm 1: movdqa <z13_stack=stack128#35,>mr=int6464#6
+# asm 2: movdqa <z13_stack=576(%esp),>mr=%xmm5
+movdqa 576(%esp),%xmm5
+
+# qhasm:                                                   z9_stack = s
+# asm 1: movdqa <s=int6464#7,>z9_stack=stack128#32
+# asm 2: movdqa <s=%xmm6,>z9_stack=528(%esp)
+movdqa %xmm6,528(%esp)
+
+# qhasm: uint32323232                                      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                                                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232                                      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                                                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232                                      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                                                   z10_stack = p
+# asm 1: movdqa <p=int6464#1,>z10_stack=stack128#22
+# asm 2: movdqa <p=%xmm0,>z10_stack=368(%esp)
+movdqa %xmm0,368(%esp)
+
+# qhasm:                                                                   assign xmm2 to mt
+
+# qhasm:                                                                   assign xmm3 to mq
+
+# qhasm:                                                                   assign xmm4 to mp
+
+# qhasm:                                                                   assign xmm5 to mr
+
+# qhasm:                                                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                                                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                                                   z12_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z12_stack=stack128#35
+# asm 2: movdqa <mq=%xmm3,>z12_stack=576(%esp)
+movdqa %xmm3,576(%esp)
+
+# qhasm:                                                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                                                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                                                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                                                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                                                   z13_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z13_stack=stack128#30
+# asm 2: movdqa <mr=%xmm5,>z13_stack=496(%esp)
+movdqa %xmm5,496(%esp)
+
+# qhasm: uint32323232                                                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                                                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                   t = z12_stack
+# asm 1: movdqa <z12_stack=stack128#35,>t=int6464#3
+# asm 2: movdqa <z12_stack=576(%esp),>t=%xmm2
+movdqa 576(%esp),%xmm2
+
+# qhasm:                   p = z0_stack
+# asm 1: movdqa <z0_stack=stack128#33,>p=int6464#1
+# asm 2: movdqa <z0_stack=544(%esp),>p=%xmm0
+movdqa 544(%esp),%xmm0
+
+# qhasm:                   q = z4_stack
+# asm 1: movdqa <z4_stack=stack128#34,>q=int6464#4
+# asm 2: movdqa <z4_stack=560(%esp),>q=%xmm3
+movdqa 560(%esp),%xmm3
+
+# qhasm:                   r = z8_stack
+# asm 1: movdqa <z8_stack=stack128#37,>r=int6464#2
+# asm 2: movdqa <z8_stack=608(%esp),>r=%xmm1
+movdqa 608(%esp),%xmm1
+
+# qhasm:                                                                   z14_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z14_stack=stack128#24
+# asm 2: movdqa <ms=%xmm6,>z14_stack=400(%esp)
+movdqa %xmm6,400(%esp)
+
+# qhasm: uint32323232                                                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                                                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                                                   z15_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z15_stack=stack128#23
+# asm 2: movdqa <mp=%xmm4,>z15_stack=384(%esp)
+movdqa %xmm4,384(%esp)
+
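+# i counts the remaining Salsa20 rounds; each pass through mainloop1 performs
+# one column round and one row round (a double round), hence the step of 2.
+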
+# qhasm:                   unsigned>? i -= 2
+# asm 1: sub  $2,<i=int32#1
+# asm 2: sub  $2,<i=%eax
+sub  $2,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
+
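+# Rounds done.  Below, each z word gets the corresponding orig word added back
+# (the Salsa20 feed-forward) and the result is xor'ed into the message to form
+# the output.  Each xmm register holds the same state word for four
+# independent 64-byte blocks, so m and out are walked in strides of 64 bytes.
+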
+# qhasm:   out = out_stack
+# asm 1: movl <out_stack=stack32#6,>out=int32#6
+# asm 2: movl <out_stack=20(%esp),>out=%edi
+movl 20(%esp),%edi
+
+# qhasm:   z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#33,>z0=int6464#1
+# asm 2: movdqa <z0_stack=544(%esp),>z0=%xmm0
+movdqa 544(%esp),%xmm0
+
+# qhasm:   z1 = z1_stack
+# asm 1: movdqa <z1_stack=stack128#28,>z1=int6464#2
+# asm 2: movdqa <z1_stack=464(%esp),>z1=%xmm1
+movdqa 464(%esp),%xmm1
+
+# qhasm:   z2 = z2_stack
+# asm 1: movdqa <z2_stack=stack128#31,>z2=int6464#3
+# asm 2: movdqa <z2_stack=512(%esp),>z2=%xmm2
+movdqa 512(%esp),%xmm2
+
+# qhasm:   z3 = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>z3=int6464#4
+# asm 2: movdqa <z3_stack=416(%esp),>z3=%xmm3
+movdqa 416(%esp),%xmm3
+
+# qhasm:   uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#1
+# asm 2: paddd <orig0=144(%esp),<z0=%xmm0
+paddd 144(%esp),%xmm0
+
+# qhasm:   uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#2
+# asm 2: paddd <orig1=208(%esp),<z1=%xmm1
+paddd 208(%esp),%xmm1
+
+# qhasm:   uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#3
+# asm 2: paddd <orig2=256(%esp),<z2=%xmm2
+paddd 256(%esp),%xmm2
+
+# qhasm:   uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#4
+# asm 2: paddd <orig3=304(%esp),<z3=%xmm3
+paddd 304(%esp),%xmm3
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
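+# pshufd $0x39 (the "<<<= 96" above) rotates the four 32-bit lanes, so each
+# successive movd extracts the word belonging to the next of the four blocks;
+# that is why in0..in3 are xor'ed against m + 0, then m + 64, 128 and 192.
+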
+# qhasm:   in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int32#5),<in0=int32#1
+# asm 2: xorl 0(<m=%esi),<in0=%eax
+xorl 0(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int32#5),<in1=int32#2
+# asm 2: xorl 4(<m=%esi),<in1=%ecx
+xorl 4(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int32#5),<in2=int32#3
+# asm 2: xorl 8(<m=%esi),<in2=%edx
+xorl 8(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int32#5),<in3=int32#4
+# asm 2: xorl 12(<m=%esi),<in3=%ebx
+xorl 12(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int32#1,0(<out=int32#6)
+# asm 2: movl <in0=%eax,0(<out=%edi)
+movl %eax,0(%edi)
+
+# qhasm:   *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int32#2,4(<out=int32#6)
+# asm 2: movl <in1=%ecx,4(<out=%edi)
+movl %ecx,4(%edi)
+
+# qhasm:   *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int32#3,8(<out=int32#6)
+# asm 2: movl <in2=%edx,8(<out=%edi)
+movl %edx,8(%edi)
+
+# qhasm:   *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int32#4,12(<out=int32#6)
+# asm 2: movl <in3=%ebx,12(<out=%edi)
+movl %ebx,12(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int32#5),<in0=int32#1
+# asm 2: xorl 64(<m=%esi),<in0=%eax
+xorl 64(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int32#5),<in1=int32#2
+# asm 2: xorl 68(<m=%esi),<in1=%ecx
+xorl 68(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int32#5),<in2=int32#3
+# asm 2: xorl 72(<m=%esi),<in2=%edx
+xorl 72(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int32#5),<in3=int32#4
+# asm 2: xorl 76(<m=%esi),<in3=%ebx
+xorl 76(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 64) = in0
+# asm 1: movl <in0=int32#1,64(<out=int32#6)
+# asm 2: movl <in0=%eax,64(<out=%edi)
+movl %eax,64(%edi)
+
+# qhasm:   *(uint32 *) (out + 68) = in1
+# asm 1: movl <in1=int32#2,68(<out=int32#6)
+# asm 2: movl <in1=%ecx,68(<out=%edi)
+movl %ecx,68(%edi)
+
+# qhasm:   *(uint32 *) (out + 72) = in2
+# asm 1: movl <in2=int32#3,72(<out=int32#6)
+# asm 2: movl <in2=%edx,72(<out=%edi)
+movl %edx,72(%edi)
+
+# qhasm:   *(uint32 *) (out + 76) = in3
+# asm 1: movl <in3=int32#4,76(<out=int32#6)
+# asm 2: movl <in3=%ebx,76(<out=%edi)
+movl %ebx,76(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int32#5),<in0=int32#1
+# asm 2: xorl 128(<m=%esi),<in0=%eax
+xorl 128(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int32#5),<in1=int32#2
+# asm 2: xorl 132(<m=%esi),<in1=%ecx
+xorl 132(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int32#5),<in2=int32#3
+# asm 2: xorl 136(<m=%esi),<in2=%edx
+xorl 136(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int32#5),<in3=int32#4
+# asm 2: xorl 140(<m=%esi),<in3=%ebx
+xorl 140(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 128) = in0
+# asm 1: movl <in0=int32#1,128(<out=int32#6)
+# asm 2: movl <in0=%eax,128(<out=%edi)
+movl %eax,128(%edi)
+
+# qhasm:   *(uint32 *) (out + 132) = in1
+# asm 1: movl <in1=int32#2,132(<out=int32#6)
+# asm 2: movl <in1=%ecx,132(<out=%edi)
+movl %ecx,132(%edi)
+
+# qhasm:   *(uint32 *) (out + 136) = in2
+# asm 1: movl <in2=int32#3,136(<out=int32#6)
+# asm 2: movl <in2=%edx,136(<out=%edi)
+movl %edx,136(%edi)
+
+# qhasm:   *(uint32 *) (out + 140) = in3
+# asm 1: movl <in3=int32#4,140(<out=int32#6)
+# asm 2: movl <in3=%ebx,140(<out=%edi)
+movl %ebx,140(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int32#5),<in0=int32#1
+# asm 2: xorl 192(<m=%esi),<in0=%eax
+xorl 192(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int32#5),<in1=int32#2
+# asm 2: xorl 196(<m=%esi),<in1=%ecx
+xorl 196(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int32#5),<in2=int32#3
+# asm 2: xorl 200(<m=%esi),<in2=%edx
+xorl 200(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int32#5),<in3=int32#4
+# asm 2: xorl 204(<m=%esi),<in3=%ebx
+xorl 204(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 192) = in0
+# asm 1: movl <in0=int32#1,192(<out=int32#6)
+# asm 2: movl <in0=%eax,192(<out=%edi)
+movl %eax,192(%edi)
+
+# qhasm:   *(uint32 *) (out + 196) = in1
+# asm 1: movl <in1=int32#2,196(<out=int32#6)
+# asm 2: movl <in1=%ecx,196(<out=%edi)
+movl %ecx,196(%edi)
+
+# qhasm:   *(uint32 *) (out + 200) = in2
+# asm 1: movl <in2=int32#3,200(<out=int32#6)
+# asm 2: movl <in2=%edx,200(<out=%edi)
+movl %edx,200(%edi)
+
+# qhasm:   *(uint32 *) (out + 204) = in3
+# asm 1: movl <in3=int32#4,204(<out=int32#6)
+# asm 2: movl <in3=%ebx,204(<out=%edi)
+movl %ebx,204(%edi)
+
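+# The same extract/xor/store pattern now repeats for the remaining state
+# words z4..z7, z8..z11 and z12..z15, at offsets 16, 32 and 48 within each
+# of the four 64-byte blocks.
+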
+# qhasm:   z4 = z4_stack
+# asm 1: movdqa <z4_stack=stack128#34,>z4=int6464#1
+# asm 2: movdqa <z4_stack=560(%esp),>z4=%xmm0
+movdqa 560(%esp),%xmm0
+
+# qhasm:   z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#21,>z5=int6464#2
+# asm 2: movdqa <z5_stack=352(%esp),>z5=%xmm1
+movdqa 352(%esp),%xmm1
+
+# qhasm:   z6 = z6_stack
+# asm 1: movdqa <z6_stack=stack128#26,>z6=int6464#3
+# asm 2: movdqa <z6_stack=432(%esp),>z6=%xmm2
+movdqa 432(%esp),%xmm2
+
+# qhasm:   z7 = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>z7=int6464#4
+# asm 2: movdqa <z7_stack=480(%esp),>z7=%xmm3
+movdqa 480(%esp),%xmm3
+
+# qhasm:   uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#1
+# asm 2: paddd <orig4=272(%esp),<z4=%xmm0
+paddd 272(%esp),%xmm0
+
+# qhasm:   uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#2
+# asm 2: paddd <orig5=96(%esp),<z5=%xmm1
+paddd 96(%esp),%xmm1
+
+# qhasm:   uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#3
+# asm 2: paddd <orig6=160(%esp),<z6=%xmm2
+paddd 160(%esp),%xmm2
+
+# qhasm:   uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#4
+# asm 2: paddd <orig7=224(%esp),<z7=%xmm3
+paddd 224(%esp),%xmm3
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int32#5),<in4=int32#1
+# asm 2: xorl 16(<m=%esi),<in4=%eax
+xorl 16(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int32#5),<in5=int32#2
+# asm 2: xorl 20(<m=%esi),<in5=%ecx
+xorl 20(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int32#5),<in6=int32#3
+# asm 2: xorl 24(<m=%esi),<in6=%edx
+xorl 24(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int32#5),<in7=int32#4
+# asm 2: xorl 28(<m=%esi),<in7=%ebx
+xorl 28(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int32#1,16(<out=int32#6)
+# asm 2: movl <in4=%eax,16(<out=%edi)
+movl %eax,16(%edi)
+
+# qhasm:   *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int32#2,20(<out=int32#6)
+# asm 2: movl <in5=%ecx,20(<out=%edi)
+movl %ecx,20(%edi)
+
+# qhasm:   *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int32#3,24(<out=int32#6)
+# asm 2: movl <in6=%edx,24(<out=%edi)
+movl %edx,24(%edi)
+
+# qhasm:   *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int32#4,28(<out=int32#6)
+# asm 2: movl <in7=%ebx,28(<out=%edi)
+movl %ebx,28(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int32#5),<in4=int32#1
+# asm 2: xorl 80(<m=%esi),<in4=%eax
+xorl 80(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int32#5),<in5=int32#2
+# asm 2: xorl 84(<m=%esi),<in5=%ecx
+xorl 84(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int32#5),<in6=int32#3
+# asm 2: xorl 88(<m=%esi),<in6=%edx
+xorl 88(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int32#5),<in7=int32#4
+# asm 2: xorl 92(<m=%esi),<in7=%ebx
+xorl 92(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 80) = in4
+# asm 1: movl <in4=int32#1,80(<out=int32#6)
+# asm 2: movl <in4=%eax,80(<out=%edi)
+movl %eax,80(%edi)
+
+# qhasm:   *(uint32 *) (out + 84) = in5
+# asm 1: movl <in5=int32#2,84(<out=int32#6)
+# asm 2: movl <in5=%ecx,84(<out=%edi)
+movl %ecx,84(%edi)
+
+# qhasm:   *(uint32 *) (out + 88) = in6
+# asm 1: movl <in6=int32#3,88(<out=int32#6)
+# asm 2: movl <in6=%edx,88(<out=%edi)
+movl %edx,88(%edi)
+
+# qhasm:   *(uint32 *) (out + 92) = in7
+# asm 1: movl <in7=int32#4,92(<out=int32#6)
+# asm 2: movl <in7=%ebx,92(<out=%edi)
+movl %ebx,92(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int32#5),<in4=int32#1
+# asm 2: xorl 144(<m=%esi),<in4=%eax
+xorl 144(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int32#5),<in5=int32#2
+# asm 2: xorl 148(<m=%esi),<in5=%ecx
+xorl 148(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int32#5),<in6=int32#3
+# asm 2: xorl 152(<m=%esi),<in6=%edx
+xorl 152(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int32#5),<in7=int32#4
+# asm 2: xorl 156(<m=%esi),<in7=%ebx
+xorl 156(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 144) = in4
+# asm 1: movl <in4=int32#1,144(<out=int32#6)
+# asm 2: movl <in4=%eax,144(<out=%edi)
+movl %eax,144(%edi)
+
+# qhasm:   *(uint32 *) (out + 148) = in5
+# asm 1: movl <in5=int32#2,148(<out=int32#6)
+# asm 2: movl <in5=%ecx,148(<out=%edi)
+movl %ecx,148(%edi)
+
+# qhasm:   *(uint32 *) (out + 152) = in6
+# asm 1: movl <in6=int32#3,152(<out=int32#6)
+# asm 2: movl <in6=%edx,152(<out=%edi)
+movl %edx,152(%edi)
+
+# qhasm:   *(uint32 *) (out + 156) = in7
+# asm 1: movl <in7=int32#4,156(<out=int32#6)
+# asm 2: movl <in7=%ebx,156(<out=%edi)
+movl %ebx,156(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int32#5),<in4=int32#1
+# asm 2: xorl 208(<m=%esi),<in4=%eax
+xorl 208(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int32#5),<in5=int32#2
+# asm 2: xorl 212(<m=%esi),<in5=%ecx
+xorl 212(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int32#5),<in6=int32#3
+# asm 2: xorl 216(<m=%esi),<in6=%edx
+xorl 216(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int32#5),<in7=int32#4
+# asm 2: xorl 220(<m=%esi),<in7=%ebx
+xorl 220(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 208) = in4
+# asm 1: movl <in4=int32#1,208(<out=int32#6)
+# asm 2: movl <in4=%eax,208(<out=%edi)
+movl %eax,208(%edi)
+
+# qhasm:   *(uint32 *) (out + 212) = in5
+# asm 1: movl <in5=int32#2,212(<out=int32#6)
+# asm 2: movl <in5=%ecx,212(<out=%edi)
+movl %ecx,212(%edi)
+
+# qhasm:   *(uint32 *) (out + 216) = in6
+# asm 1: movl <in6=int32#3,216(<out=int32#6)
+# asm 2: movl <in6=%edx,216(<out=%edi)
+movl %edx,216(%edi)
+
+# qhasm:   *(uint32 *) (out + 220) = in7
+# asm 1: movl <in7=int32#4,220(<out=int32#6)
+# asm 2: movl <in7=%ebx,220(<out=%edi)
+movl %ebx,220(%edi)
+
+# qhasm:   z8 = z8_stack
+# asm 1: movdqa <z8_stack=stack128#37,>z8=int6464#1
+# asm 2: movdqa <z8_stack=608(%esp),>z8=%xmm0
+movdqa 608(%esp),%xmm0
+
+# qhasm:   z9 = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>z9=int6464#2
+# asm 2: movdqa <z9_stack=528(%esp),>z9=%xmm1
+movdqa 528(%esp),%xmm1
+
+# qhasm:   z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#22,>z10=int6464#3
+# asm 2: movdqa <z10_stack=368(%esp),>z10=%xmm2
+movdqa 368(%esp),%xmm2
+
+# qhasm:   z11 = z11_stack
+# asm 1: movdqa <z11_stack=stack128#27,>z11=int6464#4
+# asm 2: movdqa <z11_stack=448(%esp),>z11=%xmm3
+movdqa 448(%esp),%xmm3
+
+# qhasm:   uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#1
+# asm 2: paddd <orig8=320(%esp),<z8=%xmm0
+paddd 320(%esp),%xmm0
+
+# qhasm:   uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#2
+# asm 2: paddd <orig9=336(%esp),<z9=%xmm1
+paddd 336(%esp),%xmm1
+
+# qhasm:   uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#3
+# asm 2: paddd <orig10=112(%esp),<z10=%xmm2
+paddd 112(%esp),%xmm2
+
+# qhasm:   uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#4
+# asm 2: paddd <orig11=176(%esp),<z11=%xmm3
+paddd 176(%esp),%xmm3
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int32#5),<in8=int32#1
+# asm 2: xorl 32(<m=%esi),<in8=%eax
+xorl 32(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int32#5),<in9=int32#2
+# asm 2: xorl 36(<m=%esi),<in9=%ecx
+xorl 36(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int32#5),<in10=int32#3
+# asm 2: xorl 40(<m=%esi),<in10=%edx
+xorl 40(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int32#5),<in11=int32#4
+# asm 2: xorl 44(<m=%esi),<in11=%ebx
+xorl 44(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int32#1,32(<out=int32#6)
+# asm 2: movl <in8=%eax,32(<out=%edi)
+movl %eax,32(%edi)
+
+# qhasm:   *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int32#2,36(<out=int32#6)
+# asm 2: movl <in9=%ecx,36(<out=%edi)
+movl %ecx,36(%edi)
+
+# qhasm:   *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int32#3,40(<out=int32#6)
+# asm 2: movl <in10=%edx,40(<out=%edi)
+movl %edx,40(%edi)
+
+# qhasm:   *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int32#4,44(<out=int32#6)
+# asm 2: movl <in11=%ebx,44(<out=%edi)
+movl %ebx,44(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int32#5),<in8=int32#1
+# asm 2: xorl 96(<m=%esi),<in8=%eax
+xorl 96(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int32#5),<in9=int32#2
+# asm 2: xorl 100(<m=%esi),<in9=%ecx
+xorl 100(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int32#5),<in10=int32#3
+# asm 2: xorl 104(<m=%esi),<in10=%edx
+xorl 104(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int32#5),<in11=int32#4
+# asm 2: xorl 108(<m=%esi),<in11=%ebx
+xorl 108(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 96) = in8
+# asm 1: movl <in8=int32#1,96(<out=int32#6)
+# asm 2: movl <in8=%eax,96(<out=%edi)
+movl %eax,96(%edi)
+
+# qhasm:   *(uint32 *) (out + 100) = in9
+# asm 1: movl <in9=int32#2,100(<out=int32#6)
+# asm 2: movl <in9=%ecx,100(<out=%edi)
+movl %ecx,100(%edi)
+
+# qhasm:   *(uint32 *) (out + 104) = in10
+# asm 1: movl <in10=int32#3,104(<out=int32#6)
+# asm 2: movl <in10=%edx,104(<out=%edi)
+movl %edx,104(%edi)
+
+# qhasm:   *(uint32 *) (out + 108) = in11
+# asm 1: movl <in11=int32#4,108(<out=int32#6)
+# asm 2: movl <in11=%ebx,108(<out=%edi)
+movl %ebx,108(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int32#5),<in8=int32#1
+# asm 2: xorl 160(<m=%esi),<in8=%eax
+xorl 160(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int32#5),<in9=int32#2
+# asm 2: xorl 164(<m=%esi),<in9=%ecx
+xorl 164(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int32#5),<in10=int32#3
+# asm 2: xorl 168(<m=%esi),<in10=%edx
+xorl 168(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int32#5),<in11=int32#4
+# asm 2: xorl 172(<m=%esi),<in11=%ebx
+xorl 172(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 160) = in8
+# asm 1: movl <in8=int32#1,160(<out=int32#6)
+# asm 2: movl <in8=%eax,160(<out=%edi)
+movl %eax,160(%edi)
+
+# qhasm:   *(uint32 *) (out + 164) = in9
+# asm 1: movl <in9=int32#2,164(<out=int32#6)
+# asm 2: movl <in9=%ecx,164(<out=%edi)
+movl %ecx,164(%edi)
+
+# qhasm:   *(uint32 *) (out + 168) = in10
+# asm 1: movl <in10=int32#3,168(<out=int32#6)
+# asm 2: movl <in10=%edx,168(<out=%edi)
+movl %edx,168(%edi)
+
+# qhasm:   *(uint32 *) (out + 172) = in11
+# asm 1: movl <in11=int32#4,172(<out=int32#6)
+# asm 2: movl <in11=%ebx,172(<out=%edi)
+movl %ebx,172(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int32#5),<in8=int32#1
+# asm 2: xorl 224(<m=%esi),<in8=%eax
+xorl 224(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int32#5),<in9=int32#2
+# asm 2: xorl 228(<m=%esi),<in9=%ecx
+xorl 228(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int32#5),<in10=int32#3
+# asm 2: xorl 232(<m=%esi),<in10=%edx
+xorl 232(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int32#5),<in11=int32#4
+# asm 2: xorl 236(<m=%esi),<in11=%ebx
+xorl 236(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 224) = in8
+# asm 1: movl <in8=int32#1,224(<out=int32#6)
+# asm 2: movl <in8=%eax,224(<out=%edi)
+movl %eax,224(%edi)
+
+# qhasm:   *(uint32 *) (out + 228) = in9
+# asm 1: movl <in9=int32#2,228(<out=int32#6)
+# asm 2: movl <in9=%ecx,228(<out=%edi)
+movl %ecx,228(%edi)
+
+# qhasm:   *(uint32 *) (out + 232) = in10
+# asm 1: movl <in10=int32#3,232(<out=int32#6)
+# asm 2: movl <in10=%edx,232(<out=%edi)
+movl %edx,232(%edi)
+
+# qhasm:   *(uint32 *) (out + 236) = in11
+# asm 1: movl <in11=int32#4,236(<out=int32#6)
+# asm 2: movl <in11=%ebx,236(<out=%edi)
+movl %ebx,236(%edi)
+
+# qhasm:   z12 = z12_stack
+# asm 1: movdqa <z12_stack=stack128#35,>z12=int6464#1
+# asm 2: movdqa <z12_stack=576(%esp),>z12=%xmm0
+movdqa 576(%esp),%xmm0
+
+# qhasm:   z13 = z13_stack
+# asm 1: movdqa <z13_stack=stack128#30,>z13=int6464#2
+# asm 2: movdqa <z13_stack=496(%esp),>z13=%xmm1
+movdqa 496(%esp),%xmm1
+
+# qhasm:   z14 = z14_stack
+# asm 1: movdqa <z14_stack=stack128#24,>z14=int6464#3
+# asm 2: movdqa <z14_stack=400(%esp),>z14=%xmm2
+movdqa 400(%esp),%xmm2
+
+# qhasm:   z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#23,>z15=int6464#4
+# asm 2: movdqa <z15_stack=384(%esp),>z15=%xmm3
+movdqa 384(%esp),%xmm3
+
+# qhasm:   uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#1
+# asm 2: paddd <orig12=192(%esp),<z12=%xmm0
+paddd 192(%esp),%xmm0
+
+# qhasm:   uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#2
+# asm 2: paddd <orig13=240(%esp),<z13=%xmm1
+paddd 240(%esp),%xmm1
+
+# qhasm:   uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#3
+# asm 2: paddd <orig14=288(%esp),<z14=%xmm2
+paddd 288(%esp),%xmm2
+
+# qhasm:   uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#4
+# asm 2: paddd <orig15=128(%esp),<z15=%xmm3
+paddd 128(%esp),%xmm3
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int32#5),<in12=int32#1
+# asm 2: xorl 48(<m=%esi),<in12=%eax
+xorl 48(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int32#5),<in13=int32#2
+# asm 2: xorl 52(<m=%esi),<in13=%ecx
+xorl 52(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int32#5),<in14=int32#3
+# asm 2: xorl 56(<m=%esi),<in14=%edx
+xorl 56(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int32#5),<in15=int32#4
+# asm 2: xorl 60(<m=%esi),<in15=%ebx
+xorl 60(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int32#1,48(<out=int32#6)
+# asm 2: movl <in12=%eax,48(<out=%edi)
+movl %eax,48(%edi)
+
+# qhasm:   *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int32#2,52(<out=int32#6)
+# asm 2: movl <in13=%ecx,52(<out=%edi)
+movl %ecx,52(%edi)
+
+# qhasm:   *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int32#3,56(<out=int32#6)
+# asm 2: movl <in14=%edx,56(<out=%edi)
+movl %edx,56(%edi)
+
+# qhasm:   *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int32#4,60(<out=int32#6)
+# asm 2: movl <in15=%ebx,60(<out=%edi)
+movl %ebx,60(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int32#5),<in12=int32#1
+# asm 2: xorl 112(<m=%esi),<in12=%eax
+xorl 112(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int32#5),<in13=int32#2
+# asm 2: xorl 116(<m=%esi),<in13=%ecx
+xorl 116(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int32#5),<in14=int32#3
+# asm 2: xorl 120(<m=%esi),<in14=%edx
+xorl 120(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int32#5),<in15=int32#4
+# asm 2: xorl 124(<m=%esi),<in15=%ebx
+xorl 124(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 112) = in12
+# asm 1: movl <in12=int32#1,112(<out=int32#6)
+# asm 2: movl <in12=%eax,112(<out=%edi)
+movl %eax,112(%edi)
+
+# qhasm:   *(uint32 *) (out + 116) = in13
+# asm 1: movl <in13=int32#2,116(<out=int32#6)
+# asm 2: movl <in13=%ecx,116(<out=%edi)
+movl %ecx,116(%edi)
+
+# qhasm:   *(uint32 *) (out + 120) = in14
+# asm 1: movl <in14=int32#3,120(<out=int32#6)
+# asm 2: movl <in14=%edx,120(<out=%edi)
+movl %edx,120(%edi)
+
+# qhasm:   *(uint32 *) (out + 124) = in15
+# asm 1: movl <in15=int32#4,124(<out=int32#6)
+# asm 2: movl <in15=%ebx,124(<out=%edi)
+movl %ebx,124(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int32#5),<in12=int32#1
+# asm 2: xorl 176(<m=%esi),<in12=%eax
+xorl 176(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int32#5),<in13=int32#2
+# asm 2: xorl 180(<m=%esi),<in13=%ecx
+xorl 180(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int32#5),<in14=int32#3
+# asm 2: xorl 184(<m=%esi),<in14=%edx
+xorl 184(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int32#5),<in15=int32#4
+# asm 2: xorl 188(<m=%esi),<in15=%ebx
+xorl 188(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 176) = in12
+# asm 1: movl <in12=int32#1,176(<out=int32#6)
+# asm 2: movl <in12=%eax,176(<out=%edi)
+movl %eax,176(%edi)
+
+# qhasm:   *(uint32 *) (out + 180) = in13
+# asm 1: movl <in13=int32#2,180(<out=int32#6)
+# asm 2: movl <in13=%ecx,180(<out=%edi)
+movl %ecx,180(%edi)
+
+# qhasm:   *(uint32 *) (out + 184) = in14
+# asm 1: movl <in14=int32#3,184(<out=int32#6)
+# asm 2: movl <in14=%edx,184(<out=%edi)
+movl %edx,184(%edi)
+
+# qhasm:   *(uint32 *) (out + 188) = in15
+# asm 1: movl <in15=int32#4,188(<out=int32#6)
+# asm 2: movl <in15=%ebx,188(<out=%edi)
+movl %ebx,188(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int32#5),<in12=int32#1
+# asm 2: xorl 240(<m=%esi),<in12=%eax
+xorl 240(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int32#5),<in13=int32#2
+# asm 2: xorl 244(<m=%esi),<in13=%ecx
+xorl 244(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int32#5),<in14=int32#3
+# asm 2: xorl 248(<m=%esi),<in14=%edx
+xorl 248(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int32#5),<in15=int32#4
+# asm 2: xorl 252(<m=%esi),<in15=%ebx
+xorl 252(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 240) = in12
+# asm 1: movl <in12=int32#1,240(<out=int32#6)
+# asm 2: movl <in12=%eax,240(<out=%edi)
+movl %eax,240(%edi)
+
+# qhasm:   *(uint32 *) (out + 244) = in13
+# asm 1: movl <in13=int32#2,244(<out=int32#6)
+# asm 2: movl <in13=%ecx,244(<out=%edi)
+movl %ecx,244(%edi)
+
+# qhasm:   *(uint32 *) (out + 248) = in14
+# asm 1: movl <in14=int32#3,248(<out=int32#6)
+# asm 2: movl <in14=%edx,248(<out=%edi)
+movl %edx,248(%edi)
+
+# qhasm:   *(uint32 *) (out + 252) = in15
+# asm 1: movl <in15=int32#4,252(<out=int32#6)
+# asm 2: movl <in15=%ebx,252(<out=%edi)
+movl %ebx,252(%edi)
+
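+# note: this completes one 256-byte pass (four 64-byte Salsa20 blocks xor'ed into
+# out); the code below advances m and out by 256 and loops back to bytesatleast256
+# while at least 256 bytes of input remain.
+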
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:   bytes -= 256
+# asm 1: sub  $256,<bytes=int32#1
+# asm 2: sub  $256,<bytes=%eax
+sub  $256,%eax
+
+# qhasm:   m += 256
+# asm 1: add  $256,<m=int32#5
+# asm 2: add  $256,<m=%esi
+add  $256,%esi
+
+# qhasm:   out += 256
+# asm 1: add  $256,<out=int32#6
+# asm 2: add  $256,<out=%edi
+add  $256,%edi
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#6,>out_stack=stack32#6
+# asm 2: movl <out=%edi,>out_stack=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:                            unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int32#1
+# asm 2: cmp  $256,<bytes=%eax
+cmp  $256,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm:                 unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#1
+# asm 2: cmp  $0,<bytes=%eax
+cmp  $0,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
+
+# qhasm:                   unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int32#1
+# asm 2: cmp  $64,<bytes=%eax
+cmp  $64,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto nocopy if !unsigned<
+jae ._nocopy
+
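+# note: fewer than 64 bytes remain, so the real destination is parked in ctarget and
+# out/m are pointed at the on-stack tmp buffer; a full 64-byte block is produced there
+# and only the remaining bytes are copied back to ctarget before reaching done.
+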
+# qhasm:     ctarget = out
+# asm 1: movl <out=int32#6,>ctarget=stack32#6
+# asm 2: movl <out=%edi,>ctarget=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:     out = &tmp
+# asm 1: leal <tmp=stack512#1,>out=int32#6
+# asm 2: leal <tmp=640(%esp),>out=%edi
+leal 640(%esp),%edi
+
+# qhasm:     i = bytes
+# asm 1: mov  <bytes=int32#1,>i=int32#2
+# asm 2: mov  <bytes=%eax,>i=%ecx
+mov  %eax,%ecx
+
+# qhasm:     while (i) { *out++ = *m++; --i }
+rep movsb
+
+# qhasm:     out = &tmp
+# asm 1: leal <tmp=stack512#1,>out=int32#6
+# asm 2: leal <tmp=640(%esp),>out=%edi
+leal 640(%esp),%edi
+
+# qhasm:     m = &tmp
+# asm 1: leal <tmp=stack512#1,>m=int32#5
+# asm 2: leal <tmp=640(%esp),>m=%esi
+leal 640(%esp),%esi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:   nocopy:
+._nocopy:
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#1,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%eax,>bytes_stack=24(%esp)
+movl %eax,24(%esp)
+
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#3,>diag0=int6464#1
+# asm 2: movdqa <x0=64(%esp),>diag0=%xmm0
+movdqa 64(%esp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#2,>diag1=int6464#2
+# asm 2: movdqa <x1=48(%esp),>diag1=%xmm1
+movdqa 48(%esp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#4,>diag2=int6464#3
+# asm 2: movdqa <x2=80(%esp),>diag2=%xmm2
+movdqa 80(%esp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#1,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%esp),>diag3=%xmm3
+movdqa 32(%esp),%xmm3
+
+# qhasm:                     a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 20
+# asm 1: mov  $20,>i=int32#1
+# asm 2: mov  $20,>i=%eax
+mov  $20,%eax
+
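+# note: single-block loop.  i counts down from 20 in steps of 4, and each pass below
+# performs four Salsa20 rounds on the diagonally ordered state in diag0..diag3: each
+# <<< r rotation is built from pslld r / psrld (32-r) / two pxor, and the pshufd
+# shuffles (0x93, 0x4e, 0x39) rotate the vector lanes to keep the diagonals aligned.
+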
+# qhasm: mainloop2:
+._mainloop2:
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:                  unsigned>? i -= 4
+# asm 1: sub  $4,<i=int32#1
+# asm 2: sub  $4,<i=%eax
+sub  $4,%eax
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm:                 b0 = 0
+# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
+pxor   %xmm7,%xmm7
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
+
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#3,<diag0=int6464#1
+# asm 2: paddd <x0=64(%esp),<diag0=%xmm0
+paddd 64(%esp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#2,<diag1=int6464#2
+# asm 2: paddd <x1=48(%esp),<diag1=%xmm1
+paddd 48(%esp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#4,<diag2=int6464#3
+# asm 2: paddd <x2=80(%esp),<diag2=%xmm2
+paddd 80(%esp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#1,<diag3=int6464#4
+# asm 2: paddd <x3=32(%esp),<diag3=%xmm3
+paddd 32(%esp),%xmm3
+
+# qhasm: in0 = diag0
+# asm 1: movd   <diag0=int6464#1,>in0=int32#1
+# asm 2: movd   <diag0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm: in12 = diag1
+# asm 1: movd   <diag1=int6464#2,>in12=int32#2
+# asm 2: movd   <diag1=%xmm1,>in12=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in8 = diag2
+# asm 1: movd   <diag2=int6464#3,>in8=int32#3
+# asm 2: movd   <diag2=%xmm2,>in8=%edx
+movd   %xmm2,%edx
+
+# qhasm: in4 = diag3
+# asm 1: movd   <diag3=int6464#4,>in4=int32#4
+# asm 2: movd   <diag3=%xmm3,>in4=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int32#5),<in0=int32#1
+# asm 2: xorl 0(<m=%esi),<in0=%eax
+xorl 0(%esi),%eax
+
+# qhasm: in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int32#5),<in12=int32#2
+# asm 2: xorl 48(<m=%esi),<in12=%ecx
+xorl 48(%esi),%ecx
+
+# qhasm: in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int32#5),<in8=int32#3
+# asm 2: xorl 32(<m=%esi),<in8=%edx
+xorl 32(%esi),%edx
+
+# qhasm: in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int32#5),<in4=int32#4
+# asm 2: xorl 16(<m=%esi),<in4=%ebx
+xorl 16(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int32#1,0(<out=int32#6)
+# asm 2: movl <in0=%eax,0(<out=%edi)
+movl %eax,0(%edi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int32#2,48(<out=int32#6)
+# asm 2: movl <in12=%ecx,48(<out=%edi)
+movl %ecx,48(%edi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int32#3,32(<out=int32#6)
+# asm 2: movl <in8=%edx,32(<out=%edi)
+movl %edx,32(%edi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int32#4,16(<out=int32#6)
+# asm 2: movl <in4=%ebx,16(<out=%edi)
+movl %ebx,16(%edi)
+
+# qhasm: in5 = diag0
+# asm 1: movd   <diag0=int6464#1,>in5=int32#1
+# asm 2: movd   <diag0=%xmm0,>in5=%eax
+movd   %xmm0,%eax
+
+# qhasm: in1 = diag1
+# asm 1: movd   <diag1=int6464#2,>in1=int32#2
+# asm 2: movd   <diag1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in13 = diag2
+# asm 1: movd   <diag2=int6464#3,>in13=int32#3
+# asm 2: movd   <diag2=%xmm2,>in13=%edx
+movd   %xmm2,%edx
+
+# qhasm: in9 = diag3
+# asm 1: movd   <diag3=int6464#4,>in9=int32#4
+# asm 2: movd   <diag3=%xmm3,>in9=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int32#5),<in5=int32#1
+# asm 2: xorl 20(<m=%esi),<in5=%eax
+xorl 20(%esi),%eax
+
+# qhasm: in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int32#5),<in1=int32#2
+# asm 2: xorl 4(<m=%esi),<in1=%ecx
+xorl 4(%esi),%ecx
+
+# qhasm: in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int32#5),<in13=int32#3
+# asm 2: xorl 52(<m=%esi),<in13=%edx
+xorl 52(%esi),%edx
+
+# qhasm: in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int32#5),<in9=int32#4
+# asm 2: xorl 36(<m=%esi),<in9=%ebx
+xorl 36(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int32#1,20(<out=int32#6)
+# asm 2: movl <in5=%eax,20(<out=%edi)
+movl %eax,20(%edi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int32#2,4(<out=int32#6)
+# asm 2: movl <in1=%ecx,4(<out=%edi)
+movl %ecx,4(%edi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int32#3,52(<out=int32#6)
+# asm 2: movl <in13=%edx,52(<out=%edi)
+movl %edx,52(%edi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int32#4,36(<out=int32#6)
+# asm 2: movl <in9=%ebx,36(<out=%edi)
+movl %ebx,36(%edi)
+
+# qhasm: in10 = diag0
+# asm 1: movd   <diag0=int6464#1,>in10=int32#1
+# asm 2: movd   <diag0=%xmm0,>in10=%eax
+movd   %xmm0,%eax
+
+# qhasm: in6 = diag1
+# asm 1: movd   <diag1=int6464#2,>in6=int32#2
+# asm 2: movd   <diag1=%xmm1,>in6=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in2 = diag2
+# asm 1: movd   <diag2=int6464#3,>in2=int32#3
+# asm 2: movd   <diag2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm: in14 = diag3
+# asm 1: movd   <diag3=int6464#4,>in14=int32#4
+# asm 2: movd   <diag3=%xmm3,>in14=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int32#5),<in10=int32#1
+# asm 2: xorl 40(<m=%esi),<in10=%eax
+xorl 40(%esi),%eax
+
+# qhasm: in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int32#5),<in6=int32#2
+# asm 2: xorl 24(<m=%esi),<in6=%ecx
+xorl 24(%esi),%ecx
+
+# qhasm: in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int32#5),<in2=int32#3
+# asm 2: xorl 8(<m=%esi),<in2=%edx
+xorl 8(%esi),%edx
+
+# qhasm: in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int32#5),<in14=int32#4
+# asm 2: xorl 56(<m=%esi),<in14=%ebx
+xorl 56(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int32#1,40(<out=int32#6)
+# asm 2: movl <in10=%eax,40(<out=%edi)
+movl %eax,40(%edi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int32#2,24(<out=int32#6)
+# asm 2: movl <in6=%ecx,24(<out=%edi)
+movl %ecx,24(%edi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int32#3,8(<out=int32#6)
+# asm 2: movl <in2=%edx,8(<out=%edi)
+movl %edx,8(%edi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int32#4,56(<out=int32#6)
+# asm 2: movl <in14=%ebx,56(<out=%edi)
+movl %ebx,56(%edi)
+
+# qhasm: in15 = diag0
+# asm 1: movd   <diag0=int6464#1,>in15=int32#1
+# asm 2: movd   <diag0=%xmm0,>in15=%eax
+movd   %xmm0,%eax
+
+# qhasm: in11 = diag1
+# asm 1: movd   <diag1=int6464#2,>in11=int32#2
+# asm 2: movd   <diag1=%xmm1,>in11=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in7 = diag2
+# asm 1: movd   <diag2=int6464#3,>in7=int32#3
+# asm 2: movd   <diag2=%xmm2,>in7=%edx
+movd   %xmm2,%edx
+
+# qhasm: in3 = diag3
+# asm 1: movd   <diag3=int6464#4,>in3=int32#4
+# asm 2: movd   <diag3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int32#5),<in15=int32#1
+# asm 2: xorl 60(<m=%esi),<in15=%eax
+xorl 60(%esi),%eax
+
+# qhasm: in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int32#5),<in11=int32#2
+# asm 2: xorl 44(<m=%esi),<in11=%ecx
+xorl 44(%esi),%ecx
+
+# qhasm: in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int32#5),<in7=int32#3
+# asm 2: xorl 28(<m=%esi),<in7=%edx
+xorl 28(%esi),%edx
+
+# qhasm: in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int32#5),<in3=int32#4
+# asm 2: xorl 12(<m=%esi),<in3=%ebx
+xorl 12(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int32#1,60(<out=int32#6)
+# asm 2: movl <in15=%eax,60(<out=%edi)
+movl %eax,60(%edi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int32#2,44(<out=int32#6)
+# asm 2: movl <in11=%ecx,44(<out=%edi)
+movl %ecx,44(%edi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int32#3,28(<out=int32#6)
+# asm 2: movl <in7=%edx,28(<out=%edi)
+movl %edx,28(%edi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int32#4,12(<out=int32#6)
+# asm 2: movl <in3=%ebx,12(<out=%edi)
+movl %ebx,12(%edi)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
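+# note: advance the 64-bit block counter for the next 64-byte block; its low word is
+# kept in x2[0] and its high word in x3[1], hence the add/adc pair below.
+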
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#4,>in8=int32#2
+# asm 2: movl <x2=80(%esp),>in8=%ecx
+movl 80(%esp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#1,>in9=int32#3
+# asm 2: movl 4+<x3=32(%esp),>in9=%edx
+movl 4+32(%esp),%edx
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#3,4+<x3=stack128#1
+# asm 2: movl <in9=%edx,4+<x3=32(%esp)
+movl %edx,4+32(%esp)
+
+# qhasm:                          unsigned>? unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int32#1
+# asm 2: cmp  $64,<bytes=%eax
+cmp  $64,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm:     goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm:       m = out
+# asm 1: mov  <out=int32#6,>m=int32#5
+# asm 2: mov  <out=%edi,>m=%esi
+mov  %edi,%esi
+
+# qhasm:       out = ctarget
+# asm 1: movl <ctarget=stack32#6,>out=int32#6
+# asm 2: movl <ctarget=20(%esp),>out=%edi
+movl 20(%esp),%edi
+
+# qhasm:       i = bytes
+# asm 1: mov  <bytes=int32#1,>i=int32#2
+# asm 2: mov  <bytes=%eax,>i=%ecx
+mov  %eax,%ecx
+
+# qhasm:       while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     done:
+._done:
+
+# qhasm:     eax = eax_stack
+# asm 1: movl <eax_stack=stack32#1,>eax=int32#1
+# asm 2: movl <eax_stack=0(%esp),>eax=%eax
+movl 0(%esp),%eax
+
+# qhasm:     ebx = ebx_stack
+# asm 1: movl <ebx_stack=stack32#2,>ebx=int32#4
+# asm 2: movl <ebx_stack=4(%esp),>ebx=%ebx
+movl 4(%esp),%ebx
+
+# qhasm:     esi = esi_stack
+# asm 1: movl <esi_stack=stack32#3,>esi=int32#5
+# asm 2: movl <esi_stack=8(%esp),>esi=%esi
+movl 8(%esp),%esi
+
+# qhasm:     edi = edi_stack
+# asm 1: movl <edi_stack=stack32#4,>edi=int32#6
+# asm 2: movl <edi_stack=12(%esp),>edi=%edi
+movl 12(%esp),%edi
+
+# qhasm:     ebp = ebp_stack
+# asm 1: movl <ebp_stack=stack32#5,>ebp=int32#7
+# asm 2: movl <ebp_stack=16(%esp),>ebp=%ebp
+movl 16(%esp),%ebp
+
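+# note: %eax was reloaded from eax_stack above and holds the stack adjustment made by
+# the matching enter sequence, so adding it back to %esp releases the frame; the xor
+# sets the return value to 0.
+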
+# qhasm:     leave
+add %eax,%esp
+xor %eax,%eax
+ret
+
+# qhasm:   bytesatleast65:
+._bytesatleast65:
+
+# qhasm:   bytes -= 64
+# asm 1: sub  $64,<bytes=int32#1
+# asm 2: sub  $64,<bytes=%eax
+sub  $64,%eax
+
+# qhasm:   out += 64
+# asm 1: add  $64,<out=int32#6
+# asm 2: add  $64,<out=%edi
+add  $64,%edi
+
+# qhasm:   m += 64
+# asm 1: add  $64,<m=int32#5
+# asm 2: add  $64,<m=%esi
+add  $64,%esi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
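
The pslld/psrld/pxor triplets and the pshufd lane rotations in the file above are a
four-lane SSE2 rendering of the Salsa20 double round.  For reference only (this sketch
is not part of the NaCl sources; rotl32 and salsa20_doubleround are illustrative
names), the same double round in portable C:

    #include <stdint.h>

    /* 32-bit left rotation: what each pslld c / psrld (32-c) / pxor sequence computes,
       one lane at a time. */
    static uint32_t rotl32(uint32_t x, int c)
    {
        return (x << c) | (x >> (32 - c));
    }

    /* One Salsa20 double round (column round then row round) on the 16-word state,
       using the rotation constants 7, 9, 13, 18 seen in the assembly above. */
    static void salsa20_doubleround(uint32_t x[16])
    {
        /* column round */
        x[ 4] ^= rotl32(x[ 0] + x[12], 7);  x[ 8] ^= rotl32(x[ 4] + x[ 0], 9);
        x[12] ^= rotl32(x[ 8] + x[ 4],13);  x[ 0] ^= rotl32(x[12] + x[ 8],18);
        x[ 9] ^= rotl32(x[ 5] + x[ 1], 7);  x[13] ^= rotl32(x[ 9] + x[ 5], 9);
        x[ 1] ^= rotl32(x[13] + x[ 9],13);  x[ 5] ^= rotl32(x[ 1] + x[13],18);
        x[14] ^= rotl32(x[10] + x[ 6], 7);  x[ 2] ^= rotl32(x[14] + x[10], 9);
        x[ 6] ^= rotl32(x[ 2] + x[14],13);  x[10] ^= rotl32(x[ 6] + x[ 2],18);
        x[ 3] ^= rotl32(x[15] + x[11], 7);  x[ 7] ^= rotl32(x[ 3] + x[15], 9);
        x[11] ^= rotl32(x[ 7] + x[ 3],13);  x[15] ^= rotl32(x[11] + x[ 7],18);
        /* row round */
        x[ 1] ^= rotl32(x[ 0] + x[ 3], 7);  x[ 2] ^= rotl32(x[ 1] + x[ 0], 9);
        x[ 3] ^= rotl32(x[ 2] + x[ 1],13);  x[ 0] ^= rotl32(x[ 3] + x[ 2],18);
        x[ 6] ^= rotl32(x[ 5] + x[ 4], 7);  x[ 7] ^= rotl32(x[ 6] + x[ 5], 9);
        x[ 4] ^= rotl32(x[ 7] + x[ 6],13);  x[ 5] ^= rotl32(x[ 4] + x[ 7],18);
        x[11] ^= rotl32(x[10] + x[ 9], 7);  x[ 8] ^= rotl32(x[11] + x[10], 9);
        x[ 9] ^= rotl32(x[ 8] + x[11],13);  x[10] ^= rotl32(x[ 9] + x[ 8],18);
        x[12] ^= rotl32(x[15] + x[14], 7);  x[13] ^= rotl32(x[12] + x[15], 9);
        x[14] ^= rotl32(x[13] + x[12],13);  x[15] ^= rotl32(x[14] + x[13],18);
    }

Ten such double rounds give the 20 rounds that the i counter above steps through four
at a time; the salsa2012 files that follow use the same round function but only 12
rounds.
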
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/api.h b/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
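
These two constants tell the rest of NaCl what this implementation expects: a 32-byte
key and an 8-byte nonce.  A minimal usage sketch, assuming the crypto_stream_salsa2012
wrapper name that NaCl generates for this operation (the header name, the wrapper name
as written here, and the buffer sizes are assumptions, not taken from this diff):

    #include "crypto_stream_salsa2012.h"   /* assumed NaCl-generated wrapper header */

    /* Fill out[] with 1 KiB of Salsa20/12 keystream under a 32-byte key and an
       8-byte nonce; returns 0 on success. */
    int example(unsigned char out[1024],
                const unsigned char key[32],    /* CRYPTO_KEYBYTES   */
                const unsigned char nonce[8])   /* CRYPTO_NONCEBYTES */
    {
        return crypto_stream_salsa2012(out, 1024, nonce, key);
    }
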
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/implementors b/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/stream.s b/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/stream.s
new file mode 100644
index 00000000..0e26dc9f
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/amd64_xmm6/stream.s
@@ -0,0 +1,4823 @@
+
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: stack64 r11_stack
+
+# qhasm: stack64 r12_stack
+
+# qhasm: stack64 r13_stack
+
+# qhasm: stack64 r14_stack
+
+# qhasm: stack64 r15_stack
+
+# qhasm: stack64 rbx_stack
+
+# qhasm: stack64 rbp_stack
+
+# qhasm: int64 a
+
+# qhasm: int64 arg1
+
+# qhasm: int64 arg2
+
+# qhasm: int64 arg3
+
+# qhasm: int64 arg4
+
+# qhasm: int64 arg5
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: int64 k
+
+# qhasm: int64 kbits
+
+# qhasm: int64 iv
+
+# qhasm: int64 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int64 m
+
+# qhasm: int64 out
+
+# qhasm: int64 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: int6464 y0
+
+# qhasm: int6464 y1
+
+# qhasm: int6464 y2
+
+# qhasm: int6464 y3
+
+# qhasm: int6464 y4
+
+# qhasm: int6464 y5
+
+# qhasm: int6464 y6
+
+# qhasm: int6464 y7
+
+# qhasm: int6464 y8
+
+# qhasm: int6464 y9
+
+# qhasm: int6464 y10
+
+# qhasm: int6464 y11
+
+# qhasm: int6464 y12
+
+# qhasm: int6464 y13
+
+# qhasm: int6464 y14
+
+# qhasm: int6464 y15
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int64 in0
+
+# qhasm: int64 in1
+
+# qhasm: int64 in2
+
+# qhasm: int64 in3
+
+# qhasm: int64 in4
+
+# qhasm: int64 in5
+
+# qhasm: int64 in6
+
+# qhasm: int64 in7
+
+# qhasm: int64 in8
+
+# qhasm: int64 in9
+
+# qhasm: int64 in10
+
+# qhasm: int64 in11
+
+# qhasm: int64 in12
+
+# qhasm: int64 in13
+
+# qhasm: int64 in14
+
+# qhasm: int64 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: int64 ctarget
+
+# qhasm: stack64 bytes_backup
+
+# qhasm: enter crypto_stream_salsa2012_amd64_xmm6
+.text
+.p2align 5
+.globl _crypto_stream_salsa2012_amd64_xmm6
+.globl crypto_stream_salsa2012_amd64_xmm6
+_crypto_stream_salsa2012_amd64_xmm6:
+crypto_stream_salsa2012_amd64_xmm6:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
+
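+# note: the four instructions above align %rsp down to a 32-byte boundary and reserve
+# at least 480 bytes of frame; %r11 keeps the total adjustment so the epilogue can
+# restore %rsp, and the registers that must survive the call are spilled below.
+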
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: bytes = arg2
+# asm 1: mov  <arg2=int64#2,>bytes=int64#6
+# asm 2: mov  <arg2=%rsi,>bytes=%r9
+mov  %rsi,%r9
+
+# qhasm: out = arg1
+# asm 1: mov  <arg1=int64#1,>out=int64#1
+# asm 2: mov  <arg1=%rdi,>out=%rdi
+mov  %rdi,%rdi
+
+# qhasm: m = out
+# asm 1: mov  <out=int64#1,>m=int64#2
+# asm 2: mov  <out=%rdi,>m=%rsi
+mov  %rdi,%rsi
+
+# qhasm: iv = arg3
+# asm 1: mov  <arg3=int64#3,>iv=int64#3
+# asm 2: mov  <arg3=%rdx,>iv=%rdx
+mov  %rdx,%rdx
+
+# qhasm: k = arg4
+# asm 1: mov  <arg4=int64#4,>k=int64#8
+# asm 2: mov  <arg4=%rcx,>k=%r10
+mov  %rcx,%r10
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov  $0,>a=int64#7
+# asm 2: mov  $0,>a=%rax
+mov  $0,%rax
+
+# qhasm: i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
+
+# qhasm: out -= bytes
+# asm 1: sub  <bytes=int64#6,<out=int64#1
+# asm 2: sub  <bytes=%r9,<out=%rdi
+sub  %r9,%rdi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
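+
+# note: this entry point implements the keystream-only crypto_stream call.
+# It aliases m to out, zero-fills the output buffer (a = 0, rep stosb),
+# rewinds out, and then jumps into the shared ._start path used by the
+# _xor entry point, so pure keystream falls out of xoring against zeros.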
+
+# qhasm: enter crypto_stream_salsa2012_amd64_xmm6_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa2012_amd64_xmm6_xor
+.globl crypto_stream_salsa2012_amd64_xmm6_xor
+_crypto_stream_salsa2012_amd64_xmm6_xor:
+crypto_stream_salsa2012_amd64_xmm6_xor:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: out = arg1
+# asm 1: mov  <arg1=int64#1,>out=int64#1
+# asm 2: mov  <arg1=%rdi,>out=%rdi
+mov  %rdi,%rdi
+
+# qhasm: m = arg2
+# asm 1: mov  <arg2=int64#2,>m=int64#2
+# asm 2: mov  <arg2=%rsi,>m=%rsi
+mov  %rsi,%rsi
+
+# qhasm: bytes = arg3
+# asm 1: mov  <arg3=int64#3,>bytes=int64#6
+# asm 2: mov  <arg3=%rdx,>bytes=%r9
+mov  %rdx,%r9
+
+# qhasm: iv = arg4
+# asm 1: mov  <arg4=int64#4,>iv=int64#3
+# asm 2: mov  <arg4=%rcx,>iv=%rdx
+mov  %rcx,%rdx
+
+# qhasm: k = arg5
+# asm 1: mov  <arg5=int64#5,>k=int64#8
+# asm 2: mov  <arg5=%r8,>k=%r10
+mov  %r8,%r10
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm:   in12 = *(uint32 *) (k + 20)
+# asm 1: movl   20(<k=int64#8),>in12=int64#4d
+# asm 2: movl   20(<k=%r10),>in12=%ecx
+movl   20(%r10),%ecx
+
+# qhasm:   in1 = *(uint32 *) (k + 0)
+# asm 1: movl   0(<k=int64#8),>in1=int64#5d
+# asm 2: movl   0(<k=%r10),>in1=%r8d
+movl   0(%r10),%r8d
+
+# qhasm:   in6 = *(uint32 *) (iv + 0)
+# asm 1: movl   0(<iv=int64#3),>in6=int64#7d
+# asm 2: movl   0(<iv=%rdx),>in6=%eax
+movl   0(%rdx),%eax
+
+# qhasm:   in11 = *(uint32 *) (k + 16)
+# asm 1: movl   16(<k=int64#8),>in11=int64#9d
+# asm 2: movl   16(<k=%r10),>in11=%r11d
+movl   16(%r10),%r11d
+
+# qhasm:   ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int64#4d,>x1=stack128#1
+# asm 2: movl <in12=%ecx,>x1=0(%rsp)
+movl %ecx,0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int64#5d,4+<x1=stack128#1
+# asm 2: movl <in1=%r8d,4+<x1=0(%rsp)
+movl %r8d,4+0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int64#7d,8+<x1=stack128#1
+# asm 2: movl <in6=%eax,8+<x1=0(%rsp)
+movl %eax,8+0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int64#9d,12+<x1=stack128#1
+# asm 2: movl <in11=%r11d,12+<x1=0(%rsp)
+movl %r11d,12+0(%rsp)
+
+# qhasm:   in8 = 0
+# asm 1: mov  $0,>in8=int64#4
+# asm 2: mov  $0,>in8=%rcx
+mov  $0,%rcx
+
+# qhasm:   in13 = *(uint32 *) (k + 24)
+# asm 1: movl   24(<k=int64#8),>in13=int64#5d
+# asm 2: movl   24(<k=%r10),>in13=%r8d
+movl   24(%r10),%r8d
+
+# qhasm:   in2 = *(uint32 *) (k + 4)
+# asm 1: movl   4(<k=int64#8),>in2=int64#7d
+# asm 2: movl   4(<k=%r10),>in2=%eax
+movl   4(%r10),%eax
+
+# qhasm:   in7 = *(uint32 *) (iv + 4)
+# asm 1: movl   4(<iv=int64#3),>in7=int64#3d
+# asm 2: movl   4(<iv=%rdx),>in7=%edx
+movl   4(%rdx),%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int64#5d,4+<x2=stack128#2
+# asm 2: movl <in13=%r8d,4+<x2=16(%rsp)
+movl %r8d,4+16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int64#7d,8+<x2=stack128#2
+# asm 2: movl <in2=%eax,8+<x2=16(%rsp)
+movl %eax,8+16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int64#3d,12+<x2=stack128#2
+# asm 2: movl <in7=%edx,12+<x2=16(%rsp)
+movl %edx,12+16(%rsp)
+
+# qhasm:   in4 = *(uint32 *) (k + 12)
+# asm 1: movl   12(<k=int64#8),>in4=int64#3d
+# asm 2: movl   12(<k=%r10),>in4=%edx
+movl   12(%r10),%edx
+
+# qhasm:   in9 = 0
+# asm 1: mov  $0,>in9=int64#4
+# asm 2: mov  $0,>in9=%rcx
+mov  $0,%rcx
+
+# qhasm:   in14 = *(uint32 *) (k + 28)
+# asm 1: movl   28(<k=int64#8),>in14=int64#5d
+# asm 2: movl   28(<k=%r10),>in14=%r8d
+movl   28(%r10),%r8d
+
+# qhasm:   in3 = *(uint32 *) (k + 8)
+# asm 1: movl   8(<k=int64#8),>in3=int64#7d
+# asm 2: movl   8(<k=%r10),>in3=%eax
+movl   8(%r10),%eax
+
+# qhasm:   ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int64#3d,>x3=stack128#3
+# asm 2: movl <in4=%edx,>x3=32(%rsp)
+movl %edx,32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int64#5d,8+<x3=stack128#3
+# asm 2: movl <in14=%r8d,8+<x3=32(%rsp)
+movl %r8d,8+32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int64#7d,12+<x3=stack128#3
+# asm 2: movl <in3=%eax,12+<x3=32(%rsp)
+movl %eax,12+32(%rsp)
+
+# qhasm:   in0 = 1634760805
+# asm 1: mov  $1634760805,>in0=int64#3
+# asm 2: mov  $1634760805,>in0=%rdx
+mov  $1634760805,%rdx
+
+# qhasm:   in5 = 857760878
+# asm 1: mov  $857760878,>in5=int64#4
+# asm 2: mov  $857760878,>in5=%rcx
+mov  $857760878,%rcx
+
+# qhasm:   in10 = 2036477234
+# asm 1: mov  $2036477234,>in10=int64#5
+# asm 2: mov  $2036477234,>in10=%r8
+mov  $2036477234,%r8
+
+# qhasm:   in15 = 1797285236
+# asm 1: mov  $1797285236,>in15=int64#7
+# asm 2: mov  $1797285236,>in15=%rax
+mov  $1797285236,%rax
+
+# qhasm:   ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int64#3d,>x0=stack128#4
+# asm 2: movl <in0=%edx,>x0=48(%rsp)
+movl %edx,48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int64#4d,4+<x0=stack128#4
+# asm 2: movl <in5=%ecx,4+<x0=48(%rsp)
+movl %ecx,4+48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int64#5d,8+<x0=stack128#4
+# asm 2: movl <in10=%r8d,8+<x0=48(%rsp)
+movl %r8d,8+48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int64#7d,12+<x0=stack128#4
+# asm 2: movl <in15=%eax,12+<x0=48(%rsp)
+movl %eax,12+48(%rsp)
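+
+# note: x0..x3 now hold the 16-word Salsa20 state packed along the
+# diagonals of the 4x4 state matrix: x0 = words {0,5,10,15} (the four
+# constants), x1 = {12,1,6,11}, x2 = {8,13,2,7}, x3 = {4,9,14,3}.
+# The constants 1634760805, 857760878, 2036477234 and 1797285236 are the
+# little-endian words of "expand 32-byte k"
+# (0x61707865, 0x3320646e, 0x79622d32, 0x6b206574).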
+
+# qhasm:                               unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int64#6
+# asm 2: cmp  $256,<bytes=%r9
+cmp  $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
+
+# qhasm:   z0 = x0
+# asm 1: movdqa <x0=stack128#4,>z0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>z0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm:   z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm:   z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm:   z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm:   orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=64(%rsp)
+movdqa %xmm1,64(%rsp)
+
+# qhasm:   orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=80(%rsp)
+movdqa %xmm2,80(%rsp)
+
+# qhasm:   orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=96(%rsp)
+movdqa %xmm3,96(%rsp)
+
+# qhasm:   orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=112(%rsp)
+movdqa %xmm0,112(%rsp)
+
+# qhasm:   z1 = x1
+# asm 1: movdqa <x1=stack128#1,>z1=int6464#1
+# asm 2: movdqa <x1=0(%rsp),>z1=%xmm0
+movdqa 0(%rsp),%xmm0
+
+# qhasm:   z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm:   z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm:   z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm:   z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm:   orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=128(%rsp)
+movdqa %xmm1,128(%rsp)
+
+# qhasm:   orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=144(%rsp)
+movdqa %xmm2,144(%rsp)
+
+# qhasm:   orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=160(%rsp)
+movdqa %xmm3,160(%rsp)
+
+# qhasm:   orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=176(%rsp)
+movdqa %xmm0,176(%rsp)
+
+# qhasm:   z2 = x2
+# asm 1: movdqa <x2=stack128#2,>z2=int6464#1
+# asm 2: movdqa <x2=16(%rsp),>z2=%xmm0
+movdqa 16(%rsp),%xmm0
+
+# qhasm:   z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm:   z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm:   z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm:   orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=192(%rsp)
+movdqa %xmm1,192(%rsp)
+
+# qhasm:   orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=208(%rsp)
+movdqa %xmm2,208(%rsp)
+
+# qhasm:   orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=224(%rsp)
+movdqa %xmm0,224(%rsp)
+
+# qhasm:   z3 = x3
+# asm 1: movdqa <x3=stack128#3,>z3=int6464#1
+# asm 2: movdqa <x3=32(%rsp),>z3=%xmm0
+movdqa 32(%rsp),%xmm0
+
+# qhasm:   z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm:   z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm:   orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=240(%rsp)
+movdqa %xmm1,240(%rsp)
+
+# qhasm:   orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=256(%rsp)
+movdqa %xmm2,256(%rsp)
+
+# qhasm:   orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=272(%rsp)
+movdqa %xmm0,272(%rsp)
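+
+# note: the pshufd broadcasts above replicate 14 of the 16 state words into
+# all four lanes of an xmm register (orig0..orig7, orig10..orig15), so the
+# main loop computes four independent 64-byte Salsa20/12 blocks, i.e.
+# 256 bytes of keystream, per pass.  The block-counter words orig8/orig9
+# get distinct per-lane values in the ._bytesatleast256 block below.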
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#3d
+# asm 2: movl <x2=16(%rsp),>in8=%edx
+movl 16(%rsp),%edx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#4d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%ecx
+movl 4+32(%rsp),%ecx
+
+# qhasm:   ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int64#3d,>orig8=stack128#19
+# asm 2: movl <in8=%edx,>orig8=288(%rsp)
+movl %edx,288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int64#4d,>orig9=stack128#20
+# asm 2: movl <in9=%ecx,>orig9=304(%rsp)
+movl %ecx,304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int64#3d,4+<orig8=stack128#19
+# asm 2: movl <in8=%edx,4+<orig8=288(%rsp)
+movl %edx,4+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,4+<orig9=304(%rsp)
+movl %ecx,4+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int64#3d,8+<orig8=stack128#19
+# asm 2: movl <in8=%edx,8+<orig8=288(%rsp)
+movl %edx,8+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int64#4d,8+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,8+<orig9=304(%rsp)
+movl %ecx,8+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int64#3d,12+<orig8=stack128#19
+# asm 2: movl <in8=%edx,12+<orig8=288(%rsp)
+movl %edx,12+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int64#4d,12+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,12+<orig9=304(%rsp)
+movl %ecx,12+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#3d,>x2=stack128#2
+# asm 2: movl <in8=%edx,>x2=16(%rsp)
+movl %edx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
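+
+# note: the sequence above builds the per-lane block counters: orig8/orig9
+# now hold the low/high 32-bit halves of the 64-bit counter for lanes 0..3
+# (counter, counter+1, counter+2, counter+3), and x2[0]/x3[1] have been
+# advanced by 4 so the next 256-byte batch continues from counter+4.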
+
+# qhasm:   bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: i = 12
+# asm 1: mov  $12,>i=int64#3
+# asm 2: mov  $12,>i=%rdx
+mov  $12,%rdx
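+
+# note: i = 12 is the Salsa20/12 round count; each pass through ._mainloop1
+# performs one column round plus one row round (a double round) and
+# subtracts 2 from i, so the loop body runs six times.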
+
+# qhasm:   z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=64(%rsp),>z5=%xmm0
+movdqa 64(%rsp),%xmm0
+
+# qhasm:   z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=80(%rsp),>z10=%xmm1
+movdqa 80(%rsp),%xmm1
+
+# qhasm:   z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=96(%rsp),>z15=%xmm2
+movdqa 96(%rsp),%xmm2
+
+# qhasm:   z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=256(%rsp),>z14=%xmm3
+movdqa 256(%rsp),%xmm3
+
+# qhasm:   z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=272(%rsp),>z3=%xmm4
+movdqa 272(%rsp),%xmm4
+
+# qhasm:   z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=128(%rsp),>z6=%xmm5
+movdqa 128(%rsp),%xmm5
+
+# qhasm:   z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=144(%rsp),>z11=%xmm6
+movdqa 144(%rsp),%xmm6
+
+# qhasm:   z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=176(%rsp),>z1=%xmm7
+movdqa 176(%rsp),%xmm7
+
+# qhasm:   z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#9
+# asm 2: movdqa <orig7=192(%rsp),>z7=%xmm8
+movdqa 192(%rsp),%xmm8
+
+# qhasm:   z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#10
+# asm 2: movdqa <orig13=208(%rsp),>z13=%xmm9
+movdqa 208(%rsp),%xmm9
+
+# qhasm:   z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#11
+# asm 2: movdqa <orig2=224(%rsp),>z2=%xmm10
+movdqa 224(%rsp),%xmm10
+
+# qhasm:   z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#12
+# asm 2: movdqa <orig9=304(%rsp),>z9=%xmm11
+movdqa 304(%rsp),%xmm11
+
+# qhasm:   z0 = orig0
+# asm 1: movdqa <orig0=stack128#8,>z0=int6464#13
+# asm 2: movdqa <orig0=112(%rsp),>z0=%xmm12
+movdqa 112(%rsp),%xmm12
+
+# qhasm:   z12 = orig12
+# asm 1: movdqa <orig12=stack128#11,>z12=int6464#14
+# asm 2: movdqa <orig12=160(%rsp),>z12=%xmm13
+movdqa 160(%rsp),%xmm13
+
+# qhasm:   z4 = orig4
+# asm 1: movdqa <orig4=stack128#16,>z4=int6464#15
+# asm 2: movdqa <orig4=240(%rsp),>z4=%xmm14
+movdqa 240(%rsp),%xmm14
+
+# qhasm:   z8 = orig8
+# asm 1: movdqa <orig8=stack128#19,>z8=int6464#16
+# asm 2: movdqa <orig8=288(%rsp),>z8=%xmm15
+movdqa 288(%rsp),%xmm15
+
+# qhasm: mainloop1:
+._mainloop1:
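+
+# note: each quarter-round below computes b ^= (a + c) <<< k without a
+# vector rotate instruction: the sum is shifted left by k (pslld) and right
+# by 32-k (psrld) and both results are xored in.  The rotation counts are
+# the usual Salsa20 7, 9, 13, 18.  Two of the sixteen state vectors are
+# parked in stack slots at any time (z10/z15 here, z0/z5 later), since the
+# 16 state vectors plus the y/r temporaries do not fit in 16 xmm registers.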
+
+# qhasm: 						z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: 								z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: 		y4 = z12
+# asm 1: movdqa <z12=int6464#14,>y4=int6464#2
+# asm 2: movdqa <z12=%xmm13,>y4=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: uint32323232	y4 += z0
+# asm 1: paddd <z0=int6464#13,<y4=int6464#2
+# asm 2: paddd <z0=%xmm12,<y4=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: 		r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y4 <<= 7
+# asm 1: pslld $7,<y4=int6464#2
+# asm 2: pslld $7,<y4=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 		z4 ^= y4
+# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
+pxor  %xmm1,%xmm14
+
+# qhasm: uint32323232	r4 >>= 25
+# asm 1: psrld $25,<r4=int6464#3
+# asm 2: psrld $25,<r4=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 		z4 ^= r4
+# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm: 				y9 = z1
+# asm 1: movdqa <z1=int6464#8,>y9=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y9=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232			y9 += z5
+# asm 1: paddd <z5=int6464#1,<y9=int6464#2
+# asm 2: paddd <z5=%xmm0,<y9=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: 				r9 = y9
+# asm 1: movdqa <y9=int6464#2,>r9=int6464#3
+# asm 2: movdqa <y9=%xmm1,>r9=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y9 <<= 7
+# asm 1: pslld $7,<y9=int6464#2
+# asm 2: pslld $7,<y9=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 				z9 ^= y9
+# asm 1: pxor  <y9=int6464#2,<z9=int6464#12
+# asm 2: pxor  <y9=%xmm1,<z9=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm: uint32323232			r9 >>= 25
+# asm 1: psrld $25,<r9=int6464#3
+# asm 2: psrld $25,<r9=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 				z9 ^= r9
+# asm 1: pxor  <r9=int6464#3,<z9=int6464#12
+# asm 2: pxor  <r9=%xmm2,<z9=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm: 		y8 = z0
+# asm 1: movdqa <z0=int6464#13,>y8=int6464#2
+# asm 2: movdqa <z0=%xmm12,>y8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232	y8 += z4
+# asm 1: paddd <z4=int6464#15,<y8=int6464#2
+# asm 2: paddd <z4=%xmm14,<y8=%xmm1
+paddd %xmm14,%xmm1
+
+# qhasm: 		r8 = y8
+# asm 1: movdqa <y8=int6464#2,>r8=int6464#3
+# asm 2: movdqa <y8=%xmm1,>r8=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#2
+# asm 2: pslld $9,<y8=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 		z8 ^= y8
+# asm 1: pxor  <y8=int6464#2,<z8=int6464#16
+# asm 2: pxor  <y8=%xmm1,<z8=%xmm15
+pxor  %xmm1,%xmm15
+
+# qhasm: uint32323232	r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#3
+# asm 2: psrld $23,<r8=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 		z8 ^= r8
+# asm 1: pxor  <r8=int6464#3,<z8=int6464#16
+# asm 2: pxor  <r8=%xmm2,<z8=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm: 				y13 = z5
+# asm 1: movdqa <z5=int6464#1,>y13=int6464#2
+# asm 2: movdqa <z5=%xmm0,>y13=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232			y13 += z9
+# asm 1: paddd <z9=int6464#12,<y13=int6464#2
+# asm 2: paddd <z9=%xmm11,<y13=%xmm1
+paddd %xmm11,%xmm1
+
+# qhasm: 				r13 = y13
+# asm 1: movdqa <y13=int6464#2,>r13=int6464#3
+# asm 2: movdqa <y13=%xmm1,>r13=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#2
+# asm 2: pslld $9,<y13=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 				z13 ^= y13
+# asm 1: pxor  <y13=int6464#2,<z13=int6464#10
+# asm 2: pxor  <y13=%xmm1,<z13=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm: uint32323232			r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#3
+# asm 2: psrld $23,<r13=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 				z13 ^= r13
+# asm 1: pxor  <r13=int6464#3,<z13=int6464#10
+# asm 2: pxor  <r13=%xmm2,<z13=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm: 		y12 = z4
+# asm 1: movdqa <z4=int6464#15,>y12=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y12=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232	y12 += z8
+# asm 1: paddd <z8=int6464#16,<y12=int6464#2
+# asm 2: paddd <z8=%xmm15,<y12=%xmm1
+paddd %xmm15,%xmm1
+
+# qhasm: 		r12 = y12
+# asm 1: movdqa <y12=int6464#2,>r12=int6464#3
+# asm 2: movdqa <y12=%xmm1,>r12=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y12 <<= 13
+# asm 1: pslld $13,<y12=int6464#2
+# asm 2: pslld $13,<y12=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 		z12 ^= y12
+# asm 1: pxor  <y12=int6464#2,<z12=int6464#14
+# asm 2: pxor  <y12=%xmm1,<z12=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm: uint32323232	r12 >>= 19
+# asm 1: psrld $19,<r12=int6464#3
+# asm 2: psrld $19,<r12=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 		z12 ^= r12
+# asm 1: pxor  <r12=int6464#3,<z12=int6464#14
+# asm 2: pxor  <r12=%xmm2,<z12=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm: 				y1 = z9
+# asm 1: movdqa <z9=int6464#12,>y1=int6464#2
+# asm 2: movdqa <z9=%xmm11,>y1=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: uint32323232			y1 += z13
+# asm 1: paddd <z13=int6464#10,<y1=int6464#2
+# asm 2: paddd <z13=%xmm9,<y1=%xmm1
+paddd %xmm9,%xmm1
+
+# qhasm: 				r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#3
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y1 <<= 13
+# asm 1: pslld $13,<y1=int6464#2
+# asm 2: pslld $13,<y1=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 				z1 ^= y1
+# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm: uint32323232			r1 >>= 19
+# asm 1: psrld $19,<r1=int6464#3
+# asm 2: psrld $19,<r1=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 				z1 ^= r1
+# asm 1: pxor  <r1=int6464#3,<z1=int6464#8
+# asm 2: pxor  <r1=%xmm2,<z1=%xmm7
+pxor  %xmm2,%xmm7
+
+# qhasm: 		y0 = z8
+# asm 1: movdqa <z8=int6464#16,>y0=int6464#2
+# asm 2: movdqa <z8=%xmm15,>y0=%xmm1
+movdqa %xmm15,%xmm1
+
+# qhasm: uint32323232	y0 += z12
+# asm 1: paddd <z12=int6464#14,<y0=int6464#2
+# asm 2: paddd <z12=%xmm13,<y0=%xmm1
+paddd %xmm13,%xmm1
+
+# qhasm: 		r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 		z0 ^= y0
+# asm 1: pxor  <y0=int6464#2,<z0=int6464#13
+# asm 2: pxor  <y0=%xmm1,<z0=%xmm12
+pxor  %xmm1,%xmm12
+
+# qhasm: uint32323232	r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 		z0 ^= r0
+# asm 1: pxor  <r0=int6464#3,<z0=int6464#13
+# asm 2: pxor  <r0=%xmm2,<z0=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm: 						z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: 		z0_stack = z0
+# asm 1: movdqa <z0=int6464#13,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm12,>z0_stack=320(%rsp)
+movdqa %xmm12,320(%rsp)
+
+# qhasm: 				y5 = z13
+# asm 1: movdqa <z13=int6464#10,>y5=int6464#3
+# asm 2: movdqa <z13=%xmm9,>y5=%xmm2
+movdqa %xmm9,%xmm2
+
+# qhasm: uint32323232			y5 += z1
+# asm 1: paddd <z1=int6464#8,<y5=int6464#3
+# asm 2: paddd <z1=%xmm7,<y5=%xmm2
+paddd %xmm7,%xmm2
+
+# qhasm: 				r5 = y5
+# asm 1: movdqa <y5=int6464#3,>r5=int6464#13
+# asm 2: movdqa <y5=%xmm2,>r5=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232			y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#3
+# asm 2: pslld $18,<y5=%xmm2
+pslld $18,%xmm2
+
+# qhasm: 				z5 ^= y5
+# asm 1: pxor  <y5=int6464#3,<z5=int6464#1
+# asm 2: pxor  <y5=%xmm2,<z5=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm: uint32323232			r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#13
+# asm 2: psrld $14,<r5=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 				z5 ^= r5
+# asm 1: pxor  <r5=int6464#13,<z5=int6464#1
+# asm 2: pxor  <r5=%xmm12,<z5=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm: 						y14 = z6
+# asm 1: movdqa <z6=int6464#6,>y14=int6464#3
+# asm 2: movdqa <z6=%xmm5,>y14=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm: uint32323232					y14 += z10
+# asm 1: paddd <z10=int6464#2,<y14=int6464#3
+# asm 2: paddd <z10=%xmm1,<y14=%xmm2
+paddd %xmm1,%xmm2
+
+# qhasm: 						r14 = y14
+# asm 1: movdqa <y14=int6464#3,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm2,>r14=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232					y14 <<= 7
+# asm 1: pslld $7,<y14=int6464#3
+# asm 2: pslld $7,<y14=%xmm2
+pslld $7,%xmm2
+
+# qhasm: 						z14 ^= y14
+# asm 1: pxor  <y14=int6464#3,<z14=int6464#4
+# asm 2: pxor  <y14=%xmm2,<z14=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232					r14 >>= 25
+# asm 1: psrld $25,<r14=int6464#13
+# asm 2: psrld $25,<r14=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 						z14 ^= r14
+# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm: 								z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: 				z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm0,>z5_stack=336(%rsp)
+movdqa %xmm0,336(%rsp)
+
+# qhasm: 								y3 = z11
+# asm 1: movdqa <z11=int6464#7,>y3=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y3=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232							y3 += z15
+# asm 1: paddd <z15=int6464#3,<y3=int6464#1
+# asm 2: paddd <z15=%xmm2,<y3=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: 								r3 = y3
+# asm 1: movdqa <y3=int6464#1,>r3=int6464#13
+# asm 2: movdqa <y3=%xmm0,>r3=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y3 <<= 7
+# asm 1: pslld $7,<y3=int6464#1
+# asm 2: pslld $7,<y3=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 								z3 ^= y3
+# asm 1: pxor  <y3=int6464#1,<z3=int6464#5
+# asm 2: pxor  <y3=%xmm0,<z3=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm: uint32323232							r3 >>= 25
+# asm 1: psrld $25,<r3=int6464#13
+# asm 2: psrld $25,<r3=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 								z3 ^= r3
+# asm 1: pxor  <r3=int6464#13,<z3=int6464#5
+# asm 2: pxor  <r3=%xmm12,<z3=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm: 						y2 = z10
+# asm 1: movdqa <z10=int6464#2,>y2=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y2=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232					y2 += z14
+# asm 1: paddd <z14=int6464#4,<y2=int6464#1
+# asm 2: paddd <z14=%xmm3,<y2=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: 						r2 = y2
+# asm 1: movdqa <y2=int6464#1,>r2=int6464#13
+# asm 2: movdqa <y2=%xmm0,>r2=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#1
+# asm 2: pslld $9,<y2=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 						z2 ^= y2
+# asm 1: pxor  <y2=int6464#1,<z2=int6464#11
+# asm 2: pxor  <y2=%xmm0,<z2=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm: uint32323232					r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#13
+# asm 2: psrld $23,<r2=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 						z2 ^= r2
+# asm 1: pxor  <r2=int6464#13,<z2=int6464#11
+# asm 2: pxor  <r2=%xmm12,<z2=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm: 								y7 = z15
+# asm 1: movdqa <z15=int6464#3,>y7=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y7=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232							y7 += z3
+# asm 1: paddd <z3=int6464#5,<y7=int6464#1
+# asm 2: paddd <z3=%xmm4,<y7=%xmm0
+paddd %xmm4,%xmm0
+
+# qhasm: 								r7 = y7
+# asm 1: movdqa <y7=int6464#1,>r7=int6464#13
+# asm 2: movdqa <y7=%xmm0,>r7=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#1
+# asm 2: pslld $9,<y7=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 								z7 ^= y7
+# asm 1: pxor  <y7=int6464#1,<z7=int6464#9
+# asm 2: pxor  <y7=%xmm0,<z7=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm: uint32323232							r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#13
+# asm 2: psrld $23,<r7=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 								z7 ^= r7
+# asm 1: pxor  <r7=int6464#13,<z7=int6464#9
+# asm 2: pxor  <r7=%xmm12,<z7=%xmm8
+pxor  %xmm12,%xmm8
+
+# qhasm: 						y6 = z14
+# asm 1: movdqa <z14=int6464#4,>y6=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y6=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232					y6 += z2
+# asm 1: paddd <z2=int6464#11,<y6=int6464#1
+# asm 2: paddd <z2=%xmm10,<y6=%xmm0
+paddd %xmm10,%xmm0
+
+# qhasm: 						r6 = y6
+# asm 1: movdqa <y6=int6464#1,>r6=int6464#13
+# asm 2: movdqa <y6=%xmm0,>r6=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y6 <<= 13
+# asm 1: pslld $13,<y6=int6464#1
+# asm 2: pslld $13,<y6=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 						z6 ^= y6
+# asm 1: pxor  <y6=int6464#1,<z6=int6464#6
+# asm 2: pxor  <y6=%xmm0,<z6=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232					r6 >>= 19
+# asm 1: psrld $19,<r6=int6464#13
+# asm 2: psrld $19,<r6=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 						z6 ^= r6
+# asm 1: pxor  <r6=int6464#13,<z6=int6464#6
+# asm 2: pxor  <r6=%xmm12,<z6=%xmm5
+pxor  %xmm12,%xmm5
+
+# qhasm: 								y11 = z3
+# asm 1: movdqa <z3=int6464#5,>y11=int6464#1
+# asm 2: movdqa <z3=%xmm4,>y11=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232							y11 += z7
+# asm 1: paddd <z7=int6464#9,<y11=int6464#1
+# asm 2: paddd <z7=%xmm8,<y11=%xmm0
+paddd %xmm8,%xmm0
+
+# qhasm: 								r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#13
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y11 <<= 13
+# asm 1: pslld $13,<y11=int6464#1
+# asm 2: pslld $13,<y11=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 								z11 ^= y11
+# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm: uint32323232							r11 >>= 19
+# asm 1: psrld $19,<r11=int6464#13
+# asm 2: psrld $19,<r11=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 								z11 ^= r11
+# asm 1: pxor  <r11=int6464#13,<z11=int6464#7
+# asm 2: pxor  <r11=%xmm12,<z11=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm: 						y10 = z2
+# asm 1: movdqa <z2=int6464#11,>y10=int6464#1
+# asm 2: movdqa <z2=%xmm10,>y10=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm: uint32323232					y10 += z6
+# asm 1: paddd <z6=int6464#6,<y10=int6464#1
+# asm 2: paddd <z6=%xmm5,<y10=%xmm0
+paddd %xmm5,%xmm0
+
+# qhasm: 						r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 						z10 ^= y10
+# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm: uint32323232					r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 						z10 ^= r10
+# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
+pxor  %xmm12,%xmm1
+
+# qhasm: 		z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#1
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm0
+movdqa 320(%rsp),%xmm0
+
+# qhasm: 						z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: 		y1 = z3
+# asm 1: movdqa <z3=int6464#5,>y1=int6464#2
+# asm 2: movdqa <z3=%xmm4,>y1=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: uint32323232	y1 += z0
+# asm 1: paddd <z0=int6464#1,<y1=int6464#2
+# asm 2: paddd <z0=%xmm0,<y1=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: 		r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#13
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232	y1 <<= 7
+# asm 1: pslld $7,<y1=int6464#2
+# asm 2: pslld $7,<y1=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 		z1 ^= y1
+# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm: uint32323232	r1 >>= 25
+# asm 1: psrld $25,<r1=int6464#13
+# asm 2: psrld $25,<r1=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 		z1 ^= r1
+# asm 1: pxor  <r1=int6464#13,<z1=int6464#8
+# asm 2: pxor  <r1=%xmm12,<z1=%xmm7
+pxor  %xmm12,%xmm7
+
+# qhasm: 								y15 = z7
+# asm 1: movdqa <z7=int6464#9,>y15=int6464#2
+# asm 2: movdqa <z7=%xmm8,>y15=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: uint32323232							y15 += z11
+# asm 1: paddd <z11=int6464#7,<y15=int6464#2
+# asm 2: paddd <z11=%xmm6,<y15=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm: 								r15 = y15
+# asm 1: movdqa <y15=int6464#2,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm1,>r15=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232							y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#2
+# asm 2: pslld $18,<y15=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 								z15 ^= y15
+# asm 1: pxor  <y15=int6464#2,<z15=int6464#3
+# asm 2: pxor  <y15=%xmm1,<z15=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm: uint32323232							r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 								z15 ^= r15
+# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm: 				z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#13
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm12
+movdqa 336(%rsp),%xmm12
+
+# qhasm: 								z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: 				y6 = z4
+# asm 1: movdqa <z4=int6464#15,>y6=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y6=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232			y6 += z5
+# asm 1: paddd <z5=int6464#13,<y6=int6464#2
+# asm 2: paddd <z5=%xmm12,<y6=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: 				r6 = y6
+# asm 1: movdqa <y6=int6464#2,>r6=int6464#3
+# asm 2: movdqa <y6=%xmm1,>r6=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y6 <<= 7
+# asm 1: pslld $7,<y6=int6464#2
+# asm 2: pslld $7,<y6=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 				z6 ^= y6
+# asm 1: pxor  <y6=int6464#2,<z6=int6464#6
+# asm 2: pxor  <y6=%xmm1,<z6=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm: uint32323232			r6 >>= 25
+# asm 1: psrld $25,<r6=int6464#3
+# asm 2: psrld $25,<r6=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 				z6 ^= r6
+# asm 1: pxor  <r6=int6464#3,<z6=int6464#6
+# asm 2: pxor  <r6=%xmm2,<z6=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm: 		y2 = z0
+# asm 1: movdqa <z0=int6464#1,>y2=int6464#2
+# asm 2: movdqa <z0=%xmm0,>y2=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232	y2 += z1
+# asm 1: paddd <z1=int6464#8,<y2=int6464#2
+# asm 2: paddd <z1=%xmm7,<y2=%xmm1
+paddd %xmm7,%xmm1
+
+# qhasm: 		r2 = y2
+# asm 1: movdqa <y2=int6464#2,>r2=int6464#3
+# asm 2: movdqa <y2=%xmm1,>r2=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#2
+# asm 2: pslld $9,<y2=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 		z2 ^= y2
+# asm 1: pxor  <y2=int6464#2,<z2=int6464#11
+# asm 2: pxor  <y2=%xmm1,<z2=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm: uint32323232	r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#3
+# asm 2: psrld $23,<r2=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 		z2 ^= r2
+# asm 1: pxor  <r2=int6464#3,<z2=int6464#11
+# asm 2: pxor  <r2=%xmm2,<z2=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm: 				y7 = z5
+# asm 1: movdqa <z5=int6464#13,>y7=int6464#2
+# asm 2: movdqa <z5=%xmm12,>y7=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232			y7 += z6
+# asm 1: paddd <z6=int6464#6,<y7=int6464#2
+# asm 2: paddd <z6=%xmm5,<y7=%xmm1
+paddd %xmm5,%xmm1
+
+# qhasm: 				r7 = y7
+# asm 1: movdqa <y7=int6464#2,>r7=int6464#3
+# asm 2: movdqa <y7=%xmm1,>r7=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#2
+# asm 2: pslld $9,<y7=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 				z7 ^= y7
+# asm 1: pxor  <y7=int6464#2,<z7=int6464#9
+# asm 2: pxor  <y7=%xmm1,<z7=%xmm8
+pxor  %xmm1,%xmm8
+
+# qhasm: uint32323232			r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#3
+# asm 2: psrld $23,<r7=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 				z7 ^= r7
+# asm 1: pxor  <r7=int6464#3,<z7=int6464#9
+# asm 2: pxor  <r7=%xmm2,<z7=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm: 		y3 = z1
+# asm 1: movdqa <z1=int6464#8,>y3=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y3=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232	y3 += z2
+# asm 1: paddd <z2=int6464#11,<y3=int6464#2
+# asm 2: paddd <z2=%xmm10,<y3=%xmm1
+paddd %xmm10,%xmm1
+
+# qhasm: 		r3 = y3
+# asm 1: movdqa <y3=int6464#2,>r3=int6464#3
+# asm 2: movdqa <y3=%xmm1,>r3=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y3 <<= 13
+# asm 1: pslld $13,<y3=int6464#2
+# asm 2: pslld $13,<y3=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 		z3 ^= y3
+# asm 1: pxor  <y3=int6464#2,<z3=int6464#5
+# asm 2: pxor  <y3=%xmm1,<z3=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm: uint32323232	r3 >>= 19
+# asm 1: psrld $19,<r3=int6464#3
+# asm 2: psrld $19,<r3=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 		z3 ^= r3
+# asm 1: pxor  <r3=int6464#3,<z3=int6464#5
+# asm 2: pxor  <r3=%xmm2,<z3=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm: 				y4 = z6
+# asm 1: movdqa <z6=int6464#6,>y4=int6464#2
+# asm 2: movdqa <z6=%xmm5,>y4=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: uint32323232			y4 += z7
+# asm 1: paddd <z7=int6464#9,<y4=int6464#2
+# asm 2: paddd <z7=%xmm8,<y4=%xmm1
+paddd %xmm8,%xmm1
+
+# qhasm: 				r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y4 <<= 13
+# asm 1: pslld $13,<y4=int6464#2
+# asm 2: pslld $13,<y4=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 				z4 ^= y4
+# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
+pxor  %xmm1,%xmm14
+
+# qhasm: uint32323232			r4 >>= 19
+# asm 1: psrld $19,<r4=int6464#3
+# asm 2: psrld $19,<r4=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 				z4 ^= r4
+# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm: 		y0 = z2
+# asm 1: movdqa <z2=int6464#11,>y0=int6464#2
+# asm 2: movdqa <z2=%xmm10,>y0=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: uint32323232	y0 += z3
+# asm 1: paddd <z3=int6464#5,<y0=int6464#2
+# asm 2: paddd <z3=%xmm4,<y0=%xmm1
+paddd %xmm4,%xmm1
+
+# qhasm: 		r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 		z0 ^= y0
+# asm 1: pxor  <y0=int6464#2,<z0=int6464#1
+# asm 2: pxor  <y0=%xmm1,<z0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232	r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 		z0 ^= r0
+# asm 1: pxor  <r0=int6464#3,<z0=int6464#1
+# asm 2: pxor  <r0=%xmm2,<z0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm: 						z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: 		z0_stack = z0
+# asm 1: movdqa <z0=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm0,>z0_stack=320(%rsp)
+movdqa %xmm0,320(%rsp)
+
+# qhasm: 				y5 = z7
+# asm 1: movdqa <z7=int6464#9,>y5=int6464#1
+# asm 2: movdqa <z7=%xmm8,>y5=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm: uint32323232			y5 += z4
+# asm 1: paddd <z4=int6464#15,<y5=int6464#1
+# asm 2: paddd <z4=%xmm14,<y5=%xmm0
+paddd %xmm14,%xmm0
+
+# qhasm: 				r5 = y5
+# asm 1: movdqa <y5=int6464#1,>r5=int6464#3
+# asm 2: movdqa <y5=%xmm0,>r5=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232			y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#1
+# asm 2: pslld $18,<y5=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 				z5 ^= y5
+# asm 1: pxor  <y5=int6464#1,<z5=int6464#13
+# asm 2: pxor  <y5=%xmm0,<z5=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm: uint32323232			r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#3
+# asm 2: psrld $14,<r5=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 				z5 ^= r5
+# asm 1: pxor  <r5=int6464#3,<z5=int6464#13
+# asm 2: pxor  <r5=%xmm2,<z5=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm: 						y11 = z9
+# asm 1: movdqa <z9=int6464#12,>y11=int6464#1
+# asm 2: movdqa <z9=%xmm11,>y11=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm: uint32323232					y11 += z10
+# asm 1: paddd <z10=int6464#2,<y11=int6464#1
+# asm 2: paddd <z10=%xmm1,<y11=%xmm0
+paddd %xmm1,%xmm0
+
+# qhasm: 						r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#3
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232					y11 <<= 7
+# asm 1: pslld $7,<y11=int6464#1
+# asm 2: pslld $7,<y11=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 						z11 ^= y11
+# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm: uint32323232					r11 >>= 25
+# asm 1: psrld $25,<r11=int6464#3
+# asm 2: psrld $25,<r11=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 						z11 ^= r11
+# asm 1: pxor  <r11=int6464#3,<z11=int6464#7
+# asm 2: pxor  <r11=%xmm2,<z11=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm: 								z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: 				z5_stack = z5
+# asm 1: movdqa <z5=int6464#13,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm12,>z5_stack=336(%rsp)
+movdqa %xmm12,336(%rsp)
+
+# qhasm: 								y12 = z14
+# asm 1: movdqa <z14=int6464#4,>y12=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y12=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232							y12 += z15
+# asm 1: paddd <z15=int6464#3,<y12=int6464#1
+# asm 2: paddd <z15=%xmm2,<y12=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: 								r12 = y12
+# asm 1: movdqa <y12=int6464#1,>r12=int6464#13
+# asm 2: movdqa <y12=%xmm0,>r12=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y12 <<= 7
+# asm 1: pslld $7,<y12=int6464#1
+# asm 2: pslld $7,<y12=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 								z12 ^= y12
+# asm 1: pxor  <y12=int6464#1,<z12=int6464#14
+# asm 2: pxor  <y12=%xmm0,<z12=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm: uint32323232							r12 >>= 25
+# asm 1: psrld $25,<r12=int6464#13
+# asm 2: psrld $25,<r12=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 								z12 ^= r12
+# asm 1: pxor  <r12=int6464#13,<z12=int6464#14
+# asm 2: pxor  <r12=%xmm12,<z12=%xmm13
+pxor  %xmm12,%xmm13
+
+# qhasm: 						y8 = z10
+# asm 1: movdqa <z10=int6464#2,>y8=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y8=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232					y8 += z11
+# asm 1: paddd <z11=int6464#7,<y8=int6464#1
+# asm 2: paddd <z11=%xmm6,<y8=%xmm0
+paddd %xmm6,%xmm0
+
+# qhasm: 						r8 = y8
+# asm 1: movdqa <y8=int6464#1,>r8=int6464#13
+# asm 2: movdqa <y8=%xmm0,>r8=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#1
+# asm 2: pslld $9,<y8=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 						z8 ^= y8
+# asm 1: pxor  <y8=int6464#1,<z8=int6464#16
+# asm 2: pxor  <y8=%xmm0,<z8=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm: uint32323232					r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#13
+# asm 2: psrld $23,<r8=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 						z8 ^= r8
+# asm 1: pxor  <r8=int6464#13,<z8=int6464#16
+# asm 2: pxor  <r8=%xmm12,<z8=%xmm15
+pxor  %xmm12,%xmm15
+
+# qhasm: 								y13 = z15
+# asm 1: movdqa <z15=int6464#3,>y13=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y13=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232							y13 += z12
+# asm 1: paddd <z12=int6464#14,<y13=int6464#1
+# asm 2: paddd <z12=%xmm13,<y13=%xmm0
+paddd %xmm13,%xmm0
+
+# qhasm: 								r13 = y13
+# asm 1: movdqa <y13=int6464#1,>r13=int6464#13
+# asm 2: movdqa <y13=%xmm0,>r13=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#1
+# asm 2: pslld $9,<y13=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 								z13 ^= y13
+# asm 1: pxor  <y13=int6464#1,<z13=int6464#10
+# asm 2: pxor  <y13=%xmm0,<z13=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm: uint32323232							r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#13
+# asm 2: psrld $23,<r13=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 								z13 ^= r13
+# asm 1: pxor  <r13=int6464#13,<z13=int6464#10
+# asm 2: pxor  <r13=%xmm12,<z13=%xmm9
+pxor  %xmm12,%xmm9
+
+# qhasm: 						y9 = z11
+# asm 1: movdqa <z11=int6464#7,>y9=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y9=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232					y9 += z8
+# asm 1: paddd <z8=int6464#16,<y9=int6464#1
+# asm 2: paddd <z8=%xmm15,<y9=%xmm0
+paddd %xmm15,%xmm0
+
+# qhasm: 						r9 = y9
+# asm 1: movdqa <y9=int6464#1,>r9=int6464#13
+# asm 2: movdqa <y9=%xmm0,>r9=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y9 <<= 13
+# asm 1: pslld $13,<y9=int6464#1
+# asm 2: pslld $13,<y9=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 						z9 ^= y9
+# asm 1: pxor  <y9=int6464#1,<z9=int6464#12
+# asm 2: pxor  <y9=%xmm0,<z9=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm: uint32323232					r9 >>= 19
+# asm 1: psrld $19,<r9=int6464#13
+# asm 2: psrld $19,<r9=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 						z9 ^= r9
+# asm 1: pxor  <r9=int6464#13,<z9=int6464#12
+# asm 2: pxor  <r9=%xmm12,<z9=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm: 								y14 = z12
+# asm 1: movdqa <z12=int6464#14,>y14=int6464#1
+# asm 2: movdqa <z12=%xmm13,>y14=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: uint32323232							y14 += z13
+# asm 1: paddd <z13=int6464#10,<y14=int6464#1
+# asm 2: paddd <z13=%xmm9,<y14=%xmm0
+paddd %xmm9,%xmm0
+
+# qhasm: 								r14 = y14
+# asm 1: movdqa <y14=int6464#1,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm0,>r14=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y14 <<= 13
+# asm 1: pslld $13,<y14=int6464#1
+# asm 2: pslld $13,<y14=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 								z14 ^= y14
+# asm 1: pxor  <y14=int6464#1,<z14=int6464#4
+# asm 2: pxor  <y14=%xmm0,<z14=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm: uint32323232							r14 >>= 19
+# asm 1: psrld $19,<r14=int6464#13
+# asm 2: psrld $19,<r14=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 								z14 ^= r14
+# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm: 						y10 = z8
+# asm 1: movdqa <z8=int6464#16,>y10=int6464#1
+# asm 2: movdqa <z8=%xmm15,>y10=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm: uint32323232					y10 += z9
+# asm 1: paddd <z9=int6464#12,<y10=int6464#1
+# asm 2: paddd <z9=%xmm11,<y10=%xmm0
+paddd %xmm11,%xmm0
+
+# qhasm: 						r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 						z10 ^= y10
+# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm: uint32323232					r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 						z10 ^= r10
+# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
+pxor  %xmm12,%xmm1
+
+# qhasm: 								y15 = z13
+# asm 1: movdqa <z13=int6464#10,>y15=int6464#1
+# asm 2: movdqa <z13=%xmm9,>y15=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm: uint32323232							y15 += z14
+# asm 1: paddd <z14=int6464#4,<y15=int6464#1
+# asm 2: paddd <z14=%xmm3,<y15=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: 								r15 = y15
+# asm 1: movdqa <y15=int6464#1,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm0,>r15=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#1
+# asm 2: pslld $18,<y15=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 								z15 ^= y15
+# asm 1: pxor  <y15=int6464#1,<z15=int6464#3
+# asm 2: pxor  <y15=%xmm0,<z15=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm: uint32323232							r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 								z15 ^= r15
+# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm: 		z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#13
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm12
+movdqa 320(%rsp),%xmm12
+
+# qhasm: 				z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#1
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm0
+movdqa 336(%rsp),%xmm0
+
+# qhasm:                   unsigned>? i -= 2
+# asm 1: sub  $2,<i=int64#3
+# asm 2: sub  $2,<i=%rdx
+sub  $2,%rdx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
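+
+# note: after the 12 rounds, the code below adds the saved input words
+# (origN) back into zN and writes the result xored with the message,
+# 16 bytes at a time.  Each "z <<<= 96" (pshufd $0x39) rotates the lanes so
+# successive movd extractions pick lane 0, 1, 2, 3; lane j of word N lands
+# at offset 64*j + 4*N, i.e. the four parallel blocks sit at m/out offsets
+# 0, 64, 128 and 192.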
+
+# qhasm:   uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#13
+# asm 2: paddd <orig0=112(%rsp),<z0=%xmm12
+paddd 112(%rsp),%xmm12
+
+# qhasm:   uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#8
+# asm 2: paddd <orig1=176(%rsp),<z1=%xmm7
+paddd 176(%rsp),%xmm7
+
+# qhasm:   uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#11
+# asm 2: paddd <orig2=224(%rsp),<z2=%xmm10
+paddd 224(%rsp),%xmm10
+
+# qhasm:   uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#5
+# asm 2: paddd <orig3=272(%rsp),<z3=%xmm4
+paddd 272(%rsp),%xmm4
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 0(<m=%rsi),<in0=%edx
+xorl 0(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 4(<m=%rsi),<in1=%ecx
+xorl 4(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 8(<m=%rsi),<in2=%r8d
+xorl 8(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 12(<m=%rsi),<in3=%r9d
+xorl 12(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 0) = in0
+# asm 1: movl   <in0=int64#3d,0(<out=int64#1)
+# asm 2: movl   <in0=%edx,0(<out=%rdi)
+movl   %edx,0(%rdi)
+
+# qhasm:   *(uint32 *) (out + 4) = in1
+# asm 1: movl   <in1=int64#4d,4(<out=int64#1)
+# asm 2: movl   <in1=%ecx,4(<out=%rdi)
+movl   %ecx,4(%rdi)
+
+# qhasm:   *(uint32 *) (out + 8) = in2
+# asm 1: movl   <in2=int64#5d,8(<out=int64#1)
+# asm 2: movl   <in2=%r8d,8(<out=%rdi)
+movl   %r8d,8(%rdi)
+
+# qhasm:   *(uint32 *) (out + 12) = in3
+# asm 1: movl   <in3=int64#6d,12(<out=int64#1)
+# asm 2: movl   <in3=%r9d,12(<out=%rdi)
+movl   %r9d,12(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 64(<m=%rsi),<in0=%edx
+xorl 64(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 68(<m=%rsi),<in1=%ecx
+xorl 68(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 72(<m=%rsi),<in2=%r8d
+xorl 72(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 76(<m=%rsi),<in3=%r9d
+xorl 76(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 64) = in0
+# asm 1: movl   <in0=int64#3d,64(<out=int64#1)
+# asm 2: movl   <in0=%edx,64(<out=%rdi)
+movl   %edx,64(%rdi)
+
+# qhasm:   *(uint32 *) (out + 68) = in1
+# asm 1: movl   <in1=int64#4d,68(<out=int64#1)
+# asm 2: movl   <in1=%ecx,68(<out=%rdi)
+movl   %ecx,68(%rdi)
+
+# qhasm:   *(uint32 *) (out + 72) = in2
+# asm 1: movl   <in2=int64#5d,72(<out=int64#1)
+# asm 2: movl   <in2=%r8d,72(<out=%rdi)
+movl   %r8d,72(%rdi)
+
+# qhasm:   *(uint32 *) (out + 76) = in3
+# asm 1: movl   <in3=int64#6d,76(<out=int64#1)
+# asm 2: movl   <in3=%r9d,76(<out=%rdi)
+movl   %r9d,76(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 128(<m=%rsi),<in0=%edx
+xorl 128(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 132(<m=%rsi),<in1=%ecx
+xorl 132(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 136(<m=%rsi),<in2=%r8d
+xorl 136(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 140(<m=%rsi),<in3=%r9d
+xorl 140(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 128) = in0
+# asm 1: movl   <in0=int64#3d,128(<out=int64#1)
+# asm 2: movl   <in0=%edx,128(<out=%rdi)
+movl   %edx,128(%rdi)
+
+# qhasm:   *(uint32 *) (out + 132) = in1
+# asm 1: movl   <in1=int64#4d,132(<out=int64#1)
+# asm 2: movl   <in1=%ecx,132(<out=%rdi)
+movl   %ecx,132(%rdi)
+
+# qhasm:   *(uint32 *) (out + 136) = in2
+# asm 1: movl   <in2=int64#5d,136(<out=int64#1)
+# asm 2: movl   <in2=%r8d,136(<out=%rdi)
+movl   %r8d,136(%rdi)
+
+# qhasm:   *(uint32 *) (out + 140) = in3
+# asm 1: movl   <in3=int64#6d,140(<out=int64#1)
+# asm 2: movl   <in3=%r9d,140(<out=%rdi)
+movl   %r9d,140(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 192(<m=%rsi),<in0=%edx
+xorl 192(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 196(<m=%rsi),<in1=%ecx
+xorl 196(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 200(<m=%rsi),<in2=%r8d
+xorl 200(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 204(<m=%rsi),<in3=%r9d
+xorl 204(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 192) = in0
+# asm 1: movl   <in0=int64#3d,192(<out=int64#1)
+# asm 2: movl   <in0=%edx,192(<out=%rdi)
+movl   %edx,192(%rdi)
+
+# qhasm:   *(uint32 *) (out + 196) = in1
+# asm 1: movl   <in1=int64#4d,196(<out=int64#1)
+# asm 2: movl   <in1=%ecx,196(<out=%rdi)
+movl   %ecx,196(%rdi)
+
+# qhasm:   *(uint32 *) (out + 200) = in2
+# asm 1: movl   <in2=int64#5d,200(<out=int64#1)
+# asm 2: movl   <in2=%r8d,200(<out=%rdi)
+movl   %r8d,200(%rdi)
+
+# qhasm:   *(uint32 *) (out + 204) = in3
+# asm 1: movl   <in3=int64#6d,204(<out=int64#1)
+# asm 2: movl   <in3=%r9d,204(<out=%rdi)
+movl   %r9d,204(%rdi)
+
+# qhasm:   uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#15
+# asm 2: paddd <orig4=240(%rsp),<z4=%xmm14
+paddd 240(%rsp),%xmm14
+
+# qhasm:   uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#1
+# asm 2: paddd <orig5=64(%rsp),<z5=%xmm0
+paddd 64(%rsp),%xmm0
+
+# qhasm:   uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#6
+# asm 2: paddd <orig6=128(%rsp),<z6=%xmm5
+paddd 128(%rsp),%xmm5
+
+# qhasm:   uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#9
+# asm 2: paddd <orig7=192(%rsp),<z7=%xmm8
+paddd 192(%rsp),%xmm8
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 16(<m=%rsi),<in4=%edx
+xorl 16(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 16) = in4
+# asm 1: movl   <in4=int64#3d,16(<out=int64#1)
+# asm 2: movl   <in4=%edx,16(<out=%rdi)
+movl   %edx,16(%rdi)
+
+# qhasm:   *(uint32 *) (out + 20) = in5
+# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl   <in5=%ecx,20(<out=%rdi)
+movl   %ecx,20(%rdi)
+
+# qhasm:   *(uint32 *) (out + 24) = in6
+# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl   <in6=%r8d,24(<out=%rdi)
+movl   %r8d,24(%rdi)
+
+# qhasm:   *(uint32 *) (out + 28) = in7
+# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl   <in7=%r9d,28(<out=%rdi)
+movl   %r9d,28(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 80(<m=%rsi),<in4=%edx
+xorl 80(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 84(<m=%rsi),<in5=%ecx
+xorl 84(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 88(<m=%rsi),<in6=%r8d
+xorl 88(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 92(<m=%rsi),<in7=%r9d
+xorl 92(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 80) = in4
+# asm 1: movl   <in4=int64#3d,80(<out=int64#1)
+# asm 2: movl   <in4=%edx,80(<out=%rdi)
+movl   %edx,80(%rdi)
+
+# qhasm:   *(uint32 *) (out + 84) = in5
+# asm 1: movl   <in5=int64#4d,84(<out=int64#1)
+# asm 2: movl   <in5=%ecx,84(<out=%rdi)
+movl   %ecx,84(%rdi)
+
+# qhasm:   *(uint32 *) (out + 88) = in6
+# asm 1: movl   <in6=int64#5d,88(<out=int64#1)
+# asm 2: movl   <in6=%r8d,88(<out=%rdi)
+movl   %r8d,88(%rdi)
+
+# qhasm:   *(uint32 *) (out + 92) = in7
+# asm 1: movl   <in7=int64#6d,92(<out=int64#1)
+# asm 2: movl   <in7=%r9d,92(<out=%rdi)
+movl   %r9d,92(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 144(<m=%rsi),<in4=%edx
+xorl 144(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 148(<m=%rsi),<in5=%ecx
+xorl 148(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 152(<m=%rsi),<in6=%r8d
+xorl 152(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 156(<m=%rsi),<in7=%r9d
+xorl 156(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 144) = in4
+# asm 1: movl   <in4=int64#3d,144(<out=int64#1)
+# asm 2: movl   <in4=%edx,144(<out=%rdi)
+movl   %edx,144(%rdi)
+
+# qhasm:   *(uint32 *) (out + 148) = in5
+# asm 1: movl   <in5=int64#4d,148(<out=int64#1)
+# asm 2: movl   <in5=%ecx,148(<out=%rdi)
+movl   %ecx,148(%rdi)
+
+# qhasm:   *(uint32 *) (out + 152) = in6
+# asm 1: movl   <in6=int64#5d,152(<out=int64#1)
+# asm 2: movl   <in6=%r8d,152(<out=%rdi)
+movl   %r8d,152(%rdi)
+
+# qhasm:   *(uint32 *) (out + 156) = in7
+# asm 1: movl   <in7=int64#6d,156(<out=int64#1)
+# asm 2: movl   <in7=%r9d,156(<out=%rdi)
+movl   %r9d,156(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 208(<m=%rsi),<in4=%edx
+xorl 208(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 212(<m=%rsi),<in5=%ecx
+xorl 212(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 216(<m=%rsi),<in6=%r8d
+xorl 216(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 220(<m=%rsi),<in7=%r9d
+xorl 220(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 208) = in4
+# asm 1: movl   <in4=int64#3d,208(<out=int64#1)
+# asm 2: movl   <in4=%edx,208(<out=%rdi)
+movl   %edx,208(%rdi)
+
+# qhasm:   *(uint32 *) (out + 212) = in5
+# asm 1: movl   <in5=int64#4d,212(<out=int64#1)
+# asm 2: movl   <in5=%ecx,212(<out=%rdi)
+movl   %ecx,212(%rdi)
+
+# qhasm:   *(uint32 *) (out + 216) = in6
+# asm 1: movl   <in6=int64#5d,216(<out=int64#1)
+# asm 2: movl   <in6=%r8d,216(<out=%rdi)
+movl   %r8d,216(%rdi)
+
+# qhasm:   *(uint32 *) (out + 220) = in7
+# asm 1: movl   <in7=int64#6d,220(<out=int64#1)
+# asm 2: movl   <in7=%r9d,220(<out=%rdi)
+movl   %r9d,220(%rdi)
+
+# qhasm:   uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#16
+# asm 2: paddd <orig8=288(%rsp),<z8=%xmm15
+paddd 288(%rsp),%xmm15
+
+# qhasm:   uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#12
+# asm 2: paddd <orig9=304(%rsp),<z9=%xmm11
+paddd 304(%rsp),%xmm11
+
+# qhasm:   uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#2
+# asm 2: paddd <orig10=80(%rsp),<z10=%xmm1
+paddd 80(%rsp),%xmm1
+
+# qhasm:   uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#7
+# asm 2: paddd <orig11=144(%rsp),<z11=%xmm6
+paddd 144(%rsp),%xmm6
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 32(<m=%rsi),<in8=%edx
+xorl 32(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 36(<m=%rsi),<in9=%ecx
+xorl 36(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 40(<m=%rsi),<in10=%r8d
+xorl 40(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 44(<m=%rsi),<in11=%r9d
+xorl 44(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 32) = in8
+# asm 1: movl   <in8=int64#3d,32(<out=int64#1)
+# asm 2: movl   <in8=%edx,32(<out=%rdi)
+movl   %edx,32(%rdi)
+
+# qhasm:   *(uint32 *) (out + 36) = in9
+# asm 1: movl   <in9=int64#4d,36(<out=int64#1)
+# asm 2: movl   <in9=%ecx,36(<out=%rdi)
+movl   %ecx,36(%rdi)
+
+# qhasm:   *(uint32 *) (out + 40) = in10
+# asm 1: movl   <in10=int64#5d,40(<out=int64#1)
+# asm 2: movl   <in10=%r8d,40(<out=%rdi)
+movl   %r8d,40(%rdi)
+
+# qhasm:   *(uint32 *) (out + 44) = in11
+# asm 1: movl   <in11=int64#6d,44(<out=int64#1)
+# asm 2: movl   <in11=%r9d,44(<out=%rdi)
+movl   %r9d,44(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 96(<m=%rsi),<in8=%edx
+xorl 96(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 100(<m=%rsi),<in9=%ecx
+xorl 100(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 104(<m=%rsi),<in10=%r8d
+xorl 104(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 108(<m=%rsi),<in11=%r9d
+xorl 108(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 96) = in8
+# asm 1: movl   <in8=int64#3d,96(<out=int64#1)
+# asm 2: movl   <in8=%edx,96(<out=%rdi)
+movl   %edx,96(%rdi)
+
+# qhasm:   *(uint32 *) (out + 100) = in9
+# asm 1: movl   <in9=int64#4d,100(<out=int64#1)
+# asm 2: movl   <in9=%ecx,100(<out=%rdi)
+movl   %ecx,100(%rdi)
+
+# qhasm:   *(uint32 *) (out + 104) = in10
+# asm 1: movl   <in10=int64#5d,104(<out=int64#1)
+# asm 2: movl   <in10=%r8d,104(<out=%rdi)
+movl   %r8d,104(%rdi)
+
+# qhasm:   *(uint32 *) (out + 108) = in11
+# asm 1: movl   <in11=int64#6d,108(<out=int64#1)
+# asm 2: movl   <in11=%r9d,108(<out=%rdi)
+movl   %r9d,108(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 160(<m=%rsi),<in8=%edx
+xorl 160(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 164(<m=%rsi),<in9=%ecx
+xorl 164(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 168(<m=%rsi),<in10=%r8d
+xorl 168(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 172(<m=%rsi),<in11=%r9d
+xorl 172(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 160) = in8
+# asm 1: movl   <in8=int64#3d,160(<out=int64#1)
+# asm 2: movl   <in8=%edx,160(<out=%rdi)
+movl   %edx,160(%rdi)
+
+# qhasm:   *(uint32 *) (out + 164) = in9
+# asm 1: movl   <in9=int64#4d,164(<out=int64#1)
+# asm 2: movl   <in9=%ecx,164(<out=%rdi)
+movl   %ecx,164(%rdi)
+
+# qhasm:   *(uint32 *) (out + 168) = in10
+# asm 1: movl   <in10=int64#5d,168(<out=int64#1)
+# asm 2: movl   <in10=%r8d,168(<out=%rdi)
+movl   %r8d,168(%rdi)
+
+# qhasm:   *(uint32 *) (out + 172) = in11
+# asm 1: movl   <in11=int64#6d,172(<out=int64#1)
+# asm 2: movl   <in11=%r9d,172(<out=%rdi)
+movl   %r9d,172(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 224(<m=%rsi),<in8=%edx
+xorl 224(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 228(<m=%rsi),<in9=%ecx
+xorl 228(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 232(<m=%rsi),<in10=%r8d
+xorl 232(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 236(<m=%rsi),<in11=%r9d
+xorl 236(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 224) = in8
+# asm 1: movl   <in8=int64#3d,224(<out=int64#1)
+# asm 2: movl   <in8=%edx,224(<out=%rdi)
+movl   %edx,224(%rdi)
+
+# qhasm:   *(uint32 *) (out + 228) = in9
+# asm 1: movl   <in9=int64#4d,228(<out=int64#1)
+# asm 2: movl   <in9=%ecx,228(<out=%rdi)
+movl   %ecx,228(%rdi)
+
+# qhasm:   *(uint32 *) (out + 232) = in10
+# asm 1: movl   <in10=int64#5d,232(<out=int64#1)
+# asm 2: movl   <in10=%r8d,232(<out=%rdi)
+movl   %r8d,232(%rdi)
+
+# qhasm:   *(uint32 *) (out + 236) = in11
+# asm 1: movl   <in11=int64#6d,236(<out=int64#1)
+# asm 2: movl   <in11=%r9d,236(<out=%rdi)
+movl   %r9d,236(%rdi)
+
+# qhasm:   uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#14
+# asm 2: paddd <orig12=160(%rsp),<z12=%xmm13
+paddd 160(%rsp),%xmm13
+
+# qhasm:   uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#10
+# asm 2: paddd <orig13=208(%rsp),<z13=%xmm9
+paddd 208(%rsp),%xmm9
+
+# qhasm:   uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#4
+# asm 2: paddd <orig14=256(%rsp),<z14=%xmm3
+paddd 256(%rsp),%xmm3
+
+# qhasm:   uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#3
+# asm 2: paddd <orig15=96(%rsp),<z15=%xmm2
+paddd 96(%rsp),%xmm2
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 48(<m=%rsi),<in12=%edx
+xorl 48(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 52(<m=%rsi),<in13=%ecx
+xorl 52(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 56(<m=%rsi),<in14=%r8d
+xorl 56(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 60(<m=%rsi),<in15=%r9d
+xorl 60(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 48) = in12
+# asm 1: movl   <in12=int64#3d,48(<out=int64#1)
+# asm 2: movl   <in12=%edx,48(<out=%rdi)
+movl   %edx,48(%rdi)
+
+# qhasm:   *(uint32 *) (out + 52) = in13
+# asm 1: movl   <in13=int64#4d,52(<out=int64#1)
+# asm 2: movl   <in13=%ecx,52(<out=%rdi)
+movl   %ecx,52(%rdi)
+
+# qhasm:   *(uint32 *) (out + 56) = in14
+# asm 1: movl   <in14=int64#5d,56(<out=int64#1)
+# asm 2: movl   <in14=%r8d,56(<out=%rdi)
+movl   %r8d,56(%rdi)
+
+# qhasm:   *(uint32 *) (out + 60) = in15
+# asm 1: movl   <in15=int64#6d,60(<out=int64#1)
+# asm 2: movl   <in15=%r9d,60(<out=%rdi)
+movl   %r9d,60(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 112(<m=%rsi),<in12=%edx
+xorl 112(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 116(<m=%rsi),<in13=%ecx
+xorl 116(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 120(<m=%rsi),<in14=%r8d
+xorl 120(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 124(<m=%rsi),<in15=%r9d
+xorl 124(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 112) = in12
+# asm 1: movl   <in12=int64#3d,112(<out=int64#1)
+# asm 2: movl   <in12=%edx,112(<out=%rdi)
+movl   %edx,112(%rdi)
+
+# qhasm:   *(uint32 *) (out + 116) = in13
+# asm 1: movl   <in13=int64#4d,116(<out=int64#1)
+# asm 2: movl   <in13=%ecx,116(<out=%rdi)
+movl   %ecx,116(%rdi)
+
+# qhasm:   *(uint32 *) (out + 120) = in14
+# asm 1: movl   <in14=int64#5d,120(<out=int64#1)
+# asm 2: movl   <in14=%r8d,120(<out=%rdi)
+movl   %r8d,120(%rdi)
+
+# qhasm:   *(uint32 *) (out + 124) = in15
+# asm 1: movl   <in15=int64#6d,124(<out=int64#1)
+# asm 2: movl   <in15=%r9d,124(<out=%rdi)
+movl   %r9d,124(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 176(<m=%rsi),<in12=%edx
+xorl 176(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 180(<m=%rsi),<in13=%ecx
+xorl 180(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 184(<m=%rsi),<in14=%r8d
+xorl 184(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 188(<m=%rsi),<in15=%r9d
+xorl 188(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 176) = in12
+# asm 1: movl   <in12=int64#3d,176(<out=int64#1)
+# asm 2: movl   <in12=%edx,176(<out=%rdi)
+movl   %edx,176(%rdi)
+
+# qhasm:   *(uint32 *) (out + 180) = in13
+# asm 1: movl   <in13=int64#4d,180(<out=int64#1)
+# asm 2: movl   <in13=%ecx,180(<out=%rdi)
+movl   %ecx,180(%rdi)
+
+# qhasm:   *(uint32 *) (out + 184) = in14
+# asm 1: movl   <in14=int64#5d,184(<out=int64#1)
+# asm 2: movl   <in14=%r8d,184(<out=%rdi)
+movl   %r8d,184(%rdi)
+
+# qhasm:   *(uint32 *) (out + 188) = in15
+# asm 1: movl   <in15=int64#6d,188(<out=int64#1)
+# asm 2: movl   <in15=%r9d,188(<out=%rdi)
+movl   %r9d,188(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 240(<m=%rsi),<in12=%edx
+xorl 240(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 244(<m=%rsi),<in13=%ecx
+xorl 244(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 248(<m=%rsi),<in14=%r8d
+xorl 248(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 252(<m=%rsi),<in15=%r9d
+xorl 252(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 240) = in12
+# asm 1: movl   <in12=int64#3d,240(<out=int64#1)
+# asm 2: movl   <in12=%edx,240(<out=%rdi)
+movl   %edx,240(%rdi)
+
+# qhasm:   *(uint32 *) (out + 244) = in13
+# asm 1: movl   <in13=int64#4d,244(<out=int64#1)
+# asm 2: movl   <in13=%ecx,244(<out=%rdi)
+movl   %ecx,244(%rdi)
+
+# qhasm:   *(uint32 *) (out + 248) = in14
+# asm 1: movl   <in14=int64#5d,248(<out=int64#1)
+# asm 2: movl   <in14=%r8d,248(<out=%rdi)
+movl   %r8d,248(%rdi)
+
+# qhasm:   *(uint32 *) (out + 252) = in15
+# asm 1: movl   <in15=int64#6d,252(<out=int64#1)
+# asm 2: movl   <in15=%r9d,252(<out=%rdi)
+movl   %r9d,252(%rdi)
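+
+# note: the movd/pshufd/xorl/movl groups above finish xoring one 256-byte
+# keystream batch (four 64-byte blocks, words stored up to out + 252) into
+# the message; below, bytes is reduced by 256, m and out advance by 256,
+# and the batch repeats while at least 256 bytes remain.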
+
+# qhasm:   bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm:   bytes -= 256
+# asm 1: sub  $256,<bytes=int64#6
+# asm 2: sub  $256,<bytes=%r9
+sub  $256,%r9
+
+# qhasm:   m += 256
+# asm 1: add  $256,<m=int64#2
+# asm 2: add  $256,<m=%rsi
+add  $256,%rsi
+
+# qhasm:   out += 256
+# asm 1: add  $256,<out=int64#1
+# asm 2: add  $256,<out=%rdi
+add  $256,%rdi
+
+# qhasm:                            unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int64#6
+# asm 2: cmp  $256,<bytes=%r9
+cmp  $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm:                 unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
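+
+# note: fewer than 256 bytes remain.  if bytes < 64, the tail of m is copied
+# into the stack buffer tmp via rep movsb and both m and out are pointed at
+# tmp, so a full 64-byte block can still be produced; the real output pointer
+# is saved in ctarget so it can be restored afterwards.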
+
+# qhasm:                   unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int64#6
+# asm 2: cmp  $64,<bytes=%r9
+cmp  $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto nocopy if !unsigned<
+jae ._nocopy
+
+# qhasm:     ctarget = out
+# asm 1: mov  <out=int64#1,>ctarget=int64#3
+# asm 2: mov  <out=%rdi,>ctarget=%rdx
+mov  %rdi,%rdx
+
+# qhasm:     out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm:     i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm:     while (i) { *out++ = *m++; --i }
+rep movsb
+
+# qhasm:     out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm:     m = &tmp
+# asm 1: leaq <tmp=stack512#1,>m=int64#2
+# asm 2: leaq <tmp=416(%rsp),>m=%rsi
+leaq 416(%rsp),%rsi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:   nocopy:
+._nocopy:
+
+# qhasm:   bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#4,>diag0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>diag0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#1,>diag1=int6464#2
+# asm 2: movdqa <x1=0(%rsp),>diag1=%xmm1
+movdqa 0(%rsp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#2,>diag2=int6464#3
+# asm 2: movdqa <x2=16(%rsp),>diag2=%xmm2
+movdqa 16(%rsp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#3,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%rsp),>diag3=%xmm3
+movdqa 32(%rsp),%xmm3
+
+# qhasm:                     a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 12
+# asm 1: mov  $12,>i=int64#4
+# asm 2: mov  $12,>i=%rcx
+mov  $12,%rcx
+
+# qhasm: mainloop2:
+._mainloop2:
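+
+# note: single-block loop.  diag0..diag3 hold the state in diagonal layout;
+# each pass applies the 7/9/13/18 rotate-xor sequence twice (two column/row
+# double rounds, i.e. four rounds) and subtracts 4 from i, which starts at 12,
+# so three passes are performed.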
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:                  unsigned>? i -= 4
+# asm 1: sub  $4,<i=int64#4
+# asm 2: sub  $4,<i=%rcx
+sub  $4,%rcx
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm:                 b0 = 0
+# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
+pxor   %xmm7,%xmm7
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
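+
+# note: the rounds are done; the saved input words x0..x3 are added back to
+# diag0..diag3, then each 32-bit word is pulled out with movd, rotated into
+# place with pshufd $0x39, xored with the message word, and stored to out at
+# its offset.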
+
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#4,<diag0=int6464#1
+# asm 2: paddd <x0=48(%rsp),<diag0=%xmm0
+paddd 48(%rsp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#1,<diag1=int6464#2
+# asm 2: paddd <x1=0(%rsp),<diag1=%xmm1
+paddd 0(%rsp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#2,<diag2=int6464#3
+# asm 2: paddd <x2=16(%rsp),<diag2=%xmm2
+paddd 16(%rsp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#3,<diag3=int6464#4
+# asm 2: paddd <x3=32(%rsp),<diag3=%xmm3
+paddd 32(%rsp),%xmm3
+
+# qhasm: in0 = diag0
+# asm 1: movd   <diag0=int6464#1,>in0=int64#4
+# asm 2: movd   <diag0=%xmm0,>in0=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in12 = diag1
+# asm 1: movd   <diag1=int6464#2,>in12=int64#5
+# asm 2: movd   <diag1=%xmm1,>in12=%r8
+movd   %xmm1,%r8
+
+# qhasm: in8 = diag2
+# asm 1: movd   <diag2=int6464#3,>in8=int64#6
+# asm 2: movd   <diag2=%xmm2,>in8=%r9
+movd   %xmm2,%r9
+
+# qhasm: in4 = diag3
+# asm 1: movd   <diag3=int6464#4,>in4=int64#7
+# asm 2: movd   <diag3=%xmm3,>in4=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#4d
+# asm 2: xorl 0(<m=%rsi),<in0=%ecx
+xorl 0(%rsi),%ecx
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#5d
+# asm 2: xorl 48(<m=%rsi),<in12=%r8d
+xorl 48(%rsi),%r8d
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#6d
+# asm 2: xorl 32(<m=%rsi),<in8=%r9d
+xorl 32(%rsi),%r9d
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#7d
+# asm 2: xorl 16(<m=%rsi),<in4=%eax
+xorl 16(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl   <in0=int64#4d,0(<out=int64#1)
+# asm 2: movl   <in0=%ecx,0(<out=%rdi)
+movl   %ecx,0(%rdi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl   <in12=int64#5d,48(<out=int64#1)
+# asm 2: movl   <in12=%r8d,48(<out=%rdi)
+movl   %r8d,48(%rdi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl   <in8=int64#6d,32(<out=int64#1)
+# asm 2: movl   <in8=%r9d,32(<out=%rdi)
+movl   %r9d,32(%rdi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl   <in4=int64#7d,16(<out=int64#1)
+# asm 2: movl   <in4=%eax,16(<out=%rdi)
+movl   %eax,16(%rdi)
+
+# qhasm: in5 = diag0
+# asm 1: movd   <diag0=int6464#1,>in5=int64#4
+# asm 2: movd   <diag0=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in1 = diag1
+# asm 1: movd   <diag1=int6464#2,>in1=int64#5
+# asm 2: movd   <diag1=%xmm1,>in1=%r8
+movd   %xmm1,%r8
+
+# qhasm: in13 = diag2
+# asm 1: movd   <diag2=int6464#3,>in13=int64#6
+# asm 2: movd   <diag2=%xmm2,>in13=%r9
+movd   %xmm2,%r9
+
+# qhasm: in9 = diag3
+# asm 1: movd   <diag3=int6464#4,>in9=int64#7
+# asm 2: movd   <diag3=%xmm3,>in9=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#5d
+# asm 2: xorl 4(<m=%rsi),<in1=%r8d
+xorl 4(%rsi),%r8d
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#6d
+# asm 2: xorl 52(<m=%rsi),<in13=%r9d
+xorl 52(%rsi),%r9d
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#7d
+# asm 2: xorl 36(<m=%rsi),<in9=%eax
+xorl 36(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl   <in5=%ecx,20(<out=%rdi)
+movl   %ecx,20(%rdi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl   <in1=int64#5d,4(<out=int64#1)
+# asm 2: movl   <in1=%r8d,4(<out=%rdi)
+movl   %r8d,4(%rdi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl   <in13=int64#6d,52(<out=int64#1)
+# asm 2: movl   <in13=%r9d,52(<out=%rdi)
+movl   %r9d,52(%rdi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl   <in9=int64#7d,36(<out=int64#1)
+# asm 2: movl   <in9=%eax,36(<out=%rdi)
+movl   %eax,36(%rdi)
+
+# qhasm: in10 = diag0
+# asm 1: movd   <diag0=int6464#1,>in10=int64#4
+# asm 2: movd   <diag0=%xmm0,>in10=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in6 = diag1
+# asm 1: movd   <diag1=int6464#2,>in6=int64#5
+# asm 2: movd   <diag1=%xmm1,>in6=%r8
+movd   %xmm1,%r8
+
+# qhasm: in2 = diag2
+# asm 1: movd   <diag2=int6464#3,>in2=int64#6
+# asm 2: movd   <diag2=%xmm2,>in2=%r9
+movd   %xmm2,%r9
+
+# qhasm: in14 = diag3
+# asm 1: movd   <diag3=int6464#4,>in14=int64#7
+# asm 2: movd   <diag3=%xmm3,>in14=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#4d
+# asm 2: xorl 40(<m=%rsi),<in10=%ecx
+xorl 40(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#6d
+# asm 2: xorl 8(<m=%rsi),<in2=%r9d
+xorl 8(%rsi),%r9d
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#7d
+# asm 2: xorl 56(<m=%rsi),<in14=%eax
+xorl 56(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl   <in10=int64#4d,40(<out=int64#1)
+# asm 2: movl   <in10=%ecx,40(<out=%rdi)
+movl   %ecx,40(%rdi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl   <in6=%r8d,24(<out=%rdi)
+movl   %r8d,24(%rdi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl   <in2=int64#6d,8(<out=int64#1)
+# asm 2: movl   <in2=%r9d,8(<out=%rdi)
+movl   %r9d,8(%rdi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl   <in14=int64#7d,56(<out=int64#1)
+# asm 2: movl   <in14=%eax,56(<out=%rdi)
+movl   %eax,56(%rdi)
+
+# qhasm: in15 = diag0
+# asm 1: movd   <diag0=int6464#1,>in15=int64#4
+# asm 2: movd   <diag0=%xmm0,>in15=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in11 = diag1
+# asm 1: movd   <diag1=int6464#2,>in11=int64#5
+# asm 2: movd   <diag1=%xmm1,>in11=%r8
+movd   %xmm1,%r8
+
+# qhasm: in7 = diag2
+# asm 1: movd   <diag2=int6464#3,>in7=int64#6
+# asm 2: movd   <diag2=%xmm2,>in7=%r9
+movd   %xmm2,%r9
+
+# qhasm: in3 = diag3
+# asm 1: movd   <diag3=int6464#4,>in3=int64#7
+# asm 2: movd   <diag3=%xmm3,>in3=%rax
+movd   %xmm3,%rax
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#4d
+# asm 2: xorl 60(<m=%rsi),<in15=%ecx
+xorl 60(%rsi),%ecx
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#5d
+# asm 2: xorl 44(<m=%rsi),<in11=%r8d
+xorl 44(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#7d
+# asm 2: xorl 12(<m=%rsi),<in3=%eax
+xorl 12(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl   <in15=int64#4d,60(<out=int64#1)
+# asm 2: movl   <in15=%ecx,60(<out=%rdi)
+movl   %ecx,60(%rdi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl   <in11=int64#5d,44(<out=int64#1)
+# asm 2: movl   <in11=%r8d,44(<out=%rdi)
+movl   %r8d,44(%rdi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl   <in7=%r9d,28(<out=%rdi)
+movl   %r9d,28(%rdi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl   <in3=int64#7d,12(<out=int64#1)
+# asm 2: movl   <in3=%eax,12(<out=%rdi)
+movl   %eax,12(%rdi)
+
+# qhasm:   bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#4d
+# asm 2: movl <x2=16(%rsp),>in8=%ecx
+movl 16(%rsp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#5d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%r8d
+movl 4+32(%rsp),%r8d
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#4
+# asm 2: add  $1,<in8=%rcx
+add  $1,%rcx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#5
+# asm 2: shl  $32,<in9=%r8
+shl  $32,%r8
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#5,<in8=int64#4
+# asm 2: add  <in9=%r8,<in8=%rcx
+add  %r8,%rcx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#4,>in9=int64#5
+# asm 2: mov  <in8=%rcx,>in9=%r8
+mov  %rcx,%r8
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#5
+# asm 2: shr  $32,<in9=%r8
+shr  $32,%r8
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#5d,4+<x3=stack128#3
+# asm 2: movl <in9=%r8d,4+<x3=32(%rsp)
+movl %r8d,4+32(%rsp)
+
+# qhasm:                          unsigned>? unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int64#6
+# asm 2: cmp  $64,<bytes=%r9
+cmp  $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm:     goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm:       m = out
+# asm 1: mov  <out=int64#1,>m=int64#2
+# asm 2: mov  <out=%rdi,>m=%rsi
+mov  %rdi,%rsi
+
+# qhasm:       out = ctarget
+# asm 1: mov  <ctarget=int64#3,>out=int64#1
+# asm 2: mov  <ctarget=%rdx,>out=%rdi
+mov  %rdx,%rdi
+
+# qhasm:       i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm:       while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     done:
+._done:
+
+# qhasm:     r11_caller = r11_stack
+# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
+# asm 2: movq <r11_stack=352(%rsp),>r11_caller=%r11
+movq 352(%rsp),%r11
+
+# qhasm:     r12_caller = r12_stack
+# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
+# asm 2: movq <r12_stack=360(%rsp),>r12_caller=%r12
+movq 360(%rsp),%r12
+
+# qhasm:     r13_caller = r13_stack
+# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
+# asm 2: movq <r13_stack=368(%rsp),>r13_caller=%r13
+movq 368(%rsp),%r13
+
+# qhasm:     r14_caller = r14_stack
+# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
+# asm 2: movq <r14_stack=376(%rsp),>r14_caller=%r14
+movq 376(%rsp),%r14
+
+# qhasm:     r15_caller = r15_stack
+# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
+# asm 2: movq <r15_stack=384(%rsp),>r15_caller=%r15
+movq 384(%rsp),%r15
+
+# qhasm:     rbx_caller = rbx_stack
+# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
+# asm 2: movq <rbx_stack=392(%rsp),>rbx_caller=%rbx
+movq 392(%rsp),%rbx
+
+# qhasm:     rbp_caller = rbp_stack
+# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
+# asm 2: movq <rbp_stack=400(%rsp),>rbp_caller=%rbp
+movq 400(%rsp),%rbp
+
+# qhasm:     leave
+add %r11,%rsp
+xor %rax,%rax
+xor %rdx,%rdx
+ret
+
+# qhasm:   bytesatleast65:
+._bytesatleast65:
+
+# qhasm:   bytes -= 64
+# asm 1: sub  $64,<bytes=int64#6
+# asm 2: sub  $64,<bytes=%r9
+sub  $64,%r9
+
+# qhasm:   out += 64
+# asm 1: add  $64,<out=int64#1
+# asm 2: add  $64,<out=%rdi
+add  $64,%rdi
+
+# qhasm:   m += 64
+# asm 1: add  $64,<m=int64#2
+# asm 2: add  $64,<m=%rsi
+add  $64,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
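
The x86-64 qhasm output ending above finishes each 64-byte block by reassembling the 64-bit Salsa20 block counter from two 32-bit stack words (the low word in x2[0], the high word in x3[1], per the in8/in9 comments), adding 1, and splitting it back; it then either loops for another block or, on the partial-block path, copies the final `bytes` bytes from the scratch buffer to the real destination `ctarget` with `rep movsb`. A minimal C sketch of the counter step only, with a hypothetical helper name, assuming nothing beyond what the qhasm comments state:

#include <stdint.h>

/* Sketch of the in8/in9 sequence above: the 64-bit block counter is kept as
   two 32-bit words of the Salsa20 input state.  The generated code forms the
   64-bit value, increments it, and stores the halves back separately. */
static void bump_block_counter(uint32_t *counter_lo, uint32_t *counter_hi)
{
  uint64_t c = ((uint64_t) *counter_hi << 32) | *counter_lo;
  c += 1;
  *counter_lo = (uint32_t) c;
  *counter_hi = (uint32_t) (c >> 32);
}
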
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/checksum b/nacl/nacl-20110221/crypto_stream/salsa2012/checksum
new file mode 100644
index 00000000..f801d9e3
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/checksum
@@ -0,0 +1 @@
+ecc758f200061c3cc770b25797da73583548d4f90f69a967fbbe1a6d94d1705c
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/ref/api.h b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/ref/implementors b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/ref/stream.c b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/stream.c
new file mode 100644
index 00000000..86053337
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/stream.c
@@ -0,0 +1,49 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa2012.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!clen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (clen >= 64) {
+    crypto_core_salsa2012(c,in,k,sigma);
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < clen;++i) c[i] = block[i];
+  }
+  return 0;
+}
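
The reference stream.c above is the raw keystream generator. As a usage sketch (not part of the patch): under NaCl's usual renaming this file is built as crypto_stream_salsa2012, and the header name below is the conventional NaCl one, assumed here; the all-zero key and nonce are arbitrary illustrative values.

#include <stdio.h>
#include "crypto_stream_salsa2012.h"   /* assumed NaCl header name */

int main(void)
{
  unsigned char k[32] = {0};  /* CRYPTO_KEYBYTES = 32, all-zero test key */
  unsigned char n[8] = {0};   /* CRYPTO_NONCEBYTES = 8, all-zero test nonce */
  unsigned char ks[64];       /* first 64 keystream bytes for (k,n) */
  int i;

  if (crypto_stream_salsa2012(ks,sizeof ks,n,k) != 0) return 1;
  for (i = 0;i < 16;++i) printf("%02x",ks[i]);
  printf("\n");
  return 0;
}
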
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/ref/xor.c b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/xor.c
new file mode 100644
index 00000000..90206426
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/ref/xor.c
@@ -0,0 +1,52 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa2012.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!mlen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (mlen >= 64) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa2012(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+  }
+  return 0;
+}
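
Because xor.c above only XORs the keystream into the message, encryption and decryption are the same operation. A round-trip sketch, again assuming NaCl's renamed symbol crypto_stream_salsa2012_xor and the same assumed header; the 70-byte length is chosen to exercise the partial final block branch:

#include <string.h>
#include "crypto_stream_salsa2012.h"   /* assumed NaCl header name */

int main(void)
{
  unsigned char k[32] = {1};
  unsigned char n[8] = {2};
  unsigned char m[70];
  unsigned char c[70];
  unsigned char p[70];
  int i;

  for (i = 0;i < 70;++i) m[i] = i;

  crypto_stream_salsa2012_xor(c,m,sizeof m,n,k);  /* encrypt */
  crypto_stream_salsa2012_xor(p,c,sizeof c,n,k);  /* decrypt with same (k,n) */

  return memcmp(m,p,sizeof m) != 0;  /* 0 on a successful round trip */
}
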
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/used b/nacl/nacl-20110221/crypto_stream/salsa2012/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/api.h b/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/implementors b/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/stream.s b/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/stream.s
new file mode 100644
index 00000000..c511b0d3
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa2012/x86_xmm5/stream.s
@@ -0,0 +1,5078 @@
+
+# qhasm: int32 a
+
+# qhasm: stack32 arg1
+
+# qhasm: stack32 arg2
+
+# qhasm: stack32 arg3
+
+# qhasm: stack32 arg4
+
+# qhasm: stack32 arg5
+
+# qhasm: stack32 arg6
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: input arg6
+
+# qhasm: int32 eax
+
+# qhasm: int32 ebx
+
+# qhasm: int32 esi
+
+# qhasm: int32 edi
+
+# qhasm: int32 ebp
+
+# qhasm: caller eax
+
+# qhasm: caller ebx
+
+# qhasm: caller esi
+
+# qhasm: caller edi
+
+# qhasm: caller ebp
+
+# qhasm: int32 k
+
+# qhasm: int32 kbits
+
+# qhasm: int32 iv
+
+# qhasm: int32 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int32 m
+
+# qhasm: stack32 out_stack
+
+# qhasm: int32 out
+
+# qhasm: stack32 bytes_stack
+
+# qhasm: int32 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int6464 p
+
+# qhasm: int6464 q
+
+# qhasm: int6464 r
+
+# qhasm: int6464 s
+
+# qhasm: int6464 t
+
+# qhasm: int6464 u
+
+# qhasm: int6464 v
+
+# qhasm: int6464 w
+
+# qhasm: int6464 mp
+
+# qhasm: int6464 mq
+
+# qhasm: int6464 mr
+
+# qhasm: int6464 ms
+
+# qhasm: int6464 mt
+
+# qhasm: int6464 mu
+
+# qhasm: int6464 mv
+
+# qhasm: int6464 mw
+
+# qhasm: int32 in0
+
+# qhasm: int32 in1
+
+# qhasm: int32 in2
+
+# qhasm: int32 in3
+
+# qhasm: int32 in4
+
+# qhasm: int32 in5
+
+# qhasm: int32 in6
+
+# qhasm: int32 in7
+
+# qhasm: int32 in8
+
+# qhasm: int32 in9
+
+# qhasm: int32 in10
+
+# qhasm: int32 in11
+
+# qhasm: int32 in12
+
+# qhasm: int32 in13
+
+# qhasm: int32 in14
+
+# qhasm: int32 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: stack32 ctarget
+
+# qhasm: enter crypto_stream_salsa2012_x86_xmm5
+.text
+.p2align 5
+.globl _crypto_stream_salsa2012_x86_xmm5
+.globl crypto_stream_salsa2012_x86_xmm5
+_crypto_stream_salsa2012_x86_xmm5:
+crypto_stream_salsa2012_x86_xmm5:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm: bytes = arg2
+# asm 1: movl <arg2=stack32#-2,>bytes=int32#3
+# asm 2: movl <arg2=8(%esp,%eax),>bytes=%edx
+movl 8(%esp,%eax),%edx
+
+# qhasm: out = arg1
+# asm 1: movl <arg1=stack32#-1,>out=int32#6
+# asm 2: movl <arg1=4(%esp,%eax),>out=%edi
+movl 4(%esp,%eax),%edi
+
+# qhasm: m = out
+# asm 1: mov  <out=int32#6,>m=int32#5
+# asm 2: mov  <out=%edi,>m=%esi
+mov  %edi,%esi
+
+# qhasm: iv = arg4
+# asm 1: movl <arg4=stack32#-4,>iv=int32#4
+# asm 2: movl <arg4=16(%esp,%eax),>iv=%ebx
+movl 16(%esp,%eax),%ebx
+
+# qhasm: k = arg5
+# asm 1: movl <arg5=stack32#-5,>k=int32#7
+# asm 2: movl <arg5=20(%esp,%eax),>k=%ebp
+movl 20(%esp,%eax),%ebp
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#3
+# asm 2: cmp  $0,<bytes=%edx
+cmp  $0,%edx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov  $0,>a=int32#1
+# asm 2: mov  $0,>a=%eax
+mov  $0,%eax
+
+# qhasm: i = bytes
+# asm 1: mov  <bytes=int32#3,>i=int32#2
+# asm 2: mov  <bytes=%edx,>i=%ecx
+mov  %edx,%ecx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
+
+# qhasm: out -= bytes
+# asm 1: subl <bytes=int32#3,<out=int32#6
+# asm 2: subl <bytes=%edx,<out=%edi
+subl %edx,%edi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
+
+# qhasm: enter crypto_stream_salsa2012_x86_xmm5_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa2012_x86_xmm5_xor
+.globl crypto_stream_salsa2012_x86_xmm5_xor
+_crypto_stream_salsa2012_x86_xmm5_xor:
+crypto_stream_salsa2012_x86_xmm5_xor:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm: out = arg1
+# asm 1: movl <arg1=stack32#-1,>out=int32#6
+# asm 2: movl <arg1=4(%esp,%eax),>out=%edi
+movl 4(%esp,%eax),%edi
+
+# qhasm: m = arg2
+# asm 1: movl <arg2=stack32#-2,>m=int32#5
+# asm 2: movl <arg2=8(%esp,%eax),>m=%esi
+movl 8(%esp,%eax),%esi
+
+# qhasm: bytes = arg3
+# asm 1: movl <arg3=stack32#-3,>bytes=int32#3
+# asm 2: movl <arg3=12(%esp,%eax),>bytes=%edx
+movl 12(%esp,%eax),%edx
+
+# qhasm: iv = arg5
+# asm 1: movl <arg5=stack32#-5,>iv=int32#4
+# asm 2: movl <arg5=20(%esp,%eax),>iv=%ebx
+movl 20(%esp,%eax),%ebx
+
+# qhasm: k = arg6
+# asm 1: movl <arg6=stack32#-6,>k=int32#7
+# asm 2: movl <arg6=24(%esp,%eax),>k=%ebp
+movl 24(%esp,%eax),%ebp
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#3
+# asm 2: cmp  $0,<bytes=%edx
+cmp  $0,%edx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#6,>out_stack=stack32#6
+# asm 2: movl <out=%edi,>out_stack=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#3,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%edx,>bytes_stack=24(%esp)
+movl %edx,24(%esp)
+
+# qhasm:   in4 = *(uint32 *) (k + 12)
+# asm 1: movl 12(<k=int32#7),>in4=int32#1
+# asm 2: movl 12(<k=%ebp),>in4=%eax
+movl 12(%ebp),%eax
+
+# qhasm:   in12 = *(uint32 *) (k + 20)
+# asm 1: movl 20(<k=int32#7),>in12=int32#2
+# asm 2: movl 20(<k=%ebp),>in12=%ecx
+movl 20(%ebp),%ecx
+
+# qhasm:   ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int32#1,>x3=stack128#1
+# asm 2: movl <in4=%eax,>x3=32(%esp)
+movl %eax,32(%esp)
+
+# qhasm:   ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int32#2,>x1=stack128#2
+# asm 2: movl <in12=%ecx,>x1=48(%esp)
+movl %ecx,48(%esp)
+
+# qhasm:   in0 = 1634760805
+# asm 1: mov  $1634760805,>in0=int32#1
+# asm 2: mov  $1634760805,>in0=%eax
+mov  $1634760805,%eax
+
+# qhasm:   in8 = 0
+# asm 1: mov  $0,>in8=int32#2
+# asm 2: mov  $0,>in8=%ecx
+mov  $0,%ecx
+
+# qhasm:   ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int32#1,>x0=stack128#3
+# asm 2: movl <in0=%eax,>x0=64(%esp)
+movl %eax,64(%esp)
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   in6 = *(uint32 *) (iv + 0)
+# asm 1: movl 0(<iv=int32#4),>in6=int32#1
+# asm 2: movl 0(<iv=%ebx),>in6=%eax
+movl 0(%ebx),%eax
+
+# qhasm:   in7 = *(uint32 *) (iv + 4)
+# asm 1: movl 4(<iv=int32#4),>in7=int32#2
+# asm 2: movl 4(<iv=%ebx),>in7=%ecx
+movl 4(%ebx),%ecx
+
+# qhasm:   ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int32#1,8+<x1=stack128#2
+# asm 2: movl <in6=%eax,8+<x1=48(%esp)
+movl %eax,8+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int32#2,12+<x2=stack128#4
+# asm 2: movl <in7=%ecx,12+<x2=80(%esp)
+movl %ecx,12+80(%esp)
+
+# qhasm:   in9 = 0
+# asm 1: mov  $0,>in9=int32#1
+# asm 2: mov  $0,>in9=%eax
+mov  $0,%eax
+
+# qhasm:   in10 = 2036477234
+# asm 1: mov  $2036477234,>in10=int32#2
+# asm 2: mov  $2036477234,>in10=%ecx
+mov  $2036477234,%ecx
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#1,4+<x3=stack128#1
+# asm 2: movl <in9=%eax,4+<x3=32(%esp)
+movl %eax,4+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int32#2,8+<x0=stack128#3
+# asm 2: movl <in10=%ecx,8+<x0=64(%esp)
+movl %ecx,8+64(%esp)
+
+# qhasm:   in1 = *(uint32 *) (k + 0)
+# asm 1: movl 0(<k=int32#7),>in1=int32#1
+# asm 2: movl 0(<k=%ebp),>in1=%eax
+movl 0(%ebp),%eax
+
+# qhasm:   in2 = *(uint32 *) (k + 4)
+# asm 1: movl 4(<k=int32#7),>in2=int32#2
+# asm 2: movl 4(<k=%ebp),>in2=%ecx
+movl 4(%ebp),%ecx
+
+# qhasm:   in3 = *(uint32 *) (k + 8)
+# asm 1: movl 8(<k=int32#7),>in3=int32#3
+# asm 2: movl 8(<k=%ebp),>in3=%edx
+movl 8(%ebp),%edx
+
+# qhasm:   in5 = 857760878
+# asm 1: mov  $857760878,>in5=int32#4
+# asm 2: mov  $857760878,>in5=%ebx
+mov  $857760878,%ebx
+
+# qhasm:   ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int32#1,4+<x1=stack128#2
+# asm 2: movl <in1=%eax,4+<x1=48(%esp)
+movl %eax,4+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int32#2,8+<x2=stack128#4
+# asm 2: movl <in2=%ecx,8+<x2=80(%esp)
+movl %ecx,8+80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int32#3,12+<x3=stack128#1
+# asm 2: movl <in3=%edx,12+<x3=32(%esp)
+movl %edx,12+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int32#4,4+<x0=stack128#3
+# asm 2: movl <in5=%ebx,4+<x0=64(%esp)
+movl %ebx,4+64(%esp)
+
+# qhasm:   in11 = *(uint32 *) (k + 16)
+# asm 1: movl 16(<k=int32#7),>in11=int32#1
+# asm 2: movl 16(<k=%ebp),>in11=%eax
+movl 16(%ebp),%eax
+
+# qhasm:   in13 = *(uint32 *) (k + 24)
+# asm 1: movl 24(<k=int32#7),>in13=int32#2
+# asm 2: movl 24(<k=%ebp),>in13=%ecx
+movl 24(%ebp),%ecx
+
+# qhasm:   in14 = *(uint32 *) (k + 28)
+# asm 1: movl 28(<k=int32#7),>in14=int32#3
+# asm 2: movl 28(<k=%ebp),>in14=%edx
+movl 28(%ebp),%edx
+
+# qhasm:   in15 = 1797285236
+# asm 1: mov  $1797285236,>in15=int32#4
+# asm 2: mov  $1797285236,>in15=%ebx
+mov  $1797285236,%ebx
+
+# qhasm:   ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int32#1,12+<x1=stack128#2
+# asm 2: movl <in11=%eax,12+<x1=48(%esp)
+movl %eax,12+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int32#2,4+<x2=stack128#4
+# asm 2: movl <in13=%ecx,4+<x2=80(%esp)
+movl %ecx,4+80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int32#3,8+<x3=stack128#1
+# asm 2: movl <in14=%edx,8+<x3=32(%esp)
+movl %edx,8+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int32#4,12+<x0=stack128#3
+# asm 2: movl <in15=%ebx,12+<x0=64(%esp)
+movl %ebx,12+64(%esp)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:                               unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int32#1
+# asm 2: cmp  $256,<bytes=%eax
+cmp  $256,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
+
+# qhasm:   z0 = x0
+# asm 1: movdqa <x0=stack128#3,>z0=int6464#1
+# asm 2: movdqa <x0=64(%esp),>z0=%xmm0
+movdqa 64(%esp),%xmm0
+
+# qhasm:   z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm:   z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm:   z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm:   orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=96(%esp)
+movdqa %xmm1,96(%esp)
+
+# qhasm:   orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=112(%esp)
+movdqa %xmm2,112(%esp)
+
+# qhasm:   orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=128(%esp)
+movdqa %xmm3,128(%esp)
+
+# qhasm:   orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=144(%esp)
+movdqa %xmm0,144(%esp)
+
+# qhasm:   z1 = x1
+# asm 1: movdqa <x1=stack128#2,>z1=int6464#1
+# asm 2: movdqa <x1=48(%esp),>z1=%xmm0
+movdqa 48(%esp),%xmm0
+
+# qhasm:   z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm:   z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm:   z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm:   z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm:   orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=160(%esp)
+movdqa %xmm1,160(%esp)
+
+# qhasm:   orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=176(%esp)
+movdqa %xmm2,176(%esp)
+
+# qhasm:   orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=192(%esp)
+movdqa %xmm3,192(%esp)
+
+# qhasm:   orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=208(%esp)
+movdqa %xmm0,208(%esp)
+
+# qhasm:   z2 = x2
+# asm 1: movdqa <x2=stack128#4,>z2=int6464#1
+# asm 2: movdqa <x2=80(%esp),>z2=%xmm0
+movdqa 80(%esp),%xmm0
+
+# qhasm:   z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm:   z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm:   z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm:   orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=224(%esp)
+movdqa %xmm1,224(%esp)
+
+# qhasm:   orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=240(%esp)
+movdqa %xmm2,240(%esp)
+
+# qhasm:   orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=256(%esp)
+movdqa %xmm0,256(%esp)
+
+# qhasm:   z3 = x3
+# asm 1: movdqa <x3=stack128#1,>z3=int6464#1
+# asm 2: movdqa <x3=32(%esp),>z3=%xmm0
+movdqa 32(%esp),%xmm0
+
+# qhasm:   z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm:   z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm:   orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=272(%esp)
+movdqa %xmm1,272(%esp)
+
+# qhasm:   orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=288(%esp)
+movdqa %xmm2,288(%esp)
+
+# qhasm:   orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=304(%esp)
+movdqa %xmm0,304(%esp)
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#4,>in8=int32#2
+# asm 2: movl <x2=80(%esp),>in8=%ecx
+movl 80(%esp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#1,>in9=int32#3
+# asm 2: movl 4+<x3=32(%esp),>in9=%edx
+movl 4+32(%esp),%edx
+
+# qhasm:   ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int32#2,>orig8=stack128#19
+# asm 2: movl <in8=%ecx,>orig8=320(%esp)
+movl %ecx,320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int32#3,>orig9=stack128#20
+# asm 2: movl <in9=%edx,>orig9=336(%esp)
+movl %edx,336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int32#2,4+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,4+<orig8=320(%esp)
+movl %ecx,4+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int32#3,4+<orig9=stack128#20
+# asm 2: movl <in9=%edx,4+<orig9=336(%esp)
+movl %edx,4+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int32#2,8+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,8+<orig8=320(%esp)
+movl %ecx,8+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int32#3,8+<orig9=stack128#20
+# asm 2: movl <in9=%edx,8+<orig9=336(%esp)
+movl %edx,8+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int32#2,12+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,12+<orig8=320(%esp)
+movl %ecx,12+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int32#3,12+<orig9=stack128#20
+# asm 2: movl <in9=%edx,12+<orig9=336(%esp)
+movl %edx,12+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#3,4+<x3=stack128#1
+# asm 2: movl <in9=%edx,4+<x3=32(%esp)
+movl %edx,4+32(%esp)
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#1,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%eax,>bytes_stack=24(%esp)
+movl %eax,24(%esp)
+
+# qhasm: i = 12
+# asm 1: mov  $12,>i=int32#1
+# asm 2: mov  $12,>i=%eax
+mov  $12,%eax
+
+# qhasm:   z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=96(%esp),>z5=%xmm0
+movdqa 96(%esp),%xmm0
+
+# qhasm:   z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=112(%esp),>z10=%xmm1
+movdqa 112(%esp),%xmm1
+
+# qhasm:   z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=128(%esp),>z15=%xmm2
+movdqa 128(%esp),%xmm2
+
+# qhasm:   z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=288(%esp),>z14=%xmm3
+movdqa 288(%esp),%xmm3
+
+# qhasm:   z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=304(%esp),>z3=%xmm4
+movdqa 304(%esp),%xmm4
+
+# qhasm:   z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=160(%esp),>z6=%xmm5
+movdqa 160(%esp),%xmm5
+
+# qhasm:   z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=176(%esp),>z11=%xmm6
+movdqa 176(%esp),%xmm6
+
+# qhasm:   z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=208(%esp),>z1=%xmm7
+movdqa 208(%esp),%xmm7
+
+# qhasm:   z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#21
+# asm 2: movdqa <z5=%xmm0,>z5_stack=352(%esp)
+movdqa %xmm0,352(%esp)
+
+# qhasm:   z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#22
+# asm 2: movdqa <z10=%xmm1,>z10_stack=368(%esp)
+movdqa %xmm1,368(%esp)
+
+# qhasm:   z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#23
+# asm 2: movdqa <z15=%xmm2,>z15_stack=384(%esp)
+movdqa %xmm2,384(%esp)
+
+# qhasm:   z14_stack = z14
+# asm 1: movdqa <z14=int6464#4,>z14_stack=stack128#24
+# asm 2: movdqa <z14=%xmm3,>z14_stack=400(%esp)
+movdqa %xmm3,400(%esp)
+
+# qhasm:   z3_stack = z3
+# asm 1: movdqa <z3=int6464#5,>z3_stack=stack128#25
+# asm 2: movdqa <z3=%xmm4,>z3_stack=416(%esp)
+movdqa %xmm4,416(%esp)
+
+# qhasm:   z6_stack = z6
+# asm 1: movdqa <z6=int6464#6,>z6_stack=stack128#26
+# asm 2: movdqa <z6=%xmm5,>z6_stack=432(%esp)
+movdqa %xmm5,432(%esp)
+
+# qhasm:   z11_stack = z11
+# asm 1: movdqa <z11=int6464#7,>z11_stack=stack128#27
+# asm 2: movdqa <z11=%xmm6,>z11_stack=448(%esp)
+movdqa %xmm6,448(%esp)
+
+# qhasm:   z1_stack = z1
+# asm 1: movdqa <z1=int6464#8,>z1_stack=stack128#28
+# asm 2: movdqa <z1=%xmm7,>z1_stack=464(%esp)
+movdqa %xmm7,464(%esp)
+
+# qhasm:   z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#5
+# asm 2: movdqa <orig7=224(%esp),>z7=%xmm4
+movdqa 224(%esp),%xmm4
+
+# qhasm:   z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#6
+# asm 2: movdqa <orig13=240(%esp),>z13=%xmm5
+movdqa 240(%esp),%xmm5
+
+# qhasm:   z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#7
+# asm 2: movdqa <orig2=256(%esp),>z2=%xmm6
+movdqa 256(%esp),%xmm6
+
+# qhasm:   z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#8
+# asm 2: movdqa <orig9=336(%esp),>z9=%xmm7
+movdqa 336(%esp),%xmm7
+
+# qhasm:                   p = orig0
+# asm 1: movdqa <orig0=stack128#8,>p=int6464#1
+# asm 2: movdqa <orig0=144(%esp),>p=%xmm0
+movdqa 144(%esp),%xmm0
+
+# qhasm:                   t = orig12
+# asm 1: movdqa <orig12=stack128#11,>t=int6464#3
+# asm 2: movdqa <orig12=192(%esp),>t=%xmm2
+movdqa 192(%esp),%xmm2
+
+# qhasm:                   q = orig4
+# asm 1: movdqa <orig4=stack128#16,>q=int6464#4
+# asm 2: movdqa <orig4=272(%esp),>q=%xmm3
+movdqa 272(%esp),%xmm3
+
+# qhasm:                   r = orig8
+# asm 1: movdqa <orig8=stack128#19,>r=int6464#2
+# asm 2: movdqa <orig8=320(%esp),>r=%xmm1
+movdqa 320(%esp),%xmm1
+
+# qhasm:   z7_stack = z7
+# asm 1: movdqa <z7=int6464#5,>z7_stack=stack128#29
+# asm 2: movdqa <z7=%xmm4,>z7_stack=480(%esp)
+movdqa %xmm4,480(%esp)
+
+# qhasm:   z13_stack = z13
+# asm 1: movdqa <z13=int6464#6,>z13_stack=stack128#30
+# asm 2: movdqa <z13=%xmm5,>z13_stack=496(%esp)
+movdqa %xmm5,496(%esp)
+
+# qhasm:   z2_stack = z2
+# asm 1: movdqa <z2=int6464#7,>z2_stack=stack128#31
+# asm 2: movdqa <z2=%xmm6,>z2_stack=512(%esp)
+movdqa %xmm6,512(%esp)
+
+# qhasm:   z9_stack = z9
+# asm 1: movdqa <z9=int6464#8,>z9_stack=stack128#32
+# asm 2: movdqa <z9=%xmm7,>z9_stack=528(%esp)
+movdqa %xmm7,528(%esp)
+
+# qhasm:   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#33
+# asm 2: movdqa <p=%xmm0,>z0_stack=544(%esp)
+movdqa %xmm0,544(%esp)
+
+# qhasm:   z12_stack = t
+# asm 1: movdqa <t=int6464#3,>z12_stack=stack128#34
+# asm 2: movdqa <t=%xmm2,>z12_stack=560(%esp)
+movdqa %xmm2,560(%esp)
+
+# qhasm:   z4_stack = q
+# asm 1: movdqa <q=int6464#4,>z4_stack=stack128#35
+# asm 2: movdqa <q=%xmm3,>z4_stack=576(%esp)
+movdqa %xmm3,576(%esp)
+
+# qhasm:   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#36
+# asm 2: movdqa <r=%xmm1,>z8_stack=592(%esp)
+movdqa %xmm1,592(%esp)
+
+# qhasm: mainloop1:
+._mainloop1:
+
+# qhasm:                   assign xmm0 to p
+
+# qhasm:                   assign xmm1 to r
+
+# qhasm:                   assign xmm2 to t
+
+# qhasm:                   assign xmm3 to q
+
+# qhasm:                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                   z4_stack = q
+# asm 1: movdqa <q=int6464#4,>z4_stack=stack128#33
+# asm 2: movdqa <q=%xmm3,>z4_stack=544(%esp)
+movdqa %xmm3,544(%esp)
+
+# qhasm:                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#34
+# asm 2: movdqa <r=%xmm1,>z8_stack=560(%esp)
+movdqa %xmm1,560(%esp)
+
+# qhasm: uint32323232      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                   mt = z1_stack
+# asm 1: movdqa <z1_stack=stack128#28,>mt=int6464#3
+# asm 2: movdqa <z1_stack=464(%esp),>mt=%xmm2
+movdqa 464(%esp),%xmm2
+
+# qhasm:                                   mp = z5_stack
+# asm 1: movdqa <z5_stack=stack128#21,>mp=int6464#5
+# asm 2: movdqa <z5_stack=352(%esp),>mp=%xmm4
+movdqa 352(%esp),%xmm4
+
+# qhasm:                                   mq = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>mq=int6464#4
+# asm 2: movdqa <z9_stack=528(%esp),>mq=%xmm3
+movdqa 528(%esp),%xmm3
+
+# qhasm:                                   mr = z13_stack
+# asm 1: movdqa <z13_stack=stack128#30,>mr=int6464#6
+# asm 2: movdqa <z13_stack=496(%esp),>mr=%xmm5
+movdqa 496(%esp),%xmm5
+
+# qhasm:                   z12_stack = s
+# asm 1: movdqa <s=int6464#7,>z12_stack=stack128#30
+# asm 2: movdqa <s=%xmm6,>z12_stack=496(%esp)
+movdqa %xmm6,496(%esp)
+
+# qhasm: uint32323232      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <p=%xmm0,>z0_stack=352(%esp)
+movdqa %xmm0,352(%esp)
+
+# qhasm:                                   assign xmm2 to mt
+
+# qhasm:                                   assign xmm3 to mq
+
+# qhasm:                                   assign xmm4 to mp
+
+# qhasm:                                   assign xmm5 to mr
+
+# qhasm:                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                   z9_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z9_stack=stack128#32
+# asm 2: movdqa <mq=%xmm3,>z9_stack=528(%esp)
+movdqa %xmm3,528(%esp)
+
+# qhasm:                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                   z13_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z13_stack=stack128#35
+# asm 2: movdqa <mr=%xmm5,>z13_stack=576(%esp)
+movdqa %xmm5,576(%esp)
+
+# qhasm: uint32323232                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                                                   t = z6_stack
+# asm 1: movdqa <z6_stack=stack128#26,>t=int6464#3
+# asm 2: movdqa <z6_stack=432(%esp),>t=%xmm2
+movdqa 432(%esp),%xmm2
+
+# qhasm:                                                   p = z10_stack
+# asm 1: movdqa <z10_stack=stack128#22,>p=int6464#1
+# asm 2: movdqa <z10_stack=368(%esp),>p=%xmm0
+movdqa 368(%esp),%xmm0
+
+# qhasm:                                                   q = z14_stack
+# asm 1: movdqa <z14_stack=stack128#24,>q=int6464#4
+# asm 2: movdqa <z14_stack=400(%esp),>q=%xmm3
+movdqa 400(%esp),%xmm3
+
+# qhasm:                                                   r = z2_stack
+# asm 1: movdqa <z2_stack=stack128#31,>r=int6464#2
+# asm 2: movdqa <z2_stack=512(%esp),>r=%xmm1
+movdqa 512(%esp),%xmm1
+
+# qhasm:                                   z1_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z1_stack=stack128#22
+# asm 2: movdqa <ms=%xmm6,>z1_stack=368(%esp)
+movdqa %xmm6,368(%esp)
+
+# qhasm: uint32323232                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                   z5_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z5_stack=stack128#24
+# asm 2: movdqa <mp=%xmm4,>z5_stack=400(%esp)
+movdqa %xmm4,400(%esp)
+
+# qhasm:                                                   assign xmm0 to p
+
+# qhasm:                                                   assign xmm1 to r
+
+# qhasm:                                                   assign xmm2 to t
+
+# qhasm:                                                   assign xmm3 to q
+
+# qhasm:                                                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                                                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                                   z14_stack = q
+# asm 1: movdqa <q=int6464#4,>z14_stack=stack128#36
+# asm 2: movdqa <q=%xmm3,>z14_stack=592(%esp)
+movdqa %xmm3,592(%esp)
+
+# qhasm:                                                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232                                      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                                                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232                                      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                                                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                                   z2_stack = r
+# asm 1: movdqa <r=int6464#2,>z2_stack=stack128#26
+# asm 2: movdqa <r=%xmm1,>z2_stack=432(%esp)
+movdqa %xmm1,432(%esp)
+
+# qhasm: uint32323232                                      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                                                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232                                      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                                                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                                                   mt = z11_stack
+# asm 1: movdqa <z11_stack=stack128#27,>mt=int6464#3
+# asm 2: movdqa <z11_stack=448(%esp),>mt=%xmm2
+movdqa 448(%esp),%xmm2
+
+# qhasm:                                                                   mp = z15_stack
+# asm 1: movdqa <z15_stack=stack128#23,>mp=int6464#5
+# asm 2: movdqa <z15_stack=384(%esp),>mp=%xmm4
+movdqa 384(%esp),%xmm4
+
+# qhasm:                                                                   mq = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>mq=int6464#4
+# asm 2: movdqa <z3_stack=416(%esp),>mq=%xmm3
+movdqa 416(%esp),%xmm3
+
+# qhasm:                                                                   mr = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>mr=int6464#6
+# asm 2: movdqa <z7_stack=480(%esp),>mr=%xmm5
+movdqa 480(%esp),%xmm5
+
+# qhasm:                                                   z6_stack = s
+# asm 1: movdqa <s=int6464#7,>z6_stack=stack128#23
+# asm 2: movdqa <s=%xmm6,>z6_stack=384(%esp)
+movdqa %xmm6,384(%esp)
+
+# qhasm: uint32323232                                      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                                                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232                                      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                                                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232                                      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                                                   z10_stack = p
+# asm 1: movdqa <p=int6464#1,>z10_stack=stack128#27
+# asm 2: movdqa <p=%xmm0,>z10_stack=448(%esp)
+movdqa %xmm0,448(%esp)
+
+# qhasm:                                                                   assign xmm2 to mt
+
+# qhasm:                                                                   assign xmm3 to mq
+
+# qhasm:                                                                   assign xmm4 to mp
+
+# qhasm:                                                                   assign xmm5 to mr
+
+# qhasm:                                                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                                                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                                                   z3_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z3_stack=stack128#25
+# asm 2: movdqa <mq=%xmm3,>z3_stack=416(%esp)
+movdqa %xmm3,416(%esp)
+
+# qhasm:                                                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                                                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                                                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                                                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                                                   z7_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z7_stack=stack128#29
+# asm 2: movdqa <mr=%xmm5,>z7_stack=480(%esp)
+movdqa %xmm5,480(%esp)
+
+# qhasm: uint32323232                                                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                                                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                   t = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>t=int6464#3
+# asm 2: movdqa <z3_stack=416(%esp),>t=%xmm2
+movdqa 416(%esp),%xmm2
+
+# qhasm:                   p = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>p=int6464#1
+# asm 2: movdqa <z0_stack=352(%esp),>p=%xmm0
+movdqa 352(%esp),%xmm0
+
+# qhasm:                   q = z1_stack
+# asm 1: movdqa <z1_stack=stack128#22,>q=int6464#4
+# asm 2: movdqa <z1_stack=368(%esp),>q=%xmm3
+movdqa 368(%esp),%xmm3
+
+# qhasm:                   r = z2_stack
+# asm 1: movdqa <z2_stack=stack128#26,>r=int6464#2
+# asm 2: movdqa <z2_stack=432(%esp),>r=%xmm1
+movdqa 432(%esp),%xmm1
+
+# qhasm:                                                                   z11_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z11_stack=stack128#21
+# asm 2: movdqa <ms=%xmm6,>z11_stack=352(%esp)
+movdqa %xmm6,352(%esp)
+
+# qhasm: uint32323232                                                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                                                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                                                   z15_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z15_stack=stack128#22
+# asm 2: movdqa <mp=%xmm4,>z15_stack=368(%esp)
+movdqa %xmm4,368(%esp)
+
+# qhasm:                   assign xmm0 to p
+
+# qhasm:                   assign xmm1 to r
+
+# qhasm:                   assign xmm2 to t
+
+# qhasm:                   assign xmm3 to q
+
+# qhasm:                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                   z1_stack = q
+# asm 1: movdqa <q=int6464#4,>z1_stack=stack128#28
+# asm 2: movdqa <q=%xmm3,>z1_stack=464(%esp)
+movdqa %xmm3,464(%esp)
+
+# qhasm:                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                   z2_stack = r
+# asm 1: movdqa <r=int6464#2,>z2_stack=stack128#31
+# asm 2: movdqa <r=%xmm1,>z2_stack=512(%esp)
+movdqa %xmm1,512(%esp)
+
+# qhasm: uint32323232      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                   mt = z4_stack
+# asm 1: movdqa <z4_stack=stack128#33,>mt=int6464#3
+# asm 2: movdqa <z4_stack=544(%esp),>mt=%xmm2
+movdqa 544(%esp),%xmm2
+
+# qhasm:                                   mp = z5_stack
+# asm 1: movdqa <z5_stack=stack128#24,>mp=int6464#5
+# asm 2: movdqa <z5_stack=400(%esp),>mp=%xmm4
+movdqa 400(%esp),%xmm4
+
+# qhasm:                                   mq = z6_stack
+# asm 1: movdqa <z6_stack=stack128#23,>mq=int6464#4
+# asm 2: movdqa <z6_stack=384(%esp),>mq=%xmm3
+movdqa 384(%esp),%xmm3
+
+# qhasm:                                   mr = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>mr=int6464#6
+# asm 2: movdqa <z7_stack=480(%esp),>mr=%xmm5
+movdqa 480(%esp),%xmm5
+
+# qhasm:                   z3_stack = s
+# asm 1: movdqa <s=int6464#7,>z3_stack=stack128#25
+# asm 2: movdqa <s=%xmm6,>z3_stack=416(%esp)
+movdqa %xmm6,416(%esp)
+
+# qhasm: uint32323232      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#33
+# asm 2: movdqa <p=%xmm0,>z0_stack=544(%esp)
+movdqa %xmm0,544(%esp)
+
+# qhasm:                                   assign xmm2 to mt
+
+# qhasm:                                   assign xmm3 to mq
+
+# qhasm:                                   assign xmm4 to mp
+
+# qhasm:                                   assign xmm5 to mr
+
+# qhasm:                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                   z6_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z6_stack=stack128#26
+# asm 2: movdqa <mq=%xmm3,>z6_stack=432(%esp)
+movdqa %xmm3,432(%esp)
+
+# qhasm:                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                   z7_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z7_stack=stack128#29
+# asm 2: movdqa <mr=%xmm5,>z7_stack=480(%esp)
+movdqa %xmm5,480(%esp)
+
+# qhasm: uint32323232                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                                                   t = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>t=int6464#3
+# asm 2: movdqa <z9_stack=528(%esp),>t=%xmm2
+movdqa 528(%esp),%xmm2
+
+# qhasm:                                                   p = z10_stack
+# asm 1: movdqa <z10_stack=stack128#27,>p=int6464#1
+# asm 2: movdqa <z10_stack=448(%esp),>p=%xmm0
+movdqa 448(%esp),%xmm0
+
+# qhasm:                                                   q = z11_stack
+# asm 1: movdqa <z11_stack=stack128#21,>q=int6464#4
+# asm 2: movdqa <z11_stack=352(%esp),>q=%xmm3
+movdqa 352(%esp),%xmm3
+
+# qhasm:                                                   r = z8_stack
+# asm 1: movdqa <z8_stack=stack128#34,>r=int6464#2
+# asm 2: movdqa <z8_stack=560(%esp),>r=%xmm1
+movdqa 560(%esp),%xmm1
+
+# qhasm:                                   z4_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z4_stack=stack128#34
+# asm 2: movdqa <ms=%xmm6,>z4_stack=560(%esp)
+movdqa %xmm6,560(%esp)
+
+# qhasm: uint32323232                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                   z5_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z5_stack=stack128#21
+# asm 2: movdqa <mp=%xmm4,>z5_stack=352(%esp)
+movdqa %xmm4,352(%esp)
+
+# qhasm:                                                   assign xmm0 to p
+
+# qhasm:                                                   assign xmm1 to r
+
+# qhasm:                                                   assign xmm2 to t
+
+# qhasm:                                                   assign xmm3 to q
+
+# qhasm:                                                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                                                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                                   z11_stack = q
+# asm 1: movdqa <q=int6464#4,>z11_stack=stack128#27
+# asm 2: movdqa <q=%xmm3,>z11_stack=448(%esp)
+movdqa %xmm3,448(%esp)
+
+# qhasm:                                                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232                                      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                                                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232                                      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                                                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                                   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#37
+# asm 2: movdqa <r=%xmm1,>z8_stack=608(%esp)
+movdqa %xmm1,608(%esp)
+
+# qhasm: uint32323232                                      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                                                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232                                      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                                                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                                                   mt = z14_stack
+# asm 1: movdqa <z14_stack=stack128#36,>mt=int6464#3
+# asm 2: movdqa <z14_stack=592(%esp),>mt=%xmm2
+movdqa 592(%esp),%xmm2
+
+# qhasm:                                                                   mp = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>mp=int6464#5
+# asm 2: movdqa <z15_stack=368(%esp),>mp=%xmm4
+movdqa 368(%esp),%xmm4
+
+# qhasm:                                                                   mq = z12_stack
+# asm 1: movdqa <z12_stack=stack128#30,>mq=int6464#4
+# asm 2: movdqa <z12_stack=496(%esp),>mq=%xmm3
+movdqa 496(%esp),%xmm3
+
+# qhasm:                                                                   mr = z13_stack
+# asm 1: movdqa <z13_stack=stack128#35,>mr=int6464#6
+# asm 2: movdqa <z13_stack=576(%esp),>mr=%xmm5
+movdqa 576(%esp),%xmm5
+
+# qhasm:                                                   z9_stack = s
+# asm 1: movdqa <s=int6464#7,>z9_stack=stack128#32
+# asm 2: movdqa <s=%xmm6,>z9_stack=528(%esp)
+movdqa %xmm6,528(%esp)
+
+# qhasm: uint32323232                                      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                                                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232                                      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                                                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232                                      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                                                   z10_stack = p
+# asm 1: movdqa <p=int6464#1,>z10_stack=stack128#22
+# asm 2: movdqa <p=%xmm0,>z10_stack=368(%esp)
+movdqa %xmm0,368(%esp)
+
+# qhasm:                                                                   assign xmm2 to mt
+
+# qhasm:                                                                   assign xmm3 to mq
+
+# qhasm:                                                                   assign xmm4 to mp
+
+# qhasm:                                                                   assign xmm5 to mr
+
+# qhasm:                                                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                                                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                                                   z12_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z12_stack=stack128#35
+# asm 2: movdqa <mq=%xmm3,>z12_stack=576(%esp)
+movdqa %xmm3,576(%esp)
+
+# qhasm:                                                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                                                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                                                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                                                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                                                   z13_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z13_stack=stack128#30
+# asm 2: movdqa <mr=%xmm5,>z13_stack=496(%esp)
+movdqa %xmm5,496(%esp)
+
+# qhasm: uint32323232                                                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                                                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                   t = z12_stack
+# asm 1: movdqa <z12_stack=stack128#35,>t=int6464#3
+# asm 2: movdqa <z12_stack=576(%esp),>t=%xmm2
+movdqa 576(%esp),%xmm2
+
+# qhasm:                   p = z0_stack
+# asm 1: movdqa <z0_stack=stack128#33,>p=int6464#1
+# asm 2: movdqa <z0_stack=544(%esp),>p=%xmm0
+movdqa 544(%esp),%xmm0
+
+# qhasm:                   q = z4_stack
+# asm 1: movdqa <z4_stack=stack128#34,>q=int6464#4
+# asm 2: movdqa <z4_stack=560(%esp),>q=%xmm3
+movdqa 560(%esp),%xmm3
+
+# qhasm:                   r = z8_stack
+# asm 1: movdqa <z8_stack=stack128#37,>r=int6464#2
+# asm 2: movdqa <z8_stack=608(%esp),>r=%xmm1
+movdqa 608(%esp),%xmm1
+
+# qhasm:                                                                   z14_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z14_stack=stack128#24
+# asm 2: movdqa <ms=%xmm6,>z14_stack=400(%esp)
+movdqa %xmm6,400(%esp)
+
+# qhasm: uint32323232                                                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                                                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                                                   z15_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z15_stack=stack128#23
+# asm 2: movdqa <mp=%xmm4,>z15_stack=384(%esp)
+movdqa %xmm4,384(%esp)
+
+# qhasm:                   unsigned>? i -= 2
+# asm 1: sub  $2,<i=int32#1
+# asm 2: sub  $2,<i=%eax
+sub  $2,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
+
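+# note: end of the vectorized main loop.  Each pass performs two Salsa20 rounds
+# (hence i -= 2) on four 64-byte blocks at once, one block per 32-bit lane of
+# the z0..z15 state; the p/q/r/s/t/u and mp/mq/mr/ms/mt/mu chains are two
+# quarter-round computations interleaved to keep the SSE units busy.  The code
+# below adds the original input back into the state (the Salsa20 feed-forward)
+# and XORs the resulting keystream with the message m into out.
+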
+# qhasm:   out = out_stack
+# asm 1: movl <out_stack=stack32#6,>out=int32#6
+# asm 2: movl <out_stack=20(%esp),>out=%edi
+movl 20(%esp),%edi
+
+# qhasm:   z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#33,>z0=int6464#1
+# asm 2: movdqa <z0_stack=544(%esp),>z0=%xmm0
+movdqa 544(%esp),%xmm0
+
+# qhasm:   z1 = z1_stack
+# asm 1: movdqa <z1_stack=stack128#28,>z1=int6464#2
+# asm 2: movdqa <z1_stack=464(%esp),>z1=%xmm1
+movdqa 464(%esp),%xmm1
+
+# qhasm:   z2 = z2_stack
+# asm 1: movdqa <z2_stack=stack128#31,>z2=int6464#3
+# asm 2: movdqa <z2_stack=512(%esp),>z2=%xmm2
+movdqa 512(%esp),%xmm2
+
+# qhasm:   z3 = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>z3=int6464#4
+# asm 2: movdqa <z3_stack=416(%esp),>z3=%xmm3
+movdqa 416(%esp),%xmm3
+
+# qhasm:   uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#1
+# asm 2: paddd <orig0=144(%esp),<z0=%xmm0
+paddd 144(%esp),%xmm0
+
+# qhasm:   uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#2
+# asm 2: paddd <orig1=208(%esp),<z1=%xmm1
+paddd 208(%esp),%xmm1
+
+# qhasm:   uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#3
+# asm 2: paddd <orig2=256(%esp),<z2=%xmm2
+paddd 256(%esp),%xmm2
+
+# qhasm:   uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#4
+# asm 2: paddd <orig3=304(%esp),<z3=%xmm3
+paddd 304(%esp),%xmm3
+
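+# note: with the feed-forward done for z0..z3, each movd / pshufd $0x39 pair
+# below peels off one 32-bit lane at a time: lane 0 is XORed into message bytes
+# 0..15 and stored at out+0, lane 1 at offset 64, lane 2 at 128, lane 3 at 192,
+# one 64-byte block per lane.  Rows z4..z7, z8..z11 and z12..z15 repeat the
+# same pattern at offsets 16, 32 and 48 within each block.
+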
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int32#5),<in0=int32#1
+# asm 2: xorl 0(<m=%esi),<in0=%eax
+xorl 0(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int32#5),<in1=int32#2
+# asm 2: xorl 4(<m=%esi),<in1=%ecx
+xorl 4(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int32#5),<in2=int32#3
+# asm 2: xorl 8(<m=%esi),<in2=%edx
+xorl 8(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int32#5),<in3=int32#4
+# asm 2: xorl 12(<m=%esi),<in3=%ebx
+xorl 12(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int32#1,0(<out=int32#6)
+# asm 2: movl <in0=%eax,0(<out=%edi)
+movl %eax,0(%edi)
+
+# qhasm:   *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int32#2,4(<out=int32#6)
+# asm 2: movl <in1=%ecx,4(<out=%edi)
+movl %ecx,4(%edi)
+
+# qhasm:   *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int32#3,8(<out=int32#6)
+# asm 2: movl <in2=%edx,8(<out=%edi)
+movl %edx,8(%edi)
+
+# qhasm:   *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int32#4,12(<out=int32#6)
+# asm 2: movl <in3=%ebx,12(<out=%edi)
+movl %ebx,12(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int32#5),<in0=int32#1
+# asm 2: xorl 64(<m=%esi),<in0=%eax
+xorl 64(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int32#5),<in1=int32#2
+# asm 2: xorl 68(<m=%esi),<in1=%ecx
+xorl 68(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int32#5),<in2=int32#3
+# asm 2: xorl 72(<m=%esi),<in2=%edx
+xorl 72(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int32#5),<in3=int32#4
+# asm 2: xorl 76(<m=%esi),<in3=%ebx
+xorl 76(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 64) = in0
+# asm 1: movl <in0=int32#1,64(<out=int32#6)
+# asm 2: movl <in0=%eax,64(<out=%edi)
+movl %eax,64(%edi)
+
+# qhasm:   *(uint32 *) (out + 68) = in1
+# asm 1: movl <in1=int32#2,68(<out=int32#6)
+# asm 2: movl <in1=%ecx,68(<out=%edi)
+movl %ecx,68(%edi)
+
+# qhasm:   *(uint32 *) (out + 72) = in2
+# asm 1: movl <in2=int32#3,72(<out=int32#6)
+# asm 2: movl <in2=%edx,72(<out=%edi)
+movl %edx,72(%edi)
+
+# qhasm:   *(uint32 *) (out + 76) = in3
+# asm 1: movl <in3=int32#4,76(<out=int32#6)
+# asm 2: movl <in3=%ebx,76(<out=%edi)
+movl %ebx,76(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int32#5),<in0=int32#1
+# asm 2: xorl 128(<m=%esi),<in0=%eax
+xorl 128(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int32#5),<in1=int32#2
+# asm 2: xorl 132(<m=%esi),<in1=%ecx
+xorl 132(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int32#5),<in2=int32#3
+# asm 2: xorl 136(<m=%esi),<in2=%edx
+xorl 136(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int32#5),<in3=int32#4
+# asm 2: xorl 140(<m=%esi),<in3=%ebx
+xorl 140(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 128) = in0
+# asm 1: movl <in0=int32#1,128(<out=int32#6)
+# asm 2: movl <in0=%eax,128(<out=%edi)
+movl %eax,128(%edi)
+
+# qhasm:   *(uint32 *) (out + 132) = in1
+# asm 1: movl <in1=int32#2,132(<out=int32#6)
+# asm 2: movl <in1=%ecx,132(<out=%edi)
+movl %ecx,132(%edi)
+
+# qhasm:   *(uint32 *) (out + 136) = in2
+# asm 1: movl <in2=int32#3,136(<out=int32#6)
+# asm 2: movl <in2=%edx,136(<out=%edi)
+movl %edx,136(%edi)
+
+# qhasm:   *(uint32 *) (out + 140) = in3
+# asm 1: movl <in3=int32#4,140(<out=int32#6)
+# asm 2: movl <in3=%ebx,140(<out=%edi)
+movl %ebx,140(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int32#5),<in0=int32#1
+# asm 2: xorl 192(<m=%esi),<in0=%eax
+xorl 192(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int32#5),<in1=int32#2
+# asm 2: xorl 196(<m=%esi),<in1=%ecx
+xorl 196(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int32#5),<in2=int32#3
+# asm 2: xorl 200(<m=%esi),<in2=%edx
+xorl 200(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int32#5),<in3=int32#4
+# asm 2: xorl 204(<m=%esi),<in3=%ebx
+xorl 204(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 192) = in0
+# asm 1: movl <in0=int32#1,192(<out=int32#6)
+# asm 2: movl <in0=%eax,192(<out=%edi)
+movl %eax,192(%edi)
+
+# qhasm:   *(uint32 *) (out + 196) = in1
+# asm 1: movl <in1=int32#2,196(<out=int32#6)
+# asm 2: movl <in1=%ecx,196(<out=%edi)
+movl %ecx,196(%edi)
+
+# qhasm:   *(uint32 *) (out + 200) = in2
+# asm 1: movl <in2=int32#3,200(<out=int32#6)
+# asm 2: movl <in2=%edx,200(<out=%edi)
+movl %edx,200(%edi)
+
+# qhasm:   *(uint32 *) (out + 204) = in3
+# asm 1: movl <in3=int32#4,204(<out=int32#6)
+# asm 2: movl <in3=%ebx,204(<out=%edi)
+movl %ebx,204(%edi)
+
+# qhasm:   z4 = z4_stack
+# asm 1: movdqa <z4_stack=stack128#34,>z4=int6464#1
+# asm 2: movdqa <z4_stack=560(%esp),>z4=%xmm0
+movdqa 560(%esp),%xmm0
+
+# qhasm:   z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#21,>z5=int6464#2
+# asm 2: movdqa <z5_stack=352(%esp),>z5=%xmm1
+movdqa 352(%esp),%xmm1
+
+# qhasm:   z6 = z6_stack
+# asm 1: movdqa <z6_stack=stack128#26,>z6=int6464#3
+# asm 2: movdqa <z6_stack=432(%esp),>z6=%xmm2
+movdqa 432(%esp),%xmm2
+
+# qhasm:   z7 = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>z7=int6464#4
+# asm 2: movdqa <z7_stack=480(%esp),>z7=%xmm3
+movdqa 480(%esp),%xmm3
+
+# qhasm:   uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#1
+# asm 2: paddd <orig4=272(%esp),<z4=%xmm0
+paddd 272(%esp),%xmm0
+
+# qhasm:   uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#2
+# asm 2: paddd <orig5=96(%esp),<z5=%xmm1
+paddd 96(%esp),%xmm1
+
+# qhasm:   uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#3
+# asm 2: paddd <orig6=160(%esp),<z6=%xmm2
+paddd 160(%esp),%xmm2
+
+# qhasm:   uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#4
+# asm 2: paddd <orig7=224(%esp),<z7=%xmm3
+paddd 224(%esp),%xmm3
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int32#5),<in4=int32#1
+# asm 2: xorl 16(<m=%esi),<in4=%eax
+xorl 16(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int32#5),<in5=int32#2
+# asm 2: xorl 20(<m=%esi),<in5=%ecx
+xorl 20(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int32#5),<in6=int32#3
+# asm 2: xorl 24(<m=%esi),<in6=%edx
+xorl 24(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int32#5),<in7=int32#4
+# asm 2: xorl 28(<m=%esi),<in7=%ebx
+xorl 28(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int32#1,16(<out=int32#6)
+# asm 2: movl <in4=%eax,16(<out=%edi)
+movl %eax,16(%edi)
+
+# qhasm:   *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int32#2,20(<out=int32#6)
+# asm 2: movl <in5=%ecx,20(<out=%edi)
+movl %ecx,20(%edi)
+
+# qhasm:   *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int32#3,24(<out=int32#6)
+# asm 2: movl <in6=%edx,24(<out=%edi)
+movl %edx,24(%edi)
+
+# qhasm:   *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int32#4,28(<out=int32#6)
+# asm 2: movl <in7=%ebx,28(<out=%edi)
+movl %ebx,28(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int32#5),<in4=int32#1
+# asm 2: xorl 80(<m=%esi),<in4=%eax
+xorl 80(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int32#5),<in5=int32#2
+# asm 2: xorl 84(<m=%esi),<in5=%ecx
+xorl 84(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int32#5),<in6=int32#3
+# asm 2: xorl 88(<m=%esi),<in6=%edx
+xorl 88(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int32#5),<in7=int32#4
+# asm 2: xorl 92(<m=%esi),<in7=%ebx
+xorl 92(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 80) = in4
+# asm 1: movl <in4=int32#1,80(<out=int32#6)
+# asm 2: movl <in4=%eax,80(<out=%edi)
+movl %eax,80(%edi)
+
+# qhasm:   *(uint32 *) (out + 84) = in5
+# asm 1: movl <in5=int32#2,84(<out=int32#6)
+# asm 2: movl <in5=%ecx,84(<out=%edi)
+movl %ecx,84(%edi)
+
+# qhasm:   *(uint32 *) (out + 88) = in6
+# asm 1: movl <in6=int32#3,88(<out=int32#6)
+# asm 2: movl <in6=%edx,88(<out=%edi)
+movl %edx,88(%edi)
+
+# qhasm:   *(uint32 *) (out + 92) = in7
+# asm 1: movl <in7=int32#4,92(<out=int32#6)
+# asm 2: movl <in7=%ebx,92(<out=%edi)
+movl %ebx,92(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int32#5),<in4=int32#1
+# asm 2: xorl 144(<m=%esi),<in4=%eax
+xorl 144(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int32#5),<in5=int32#2
+# asm 2: xorl 148(<m=%esi),<in5=%ecx
+xorl 148(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int32#5),<in6=int32#3
+# asm 2: xorl 152(<m=%esi),<in6=%edx
+xorl 152(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int32#5),<in7=int32#4
+# asm 2: xorl 156(<m=%esi),<in7=%ebx
+xorl 156(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 144) = in4
+# asm 1: movl <in4=int32#1,144(<out=int32#6)
+# asm 2: movl <in4=%eax,144(<out=%edi)
+movl %eax,144(%edi)
+
+# qhasm:   *(uint32 *) (out + 148) = in5
+# asm 1: movl <in5=int32#2,148(<out=int32#6)
+# asm 2: movl <in5=%ecx,148(<out=%edi)
+movl %ecx,148(%edi)
+
+# qhasm:   *(uint32 *) (out + 152) = in6
+# asm 1: movl <in6=int32#3,152(<out=int32#6)
+# asm 2: movl <in6=%edx,152(<out=%edi)
+movl %edx,152(%edi)
+
+# qhasm:   *(uint32 *) (out + 156) = in7
+# asm 1: movl <in7=int32#4,156(<out=int32#6)
+# asm 2: movl <in7=%ebx,156(<out=%edi)
+movl %ebx,156(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int32#5),<in4=int32#1
+# asm 2: xorl 208(<m=%esi),<in4=%eax
+xorl 208(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int32#5),<in5=int32#2
+# asm 2: xorl 212(<m=%esi),<in5=%ecx
+xorl 212(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int32#5),<in6=int32#3
+# asm 2: xorl 216(<m=%esi),<in6=%edx
+xorl 216(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int32#5),<in7=int32#4
+# asm 2: xorl 220(<m=%esi),<in7=%ebx
+xorl 220(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 208) = in4
+# asm 1: movl <in4=int32#1,208(<out=int32#6)
+# asm 2: movl <in4=%eax,208(<out=%edi)
+movl %eax,208(%edi)
+
+# qhasm:   *(uint32 *) (out + 212) = in5
+# asm 1: movl <in5=int32#2,212(<out=int32#6)
+# asm 2: movl <in5=%ecx,212(<out=%edi)
+movl %ecx,212(%edi)
+
+# qhasm:   *(uint32 *) (out + 216) = in6
+# asm 1: movl <in6=int32#3,216(<out=int32#6)
+# asm 2: movl <in6=%edx,216(<out=%edi)
+movl %edx,216(%edi)
+
+# qhasm:   *(uint32 *) (out + 220) = in7
+# asm 1: movl <in7=int32#4,220(<out=int32#6)
+# asm 2: movl <in7=%ebx,220(<out=%edi)
+movl %ebx,220(%edi)
+
+# qhasm:   z8 = z8_stack
+# asm 1: movdqa <z8_stack=stack128#37,>z8=int6464#1
+# asm 2: movdqa <z8_stack=608(%esp),>z8=%xmm0
+movdqa 608(%esp),%xmm0
+
+# qhasm:   z9 = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>z9=int6464#2
+# asm 2: movdqa <z9_stack=528(%esp),>z9=%xmm1
+movdqa 528(%esp),%xmm1
+
+# qhasm:   z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#22,>z10=int6464#3
+# asm 2: movdqa <z10_stack=368(%esp),>z10=%xmm2
+movdqa 368(%esp),%xmm2
+
+# qhasm:   z11 = z11_stack
+# asm 1: movdqa <z11_stack=stack128#27,>z11=int6464#4
+# asm 2: movdqa <z11_stack=448(%esp),>z11=%xmm3
+movdqa 448(%esp),%xmm3
+
+# qhasm:   uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#1
+# asm 2: paddd <orig8=320(%esp),<z8=%xmm0
+paddd 320(%esp),%xmm0
+
+# qhasm:   uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#2
+# asm 2: paddd <orig9=336(%esp),<z9=%xmm1
+paddd 336(%esp),%xmm1
+
+# qhasm:   uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#3
+# asm 2: paddd <orig10=112(%esp),<z10=%xmm2
+paddd 112(%esp),%xmm2
+
+# qhasm:   uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#4
+# asm 2: paddd <orig11=176(%esp),<z11=%xmm3
+paddd 176(%esp),%xmm3
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int32#5),<in8=int32#1
+# asm 2: xorl 32(<m=%esi),<in8=%eax
+xorl 32(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int32#5),<in9=int32#2
+# asm 2: xorl 36(<m=%esi),<in9=%ecx
+xorl 36(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int32#5),<in10=int32#3
+# asm 2: xorl 40(<m=%esi),<in10=%edx
+xorl 40(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int32#5),<in11=int32#4
+# asm 2: xorl 44(<m=%esi),<in11=%ebx
+xorl 44(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int32#1,32(<out=int32#6)
+# asm 2: movl <in8=%eax,32(<out=%edi)
+movl %eax,32(%edi)
+
+# qhasm:   *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int32#2,36(<out=int32#6)
+# asm 2: movl <in9=%ecx,36(<out=%edi)
+movl %ecx,36(%edi)
+
+# qhasm:   *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int32#3,40(<out=int32#6)
+# asm 2: movl <in10=%edx,40(<out=%edi)
+movl %edx,40(%edi)
+
+# qhasm:   *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int32#4,44(<out=int32#6)
+# asm 2: movl <in11=%ebx,44(<out=%edi)
+movl %ebx,44(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int32#5),<in8=int32#1
+# asm 2: xorl 96(<m=%esi),<in8=%eax
+xorl 96(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int32#5),<in9=int32#2
+# asm 2: xorl 100(<m=%esi),<in9=%ecx
+xorl 100(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int32#5),<in10=int32#3
+# asm 2: xorl 104(<m=%esi),<in10=%edx
+xorl 104(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int32#5),<in11=int32#4
+# asm 2: xorl 108(<m=%esi),<in11=%ebx
+xorl 108(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 96) = in8
+# asm 1: movl <in8=int32#1,96(<out=int32#6)
+# asm 2: movl <in8=%eax,96(<out=%edi)
+movl %eax,96(%edi)
+
+# qhasm:   *(uint32 *) (out + 100) = in9
+# asm 1: movl <in9=int32#2,100(<out=int32#6)
+# asm 2: movl <in9=%ecx,100(<out=%edi)
+movl %ecx,100(%edi)
+
+# qhasm:   *(uint32 *) (out + 104) = in10
+# asm 1: movl <in10=int32#3,104(<out=int32#6)
+# asm 2: movl <in10=%edx,104(<out=%edi)
+movl %edx,104(%edi)
+
+# qhasm:   *(uint32 *) (out + 108) = in11
+# asm 1: movl <in11=int32#4,108(<out=int32#6)
+# asm 2: movl <in11=%ebx,108(<out=%edi)
+movl %ebx,108(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int32#5),<in8=int32#1
+# asm 2: xorl 160(<m=%esi),<in8=%eax
+xorl 160(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int32#5),<in9=int32#2
+# asm 2: xorl 164(<m=%esi),<in9=%ecx
+xorl 164(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int32#5),<in10=int32#3
+# asm 2: xorl 168(<m=%esi),<in10=%edx
+xorl 168(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int32#5),<in11=int32#4
+# asm 2: xorl 172(<m=%esi),<in11=%ebx
+xorl 172(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 160) = in8
+# asm 1: movl <in8=int32#1,160(<out=int32#6)
+# asm 2: movl <in8=%eax,160(<out=%edi)
+movl %eax,160(%edi)
+
+# qhasm:   *(uint32 *) (out + 164) = in9
+# asm 1: movl <in9=int32#2,164(<out=int32#6)
+# asm 2: movl <in9=%ecx,164(<out=%edi)
+movl %ecx,164(%edi)
+
+# qhasm:   *(uint32 *) (out + 168) = in10
+# asm 1: movl <in10=int32#3,168(<out=int32#6)
+# asm 2: movl <in10=%edx,168(<out=%edi)
+movl %edx,168(%edi)
+
+# qhasm:   *(uint32 *) (out + 172) = in11
+# asm 1: movl <in11=int32#4,172(<out=int32#6)
+# asm 2: movl <in11=%ebx,172(<out=%edi)
+movl %ebx,172(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int32#5),<in8=int32#1
+# asm 2: xorl 224(<m=%esi),<in8=%eax
+xorl 224(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int32#5),<in9=int32#2
+# asm 2: xorl 228(<m=%esi),<in9=%ecx
+xorl 228(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int32#5),<in10=int32#3
+# asm 2: xorl 232(<m=%esi),<in10=%edx
+xorl 232(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int32#5),<in11=int32#4
+# asm 2: xorl 236(<m=%esi),<in11=%ebx
+xorl 236(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 224) = in8
+# asm 1: movl <in8=int32#1,224(<out=int32#6)
+# asm 2: movl <in8=%eax,224(<out=%edi)
+movl %eax,224(%edi)
+
+# qhasm:   *(uint32 *) (out + 228) = in9
+# asm 1: movl <in9=int32#2,228(<out=int32#6)
+# asm 2: movl <in9=%ecx,228(<out=%edi)
+movl %ecx,228(%edi)
+
+# qhasm:   *(uint32 *) (out + 232) = in10
+# asm 1: movl <in10=int32#3,232(<out=int32#6)
+# asm 2: movl <in10=%edx,232(<out=%edi)
+movl %edx,232(%edi)
+
+# qhasm:   *(uint32 *) (out + 236) = in11
+# asm 1: movl <in11=int32#4,236(<out=int32#6)
+# asm 2: movl <in11=%ebx,236(<out=%edi)
+movl %ebx,236(%edi)
+
+# qhasm:   z12 = z12_stack
+# asm 1: movdqa <z12_stack=stack128#35,>z12=int6464#1
+# asm 2: movdqa <z12_stack=576(%esp),>z12=%xmm0
+movdqa 576(%esp),%xmm0
+
+# qhasm:   z13 = z13_stack
+# asm 1: movdqa <z13_stack=stack128#30,>z13=int6464#2
+# asm 2: movdqa <z13_stack=496(%esp),>z13=%xmm1
+movdqa 496(%esp),%xmm1
+
+# qhasm:   z14 = z14_stack
+# asm 1: movdqa <z14_stack=stack128#24,>z14=int6464#3
+# asm 2: movdqa <z14_stack=400(%esp),>z14=%xmm2
+movdqa 400(%esp),%xmm2
+
+# qhasm:   z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#23,>z15=int6464#4
+# asm 2: movdqa <z15_stack=384(%esp),>z15=%xmm3
+movdqa 384(%esp),%xmm3
+
+# qhasm:   uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#1
+# asm 2: paddd <orig12=192(%esp),<z12=%xmm0
+paddd 192(%esp),%xmm0
+
+# qhasm:   uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#2
+# asm 2: paddd <orig13=240(%esp),<z13=%xmm1
+paddd 240(%esp),%xmm1
+
+# qhasm:   uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#3
+# asm 2: paddd <orig14=288(%esp),<z14=%xmm2
+paddd 288(%esp),%xmm2
+
+# qhasm:   uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#4
+# asm 2: paddd <orig15=128(%esp),<z15=%xmm3
+paddd 128(%esp),%xmm3
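+# note: feed-forward for the last row: the saved inputs orig12..orig15 are added back to z12..z15 before the keystream is XORed with the message, per the Salsa20 construction.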
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int32#5),<in12=int32#1
+# asm 2: xorl 48(<m=%esi),<in12=%eax
+xorl 48(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int32#5),<in13=int32#2
+# asm 2: xorl 52(<m=%esi),<in13=%ecx
+xorl 52(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int32#5),<in14=int32#3
+# asm 2: xorl 56(<m=%esi),<in14=%edx
+xorl 56(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int32#5),<in15=int32#4
+# asm 2: xorl 60(<m=%esi),<in15=%ebx
+xorl 60(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int32#1,48(<out=int32#6)
+# asm 2: movl <in12=%eax,48(<out=%edi)
+movl %eax,48(%edi)
+
+# qhasm:   *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int32#2,52(<out=int32#6)
+# asm 2: movl <in13=%ecx,52(<out=%edi)
+movl %ecx,52(%edi)
+
+# qhasm:   *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int32#3,56(<out=int32#6)
+# asm 2: movl <in14=%edx,56(<out=%edi)
+movl %edx,56(%edi)
+
+# qhasm:   *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int32#4,60(<out=int32#6)
+# asm 2: movl <in15=%ebx,60(<out=%edi)
+movl %ebx,60(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int32#5),<in12=int32#1
+# asm 2: xorl 112(<m=%esi),<in12=%eax
+xorl 112(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int32#5),<in13=int32#2
+# asm 2: xorl 116(<m=%esi),<in13=%ecx
+xorl 116(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int32#5),<in14=int32#3
+# asm 2: xorl 120(<m=%esi),<in14=%edx
+xorl 120(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int32#5),<in15=int32#4
+# asm 2: xorl 124(<m=%esi),<in15=%ebx
+xorl 124(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 112) = in12
+# asm 1: movl <in12=int32#1,112(<out=int32#6)
+# asm 2: movl <in12=%eax,112(<out=%edi)
+movl %eax,112(%edi)
+
+# qhasm:   *(uint32 *) (out + 116) = in13
+# asm 1: movl <in13=int32#2,116(<out=int32#6)
+# asm 2: movl <in13=%ecx,116(<out=%edi)
+movl %ecx,116(%edi)
+
+# qhasm:   *(uint32 *) (out + 120) = in14
+# asm 1: movl <in14=int32#3,120(<out=int32#6)
+# asm 2: movl <in14=%edx,120(<out=%edi)
+movl %edx,120(%edi)
+
+# qhasm:   *(uint32 *) (out + 124) = in15
+# asm 1: movl <in15=int32#4,124(<out=int32#6)
+# asm 2: movl <in15=%ebx,124(<out=%edi)
+movl %ebx,124(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int32#5),<in12=int32#1
+# asm 2: xorl 176(<m=%esi),<in12=%eax
+xorl 176(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int32#5),<in13=int32#2
+# asm 2: xorl 180(<m=%esi),<in13=%ecx
+xorl 180(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int32#5),<in14=int32#3
+# asm 2: xorl 184(<m=%esi),<in14=%edx
+xorl 184(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int32#5),<in15=int32#4
+# asm 2: xorl 188(<m=%esi),<in15=%ebx
+xorl 188(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 176) = in12
+# asm 1: movl <in12=int32#1,176(<out=int32#6)
+# asm 2: movl <in12=%eax,176(<out=%edi)
+movl %eax,176(%edi)
+
+# qhasm:   *(uint32 *) (out + 180) = in13
+# asm 1: movl <in13=int32#2,180(<out=int32#6)
+# asm 2: movl <in13=%ecx,180(<out=%edi)
+movl %ecx,180(%edi)
+
+# qhasm:   *(uint32 *) (out + 184) = in14
+# asm 1: movl <in14=int32#3,184(<out=int32#6)
+# asm 2: movl <in14=%edx,184(<out=%edi)
+movl %edx,184(%edi)
+
+# qhasm:   *(uint32 *) (out + 188) = in15
+# asm 1: movl <in15=int32#4,188(<out=int32#6)
+# asm 2: movl <in15=%ebx,188(<out=%edi)
+movl %ebx,188(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int32#5),<in12=int32#1
+# asm 2: xorl 240(<m=%esi),<in12=%eax
+xorl 240(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int32#5),<in13=int32#2
+# asm 2: xorl 244(<m=%esi),<in13=%ecx
+xorl 244(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int32#5),<in14=int32#3
+# asm 2: xorl 248(<m=%esi),<in14=%edx
+xorl 248(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int32#5),<in15=int32#4
+# asm 2: xorl 252(<m=%esi),<in15=%ebx
+xorl 252(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 240) = in12
+# asm 1: movl <in12=int32#1,240(<out=int32#6)
+# asm 2: movl <in12=%eax,240(<out=%edi)
+movl %eax,240(%edi)
+
+# qhasm:   *(uint32 *) (out + 244) = in13
+# asm 1: movl <in13=int32#2,244(<out=int32#6)
+# asm 2: movl <in13=%ecx,244(<out=%edi)
+movl %ecx,244(%edi)
+
+# qhasm:   *(uint32 *) (out + 248) = in14
+# asm 1: movl <in14=int32#3,248(<out=int32#6)
+# asm 2: movl <in14=%edx,248(<out=%edi)
+movl %edx,248(%edi)
+
+# qhasm:   *(uint32 *) (out + 252) = in15
+# asm 1: movl <in15=int32#4,252(<out=int32#6)
+# asm 2: movl <in15=%ebx,252(<out=%edi)
+movl %ebx,252(%edi)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:   bytes -= 256
+# asm 1: sub  $256,<bytes=int32#1
+# asm 2: sub  $256,<bytes=%eax
+sub  $256,%eax
+
+# qhasm:   m += 256
+# asm 1: add  $256,<m=int32#5
+# asm 2: add  $256,<m=%esi
+add  $256,%esi
+
+# qhasm:   out += 256
+# asm 1: add  $256,<out=int32#6
+# asm 2: add  $256,<out=%edi
+add  $256,%edi
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#6,>out_stack=stack32#6
+# asm 2: movl <out=%edi,>out_stack=20(%esp)
+movl %edi,20(%esp)
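+# note: a full 256-byte batch (four blocks) is done; m and out have advanced by 256, and the loop repeats while at least 256 bytes remain.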
+
+# qhasm:                            unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int32#1
+# asm 2: cmp  $256,<bytes=%eax
+cmp  $256,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm:                 unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#1
+# asm 2: cmp  $0,<bytes=%eax
+cmp  $0,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
+
+# qhasm:                   unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int32#1
+# asm 2: cmp  $64,<bytes=%eax
+cmp  $64,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto nocopy if !unsigned<
+jae ._nocopy
+
+# qhasm:     ctarget = out
+# asm 1: movl <out=int32#6,>ctarget=stack32#6
+# asm 2: movl <out=%edi,>ctarget=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:     out = &tmp
+# asm 1: leal <tmp=stack512#1,>out=int32#6
+# asm 2: leal <tmp=640(%esp),>out=%edi
+leal 640(%esp),%edi
+
+# qhasm:     i = bytes
+# asm 1: mov  <bytes=int32#1,>i=int32#2
+# asm 2: mov  <bytes=%eax,>i=%ecx
+mov  %eax,%ecx
+
+# qhasm:     while (i) { *out++ = *m++; --i }
+rep movsb
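+# note: fewer than 64 bytes remain, so the tail of m is copied into the 64-byte tmp area on the stack; a full block is produced there and copied back to ctarget afterwards.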
+
+# qhasm:     out = &tmp
+# asm 1: leal <tmp=stack512#1,>out=int32#6
+# asm 2: leal <tmp=640(%esp),>out=%edi
+leal 640(%esp),%edi
+
+# qhasm:     m = &tmp
+# asm 1: leal <tmp=stack512#1,>m=int32#5
+# asm 2: leal <tmp=640(%esp),>m=%esi
+leal 640(%esp),%esi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:   nocopy:
+._nocopy:
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#1,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%eax,>bytes_stack=24(%esp)
+movl %eax,24(%esp)
+
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#3,>diag0=int6464#1
+# asm 2: movdqa <x0=64(%esp),>diag0=%xmm0
+movdqa 64(%esp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#2,>diag1=int6464#2
+# asm 2: movdqa <x1=48(%esp),>diag1=%xmm1
+movdqa 48(%esp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#4,>diag2=int6464#3
+# asm 2: movdqa <x2=80(%esp),>diag2=%xmm2
+movdqa 80(%esp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#1,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%esp),>diag3=%xmm3
+movdqa 32(%esp),%xmm3
+
+# qhasm:                     a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 12
+# asm 1: mov  $12,>i=int32#1
+# asm 2: mov  $12,>i=%eax
+mov  $12,%eax
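+# note: i counts the rounds still to do (12, i.e. six double-rounds); the loop body below is unrolled to four rounds, matching the sub $4 before the branch back.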
+
+# qhasm: mainloop2:
+._mainloop2:
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
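+# note: SSE2 has no packed rotate, so each rotation is built from a left shift (pslld $7) and a right shift (psrld $25) XORed in separately; together they apply a 32-bit rotate-left by 7 in every lane.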
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
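+# note: the pshufd immediates 0x93, 0x4e and 0x39 rotate the four 32-bit lanes by one, two and three positions (qhasm's <<<= 32/64/96), realigning the diagonals between quarter-rounds.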
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:                  unsigned>? i -= 4
+# asm 1: sub  $4,<i=int32#1
+# asm 2: sub  $4,<i=%eax
+sub  $4,%eax
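+# note: the round counter is decremented here, a few instructions ahead of the branch; none of the intervening SIMD instructions touch EFLAGS, so the ja at the end of this block still tests this sub.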
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm:                 b0 = 0
+# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
+pxor   %xmm7,%xmm7
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
+
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#3,<diag0=int6464#1
+# asm 2: paddd <x0=64(%esp),<diag0=%xmm0
+paddd 64(%esp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#2,<diag1=int6464#2
+# asm 2: paddd <x1=48(%esp),<diag1=%xmm1
+paddd 48(%esp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#4,<diag2=int6464#3
+# asm 2: paddd <x2=80(%esp),<diag2=%xmm2
+paddd 80(%esp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#1,<diag3=int6464#4
+# asm 2: paddd <x3=32(%esp),<diag3=%xmm3
+paddd 32(%esp),%xmm3
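+# note: rounds finished; the saved input x0..x3 is added back (feed-forward), and the resulting 64-byte block is XORed with the message word by word below.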
+
+# qhasm: in0 = diag0
+# asm 1: movd   <diag0=int6464#1,>in0=int32#1
+# asm 2: movd   <diag0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm: in12 = diag1
+# asm 1: movd   <diag1=int6464#2,>in12=int32#2
+# asm 2: movd   <diag1=%xmm1,>in12=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in8 = diag2
+# asm 1: movd   <diag2=int6464#3,>in8=int32#3
+# asm 2: movd   <diag2=%xmm2,>in8=%edx
+movd   %xmm2,%edx
+
+# qhasm: in4 = diag3
+# asm 1: movd   <diag3=int6464#4,>in4=int32#4
+# asm 2: movd   <diag3=%xmm3,>in4=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int32#5),<in0=int32#1
+# asm 2: xorl 0(<m=%esi),<in0=%eax
+xorl 0(%esi),%eax
+
+# qhasm: in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int32#5),<in12=int32#2
+# asm 2: xorl 48(<m=%esi),<in12=%ecx
+xorl 48(%esi),%ecx
+
+# qhasm: in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int32#5),<in8=int32#3
+# asm 2: xorl 32(<m=%esi),<in8=%edx
+xorl 32(%esi),%edx
+
+# qhasm: in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int32#5),<in4=int32#4
+# asm 2: xorl 16(<m=%esi),<in4=%ebx
+xorl 16(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int32#1,0(<out=int32#6)
+# asm 2: movl <in0=%eax,0(<out=%edi)
+movl %eax,0(%edi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int32#2,48(<out=int32#6)
+# asm 2: movl <in12=%ecx,48(<out=%edi)
+movl %ecx,48(%edi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int32#3,32(<out=int32#6)
+# asm 2: movl <in8=%edx,32(<out=%edi)
+movl %edx,32(%edi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int32#4,16(<out=int32#6)
+# asm 2: movl <in4=%ebx,16(<out=%edi)
+movl %ebx,16(%edi)
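+# note: because of the diagonal register layout, each movd/pshufd pass yields state words 0,12,8,4, then 5,1,13,9, and so on, which is why the load/store offsets appear out of order.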
+
+# qhasm: in5 = diag0
+# asm 1: movd   <diag0=int6464#1,>in5=int32#1
+# asm 2: movd   <diag0=%xmm0,>in5=%eax
+movd   %xmm0,%eax
+
+# qhasm: in1 = diag1
+# asm 1: movd   <diag1=int6464#2,>in1=int32#2
+# asm 2: movd   <diag1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in13 = diag2
+# asm 1: movd   <diag2=int6464#3,>in13=int32#3
+# asm 2: movd   <diag2=%xmm2,>in13=%edx
+movd   %xmm2,%edx
+
+# qhasm: in9 = diag3
+# asm 1: movd   <diag3=int6464#4,>in9=int32#4
+# asm 2: movd   <diag3=%xmm3,>in9=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int32#5),<in5=int32#1
+# asm 2: xorl 20(<m=%esi),<in5=%eax
+xorl 20(%esi),%eax
+
+# qhasm: in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int32#5),<in1=int32#2
+# asm 2: xorl 4(<m=%esi),<in1=%ecx
+xorl 4(%esi),%ecx
+
+# qhasm: in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int32#5),<in13=int32#3
+# asm 2: xorl 52(<m=%esi),<in13=%edx
+xorl 52(%esi),%edx
+
+# qhasm: in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int32#5),<in9=int32#4
+# asm 2: xorl 36(<m=%esi),<in9=%ebx
+xorl 36(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int32#1,20(<out=int32#6)
+# asm 2: movl <in5=%eax,20(<out=%edi)
+movl %eax,20(%edi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int32#2,4(<out=int32#6)
+# asm 2: movl <in1=%ecx,4(<out=%edi)
+movl %ecx,4(%edi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int32#3,52(<out=int32#6)
+# asm 2: movl <in13=%edx,52(<out=%edi)
+movl %edx,52(%edi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int32#4,36(<out=int32#6)
+# asm 2: movl <in9=%ebx,36(<out=%edi)
+movl %ebx,36(%edi)
+
+# qhasm: in10 = diag0
+# asm 1: movd   <diag0=int6464#1,>in10=int32#1
+# asm 2: movd   <diag0=%xmm0,>in10=%eax
+movd   %xmm0,%eax
+
+# qhasm: in6 = diag1
+# asm 1: movd   <diag1=int6464#2,>in6=int32#2
+# asm 2: movd   <diag1=%xmm1,>in6=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in2 = diag2
+# asm 1: movd   <diag2=int6464#3,>in2=int32#3
+# asm 2: movd   <diag2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm: in14 = diag3
+# asm 1: movd   <diag3=int6464#4,>in14=int32#4
+# asm 2: movd   <diag3=%xmm3,>in14=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int32#5),<in10=int32#1
+# asm 2: xorl 40(<m=%esi),<in10=%eax
+xorl 40(%esi),%eax
+
+# qhasm: in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int32#5),<in6=int32#2
+# asm 2: xorl 24(<m=%esi),<in6=%ecx
+xorl 24(%esi),%ecx
+
+# qhasm: in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int32#5),<in2=int32#3
+# asm 2: xorl 8(<m=%esi),<in2=%edx
+xorl 8(%esi),%edx
+
+# qhasm: in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int32#5),<in14=int32#4
+# asm 2: xorl 56(<m=%esi),<in14=%ebx
+xorl 56(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int32#1,40(<out=int32#6)
+# asm 2: movl <in10=%eax,40(<out=%edi)
+movl %eax,40(%edi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int32#2,24(<out=int32#6)
+# asm 2: movl <in6=%ecx,24(<out=%edi)
+movl %ecx,24(%edi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int32#3,8(<out=int32#6)
+# asm 2: movl <in2=%edx,8(<out=%edi)
+movl %edx,8(%edi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int32#4,56(<out=int32#6)
+# asm 2: movl <in14=%ebx,56(<out=%edi)
+movl %ebx,56(%edi)
+
+# qhasm: in15 = diag0
+# asm 1: movd   <diag0=int6464#1,>in15=int32#1
+# asm 2: movd   <diag0=%xmm0,>in15=%eax
+movd   %xmm0,%eax
+
+# qhasm: in11 = diag1
+# asm 1: movd   <diag1=int6464#2,>in11=int32#2
+# asm 2: movd   <diag1=%xmm1,>in11=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in7 = diag2
+# asm 1: movd   <diag2=int6464#3,>in7=int32#3
+# asm 2: movd   <diag2=%xmm2,>in7=%edx
+movd   %xmm2,%edx
+
+# qhasm: in3 = diag3
+# asm 1: movd   <diag3=int6464#4,>in3=int32#4
+# asm 2: movd   <diag3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int32#5),<in15=int32#1
+# asm 2: xorl 60(<m=%esi),<in15=%eax
+xorl 60(%esi),%eax
+
+# qhasm: in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int32#5),<in11=int32#2
+# asm 2: xorl 44(<m=%esi),<in11=%ecx
+xorl 44(%esi),%ecx
+
+# qhasm: in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int32#5),<in7=int32#3
+# asm 2: xorl 28(<m=%esi),<in7=%edx
+xorl 28(%esi),%edx
+
+# qhasm: in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int32#5),<in3=int32#4
+# asm 2: xorl 12(<m=%esi),<in3=%ebx
+xorl 12(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int32#1,60(<out=int32#6)
+# asm 2: movl <in15=%eax,60(<out=%edi)
+movl %eax,60(%edi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int32#2,44(<out=int32#6)
+# asm 2: movl <in11=%ecx,44(<out=%edi)
+movl %ecx,44(%edi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int32#3,28(<out=int32#6)
+# asm 2: movl <in7=%edx,28(<out=%edi)
+movl %edx,28(%edi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int32#4,12(<out=int32#6)
+# asm 2: movl <in3=%ebx,12(<out=%edi)
+movl %ebx,12(%edi)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#4,>in8=int32#2
+# asm 2: movl <x2=80(%esp),>in8=%ecx
+movl 80(%esp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#1,>in9=int32#3
+# asm 2: movl 4+<x3=32(%esp),>in9=%edx
+movl 4+32(%esp),%edx
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
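+# note: add/adc increments the 64-bit block counter, whose low word sits in x2[0] and high word in x3[1] in this layout.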
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#3,4+<x3=stack128#1
+# asm 2: movl <in9=%edx,4+<x3=32(%esp)
+movl %edx,4+32(%esp)
+
+# qhasm:                          unsigned>? unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int32#1
+# asm 2: cmp  $64,<bytes=%eax
+cmp  $64,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm:     goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm:       m = out
+# asm 1: mov  <out=int32#6,>m=int32#5
+# asm 2: mov  <out=%edi,>m=%esi
+mov  %edi,%esi
+
+# qhasm:       out = ctarget
+# asm 1: movl <ctarget=stack32#6,>out=int32#6
+# asm 2: movl <ctarget=20(%esp),>out=%edi
+movl 20(%esp),%edi
+
+# qhasm:       i = bytes
+# asm 1: mov  <bytes=int32#1,>i=int32#2
+# asm 2: mov  <bytes=%eax,>i=%ecx
+mov  %eax,%ecx
+
+# qhasm:       while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     done:
+._done:
+
+# qhasm:     eax = eax_stack
+# asm 1: movl <eax_stack=stack32#1,>eax=int32#1
+# asm 2: movl <eax_stack=0(%esp),>eax=%eax
+movl 0(%esp),%eax
+
+# qhasm:     ebx = ebx_stack
+# asm 1: movl <ebx_stack=stack32#2,>ebx=int32#4
+# asm 2: movl <ebx_stack=4(%esp),>ebx=%ebx
+movl 4(%esp),%ebx
+
+# qhasm:     esi = esi_stack
+# asm 1: movl <esi_stack=stack32#3,>esi=int32#5
+# asm 2: movl <esi_stack=8(%esp),>esi=%esi
+movl 8(%esp),%esi
+
+# qhasm:     edi = edi_stack
+# asm 1: movl <edi_stack=stack32#4,>edi=int32#6
+# asm 2: movl <edi_stack=12(%esp),>edi=%edi
+movl 12(%esp),%edi
+
+# qhasm:     ebp = ebp_stack
+# asm 1: movl <ebp_stack=stack32#5,>ebp=int32#7
+# asm 2: movl <ebp_stack=16(%esp),>ebp=%ebp
+movl 16(%esp),%ebp
+
+# qhasm:     leave
+add %eax,%esp
+xor %eax,%eax
+ret
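+# note: epilogue above: callee-saved registers restored, the stack adjustment saved in eax_stack added back to esp, and eax cleared so the call returns 0.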
+
+# qhasm:   bytesatleast65:
+._bytesatleast65:
+
+# qhasm:   bytes -= 64
+# asm 1: sub  $64,<bytes=int32#1
+# asm 2: sub  $64,<bytes=%eax
+sub  $64,%eax
+
+# qhasm:   out += 64
+# asm 1: add  $64,<out=int32#6
+# asm 2: add  $64,<out=%edi
+add  $64,%edi
+
+# qhasm:   m += 64
+# asm 1: add  $64,<m=int32#5
+# asm 2: add  $64,<m=%esi
+add  $64,%esi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/api.h b/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/implementors b/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/stream.s b/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/stream.s
new file mode 100644
index 00000000..f27411fe
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/amd64_xmm6/stream.s
@@ -0,0 +1,4823 @@
+
+# qhasm: int64 r11_caller
+
+# qhasm: int64 r12_caller
+
+# qhasm: int64 r13_caller
+
+# qhasm: int64 r14_caller
+
+# qhasm: int64 r15_caller
+
+# qhasm: int64 rbx_caller
+
+# qhasm: int64 rbp_caller
+
+# qhasm: caller r11_caller
+
+# qhasm: caller r12_caller
+
+# qhasm: caller r13_caller
+
+# qhasm: caller r14_caller
+
+# qhasm: caller r15_caller
+
+# qhasm: caller rbx_caller
+
+# qhasm: caller rbp_caller
+
+# qhasm: stack64 r11_stack
+
+# qhasm: stack64 r12_stack
+
+# qhasm: stack64 r13_stack
+
+# qhasm: stack64 r14_stack
+
+# qhasm: stack64 r15_stack
+
+# qhasm: stack64 rbx_stack
+
+# qhasm: stack64 rbp_stack
+
+# qhasm: int64 a
+
+# qhasm: int64 arg1
+
+# qhasm: int64 arg2
+
+# qhasm: int64 arg3
+
+# qhasm: int64 arg4
+
+# qhasm: int64 arg5
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: int64 k
+
+# qhasm: int64 kbits
+
+# qhasm: int64 iv
+
+# qhasm: int64 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int64 m
+
+# qhasm: int64 out
+
+# qhasm: int64 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: int6464 y0
+
+# qhasm: int6464 y1
+
+# qhasm: int6464 y2
+
+# qhasm: int6464 y3
+
+# qhasm: int6464 y4
+
+# qhasm: int6464 y5
+
+# qhasm: int6464 y6
+
+# qhasm: int6464 y7
+
+# qhasm: int6464 y8
+
+# qhasm: int6464 y9
+
+# qhasm: int6464 y10
+
+# qhasm: int6464 y11
+
+# qhasm: int6464 y12
+
+# qhasm: int6464 y13
+
+# qhasm: int6464 y14
+
+# qhasm: int6464 y15
+
+# qhasm: int6464 r0
+
+# qhasm: int6464 r1
+
+# qhasm: int6464 r2
+
+# qhasm: int6464 r3
+
+# qhasm: int6464 r4
+
+# qhasm: int6464 r5
+
+# qhasm: int6464 r6
+
+# qhasm: int6464 r7
+
+# qhasm: int6464 r8
+
+# qhasm: int6464 r9
+
+# qhasm: int6464 r10
+
+# qhasm: int6464 r11
+
+# qhasm: int6464 r12
+
+# qhasm: int6464 r13
+
+# qhasm: int6464 r14
+
+# qhasm: int6464 r15
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int64 in0
+
+# qhasm: int64 in1
+
+# qhasm: int64 in2
+
+# qhasm: int64 in3
+
+# qhasm: int64 in4
+
+# qhasm: int64 in5
+
+# qhasm: int64 in6
+
+# qhasm: int64 in7
+
+# qhasm: int64 in8
+
+# qhasm: int64 in9
+
+# qhasm: int64 in10
+
+# qhasm: int64 in11
+
+# qhasm: int64 in12
+
+# qhasm: int64 in13
+
+# qhasm: int64 in14
+
+# qhasm: int64 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: int64 ctarget
+
+# qhasm: stack64 bytes_backup
+
+# qhasm: enter crypto_stream_salsa208_amd64_xmm6
+.text
+.p2align 5
+.globl _crypto_stream_salsa208_amd64_xmm6
+.globl crypto_stream_salsa208_amd64_xmm6
+_crypto_stream_salsa208_amd64_xmm6:
+crypto_stream_salsa208_amd64_xmm6:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
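+# note: r11 = (rsp & 31) + 480; subtracting it reserves at least 480 bytes and leaves rsp 32-byte aligned, and the adjustment is spilled to r11_stack just below.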
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: bytes = arg2
+# asm 1: mov  <arg2=int64#2,>bytes=int64#6
+# asm 2: mov  <arg2=%rsi,>bytes=%r9
+mov  %rsi,%r9
+
+# qhasm: out = arg1
+# asm 1: mov  <arg1=int64#1,>out=int64#1
+# asm 2: mov  <arg1=%rdi,>out=%rdi
+mov  %rdi,%rdi
+
+# qhasm: m = out
+# asm 1: mov  <out=int64#1,>m=int64#2
+# asm 2: mov  <out=%rdi,>m=%rsi
+mov  %rdi,%rsi
+
+# qhasm: iv = arg3
+# asm 1: mov  <arg3=int64#3,>iv=int64#3
+# asm 2: mov  <arg3=%rdx,>iv=%rdx
+mov  %rdx,%rdx
+
+# qhasm: k = arg4
+# asm 1: mov  <arg4=int64#4,>k=int64#8
+# asm 2: mov  <arg4=%rcx,>k=%r10
+mov  %rcx,%r10
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov  $0,>a=int64#7
+# asm 2: mov  $0,>a=%rax
+mov  $0,%rax
+
+# qhasm: i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
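+# rep stosb zeroes the output buffer; since m = out, the shared code at ._start
+# XORs the keystream into zeros, so this entry point emits the raw keystream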
+
+# qhasm: out -= bytes
+# asm 1: sub  <bytes=int64#6,<out=int64#1
+# asm 2: sub  <bytes=%r9,<out=%rdi
+sub  %r9,%rdi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
+
+# qhasm: enter crypto_stream_salsa208_amd64_xmm6_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa208_amd64_xmm6_xor
+.globl crypto_stream_salsa208_amd64_xmm6_xor
+_crypto_stream_salsa208_amd64_xmm6_xor:
+crypto_stream_salsa208_amd64_xmm6_xor:
+mov %rsp,%r11
+and $31,%r11
+add $480,%r11
+sub %r11,%rsp
+
+# qhasm: r11_stack = r11_caller
+# asm 1: movq <r11_caller=int64#9,>r11_stack=stack64#1
+# asm 2: movq <r11_caller=%r11,>r11_stack=352(%rsp)
+movq %r11,352(%rsp)
+
+# qhasm: r12_stack = r12_caller
+# asm 1: movq <r12_caller=int64#10,>r12_stack=stack64#2
+# asm 2: movq <r12_caller=%r12,>r12_stack=360(%rsp)
+movq %r12,360(%rsp)
+
+# qhasm: r13_stack = r13_caller
+# asm 1: movq <r13_caller=int64#11,>r13_stack=stack64#3
+# asm 2: movq <r13_caller=%r13,>r13_stack=368(%rsp)
+movq %r13,368(%rsp)
+
+# qhasm: r14_stack = r14_caller
+# asm 1: movq <r14_caller=int64#12,>r14_stack=stack64#4
+# asm 2: movq <r14_caller=%r14,>r14_stack=376(%rsp)
+movq %r14,376(%rsp)
+
+# qhasm: r15_stack = r15_caller
+# asm 1: movq <r15_caller=int64#13,>r15_stack=stack64#5
+# asm 2: movq <r15_caller=%r15,>r15_stack=384(%rsp)
+movq %r15,384(%rsp)
+
+# qhasm: rbx_stack = rbx_caller
+# asm 1: movq <rbx_caller=int64#14,>rbx_stack=stack64#6
+# asm 2: movq <rbx_caller=%rbx,>rbx_stack=392(%rsp)
+movq %rbx,392(%rsp)
+
+# qhasm: rbp_stack = rbp_caller
+# asm 1: movq <rbp_caller=int64#15,>rbp_stack=stack64#7
+# asm 2: movq <rbp_caller=%rbp,>rbp_stack=400(%rsp)
+movq %rbp,400(%rsp)
+
+# qhasm: out = arg1
+# asm 1: mov  <arg1=int64#1,>out=int64#1
+# asm 2: mov  <arg1=%rdi,>out=%rdi
+mov  %rdi,%rdi
+
+# qhasm: m = arg2
+# asm 1: mov  <arg2=int64#2,>m=int64#2
+# asm 2: mov  <arg2=%rsi,>m=%rsi
+mov  %rsi,%rsi
+
+# qhasm: bytes = arg3
+# asm 1: mov  <arg3=int64#3,>bytes=int64#6
+# asm 2: mov  <arg3=%rdx,>bytes=%r9
+mov  %rdx,%r9
+
+# qhasm: iv = arg4
+# asm 1: mov  <arg4=int64#4,>iv=int64#3
+# asm 2: mov  <arg4=%rcx,>iv=%rdx
+mov  %rcx,%rdx
+
+# qhasm: k = arg5
+# asm 1: mov  <arg5=int64#5,>k=int64#8
+# asm 2: mov  <arg5=%r8,>k=%r10
+mov  %r8,%r10
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm:   in12 = *(uint32 *) (k + 20)
+# asm 1: movl   20(<k=int64#8),>in12=int64#4d
+# asm 2: movl   20(<k=%r10),>in12=%ecx
+movl   20(%r10),%ecx
+
+# qhasm:   in1 = *(uint32 *) (k + 0)
+# asm 1: movl   0(<k=int64#8),>in1=int64#5d
+# asm 2: movl   0(<k=%r10),>in1=%r8d
+movl   0(%r10),%r8d
+
+# qhasm:   in6 = *(uint32 *) (iv + 0)
+# asm 1: movl   0(<iv=int64#3),>in6=int64#7d
+# asm 2: movl   0(<iv=%rdx),>in6=%eax
+movl   0(%rdx),%eax
+
+# qhasm:   in11 = *(uint32 *) (k + 16)
+# asm 1: movl   16(<k=int64#8),>in11=int64#9d
+# asm 2: movl   16(<k=%r10),>in11=%r11d
+movl   16(%r10),%r11d
+
+# qhasm:   ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int64#4d,>x1=stack128#1
+# asm 2: movl <in12=%ecx,>x1=0(%rsp)
+movl %ecx,0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int64#5d,4+<x1=stack128#1
+# asm 2: movl <in1=%r8d,4+<x1=0(%rsp)
+movl %r8d,4+0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int64#7d,8+<x1=stack128#1
+# asm 2: movl <in6=%eax,8+<x1=0(%rsp)
+movl %eax,8+0(%rsp)
+
+# qhasm:   ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int64#9d,12+<x1=stack128#1
+# asm 2: movl <in11=%r11d,12+<x1=0(%rsp)
+movl %r11d,12+0(%rsp)
+
+# qhasm:   in8 = 0
+# asm 1: mov  $0,>in8=int64#4
+# asm 2: mov  $0,>in8=%rcx
+mov  $0,%rcx
+
+# qhasm:   in13 = *(uint32 *) (k + 24)
+# asm 1: movl   24(<k=int64#8),>in13=int64#5d
+# asm 2: movl   24(<k=%r10),>in13=%r8d
+movl   24(%r10),%r8d
+
+# qhasm:   in2 = *(uint32 *) (k + 4)
+# asm 1: movl   4(<k=int64#8),>in2=int64#7d
+# asm 2: movl   4(<k=%r10),>in2=%eax
+movl   4(%r10),%eax
+
+# qhasm:   in7 = *(uint32 *) (iv + 4)
+# asm 1: movl   4(<iv=int64#3),>in7=int64#3d
+# asm 2: movl   4(<iv=%rdx),>in7=%edx
+movl   4(%rdx),%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int64#5d,4+<x2=stack128#2
+# asm 2: movl <in13=%r8d,4+<x2=16(%rsp)
+movl %r8d,4+16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int64#7d,8+<x2=stack128#2
+# asm 2: movl <in2=%eax,8+<x2=16(%rsp)
+movl %eax,8+16(%rsp)
+
+# qhasm:   ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int64#3d,12+<x2=stack128#2
+# asm 2: movl <in7=%edx,12+<x2=16(%rsp)
+movl %edx,12+16(%rsp)
+
+# qhasm:   in4 = *(uint32 *) (k + 12)
+# asm 1: movl   12(<k=int64#8),>in4=int64#3d
+# asm 2: movl   12(<k=%r10),>in4=%edx
+movl   12(%r10),%edx
+
+# qhasm:   in9 = 0
+# asm 1: mov  $0,>in9=int64#4
+# asm 2: mov  $0,>in9=%rcx
+mov  $0,%rcx
+
+# qhasm:   in14 = *(uint32 *) (k + 28)
+# asm 1: movl   28(<k=int64#8),>in14=int64#5d
+# asm 2: movl   28(<k=%r10),>in14=%r8d
+movl   28(%r10),%r8d
+
+# qhasm:   in3 = *(uint32 *) (k + 8)
+# asm 1: movl   8(<k=int64#8),>in3=int64#7d
+# asm 2: movl   8(<k=%r10),>in3=%eax
+movl   8(%r10),%eax
+
+# qhasm:   ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int64#3d,>x3=stack128#3
+# asm 2: movl <in4=%edx,>x3=32(%rsp)
+movl %edx,32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int64#5d,8+<x3=stack128#3
+# asm 2: movl <in14=%r8d,8+<x3=32(%rsp)
+movl %r8d,8+32(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int64#7d,12+<x3=stack128#3
+# asm 2: movl <in3=%eax,12+<x3=32(%rsp)
+movl %eax,12+32(%rsp)
+
+# qhasm:   in0 = 1634760805
+# asm 1: mov  $1634760805,>in0=int64#3
+# asm 2: mov  $1634760805,>in0=%rdx
+mov  $1634760805,%rdx
+
+# qhasm:   in5 = 857760878
+# asm 1: mov  $857760878,>in5=int64#4
+# asm 2: mov  $857760878,>in5=%rcx
+mov  $857760878,%rcx
+
+# qhasm:   in10 = 2036477234
+# asm 1: mov  $2036477234,>in10=int64#5
+# asm 2: mov  $2036477234,>in10=%r8
+mov  $2036477234,%r8
+
+# qhasm:   in15 = 1797285236
+# asm 1: mov  $1797285236,>in15=int64#7
+# asm 2: mov  $1797285236,>in15=%rax
+mov  $1797285236,%rax
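+# 0x61707865, 0x3320646e, 0x79622d32, 0x6b206574: the Salsa20 "expand 32-byte k"
+# constants, occupying diagonal positions 0, 5, 10 and 15 of the state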
+
+# qhasm:   ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int64#3d,>x0=stack128#4
+# asm 2: movl <in0=%edx,>x0=48(%rsp)
+movl %edx,48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int64#4d,4+<x0=stack128#4
+# asm 2: movl <in5=%ecx,4+<x0=48(%rsp)
+movl %ecx,4+48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int64#5d,8+<x0=stack128#4
+# asm 2: movl <in10=%r8d,8+<x0=48(%rsp)
+movl %r8d,8+48(%rsp)
+
+# qhasm:   ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int64#7d,12+<x0=stack128#4
+# asm 2: movl <in15=%eax,12+<x0=48(%rsp)
+movl %eax,12+48(%rsp)
+
+# qhasm:                               unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int64#6
+# asm 2: cmp  $256,<bytes=%r9
+cmp  $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
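+# the path below computes four 64-byte blocks (256 bytes) per pass: each of the
+# 16 state words gets one 128-bit slot (orig0..orig15), one 32-bit lane per block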
+
+# qhasm:   z0 = x0
+# asm 1: movdqa <x0=stack128#4,>z0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>z0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm:   z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm:   z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm:   z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm:   orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=64(%rsp)
+movdqa %xmm1,64(%rsp)
+
+# qhasm:   orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=80(%rsp)
+movdqa %xmm2,80(%rsp)
+
+# qhasm:   orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=96(%rsp)
+movdqa %xmm3,96(%rsp)
+
+# qhasm:   orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=112(%rsp)
+movdqa %xmm0,112(%rsp)
+
+# qhasm:   z1 = x1
+# asm 1: movdqa <x1=stack128#1,>z1=int6464#1
+# asm 2: movdqa <x1=0(%rsp),>z1=%xmm0
+movdqa 0(%rsp),%xmm0
+
+# qhasm:   z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm:   z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm:   z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm:   z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm:   orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=128(%rsp)
+movdqa %xmm1,128(%rsp)
+
+# qhasm:   orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=144(%rsp)
+movdqa %xmm2,144(%rsp)
+
+# qhasm:   orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=160(%rsp)
+movdqa %xmm3,160(%rsp)
+
+# qhasm:   orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=176(%rsp)
+movdqa %xmm0,176(%rsp)
+
+# qhasm:   z2 = x2
+# asm 1: movdqa <x2=stack128#2,>z2=int6464#1
+# asm 2: movdqa <x2=16(%rsp),>z2=%xmm0
+movdqa 16(%rsp),%xmm0
+
+# qhasm:   z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm:   z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm:   z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm:   orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=192(%rsp)
+movdqa %xmm1,192(%rsp)
+
+# qhasm:   orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=208(%rsp)
+movdqa %xmm2,208(%rsp)
+
+# qhasm:   orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=224(%rsp)
+movdqa %xmm0,224(%rsp)
+
+# qhasm:   z3 = x3
+# asm 1: movdqa <x3=stack128#3,>z3=int6464#1
+# asm 2: movdqa <x3=32(%rsp),>z3=%xmm0
+movdqa 32(%rsp),%xmm0
+
+# qhasm:   z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm:   z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm:   orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=240(%rsp)
+movdqa %xmm1,240(%rsp)
+
+# qhasm:   orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=256(%rsp)
+movdqa %xmm2,256(%rsp)
+
+# qhasm:   orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=272(%rsp)
+movdqa %xmm0,272(%rsp)
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#3d
+# asm 2: movl <x2=16(%rsp),>in8=%edx
+movl 16(%rsp),%edx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#4d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%ecx
+movl 4+32(%rsp),%ecx
+
+# qhasm:   ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int64#3d,>orig8=stack128#19
+# asm 2: movl <in8=%edx,>orig8=288(%rsp)
+movl %edx,288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int64#4d,>orig9=stack128#20
+# asm 2: movl <in9=%ecx,>orig9=304(%rsp)
+movl %ecx,304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int64#3d,4+<orig8=stack128#19
+# asm 2: movl <in8=%edx,4+<orig8=288(%rsp)
+movl %edx,4+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,4+<orig9=304(%rsp)
+movl %ecx,4+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int64#3d,8+<orig8=stack128#19
+# asm 2: movl <in8=%edx,8+<orig8=288(%rsp)
+movl %edx,8+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int64#4d,8+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,8+<orig9=304(%rsp)
+movl %ecx,8+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int64#3d,12+<orig8=stack128#19
+# asm 2: movl <in8=%edx,12+<orig8=288(%rsp)
+movl %edx,12+288(%rsp)
+
+# qhasm:   ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int64#4d,12+<orig9=stack128#20
+# asm 2: movl <in9=%ecx,12+<orig9=304(%rsp)
+movl %ecx,12+304(%rsp)
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#3
+# asm 2: add  $1,<in8=%rdx
+add  $1,%rdx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#4
+# asm 2: shl  $32,<in9=%rcx
+shl  $32,%rcx
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#4,<in8=int64#3
+# asm 2: add  <in9=%rcx,<in8=%rdx
+add  %rcx,%rdx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#3,>in9=int64#4
+# asm 2: mov  <in8=%rdx,>in9=%rcx
+mov  %rdx,%rcx
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#4
+# asm 2: shr  $32,<in9=%rcx
+shr  $32,%rcx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#3d,>x2=stack128#2
+# asm 2: movl <in8=%edx,>x2=16(%rsp)
+movl %edx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#4d,4+<x3=stack128#3
+# asm 2: movl <in9=%ecx,4+<x3=32(%rsp)
+movl %ecx,4+32(%rsp)
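+# orig8/orig9 now hold the low/high halves of four consecutive block counters;
+# the counter written back to x2/x3 has been advanced by 4 for the next 256 bytes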
+
+# qhasm:   bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: i = 8
+# asm 1: mov  $8,>i=int64#3
+# asm 2: mov  $8,>i=%rdx
+mov  $8,%rdx
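+# Salsa20/8: i counts remaining rounds; each pass through ._mainloop1 does a
+# column round plus a row round and subtracts 2, so the loop body runs four times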
+
+# qhasm:   z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=64(%rsp),>z5=%xmm0
+movdqa 64(%rsp),%xmm0
+
+# qhasm:   z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=80(%rsp),>z10=%xmm1
+movdqa 80(%rsp),%xmm1
+
+# qhasm:   z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=96(%rsp),>z15=%xmm2
+movdqa 96(%rsp),%xmm2
+
+# qhasm:   z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=256(%rsp),>z14=%xmm3
+movdqa 256(%rsp),%xmm3
+
+# qhasm:   z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=272(%rsp),>z3=%xmm4
+movdqa 272(%rsp),%xmm4
+
+# qhasm:   z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=128(%rsp),>z6=%xmm5
+movdqa 128(%rsp),%xmm5
+
+# qhasm:   z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=144(%rsp),>z11=%xmm6
+movdqa 144(%rsp),%xmm6
+
+# qhasm:   z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=176(%rsp),>z1=%xmm7
+movdqa 176(%rsp),%xmm7
+
+# qhasm:   z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#9
+# asm 2: movdqa <orig7=192(%rsp),>z7=%xmm8
+movdqa 192(%rsp),%xmm8
+
+# qhasm:   z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#10
+# asm 2: movdqa <orig13=208(%rsp),>z13=%xmm9
+movdqa 208(%rsp),%xmm9
+
+# qhasm:   z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#11
+# asm 2: movdqa <orig2=224(%rsp),>z2=%xmm10
+movdqa 224(%rsp),%xmm10
+
+# qhasm:   z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#12
+# asm 2: movdqa <orig9=304(%rsp),>z9=%xmm11
+movdqa 304(%rsp),%xmm11
+
+# qhasm:   z0 = orig0
+# asm 1: movdqa <orig0=stack128#8,>z0=int6464#13
+# asm 2: movdqa <orig0=112(%rsp),>z0=%xmm12
+movdqa 112(%rsp),%xmm12
+
+# qhasm:   z12 = orig12
+# asm 1: movdqa <orig12=stack128#11,>z12=int6464#14
+# asm 2: movdqa <orig12=160(%rsp),>z12=%xmm13
+movdqa 160(%rsp),%xmm13
+
+# qhasm:   z4 = orig4
+# asm 1: movdqa <orig4=stack128#16,>z4=int6464#15
+# asm 2: movdqa <orig4=240(%rsp),>z4=%xmm14
+movdqa 240(%rsp),%xmm14
+
+# qhasm:   z8 = orig8
+# asm 1: movdqa <orig8=stack128#19,>z8=int6464#16
+# asm 2: movdqa <orig8=288(%rsp),>z8=%xmm15
+movdqa 288(%rsp),%xmm15
+
+# qhasm: mainloop1:
+._mainloop1:
+
+# qhasm: 						z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: 								z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: 		y4 = z12
+# asm 1: movdqa <z12=int6464#14,>y4=int6464#2
+# asm 2: movdqa <z12=%xmm13,>y4=%xmm1
+movdqa %xmm13,%xmm1
+
+# qhasm: uint32323232	y4 += z0
+# asm 1: paddd <z0=int6464#13,<y4=int6464#2
+# asm 2: paddd <z0=%xmm12,<y4=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: 		r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y4 <<= 7
+# asm 1: pslld $7,<y4=int6464#2
+# asm 2: pslld $7,<y4=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 		z4 ^= y4
+# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
+pxor  %xmm1,%xmm14
+
+# qhasm: uint32323232	r4 >>= 25
+# asm 1: psrld $25,<r4=int6464#3
+# asm 2: psrld $25,<r4=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 		z4 ^= r4
+# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm: 				y9 = z1
+# asm 1: movdqa <z1=int6464#8,>y9=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y9=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232			y9 += z5
+# asm 1: paddd <z5=int6464#1,<y9=int6464#2
+# asm 2: paddd <z5=%xmm0,<y9=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: 				r9 = y9
+# asm 1: movdqa <y9=int6464#2,>r9=int6464#3
+# asm 2: movdqa <y9=%xmm1,>r9=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y9 <<= 7
+# asm 1: pslld $7,<y9=int6464#2
+# asm 2: pslld $7,<y9=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 				z9 ^= y9
+# asm 1: pxor  <y9=int6464#2,<z9=int6464#12
+# asm 2: pxor  <y9=%xmm1,<z9=%xmm11
+pxor  %xmm1,%xmm11
+
+# qhasm: uint32323232			r9 >>= 25
+# asm 1: psrld $25,<r9=int6464#3
+# asm 2: psrld $25,<r9=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 				z9 ^= r9
+# asm 1: pxor  <r9=int6464#3,<z9=int6464#12
+# asm 2: pxor  <r9=%xmm2,<z9=%xmm11
+pxor  %xmm2,%xmm11
+
+# qhasm: 		y8 = z0
+# asm 1: movdqa <z0=int6464#13,>y8=int6464#2
+# asm 2: movdqa <z0=%xmm12,>y8=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232	y8 += z4
+# asm 1: paddd <z4=int6464#15,<y8=int6464#2
+# asm 2: paddd <z4=%xmm14,<y8=%xmm1
+paddd %xmm14,%xmm1
+
+# qhasm: 		r8 = y8
+# asm 1: movdqa <y8=int6464#2,>r8=int6464#3
+# asm 2: movdqa <y8=%xmm1,>r8=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#2
+# asm 2: pslld $9,<y8=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 		z8 ^= y8
+# asm 1: pxor  <y8=int6464#2,<z8=int6464#16
+# asm 2: pxor  <y8=%xmm1,<z8=%xmm15
+pxor  %xmm1,%xmm15
+
+# qhasm: uint32323232	r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#3
+# asm 2: psrld $23,<r8=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 		z8 ^= r8
+# asm 1: pxor  <r8=int6464#3,<z8=int6464#16
+# asm 2: pxor  <r8=%xmm2,<z8=%xmm15
+pxor  %xmm2,%xmm15
+
+# qhasm: 				y13 = z5
+# asm 1: movdqa <z5=int6464#1,>y13=int6464#2
+# asm 2: movdqa <z5=%xmm0,>y13=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232			y13 += z9
+# asm 1: paddd <z9=int6464#12,<y13=int6464#2
+# asm 2: paddd <z9=%xmm11,<y13=%xmm1
+paddd %xmm11,%xmm1
+
+# qhasm: 				r13 = y13
+# asm 1: movdqa <y13=int6464#2,>r13=int6464#3
+# asm 2: movdqa <y13=%xmm1,>r13=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#2
+# asm 2: pslld $9,<y13=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 				z13 ^= y13
+# asm 1: pxor  <y13=int6464#2,<z13=int6464#10
+# asm 2: pxor  <y13=%xmm1,<z13=%xmm9
+pxor  %xmm1,%xmm9
+
+# qhasm: uint32323232			r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#3
+# asm 2: psrld $23,<r13=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 				z13 ^= r13
+# asm 1: pxor  <r13=int6464#3,<z13=int6464#10
+# asm 2: pxor  <r13=%xmm2,<z13=%xmm9
+pxor  %xmm2,%xmm9
+
+# qhasm: 		y12 = z4
+# asm 1: movdqa <z4=int6464#15,>y12=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y12=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232	y12 += z8
+# asm 1: paddd <z8=int6464#16,<y12=int6464#2
+# asm 2: paddd <z8=%xmm15,<y12=%xmm1
+paddd %xmm15,%xmm1
+
+# qhasm: 		r12 = y12
+# asm 1: movdqa <y12=int6464#2,>r12=int6464#3
+# asm 2: movdqa <y12=%xmm1,>r12=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y12 <<= 13
+# asm 1: pslld $13,<y12=int6464#2
+# asm 2: pslld $13,<y12=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 		z12 ^= y12
+# asm 1: pxor  <y12=int6464#2,<z12=int6464#14
+# asm 2: pxor  <y12=%xmm1,<z12=%xmm13
+pxor  %xmm1,%xmm13
+
+# qhasm: uint32323232	r12 >>= 19
+# asm 1: psrld $19,<r12=int6464#3
+# asm 2: psrld $19,<r12=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 		z12 ^= r12
+# asm 1: pxor  <r12=int6464#3,<z12=int6464#14
+# asm 2: pxor  <r12=%xmm2,<z12=%xmm13
+pxor  %xmm2,%xmm13
+
+# qhasm: 				y1 = z9
+# asm 1: movdqa <z9=int6464#12,>y1=int6464#2
+# asm 2: movdqa <z9=%xmm11,>y1=%xmm1
+movdqa %xmm11,%xmm1
+
+# qhasm: uint32323232			y1 += z13
+# asm 1: paddd <z13=int6464#10,<y1=int6464#2
+# asm 2: paddd <z13=%xmm9,<y1=%xmm1
+paddd %xmm9,%xmm1
+
+# qhasm: 				r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#3
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y1 <<= 13
+# asm 1: pslld $13,<y1=int6464#2
+# asm 2: pslld $13,<y1=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 				z1 ^= y1
+# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm: uint32323232			r1 >>= 19
+# asm 1: psrld $19,<r1=int6464#3
+# asm 2: psrld $19,<r1=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 				z1 ^= r1
+# asm 1: pxor  <r1=int6464#3,<z1=int6464#8
+# asm 2: pxor  <r1=%xmm2,<z1=%xmm7
+pxor  %xmm2,%xmm7
+
+# qhasm: 		y0 = z8
+# asm 1: movdqa <z8=int6464#16,>y0=int6464#2
+# asm 2: movdqa <z8=%xmm15,>y0=%xmm1
+movdqa %xmm15,%xmm1
+
+# qhasm: uint32323232	y0 += z12
+# asm 1: paddd <z12=int6464#14,<y0=int6464#2
+# asm 2: paddd <z12=%xmm13,<y0=%xmm1
+paddd %xmm13,%xmm1
+
+# qhasm: 		r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 		z0 ^= y0
+# asm 1: pxor  <y0=int6464#2,<z0=int6464#13
+# asm 2: pxor  <y0=%xmm1,<z0=%xmm12
+pxor  %xmm1,%xmm12
+
+# qhasm: uint32323232	r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 		z0 ^= r0
+# asm 1: pxor  <r0=int6464#3,<z0=int6464#13
+# asm 2: pxor  <r0=%xmm2,<z0=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm: 						z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: 		z0_stack = z0
+# asm 1: movdqa <z0=int6464#13,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm12,>z0_stack=320(%rsp)
+movdqa %xmm12,320(%rsp)
+
+# qhasm: 				y5 = z13
+# asm 1: movdqa <z13=int6464#10,>y5=int6464#3
+# asm 2: movdqa <z13=%xmm9,>y5=%xmm2
+movdqa %xmm9,%xmm2
+
+# qhasm: uint32323232			y5 += z1
+# asm 1: paddd <z1=int6464#8,<y5=int6464#3
+# asm 2: paddd <z1=%xmm7,<y5=%xmm2
+paddd %xmm7,%xmm2
+
+# qhasm: 				r5 = y5
+# asm 1: movdqa <y5=int6464#3,>r5=int6464#13
+# asm 2: movdqa <y5=%xmm2,>r5=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232			y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#3
+# asm 2: pslld $18,<y5=%xmm2
+pslld $18,%xmm2
+
+# qhasm: 				z5 ^= y5
+# asm 1: pxor  <y5=int6464#3,<z5=int6464#1
+# asm 2: pxor  <y5=%xmm2,<z5=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm: uint32323232			r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#13
+# asm 2: psrld $14,<r5=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 				z5 ^= r5
+# asm 1: pxor  <r5=int6464#13,<z5=int6464#1
+# asm 2: pxor  <r5=%xmm12,<z5=%xmm0
+pxor  %xmm12,%xmm0
+
+# qhasm: 						y14 = z6
+# asm 1: movdqa <z6=int6464#6,>y14=int6464#3
+# asm 2: movdqa <z6=%xmm5,>y14=%xmm2
+movdqa %xmm5,%xmm2
+
+# qhasm: uint32323232					y14 += z10
+# asm 1: paddd <z10=int6464#2,<y14=int6464#3
+# asm 2: paddd <z10=%xmm1,<y14=%xmm2
+paddd %xmm1,%xmm2
+
+# qhasm: 						r14 = y14
+# asm 1: movdqa <y14=int6464#3,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm2,>r14=%xmm12
+movdqa %xmm2,%xmm12
+
+# qhasm: uint32323232					y14 <<= 7
+# asm 1: pslld $7,<y14=int6464#3
+# asm 2: pslld $7,<y14=%xmm2
+pslld $7,%xmm2
+
+# qhasm: 						z14 ^= y14
+# asm 1: pxor  <y14=int6464#3,<z14=int6464#4
+# asm 2: pxor  <y14=%xmm2,<z14=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232					r14 >>= 25
+# asm 1: psrld $25,<r14=int6464#13
+# asm 2: psrld $25,<r14=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 						z14 ^= r14
+# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm: 								z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: 				z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm0,>z5_stack=336(%rsp)
+movdqa %xmm0,336(%rsp)
+
+# qhasm: 								y3 = z11
+# asm 1: movdqa <z11=int6464#7,>y3=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y3=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232							y3 += z15
+# asm 1: paddd <z15=int6464#3,<y3=int6464#1
+# asm 2: paddd <z15=%xmm2,<y3=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: 								r3 = y3
+# asm 1: movdqa <y3=int6464#1,>r3=int6464#13
+# asm 2: movdqa <y3=%xmm0,>r3=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y3 <<= 7
+# asm 1: pslld $7,<y3=int6464#1
+# asm 2: pslld $7,<y3=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 								z3 ^= y3
+# asm 1: pxor  <y3=int6464#1,<z3=int6464#5
+# asm 2: pxor  <y3=%xmm0,<z3=%xmm4
+pxor  %xmm0,%xmm4
+
+# qhasm: uint32323232							r3 >>= 25
+# asm 1: psrld $25,<r3=int6464#13
+# asm 2: psrld $25,<r3=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 								z3 ^= r3
+# asm 1: pxor  <r3=int6464#13,<z3=int6464#5
+# asm 2: pxor  <r3=%xmm12,<z3=%xmm4
+pxor  %xmm12,%xmm4
+
+# qhasm: 						y2 = z10
+# asm 1: movdqa <z10=int6464#2,>y2=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y2=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232					y2 += z14
+# asm 1: paddd <z14=int6464#4,<y2=int6464#1
+# asm 2: paddd <z14=%xmm3,<y2=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: 						r2 = y2
+# asm 1: movdqa <y2=int6464#1,>r2=int6464#13
+# asm 2: movdqa <y2=%xmm0,>r2=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#1
+# asm 2: pslld $9,<y2=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 						z2 ^= y2
+# asm 1: pxor  <y2=int6464#1,<z2=int6464#11
+# asm 2: pxor  <y2=%xmm0,<z2=%xmm10
+pxor  %xmm0,%xmm10
+
+# qhasm: uint32323232					r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#13
+# asm 2: psrld $23,<r2=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 						z2 ^= r2
+# asm 1: pxor  <r2=int6464#13,<z2=int6464#11
+# asm 2: pxor  <r2=%xmm12,<z2=%xmm10
+pxor  %xmm12,%xmm10
+
+# qhasm: 								y7 = z15
+# asm 1: movdqa <z15=int6464#3,>y7=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y7=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232							y7 += z3
+# asm 1: paddd <z3=int6464#5,<y7=int6464#1
+# asm 2: paddd <z3=%xmm4,<y7=%xmm0
+paddd %xmm4,%xmm0
+
+# qhasm: 								r7 = y7
+# asm 1: movdqa <y7=int6464#1,>r7=int6464#13
+# asm 2: movdqa <y7=%xmm0,>r7=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#1
+# asm 2: pslld $9,<y7=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 								z7 ^= y7
+# asm 1: pxor  <y7=int6464#1,<z7=int6464#9
+# asm 2: pxor  <y7=%xmm0,<z7=%xmm8
+pxor  %xmm0,%xmm8
+
+# qhasm: uint32323232							r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#13
+# asm 2: psrld $23,<r7=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 								z7 ^= r7
+# asm 1: pxor  <r7=int6464#13,<z7=int6464#9
+# asm 2: pxor  <r7=%xmm12,<z7=%xmm8
+pxor  %xmm12,%xmm8
+
+# qhasm: 						y6 = z14
+# asm 1: movdqa <z14=int6464#4,>y6=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y6=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232					y6 += z2
+# asm 1: paddd <z2=int6464#11,<y6=int6464#1
+# asm 2: paddd <z2=%xmm10,<y6=%xmm0
+paddd %xmm10,%xmm0
+
+# qhasm: 						r6 = y6
+# asm 1: movdqa <y6=int6464#1,>r6=int6464#13
+# asm 2: movdqa <y6=%xmm0,>r6=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y6 <<= 13
+# asm 1: pslld $13,<y6=int6464#1
+# asm 2: pslld $13,<y6=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 						z6 ^= y6
+# asm 1: pxor  <y6=int6464#1,<z6=int6464#6
+# asm 2: pxor  <y6=%xmm0,<z6=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232					r6 >>= 19
+# asm 1: psrld $19,<r6=int6464#13
+# asm 2: psrld $19,<r6=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 						z6 ^= r6
+# asm 1: pxor  <r6=int6464#13,<z6=int6464#6
+# asm 2: pxor  <r6=%xmm12,<z6=%xmm5
+pxor  %xmm12,%xmm5
+
+# qhasm: 								y11 = z3
+# asm 1: movdqa <z3=int6464#5,>y11=int6464#1
+# asm 2: movdqa <z3=%xmm4,>y11=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232							y11 += z7
+# asm 1: paddd <z7=int6464#9,<y11=int6464#1
+# asm 2: paddd <z7=%xmm8,<y11=%xmm0
+paddd %xmm8,%xmm0
+
+# qhasm: 								r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#13
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y11 <<= 13
+# asm 1: pslld $13,<y11=int6464#1
+# asm 2: pslld $13,<y11=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 								z11 ^= y11
+# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm: uint32323232							r11 >>= 19
+# asm 1: psrld $19,<r11=int6464#13
+# asm 2: psrld $19,<r11=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 								z11 ^= r11
+# asm 1: pxor  <r11=int6464#13,<z11=int6464#7
+# asm 2: pxor  <r11=%xmm12,<z11=%xmm6
+pxor  %xmm12,%xmm6
+
+# qhasm: 						y10 = z2
+# asm 1: movdqa <z2=int6464#11,>y10=int6464#1
+# asm 2: movdqa <z2=%xmm10,>y10=%xmm0
+movdqa %xmm10,%xmm0
+
+# qhasm: uint32323232					y10 += z6
+# asm 1: paddd <z6=int6464#6,<y10=int6464#1
+# asm 2: paddd <z6=%xmm5,<y10=%xmm0
+paddd %xmm5,%xmm0
+
+# qhasm: 						r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 						z10 ^= y10
+# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm: uint32323232					r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 						z10 ^= r10
+# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
+pxor  %xmm12,%xmm1
+
+# qhasm: 		z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#1
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm0
+movdqa 320(%rsp),%xmm0
+
+# qhasm: 						z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#21
+# asm 2: movdqa <z10=%xmm1,>z10_stack=320(%rsp)
+movdqa %xmm1,320(%rsp)
+
+# qhasm: 		y1 = z3
+# asm 1: movdqa <z3=int6464#5,>y1=int6464#2
+# asm 2: movdqa <z3=%xmm4,>y1=%xmm1
+movdqa %xmm4,%xmm1
+
+# qhasm: uint32323232	y1 += z0
+# asm 1: paddd <z0=int6464#1,<y1=int6464#2
+# asm 2: paddd <z0=%xmm0,<y1=%xmm1
+paddd %xmm0,%xmm1
+
+# qhasm: 		r1 = y1
+# asm 1: movdqa <y1=int6464#2,>r1=int6464#13
+# asm 2: movdqa <y1=%xmm1,>r1=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232	y1 <<= 7
+# asm 1: pslld $7,<y1=int6464#2
+# asm 2: pslld $7,<y1=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 		z1 ^= y1
+# asm 1: pxor  <y1=int6464#2,<z1=int6464#8
+# asm 2: pxor  <y1=%xmm1,<z1=%xmm7
+pxor  %xmm1,%xmm7
+
+# qhasm: uint32323232	r1 >>= 25
+# asm 1: psrld $25,<r1=int6464#13
+# asm 2: psrld $25,<r1=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 		z1 ^= r1
+# asm 1: pxor  <r1=int6464#13,<z1=int6464#8
+# asm 2: pxor  <r1=%xmm12,<z1=%xmm7
+pxor  %xmm12,%xmm7
+
+# qhasm: 								y15 = z7
+# asm 1: movdqa <z7=int6464#9,>y15=int6464#2
+# asm 2: movdqa <z7=%xmm8,>y15=%xmm1
+movdqa %xmm8,%xmm1
+
+# qhasm: uint32323232							y15 += z11
+# asm 1: paddd <z11=int6464#7,<y15=int6464#2
+# asm 2: paddd <z11=%xmm6,<y15=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm: 								r15 = y15
+# asm 1: movdqa <y15=int6464#2,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm1,>r15=%xmm12
+movdqa %xmm1,%xmm12
+
+# qhasm: uint32323232							y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#2
+# asm 2: pslld $18,<y15=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 								z15 ^= y15
+# asm 1: pxor  <y15=int6464#2,<z15=int6464#3
+# asm 2: pxor  <y15=%xmm1,<z15=%xmm2
+pxor  %xmm1,%xmm2
+
+# qhasm: uint32323232							r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 								z15 ^= r15
+# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm: 				z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#13
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm12
+movdqa 336(%rsp),%xmm12
+
+# qhasm: 								z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#22
+# asm 2: movdqa <z15=%xmm2,>z15_stack=336(%rsp)
+movdqa %xmm2,336(%rsp)
+
+# qhasm: 				y6 = z4
+# asm 1: movdqa <z4=int6464#15,>y6=int6464#2
+# asm 2: movdqa <z4=%xmm14,>y6=%xmm1
+movdqa %xmm14,%xmm1
+
+# qhasm: uint32323232			y6 += z5
+# asm 1: paddd <z5=int6464#13,<y6=int6464#2
+# asm 2: paddd <z5=%xmm12,<y6=%xmm1
+paddd %xmm12,%xmm1
+
+# qhasm: 				r6 = y6
+# asm 1: movdqa <y6=int6464#2,>r6=int6464#3
+# asm 2: movdqa <y6=%xmm1,>r6=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y6 <<= 7
+# asm 1: pslld $7,<y6=int6464#2
+# asm 2: pslld $7,<y6=%xmm1
+pslld $7,%xmm1
+
+# qhasm: 				z6 ^= y6
+# asm 1: pxor  <y6=int6464#2,<z6=int6464#6
+# asm 2: pxor  <y6=%xmm1,<z6=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm: uint32323232			r6 >>= 25
+# asm 1: psrld $25,<r6=int6464#3
+# asm 2: psrld $25,<r6=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 				z6 ^= r6
+# asm 1: pxor  <r6=int6464#3,<z6=int6464#6
+# asm 2: pxor  <r6=%xmm2,<z6=%xmm5
+pxor  %xmm2,%xmm5
+
+# qhasm: 		y2 = z0
+# asm 1: movdqa <z0=int6464#1,>y2=int6464#2
+# asm 2: movdqa <z0=%xmm0,>y2=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232	y2 += z1
+# asm 1: paddd <z1=int6464#8,<y2=int6464#2
+# asm 2: paddd <z1=%xmm7,<y2=%xmm1
+paddd %xmm7,%xmm1
+
+# qhasm: 		r2 = y2
+# asm 1: movdqa <y2=int6464#2,>r2=int6464#3
+# asm 2: movdqa <y2=%xmm1,>r2=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y2 <<= 9
+# asm 1: pslld $9,<y2=int6464#2
+# asm 2: pslld $9,<y2=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 		z2 ^= y2
+# asm 1: pxor  <y2=int6464#2,<z2=int6464#11
+# asm 2: pxor  <y2=%xmm1,<z2=%xmm10
+pxor  %xmm1,%xmm10
+
+# qhasm: uint32323232	r2 >>= 23
+# asm 1: psrld $23,<r2=int6464#3
+# asm 2: psrld $23,<r2=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 		z2 ^= r2
+# asm 1: pxor  <r2=int6464#3,<z2=int6464#11
+# asm 2: pxor  <r2=%xmm2,<z2=%xmm10
+pxor  %xmm2,%xmm10
+
+# qhasm: 				y7 = z5
+# asm 1: movdqa <z5=int6464#13,>y7=int6464#2
+# asm 2: movdqa <z5=%xmm12,>y7=%xmm1
+movdqa %xmm12,%xmm1
+
+# qhasm: uint32323232			y7 += z6
+# asm 1: paddd <z6=int6464#6,<y7=int6464#2
+# asm 2: paddd <z6=%xmm5,<y7=%xmm1
+paddd %xmm5,%xmm1
+
+# qhasm: 				r7 = y7
+# asm 1: movdqa <y7=int6464#2,>r7=int6464#3
+# asm 2: movdqa <y7=%xmm1,>r7=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y7 <<= 9
+# asm 1: pslld $9,<y7=int6464#2
+# asm 2: pslld $9,<y7=%xmm1
+pslld $9,%xmm1
+
+# qhasm: 				z7 ^= y7
+# asm 1: pxor  <y7=int6464#2,<z7=int6464#9
+# asm 2: pxor  <y7=%xmm1,<z7=%xmm8
+pxor  %xmm1,%xmm8
+
+# qhasm: uint32323232			r7 >>= 23
+# asm 1: psrld $23,<r7=int6464#3
+# asm 2: psrld $23,<r7=%xmm2
+psrld $23,%xmm2
+
+# qhasm: 				z7 ^= r7
+# asm 1: pxor  <r7=int6464#3,<z7=int6464#9
+# asm 2: pxor  <r7=%xmm2,<z7=%xmm8
+pxor  %xmm2,%xmm8
+
+# qhasm: 		y3 = z1
+# asm 1: movdqa <z1=int6464#8,>y3=int6464#2
+# asm 2: movdqa <z1=%xmm7,>y3=%xmm1
+movdqa %xmm7,%xmm1
+
+# qhasm: uint32323232	y3 += z2
+# asm 1: paddd <z2=int6464#11,<y3=int6464#2
+# asm 2: paddd <z2=%xmm10,<y3=%xmm1
+paddd %xmm10,%xmm1
+
+# qhasm: 		r3 = y3
+# asm 1: movdqa <y3=int6464#2,>r3=int6464#3
+# asm 2: movdqa <y3=%xmm1,>r3=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y3 <<= 13
+# asm 1: pslld $13,<y3=int6464#2
+# asm 2: pslld $13,<y3=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 		z3 ^= y3
+# asm 1: pxor  <y3=int6464#2,<z3=int6464#5
+# asm 2: pxor  <y3=%xmm1,<z3=%xmm4
+pxor  %xmm1,%xmm4
+
+# qhasm: uint32323232	r3 >>= 19
+# asm 1: psrld $19,<r3=int6464#3
+# asm 2: psrld $19,<r3=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 		z3 ^= r3
+# asm 1: pxor  <r3=int6464#3,<z3=int6464#5
+# asm 2: pxor  <r3=%xmm2,<z3=%xmm4
+pxor  %xmm2,%xmm4
+
+# qhasm: 				y4 = z6
+# asm 1: movdqa <z6=int6464#6,>y4=int6464#2
+# asm 2: movdqa <z6=%xmm5,>y4=%xmm1
+movdqa %xmm5,%xmm1
+
+# qhasm: uint32323232			y4 += z7
+# asm 1: paddd <z7=int6464#9,<y4=int6464#2
+# asm 2: paddd <z7=%xmm8,<y4=%xmm1
+paddd %xmm8,%xmm1
+
+# qhasm: 				r4 = y4
+# asm 1: movdqa <y4=int6464#2,>r4=int6464#3
+# asm 2: movdqa <y4=%xmm1,>r4=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232			y4 <<= 13
+# asm 1: pslld $13,<y4=int6464#2
+# asm 2: pslld $13,<y4=%xmm1
+pslld $13,%xmm1
+
+# qhasm: 				z4 ^= y4
+# asm 1: pxor  <y4=int6464#2,<z4=int6464#15
+# asm 2: pxor  <y4=%xmm1,<z4=%xmm14
+pxor  %xmm1,%xmm14
+
+# qhasm: uint32323232			r4 >>= 19
+# asm 1: psrld $19,<r4=int6464#3
+# asm 2: psrld $19,<r4=%xmm2
+psrld $19,%xmm2
+
+# qhasm: 				z4 ^= r4
+# asm 1: pxor  <r4=int6464#3,<z4=int6464#15
+# asm 2: pxor  <r4=%xmm2,<z4=%xmm14
+pxor  %xmm2,%xmm14
+
+# qhasm: 		y0 = z2
+# asm 1: movdqa <z2=int6464#11,>y0=int6464#2
+# asm 2: movdqa <z2=%xmm10,>y0=%xmm1
+movdqa %xmm10,%xmm1
+
+# qhasm: uint32323232	y0 += z3
+# asm 1: paddd <z3=int6464#5,<y0=int6464#2
+# asm 2: paddd <z3=%xmm4,<y0=%xmm1
+paddd %xmm4,%xmm1
+
+# qhasm: 		r0 = y0
+# asm 1: movdqa <y0=int6464#2,>r0=int6464#3
+# asm 2: movdqa <y0=%xmm1,>r0=%xmm2
+movdqa %xmm1,%xmm2
+
+# qhasm: uint32323232	y0 <<= 18
+# asm 1: pslld $18,<y0=int6464#2
+# asm 2: pslld $18,<y0=%xmm1
+pslld $18,%xmm1
+
+# qhasm: 		z0 ^= y0
+# asm 1: pxor  <y0=int6464#2,<z0=int6464#1
+# asm 2: pxor  <y0=%xmm1,<z0=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232	r0 >>= 14
+# asm 1: psrld $14,<r0=int6464#3
+# asm 2: psrld $14,<r0=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 		z0 ^= r0
+# asm 1: pxor  <r0=int6464#3,<z0=int6464#1
+# asm 2: pxor  <r0=%xmm2,<z0=%xmm0
+pxor  %xmm2,%xmm0
+
+# qhasm: 						z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#21,>z10=int6464#2
+# asm 2: movdqa <z10_stack=320(%rsp),>z10=%xmm1
+movdqa 320(%rsp),%xmm1
+
+# qhasm: 		z0_stack = z0
+# asm 1: movdqa <z0=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <z0=%xmm0,>z0_stack=320(%rsp)
+movdqa %xmm0,320(%rsp)
+
+# qhasm: 				y5 = z7
+# asm 1: movdqa <z7=int6464#9,>y5=int6464#1
+# asm 2: movdqa <z7=%xmm8,>y5=%xmm0
+movdqa %xmm8,%xmm0
+
+# qhasm: uint32323232			y5 += z4
+# asm 1: paddd <z4=int6464#15,<y5=int6464#1
+# asm 2: paddd <z4=%xmm14,<y5=%xmm0
+paddd %xmm14,%xmm0
+
+# qhasm: 				r5 = y5
+# asm 1: movdqa <y5=int6464#1,>r5=int6464#3
+# asm 2: movdqa <y5=%xmm0,>r5=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232			y5 <<= 18
+# asm 1: pslld $18,<y5=int6464#1
+# asm 2: pslld $18,<y5=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 				z5 ^= y5
+# asm 1: pxor  <y5=int6464#1,<z5=int6464#13
+# asm 2: pxor  <y5=%xmm0,<z5=%xmm12
+pxor  %xmm0,%xmm12
+
+# qhasm: uint32323232			r5 >>= 14
+# asm 1: psrld $14,<r5=int6464#3
+# asm 2: psrld $14,<r5=%xmm2
+psrld $14,%xmm2
+
+# qhasm: 				z5 ^= r5
+# asm 1: pxor  <r5=int6464#3,<z5=int6464#13
+# asm 2: pxor  <r5=%xmm2,<z5=%xmm12
+pxor  %xmm2,%xmm12
+
+# qhasm: 						y11 = z9
+# asm 1: movdqa <z9=int6464#12,>y11=int6464#1
+# asm 2: movdqa <z9=%xmm11,>y11=%xmm0
+movdqa %xmm11,%xmm0
+
+# qhasm: uint32323232					y11 += z10
+# asm 1: paddd <z10=int6464#2,<y11=int6464#1
+# asm 2: paddd <z10=%xmm1,<y11=%xmm0
+paddd %xmm1,%xmm0
+
+# qhasm: 						r11 = y11
+# asm 1: movdqa <y11=int6464#1,>r11=int6464#3
+# asm 2: movdqa <y11=%xmm0,>r11=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232					y11 <<= 7
+# asm 1: pslld $7,<y11=int6464#1
+# asm 2: pslld $7,<y11=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 						z11 ^= y11
+# asm 1: pxor  <y11=int6464#1,<z11=int6464#7
+# asm 2: pxor  <y11=%xmm0,<z11=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm: uint32323232					r11 >>= 25
+# asm 1: psrld $25,<r11=int6464#3
+# asm 2: psrld $25,<r11=%xmm2
+psrld $25,%xmm2
+
+# qhasm: 						z11 ^= r11
+# asm 1: pxor  <r11=int6464#3,<z11=int6464#7
+# asm 2: pxor  <r11=%xmm2,<z11=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm: 								z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>z15=int6464#3
+# asm 2: movdqa <z15_stack=336(%rsp),>z15=%xmm2
+movdqa 336(%rsp),%xmm2
+
+# qhasm: 				z5_stack = z5
+# asm 1: movdqa <z5=int6464#13,>z5_stack=stack128#22
+# asm 2: movdqa <z5=%xmm12,>z5_stack=336(%rsp)
+movdqa %xmm12,336(%rsp)
+
+# qhasm: 								y12 = z14
+# asm 1: movdqa <z14=int6464#4,>y12=int6464#1
+# asm 2: movdqa <z14=%xmm3,>y12=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232							y12 += z15
+# asm 1: paddd <z15=int6464#3,<y12=int6464#1
+# asm 2: paddd <z15=%xmm2,<y12=%xmm0
+paddd %xmm2,%xmm0
+
+# qhasm: 								r12 = y12
+# asm 1: movdqa <y12=int6464#1,>r12=int6464#13
+# asm 2: movdqa <y12=%xmm0,>r12=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y12 <<= 7
+# asm 1: pslld $7,<y12=int6464#1
+# asm 2: pslld $7,<y12=%xmm0
+pslld $7,%xmm0
+
+# qhasm: 								z12 ^= y12
+# asm 1: pxor  <y12=int6464#1,<z12=int6464#14
+# asm 2: pxor  <y12=%xmm0,<z12=%xmm13
+pxor  %xmm0,%xmm13
+
+# qhasm: uint32323232							r12 >>= 25
+# asm 1: psrld $25,<r12=int6464#13
+# asm 2: psrld $25,<r12=%xmm12
+psrld $25,%xmm12
+
+# qhasm: 								z12 ^= r12
+# asm 1: pxor  <r12=int6464#13,<z12=int6464#14
+# asm 2: pxor  <r12=%xmm12,<z12=%xmm13
+pxor  %xmm12,%xmm13
+
+# qhasm: 						y8 = z10
+# asm 1: movdqa <z10=int6464#2,>y8=int6464#1
+# asm 2: movdqa <z10=%xmm1,>y8=%xmm0
+movdqa %xmm1,%xmm0
+
+# qhasm: uint32323232					y8 += z11
+# asm 1: paddd <z11=int6464#7,<y8=int6464#1
+# asm 2: paddd <z11=%xmm6,<y8=%xmm0
+paddd %xmm6,%xmm0
+
+# qhasm: 						r8 = y8
+# asm 1: movdqa <y8=int6464#1,>r8=int6464#13
+# asm 2: movdqa <y8=%xmm0,>r8=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y8 <<= 9
+# asm 1: pslld $9,<y8=int6464#1
+# asm 2: pslld $9,<y8=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 						z8 ^= y8
+# asm 1: pxor  <y8=int6464#1,<z8=int6464#16
+# asm 2: pxor  <y8=%xmm0,<z8=%xmm15
+pxor  %xmm0,%xmm15
+
+# qhasm: uint32323232					r8 >>= 23
+# asm 1: psrld $23,<r8=int6464#13
+# asm 2: psrld $23,<r8=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 						z8 ^= r8
+# asm 1: pxor  <r8=int6464#13,<z8=int6464#16
+# asm 2: pxor  <r8=%xmm12,<z8=%xmm15
+pxor  %xmm12,%xmm15
+
+# qhasm: 								y13 = z15
+# asm 1: movdqa <z15=int6464#3,>y13=int6464#1
+# asm 2: movdqa <z15=%xmm2,>y13=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232							y13 += z12
+# asm 1: paddd <z12=int6464#14,<y13=int6464#1
+# asm 2: paddd <z12=%xmm13,<y13=%xmm0
+paddd %xmm13,%xmm0
+
+# qhasm: 								r13 = y13
+# asm 1: movdqa <y13=int6464#1,>r13=int6464#13
+# asm 2: movdqa <y13=%xmm0,>r13=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y13 <<= 9
+# asm 1: pslld $9,<y13=int6464#1
+# asm 2: pslld $9,<y13=%xmm0
+pslld $9,%xmm0
+
+# qhasm: 								z13 ^= y13
+# asm 1: pxor  <y13=int6464#1,<z13=int6464#10
+# asm 2: pxor  <y13=%xmm0,<z13=%xmm9
+pxor  %xmm0,%xmm9
+
+# qhasm: uint32323232							r13 >>= 23
+# asm 1: psrld $23,<r13=int6464#13
+# asm 2: psrld $23,<r13=%xmm12
+psrld $23,%xmm12
+
+# qhasm: 								z13 ^= r13
+# asm 1: pxor  <r13=int6464#13,<z13=int6464#10
+# asm 2: pxor  <r13=%xmm12,<z13=%xmm9
+pxor  %xmm12,%xmm9
+
+# qhasm: 						y9 = z11
+# asm 1: movdqa <z11=int6464#7,>y9=int6464#1
+# asm 2: movdqa <z11=%xmm6,>y9=%xmm0
+movdqa %xmm6,%xmm0
+
+# qhasm: uint32323232					y9 += z8
+# asm 1: paddd <z8=int6464#16,<y9=int6464#1
+# asm 2: paddd <z8=%xmm15,<y9=%xmm0
+paddd %xmm15,%xmm0
+
+# qhasm: 						r9 = y9
+# asm 1: movdqa <y9=int6464#1,>r9=int6464#13
+# asm 2: movdqa <y9=%xmm0,>r9=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y9 <<= 13
+# asm 1: pslld $13,<y9=int6464#1
+# asm 2: pslld $13,<y9=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 						z9 ^= y9
+# asm 1: pxor  <y9=int6464#1,<z9=int6464#12
+# asm 2: pxor  <y9=%xmm0,<z9=%xmm11
+pxor  %xmm0,%xmm11
+
+# qhasm: uint32323232					r9 >>= 19
+# asm 1: psrld $19,<r9=int6464#13
+# asm 2: psrld $19,<r9=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 						z9 ^= r9
+# asm 1: pxor  <r9=int6464#13,<z9=int6464#12
+# asm 2: pxor  <r9=%xmm12,<z9=%xmm11
+pxor  %xmm12,%xmm11
+
+# qhasm: 								y14 = z12
+# asm 1: movdqa <z12=int6464#14,>y14=int6464#1
+# asm 2: movdqa <z12=%xmm13,>y14=%xmm0
+movdqa %xmm13,%xmm0
+
+# qhasm: uint32323232							y14 += z13
+# asm 1: paddd <z13=int6464#10,<y14=int6464#1
+# asm 2: paddd <z13=%xmm9,<y14=%xmm0
+paddd %xmm9,%xmm0
+
+# qhasm: 								r14 = y14
+# asm 1: movdqa <y14=int6464#1,>r14=int6464#13
+# asm 2: movdqa <y14=%xmm0,>r14=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y14 <<= 13
+# asm 1: pslld $13,<y14=int6464#1
+# asm 2: pslld $13,<y14=%xmm0
+pslld $13,%xmm0
+
+# qhasm: 								z14 ^= y14
+# asm 1: pxor  <y14=int6464#1,<z14=int6464#4
+# asm 2: pxor  <y14=%xmm0,<z14=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm: uint32323232							r14 >>= 19
+# asm 1: psrld $19,<r14=int6464#13
+# asm 2: psrld $19,<r14=%xmm12
+psrld $19,%xmm12
+
+# qhasm: 								z14 ^= r14
+# asm 1: pxor  <r14=int6464#13,<z14=int6464#4
+# asm 2: pxor  <r14=%xmm12,<z14=%xmm3
+pxor  %xmm12,%xmm3
+
+# qhasm: 						y10 = z8
+# asm 1: movdqa <z8=int6464#16,>y10=int6464#1
+# asm 2: movdqa <z8=%xmm15,>y10=%xmm0
+movdqa %xmm15,%xmm0
+
+# qhasm: uint32323232					y10 += z9
+# asm 1: paddd <z9=int6464#12,<y10=int6464#1
+# asm 2: paddd <z9=%xmm11,<y10=%xmm0
+paddd %xmm11,%xmm0
+
+# qhasm: 						r10 = y10
+# asm 1: movdqa <y10=int6464#1,>r10=int6464#13
+# asm 2: movdqa <y10=%xmm0,>r10=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232					y10 <<= 18
+# asm 1: pslld $18,<y10=int6464#1
+# asm 2: pslld $18,<y10=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 						z10 ^= y10
+# asm 1: pxor  <y10=int6464#1,<z10=int6464#2
+# asm 2: pxor  <y10=%xmm0,<z10=%xmm1
+pxor  %xmm0,%xmm1
+
+# qhasm: uint32323232					r10 >>= 14
+# asm 1: psrld $14,<r10=int6464#13
+# asm 2: psrld $14,<r10=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 						z10 ^= r10
+# asm 1: pxor  <r10=int6464#13,<z10=int6464#2
+# asm 2: pxor  <r10=%xmm12,<z10=%xmm1
+pxor  %xmm12,%xmm1
+
+# qhasm: 								y15 = z13
+# asm 1: movdqa <z13=int6464#10,>y15=int6464#1
+# asm 2: movdqa <z13=%xmm9,>y15=%xmm0
+movdqa %xmm9,%xmm0
+
+# qhasm: uint32323232							y15 += z14
+# asm 1: paddd <z14=int6464#4,<y15=int6464#1
+# asm 2: paddd <z14=%xmm3,<y15=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm: 								r15 = y15
+# asm 1: movdqa <y15=int6464#1,>r15=int6464#13
+# asm 2: movdqa <y15=%xmm0,>r15=%xmm12
+movdqa %xmm0,%xmm12
+
+# qhasm: uint32323232							y15 <<= 18
+# asm 1: pslld $18,<y15=int6464#1
+# asm 2: pslld $18,<y15=%xmm0
+pslld $18,%xmm0
+
+# qhasm: 								z15 ^= y15
+# asm 1: pxor  <y15=int6464#1,<z15=int6464#3
+# asm 2: pxor  <y15=%xmm0,<z15=%xmm2
+pxor  %xmm0,%xmm2
+
+# qhasm: uint32323232							r15 >>= 14
+# asm 1: psrld $14,<r15=int6464#13
+# asm 2: psrld $14,<r15=%xmm12
+psrld $14,%xmm12
+
+# qhasm: 								z15 ^= r15
+# asm 1: pxor  <r15=int6464#13,<z15=int6464#3
+# asm 2: pxor  <r15=%xmm12,<z15=%xmm2
+pxor  %xmm12,%xmm2
+
+# qhasm: 		z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>z0=int6464#13
+# asm 2: movdqa <z0_stack=320(%rsp),>z0=%xmm12
+movdqa 320(%rsp),%xmm12
+
+# qhasm: 				z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#22,>z5=int6464#1
+# asm 2: movdqa <z5_stack=336(%rsp),>z5=%xmm0
+movdqa 336(%rsp),%xmm0
+
+# qhasm:                   unsigned>? i -= 2
+# asm 1: sub  $2,<i=int64#3
+# asm 2: sub  $2,<i=%rdx
+sub  $2,%rdx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
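+# eight rounds done: add the original state back (feed-forward), then XOR the
+# four lanes of each word into the message at block offsets 0, 64, 128 and 192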
+
+# qhasm:   uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#13
+# asm 2: paddd <orig0=112(%rsp),<z0=%xmm12
+paddd 112(%rsp),%xmm12
+
+# qhasm:   uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#8
+# asm 2: paddd <orig1=176(%rsp),<z1=%xmm7
+paddd 176(%rsp),%xmm7
+
+# qhasm:   uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#11
+# asm 2: paddd <orig2=224(%rsp),<z2=%xmm10
+paddd 224(%rsp),%xmm10
+
+# qhasm:   uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#5
+# asm 2: paddd <orig3=272(%rsp),<z3=%xmm4
+paddd 272(%rsp),%xmm4
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 0(<m=%rsi),<in0=%edx
+xorl 0(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 4(<m=%rsi),<in1=%ecx
+xorl 4(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 8(<m=%rsi),<in2=%r8d
+xorl 8(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 12(<m=%rsi),<in3=%r9d
+xorl 12(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 0) = in0
+# asm 1: movl   <in0=int64#3d,0(<out=int64#1)
+# asm 2: movl   <in0=%edx,0(<out=%rdi)
+movl   %edx,0(%rdi)
+
+# qhasm:   *(uint32 *) (out + 4) = in1
+# asm 1: movl   <in1=int64#4d,4(<out=int64#1)
+# asm 2: movl   <in1=%ecx,4(<out=%rdi)
+movl   %ecx,4(%rdi)
+
+# qhasm:   *(uint32 *) (out + 8) = in2
+# asm 1: movl   <in2=int64#5d,8(<out=int64#1)
+# asm 2: movl   <in2=%r8d,8(<out=%rdi)
+movl   %r8d,8(%rdi)
+
+# qhasm:   *(uint32 *) (out + 12) = in3
+# asm 1: movl   <in3=int64#6d,12(<out=int64#1)
+# asm 2: movl   <in3=%r9d,12(<out=%rdi)
+movl   %r9d,12(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 64(<m=%rsi),<in0=%edx
+xorl 64(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 68(<m=%rsi),<in1=%ecx
+xorl 68(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 72(<m=%rsi),<in2=%r8d
+xorl 72(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 76(<m=%rsi),<in3=%r9d
+xorl 76(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 64) = in0
+# asm 1: movl   <in0=int64#3d,64(<out=int64#1)
+# asm 2: movl   <in0=%edx,64(<out=%rdi)
+movl   %edx,64(%rdi)
+
+# qhasm:   *(uint32 *) (out + 68) = in1
+# asm 1: movl   <in1=int64#4d,68(<out=int64#1)
+# asm 2: movl   <in1=%ecx,68(<out=%rdi)
+movl   %ecx,68(%rdi)
+
+# qhasm:   *(uint32 *) (out + 72) = in2
+# asm 1: movl   <in2=int64#5d,72(<out=int64#1)
+# asm 2: movl   <in2=%r8d,72(<out=%rdi)
+movl   %r8d,72(%rdi)
+
+# qhasm:   *(uint32 *) (out + 76) = in3
+# asm 1: movl   <in3=int64#6d,76(<out=int64#1)
+# asm 2: movl   <in3=%r9d,76(<out=%rdi)
+movl   %r9d,76(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#13,<z0=int6464#13
+# asm 2: pshufd $0x39,<z0=%xmm12,<z0=%xmm12
+pshufd $0x39,%xmm12,%xmm12
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#8,<z1=int6464#8
+# asm 2: pshufd $0x39,<z1=%xmm7,<z1=%xmm7
+pshufd $0x39,%xmm7,%xmm7
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#11,<z2=int6464#11
+# asm 2: pshufd $0x39,<z2=%xmm10,<z2=%xmm10
+pshufd $0x39,%xmm10,%xmm10
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#5,<z3=int6464#5
+# asm 2: pshufd $0x39,<z3=%xmm4,<z3=%xmm4
+pshufd $0x39,%xmm4,%xmm4
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 128(<m=%rsi),<in0=%edx
+xorl 128(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 132(<m=%rsi),<in1=%ecx
+xorl 132(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 136(<m=%rsi),<in2=%r8d
+xorl 136(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 140(<m=%rsi),<in3=%r9d
+xorl 140(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 128) = in0
+# asm 1: movl   <in0=int64#3d,128(<out=int64#1)
+# asm 2: movl   <in0=%edx,128(<out=%rdi)
+movl   %edx,128(%rdi)
+
+# qhasm:   *(uint32 *) (out + 132) = in1
+# asm 1: movl   <in1=int64#4d,132(<out=int64#1)
+# asm 2: movl   <in1=%ecx,132(<out=%rdi)
+movl   %ecx,132(%rdi)
+
+# qhasm:   *(uint32 *) (out + 136) = in2
+# asm 1: movl   <in2=int64#5d,136(<out=int64#1)
+# asm 2: movl   <in2=%r8d,136(<out=%rdi)
+movl   %r8d,136(%rdi)
+
+# qhasm:   *(uint32 *) (out + 140) = in3
+# asm 1: movl   <in3=int64#6d,140(<out=int64#1)
+# asm 2: movl   <in3=%r9d,140(<out=%rdi)
+movl   %r9d,140(%rdi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#13,>in0=int64#3
+# asm 2: movd   <z0=%xmm12,>in0=%rdx
+movd   %xmm12,%rdx
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#8,>in1=int64#4
+# asm 2: movd   <z1=%xmm7,>in1=%rcx
+movd   %xmm7,%rcx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#11,>in2=int64#5
+# asm 2: movd   <z2=%xmm10,>in2=%r8
+movd   %xmm10,%r8
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#5,>in3=int64#6
+# asm 2: movd   <z3=%xmm4,>in3=%r9
+movd   %xmm4,%r9
+
+# qhasm:   (uint32) in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int64#2),<in0=int64#3d
+# asm 2: xorl 192(<m=%rsi),<in0=%edx
+xorl 192(%rsi),%edx
+
+# qhasm:   (uint32) in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int64#2),<in1=int64#4d
+# asm 2: xorl 196(<m=%rsi),<in1=%ecx
+xorl 196(%rsi),%ecx
+
+# qhasm:   (uint32) in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int64#2),<in2=int64#5d
+# asm 2: xorl 200(<m=%rsi),<in2=%r8d
+xorl 200(%rsi),%r8d
+
+# qhasm:   (uint32) in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int64#2),<in3=int64#6d
+# asm 2: xorl 204(<m=%rsi),<in3=%r9d
+xorl 204(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 192) = in0
+# asm 1: movl   <in0=int64#3d,192(<out=int64#1)
+# asm 2: movl   <in0=%edx,192(<out=%rdi)
+movl   %edx,192(%rdi)
+
+# qhasm:   *(uint32 *) (out + 196) = in1
+# asm 1: movl   <in1=int64#4d,196(<out=int64#1)
+# asm 2: movl   <in1=%ecx,196(<out=%rdi)
+movl   %ecx,196(%rdi)
+
+# qhasm:   *(uint32 *) (out + 200) = in2
+# asm 1: movl   <in2=int64#5d,200(<out=int64#1)
+# asm 2: movl   <in2=%r8d,200(<out=%rdi)
+movl   %r8d,200(%rdi)
+
+# qhasm:   *(uint32 *) (out + 204) = in3
+# asm 1: movl   <in3=int64#6d,204(<out=int64#1)
+# asm 2: movl   <in3=%r9d,204(<out=%rdi)
+movl   %r9d,204(%rdi)
+
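+# note (hedged): the next four paddd instructions appear to implement the
+# Salsa20 feed-forward for state words 4..7: each xmm register holds one
+# state word for four independent 64-byte blocks (one per 32-bit lane), and
+# the original input words saved as orig4..orig7 on the stack are added back
+# before the lanes are extracted with movd and XORed with the message below.
+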
+# qhasm:   uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#15
+# asm 2: paddd <orig4=240(%rsp),<z4=%xmm14
+paddd 240(%rsp),%xmm14
+
+# qhasm:   uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#1
+# asm 2: paddd <orig5=64(%rsp),<z5=%xmm0
+paddd 64(%rsp),%xmm0
+
+# qhasm:   uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#6
+# asm 2: paddd <orig6=128(%rsp),<z6=%xmm5
+paddd 128(%rsp),%xmm5
+
+# qhasm:   uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#9
+# asm 2: paddd <orig7=192(%rsp),<z7=%xmm8
+paddd 192(%rsp),%xmm8
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 16(<m=%rsi),<in4=%edx
+xorl 16(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 16) = in4
+# asm 1: movl   <in4=int64#3d,16(<out=int64#1)
+# asm 2: movl   <in4=%edx,16(<out=%rdi)
+movl   %edx,16(%rdi)
+
+# qhasm:   *(uint32 *) (out + 20) = in5
+# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl   <in5=%ecx,20(<out=%rdi)
+movl   %ecx,20(%rdi)
+
+# qhasm:   *(uint32 *) (out + 24) = in6
+# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl   <in6=%r8d,24(<out=%rdi)
+movl   %r8d,24(%rdi)
+
+# qhasm:   *(uint32 *) (out + 28) = in7
+# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl   <in7=%r9d,28(<out=%rdi)
+movl   %r9d,28(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 80(<m=%rsi),<in4=%edx
+xorl 80(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 84(<m=%rsi),<in5=%ecx
+xorl 84(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 88(<m=%rsi),<in6=%r8d
+xorl 88(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 92(<m=%rsi),<in7=%r9d
+xorl 92(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 80) = in4
+# asm 1: movl   <in4=int64#3d,80(<out=int64#1)
+# asm 2: movl   <in4=%edx,80(<out=%rdi)
+movl   %edx,80(%rdi)
+
+# qhasm:   *(uint32 *) (out + 84) = in5
+# asm 1: movl   <in5=int64#4d,84(<out=int64#1)
+# asm 2: movl   <in5=%ecx,84(<out=%rdi)
+movl   %ecx,84(%rdi)
+
+# qhasm:   *(uint32 *) (out + 88) = in6
+# asm 1: movl   <in6=int64#5d,88(<out=int64#1)
+# asm 2: movl   <in6=%r8d,88(<out=%rdi)
+movl   %r8d,88(%rdi)
+
+# qhasm:   *(uint32 *) (out + 92) = in7
+# asm 1: movl   <in7=int64#6d,92(<out=int64#1)
+# asm 2: movl   <in7=%r9d,92(<out=%rdi)
+movl   %r9d,92(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#15,<z4=int6464#15
+# asm 2: pshufd $0x39,<z4=%xmm14,<z4=%xmm14
+pshufd $0x39,%xmm14,%xmm14
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#1,<z5=int6464#1
+# asm 2: pshufd $0x39,<z5=%xmm0,<z5=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#6,<z6=int6464#6
+# asm 2: pshufd $0x39,<z6=%xmm5,<z6=%xmm5
+pshufd $0x39,%xmm5,%xmm5
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#9,<z7=int6464#9
+# asm 2: pshufd $0x39,<z7=%xmm8,<z7=%xmm8
+pshufd $0x39,%xmm8,%xmm8
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 144(<m=%rsi),<in4=%edx
+xorl 144(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 148(<m=%rsi),<in5=%ecx
+xorl 148(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 152(<m=%rsi),<in6=%r8d
+xorl 152(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 156(<m=%rsi),<in7=%r9d
+xorl 156(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 144) = in4
+# asm 1: movl   <in4=int64#3d,144(<out=int64#1)
+# asm 2: movl   <in4=%edx,144(<out=%rdi)
+movl   %edx,144(%rdi)
+
+# qhasm:   *(uint32 *) (out + 148) = in5
+# asm 1: movl   <in5=int64#4d,148(<out=int64#1)
+# asm 2: movl   <in5=%ecx,148(<out=%rdi)
+movl   %ecx,148(%rdi)
+
+# qhasm:   *(uint32 *) (out + 152) = in6
+# asm 1: movl   <in6=int64#5d,152(<out=int64#1)
+# asm 2: movl   <in6=%r8d,152(<out=%rdi)
+movl   %r8d,152(%rdi)
+
+# qhasm:   *(uint32 *) (out + 156) = in7
+# asm 1: movl   <in7=int64#6d,156(<out=int64#1)
+# asm 2: movl   <in7=%r9d,156(<out=%rdi)
+movl   %r9d,156(%rdi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#15,>in4=int64#3
+# asm 2: movd   <z4=%xmm14,>in4=%rdx
+movd   %xmm14,%rdx
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#1,>in5=int64#4
+# asm 2: movd   <z5=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#6,>in6=int64#5
+# asm 2: movd   <z6=%xmm5,>in6=%r8
+movd   %xmm5,%r8
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#9,>in7=int64#6
+# asm 2: movd   <z7=%xmm8,>in7=%r9
+movd   %xmm8,%r9
+
+# qhasm:   (uint32) in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int64#2),<in4=int64#3d
+# asm 2: xorl 208(<m=%rsi),<in4=%edx
+xorl 208(%rsi),%edx
+
+# qhasm:   (uint32) in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 212(<m=%rsi),<in5=%ecx
+xorl 212(%rsi),%ecx
+
+# qhasm:   (uint32) in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 216(<m=%rsi),<in6=%r8d
+xorl 216(%rsi),%r8d
+
+# qhasm:   (uint32) in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 220(<m=%rsi),<in7=%r9d
+xorl 220(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 208) = in4
+# asm 1: movl   <in4=int64#3d,208(<out=int64#1)
+# asm 2: movl   <in4=%edx,208(<out=%rdi)
+movl   %edx,208(%rdi)
+
+# qhasm:   *(uint32 *) (out + 212) = in5
+# asm 1: movl   <in5=int64#4d,212(<out=int64#1)
+# asm 2: movl   <in5=%ecx,212(<out=%rdi)
+movl   %ecx,212(%rdi)
+
+# qhasm:   *(uint32 *) (out + 216) = in6
+# asm 1: movl   <in6=int64#5d,216(<out=int64#1)
+# asm 2: movl   <in6=%r8d,216(<out=%rdi)
+movl   %r8d,216(%rdi)
+
+# qhasm:   *(uint32 *) (out + 220) = in7
+# asm 1: movl   <in7=int64#6d,220(<out=int64#1)
+# asm 2: movl   <in7=%r9d,220(<out=%rdi)
+movl   %r9d,220(%rdi)
+
+# qhasm:   uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#16
+# asm 2: paddd <orig8=288(%rsp),<z8=%xmm15
+paddd 288(%rsp),%xmm15
+
+# qhasm:   uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#12
+# asm 2: paddd <orig9=304(%rsp),<z9=%xmm11
+paddd 304(%rsp),%xmm11
+
+# qhasm:   uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#2
+# asm 2: paddd <orig10=80(%rsp),<z10=%xmm1
+paddd 80(%rsp),%xmm1
+
+# qhasm:   uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#7
+# asm 2: paddd <orig11=144(%rsp),<z11=%xmm6
+paddd 144(%rsp),%xmm6
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 32(<m=%rsi),<in8=%edx
+xorl 32(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 36(<m=%rsi),<in9=%ecx
+xorl 36(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 40(<m=%rsi),<in10=%r8d
+xorl 40(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 44(<m=%rsi),<in11=%r9d
+xorl 44(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 32) = in8
+# asm 1: movl   <in8=int64#3d,32(<out=int64#1)
+# asm 2: movl   <in8=%edx,32(<out=%rdi)
+movl   %edx,32(%rdi)
+
+# qhasm:   *(uint32 *) (out + 36) = in9
+# asm 1: movl   <in9=int64#4d,36(<out=int64#1)
+# asm 2: movl   <in9=%ecx,36(<out=%rdi)
+movl   %ecx,36(%rdi)
+
+# qhasm:   *(uint32 *) (out + 40) = in10
+# asm 1: movl   <in10=int64#5d,40(<out=int64#1)
+# asm 2: movl   <in10=%r8d,40(<out=%rdi)
+movl   %r8d,40(%rdi)
+
+# qhasm:   *(uint32 *) (out + 44) = in11
+# asm 1: movl   <in11=int64#6d,44(<out=int64#1)
+# asm 2: movl   <in11=%r9d,44(<out=%rdi)
+movl   %r9d,44(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 96(<m=%rsi),<in8=%edx
+xorl 96(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 100(<m=%rsi),<in9=%ecx
+xorl 100(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 104(<m=%rsi),<in10=%r8d
+xorl 104(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 108(<m=%rsi),<in11=%r9d
+xorl 108(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 96) = in8
+# asm 1: movl   <in8=int64#3d,96(<out=int64#1)
+# asm 2: movl   <in8=%edx,96(<out=%rdi)
+movl   %edx,96(%rdi)
+
+# qhasm:   *(uint32 *) (out + 100) = in9
+# asm 1: movl   <in9=int64#4d,100(<out=int64#1)
+# asm 2: movl   <in9=%ecx,100(<out=%rdi)
+movl   %ecx,100(%rdi)
+
+# qhasm:   *(uint32 *) (out + 104) = in10
+# asm 1: movl   <in10=int64#5d,104(<out=int64#1)
+# asm 2: movl   <in10=%r8d,104(<out=%rdi)
+movl   %r8d,104(%rdi)
+
+# qhasm:   *(uint32 *) (out + 108) = in11
+# asm 1: movl   <in11=int64#6d,108(<out=int64#1)
+# asm 2: movl   <in11=%r9d,108(<out=%rdi)
+movl   %r9d,108(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#16,<z8=int6464#16
+# asm 2: pshufd $0x39,<z8=%xmm15,<z8=%xmm15
+pshufd $0x39,%xmm15,%xmm15
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#12,<z9=int6464#12
+# asm 2: pshufd $0x39,<z9=%xmm11,<z9=%xmm11
+pshufd $0x39,%xmm11,%xmm11
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#2,<z10=int6464#2
+# asm 2: pshufd $0x39,<z10=%xmm1,<z10=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#7,<z11=int6464#7
+# asm 2: pshufd $0x39,<z11=%xmm6,<z11=%xmm6
+pshufd $0x39,%xmm6,%xmm6
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 160(<m=%rsi),<in8=%edx
+xorl 160(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 164(<m=%rsi),<in9=%ecx
+xorl 164(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 168(<m=%rsi),<in10=%r8d
+xorl 168(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 172(<m=%rsi),<in11=%r9d
+xorl 172(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 160) = in8
+# asm 1: movl   <in8=int64#3d,160(<out=int64#1)
+# asm 2: movl   <in8=%edx,160(<out=%rdi)
+movl   %edx,160(%rdi)
+
+# qhasm:   *(uint32 *) (out + 164) = in9
+# asm 1: movl   <in9=int64#4d,164(<out=int64#1)
+# asm 2: movl   <in9=%ecx,164(<out=%rdi)
+movl   %ecx,164(%rdi)
+
+# qhasm:   *(uint32 *) (out + 168) = in10
+# asm 1: movl   <in10=int64#5d,168(<out=int64#1)
+# asm 2: movl   <in10=%r8d,168(<out=%rdi)
+movl   %r8d,168(%rdi)
+
+# qhasm:   *(uint32 *) (out + 172) = in11
+# asm 1: movl   <in11=int64#6d,172(<out=int64#1)
+# asm 2: movl   <in11=%r9d,172(<out=%rdi)
+movl   %r9d,172(%rdi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#16,>in8=int64#3
+# asm 2: movd   <z8=%xmm15,>in8=%rdx
+movd   %xmm15,%rdx
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#12,>in9=int64#4
+# asm 2: movd   <z9=%xmm11,>in9=%rcx
+movd   %xmm11,%rcx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#2,>in10=int64#5
+# asm 2: movd   <z10=%xmm1,>in10=%r8
+movd   %xmm1,%r8
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#7,>in11=int64#6
+# asm 2: movd   <z11=%xmm6,>in11=%r9
+movd   %xmm6,%r9
+
+# qhasm:   (uint32) in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int64#2),<in8=int64#3d
+# asm 2: xorl 224(<m=%rsi),<in8=%edx
+xorl 224(%rsi),%edx
+
+# qhasm:   (uint32) in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int64#2),<in9=int64#4d
+# asm 2: xorl 228(<m=%rsi),<in9=%ecx
+xorl 228(%rsi),%ecx
+
+# qhasm:   (uint32) in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int64#2),<in10=int64#5d
+# asm 2: xorl 232(<m=%rsi),<in10=%r8d
+xorl 232(%rsi),%r8d
+
+# qhasm:   (uint32) in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int64#2),<in11=int64#6d
+# asm 2: xorl 236(<m=%rsi),<in11=%r9d
+xorl 236(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 224) = in8
+# asm 1: movl   <in8=int64#3d,224(<out=int64#1)
+# asm 2: movl   <in8=%edx,224(<out=%rdi)
+movl   %edx,224(%rdi)
+
+# qhasm:   *(uint32 *) (out + 228) = in9
+# asm 1: movl   <in9=int64#4d,228(<out=int64#1)
+# asm 2: movl   <in9=%ecx,228(<out=%rdi)
+movl   %ecx,228(%rdi)
+
+# qhasm:   *(uint32 *) (out + 232) = in10
+# asm 1: movl   <in10=int64#5d,232(<out=int64#1)
+# asm 2: movl   <in10=%r8d,232(<out=%rdi)
+movl   %r8d,232(%rdi)
+
+# qhasm:   *(uint32 *) (out + 236) = in11
+# asm 1: movl   <in11=int64#6d,236(<out=int64#1)
+# asm 2: movl   <in11=%r9d,236(<out=%rdi)
+movl   %r9d,236(%rdi)
+
+# qhasm:   uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#14
+# asm 2: paddd <orig12=160(%rsp),<z12=%xmm13
+paddd 160(%rsp),%xmm13
+
+# qhasm:   uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#10
+# asm 2: paddd <orig13=208(%rsp),<z13=%xmm9
+paddd 208(%rsp),%xmm9
+
+# qhasm:   uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#4
+# asm 2: paddd <orig14=256(%rsp),<z14=%xmm3
+paddd 256(%rsp),%xmm3
+
+# qhasm:   uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#3
+# asm 2: paddd <orig15=96(%rsp),<z15=%xmm2
+paddd 96(%rsp),%xmm2
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 48(<m=%rsi),<in12=%edx
+xorl 48(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 52(<m=%rsi),<in13=%ecx
+xorl 52(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 56(<m=%rsi),<in14=%r8d
+xorl 56(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 60(<m=%rsi),<in15=%r9d
+xorl 60(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 48) = in12
+# asm 1: movl   <in12=int64#3d,48(<out=int64#1)
+# asm 2: movl   <in12=%edx,48(<out=%rdi)
+movl   %edx,48(%rdi)
+
+# qhasm:   *(uint32 *) (out + 52) = in13
+# asm 1: movl   <in13=int64#4d,52(<out=int64#1)
+# asm 2: movl   <in13=%ecx,52(<out=%rdi)
+movl   %ecx,52(%rdi)
+
+# qhasm:   *(uint32 *) (out + 56) = in14
+# asm 1: movl   <in14=int64#5d,56(<out=int64#1)
+# asm 2: movl   <in14=%r8d,56(<out=%rdi)
+movl   %r8d,56(%rdi)
+
+# qhasm:   *(uint32 *) (out + 60) = in15
+# asm 1: movl   <in15=int64#6d,60(<out=int64#1)
+# asm 2: movl   <in15=%r9d,60(<out=%rdi)
+movl   %r9d,60(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 112(<m=%rsi),<in12=%edx
+xorl 112(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 116(<m=%rsi),<in13=%ecx
+xorl 116(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 120(<m=%rsi),<in14=%r8d
+xorl 120(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 124(<m=%rsi),<in15=%r9d
+xorl 124(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 112) = in12
+# asm 1: movl   <in12=int64#3d,112(<out=int64#1)
+# asm 2: movl   <in12=%edx,112(<out=%rdi)
+movl   %edx,112(%rdi)
+
+# qhasm:   *(uint32 *) (out + 116) = in13
+# asm 1: movl   <in13=int64#4d,116(<out=int64#1)
+# asm 2: movl   <in13=%ecx,116(<out=%rdi)
+movl   %ecx,116(%rdi)
+
+# qhasm:   *(uint32 *) (out + 120) = in14
+# asm 1: movl   <in14=int64#5d,120(<out=int64#1)
+# asm 2: movl   <in14=%r8d,120(<out=%rdi)
+movl   %r8d,120(%rdi)
+
+# qhasm:   *(uint32 *) (out + 124) = in15
+# asm 1: movl   <in15=int64#6d,124(<out=int64#1)
+# asm 2: movl   <in15=%r9d,124(<out=%rdi)
+movl   %r9d,124(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#14,<z12=int6464#14
+# asm 2: pshufd $0x39,<z12=%xmm13,<z12=%xmm13
+pshufd $0x39,%xmm13,%xmm13
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#10,<z13=int6464#10
+# asm 2: pshufd $0x39,<z13=%xmm9,<z13=%xmm9
+pshufd $0x39,%xmm9,%xmm9
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#4,<z14=int6464#4
+# asm 2: pshufd $0x39,<z14=%xmm3,<z14=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#3,<z15=int6464#3
+# asm 2: pshufd $0x39,<z15=%xmm2,<z15=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 176(<m=%rsi),<in12=%edx
+xorl 176(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 180(<m=%rsi),<in13=%ecx
+xorl 180(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 184(<m=%rsi),<in14=%r8d
+xorl 184(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 188(<m=%rsi),<in15=%r9d
+xorl 188(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 176) = in12
+# asm 1: movl   <in12=int64#3d,176(<out=int64#1)
+# asm 2: movl   <in12=%edx,176(<out=%rdi)
+movl   %edx,176(%rdi)
+
+# qhasm:   *(uint32 *) (out + 180) = in13
+# asm 1: movl   <in13=int64#4d,180(<out=int64#1)
+# asm 2: movl   <in13=%ecx,180(<out=%rdi)
+movl   %ecx,180(%rdi)
+
+# qhasm:   *(uint32 *) (out + 184) = in14
+# asm 1: movl   <in14=int64#5d,184(<out=int64#1)
+# asm 2: movl   <in14=%r8d,184(<out=%rdi)
+movl   %r8d,184(%rdi)
+
+# qhasm:   *(uint32 *) (out + 188) = in15
+# asm 1: movl   <in15=int64#6d,188(<out=int64#1)
+# asm 2: movl   <in15=%r9d,188(<out=%rdi)
+movl   %r9d,188(%rdi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#14,>in12=int64#3
+# asm 2: movd   <z12=%xmm13,>in12=%rdx
+movd   %xmm13,%rdx
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#10,>in13=int64#4
+# asm 2: movd   <z13=%xmm9,>in13=%rcx
+movd   %xmm9,%rcx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#4,>in14=int64#5
+# asm 2: movd   <z14=%xmm3,>in14=%r8
+movd   %xmm3,%r8
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#3,>in15=int64#6
+# asm 2: movd   <z15=%xmm2,>in15=%r9
+movd   %xmm2,%r9
+
+# qhasm:   (uint32) in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int64#2),<in12=int64#3d
+# asm 2: xorl 240(<m=%rsi),<in12=%edx
+xorl 240(%rsi),%edx
+
+# qhasm:   (uint32) in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int64#2),<in13=int64#4d
+# asm 2: xorl 244(<m=%rsi),<in13=%ecx
+xorl 244(%rsi),%ecx
+
+# qhasm:   (uint32) in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int64#2),<in14=int64#5d
+# asm 2: xorl 248(<m=%rsi),<in14=%r8d
+xorl 248(%rsi),%r8d
+
+# qhasm:   (uint32) in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int64#2),<in15=int64#6d
+# asm 2: xorl 252(<m=%rsi),<in15=%r9d
+xorl 252(%rsi),%r9d
+
+# qhasm:   *(uint32 *) (out + 240) = in12
+# asm 1: movl   <in12=int64#3d,240(<out=int64#1)
+# asm 2: movl   <in12=%edx,240(<out=%rdi)
+movl   %edx,240(%rdi)
+
+# qhasm:   *(uint32 *) (out + 244) = in13
+# asm 1: movl   <in13=int64#4d,244(<out=int64#1)
+# asm 2: movl   <in13=%ecx,244(<out=%rdi)
+movl   %ecx,244(%rdi)
+
+# qhasm:   *(uint32 *) (out + 248) = in14
+# asm 1: movl   <in14=int64#5d,248(<out=int64#1)
+# asm 2: movl   <in14=%r8d,248(<out=%rdi)
+movl   %r8d,248(%rdi)
+
+# qhasm:   *(uint32 *) (out + 252) = in15
+# asm 1: movl   <in15=int64#6d,252(<out=int64#1)
+# asm 2: movl   <in15=%r9d,252(<out=%rdi)
+movl   %r9d,252(%rdi)
+
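+# note (hedged): at this point one full 256-byte chunk (four 64-byte Salsa20
+# blocks, word-sliced across the xmm lanes) has been XORed with the message
+# and written to out; the code below reloads the byte counter, advances m and
+# out by 256, and loops back while at least 256 bytes remain.
+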
+# qhasm:   bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm:   bytes -= 256
+# asm 1: sub  $256,<bytes=int64#6
+# asm 2: sub  $256,<bytes=%r9
+sub  $256,%r9
+
+# qhasm:   m += 256
+# asm 1: add  $256,<m=int64#2
+# asm 2: add  $256,<m=%rsi
+add  $256,%rsi
+
+# qhasm:   out += 256
+# asm 1: add  $256,<out=int64#1
+# asm 2: add  $256,<out=%rdi
+add  $256,%rdi
+
+# qhasm:                            unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int64#6
+# asm 2: cmp  $256,<bytes=%r9
+cmp  $256,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm:                 unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int64#6
+# asm 2: cmp  $0,<bytes=%r9
+cmp  $0,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
+
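+# note (hedged): tail path for 1..255 remaining bytes. If fewer than 64 bytes
+# are left, the remaining message bytes are first copied into the stack
+# buffer tmp (rep movsb) and both m and out are redirected at tmp, with the
+# real output pointer saved in ctarget; this lets the single-block code below
+# always read and write a full 64-byte region.
+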
+# qhasm:                   unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int64#6
+# asm 2: cmp  $64,<bytes=%r9
+cmp  $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto nocopy if !unsigned<
+jae ._nocopy
+
+# qhasm:     ctarget = out
+# asm 1: mov  <out=int64#1,>ctarget=int64#3
+# asm 2: mov  <out=%rdi,>ctarget=%rdx
+mov  %rdi,%rdx
+
+# qhasm:     out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm:     i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm:     while (i) { *out++ = *m++; --i }
+rep movsb
+
+# qhasm:     out = &tmp
+# asm 1: leaq <tmp=stack512#1,>out=int64#1
+# asm 2: leaq <tmp=416(%rsp),>out=%rdi
+leaq 416(%rsp),%rdi
+
+# qhasm:     m = &tmp
+# asm 1: leaq <tmp=stack512#1,>m=int64#2
+# asm 2: leaq <tmp=416(%rsp),>m=%rsi
+leaq 416(%rsp),%rsi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:   nocopy:
+._nocopy:
+
+# qhasm:   bytes_backup = bytes
+# asm 1: movq <bytes=int64#6,>bytes_backup=stack64#8
+# asm 2: movq <bytes=%r9,>bytes_backup=408(%rsp)
+movq %r9,408(%rsp)
+
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#4,>diag0=int6464#1
+# asm 2: movdqa <x0=48(%rsp),>diag0=%xmm0
+movdqa 48(%rsp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#1,>diag1=int6464#2
+# asm 2: movdqa <x1=0(%rsp),>diag1=%xmm1
+movdqa 0(%rsp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#2,>diag2=int6464#3
+# asm 2: movdqa <x2=16(%rsp),>diag2=%xmm2
+movdqa 16(%rsp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#3,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%rsp),>diag3=%xmm3
+movdqa 32(%rsp),%xmm3
+
+# qhasm:                     a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 8
+# asm 1: mov  $8,>i=int64#4
+# asm 2: mov  $8,>i=%rcx
+mov  $8,%rcx
+
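+# note (hedged): the loop below runs Salsa20 rounds on a single 64-byte block
+# kept in "diagonal" form (diag0..diag3). Each 32-bit left-rotation is built
+# from a pslld/psrld pair (7/25, 9/23, 13/19, 18/14) followed by two pxor,
+# and pshufd $0x93 / $0x4e / $0x39 re-align the diagonals between steps. i is
+# decreased by 4 per pass and each pass performs four rounds, so starting at
+# i = 8 this looks like an 8-round variant.
+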
+# qhasm: mainloop2:
+._mainloop2:
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:                  unsigned>? i -= 4
+# asm 1: sub  $4,<i=int64#4
+# asm 2: sub  $4,<i=%rcx
+sub  $4,%rcx
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm:                 b0 = 0
+# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
+pxor   %xmm7,%xmm7
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
+
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#4,<diag0=int6464#1
+# asm 2: paddd <x0=48(%rsp),<diag0=%xmm0
+paddd 48(%rsp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#1,<diag1=int6464#2
+# asm 2: paddd <x1=0(%rsp),<diag1=%xmm1
+paddd 0(%rsp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#2,<diag2=int6464#3
+# asm 2: paddd <x2=16(%rsp),<diag2=%xmm2
+paddd 16(%rsp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#3,<diag3=int6464#4
+# asm 2: paddd <x3=32(%rsp),<diag3=%xmm3
+paddd 32(%rsp),%xmm3
+
+# qhasm: in0 = diag0
+# asm 1: movd   <diag0=int6464#1,>in0=int64#4
+# asm 2: movd   <diag0=%xmm0,>in0=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in12 = diag1
+# asm 1: movd   <diag1=int6464#2,>in12=int64#5
+# asm 2: movd   <diag1=%xmm1,>in12=%r8
+movd   %xmm1,%r8
+
+# qhasm: in8 = diag2
+# asm 1: movd   <diag2=int6464#3,>in8=int64#6
+# asm 2: movd   <diag2=%xmm2,>in8=%r9
+movd   %xmm2,%r9
+
+# qhasm: in4 = diag3
+# asm 1: movd   <diag3=int6464#4,>in4=int64#7
+# asm 2: movd   <diag3=%xmm3,>in4=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int64#2),<in0=int64#4d
+# asm 2: xorl 0(<m=%rsi),<in0=%ecx
+xorl 0(%rsi),%ecx
+
+# qhasm: (uint32) in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int64#2),<in12=int64#5d
+# asm 2: xorl 48(<m=%rsi),<in12=%r8d
+xorl 48(%rsi),%r8d
+
+# qhasm: (uint32) in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int64#2),<in8=int64#6d
+# asm 2: xorl 32(<m=%rsi),<in8=%r9d
+xorl 32(%rsi),%r9d
+
+# qhasm: (uint32) in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int64#2),<in4=int64#7d
+# asm 2: xorl 16(<m=%rsi),<in4=%eax
+xorl 16(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl   <in0=int64#4d,0(<out=int64#1)
+# asm 2: movl   <in0=%ecx,0(<out=%rdi)
+movl   %ecx,0(%rdi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl   <in12=int64#5d,48(<out=int64#1)
+# asm 2: movl   <in12=%r8d,48(<out=%rdi)
+movl   %r8d,48(%rdi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl   <in8=int64#6d,32(<out=int64#1)
+# asm 2: movl   <in8=%r9d,32(<out=%rdi)
+movl   %r9d,32(%rdi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl   <in4=int64#7d,16(<out=int64#1)
+# asm 2: movl   <in4=%eax,16(<out=%rdi)
+movl   %eax,16(%rdi)
+
+# qhasm: in5 = diag0
+# asm 1: movd   <diag0=int6464#1,>in5=int64#4
+# asm 2: movd   <diag0=%xmm0,>in5=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in1 = diag1
+# asm 1: movd   <diag1=int6464#2,>in1=int64#5
+# asm 2: movd   <diag1=%xmm1,>in1=%r8
+movd   %xmm1,%r8
+
+# qhasm: in13 = diag2
+# asm 1: movd   <diag2=int6464#3,>in13=int64#6
+# asm 2: movd   <diag2=%xmm2,>in13=%r9
+movd   %xmm2,%r9
+
+# qhasm: in9 = diag3
+# asm 1: movd   <diag3=int6464#4,>in9=int64#7
+# asm 2: movd   <diag3=%xmm3,>in9=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int64#2),<in5=int64#4d
+# asm 2: xorl 20(<m=%rsi),<in5=%ecx
+xorl 20(%rsi),%ecx
+
+# qhasm: (uint32) in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int64#2),<in1=int64#5d
+# asm 2: xorl 4(<m=%rsi),<in1=%r8d
+xorl 4(%rsi),%r8d
+
+# qhasm: (uint32) in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int64#2),<in13=int64#6d
+# asm 2: xorl 52(<m=%rsi),<in13=%r9d
+xorl 52(%rsi),%r9d
+
+# qhasm: (uint32) in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int64#2),<in9=int64#7d
+# asm 2: xorl 36(<m=%rsi),<in9=%eax
+xorl 36(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl   <in5=int64#4d,20(<out=int64#1)
+# asm 2: movl   <in5=%ecx,20(<out=%rdi)
+movl   %ecx,20(%rdi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl   <in1=int64#5d,4(<out=int64#1)
+# asm 2: movl   <in1=%r8d,4(<out=%rdi)
+movl   %r8d,4(%rdi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl   <in13=int64#6d,52(<out=int64#1)
+# asm 2: movl   <in13=%r9d,52(<out=%rdi)
+movl   %r9d,52(%rdi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl   <in9=int64#7d,36(<out=int64#1)
+# asm 2: movl   <in9=%eax,36(<out=%rdi)
+movl   %eax,36(%rdi)
+
+# qhasm: in10 = diag0
+# asm 1: movd   <diag0=int6464#1,>in10=int64#4
+# asm 2: movd   <diag0=%xmm0,>in10=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in6 = diag1
+# asm 1: movd   <diag1=int6464#2,>in6=int64#5
+# asm 2: movd   <diag1=%xmm1,>in6=%r8
+movd   %xmm1,%r8
+
+# qhasm: in2 = diag2
+# asm 1: movd   <diag2=int6464#3,>in2=int64#6
+# asm 2: movd   <diag2=%xmm2,>in2=%r9
+movd   %xmm2,%r9
+
+# qhasm: in14 = diag3
+# asm 1: movd   <diag3=int6464#4,>in14=int64#7
+# asm 2: movd   <diag3=%xmm3,>in14=%rax
+movd   %xmm3,%rax
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: (uint32) in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int64#2),<in10=int64#4d
+# asm 2: xorl 40(<m=%rsi),<in10=%ecx
+xorl 40(%rsi),%ecx
+
+# qhasm: (uint32) in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int64#2),<in6=int64#5d
+# asm 2: xorl 24(<m=%rsi),<in6=%r8d
+xorl 24(%rsi),%r8d
+
+# qhasm: (uint32) in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int64#2),<in2=int64#6d
+# asm 2: xorl 8(<m=%rsi),<in2=%r9d
+xorl 8(%rsi),%r9d
+
+# qhasm: (uint32) in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int64#2),<in14=int64#7d
+# asm 2: xorl 56(<m=%rsi),<in14=%eax
+xorl 56(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl   <in10=int64#4d,40(<out=int64#1)
+# asm 2: movl   <in10=%ecx,40(<out=%rdi)
+movl   %ecx,40(%rdi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl   <in6=int64#5d,24(<out=int64#1)
+# asm 2: movl   <in6=%r8d,24(<out=%rdi)
+movl   %r8d,24(%rdi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl   <in2=int64#6d,8(<out=int64#1)
+# asm 2: movl   <in2=%r9d,8(<out=%rdi)
+movl   %r9d,8(%rdi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl   <in14=int64#7d,56(<out=int64#1)
+# asm 2: movl   <in14=%eax,56(<out=%rdi)
+movl   %eax,56(%rdi)
+
+# qhasm: in15 = diag0
+# asm 1: movd   <diag0=int6464#1,>in15=int64#4
+# asm 2: movd   <diag0=%xmm0,>in15=%rcx
+movd   %xmm0,%rcx
+
+# qhasm: in11 = diag1
+# asm 1: movd   <diag1=int6464#2,>in11=int64#5
+# asm 2: movd   <diag1=%xmm1,>in11=%r8
+movd   %xmm1,%r8
+
+# qhasm: in7 = diag2
+# asm 1: movd   <diag2=int6464#3,>in7=int64#6
+# asm 2: movd   <diag2=%xmm2,>in7=%r9
+movd   %xmm2,%r9
+
+# qhasm: in3 = diag3
+# asm 1: movd   <diag3=int6464#4,>in3=int64#7
+# asm 2: movd   <diag3=%xmm3,>in3=%rax
+movd   %xmm3,%rax
+
+# qhasm: (uint32) in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int64#2),<in15=int64#4d
+# asm 2: xorl 60(<m=%rsi),<in15=%ecx
+xorl 60(%rsi),%ecx
+
+# qhasm: (uint32) in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int64#2),<in11=int64#5d
+# asm 2: xorl 44(<m=%rsi),<in11=%r8d
+xorl 44(%rsi),%r8d
+
+# qhasm: (uint32) in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int64#2),<in7=int64#6d
+# asm 2: xorl 28(<m=%rsi),<in7=%r9d
+xorl 28(%rsi),%r9d
+
+# qhasm: (uint32) in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int64#2),<in3=int64#7d
+# asm 2: xorl 12(<m=%rsi),<in3=%eax
+xorl 12(%rsi),%eax
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl   <in15=int64#4d,60(<out=int64#1)
+# asm 2: movl   <in15=%ecx,60(<out=%rdi)
+movl   %ecx,60(%rdi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl   <in11=int64#5d,44(<out=int64#1)
+# asm 2: movl   <in11=%r8d,44(<out=%rdi)
+movl   %r8d,44(%rdi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl   <in7=int64#6d,28(<out=int64#1)
+# asm 2: movl   <in7=%r9d,28(<out=%rdi)
+movl   %r9d,28(%rdi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl   <in3=int64#7d,12(<out=int64#1)
+# asm 2: movl   <in3=%eax,12(<out=%rdi)
+movl   %eax,12(%rdi)
+
+# qhasm:   bytes = bytes_backup
+# asm 1: movq <bytes_backup=stack64#8,>bytes=int64#6
+# asm 2: movq <bytes_backup=408(%rsp),>bytes=%r9
+movq 408(%rsp),%r9
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#2,>in8=int64#4d
+# asm 2: movl <x2=16(%rsp),>in8=%ecx
+movl 16(%rsp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#3,>in9=int64#5d
+# asm 2: movl 4+<x3=32(%rsp),>in9=%r8d
+movl 4+32(%rsp),%r8d
+
+# qhasm:   in8 += 1
+# asm 1: add  $1,<in8=int64#4
+# asm 2: add  $1,<in8=%rcx
+add  $1,%rcx
+
+# qhasm:   in9 <<= 32
+# asm 1: shl  $32,<in9=int64#5
+# asm 2: shl  $32,<in9=%r8
+shl  $32,%r8
+
+# qhasm:   in8 += in9
+# asm 1: add  <in9=int64#5,<in8=int64#4
+# asm 2: add  <in9=%r8,<in8=%rcx
+add  %r8,%rcx
+
+# qhasm:   in9 = in8
+# asm 1: mov  <in8=int64#4,>in9=int64#5
+# asm 2: mov  <in8=%rcx,>in9=%r8
+mov  %rcx,%r8
+
+# qhasm:   (uint64) in9 >>= 32
+# asm 1: shr  $32,<in9=int64#5
+# asm 2: shr  $32,<in9=%r8
+shr  $32,%r8
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int64#4d,>x2=stack128#2
+# asm 2: movl <in8=%ecx,>x2=16(%rsp)
+movl %ecx,16(%rsp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int64#5d,4+<x3=stack128#3
+# asm 2: movl <in9=%r8d,4+<x3=32(%rsp)
+movl %r8d,4+32(%rsp)
+
+# qhasm:                          unsigned>? unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int64#6
+# asm 2: cmp  $64,<bytes=%r9
+cmp  $64,%r9
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm:     goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm:       m = out
+# asm 1: mov  <out=int64#1,>m=int64#2
+# asm 2: mov  <out=%rdi,>m=%rsi
+mov  %rdi,%rsi
+
+# qhasm:       out = ctarget
+# asm 1: mov  <ctarget=int64#3,>out=int64#1
+# asm 2: mov  <ctarget=%rdx,>out=%rdi
+mov  %rdx,%rdi
+
+# qhasm:       i = bytes
+# asm 1: mov  <bytes=int64#6,>i=int64#4
+# asm 2: mov  <bytes=%r9,>i=%rcx
+mov  %r9,%rcx
+
+# qhasm:       while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     done:
+._done:
+
+# qhasm:     r11_caller = r11_stack
+# asm 1: movq <r11_stack=stack64#1,>r11_caller=int64#9
+# asm 2: movq <r11_stack=352(%rsp),>r11_caller=%r11
+movq 352(%rsp),%r11
+
+# qhasm:     r12_caller = r12_stack
+# asm 1: movq <r12_stack=stack64#2,>r12_caller=int64#10
+# asm 2: movq <r12_stack=360(%rsp),>r12_caller=%r12
+movq 360(%rsp),%r12
+
+# qhasm:     r13_caller = r13_stack
+# asm 1: movq <r13_stack=stack64#3,>r13_caller=int64#11
+# asm 2: movq <r13_stack=368(%rsp),>r13_caller=%r13
+movq 368(%rsp),%r13
+
+# qhasm:     r14_caller = r14_stack
+# asm 1: movq <r14_stack=stack64#4,>r14_caller=int64#12
+# asm 2: movq <r14_stack=376(%rsp),>r14_caller=%r14
+movq 376(%rsp),%r14
+
+# qhasm:     r15_caller = r15_stack
+# asm 1: movq <r15_stack=stack64#5,>r15_caller=int64#13
+# asm 2: movq <r15_stack=384(%rsp),>r15_caller=%r15
+movq 384(%rsp),%r15
+
+# qhasm:     rbx_caller = rbx_stack
+# asm 1: movq <rbx_stack=stack64#6,>rbx_caller=int64#14
+# asm 2: movq <rbx_stack=392(%rsp),>rbx_caller=%rbx
+movq 392(%rsp),%rbx
+
+# qhasm:     rbp_caller = rbp_stack
+# asm 1: movq <rbp_stack=stack64#7,>rbp_caller=int64#15
+# asm 2: movq <rbp_stack=400(%rsp),>rbp_caller=%rbp
+movq 400(%rsp),%rbp
+
+# qhasm:     leave
+add %r11,%rsp
+xor %rax,%rax
+xor %rdx,%rdx
+ret
+
+# qhasm:   bytesatleast65:
+._bytesatleast65:
+
+# qhasm:   bytes -= 64
+# asm 1: sub  $64,<bytes=int64#6
+# asm 2: sub  $64,<bytes=%r9
+sub  $64,%r9
+
+# qhasm:   out += 64
+# asm 1: add  $64,<out=int64#1
+# asm 2: add  $64,<out=%rdi
+add  $64,%rdi
+
+# qhasm:   m += 64
+# asm 1: add  $64,<m=int64#2
+# asm 2: add  $64,<m=%rsi
+add  $64,%rsi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
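Editor's note on the tail handling above: when fewer than 64 bytes remain, the code generates the final bytes into the buffer that out currently points at, then copies only the requested count to the pointer saved earlier in ctarget (m = out; out = ctarget; i = bytes; rep movsb). A minimal C sketch of that last step, with illustrative names (copy_partial_block, tmp, ctarget are not part of the source):

#include <string.h>

/* Hedged sketch of the partial-block copy: `tmp` stands for the block the
   keystream bytes were written into, `ctarget` for the caller's output
   pointer saved earlier; both names are placeholders. */
static void copy_partial_block(unsigned char *ctarget,
                               const unsigned char *tmp,
                               unsigned long long bytes)   /* 0 < bytes < 64 */
{
  memcpy(ctarget, tmp, (size_t) bytes);   /* same effect as the rep movsb loop */
}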
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/checksum b/nacl/nacl-20110221/crypto_stream/salsa208/checksum
new file mode 100644
index 00000000..c87364e6
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/checksum
@@ -0,0 +1 @@
+05f32b0647417aaa446b0b3127318133cf9af32b771869eab267000bf02710cd
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/ref/api.h b/nacl/nacl-20110221/crypto_stream/salsa208/ref/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/ref/implementors b/nacl/nacl-20110221/crypto_stream/salsa208/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/ref/stream.c b/nacl/nacl-20110221/crypto_stream/salsa208/ref/stream.c
new file mode 100644
index 00000000..cdcfbc0e
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/ref/stream.c
@@ -0,0 +1,49 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa208.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!clen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (clen >= 64) {
+    crypto_core_salsa208(c,in,k,sigma);
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    clen -= 64;
+    c += 64;
+  }
+
+  if (clen) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < clen;++i) c[i] = block[i];
+  }
+  return 0;
+}
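Editor's note: the eight bytes in[8..15] above act as a 64-bit little-endian block counter, incremented once per 64-byte block. The same update, pulled out as a stand-alone C helper for clarity (the helper name is illustrative, not part of the source):

/* Increment the 64-bit little-endian counter stored in in[8..15] by one,
   exactly as the loop inside crypto_stream does. */
static void increment_block_counter(unsigned char in[16])
{
  unsigned int u = 1;
  int i;
  for (i = 8; i < 16; ++i) {
    u += (unsigned int) in[i];   /* add the carry into this byte */
    in[i] = (unsigned char) u;   /* keep the low 8 bits */
    u >>= 8;                     /* propagate the carry */
  }
}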
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/ref/xor.c b/nacl/nacl-20110221/crypto_stream/salsa208/ref/xor.c
new file mode 100644
index 00000000..c017ac42
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/ref/xor.c
@@ -0,0 +1,52 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_salsa208.h"
+#include "crypto_stream.h"
+
+typedef unsigned int uint32;
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char in[16];
+  unsigned char block[64];
+  int i;
+  unsigned int u;
+
+  if (!mlen) return 0;
+
+  for (i = 0;i < 8;++i) in[i] = n[i];
+  for (i = 8;i < 16;++i) in[i] = 0;
+
+  while (mlen >= 64) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < 64;++i) c[i] = m[i] ^ block[i];
+
+    u = 1;
+    for (i = 8;i < 16;++i) {
+      u += (unsigned int) in[i];
+      in[i] = u;
+      u >>= 8;
+    }
+
+    mlen -= 64;
+    c += 64;
+    m += 64;
+  }
+
+  if (mlen) {
+    crypto_core_salsa208(block,in,k,sigma);
+    for (i = 0;i < mlen;++i) c[i] = m[i] ^ block[i];
+  }
+  return 0;
+}
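Editor's note: because the keystream is simply XORed onto the message, calling crypto_stream_xor a second time with the same key and nonce recovers the plaintext. A minimal round-trip sketch (the zeroed key/nonce and the function name roundtrip_example are placeholders; in an installed NaCl build the call resolves to the primitive-specific symbol):

#include <string.h>
#include "crypto_stream.h"

/* Encrypt then decrypt 64 bytes; returns 0 when the round trip succeeds. */
static int roundtrip_example(void)
{
  unsigned char k[32] = {0};   /* CRYPTO_KEYBYTES: use a real secret key */
  unsigned char n[8]  = {0};   /* CRYPTO_NONCEBYTES: never reuse per key */
  unsigned char m[64] = {0};   /* message */
  unsigned char c[64], p[64];

  crypto_stream_xor(c, m, 64, n, k);   /* c = m XOR keystream */
  crypto_stream_xor(p, c, 64, n, k);   /* p = c XOR keystream = m */
  return memcmp(p, m, 64);
}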
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/used b/nacl/nacl-20110221/crypto_stream/salsa208/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/api.h b/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/api.h
new file mode 100644
index 00000000..c2b18461
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 8
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/implementors b/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/stream.s b/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/stream.s
new file mode 100644
index 00000000..065253a8
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/salsa208/x86_xmm5/stream.s
@@ -0,0 +1,5078 @@
+
+# qhasm: int32 a
+
+# qhasm: stack32 arg1
+
+# qhasm: stack32 arg2
+
+# qhasm: stack32 arg3
+
+# qhasm: stack32 arg4
+
+# qhasm: stack32 arg5
+
+# qhasm: stack32 arg6
+
+# qhasm: input arg1
+
+# qhasm: input arg2
+
+# qhasm: input arg3
+
+# qhasm: input arg4
+
+# qhasm: input arg5
+
+# qhasm: input arg6
+
+# qhasm: int32 eax
+
+# qhasm: int32 ebx
+
+# qhasm: int32 esi
+
+# qhasm: int32 edi
+
+# qhasm: int32 ebp
+
+# qhasm: caller eax
+
+# qhasm: caller ebx
+
+# qhasm: caller esi
+
+# qhasm: caller edi
+
+# qhasm: caller ebp
+
+# qhasm: int32 k
+
+# qhasm: int32 kbits
+
+# qhasm: int32 iv
+
+# qhasm: int32 i
+
+# qhasm: stack128 x0
+
+# qhasm: stack128 x1
+
+# qhasm: stack128 x2
+
+# qhasm: stack128 x3
+
+# qhasm: int32 m
+
+# qhasm: stack32 out_stack
+
+# qhasm: int32 out
+
+# qhasm: stack32 bytes_stack
+
+# qhasm: int32 bytes
+
+# qhasm: stack32 eax_stack
+
+# qhasm: stack32 ebx_stack
+
+# qhasm: stack32 esi_stack
+
+# qhasm: stack32 edi_stack
+
+# qhasm: stack32 ebp_stack
+
+# qhasm: int6464 diag0
+
+# qhasm: int6464 diag1
+
+# qhasm: int6464 diag2
+
+# qhasm: int6464 diag3
+
+# qhasm: int6464 a0
+
+# qhasm: int6464 a1
+
+# qhasm: int6464 a2
+
+# qhasm: int6464 a3
+
+# qhasm: int6464 a4
+
+# qhasm: int6464 a5
+
+# qhasm: int6464 a6
+
+# qhasm: int6464 a7
+
+# qhasm: int6464 b0
+
+# qhasm: int6464 b1
+
+# qhasm: int6464 b2
+
+# qhasm: int6464 b3
+
+# qhasm: int6464 b4
+
+# qhasm: int6464 b5
+
+# qhasm: int6464 b6
+
+# qhasm: int6464 b7
+
+# qhasm: int6464 z0
+
+# qhasm: int6464 z1
+
+# qhasm: int6464 z2
+
+# qhasm: int6464 z3
+
+# qhasm: int6464 z4
+
+# qhasm: int6464 z5
+
+# qhasm: int6464 z6
+
+# qhasm: int6464 z7
+
+# qhasm: int6464 z8
+
+# qhasm: int6464 z9
+
+# qhasm: int6464 z10
+
+# qhasm: int6464 z11
+
+# qhasm: int6464 z12
+
+# qhasm: int6464 z13
+
+# qhasm: int6464 z14
+
+# qhasm: int6464 z15
+
+# qhasm: stack128 z0_stack
+
+# qhasm: stack128 z1_stack
+
+# qhasm: stack128 z2_stack
+
+# qhasm: stack128 z3_stack
+
+# qhasm: stack128 z4_stack
+
+# qhasm: stack128 z5_stack
+
+# qhasm: stack128 z6_stack
+
+# qhasm: stack128 z7_stack
+
+# qhasm: stack128 z8_stack
+
+# qhasm: stack128 z9_stack
+
+# qhasm: stack128 z10_stack
+
+# qhasm: stack128 z11_stack
+
+# qhasm: stack128 z12_stack
+
+# qhasm: stack128 z13_stack
+
+# qhasm: stack128 z14_stack
+
+# qhasm: stack128 z15_stack
+
+# qhasm: stack128 orig0
+
+# qhasm: stack128 orig1
+
+# qhasm: stack128 orig2
+
+# qhasm: stack128 orig3
+
+# qhasm: stack128 orig4
+
+# qhasm: stack128 orig5
+
+# qhasm: stack128 orig6
+
+# qhasm: stack128 orig7
+
+# qhasm: stack128 orig8
+
+# qhasm: stack128 orig9
+
+# qhasm: stack128 orig10
+
+# qhasm: stack128 orig11
+
+# qhasm: stack128 orig12
+
+# qhasm: stack128 orig13
+
+# qhasm: stack128 orig14
+
+# qhasm: stack128 orig15
+
+# qhasm: int6464 p
+
+# qhasm: int6464 q
+
+# qhasm: int6464 r
+
+# qhasm: int6464 s
+
+# qhasm: int6464 t
+
+# qhasm: int6464 u
+
+# qhasm: int6464 v
+
+# qhasm: int6464 w
+
+# qhasm: int6464 mp
+
+# qhasm: int6464 mq
+
+# qhasm: int6464 mr
+
+# qhasm: int6464 ms
+
+# qhasm: int6464 mt
+
+# qhasm: int6464 mu
+
+# qhasm: int6464 mv
+
+# qhasm: int6464 mw
+
+# qhasm: int32 in0
+
+# qhasm: int32 in1
+
+# qhasm: int32 in2
+
+# qhasm: int32 in3
+
+# qhasm: int32 in4
+
+# qhasm: int32 in5
+
+# qhasm: int32 in6
+
+# qhasm: int32 in7
+
+# qhasm: int32 in8
+
+# qhasm: int32 in9
+
+# qhasm: int32 in10
+
+# qhasm: int32 in11
+
+# qhasm: int32 in12
+
+# qhasm: int32 in13
+
+# qhasm: int32 in14
+
+# qhasm: int32 in15
+
+# qhasm: stack512 tmp
+
+# qhasm: stack32 ctarget
+
+# qhasm: enter crypto_stream_salsa208_x86_xmm5
+.text
+.p2align 5
+.globl _crypto_stream_salsa208_x86_xmm5
+.globl crypto_stream_salsa208_x86_xmm5
+_crypto_stream_salsa208_x86_xmm5:
+crypto_stream_salsa208_x86_xmm5:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
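+# note: the prologue above reserves a frame of at least 704 bytes and rounds
+# %esp down to a 32-byte boundary (704 is itself a multiple of 32); %eax keeps
+# the displacement, so the caller's arguments remain reachable below as
+# 4(%esp,%eax), 8(%esp,%eax), and so on.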
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm: bytes = arg2
+# asm 1: movl <arg2=stack32#-2,>bytes=int32#3
+# asm 2: movl <arg2=8(%esp,%eax),>bytes=%edx
+movl 8(%esp,%eax),%edx
+
+# qhasm: out = arg1
+# asm 1: movl <arg1=stack32#-1,>out=int32#6
+# asm 2: movl <arg1=4(%esp,%eax),>out=%edi
+movl 4(%esp,%eax),%edi
+
+# qhasm: m = out
+# asm 1: mov  <out=int32#6,>m=int32#5
+# asm 2: mov  <out=%edi,>m=%esi
+mov  %edi,%esi
+
+# qhasm: iv = arg4
+# asm 1: movl <arg4=stack32#-4,>iv=int32#4
+# asm 2: movl <arg4=16(%esp,%eax),>iv=%ebx
+movl 16(%esp,%eax),%ebx
+
+# qhasm: k = arg5
+# asm 1: movl <arg5=stack32#-5,>k=int32#7
+# asm 2: movl <arg5=20(%esp,%eax),>k=%ebp
+movl 20(%esp,%eax),%ebp
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#3
+# asm 2: cmp  $0,<bytes=%edx
+cmp  $0,%edx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+
+# qhasm: a = 0
+# asm 1: mov  $0,>a=int32#1
+# asm 2: mov  $0,>a=%eax
+mov  $0,%eax
+
+# qhasm: i = bytes
+# asm 1: mov  <bytes=int32#3,>i=int32#2
+# asm 2: mov  <bytes=%edx,>i=%ecx
+mov  %edx,%ecx
+
+# qhasm: while (i) { *out++ = a; --i }
+rep stosb
+
+# qhasm: out -= bytes
+# asm 1: subl <bytes=int32#3,<out=int32#6
+# asm 2: subl <bytes=%edx,<out=%edi
+subl %edx,%edi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto start
+jmp ._start
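+# note: the plain stream entry zero-fills `bytes` bytes at out, rewinds out,
+# and jumps into the shared code at start with m = out, so the xor path below
+# produces raw keystream by XORing against zeros.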
+
+# qhasm: enter crypto_stream_salsa208_x86_xmm5_xor
+.text
+.p2align 5
+.globl _crypto_stream_salsa208_x86_xmm5_xor
+.globl crypto_stream_salsa208_x86_xmm5_xor
+_crypto_stream_salsa208_x86_xmm5_xor:
+crypto_stream_salsa208_x86_xmm5_xor:
+mov %esp,%eax
+and $31,%eax
+add $704,%eax
+sub %eax,%esp
+
+# qhasm: eax_stack = eax
+# asm 1: movl <eax=int32#1,>eax_stack=stack32#1
+# asm 2: movl <eax=%eax,>eax_stack=0(%esp)
+movl %eax,0(%esp)
+
+# qhasm: ebx_stack = ebx
+# asm 1: movl <ebx=int32#4,>ebx_stack=stack32#2
+# asm 2: movl <ebx=%ebx,>ebx_stack=4(%esp)
+movl %ebx,4(%esp)
+
+# qhasm: esi_stack = esi
+# asm 1: movl <esi=int32#5,>esi_stack=stack32#3
+# asm 2: movl <esi=%esi,>esi_stack=8(%esp)
+movl %esi,8(%esp)
+
+# qhasm: edi_stack = edi
+# asm 1: movl <edi=int32#6,>edi_stack=stack32#4
+# asm 2: movl <edi=%edi,>edi_stack=12(%esp)
+movl %edi,12(%esp)
+
+# qhasm: ebp_stack = ebp
+# asm 1: movl <ebp=int32#7,>ebp_stack=stack32#5
+# asm 2: movl <ebp=%ebp,>ebp_stack=16(%esp)
+movl %ebp,16(%esp)
+
+# qhasm: out = arg1
+# asm 1: movl <arg1=stack32#-1,>out=int32#6
+# asm 2: movl <arg1=4(%esp,%eax),>out=%edi
+movl 4(%esp,%eax),%edi
+
+# qhasm: m = arg2
+# asm 1: movl <arg2=stack32#-2,>m=int32#5
+# asm 2: movl <arg2=8(%esp,%eax),>m=%esi
+movl 8(%esp,%eax),%esi
+
+# qhasm: bytes = arg3
+# asm 1: movl <arg3=stack32#-3,>bytes=int32#3
+# asm 2: movl <arg3=12(%esp,%eax),>bytes=%edx
+movl 12(%esp,%eax),%edx
+
+# qhasm: iv = arg5
+# asm 1: movl <arg5=stack32#-5,>iv=int32#4
+# asm 2: movl <arg5=20(%esp,%eax),>iv=%ebx
+movl 20(%esp,%eax),%ebx
+
+# qhasm: k = arg6
+# asm 1: movl <arg6=stack32#-6,>k=int32#7
+# asm 2: movl <arg6=24(%esp,%eax),>k=%ebp
+movl 24(%esp,%eax),%ebp
+
+# qhasm:               unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#3
+# asm 2: cmp  $0,<bytes=%edx
+cmp  $0,%edx
+# comment:fp stack unchanged by jump
+
+# qhasm: goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: start:
+._start:
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#6,>out_stack=stack32#6
+# asm 2: movl <out=%edi,>out_stack=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#3,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%edx,>bytes_stack=24(%esp)
+movl %edx,24(%esp)
+
+# qhasm:   in4 = *(uint32 *) (k + 12)
+# asm 1: movl 12(<k=int32#7),>in4=int32#1
+# asm 2: movl 12(<k=%ebp),>in4=%eax
+movl 12(%ebp),%eax
+
+# qhasm:   in12 = *(uint32 *) (k + 20)
+# asm 1: movl 20(<k=int32#7),>in12=int32#2
+# asm 2: movl 20(<k=%ebp),>in12=%ecx
+movl 20(%ebp),%ecx
+
+# qhasm:   ((uint32 *)&x3)[0] = in4
+# asm 1: movl <in4=int32#1,>x3=stack128#1
+# asm 2: movl <in4=%eax,>x3=32(%esp)
+movl %eax,32(%esp)
+
+# qhasm:   ((uint32 *)&x1)[0] = in12
+# asm 1: movl <in12=int32#2,>x1=stack128#2
+# asm 2: movl <in12=%ecx,>x1=48(%esp)
+movl %ecx,48(%esp)
+
+# qhasm:   in0 = 1634760805
+# asm 1: mov  $1634760805,>in0=int32#1
+# asm 2: mov  $1634760805,>in0=%eax
+mov  $1634760805,%eax
+
+# qhasm:   in8 = 0
+# asm 1: mov  $0,>in8=int32#2
+# asm 2: mov  $0,>in8=%ecx
+mov  $0,%ecx
+
+# qhasm:   ((uint32 *)&x0)[0] = in0
+# asm 1: movl <in0=int32#1,>x0=stack128#3
+# asm 2: movl <in0=%eax,>x0=64(%esp)
+movl %eax,64(%esp)
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   in6 = *(uint32 *) (iv + 0)
+# asm 1: movl 0(<iv=int32#4),>in6=int32#1
+# asm 2: movl 0(<iv=%ebx),>in6=%eax
+movl 0(%ebx),%eax
+
+# qhasm:   in7 = *(uint32 *) (iv + 4)
+# asm 1: movl 4(<iv=int32#4),>in7=int32#2
+# asm 2: movl 4(<iv=%ebx),>in7=%ecx
+movl 4(%ebx),%ecx
+
+# qhasm:   ((uint32 *)&x1)[2] = in6
+# asm 1: movl <in6=int32#1,8+<x1=stack128#2
+# asm 2: movl <in6=%eax,8+<x1=48(%esp)
+movl %eax,8+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[3] = in7
+# asm 1: movl <in7=int32#2,12+<x2=stack128#4
+# asm 2: movl <in7=%ecx,12+<x2=80(%esp)
+movl %ecx,12+80(%esp)
+
+# qhasm:   in9 = 0
+# asm 1: mov  $0,>in9=int32#1
+# asm 2: mov  $0,>in9=%eax
+mov  $0,%eax
+
+# qhasm:   in10 = 2036477234
+# asm 1: mov  $2036477234,>in10=int32#2
+# asm 2: mov  $2036477234,>in10=%ecx
+mov  $2036477234,%ecx
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#1,4+<x3=stack128#1
+# asm 2: movl <in9=%eax,4+<x3=32(%esp)
+movl %eax,4+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[2] = in10
+# asm 1: movl <in10=int32#2,8+<x0=stack128#3
+# asm 2: movl <in10=%ecx,8+<x0=64(%esp)
+movl %ecx,8+64(%esp)
+
+# qhasm:   in1 = *(uint32 *) (k + 0)
+# asm 1: movl 0(<k=int32#7),>in1=int32#1
+# asm 2: movl 0(<k=%ebp),>in1=%eax
+movl 0(%ebp),%eax
+
+# qhasm:   in2 = *(uint32 *) (k + 4)
+# asm 1: movl 4(<k=int32#7),>in2=int32#2
+# asm 2: movl 4(<k=%ebp),>in2=%ecx
+movl 4(%ebp),%ecx
+
+# qhasm:   in3 = *(uint32 *) (k + 8)
+# asm 1: movl 8(<k=int32#7),>in3=int32#3
+# asm 2: movl 8(<k=%ebp),>in3=%edx
+movl 8(%ebp),%edx
+
+# qhasm:   in5 = 857760878
+# asm 1: mov  $857760878,>in5=int32#4
+# asm 2: mov  $857760878,>in5=%ebx
+mov  $857760878,%ebx
+
+# qhasm:   ((uint32 *)&x1)[1] = in1
+# asm 1: movl <in1=int32#1,4+<x1=stack128#2
+# asm 2: movl <in1=%eax,4+<x1=48(%esp)
+movl %eax,4+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[2] = in2
+# asm 1: movl <in2=int32#2,8+<x2=stack128#4
+# asm 2: movl <in2=%ecx,8+<x2=80(%esp)
+movl %ecx,8+80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[3] = in3
+# asm 1: movl <in3=int32#3,12+<x3=stack128#1
+# asm 2: movl <in3=%edx,12+<x3=32(%esp)
+movl %edx,12+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[1] = in5
+# asm 1: movl <in5=int32#4,4+<x0=stack128#3
+# asm 2: movl <in5=%ebx,4+<x0=64(%esp)
+movl %ebx,4+64(%esp)
+
+# qhasm:   in11 = *(uint32 *) (k + 16)
+# asm 1: movl 16(<k=int32#7),>in11=int32#1
+# asm 2: movl 16(<k=%ebp),>in11=%eax
+movl 16(%ebp),%eax
+
+# qhasm:   in13 = *(uint32 *) (k + 24)
+# asm 1: movl 24(<k=int32#7),>in13=int32#2
+# asm 2: movl 24(<k=%ebp),>in13=%ecx
+movl 24(%ebp),%ecx
+
+# qhasm:   in14 = *(uint32 *) (k + 28)
+# asm 1: movl 28(<k=int32#7),>in14=int32#3
+# asm 2: movl 28(<k=%ebp),>in14=%edx
+movl 28(%ebp),%edx
+
+# qhasm:   in15 = 1797285236
+# asm 1: mov  $1797285236,>in15=int32#4
+# asm 2: mov  $1797285236,>in15=%ebx
+mov  $1797285236,%ebx
+
+# qhasm:   ((uint32 *)&x1)[3] = in11
+# asm 1: movl <in11=int32#1,12+<x1=stack128#2
+# asm 2: movl <in11=%eax,12+<x1=48(%esp)
+movl %eax,12+48(%esp)
+
+# qhasm:   ((uint32 *)&x2)[1] = in13
+# asm 1: movl <in13=int32#2,4+<x2=stack128#4
+# asm 2: movl <in13=%ecx,4+<x2=80(%esp)
+movl %ecx,4+80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[2] = in14
+# asm 1: movl <in14=int32#3,8+<x3=stack128#1
+# asm 2: movl <in14=%edx,8+<x3=32(%esp)
+movl %edx,8+32(%esp)
+
+# qhasm:   ((uint32 *)&x0)[3] = in15
+# asm 1: movl <in15=int32#4,12+<x0=stack128#3
+# asm 2: movl <in15=%ebx,12+<x0=64(%esp)
+movl %ebx,12+64(%esp)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:                               unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int32#1
+# asm 2: cmp  $256,<bytes=%eax
+cmp  $256,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesbetween1and255 if unsigned<
+jb ._bytesbetween1and255
+
+# qhasm:   z0 = x0
+# asm 1: movdqa <x0=stack128#3,>z0=int6464#1
+# asm 2: movdqa <x0=64(%esp),>z0=%xmm0
+movdqa 64(%esp),%xmm0
+
+# qhasm:   z5 = z0[1,1,1,1]
+# asm 1: pshufd $0x55,<z0=int6464#1,>z5=int6464#2
+# asm 2: pshufd $0x55,<z0=%xmm0,>z5=%xmm1
+pshufd $0x55,%xmm0,%xmm1
+
+# qhasm:   z10 = z0[2,2,2,2]
+# asm 1: pshufd $0xaa,<z0=int6464#1,>z10=int6464#3
+# asm 2: pshufd $0xaa,<z0=%xmm0,>z10=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z15 = z0[3,3,3,3]
+# asm 1: pshufd $0xff,<z0=int6464#1,>z15=int6464#4
+# asm 2: pshufd $0xff,<z0=%xmm0,>z15=%xmm3
+pshufd $0xff,%xmm0,%xmm3
+
+# qhasm:   z0 = z0[0,0,0,0]
+# asm 1: pshufd $0x00,<z0=int6464#1,>z0=int6464#1
+# asm 2: pshufd $0x00,<z0=%xmm0,>z0=%xmm0
+pshufd $0x00,%xmm0,%xmm0
+
+# qhasm:   orig5 = z5
+# asm 1: movdqa <z5=int6464#2,>orig5=stack128#5
+# asm 2: movdqa <z5=%xmm1,>orig5=96(%esp)
+movdqa %xmm1,96(%esp)
+
+# qhasm:   orig10 = z10
+# asm 1: movdqa <z10=int6464#3,>orig10=stack128#6
+# asm 2: movdqa <z10=%xmm2,>orig10=112(%esp)
+movdqa %xmm2,112(%esp)
+
+# qhasm:   orig15 = z15
+# asm 1: movdqa <z15=int6464#4,>orig15=stack128#7
+# asm 2: movdqa <z15=%xmm3,>orig15=128(%esp)
+movdqa %xmm3,128(%esp)
+
+# qhasm:   orig0 = z0
+# asm 1: movdqa <z0=int6464#1,>orig0=stack128#8
+# asm 2: movdqa <z0=%xmm0,>orig0=144(%esp)
+movdqa %xmm0,144(%esp)
+
+# qhasm:   z1 = x1
+# asm 1: movdqa <x1=stack128#2,>z1=int6464#1
+# asm 2: movdqa <x1=48(%esp),>z1=%xmm0
+movdqa 48(%esp),%xmm0
+
+# qhasm:   z6 = z1[2,2,2,2]
+# asm 1: pshufd $0xaa,<z1=int6464#1,>z6=int6464#2
+# asm 2: pshufd $0xaa,<z1=%xmm0,>z6=%xmm1
+pshufd $0xaa,%xmm0,%xmm1
+
+# qhasm:   z11 = z1[3,3,3,3]
+# asm 1: pshufd $0xff,<z1=int6464#1,>z11=int6464#3
+# asm 2: pshufd $0xff,<z1=%xmm0,>z11=%xmm2
+pshufd $0xff,%xmm0,%xmm2
+
+# qhasm:   z12 = z1[0,0,0,0]
+# asm 1: pshufd $0x00,<z1=int6464#1,>z12=int6464#4
+# asm 2: pshufd $0x00,<z1=%xmm0,>z12=%xmm3
+pshufd $0x00,%xmm0,%xmm3
+
+# qhasm:   z1 = z1[1,1,1,1]
+# asm 1: pshufd $0x55,<z1=int6464#1,>z1=int6464#1
+# asm 2: pshufd $0x55,<z1=%xmm0,>z1=%xmm0
+pshufd $0x55,%xmm0,%xmm0
+
+# qhasm:   orig6 = z6
+# asm 1: movdqa <z6=int6464#2,>orig6=stack128#9
+# asm 2: movdqa <z6=%xmm1,>orig6=160(%esp)
+movdqa %xmm1,160(%esp)
+
+# qhasm:   orig11 = z11
+# asm 1: movdqa <z11=int6464#3,>orig11=stack128#10
+# asm 2: movdqa <z11=%xmm2,>orig11=176(%esp)
+movdqa %xmm2,176(%esp)
+
+# qhasm:   orig12 = z12
+# asm 1: movdqa <z12=int6464#4,>orig12=stack128#11
+# asm 2: movdqa <z12=%xmm3,>orig12=192(%esp)
+movdqa %xmm3,192(%esp)
+
+# qhasm:   orig1 = z1
+# asm 1: movdqa <z1=int6464#1,>orig1=stack128#12
+# asm 2: movdqa <z1=%xmm0,>orig1=208(%esp)
+movdqa %xmm0,208(%esp)
+
+# qhasm:   z2 = x2
+# asm 1: movdqa <x2=stack128#4,>z2=int6464#1
+# asm 2: movdqa <x2=80(%esp),>z2=%xmm0
+movdqa 80(%esp),%xmm0
+
+# qhasm:   z7 = z2[3,3,3,3]
+# asm 1: pshufd $0xff,<z2=int6464#1,>z7=int6464#2
+# asm 2: pshufd $0xff,<z2=%xmm0,>z7=%xmm1
+pshufd $0xff,%xmm0,%xmm1
+
+# qhasm:   z13 = z2[1,1,1,1]
+# asm 1: pshufd $0x55,<z2=int6464#1,>z13=int6464#3
+# asm 2: pshufd $0x55,<z2=%xmm0,>z13=%xmm2
+pshufd $0x55,%xmm0,%xmm2
+
+# qhasm:   z2 = z2[2,2,2,2]
+# asm 1: pshufd $0xaa,<z2=int6464#1,>z2=int6464#1
+# asm 2: pshufd $0xaa,<z2=%xmm0,>z2=%xmm0
+pshufd $0xaa,%xmm0,%xmm0
+
+# qhasm:   orig7 = z7
+# asm 1: movdqa <z7=int6464#2,>orig7=stack128#13
+# asm 2: movdqa <z7=%xmm1,>orig7=224(%esp)
+movdqa %xmm1,224(%esp)
+
+# qhasm:   orig13 = z13
+# asm 1: movdqa <z13=int6464#3,>orig13=stack128#14
+# asm 2: movdqa <z13=%xmm2,>orig13=240(%esp)
+movdqa %xmm2,240(%esp)
+
+# qhasm:   orig2 = z2
+# asm 1: movdqa <z2=int6464#1,>orig2=stack128#15
+# asm 2: movdqa <z2=%xmm0,>orig2=256(%esp)
+movdqa %xmm0,256(%esp)
+
+# qhasm:   z3 = x3
+# asm 1: movdqa <x3=stack128#1,>z3=int6464#1
+# asm 2: movdqa <x3=32(%esp),>z3=%xmm0
+movdqa 32(%esp),%xmm0
+
+# qhasm:   z4 = z3[0,0,0,0]
+# asm 1: pshufd $0x00,<z3=int6464#1,>z4=int6464#2
+# asm 2: pshufd $0x00,<z3=%xmm0,>z4=%xmm1
+pshufd $0x00,%xmm0,%xmm1
+
+# qhasm:   z14 = z3[2,2,2,2]
+# asm 1: pshufd $0xaa,<z3=int6464#1,>z14=int6464#3
+# asm 2: pshufd $0xaa,<z3=%xmm0,>z14=%xmm2
+pshufd $0xaa,%xmm0,%xmm2
+
+# qhasm:   z3 = z3[3,3,3,3]
+# asm 1: pshufd $0xff,<z3=int6464#1,>z3=int6464#1
+# asm 2: pshufd $0xff,<z3=%xmm0,>z3=%xmm0
+pshufd $0xff,%xmm0,%xmm0
+
+# qhasm:   orig4 = z4
+# asm 1: movdqa <z4=int6464#2,>orig4=stack128#16
+# asm 2: movdqa <z4=%xmm1,>orig4=272(%esp)
+movdqa %xmm1,272(%esp)
+
+# qhasm:   orig14 = z14
+# asm 1: movdqa <z14=int6464#3,>orig14=stack128#17
+# asm 2: movdqa <z14=%xmm2,>orig14=288(%esp)
+movdqa %xmm2,288(%esp)
+
+# qhasm:   orig3 = z3
+# asm 1: movdqa <z3=int6464#1,>orig3=stack128#18
+# asm 2: movdqa <z3=%xmm0,>orig3=304(%esp)
+movdqa %xmm0,304(%esp)
+
+# qhasm: bytesatleast256:
+._bytesatleast256:
+
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#4,>in8=int32#2
+# asm 2: movl <x2=80(%esp),>in8=%ecx
+movl 80(%esp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#1,>in9=int32#3
+# asm 2: movl 4+<x3=32(%esp),>in9=%edx
+movl 4+32(%esp),%edx
+
+# qhasm:   ((uint32 *) &orig8)[0] = in8
+# asm 1: movl <in8=int32#2,>orig8=stack128#19
+# asm 2: movl <in8=%ecx,>orig8=320(%esp)
+movl %ecx,320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[0] = in9
+# asm 1: movl <in9=int32#3,>orig9=stack128#20
+# asm 2: movl <in9=%edx,>orig9=336(%esp)
+movl %edx,336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[1] = in8
+# asm 1: movl <in8=int32#2,4+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,4+<orig8=320(%esp)
+movl %ecx,4+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[1] = in9
+# asm 1: movl <in9=int32#3,4+<orig9=stack128#20
+# asm 2: movl <in9=%edx,4+<orig9=336(%esp)
+movl %edx,4+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[2] = in8
+# asm 1: movl <in8=int32#2,8+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,8+<orig8=320(%esp)
+movl %ecx,8+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[2] = in9
+# asm 1: movl <in9=int32#3,8+<orig9=stack128#20
+# asm 2: movl <in9=%edx,8+<orig9=336(%esp)
+movl %edx,8+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *) &orig8)[3] = in8
+# asm 1: movl <in8=int32#2,12+<orig8=stack128#19
+# asm 2: movl <in8=%ecx,12+<orig8=320(%esp)
+movl %ecx,12+320(%esp)
+
+# qhasm:   ((uint32 *) &orig9)[3] = in9
+# asm 1: movl <in9=int32#3,12+<orig9=stack128#20
+# asm 2: movl <in9=%edx,12+<orig9=336(%esp)
+movl %edx,12+336(%esp)
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#3,4+<x3=stack128#1
+# asm 2: movl <in9=%edx,4+<x3=32(%esp)
+movl %edx,4+32(%esp)
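+# note: the four 32-bit lanes of orig8/orig9 now hold the 64-bit block
+# counters n, n+1, n+2, n+3 for four Salsa20/8 blocks processed in parallel,
+# and x2/x3 carry the counter advanced to n+4 for the next 256-byte chunk.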
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#1,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%eax,>bytes_stack=24(%esp)
+movl %eax,24(%esp)
+
+# qhasm: i = 8
+# asm 1: mov  $8,>i=int32#1
+# asm 2: mov  $8,>i=%eax
+mov  $8,%eax
+
+# qhasm:   z5 = orig5
+# asm 1: movdqa <orig5=stack128#5,>z5=int6464#1
+# asm 2: movdqa <orig5=96(%esp),>z5=%xmm0
+movdqa 96(%esp),%xmm0
+
+# qhasm:   z10 = orig10
+# asm 1: movdqa <orig10=stack128#6,>z10=int6464#2
+# asm 2: movdqa <orig10=112(%esp),>z10=%xmm1
+movdqa 112(%esp),%xmm1
+
+# qhasm:   z15 = orig15
+# asm 1: movdqa <orig15=stack128#7,>z15=int6464#3
+# asm 2: movdqa <orig15=128(%esp),>z15=%xmm2
+movdqa 128(%esp),%xmm2
+
+# qhasm:   z14 = orig14
+# asm 1: movdqa <orig14=stack128#17,>z14=int6464#4
+# asm 2: movdqa <orig14=288(%esp),>z14=%xmm3
+movdqa 288(%esp),%xmm3
+
+# qhasm:   z3 = orig3
+# asm 1: movdqa <orig3=stack128#18,>z3=int6464#5
+# asm 2: movdqa <orig3=304(%esp),>z3=%xmm4
+movdqa 304(%esp),%xmm4
+
+# qhasm:   z6 = orig6
+# asm 1: movdqa <orig6=stack128#9,>z6=int6464#6
+# asm 2: movdqa <orig6=160(%esp),>z6=%xmm5
+movdqa 160(%esp),%xmm5
+
+# qhasm:   z11 = orig11
+# asm 1: movdqa <orig11=stack128#10,>z11=int6464#7
+# asm 2: movdqa <orig11=176(%esp),>z11=%xmm6
+movdqa 176(%esp),%xmm6
+
+# qhasm:   z1 = orig1
+# asm 1: movdqa <orig1=stack128#12,>z1=int6464#8
+# asm 2: movdqa <orig1=208(%esp),>z1=%xmm7
+movdqa 208(%esp),%xmm7
+
+# qhasm:   z5_stack = z5
+# asm 1: movdqa <z5=int6464#1,>z5_stack=stack128#21
+# asm 2: movdqa <z5=%xmm0,>z5_stack=352(%esp)
+movdqa %xmm0,352(%esp)
+
+# qhasm:   z10_stack = z10
+# asm 1: movdqa <z10=int6464#2,>z10_stack=stack128#22
+# asm 2: movdqa <z10=%xmm1,>z10_stack=368(%esp)
+movdqa %xmm1,368(%esp)
+
+# qhasm:   z15_stack = z15
+# asm 1: movdqa <z15=int6464#3,>z15_stack=stack128#23
+# asm 2: movdqa <z15=%xmm2,>z15_stack=384(%esp)
+movdqa %xmm2,384(%esp)
+
+# qhasm:   z14_stack = z14
+# asm 1: movdqa <z14=int6464#4,>z14_stack=stack128#24
+# asm 2: movdqa <z14=%xmm3,>z14_stack=400(%esp)
+movdqa %xmm3,400(%esp)
+
+# qhasm:   z3_stack = z3
+# asm 1: movdqa <z3=int6464#5,>z3_stack=stack128#25
+# asm 2: movdqa <z3=%xmm4,>z3_stack=416(%esp)
+movdqa %xmm4,416(%esp)
+
+# qhasm:   z6_stack = z6
+# asm 1: movdqa <z6=int6464#6,>z6_stack=stack128#26
+# asm 2: movdqa <z6=%xmm5,>z6_stack=432(%esp)
+movdqa %xmm5,432(%esp)
+
+# qhasm:   z11_stack = z11
+# asm 1: movdqa <z11=int6464#7,>z11_stack=stack128#27
+# asm 2: movdqa <z11=%xmm6,>z11_stack=448(%esp)
+movdqa %xmm6,448(%esp)
+
+# qhasm:   z1_stack = z1
+# asm 1: movdqa <z1=int6464#8,>z1_stack=stack128#28
+# asm 2: movdqa <z1=%xmm7,>z1_stack=464(%esp)
+movdqa %xmm7,464(%esp)
+
+# qhasm:   z7 = orig7
+# asm 1: movdqa <orig7=stack128#13,>z7=int6464#5
+# asm 2: movdqa <orig7=224(%esp),>z7=%xmm4
+movdqa 224(%esp),%xmm4
+
+# qhasm:   z13 = orig13
+# asm 1: movdqa <orig13=stack128#14,>z13=int6464#6
+# asm 2: movdqa <orig13=240(%esp),>z13=%xmm5
+movdqa 240(%esp),%xmm5
+
+# qhasm:   z2 = orig2
+# asm 1: movdqa <orig2=stack128#15,>z2=int6464#7
+# asm 2: movdqa <orig2=256(%esp),>z2=%xmm6
+movdqa 256(%esp),%xmm6
+
+# qhasm:   z9 = orig9
+# asm 1: movdqa <orig9=stack128#20,>z9=int6464#8
+# asm 2: movdqa <orig9=336(%esp),>z9=%xmm7
+movdqa 336(%esp),%xmm7
+
+# qhasm:                   p = orig0
+# asm 1: movdqa <orig0=stack128#8,>p=int6464#1
+# asm 2: movdqa <orig0=144(%esp),>p=%xmm0
+movdqa 144(%esp),%xmm0
+
+# qhasm:                   t = orig12
+# asm 1: movdqa <orig12=stack128#11,>t=int6464#3
+# asm 2: movdqa <orig12=192(%esp),>t=%xmm2
+movdqa 192(%esp),%xmm2
+
+# qhasm:                   q = orig4
+# asm 1: movdqa <orig4=stack128#16,>q=int6464#4
+# asm 2: movdqa <orig4=272(%esp),>q=%xmm3
+movdqa 272(%esp),%xmm3
+
+# qhasm:                   r = orig8
+# asm 1: movdqa <orig8=stack128#19,>r=int6464#2
+# asm 2: movdqa <orig8=320(%esp),>r=%xmm1
+movdqa 320(%esp),%xmm1
+
+# qhasm:   z7_stack = z7
+# asm 1: movdqa <z7=int6464#5,>z7_stack=stack128#29
+# asm 2: movdqa <z7=%xmm4,>z7_stack=480(%esp)
+movdqa %xmm4,480(%esp)
+
+# qhasm:   z13_stack = z13
+# asm 1: movdqa <z13=int6464#6,>z13_stack=stack128#30
+# asm 2: movdqa <z13=%xmm5,>z13_stack=496(%esp)
+movdqa %xmm5,496(%esp)
+
+# qhasm:   z2_stack = z2
+# asm 1: movdqa <z2=int6464#7,>z2_stack=stack128#31
+# asm 2: movdqa <z2=%xmm6,>z2_stack=512(%esp)
+movdqa %xmm6,512(%esp)
+
+# qhasm:   z9_stack = z9
+# asm 1: movdqa <z9=int6464#8,>z9_stack=stack128#32
+# asm 2: movdqa <z9=%xmm7,>z9_stack=528(%esp)
+movdqa %xmm7,528(%esp)
+
+# qhasm:   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#33
+# asm 2: movdqa <p=%xmm0,>z0_stack=544(%esp)
+movdqa %xmm0,544(%esp)
+
+# qhasm:   z12_stack = t
+# asm 1: movdqa <t=int6464#3,>z12_stack=stack128#34
+# asm 2: movdqa <t=%xmm2,>z12_stack=560(%esp)
+movdqa %xmm2,560(%esp)
+
+# qhasm:   z4_stack = q
+# asm 1: movdqa <q=int6464#4,>z4_stack=stack128#35
+# asm 2: movdqa <q=%xmm3,>z4_stack=576(%esp)
+movdqa %xmm3,576(%esp)
+
+# qhasm:   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#36
+# asm 2: movdqa <r=%xmm1,>z8_stack=592(%esp)
+movdqa %xmm1,592(%esp)
+
+# qhasm: mainloop1:
+._mainloop1:
+
+# qhasm:                   assign xmm0 to p
+
+# qhasm:                   assign xmm1 to r
+
+# qhasm:                   assign xmm2 to t
+
+# qhasm:                   assign xmm3 to q
+
+# qhasm:                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                   z4_stack = q
+# asm 1: movdqa <q=int6464#4,>z4_stack=stack128#33
+# asm 2: movdqa <q=%xmm3,>z4_stack=544(%esp)
+movdqa %xmm3,544(%esp)
+
+# qhasm:                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#34
+# asm 2: movdqa <r=%xmm1,>z8_stack=560(%esp)
+movdqa %xmm1,560(%esp)
+
+# qhasm: uint32323232      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                   mt = z1_stack
+# asm 1: movdqa <z1_stack=stack128#28,>mt=int6464#3
+# asm 2: movdqa <z1_stack=464(%esp),>mt=%xmm2
+movdqa 464(%esp),%xmm2
+
+# qhasm:                                   mp = z5_stack
+# asm 1: movdqa <z5_stack=stack128#21,>mp=int6464#5
+# asm 2: movdqa <z5_stack=352(%esp),>mp=%xmm4
+movdqa 352(%esp),%xmm4
+
+# qhasm:                                   mq = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>mq=int6464#4
+# asm 2: movdqa <z9_stack=528(%esp),>mq=%xmm3
+movdqa 528(%esp),%xmm3
+
+# qhasm:                                   mr = z13_stack
+# asm 1: movdqa <z13_stack=stack128#30,>mr=int6464#6
+# asm 2: movdqa <z13_stack=496(%esp),>mr=%xmm5
+movdqa 496(%esp),%xmm5
+
+# qhasm:                   z12_stack = s
+# asm 1: movdqa <s=int6464#7,>z12_stack=stack128#30
+# asm 2: movdqa <s=%xmm6,>z12_stack=496(%esp)
+movdqa %xmm6,496(%esp)
+
+# qhasm: uint32323232      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#21
+# asm 2: movdqa <p=%xmm0,>z0_stack=352(%esp)
+movdqa %xmm0,352(%esp)
+
+# qhasm:                                   assign xmm2 to mt
+
+# qhasm:                                   assign xmm3 to mq
+
+# qhasm:                                   assign xmm4 to mp
+
+# qhasm:                                   assign xmm5 to mr
+
+# qhasm:                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                   z9_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z9_stack=stack128#32
+# asm 2: movdqa <mq=%xmm3,>z9_stack=528(%esp)
+movdqa %xmm3,528(%esp)
+
+# qhasm:                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                   z13_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z13_stack=stack128#35
+# asm 2: movdqa <mr=%xmm5,>z13_stack=576(%esp)
+movdqa %xmm5,576(%esp)
+
+# qhasm: uint32323232                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                                                   t = z6_stack
+# asm 1: movdqa <z6_stack=stack128#26,>t=int6464#3
+# asm 2: movdqa <z6_stack=432(%esp),>t=%xmm2
+movdqa 432(%esp),%xmm2
+
+# qhasm:                                                   p = z10_stack
+# asm 1: movdqa <z10_stack=stack128#22,>p=int6464#1
+# asm 2: movdqa <z10_stack=368(%esp),>p=%xmm0
+movdqa 368(%esp),%xmm0
+
+# qhasm:                                                   q = z14_stack
+# asm 1: movdqa <z14_stack=stack128#24,>q=int6464#4
+# asm 2: movdqa <z14_stack=400(%esp),>q=%xmm3
+movdqa 400(%esp),%xmm3
+
+# qhasm:                                                   r = z2_stack
+# asm 1: movdqa <z2_stack=stack128#31,>r=int6464#2
+# asm 2: movdqa <z2_stack=512(%esp),>r=%xmm1
+movdqa 512(%esp),%xmm1
+
+# qhasm:                                   z1_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z1_stack=stack128#22
+# asm 2: movdqa <ms=%xmm6,>z1_stack=368(%esp)
+movdqa %xmm6,368(%esp)
+
+# qhasm: uint32323232                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                   z5_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z5_stack=stack128#24
+# asm 2: movdqa <mp=%xmm4,>z5_stack=400(%esp)
+movdqa %xmm4,400(%esp)
+
+# qhasm:                                                   assign xmm0 to p
+
+# qhasm:                                                   assign xmm1 to r
+
+# qhasm:                                                   assign xmm2 to t
+
+# qhasm:                                                   assign xmm3 to q
+
+# qhasm:                                                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                                                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                                   z14_stack = q
+# asm 1: movdqa <q=int6464#4,>z14_stack=stack128#36
+# asm 2: movdqa <q=%xmm3,>z14_stack=592(%esp)
+movdqa %xmm3,592(%esp)
+
+# qhasm:                                                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232                                      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                                                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232                                      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                                                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                                   z2_stack = r
+# asm 1: movdqa <r=int6464#2,>z2_stack=stack128#26
+# asm 2: movdqa <r=%xmm1,>z2_stack=432(%esp)
+movdqa %xmm1,432(%esp)
+
+# qhasm: uint32323232                                      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                                                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232                                      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                                                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                                                   mt = z11_stack
+# asm 1: movdqa <z11_stack=stack128#27,>mt=int6464#3
+# asm 2: movdqa <z11_stack=448(%esp),>mt=%xmm2
+movdqa 448(%esp),%xmm2
+
+# qhasm:                                                                   mp = z15_stack
+# asm 1: movdqa <z15_stack=stack128#23,>mp=int6464#5
+# asm 2: movdqa <z15_stack=384(%esp),>mp=%xmm4
+movdqa 384(%esp),%xmm4
+
+# qhasm:                                                                   mq = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>mq=int6464#4
+# asm 2: movdqa <z3_stack=416(%esp),>mq=%xmm3
+movdqa 416(%esp),%xmm3
+
+# qhasm:                                                                   mr = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>mr=int6464#6
+# asm 2: movdqa <z7_stack=480(%esp),>mr=%xmm5
+movdqa 480(%esp),%xmm5
+
+# qhasm:                                                   z6_stack = s
+# asm 1: movdqa <s=int6464#7,>z6_stack=stack128#23
+# asm 2: movdqa <s=%xmm6,>z6_stack=384(%esp)
+movdqa %xmm6,384(%esp)
+
+# qhasm: uint32323232                                      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                                                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232                                      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                                                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232                                      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                                                   z10_stack = p
+# asm 1: movdqa <p=int6464#1,>z10_stack=stack128#27
+# asm 2: movdqa <p=%xmm0,>z10_stack=448(%esp)
+movdqa %xmm0,448(%esp)
+
+# qhasm:                                                                   assign xmm2 to mt
+
+# qhasm:                                                                   assign xmm3 to mq
+
+# qhasm:                                                                   assign xmm4 to mp
+
+# qhasm:                                                                   assign xmm5 to mr
+
+# qhasm:                                                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                                                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                                                   z3_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z3_stack=stack128#25
+# asm 2: movdqa <mq=%xmm3,>z3_stack=416(%esp)
+movdqa %xmm3,416(%esp)
+
+# qhasm:                                                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                                                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                                                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                                                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                                                   z7_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z7_stack=stack128#29
+# asm 2: movdqa <mr=%xmm5,>z7_stack=480(%esp)
+movdqa %xmm5,480(%esp)
+
+# qhasm: uint32323232                                                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                                                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                   t = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>t=int6464#3
+# asm 2: movdqa <z3_stack=416(%esp),>t=%xmm2
+movdqa 416(%esp),%xmm2
+
+# qhasm:                   p = z0_stack
+# asm 1: movdqa <z0_stack=stack128#21,>p=int6464#1
+# asm 2: movdqa <z0_stack=352(%esp),>p=%xmm0
+movdqa 352(%esp),%xmm0
+
+# qhasm:                   q = z1_stack
+# asm 1: movdqa <z1_stack=stack128#22,>q=int6464#4
+# asm 2: movdqa <z1_stack=368(%esp),>q=%xmm3
+movdqa 368(%esp),%xmm3
+
+# qhasm:                   r = z2_stack
+# asm 1: movdqa <z2_stack=stack128#26,>r=int6464#2
+# asm 2: movdqa <z2_stack=432(%esp),>r=%xmm1
+movdqa 432(%esp),%xmm1
+
+# qhasm:                                                                   z11_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z11_stack=stack128#21
+# asm 2: movdqa <ms=%xmm6,>z11_stack=352(%esp)
+movdqa %xmm6,352(%esp)
+
+# qhasm: uint32323232                                                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                                                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                                                   z15_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z15_stack=stack128#22
+# asm 2: movdqa <mp=%xmm4,>z15_stack=368(%esp)
+movdqa %xmm4,368(%esp)
+
+# qhasm:                   assign xmm0 to p
+
+# qhasm:                   assign xmm1 to r
+
+# qhasm:                   assign xmm2 to t
+
+# qhasm:                   assign xmm3 to q
+
+# qhasm:                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                   z1_stack = q
+# asm 1: movdqa <q=int6464#4,>z1_stack=stack128#28
+# asm 2: movdqa <q=%xmm3,>z1_stack=464(%esp)
+movdqa %xmm3,464(%esp)
+
+# qhasm:                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                   z2_stack = r
+# asm 1: movdqa <r=int6464#2,>z2_stack=stack128#31
+# asm 2: movdqa <r=%xmm1,>z2_stack=512(%esp)
+movdqa %xmm1,512(%esp)
+
+# qhasm: uint32323232      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                   mt = z4_stack
+# asm 1: movdqa <z4_stack=stack128#33,>mt=int6464#3
+# asm 2: movdqa <z4_stack=544(%esp),>mt=%xmm2
+movdqa 544(%esp),%xmm2
+
+# qhasm:                                   mp = z5_stack
+# asm 1: movdqa <z5_stack=stack128#24,>mp=int6464#5
+# asm 2: movdqa <z5_stack=400(%esp),>mp=%xmm4
+movdqa 400(%esp),%xmm4
+
+# qhasm:                                   mq = z6_stack
+# asm 1: movdqa <z6_stack=stack128#23,>mq=int6464#4
+# asm 2: movdqa <z6_stack=384(%esp),>mq=%xmm3
+movdqa 384(%esp),%xmm3
+
+# qhasm:                                   mr = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>mr=int6464#6
+# asm 2: movdqa <z7_stack=480(%esp),>mr=%xmm5
+movdqa 480(%esp),%xmm5
+
+# qhasm:                   z3_stack = s
+# asm 1: movdqa <s=int6464#7,>z3_stack=stack128#25
+# asm 2: movdqa <s=%xmm6,>z3_stack=416(%esp)
+movdqa %xmm6,416(%esp)
+
+# qhasm: uint32323232      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                   z0_stack = p
+# asm 1: movdqa <p=int6464#1,>z0_stack=stack128#33
+# asm 2: movdqa <p=%xmm0,>z0_stack=544(%esp)
+movdqa %xmm0,544(%esp)
+
+# qhasm:                                   assign xmm2 to mt
+
+# qhasm:                                   assign xmm3 to mq
+
+# qhasm:                                   assign xmm4 to mp
+
+# qhasm:                                   assign xmm5 to mr
+
+# qhasm:                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                   z6_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z6_stack=stack128#26
+# asm 2: movdqa <mq=%xmm3,>z6_stack=432(%esp)
+movdqa %xmm3,432(%esp)
+
+# qhasm:                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                   z7_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z7_stack=stack128#29
+# asm 2: movdqa <mr=%xmm5,>z7_stack=480(%esp)
+movdqa %xmm5,480(%esp)
+
+# qhasm: uint32323232                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                                                   t = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>t=int6464#3
+# asm 2: movdqa <z9_stack=528(%esp),>t=%xmm2
+movdqa 528(%esp),%xmm2
+
+# qhasm:                                                   p = z10_stack
+# asm 1: movdqa <z10_stack=stack128#27,>p=int6464#1
+# asm 2: movdqa <z10_stack=448(%esp),>p=%xmm0
+movdqa 448(%esp),%xmm0
+
+# qhasm:                                                   q = z11_stack
+# asm 1: movdqa <z11_stack=stack128#21,>q=int6464#4
+# asm 2: movdqa <z11_stack=352(%esp),>q=%xmm3
+movdqa 352(%esp),%xmm3
+
+# qhasm:                                                   r = z8_stack
+# asm 1: movdqa <z8_stack=stack128#34,>r=int6464#2
+# asm 2: movdqa <z8_stack=560(%esp),>r=%xmm1
+movdqa 560(%esp),%xmm1
+
+# qhasm:                                   z4_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z4_stack=stack128#34
+# asm 2: movdqa <ms=%xmm6,>z4_stack=560(%esp)
+movdqa %xmm6,560(%esp)
+
+# qhasm: uint32323232                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                   z5_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z5_stack=stack128#21
+# asm 2: movdqa <mp=%xmm4,>z5_stack=352(%esp)
+movdqa %xmm4,352(%esp)
+
+# qhasm:                                                   assign xmm0 to p
+
+# qhasm:                                                   assign xmm1 to r
+
+# qhasm:                                                   assign xmm2 to t
+
+# qhasm:                                                   assign xmm3 to q
+
+# qhasm:                                                   s = t
+# asm 1: movdqa <t=int6464#3,>s=int6464#7
+# asm 2: movdqa <t=%xmm2,>s=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                      t += p
+# asm 1: paddd <p=int6464#1,<t=int6464#3
+# asm 2: paddd <p=%xmm0,<t=%xmm2
+paddd %xmm0,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 25
+# asm 1: psrld $25,<t=int6464#3
+# asm 2: psrld $25,<t=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                   q ^= t
+# asm 1: pxor  <t=int6464#3,<q=int6464#4
+# asm 2: pxor  <t=%xmm2,<q=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                      u <<= 7
+# asm 1: pslld $7,<u=int6464#5
+# asm 2: pslld $7,<u=%xmm4
+pslld $7,%xmm4
+
+# qhasm:                                                   q ^= u
+# asm 1: pxor  <u=int6464#5,<q=int6464#4
+# asm 2: pxor  <u=%xmm4,<q=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                                   z11_stack = q
+# asm 1: movdqa <q=int6464#4,>z11_stack=stack128#27
+# asm 2: movdqa <q=%xmm3,>z11_stack=448(%esp)
+movdqa %xmm3,448(%esp)
+
+# qhasm:                                                   t = p
+# asm 1: movdqa <p=int6464#1,>t=int6464#3
+# asm 2: movdqa <p=%xmm0,>t=%xmm2
+movdqa %xmm0,%xmm2
+
+# qhasm: uint32323232                                      t += q
+# asm 1: paddd <q=int6464#4,<t=int6464#3
+# asm 2: paddd <q=%xmm3,<t=%xmm2
+paddd %xmm3,%xmm2
+
+# qhasm:                                                   u = t
+# asm 1: movdqa <t=int6464#3,>u=int6464#5
+# asm 2: movdqa <t=%xmm2,>u=%xmm4
+movdqa %xmm2,%xmm4
+
+# qhasm: uint32323232                                      t >>= 23
+# asm 1: psrld $23,<t=int6464#3
+# asm 2: psrld $23,<t=%xmm2
+psrld $23,%xmm2
+
+# qhasm:                                                   r ^= t
+# asm 1: pxor  <t=int6464#3,<r=int6464#2
+# asm 2: pxor  <t=%xmm2,<r=%xmm1
+pxor  %xmm2,%xmm1
+
+# qhasm: uint32323232                                      u <<= 9
+# asm 1: pslld $9,<u=int6464#5
+# asm 2: pslld $9,<u=%xmm4
+pslld $9,%xmm4
+
+# qhasm:                                                   r ^= u
+# asm 1: pxor  <u=int6464#5,<r=int6464#2
+# asm 2: pxor  <u=%xmm4,<r=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                                   z8_stack = r
+# asm 1: movdqa <r=int6464#2,>z8_stack=stack128#37
+# asm 2: movdqa <r=%xmm1,>z8_stack=608(%esp)
+movdqa %xmm1,608(%esp)
+
+# qhasm: uint32323232                                      q += r
+# asm 1: paddd <r=int6464#2,<q=int6464#4
+# asm 2: paddd <r=%xmm1,<q=%xmm3
+paddd %xmm1,%xmm3
+
+# qhasm:                                                   u = q
+# asm 1: movdqa <q=int6464#4,>u=int6464#3
+# asm 2: movdqa <q=%xmm3,>u=%xmm2
+movdqa %xmm3,%xmm2
+
+# qhasm: uint32323232                                      q >>= 19
+# asm 1: psrld $19,<q=int6464#4
+# asm 2: psrld $19,<q=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                   s ^= q
+# asm 1: pxor  <q=int6464#4,<s=int6464#7
+# asm 2: pxor  <q=%xmm3,<s=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                      u <<= 13
+# asm 1: pslld $13,<u=int6464#3
+# asm 2: pslld $13,<u=%xmm2
+pslld $13,%xmm2
+
+# qhasm:                                                   s ^= u
+# asm 1: pxor  <u=int6464#3,<s=int6464#7
+# asm 2: pxor  <u=%xmm2,<s=%xmm6
+pxor  %xmm2,%xmm6
+
+# qhasm:                                                                   mt = z14_stack
+# asm 1: movdqa <z14_stack=stack128#36,>mt=int6464#3
+# asm 2: movdqa <z14_stack=592(%esp),>mt=%xmm2
+movdqa 592(%esp),%xmm2
+
+# qhasm:                                                                   mp = z15_stack
+# asm 1: movdqa <z15_stack=stack128#22,>mp=int6464#5
+# asm 2: movdqa <z15_stack=368(%esp),>mp=%xmm4
+movdqa 368(%esp),%xmm4
+
+# qhasm:                                                                   mq = z12_stack
+# asm 1: movdqa <z12_stack=stack128#30,>mq=int6464#4
+# asm 2: movdqa <z12_stack=496(%esp),>mq=%xmm3
+movdqa 496(%esp),%xmm3
+
+# qhasm:                                                                   mr = z13_stack
+# asm 1: movdqa <z13_stack=stack128#35,>mr=int6464#6
+# asm 2: movdqa <z13_stack=576(%esp),>mr=%xmm5
+movdqa 576(%esp),%xmm5
+
+# qhasm:                                                   z9_stack = s
+# asm 1: movdqa <s=int6464#7,>z9_stack=stack128#32
+# asm 2: movdqa <s=%xmm6,>z9_stack=528(%esp)
+movdqa %xmm6,528(%esp)
+
+# qhasm: uint32323232                                      r += s
+# asm 1: paddd <s=int6464#7,<r=int6464#2
+# asm 2: paddd <s=%xmm6,<r=%xmm1
+paddd %xmm6,%xmm1
+
+# qhasm:                                                   u = r
+# asm 1: movdqa <r=int6464#2,>u=int6464#7
+# asm 2: movdqa <r=%xmm1,>u=%xmm6
+movdqa %xmm1,%xmm6
+
+# qhasm: uint32323232                                      r >>= 14
+# asm 1: psrld $14,<r=int6464#2
+# asm 2: psrld $14,<r=%xmm1
+psrld $14,%xmm1
+
+# qhasm:                                                   p ^= r
+# asm 1: pxor  <r=int6464#2,<p=int6464#1
+# asm 2: pxor  <r=%xmm1,<p=%xmm0
+pxor  %xmm1,%xmm0
+
+# qhasm: uint32323232                                      u <<= 18
+# asm 1: pslld $18,<u=int6464#7
+# asm 2: pslld $18,<u=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                   p ^= u
+# asm 1: pxor  <u=int6464#7,<p=int6464#1
+# asm 2: pxor  <u=%xmm6,<p=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm:                                                   z10_stack = p
+# asm 1: movdqa <p=int6464#1,>z10_stack=stack128#22
+# asm 2: movdqa <p=%xmm0,>z10_stack=368(%esp)
+movdqa %xmm0,368(%esp)
+
+# qhasm:                                                                   assign xmm2 to mt
+
+# qhasm:                                                                   assign xmm3 to mq
+
+# qhasm:                                                                   assign xmm4 to mp
+
+# qhasm:                                                                   assign xmm5 to mr
+
+# qhasm:                                                                   ms = mt
+# asm 1: movdqa <mt=int6464#3,>ms=int6464#7
+# asm 2: movdqa <mt=%xmm2,>ms=%xmm6
+movdqa %xmm2,%xmm6
+
+# qhasm: uint32323232                                                      mt += mp
+# asm 1: paddd <mp=int6464#5,<mt=int6464#3
+# asm 2: paddd <mp=%xmm4,<mt=%xmm2
+paddd %xmm4,%xmm2
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#3,>mu=int6464#1
+# asm 2: movdqa <mt=%xmm2,>mu=%xmm0
+movdqa %xmm2,%xmm0
+
+# qhasm: uint32323232                                                      mt >>= 25
+# asm 1: psrld $25,<mt=int6464#3
+# asm 2: psrld $25,<mt=%xmm2
+psrld $25,%xmm2
+
+# qhasm:                                                                   mq ^= mt
+# asm 1: pxor  <mt=int6464#3,<mq=int6464#4
+# asm 2: pxor  <mt=%xmm2,<mq=%xmm3
+pxor  %xmm2,%xmm3
+
+# qhasm: uint32323232                                                      mu <<= 7
+# asm 1: pslld $7,<mu=int6464#1
+# asm 2: pslld $7,<mu=%xmm0
+pslld $7,%xmm0
+
+# qhasm:                                                                   mq ^= mu
+# asm 1: pxor  <mu=int6464#1,<mq=int6464#4
+# asm 2: pxor  <mu=%xmm0,<mq=%xmm3
+pxor  %xmm0,%xmm3
+
+# qhasm:                                                                   z12_stack = mq
+# asm 1: movdqa <mq=int6464#4,>z12_stack=stack128#35
+# asm 2: movdqa <mq=%xmm3,>z12_stack=576(%esp)
+movdqa %xmm3,576(%esp)
+
+# qhasm:                                                                   mt = mp
+# asm 1: movdqa <mp=int6464#5,>mt=int6464#1
+# asm 2: movdqa <mp=%xmm4,>mt=%xmm0
+movdqa %xmm4,%xmm0
+
+# qhasm: uint32323232                                                      mt += mq
+# asm 1: paddd <mq=int6464#4,<mt=int6464#1
+# asm 2: paddd <mq=%xmm3,<mt=%xmm0
+paddd %xmm3,%xmm0
+
+# qhasm:                                                                   mu = mt
+# asm 1: movdqa <mt=int6464#1,>mu=int6464#2
+# asm 2: movdqa <mt=%xmm0,>mu=%xmm1
+movdqa %xmm0,%xmm1
+
+# qhasm: uint32323232                                                      mt >>= 23
+# asm 1: psrld $23,<mt=int6464#1
+# asm 2: psrld $23,<mt=%xmm0
+psrld $23,%xmm0
+
+# qhasm:                                                                   mr ^= mt
+# asm 1: pxor  <mt=int6464#1,<mr=int6464#6
+# asm 2: pxor  <mt=%xmm0,<mr=%xmm5
+pxor  %xmm0,%xmm5
+
+# qhasm: uint32323232                                                      mu <<= 9
+# asm 1: pslld $9,<mu=int6464#2
+# asm 2: pslld $9,<mu=%xmm1
+pslld $9,%xmm1
+
+# qhasm:                                                                   mr ^= mu
+# asm 1: pxor  <mu=int6464#2,<mr=int6464#6
+# asm 2: pxor  <mu=%xmm1,<mr=%xmm5
+pxor  %xmm1,%xmm5
+
+# qhasm:                                                                   z13_stack = mr
+# asm 1: movdqa <mr=int6464#6,>z13_stack=stack128#30
+# asm 2: movdqa <mr=%xmm5,>z13_stack=496(%esp)
+movdqa %xmm5,496(%esp)
+
+# qhasm: uint32323232                                                      mq += mr
+# asm 1: paddd <mr=int6464#6,<mq=int6464#4
+# asm 2: paddd <mr=%xmm5,<mq=%xmm3
+paddd %xmm5,%xmm3
+
+# qhasm:                                                                   mu = mq
+# asm 1: movdqa <mq=int6464#4,>mu=int6464#1
+# asm 2: movdqa <mq=%xmm3,>mu=%xmm0
+movdqa %xmm3,%xmm0
+
+# qhasm: uint32323232                                                      mq >>= 19
+# asm 1: psrld $19,<mq=int6464#4
+# asm 2: psrld $19,<mq=%xmm3
+psrld $19,%xmm3
+
+# qhasm:                                                                   ms ^= mq
+# asm 1: pxor  <mq=int6464#4,<ms=int6464#7
+# asm 2: pxor  <mq=%xmm3,<ms=%xmm6
+pxor  %xmm3,%xmm6
+
+# qhasm: uint32323232                                                      mu <<= 13
+# asm 1: pslld $13,<mu=int6464#1
+# asm 2: pslld $13,<mu=%xmm0
+pslld $13,%xmm0
+
+# qhasm:                                                                   ms ^= mu
+# asm 1: pxor  <mu=int6464#1,<ms=int6464#7
+# asm 2: pxor  <mu=%xmm0,<ms=%xmm6
+pxor  %xmm0,%xmm6
+
+# qhasm:                   t = z12_stack
+# asm 1: movdqa <z12_stack=stack128#35,>t=int6464#3
+# asm 2: movdqa <z12_stack=576(%esp),>t=%xmm2
+movdqa 576(%esp),%xmm2
+
+# qhasm:                   p = z0_stack
+# asm 1: movdqa <z0_stack=stack128#33,>p=int6464#1
+# asm 2: movdqa <z0_stack=544(%esp),>p=%xmm0
+movdqa 544(%esp),%xmm0
+
+# qhasm:                   q = z4_stack
+# asm 1: movdqa <z4_stack=stack128#34,>q=int6464#4
+# asm 2: movdqa <z4_stack=560(%esp),>q=%xmm3
+movdqa 560(%esp),%xmm3
+
+# qhasm:                   r = z8_stack
+# asm 1: movdqa <z8_stack=stack128#37,>r=int6464#2
+# asm 2: movdqa <z8_stack=608(%esp),>r=%xmm1
+movdqa 608(%esp),%xmm1
+
+# qhasm:                                                                   z14_stack = ms
+# asm 1: movdqa <ms=int6464#7,>z14_stack=stack128#24
+# asm 2: movdqa <ms=%xmm6,>z14_stack=400(%esp)
+movdqa %xmm6,400(%esp)
+
+# qhasm: uint32323232                                                      mr += ms
+# asm 1: paddd <ms=int6464#7,<mr=int6464#6
+# asm 2: paddd <ms=%xmm6,<mr=%xmm5
+paddd %xmm6,%xmm5
+
+# qhasm:                                                                   mu = mr
+# asm 1: movdqa <mr=int6464#6,>mu=int6464#7
+# asm 2: movdqa <mr=%xmm5,>mu=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                      mr >>= 14
+# asm 1: psrld $14,<mr=int6464#6
+# asm 2: psrld $14,<mr=%xmm5
+psrld $14,%xmm5
+
+# qhasm:                                                                   mp ^= mr
+# asm 1: pxor  <mr=int6464#6,<mp=int6464#5
+# asm 2: pxor  <mr=%xmm5,<mp=%xmm4
+pxor  %xmm5,%xmm4
+
+# qhasm: uint32323232                                                      mu <<= 18
+# asm 1: pslld $18,<mu=int6464#7
+# asm 2: pslld $18,<mu=%xmm6
+pslld $18,%xmm6
+
+# qhasm:                                                                   mp ^= mu
+# asm 1: pxor  <mu=int6464#7,<mp=int6464#5
+# asm 2: pxor  <mu=%xmm6,<mp=%xmm4
+pxor  %xmm6,%xmm4
+
+# qhasm:                                                                   z15_stack = mp
+# asm 1: movdqa <mp=int6464#5,>z15_stack=stack128#23
+# asm 2: movdqa <mp=%xmm4,>z15_stack=384(%esp)
+movdqa %xmm4,384(%esp)
+
+# qhasm:                   unsigned>? i -= 2
+# asm 1: sub  $2,<i=int32#1
+# asm 2: sub  $2,<i=%eax
+sub  $2,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop1 if unsigned>
+ja ._mainloop1
+
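+# note: the round loop has finished; the code below reloads z0..z3 from their
+#       stack slots, adds the saved orig0..orig3 words, then repeatedly
+#       extracts one 32-bit lane per block (movd), rotates the lanes with
+#       pshufd $0x39 (the "<<<= 96" steps below), xors each lane with the
+#       message words at offsets 0, 64, 128 and 192, and stores the result
+#       to the corresponding out offsets.
+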
+# qhasm:   out = out_stack
+# asm 1: movl <out_stack=stack32#6,>out=int32#6
+# asm 2: movl <out_stack=20(%esp),>out=%edi
+movl 20(%esp),%edi
+
+# qhasm:   z0 = z0_stack
+# asm 1: movdqa <z0_stack=stack128#33,>z0=int6464#1
+# asm 2: movdqa <z0_stack=544(%esp),>z0=%xmm0
+movdqa 544(%esp),%xmm0
+
+# qhasm:   z1 = z1_stack
+# asm 1: movdqa <z1_stack=stack128#28,>z1=int6464#2
+# asm 2: movdqa <z1_stack=464(%esp),>z1=%xmm1
+movdqa 464(%esp),%xmm1
+
+# qhasm:   z2 = z2_stack
+# asm 1: movdqa <z2_stack=stack128#31,>z2=int6464#3
+# asm 2: movdqa <z2_stack=512(%esp),>z2=%xmm2
+movdqa 512(%esp),%xmm2
+
+# qhasm:   z3 = z3_stack
+# asm 1: movdqa <z3_stack=stack128#25,>z3=int6464#4
+# asm 2: movdqa <z3_stack=416(%esp),>z3=%xmm3
+movdqa 416(%esp),%xmm3
+
+# qhasm:   uint32323232 z0 += orig0
+# asm 1: paddd <orig0=stack128#8,<z0=int6464#1
+# asm 2: paddd <orig0=144(%esp),<z0=%xmm0
+paddd 144(%esp),%xmm0
+
+# qhasm:   uint32323232 z1 += orig1
+# asm 1: paddd <orig1=stack128#12,<z1=int6464#2
+# asm 2: paddd <orig1=208(%esp),<z1=%xmm1
+paddd 208(%esp),%xmm1
+
+# qhasm:   uint32323232 z2 += orig2
+# asm 1: paddd <orig2=stack128#15,<z2=int6464#3
+# asm 2: paddd <orig2=256(%esp),<z2=%xmm2
+paddd 256(%esp),%xmm2
+
+# qhasm:   uint32323232 z3 += orig3
+# asm 1: paddd <orig3=stack128#18,<z3=int6464#4
+# asm 2: paddd <orig3=304(%esp),<z3=%xmm3
+paddd 304(%esp),%xmm3
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int32#5),<in0=int32#1
+# asm 2: xorl 0(<m=%esi),<in0=%eax
+xorl 0(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int32#5),<in1=int32#2
+# asm 2: xorl 4(<m=%esi),<in1=%ecx
+xorl 4(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int32#5),<in2=int32#3
+# asm 2: xorl 8(<m=%esi),<in2=%edx
+xorl 8(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int32#5),<in3=int32#4
+# asm 2: xorl 12(<m=%esi),<in3=%ebx
+xorl 12(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int32#1,0(<out=int32#6)
+# asm 2: movl <in0=%eax,0(<out=%edi)
+movl %eax,0(%edi)
+
+# qhasm:   *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int32#2,4(<out=int32#6)
+# asm 2: movl <in1=%ecx,4(<out=%edi)
+movl %ecx,4(%edi)
+
+# qhasm:   *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int32#3,8(<out=int32#6)
+# asm 2: movl <in2=%edx,8(<out=%edi)
+movl %edx,8(%edi)
+
+# qhasm:   *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int32#4,12(<out=int32#6)
+# asm 2: movl <in3=%ebx,12(<out=%edi)
+movl %ebx,12(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 64)
+# asm 1: xorl 64(<m=int32#5),<in0=int32#1
+# asm 2: xorl 64(<m=%esi),<in0=%eax
+xorl 64(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 68)
+# asm 1: xorl 68(<m=int32#5),<in1=int32#2
+# asm 2: xorl 68(<m=%esi),<in1=%ecx
+xorl 68(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 72)
+# asm 1: xorl 72(<m=int32#5),<in2=int32#3
+# asm 2: xorl 72(<m=%esi),<in2=%edx
+xorl 72(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 76)
+# asm 1: xorl 76(<m=int32#5),<in3=int32#4
+# asm 2: xorl 76(<m=%esi),<in3=%ebx
+xorl 76(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 64) = in0
+# asm 1: movl <in0=int32#1,64(<out=int32#6)
+# asm 2: movl <in0=%eax,64(<out=%edi)
+movl %eax,64(%edi)
+
+# qhasm:   *(uint32 *) (out + 68) = in1
+# asm 1: movl <in1=int32#2,68(<out=int32#6)
+# asm 2: movl <in1=%ecx,68(<out=%edi)
+movl %ecx,68(%edi)
+
+# qhasm:   *(uint32 *) (out + 72) = in2
+# asm 1: movl <in2=int32#3,72(<out=int32#6)
+# asm 2: movl <in2=%edx,72(<out=%edi)
+movl %edx,72(%edi)
+
+# qhasm:   *(uint32 *) (out + 76) = in3
+# asm 1: movl <in3=int32#4,76(<out=int32#6)
+# asm 2: movl <in3=%ebx,76(<out=%edi)
+movl %ebx,76(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z0 <<<= 96
+# asm 1: pshufd $0x39,<z0=int6464#1,<z0=int6464#1
+# asm 2: pshufd $0x39,<z0=%xmm0,<z0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z1 <<<= 96
+# asm 1: pshufd $0x39,<z1=int6464#2,<z1=int6464#2
+# asm 2: pshufd $0x39,<z1=%xmm1,<z1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z2 <<<= 96
+# asm 1: pshufd $0x39,<z2=int6464#3,<z2=int6464#3
+# asm 2: pshufd $0x39,<z2=%xmm2,<z2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z3 <<<= 96
+# asm 1: pshufd $0x39,<z3=int6464#4,<z3=int6464#4
+# asm 2: pshufd $0x39,<z3=%xmm3,<z3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in0 ^= *(uint32 *) (m + 128)
+# asm 1: xorl 128(<m=int32#5),<in0=int32#1
+# asm 2: xorl 128(<m=%esi),<in0=%eax
+xorl 128(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 132)
+# asm 1: xorl 132(<m=int32#5),<in1=int32#2
+# asm 2: xorl 132(<m=%esi),<in1=%ecx
+xorl 132(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 136)
+# asm 1: xorl 136(<m=int32#5),<in2=int32#3
+# asm 2: xorl 136(<m=%esi),<in2=%edx
+xorl 136(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 140)
+# asm 1: xorl 140(<m=int32#5),<in3=int32#4
+# asm 2: xorl 140(<m=%esi),<in3=%ebx
+xorl 140(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 128) = in0
+# asm 1: movl <in0=int32#1,128(<out=int32#6)
+# asm 2: movl <in0=%eax,128(<out=%edi)
+movl %eax,128(%edi)
+
+# qhasm:   *(uint32 *) (out + 132) = in1
+# asm 1: movl <in1=int32#2,132(<out=int32#6)
+# asm 2: movl <in1=%ecx,132(<out=%edi)
+movl %ecx,132(%edi)
+
+# qhasm:   *(uint32 *) (out + 136) = in2
+# asm 1: movl <in2=int32#3,136(<out=int32#6)
+# asm 2: movl <in2=%edx,136(<out=%edi)
+movl %edx,136(%edi)
+
+# qhasm:   *(uint32 *) (out + 140) = in3
+# asm 1: movl <in3=int32#4,140(<out=int32#6)
+# asm 2: movl <in3=%ebx,140(<out=%edi)
+movl %ebx,140(%edi)
+
+# qhasm:   in0 = z0
+# asm 1: movd   <z0=int6464#1,>in0=int32#1
+# asm 2: movd   <z0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in1 = z1
+# asm 1: movd   <z1=int6464#2,>in1=int32#2
+# asm 2: movd   <z1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in2 = z2
+# asm 1: movd   <z2=int6464#3,>in2=int32#3
+# asm 2: movd   <z2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in3 = z3
+# asm 1: movd   <z3=int6464#4,>in3=int32#4
+# asm 2: movd   <z3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in0 ^= *(uint32 *) (m + 192)
+# asm 1: xorl 192(<m=int32#5),<in0=int32#1
+# asm 2: xorl 192(<m=%esi),<in0=%eax
+xorl 192(%esi),%eax
+
+# qhasm:   in1 ^= *(uint32 *) (m + 196)
+# asm 1: xorl 196(<m=int32#5),<in1=int32#2
+# asm 2: xorl 196(<m=%esi),<in1=%ecx
+xorl 196(%esi),%ecx
+
+# qhasm:   in2 ^= *(uint32 *) (m + 200)
+# asm 1: xorl 200(<m=int32#5),<in2=int32#3
+# asm 2: xorl 200(<m=%esi),<in2=%edx
+xorl 200(%esi),%edx
+
+# qhasm:   in3 ^= *(uint32 *) (m + 204)
+# asm 1: xorl 204(<m=int32#5),<in3=int32#4
+# asm 2: xorl 204(<m=%esi),<in3=%ebx
+xorl 204(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 192) = in0
+# asm 1: movl <in0=int32#1,192(<out=int32#6)
+# asm 2: movl <in0=%eax,192(<out=%edi)
+movl %eax,192(%edi)
+
+# qhasm:   *(uint32 *) (out + 196) = in1
+# asm 1: movl <in1=int32#2,196(<out=int32#6)
+# asm 2: movl <in1=%ecx,196(<out=%edi)
+movl %ecx,196(%edi)
+
+# qhasm:   *(uint32 *) (out + 200) = in2
+# asm 1: movl <in2=int32#3,200(<out=int32#6)
+# asm 2: movl <in2=%edx,200(<out=%edi)
+movl %edx,200(%edi)
+
+# qhasm:   *(uint32 *) (out + 204) = in3
+# asm 1: movl <in3=int32#4,204(<out=int32#6)
+# asm 2: movl <in3=%ebx,204(<out=%edi)
+movl %ebx,204(%edi)
+
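+# note: the same add/extract/xor/store pattern now repeats for z4..z7
+#       (message/out offsets 16, 80, 144, 208) and then z8..z11
+#       (offsets 32, 96, 160, 224).
+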
+# qhasm:   z4 = z4_stack
+# asm 1: movdqa <z4_stack=stack128#34,>z4=int6464#1
+# asm 2: movdqa <z4_stack=560(%esp),>z4=%xmm0
+movdqa 560(%esp),%xmm0
+
+# qhasm:   z5 = z5_stack
+# asm 1: movdqa <z5_stack=stack128#21,>z5=int6464#2
+# asm 2: movdqa <z5_stack=352(%esp),>z5=%xmm1
+movdqa 352(%esp),%xmm1
+
+# qhasm:   z6 = z6_stack
+# asm 1: movdqa <z6_stack=stack128#26,>z6=int6464#3
+# asm 2: movdqa <z6_stack=432(%esp),>z6=%xmm2
+movdqa 432(%esp),%xmm2
+
+# qhasm:   z7 = z7_stack
+# asm 1: movdqa <z7_stack=stack128#29,>z7=int6464#4
+# asm 2: movdqa <z7_stack=480(%esp),>z7=%xmm3
+movdqa 480(%esp),%xmm3
+
+# qhasm:   uint32323232 z4 += orig4
+# asm 1: paddd <orig4=stack128#16,<z4=int6464#1
+# asm 2: paddd <orig4=272(%esp),<z4=%xmm0
+paddd 272(%esp),%xmm0
+
+# qhasm:   uint32323232 z5 += orig5
+# asm 1: paddd <orig5=stack128#5,<z5=int6464#2
+# asm 2: paddd <orig5=96(%esp),<z5=%xmm1
+paddd 96(%esp),%xmm1
+
+# qhasm:   uint32323232 z6 += orig6
+# asm 1: paddd <orig6=stack128#9,<z6=int6464#3
+# asm 2: paddd <orig6=160(%esp),<z6=%xmm2
+paddd 160(%esp),%xmm2
+
+# qhasm:   uint32323232 z7 += orig7
+# asm 1: paddd <orig7=stack128#13,<z7=int6464#4
+# asm 2: paddd <orig7=224(%esp),<z7=%xmm3
+paddd 224(%esp),%xmm3
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int32#5),<in4=int32#1
+# asm 2: xorl 16(<m=%esi),<in4=%eax
+xorl 16(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int32#5),<in5=int32#2
+# asm 2: xorl 20(<m=%esi),<in5=%ecx
+xorl 20(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int32#5),<in6=int32#3
+# asm 2: xorl 24(<m=%esi),<in6=%edx
+xorl 24(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int32#5),<in7=int32#4
+# asm 2: xorl 28(<m=%esi),<in7=%ebx
+xorl 28(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int32#1,16(<out=int32#6)
+# asm 2: movl <in4=%eax,16(<out=%edi)
+movl %eax,16(%edi)
+
+# qhasm:   *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int32#2,20(<out=int32#6)
+# asm 2: movl <in5=%ecx,20(<out=%edi)
+movl %ecx,20(%edi)
+
+# qhasm:   *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int32#3,24(<out=int32#6)
+# asm 2: movl <in6=%edx,24(<out=%edi)
+movl %edx,24(%edi)
+
+# qhasm:   *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int32#4,28(<out=int32#6)
+# asm 2: movl <in7=%ebx,28(<out=%edi)
+movl %ebx,28(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 80)
+# asm 1: xorl 80(<m=int32#5),<in4=int32#1
+# asm 2: xorl 80(<m=%esi),<in4=%eax
+xorl 80(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 84)
+# asm 1: xorl 84(<m=int32#5),<in5=int32#2
+# asm 2: xorl 84(<m=%esi),<in5=%ecx
+xorl 84(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 88)
+# asm 1: xorl 88(<m=int32#5),<in6=int32#3
+# asm 2: xorl 88(<m=%esi),<in6=%edx
+xorl 88(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 92)
+# asm 1: xorl 92(<m=int32#5),<in7=int32#4
+# asm 2: xorl 92(<m=%esi),<in7=%ebx
+xorl 92(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 80) = in4
+# asm 1: movl <in4=int32#1,80(<out=int32#6)
+# asm 2: movl <in4=%eax,80(<out=%edi)
+movl %eax,80(%edi)
+
+# qhasm:   *(uint32 *) (out + 84) = in5
+# asm 1: movl <in5=int32#2,84(<out=int32#6)
+# asm 2: movl <in5=%ecx,84(<out=%edi)
+movl %ecx,84(%edi)
+
+# qhasm:   *(uint32 *) (out + 88) = in6
+# asm 1: movl <in6=int32#3,88(<out=int32#6)
+# asm 2: movl <in6=%edx,88(<out=%edi)
+movl %edx,88(%edi)
+
+# qhasm:   *(uint32 *) (out + 92) = in7
+# asm 1: movl <in7=int32#4,92(<out=int32#6)
+# asm 2: movl <in7=%ebx,92(<out=%edi)
+movl %ebx,92(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z4 <<<= 96
+# asm 1: pshufd $0x39,<z4=int6464#1,<z4=int6464#1
+# asm 2: pshufd $0x39,<z4=%xmm0,<z4=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z5 <<<= 96
+# asm 1: pshufd $0x39,<z5=int6464#2,<z5=int6464#2
+# asm 2: pshufd $0x39,<z5=%xmm1,<z5=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z6 <<<= 96
+# asm 1: pshufd $0x39,<z6=int6464#3,<z6=int6464#3
+# asm 2: pshufd $0x39,<z6=%xmm2,<z6=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z7 <<<= 96
+# asm 1: pshufd $0x39,<z7=int6464#4,<z7=int6464#4
+# asm 2: pshufd $0x39,<z7=%xmm3,<z7=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in4 ^= *(uint32 *) (m + 144)
+# asm 1: xorl 144(<m=int32#5),<in4=int32#1
+# asm 2: xorl 144(<m=%esi),<in4=%eax
+xorl 144(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 148)
+# asm 1: xorl 148(<m=int32#5),<in5=int32#2
+# asm 2: xorl 148(<m=%esi),<in5=%ecx
+xorl 148(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 152)
+# asm 1: xorl 152(<m=int32#5),<in6=int32#3
+# asm 2: xorl 152(<m=%esi),<in6=%edx
+xorl 152(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 156)
+# asm 1: xorl 156(<m=int32#5),<in7=int32#4
+# asm 2: xorl 156(<m=%esi),<in7=%ebx
+xorl 156(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 144) = in4
+# asm 1: movl <in4=int32#1,144(<out=int32#6)
+# asm 2: movl <in4=%eax,144(<out=%edi)
+movl %eax,144(%edi)
+
+# qhasm:   *(uint32 *) (out + 148) = in5
+# asm 1: movl <in5=int32#2,148(<out=int32#6)
+# asm 2: movl <in5=%ecx,148(<out=%edi)
+movl %ecx,148(%edi)
+
+# qhasm:   *(uint32 *) (out + 152) = in6
+# asm 1: movl <in6=int32#3,152(<out=int32#6)
+# asm 2: movl <in6=%edx,152(<out=%edi)
+movl %edx,152(%edi)
+
+# qhasm:   *(uint32 *) (out + 156) = in7
+# asm 1: movl <in7=int32#4,156(<out=int32#6)
+# asm 2: movl <in7=%ebx,156(<out=%edi)
+movl %ebx,156(%edi)
+
+# qhasm:   in4 = z4
+# asm 1: movd   <z4=int6464#1,>in4=int32#1
+# asm 2: movd   <z4=%xmm0,>in4=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in5 = z5
+# asm 1: movd   <z5=int6464#2,>in5=int32#2
+# asm 2: movd   <z5=%xmm1,>in5=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in6 = z6
+# asm 1: movd   <z6=int6464#3,>in6=int32#3
+# asm 2: movd   <z6=%xmm2,>in6=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in7 = z7
+# asm 1: movd   <z7=int6464#4,>in7=int32#4
+# asm 2: movd   <z7=%xmm3,>in7=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in4 ^= *(uint32 *) (m + 208)
+# asm 1: xorl 208(<m=int32#5),<in4=int32#1
+# asm 2: xorl 208(<m=%esi),<in4=%eax
+xorl 208(%esi),%eax
+
+# qhasm:   in5 ^= *(uint32 *) (m + 212)
+# asm 1: xorl 212(<m=int32#5),<in5=int32#2
+# asm 2: xorl 212(<m=%esi),<in5=%ecx
+xorl 212(%esi),%ecx
+
+# qhasm:   in6 ^= *(uint32 *) (m + 216)
+# asm 1: xorl 216(<m=int32#5),<in6=int32#3
+# asm 2: xorl 216(<m=%esi),<in6=%edx
+xorl 216(%esi),%edx
+
+# qhasm:   in7 ^= *(uint32 *) (m + 220)
+# asm 1: xorl 220(<m=int32#5),<in7=int32#4
+# asm 2: xorl 220(<m=%esi),<in7=%ebx
+xorl 220(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 208) = in4
+# asm 1: movl <in4=int32#1,208(<out=int32#6)
+# asm 2: movl <in4=%eax,208(<out=%edi)
+movl %eax,208(%edi)
+
+# qhasm:   *(uint32 *) (out + 212) = in5
+# asm 1: movl <in5=int32#2,212(<out=int32#6)
+# asm 2: movl <in5=%ecx,212(<out=%edi)
+movl %ecx,212(%edi)
+
+# qhasm:   *(uint32 *) (out + 216) = in6
+# asm 1: movl <in6=int32#3,216(<out=int32#6)
+# asm 2: movl <in6=%edx,216(<out=%edi)
+movl %edx,216(%edi)
+
+# qhasm:   *(uint32 *) (out + 220) = in7
+# asm 1: movl <in7=int32#4,220(<out=int32#6)
+# asm 2: movl <in7=%ebx,220(<out=%edi)
+movl %ebx,220(%edi)
+
+# qhasm:   z8 = z8_stack
+# asm 1: movdqa <z8_stack=stack128#37,>z8=int6464#1
+# asm 2: movdqa <z8_stack=608(%esp),>z8=%xmm0
+movdqa 608(%esp),%xmm0
+
+# qhasm:   z9 = z9_stack
+# asm 1: movdqa <z9_stack=stack128#32,>z9=int6464#2
+# asm 2: movdqa <z9_stack=528(%esp),>z9=%xmm1
+movdqa 528(%esp),%xmm1
+
+# qhasm:   z10 = z10_stack
+# asm 1: movdqa <z10_stack=stack128#22,>z10=int6464#3
+# asm 2: movdqa <z10_stack=368(%esp),>z10=%xmm2
+movdqa 368(%esp),%xmm2
+
+# qhasm:   z11 = z11_stack
+# asm 1: movdqa <z11_stack=stack128#27,>z11=int6464#4
+# asm 2: movdqa <z11_stack=448(%esp),>z11=%xmm3
+movdqa 448(%esp),%xmm3
+
+# qhasm:   uint32323232 z8 += orig8
+# asm 1: paddd <orig8=stack128#19,<z8=int6464#1
+# asm 2: paddd <orig8=320(%esp),<z8=%xmm0
+paddd 320(%esp),%xmm0
+
+# qhasm:   uint32323232 z9 += orig9
+# asm 1: paddd <orig9=stack128#20,<z9=int6464#2
+# asm 2: paddd <orig9=336(%esp),<z9=%xmm1
+paddd 336(%esp),%xmm1
+
+# qhasm:   uint32323232 z10 += orig10
+# asm 1: paddd <orig10=stack128#6,<z10=int6464#3
+# asm 2: paddd <orig10=112(%esp),<z10=%xmm2
+paddd 112(%esp),%xmm2
+
+# qhasm:   uint32323232 z11 += orig11
+# asm 1: paddd <orig11=stack128#10,<z11=int6464#4
+# asm 2: paddd <orig11=176(%esp),<z11=%xmm3
+paddd 176(%esp),%xmm3
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int32#5),<in8=int32#1
+# asm 2: xorl 32(<m=%esi),<in8=%eax
+xorl 32(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int32#5),<in9=int32#2
+# asm 2: xorl 36(<m=%esi),<in9=%ecx
+xorl 36(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int32#5),<in10=int32#3
+# asm 2: xorl 40(<m=%esi),<in10=%edx
+xorl 40(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int32#5),<in11=int32#4
+# asm 2: xorl 44(<m=%esi),<in11=%ebx
+xorl 44(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int32#1,32(<out=int32#6)
+# asm 2: movl <in8=%eax,32(<out=%edi)
+movl %eax,32(%edi)
+
+# qhasm:   *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int32#2,36(<out=int32#6)
+# asm 2: movl <in9=%ecx,36(<out=%edi)
+movl %ecx,36(%edi)
+
+# qhasm:   *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int32#3,40(<out=int32#6)
+# asm 2: movl <in10=%edx,40(<out=%edi)
+movl %edx,40(%edi)
+
+# qhasm:   *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int32#4,44(<out=int32#6)
+# asm 2: movl <in11=%ebx,44(<out=%edi)
+movl %ebx,44(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 96)
+# asm 1: xorl 96(<m=int32#5),<in8=int32#1
+# asm 2: xorl 96(<m=%esi),<in8=%eax
+xorl 96(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 100)
+# asm 1: xorl 100(<m=int32#5),<in9=int32#2
+# asm 2: xorl 100(<m=%esi),<in9=%ecx
+xorl 100(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 104)
+# asm 1: xorl 104(<m=int32#5),<in10=int32#3
+# asm 2: xorl 104(<m=%esi),<in10=%edx
+xorl 104(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 108)
+# asm 1: xorl 108(<m=int32#5),<in11=int32#4
+# asm 2: xorl 108(<m=%esi),<in11=%ebx
+xorl 108(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 96) = in8
+# asm 1: movl <in8=int32#1,96(<out=int32#6)
+# asm 2: movl <in8=%eax,96(<out=%edi)
+movl %eax,96(%edi)
+
+# qhasm:   *(uint32 *) (out + 100) = in9
+# asm 1: movl <in9=int32#2,100(<out=int32#6)
+# asm 2: movl <in9=%ecx,100(<out=%edi)
+movl %ecx,100(%edi)
+
+# qhasm:   *(uint32 *) (out + 104) = in10
+# asm 1: movl <in10=int32#3,104(<out=int32#6)
+# asm 2: movl <in10=%edx,104(<out=%edi)
+movl %edx,104(%edi)
+
+# qhasm:   *(uint32 *) (out + 108) = in11
+# asm 1: movl <in11=int32#4,108(<out=int32#6)
+# asm 2: movl <in11=%ebx,108(<out=%edi)
+movl %ebx,108(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z8 <<<= 96
+# asm 1: pshufd $0x39,<z8=int6464#1,<z8=int6464#1
+# asm 2: pshufd $0x39,<z8=%xmm0,<z8=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z9 <<<= 96
+# asm 1: pshufd $0x39,<z9=int6464#2,<z9=int6464#2
+# asm 2: pshufd $0x39,<z9=%xmm1,<z9=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z10 <<<= 96
+# asm 1: pshufd $0x39,<z10=int6464#3,<z10=int6464#3
+# asm 2: pshufd $0x39,<z10=%xmm2,<z10=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z11 <<<= 96
+# asm 1: pshufd $0x39,<z11=int6464#4,<z11=int6464#4
+# asm 2: pshufd $0x39,<z11=%xmm3,<z11=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in8 ^= *(uint32 *) (m + 160)
+# asm 1: xorl 160(<m=int32#5),<in8=int32#1
+# asm 2: xorl 160(<m=%esi),<in8=%eax
+xorl 160(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 164)
+# asm 1: xorl 164(<m=int32#5),<in9=int32#2
+# asm 2: xorl 164(<m=%esi),<in9=%ecx
+xorl 164(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 168)
+# asm 1: xorl 168(<m=int32#5),<in10=int32#3
+# asm 2: xorl 168(<m=%esi),<in10=%edx
+xorl 168(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 172)
+# asm 1: xorl 172(<m=int32#5),<in11=int32#4
+# asm 2: xorl 172(<m=%esi),<in11=%ebx
+xorl 172(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 160) = in8
+# asm 1: movl <in8=int32#1,160(<out=int32#6)
+# asm 2: movl <in8=%eax,160(<out=%edi)
+movl %eax,160(%edi)
+
+# qhasm:   *(uint32 *) (out + 164) = in9
+# asm 1: movl <in9=int32#2,164(<out=int32#6)
+# asm 2: movl <in9=%ecx,164(<out=%edi)
+movl %ecx,164(%edi)
+
+# qhasm:   *(uint32 *) (out + 168) = in10
+# asm 1: movl <in10=int32#3,168(<out=int32#6)
+# asm 2: movl <in10=%edx,168(<out=%edi)
+movl %edx,168(%edi)
+
+# qhasm:   *(uint32 *) (out + 172) = in11
+# asm 1: movl <in11=int32#4,172(<out=int32#6)
+# asm 2: movl <in11=%ebx,172(<out=%edi)
+movl %ebx,172(%edi)
+
+# qhasm:   in8 = z8
+# asm 1: movd   <z8=int6464#1,>in8=int32#1
+# asm 2: movd   <z8=%xmm0,>in8=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in9 = z9
+# asm 1: movd   <z9=int6464#2,>in9=int32#2
+# asm 2: movd   <z9=%xmm1,>in9=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in10 = z10
+# asm 1: movd   <z10=int6464#3,>in10=int32#3
+# asm 2: movd   <z10=%xmm2,>in10=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in11 = z11
+# asm 1: movd   <z11=int6464#4,>in11=int32#4
+# asm 2: movd   <z11=%xmm3,>in11=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in8 ^= *(uint32 *) (m + 224)
+# asm 1: xorl 224(<m=int32#5),<in8=int32#1
+# asm 2: xorl 224(<m=%esi),<in8=%eax
+xorl 224(%esi),%eax
+
+# qhasm:   in9 ^= *(uint32 *) (m + 228)
+# asm 1: xorl 228(<m=int32#5),<in9=int32#2
+# asm 2: xorl 228(<m=%esi),<in9=%ecx
+xorl 228(%esi),%ecx
+
+# qhasm:   in10 ^= *(uint32 *) (m + 232)
+# asm 1: xorl 232(<m=int32#5),<in10=int32#3
+# asm 2: xorl 232(<m=%esi),<in10=%edx
+xorl 232(%esi),%edx
+
+# qhasm:   in11 ^= *(uint32 *) (m + 236)
+# asm 1: xorl 236(<m=int32#5),<in11=int32#4
+# asm 2: xorl 236(<m=%esi),<in11=%ebx
+xorl 236(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 224) = in8
+# asm 1: movl <in8=int32#1,224(<out=int32#6)
+# asm 2: movl <in8=%eax,224(<out=%edi)
+movl %eax,224(%edi)
+
+# qhasm:   *(uint32 *) (out + 228) = in9
+# asm 1: movl <in9=int32#2,228(<out=int32#6)
+# asm 2: movl <in9=%ecx,228(<out=%edi)
+movl %ecx,228(%edi)
+
+# qhasm:   *(uint32 *) (out + 232) = in10
+# asm 1: movl <in10=int32#3,232(<out=int32#6)
+# asm 2: movl <in10=%edx,232(<out=%edi)
+movl %edx,232(%edi)
+
+# qhasm:   *(uint32 *) (out + 236) = in11
+# asm 1: movl <in11=int32#4,236(<out=int32#6)
+# asm 2: movl <in11=%ebx,236(<out=%edi)
+movl %ebx,236(%edi)
+
+# qhasm:   z12 = z12_stack
+# asm 1: movdqa <z12_stack=stack128#35,>z12=int6464#1
+# asm 2: movdqa <z12_stack=576(%esp),>z12=%xmm0
+movdqa 576(%esp),%xmm0
+
+# qhasm:   z13 = z13_stack
+# asm 1: movdqa <z13_stack=stack128#30,>z13=int6464#2
+# asm 2: movdqa <z13_stack=496(%esp),>z13=%xmm1
+movdqa 496(%esp),%xmm1
+
+# qhasm:   z14 = z14_stack
+# asm 1: movdqa <z14_stack=stack128#24,>z14=int6464#3
+# asm 2: movdqa <z14_stack=400(%esp),>z14=%xmm2
+movdqa 400(%esp),%xmm2
+
+# qhasm:   z15 = z15_stack
+# asm 1: movdqa <z15_stack=stack128#23,>z15=int6464#4
+# asm 2: movdqa <z15_stack=384(%esp),>z15=%xmm3
+movdqa 384(%esp),%xmm3
+
+# qhasm:   uint32323232 z12 += orig12
+# asm 1: paddd <orig12=stack128#11,<z12=int6464#1
+# asm 2: paddd <orig12=192(%esp),<z12=%xmm0
+paddd 192(%esp),%xmm0
+
+# qhasm:   uint32323232 z13 += orig13
+# asm 1: paddd <orig13=stack128#14,<z13=int6464#2
+# asm 2: paddd <orig13=240(%esp),<z13=%xmm1
+paddd 240(%esp),%xmm1
+
+# qhasm:   uint32323232 z14 += orig14
+# asm 1: paddd <orig14=stack128#17,<z14=int6464#3
+# asm 2: paddd <orig14=288(%esp),<z14=%xmm2
+paddd 288(%esp),%xmm2
+
+# qhasm:   uint32323232 z15 += orig15
+# asm 1: paddd <orig15=stack128#7,<z15=int6464#4
+# asm 2: paddd <orig15=128(%esp),<z15=%xmm3
+paddd 128(%esp),%xmm3
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int32#5),<in12=int32#1
+# asm 2: xorl 48(<m=%esi),<in12=%eax
+xorl 48(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int32#5),<in13=int32#2
+# asm 2: xorl 52(<m=%esi),<in13=%ecx
+xorl 52(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int32#5),<in14=int32#3
+# asm 2: xorl 56(<m=%esi),<in14=%edx
+xorl 56(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int32#5),<in15=int32#4
+# asm 2: xorl 60(<m=%esi),<in15=%ebx
+xorl 60(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int32#1,48(<out=int32#6)
+# asm 2: movl <in12=%eax,48(<out=%edi)
+movl %eax,48(%edi)
+
+# qhasm:   *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int32#2,52(<out=int32#6)
+# asm 2: movl <in13=%ecx,52(<out=%edi)
+movl %ecx,52(%edi)
+
+# qhasm:   *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int32#3,56(<out=int32#6)
+# asm 2: movl <in14=%edx,56(<out=%edi)
+movl %edx,56(%edi)
+
+# qhasm:   *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int32#4,60(<out=int32#6)
+# asm 2: movl <in15=%ebx,60(<out=%edi)
+movl %ebx,60(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 112)
+# asm 1: xorl 112(<m=int32#5),<in12=int32#1
+# asm 2: xorl 112(<m=%esi),<in12=%eax
+xorl 112(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 116)
+# asm 1: xorl 116(<m=int32#5),<in13=int32#2
+# asm 2: xorl 116(<m=%esi),<in13=%ecx
+xorl 116(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 120)
+# asm 1: xorl 120(<m=int32#5),<in14=int32#3
+# asm 2: xorl 120(<m=%esi),<in14=%edx
+xorl 120(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 124)
+# asm 1: xorl 124(<m=int32#5),<in15=int32#4
+# asm 2: xorl 124(<m=%esi),<in15=%ebx
+xorl 124(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 112) = in12
+# asm 1: movl <in12=int32#1,112(<out=int32#6)
+# asm 2: movl <in12=%eax,112(<out=%edi)
+movl %eax,112(%edi)
+
+# qhasm:   *(uint32 *) (out + 116) = in13
+# asm 1: movl <in13=int32#2,116(<out=int32#6)
+# asm 2: movl <in13=%ecx,116(<out=%edi)
+movl %ecx,116(%edi)
+
+# qhasm:   *(uint32 *) (out + 120) = in14
+# asm 1: movl <in14=int32#3,120(<out=int32#6)
+# asm 2: movl <in14=%edx,120(<out=%edi)
+movl %edx,120(%edi)
+
+# qhasm:   *(uint32 *) (out + 124) = in15
+# asm 1: movl <in15=int32#4,124(<out=int32#6)
+# asm 2: movl <in15=%ebx,124(<out=%edi)
+movl %ebx,124(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   z12 <<<= 96
+# asm 1: pshufd $0x39,<z12=int6464#1,<z12=int6464#1
+# asm 2: pshufd $0x39,<z12=%xmm0,<z12=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm:   z13 <<<= 96
+# asm 1: pshufd $0x39,<z13=int6464#2,<z13=int6464#2
+# asm 2: pshufd $0x39,<z13=%xmm1,<z13=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:   z14 <<<= 96
+# asm 1: pshufd $0x39,<z14=int6464#3,<z14=int6464#3
+# asm 2: pshufd $0x39,<z14=%xmm2,<z14=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm:   z15 <<<= 96
+# asm 1: pshufd $0x39,<z15=int6464#4,<z15=int6464#4
+# asm 2: pshufd $0x39,<z15=%xmm3,<z15=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:   in12 ^= *(uint32 *) (m + 176)
+# asm 1: xorl 176(<m=int32#5),<in12=int32#1
+# asm 2: xorl 176(<m=%esi),<in12=%eax
+xorl 176(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 180)
+# asm 1: xorl 180(<m=int32#5),<in13=int32#2
+# asm 2: xorl 180(<m=%esi),<in13=%ecx
+xorl 180(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 184)
+# asm 1: xorl 184(<m=int32#5),<in14=int32#3
+# asm 2: xorl 184(<m=%esi),<in14=%edx
+xorl 184(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 188)
+# asm 1: xorl 188(<m=int32#5),<in15=int32#4
+# asm 2: xorl 188(<m=%esi),<in15=%ebx
+xorl 188(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 176) = in12
+# asm 1: movl <in12=int32#1,176(<out=int32#6)
+# asm 2: movl <in12=%eax,176(<out=%edi)
+movl %eax,176(%edi)
+
+# qhasm:   *(uint32 *) (out + 180) = in13
+# asm 1: movl <in13=int32#2,180(<out=int32#6)
+# asm 2: movl <in13=%ecx,180(<out=%edi)
+movl %ecx,180(%edi)
+
+# qhasm:   *(uint32 *) (out + 184) = in14
+# asm 1: movl <in14=int32#3,184(<out=int32#6)
+# asm 2: movl <in14=%edx,184(<out=%edi)
+movl %edx,184(%edi)
+
+# qhasm:   *(uint32 *) (out + 188) = in15
+# asm 1: movl <in15=int32#4,188(<out=int32#6)
+# asm 2: movl <in15=%ebx,188(<out=%edi)
+movl %ebx,188(%edi)
+
+# qhasm:   in12 = z12
+# asm 1: movd   <z12=int6464#1,>in12=int32#1
+# asm 2: movd   <z12=%xmm0,>in12=%eax
+movd   %xmm0,%eax
+
+# qhasm:   in13 = z13
+# asm 1: movd   <z13=int6464#2,>in13=int32#2
+# asm 2: movd   <z13=%xmm1,>in13=%ecx
+movd   %xmm1,%ecx
+
+# qhasm:   in14 = z14
+# asm 1: movd   <z14=int6464#3,>in14=int32#3
+# asm 2: movd   <z14=%xmm2,>in14=%edx
+movd   %xmm2,%edx
+
+# qhasm:   in15 = z15
+# asm 1: movd   <z15=int6464#4,>in15=int32#4
+# asm 2: movd   <z15=%xmm3,>in15=%ebx
+movd   %xmm3,%ebx
+
+# qhasm:   in12 ^= *(uint32 *) (m + 240)
+# asm 1: xorl 240(<m=int32#5),<in12=int32#1
+# asm 2: xorl 240(<m=%esi),<in12=%eax
+xorl 240(%esi),%eax
+
+# qhasm:   in13 ^= *(uint32 *) (m + 244)
+# asm 1: xorl 244(<m=int32#5),<in13=int32#2
+# asm 2: xorl 244(<m=%esi),<in13=%ecx
+xorl 244(%esi),%ecx
+
+# qhasm:   in14 ^= *(uint32 *) (m + 248)
+# asm 1: xorl 248(<m=int32#5),<in14=int32#3
+# asm 2: xorl 248(<m=%esi),<in14=%edx
+xorl 248(%esi),%edx
+
+# qhasm:   in15 ^= *(uint32 *) (m + 252)
+# asm 1: xorl 252(<m=int32#5),<in15=int32#4
+# asm 2: xorl 252(<m=%esi),<in15=%ebx
+xorl 252(%esi),%ebx
+
+# qhasm:   *(uint32 *) (out + 240) = in12
+# asm 1: movl <in12=int32#1,240(<out=int32#6)
+# asm 2: movl <in12=%eax,240(<out=%edi)
+movl %eax,240(%edi)
+
+# qhasm:   *(uint32 *) (out + 244) = in13
+# asm 1: movl <in13=int32#2,244(<out=int32#6)
+# asm 2: movl <in13=%ecx,244(<out=%edi)
+movl %ecx,244(%edi)
+
+# qhasm:   *(uint32 *) (out + 248) = in14
+# asm 1: movl <in14=int32#3,248(<out=int32#6)
+# asm 2: movl <in14=%edx,248(<out=%edi)
+movl %edx,248(%edi)
+
+# qhasm:   *(uint32 *) (out + 252) = in15
+# asm 1: movl <in15=int32#4,252(<out=int32#6)
+# asm 2: movl <in15=%ebx,252(<out=%edi)
+movl %ebx,252(%edi)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
+# qhasm:   bytes -= 256
+# asm 1: sub  $256,<bytes=int32#1
+# asm 2: sub  $256,<bytes=%eax
+sub  $256,%eax
+
+# qhasm:   m += 256
+# asm 1: add  $256,<m=int32#5
+# asm 2: add  $256,<m=%esi
+add  $256,%esi
+
+# qhasm:   out += 256
+# asm 1: add  $256,<out=int32#6
+# asm 2: add  $256,<out=%edi
+add  $256,%edi
+
+# qhasm:   out_stack = out
+# asm 1: movl <out=int32#6,>out_stack=stack32#6
+# asm 2: movl <out=%edi,>out_stack=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:                            unsigned<? bytes - 256
+# asm 1: cmp  $256,<bytes=int32#1
+# asm 2: cmp  $256,<bytes=%eax
+cmp  $256,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast256 if !unsigned<
+jae ._bytesatleast256
+
+# qhasm:                 unsigned>? bytes - 0
+# asm 1: cmp  $0,<bytes=int32#1
+# asm 2: cmp  $0,<bytes=%eax
+cmp  $0,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto done if !unsigned>
+jbe ._done
+# comment:fp stack unchanged by fallthrough
+
+# qhasm: bytesbetween1and255:
+._bytesbetween1and255:
+
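+# note: final chunk of 1..255 bytes.  If fewer than 64 bytes remain, the
+# leftover input is copied into the stack buffer tmp and both m and out are
+# pointed at tmp, so one full 64-byte block can still be produced; the real
+# destination is saved in ctarget and only the valid bytes are copied back
+# once the block is finished.
+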
+# qhasm:                   unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int32#1
+# asm 2: cmp  $64,<bytes=%eax
+cmp  $64,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto nocopy if !unsigned<
+jae ._nocopy
+
+# qhasm:     ctarget = out
+# asm 1: movl <out=int32#6,>ctarget=stack32#6
+# asm 2: movl <out=%edi,>ctarget=20(%esp)
+movl %edi,20(%esp)
+
+# qhasm:     out = &tmp
+# asm 1: leal <tmp=stack512#1,>out=int32#6
+# asm 2: leal <tmp=640(%esp),>out=%edi
+leal 640(%esp),%edi
+
+# qhasm:     i = bytes
+# asm 1: mov  <bytes=int32#1,>i=int32#2
+# asm 2: mov  <bytes=%eax,>i=%ecx
+mov  %eax,%ecx
+
+# qhasm:     while (i) { *out++ = *m++; --i }
+rep movsb
+
+# qhasm:     out = &tmp
+# asm 1: leal <tmp=stack512#1,>out=int32#6
+# asm 2: leal <tmp=640(%esp),>out=%edi
+leal 640(%esp),%edi
+
+# qhasm:     m = &tmp
+# asm 1: leal <tmp=stack512#1,>m=int32#5
+# asm 2: leal <tmp=640(%esp),>m=%esi
+leal 640(%esp),%esi
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:   nocopy:
+._nocopy:
+
+# qhasm:   bytes_stack = bytes
+# asm 1: movl <bytes=int32#1,>bytes_stack=stack32#7
+# asm 2: movl <bytes=%eax,>bytes_stack=24(%esp)
+movl %eax,24(%esp)
+
+# qhasm: diag0 = x0
+# asm 1: movdqa <x0=stack128#3,>diag0=int6464#1
+# asm 2: movdqa <x0=64(%esp),>diag0=%xmm0
+movdqa 64(%esp),%xmm0
+
+# qhasm: diag1 = x1
+# asm 1: movdqa <x1=stack128#2,>diag1=int6464#2
+# asm 2: movdqa <x1=48(%esp),>diag1=%xmm1
+movdqa 48(%esp),%xmm1
+
+# qhasm: diag2 = x2
+# asm 1: movdqa <x2=stack128#4,>diag2=int6464#3
+# asm 2: movdqa <x2=80(%esp),>diag2=%xmm2
+movdqa 80(%esp),%xmm2
+
+# qhasm: diag3 = x3
+# asm 1: movdqa <x3=stack128#1,>diag3=int6464#4
+# asm 2: movdqa <x3=32(%esp),>diag3=%xmm3
+movdqa 32(%esp),%xmm3
+
+# qhasm:                     a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm: i = 8
+# asm 1: mov  $8,>i=int32#1
+# asm 2: mov  $8,>i=%eax
+mov  $8,%eax
+
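+# note: single-block loop.  diag0..diag3 hold the 4x4 Salsa state as four
+# xmm "diagonals"; every pass below performs two double-rounds (each
+# pslld/psrld pair is a 32-bit rotation by 7, 9, 13 or 18, and the pshufd
+# shuffles realign the diagonals), so i is decreased by 4 rounds per pass.
+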
+# qhasm: mainloop2:
+._mainloop2:
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a0 += diag0
+# asm 1: paddd <diag0=int6464#1,<a0=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a0=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a1 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a1=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a1=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b0 = a0
+# asm 1: movdqa <a0=int6464#5,>b0=int6464#7
+# asm 2: movdqa <a0=%xmm4,>b0=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a0 <<= 7
+# asm 1: pslld $7,<a0=int6464#5
+# asm 2: pslld $7,<a0=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b0 >>= 25
+# asm 1: psrld $25,<b0=int6464#7
+# asm 2: psrld $25,<b0=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag3 ^= a0
+# asm 1: pxor  <a0=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a0=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                 diag3 ^= b0
+# asm 1: pxor  <b0=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b0=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm: uint32323232                        a1 += diag3
+# asm 1: paddd <diag3=int6464#4,<a1=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a1=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                                                 a2 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a2=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a2=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                     b1 = a1
+# asm 1: movdqa <a1=int6464#6,>b1=int6464#7
+# asm 2: movdqa <a1=%xmm5,>b1=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a1 <<= 9
+# asm 1: pslld $9,<a1=int6464#6
+# asm 2: pslld $9,<a1=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b1 >>= 23
+# asm 1: psrld $23,<b1=int6464#7
+# asm 2: psrld $23,<b1=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a1
+# asm 1: pxor  <a1=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a1=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag3 <<<= 32
+# asm 1: pshufd $0x93,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x93,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x93,%xmm3,%xmm3
+
+# qhasm:                                 diag2 ^= b1
+# asm 1: pxor  <b1=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b1=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a2 += diag2
+# asm 1: paddd <diag2=int6464#3,<a2=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a2=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a3 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a3=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a3=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b2 = a2
+# asm 1: movdqa <a2=int6464#5,>b2=int6464#7
+# asm 2: movdqa <a2=%xmm4,>b2=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a2 <<= 13
+# asm 1: pslld $13,<a2=int6464#5
+# asm 2: pslld $13,<a2=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b2 >>= 19
+# asm 1: psrld $19,<b2=int6464#7
+# asm 2: psrld $19,<b2=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag1 ^= a2
+# asm 1: pxor  <a2=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a2=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag1 ^= b2
+# asm 1: pxor  <b2=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b2=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                                                        a3 += diag1
+# asm 1: paddd <diag1=int6464#2,<a3=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a3=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                 a4 = diag3
+# asm 1: movdqa <diag3=int6464#4,>a4=int6464#5
+# asm 2: movdqa <diag3=%xmm3,>a4=%xmm4
+movdqa %xmm3,%xmm4
+
+# qhasm:                                                                     b3 = a3
+# asm 1: movdqa <a3=int6464#6,>b3=int6464#7
+# asm 2: movdqa <a3=%xmm5,>b3=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a3 <<= 18
+# asm 1: pslld $18,<a3=int6464#6
+# asm 2: pslld $18,<a3=%xmm5
+pslld $18,%xmm5
+
+# qhasm: uint32323232                                                        b3 >>= 14
+# asm 1: psrld $14,<b3=int6464#7
+# asm 2: psrld $14,<b3=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a3
+# asm 1: pxor  <a3=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a3=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm:                                                                 diag0 ^= b3
+# asm 1: pxor  <b3=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b3=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+
+# qhasm: uint32323232        a4 += diag0
+# asm 1: paddd <diag0=int6464#1,<a4=int6464#5
+# asm 2: paddd <diag0=%xmm0,<a4=%xmm4
+paddd %xmm0,%xmm4
+
+# qhasm:                                 a5 = diag0
+# asm 1: movdqa <diag0=int6464#1,>a5=int6464#6
+# asm 2: movdqa <diag0=%xmm0,>a5=%xmm5
+movdqa %xmm0,%xmm5
+
+# qhasm:                     b4 = a4
+# asm 1: movdqa <a4=int6464#5,>b4=int6464#7
+# asm 2: movdqa <a4=%xmm4,>b4=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232        a4 <<= 7
+# asm 1: pslld $7,<a4=int6464#5
+# asm 2: pslld $7,<a4=%xmm4
+pslld $7,%xmm4
+
+# qhasm: uint32323232        b4 >>= 25
+# asm 1: psrld $25,<b4=int6464#7
+# asm 2: psrld $25,<b4=%xmm6
+psrld $25,%xmm6
+
+# qhasm:                 diag1 ^= a4
+# asm 1: pxor  <a4=int6464#5,<diag1=int6464#2
+# asm 2: pxor  <a4=%xmm4,<diag1=%xmm1
+pxor  %xmm4,%xmm1
+
+# qhasm:                 diag1 ^= b4
+# asm 1: pxor  <b4=int6464#7,<diag1=int6464#2
+# asm 2: pxor  <b4=%xmm6,<diag1=%xmm1
+pxor  %xmm6,%xmm1
+
+# qhasm: uint32323232                        a5 += diag1
+# asm 1: paddd <diag1=int6464#2,<a5=int6464#6
+# asm 2: paddd <diag1=%xmm1,<a5=%xmm5
+paddd %xmm1,%xmm5
+
+# qhasm:                                                 a6 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a6=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a6=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                     b5 = a5
+# asm 1: movdqa <a5=int6464#6,>b5=int6464#7
+# asm 2: movdqa <a5=%xmm5,>b5=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                        a5 <<= 9
+# asm 1: pslld $9,<a5=int6464#6
+# asm 2: pslld $9,<a5=%xmm5
+pslld $9,%xmm5
+
+# qhasm: uint32323232                        b5 >>= 23
+# asm 1: psrld $23,<b5=int6464#7
+# asm 2: psrld $23,<b5=%xmm6
+psrld $23,%xmm6
+
+# qhasm:                                 diag2 ^= a5
+# asm 1: pxor  <a5=int6464#6,<diag2=int6464#3
+# asm 2: pxor  <a5=%xmm5,<diag2=%xmm2
+pxor  %xmm5,%xmm2
+
+# qhasm:                 diag1 <<<= 32
+# asm 1: pshufd $0x93,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x93,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x93,%xmm1,%xmm1
+
+# qhasm:                                 diag2 ^= b5
+# asm 1: pxor  <b5=int6464#7,<diag2=int6464#3
+# asm 2: pxor  <b5=%xmm6,<diag2=%xmm2
+pxor  %xmm6,%xmm2
+
+# qhasm: uint32323232                                        a6 += diag2
+# asm 1: paddd <diag2=int6464#3,<a6=int6464#5
+# asm 2: paddd <diag2=%xmm2,<a6=%xmm4
+paddd %xmm2,%xmm4
+
+# qhasm:                                                                 a7 = diag2
+# asm 1: movdqa <diag2=int6464#3,>a7=int6464#6
+# asm 2: movdqa <diag2=%xmm2,>a7=%xmm5
+movdqa %xmm2,%xmm5
+
+# qhasm:                                                     b6 = a6
+# asm 1: movdqa <a6=int6464#5,>b6=int6464#7
+# asm 2: movdqa <a6=%xmm4,>b6=%xmm6
+movdqa %xmm4,%xmm6
+
+# qhasm: uint32323232                                        a6 <<= 13
+# asm 1: pslld $13,<a6=int6464#5
+# asm 2: pslld $13,<a6=%xmm4
+pslld $13,%xmm4
+
+# qhasm: uint32323232                                        b6 >>= 19
+# asm 1: psrld $19,<b6=int6464#7
+# asm 2: psrld $19,<b6=%xmm6
+psrld $19,%xmm6
+
+# qhasm:                                                 diag3 ^= a6
+# asm 1: pxor  <a6=int6464#5,<diag3=int6464#4
+# asm 2: pxor  <a6=%xmm4,<diag3=%xmm3
+pxor  %xmm4,%xmm3
+
+# qhasm:                                 diag2 <<<= 64
+# asm 1: pshufd $0x4e,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x4e,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x4e,%xmm2,%xmm2
+
+# qhasm:                                                 diag3 ^= b6
+# asm 1: pxor  <b6=int6464#7,<diag3=int6464#4
+# asm 2: pxor  <b6=%xmm6,<diag3=%xmm3
+pxor  %xmm6,%xmm3
+
+# qhasm:                  unsigned>? i -= 4
+# asm 1: sub  $4,<i=int32#1
+# asm 2: sub  $4,<i=%eax
+sub  $4,%eax
+
+# qhasm: uint32323232                                                        a7 += diag3
+# asm 1: paddd <diag3=int6464#4,<a7=int6464#6
+# asm 2: paddd <diag3=%xmm3,<a7=%xmm5
+paddd %xmm3,%xmm5
+
+# qhasm:                 a0 = diag1
+# asm 1: movdqa <diag1=int6464#2,>a0=int6464#5
+# asm 2: movdqa <diag1=%xmm1,>a0=%xmm4
+movdqa %xmm1,%xmm4
+
+# qhasm:                                                                     b7 = a7
+# asm 1: movdqa <a7=int6464#6,>b7=int6464#7
+# asm 2: movdqa <a7=%xmm5,>b7=%xmm6
+movdqa %xmm5,%xmm6
+
+# qhasm: uint32323232                                                        a7 <<= 18
+# asm 1: pslld $18,<a7=int6464#6
+# asm 2: pslld $18,<a7=%xmm5
+pslld $18,%xmm5
+
+# qhasm:                 b0 = 0
+# asm 1: pxor   >b0=int6464#8,>b0=int6464#8
+# asm 2: pxor   >b0=%xmm7,>b0=%xmm7
+pxor   %xmm7,%xmm7
+
+# qhasm: uint32323232                                                        b7 >>= 14
+# asm 1: psrld $14,<b7=int6464#7
+# asm 2: psrld $14,<b7=%xmm6
+psrld $14,%xmm6
+
+# qhasm:                                                                 diag0 ^= a7
+# asm 1: pxor  <a7=int6464#6,<diag0=int6464#1
+# asm 2: pxor  <a7=%xmm5,<diag0=%xmm0
+pxor  %xmm5,%xmm0
+
+# qhasm:                                                 diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm:                                                                 diag0 ^= b7
+# asm 1: pxor  <b7=int6464#7,<diag0=int6464#1
+# asm 2: pxor  <b7=%xmm6,<diag0=%xmm0
+pxor  %xmm6,%xmm0
+# comment:fp stack unchanged by jump
+
+# qhasm: goto mainloop2 if unsigned>
+ja ._mainloop2
+
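+# note: rounds done for this block: add the original input words back in
+# (the Salsa feed-forward), then extract the 16 output words one movd at a
+# time, rotating each diagonal with pshufd in between; the scattered
+# out/m offsets (0,48,32,16, 20,4,52,36, ...) undo the diagonal layout.
+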
+# qhasm: uint32323232 diag0 += x0
+# asm 1: paddd <x0=stack128#3,<diag0=int6464#1
+# asm 2: paddd <x0=64(%esp),<diag0=%xmm0
+paddd 64(%esp),%xmm0
+
+# qhasm: uint32323232 diag1 += x1
+# asm 1: paddd <x1=stack128#2,<diag1=int6464#2
+# asm 2: paddd <x1=48(%esp),<diag1=%xmm1
+paddd 48(%esp),%xmm1
+
+# qhasm: uint32323232 diag2 += x2
+# asm 1: paddd <x2=stack128#4,<diag2=int6464#3
+# asm 2: paddd <x2=80(%esp),<diag2=%xmm2
+paddd 80(%esp),%xmm2
+
+# qhasm: uint32323232 diag3 += x3
+# asm 1: paddd <x3=stack128#1,<diag3=int6464#4
+# asm 2: paddd <x3=32(%esp),<diag3=%xmm3
+paddd 32(%esp),%xmm3
+
+# qhasm: in0 = diag0
+# asm 1: movd   <diag0=int6464#1,>in0=int32#1
+# asm 2: movd   <diag0=%xmm0,>in0=%eax
+movd   %xmm0,%eax
+
+# qhasm: in12 = diag1
+# asm 1: movd   <diag1=int6464#2,>in12=int32#2
+# asm 2: movd   <diag1=%xmm1,>in12=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in8 = diag2
+# asm 1: movd   <diag2=int6464#3,>in8=int32#3
+# asm 2: movd   <diag2=%xmm2,>in8=%edx
+movd   %xmm2,%edx
+
+# qhasm: in4 = diag3
+# asm 1: movd   <diag3=int6464#4,>in4=int32#4
+# asm 2: movd   <diag3=%xmm3,>in4=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in0 ^= *(uint32 *) (m + 0)
+# asm 1: xorl 0(<m=int32#5),<in0=int32#1
+# asm 2: xorl 0(<m=%esi),<in0=%eax
+xorl 0(%esi),%eax
+
+# qhasm: in12 ^= *(uint32 *) (m + 48)
+# asm 1: xorl 48(<m=int32#5),<in12=int32#2
+# asm 2: xorl 48(<m=%esi),<in12=%ecx
+xorl 48(%esi),%ecx
+
+# qhasm: in8 ^= *(uint32 *) (m + 32)
+# asm 1: xorl 32(<m=int32#5),<in8=int32#3
+# asm 2: xorl 32(<m=%esi),<in8=%edx
+xorl 32(%esi),%edx
+
+# qhasm: in4 ^= *(uint32 *) (m + 16)
+# asm 1: xorl 16(<m=int32#5),<in4=int32#4
+# asm 2: xorl 16(<m=%esi),<in4=%ebx
+xorl 16(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 0) = in0
+# asm 1: movl <in0=int32#1,0(<out=int32#6)
+# asm 2: movl <in0=%eax,0(<out=%edi)
+movl %eax,0(%edi)
+
+# qhasm: *(uint32 *) (out + 48) = in12
+# asm 1: movl <in12=int32#2,48(<out=int32#6)
+# asm 2: movl <in12=%ecx,48(<out=%edi)
+movl %ecx,48(%edi)
+
+# qhasm: *(uint32 *) (out + 32) = in8
+# asm 1: movl <in8=int32#3,32(<out=int32#6)
+# asm 2: movl <in8=%edx,32(<out=%edi)
+movl %edx,32(%edi)
+
+# qhasm: *(uint32 *) (out + 16) = in4
+# asm 1: movl <in4=int32#4,16(<out=int32#6)
+# asm 2: movl <in4=%ebx,16(<out=%edi)
+movl %ebx,16(%edi)
+
+# qhasm: in5 = diag0
+# asm 1: movd   <diag0=int6464#1,>in5=int32#1
+# asm 2: movd   <diag0=%xmm0,>in5=%eax
+movd   %xmm0,%eax
+
+# qhasm: in1 = diag1
+# asm 1: movd   <diag1=int6464#2,>in1=int32#2
+# asm 2: movd   <diag1=%xmm1,>in1=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in13 = diag2
+# asm 1: movd   <diag2=int6464#3,>in13=int32#3
+# asm 2: movd   <diag2=%xmm2,>in13=%edx
+movd   %xmm2,%edx
+
+# qhasm: in9 = diag3
+# asm 1: movd   <diag3=int6464#4,>in9=int32#4
+# asm 2: movd   <diag3=%xmm3,>in9=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in5 ^= *(uint32 *) (m + 20)
+# asm 1: xorl 20(<m=int32#5),<in5=int32#1
+# asm 2: xorl 20(<m=%esi),<in5=%eax
+xorl 20(%esi),%eax
+
+# qhasm: in1 ^= *(uint32 *) (m + 4)
+# asm 1: xorl 4(<m=int32#5),<in1=int32#2
+# asm 2: xorl 4(<m=%esi),<in1=%ecx
+xorl 4(%esi),%ecx
+
+# qhasm: in13 ^= *(uint32 *) (m + 52)
+# asm 1: xorl 52(<m=int32#5),<in13=int32#3
+# asm 2: xorl 52(<m=%esi),<in13=%edx
+xorl 52(%esi),%edx
+
+# qhasm: in9 ^= *(uint32 *) (m + 36)
+# asm 1: xorl 36(<m=int32#5),<in9=int32#4
+# asm 2: xorl 36(<m=%esi),<in9=%ebx
+xorl 36(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 20) = in5
+# asm 1: movl <in5=int32#1,20(<out=int32#6)
+# asm 2: movl <in5=%eax,20(<out=%edi)
+movl %eax,20(%edi)
+
+# qhasm: *(uint32 *) (out + 4) = in1
+# asm 1: movl <in1=int32#2,4(<out=int32#6)
+# asm 2: movl <in1=%ecx,4(<out=%edi)
+movl %ecx,4(%edi)
+
+# qhasm: *(uint32 *) (out + 52) = in13
+# asm 1: movl <in13=int32#3,52(<out=int32#6)
+# asm 2: movl <in13=%edx,52(<out=%edi)
+movl %edx,52(%edi)
+
+# qhasm: *(uint32 *) (out + 36) = in9
+# asm 1: movl <in9=int32#4,36(<out=int32#6)
+# asm 2: movl <in9=%ebx,36(<out=%edi)
+movl %ebx,36(%edi)
+
+# qhasm: in10 = diag0
+# asm 1: movd   <diag0=int6464#1,>in10=int32#1
+# asm 2: movd   <diag0=%xmm0,>in10=%eax
+movd   %xmm0,%eax
+
+# qhasm: in6 = diag1
+# asm 1: movd   <diag1=int6464#2,>in6=int32#2
+# asm 2: movd   <diag1=%xmm1,>in6=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in2 = diag2
+# asm 1: movd   <diag2=int6464#3,>in2=int32#3
+# asm 2: movd   <diag2=%xmm2,>in2=%edx
+movd   %xmm2,%edx
+
+# qhasm: in14 = diag3
+# asm 1: movd   <diag3=int6464#4,>in14=int32#4
+# asm 2: movd   <diag3=%xmm3,>in14=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: diag0 <<<= 96
+# asm 1: pshufd $0x39,<diag0=int6464#1,<diag0=int6464#1
+# asm 2: pshufd $0x39,<diag0=%xmm0,<diag0=%xmm0
+pshufd $0x39,%xmm0,%xmm0
+
+# qhasm: diag1 <<<= 96
+# asm 1: pshufd $0x39,<diag1=int6464#2,<diag1=int6464#2
+# asm 2: pshufd $0x39,<diag1=%xmm1,<diag1=%xmm1
+pshufd $0x39,%xmm1,%xmm1
+
+# qhasm: diag2 <<<= 96
+# asm 1: pshufd $0x39,<diag2=int6464#3,<diag2=int6464#3
+# asm 2: pshufd $0x39,<diag2=%xmm2,<diag2=%xmm2
+pshufd $0x39,%xmm2,%xmm2
+
+# qhasm: diag3 <<<= 96
+# asm 1: pshufd $0x39,<diag3=int6464#4,<diag3=int6464#4
+# asm 2: pshufd $0x39,<diag3=%xmm3,<diag3=%xmm3
+pshufd $0x39,%xmm3,%xmm3
+
+# qhasm: in10 ^= *(uint32 *) (m + 40)
+# asm 1: xorl 40(<m=int32#5),<in10=int32#1
+# asm 2: xorl 40(<m=%esi),<in10=%eax
+xorl 40(%esi),%eax
+
+# qhasm: in6 ^= *(uint32 *) (m + 24)
+# asm 1: xorl 24(<m=int32#5),<in6=int32#2
+# asm 2: xorl 24(<m=%esi),<in6=%ecx
+xorl 24(%esi),%ecx
+
+# qhasm: in2 ^= *(uint32 *) (m + 8)
+# asm 1: xorl 8(<m=int32#5),<in2=int32#3
+# asm 2: xorl 8(<m=%esi),<in2=%edx
+xorl 8(%esi),%edx
+
+# qhasm: in14 ^= *(uint32 *) (m + 56)
+# asm 1: xorl 56(<m=int32#5),<in14=int32#4
+# asm 2: xorl 56(<m=%esi),<in14=%ebx
+xorl 56(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 40) = in10
+# asm 1: movl <in10=int32#1,40(<out=int32#6)
+# asm 2: movl <in10=%eax,40(<out=%edi)
+movl %eax,40(%edi)
+
+# qhasm: *(uint32 *) (out + 24) = in6
+# asm 1: movl <in6=int32#2,24(<out=int32#6)
+# asm 2: movl <in6=%ecx,24(<out=%edi)
+movl %ecx,24(%edi)
+
+# qhasm: *(uint32 *) (out + 8) = in2
+# asm 1: movl <in2=int32#3,8(<out=int32#6)
+# asm 2: movl <in2=%edx,8(<out=%edi)
+movl %edx,8(%edi)
+
+# qhasm: *(uint32 *) (out + 56) = in14
+# asm 1: movl <in14=int32#4,56(<out=int32#6)
+# asm 2: movl <in14=%ebx,56(<out=%edi)
+movl %ebx,56(%edi)
+
+# qhasm: in15 = diag0
+# asm 1: movd   <diag0=int6464#1,>in15=int32#1
+# asm 2: movd   <diag0=%xmm0,>in15=%eax
+movd   %xmm0,%eax
+
+# qhasm: in11 = diag1
+# asm 1: movd   <diag1=int6464#2,>in11=int32#2
+# asm 2: movd   <diag1=%xmm1,>in11=%ecx
+movd   %xmm1,%ecx
+
+# qhasm: in7 = diag2
+# asm 1: movd   <diag2=int6464#3,>in7=int32#3
+# asm 2: movd   <diag2=%xmm2,>in7=%edx
+movd   %xmm2,%edx
+
+# qhasm: in3 = diag3
+# asm 1: movd   <diag3=int6464#4,>in3=int32#4
+# asm 2: movd   <diag3=%xmm3,>in3=%ebx
+movd   %xmm3,%ebx
+
+# qhasm: in15 ^= *(uint32 *) (m + 60)
+# asm 1: xorl 60(<m=int32#5),<in15=int32#1
+# asm 2: xorl 60(<m=%esi),<in15=%eax
+xorl 60(%esi),%eax
+
+# qhasm: in11 ^= *(uint32 *) (m + 44)
+# asm 1: xorl 44(<m=int32#5),<in11=int32#2
+# asm 2: xorl 44(<m=%esi),<in11=%ecx
+xorl 44(%esi),%ecx
+
+# qhasm: in7 ^= *(uint32 *) (m + 28)
+# asm 1: xorl 28(<m=int32#5),<in7=int32#3
+# asm 2: xorl 28(<m=%esi),<in7=%edx
+xorl 28(%esi),%edx
+
+# qhasm: in3 ^= *(uint32 *) (m + 12)
+# asm 1: xorl 12(<m=int32#5),<in3=int32#4
+# asm 2: xorl 12(<m=%esi),<in3=%ebx
+xorl 12(%esi),%ebx
+
+# qhasm: *(uint32 *) (out + 60) = in15
+# asm 1: movl <in15=int32#1,60(<out=int32#6)
+# asm 2: movl <in15=%eax,60(<out=%edi)
+movl %eax,60(%edi)
+
+# qhasm: *(uint32 *) (out + 44) = in11
+# asm 1: movl <in11=int32#2,44(<out=int32#6)
+# asm 2: movl <in11=%ecx,44(<out=%edi)
+movl %ecx,44(%edi)
+
+# qhasm: *(uint32 *) (out + 28) = in7
+# asm 1: movl <in7=int32#3,28(<out=int32#6)
+# asm 2: movl <in7=%edx,28(<out=%edi)
+movl %edx,28(%edi)
+
+# qhasm: *(uint32 *) (out + 12) = in3
+# asm 1: movl <in3=int32#4,12(<out=int32#6)
+# asm 2: movl <in3=%ebx,12(<out=%edi)
+movl %ebx,12(%edi)
+
+# qhasm:   bytes = bytes_stack
+# asm 1: movl <bytes_stack=stack32#7,>bytes=int32#1
+# asm 2: movl <bytes_stack=24(%esp),>bytes=%eax
+movl 24(%esp),%eax
+
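+# note: the 64-bit block counter lives in x2[0] (low word) and x3[1] (high
+# word) of the stacked state; the add/adc below steps it for the next block.
+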
+# qhasm:   in8 = ((uint32 *)&x2)[0]
+# asm 1: movl <x2=stack128#4,>in8=int32#2
+# asm 2: movl <x2=80(%esp),>in8=%ecx
+movl 80(%esp),%ecx
+
+# qhasm:   in9 = ((uint32 *)&x3)[1]
+# asm 1: movl 4+<x3=stack128#1,>in9=int32#3
+# asm 2: movl 4+<x3=32(%esp),>in9=%edx
+movl 4+32(%esp),%edx
+
+# qhasm:   carry? in8 += 1
+# asm 1: add  $1,<in8=int32#2
+# asm 2: add  $1,<in8=%ecx
+add  $1,%ecx
+
+# qhasm:   in9 += 0 + carry
+# asm 1: adc $0,<in9=int32#3
+# asm 2: adc $0,<in9=%edx
+adc $0,%edx
+
+# qhasm:   ((uint32 *)&x2)[0] = in8
+# asm 1: movl <in8=int32#2,>x2=stack128#4
+# asm 2: movl <in8=%ecx,>x2=80(%esp)
+movl %ecx,80(%esp)
+
+# qhasm:   ((uint32 *)&x3)[1] = in9
+# asm 1: movl <in9=int32#3,4+<x3=stack128#1
+# asm 2: movl <in9=%edx,4+<x3=32(%esp)
+movl %edx,4+32(%esp)
+
+# qhasm:                          unsigned>? unsigned<? bytes - 64
+# asm 1: cmp  $64,<bytes=int32#1
+# asm 2: cmp  $64,<bytes=%eax
+cmp  $64,%eax
+# comment:fp stack unchanged by jump
+
+# qhasm:   goto bytesatleast65 if unsigned>
+ja ._bytesatleast65
+# comment:fp stack unchanged by jump
+
+# qhasm:     goto bytesatleast64 if !unsigned<
+jae ._bytesatleast64
+
+# qhasm:       m = out
+# asm 1: mov  <out=int32#6,>m=int32#5
+# asm 2: mov  <out=%edi,>m=%esi
+mov  %edi,%esi
+
+# qhasm:       out = ctarget
+# asm 1: movl <ctarget=stack32#6,>out=int32#6
+# asm 2: movl <ctarget=20(%esp),>out=%edi
+movl 20(%esp),%edi
+
+# qhasm:       i = bytes
+# asm 1: mov  <bytes=int32#1,>i=int32#2
+# asm 2: mov  <bytes=%eax,>i=%ecx
+mov  %eax,%ecx
+
+# qhasm:       while (i) { *out++ = *m++; --i }
+rep movsb
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     bytesatleast64:
+._bytesatleast64:
+# comment:fp stack unchanged by fallthrough
+
+# qhasm:     done:
+._done:
+
+# qhasm:     eax = eax_stack
+# asm 1: movl <eax_stack=stack32#1,>eax=int32#1
+# asm 2: movl <eax_stack=0(%esp),>eax=%eax
+movl 0(%esp),%eax
+
+# qhasm:     ebx = ebx_stack
+# asm 1: movl <ebx_stack=stack32#2,>ebx=int32#4
+# asm 2: movl <ebx_stack=4(%esp),>ebx=%ebx
+movl 4(%esp),%ebx
+
+# qhasm:     esi = esi_stack
+# asm 1: movl <esi_stack=stack32#3,>esi=int32#5
+# asm 2: movl <esi_stack=8(%esp),>esi=%esi
+movl 8(%esp),%esi
+
+# qhasm:     edi = edi_stack
+# asm 1: movl <edi_stack=stack32#4,>edi=int32#6
+# asm 2: movl <edi_stack=12(%esp),>edi=%edi
+movl 12(%esp),%edi
+
+# qhasm:     ebp = ebp_stack
+# asm 1: movl <ebp_stack=stack32#5,>ebp=int32#7
+# asm 2: movl <ebp_stack=16(%esp),>ebp=%ebp
+movl 16(%esp),%ebp
+
+# qhasm:     leave
+add %eax,%esp
+xor %eax,%eax
+ret
+
+# qhasm:   bytesatleast65:
+._bytesatleast65:
+
+# qhasm:   bytes -= 64
+# asm 1: sub  $64,<bytes=int32#1
+# asm 2: sub  $64,<bytes=%eax
+sub  $64,%eax
+
+# qhasm:   out += 64
+# asm 1: add  $64,<out=int32#6
+# asm 2: add  $64,<out=%edi
+add  $64,%edi
+
+# qhasm:   m += 64
+# asm 1: add  $64,<m=int32#5
+# asm 2: add  $64,<m=%esi
+add  $64,%esi
+# comment:fp stack unchanged by jump
+
+# qhasm: goto bytesbetween1and255
+jmp ._bytesbetween1and255
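
The SIMD code above is a straight-line expansion of the Salsa-family
add-rotate-xor step: each pslld/psrld pair is a 32-bit rotation by 7, 9, 13
or 18 applied to four state words at once.  For reference only, a scalar
sketch of one quarter-round as specified for Salsa20 (helper names are
invented here and are not part of this patch):

#include <stdint.h>

/* illustration only: not part of the NaCl sources */
static uint32_t rotl32(uint32_t x, int c)
{
  return (x << c) | (x >> (32 - c));
}

/* one quarter-round on the first column of the 4x4 state x[0..15];
   the rotation amounts 7, 9, 13, 18 are the ones hard-coded in the
   pslld/psrld pairs of the assembly above */
static void quarterround_sketch(uint32_t x[16])
{
  x[ 4] ^= rotl32(x[ 0] + x[12],  7);
  x[ 8] ^= rotl32(x[ 4] + x[ 0],  9);
  x[12] ^= rotl32(x[ 8] + x[ 4], 13);
  x[ 0] ^= rotl32(x[12] + x[ 8], 18);
}
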
diff --git a/nacl/nacl-20110221/crypto_stream/try.c b/nacl/nacl-20110221/crypto_stream/try.c
new file mode 100644
index 00000000..9a36d760
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/try.c
@@ -0,0 +1,124 @@
+/*
+ * crypto_stream/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include "crypto_stream.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_stream_IMPLEMENTATION;
+
+#define MAXTEST_BYTES 10000
+#define CHECKSUM_BYTES 4096
+#define TUNE_BYTES 1536
+
+static unsigned char *k;
+static unsigned char *n;
+static unsigned char *m;
+static unsigned char *c;
+static unsigned char *s;
+static unsigned char *k2;
+static unsigned char *n2;
+static unsigned char *m2;
+static unsigned char *c2;
+static unsigned char *s2;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  k = alignedcalloc(crypto_stream_KEYBYTES);
+  n = alignedcalloc(crypto_stream_NONCEBYTES);
+  m = alignedcalloc(MAXTEST_BYTES);
+  c = alignedcalloc(MAXTEST_BYTES);
+  s = alignedcalloc(MAXTEST_BYTES);
+  k2 = alignedcalloc(crypto_stream_KEYBYTES);
+  n2 = alignedcalloc(crypto_stream_NONCEBYTES);
+  m2 = alignedcalloc(MAXTEST_BYTES);
+  c2 = alignedcalloc(MAXTEST_BYTES);
+  s2 = alignedcalloc(MAXTEST_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_stream_xor(c,m,TUNE_BYTES,n,k);
+}
+
+char checksum[crypto_stream_KEYBYTES * 2 + 1];
+
+const char *checksum_compute(void)
+{
+  long long i;
+  long long j;
+
+  for (i = 0;i < CHECKSUM_BYTES;++i) {
+    long long mlen = i;
+    long long clen = i;
+    long long slen = i;
+    long long klen = crypto_stream_KEYBYTES;
+    long long nlen = crypto_stream_NONCEBYTES;
+    for (j = -16;j < 0;++j) m[j] = random();
+    for (j = -16;j < 0;++j) c[j] = random();
+    for (j = -16;j < 0;++j) s[j] = random();
+    for (j = -16;j < 0;++j) n[j] = random();
+    for (j = -16;j < 0;++j) k[j] = random();
+    for (j = mlen;j < mlen + 16;++j) m[j] = random();
+    for (j = clen;j < clen + 16;++j) c[j] = random();
+    for (j = slen;j < slen + 16;++j) s[j] = random();
+    for (j = nlen;j < nlen + 16;++j) n[j] = random();
+    for (j = klen;j < klen + 16;++j) k[j] = random();
+    for (j = -16;j < mlen + 16;++j) m2[j] = m[j];
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+    for (j = -16;j < slen + 16;++j) s2[j] = s[j];
+    for (j = -16;j < nlen + 16;++j) n2[j] = n[j];
+    for (j = -16;j < klen + 16;++j) k2[j] = k[j];
+
+    crypto_stream_xor(c,m,mlen,n,k);
+
+    for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_stream_xor overwrites m";
+    for (j = -16;j < slen + 16;++j) if (s[j] != s2[j]) return "crypto_stream_xor overwrites s";
+    for (j = -16;j < nlen + 16;++j) if (n[j] != n2[j]) return "crypto_stream_xor overwrites n";
+    for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_stream_xor overwrites k";
+    for (j = -16;j < 0;++j) if (c[j] != c2[j]) return "crypto_stream_xor writes before output";
+    for (j = clen;j < clen + 16;++j) if (c[j] != c2[j]) return "crypto_stream_xor writes after output";
+
+    for (j = -16;j < clen + 16;++j) c2[j] = c[j];
+
+    crypto_stream(s,slen,n,k);
+
+    for (j = -16;j < mlen + 16;++j) if (m[j] != m2[j]) return "crypto_stream overwrites m";
+    for (j = -16;j < clen + 16;++j) if (c[j] != c2[j]) return "crypto_stream overwrites c";
+    for (j = -16;j < nlen + 16;++j) if (n[j] != n2[j]) return "crypto_stream overwrites n";
+    for (j = -16;j < klen + 16;++j) if (k[j] != k2[j]) return "crypto_stream overwrites k";
+    for (j = -16;j < 0;++j) if (s[j] != s2[j]) return "crypto_stream writes before output";
+    for (j = slen;j < slen + 16;++j) if (s[j] != s2[j]) return "crypto_stream writes after output";
+
+    for (j = 0;j < mlen;++j)
+      if ((s[j] ^ m[j]) != c[j]) return "crypto_stream_xor does not match crypto_stream";
+
+    for (j = 0;j < clen;++j) k[j % klen] ^= c[j];
+    crypto_stream_xor(m,c,clen,n,k);
+    crypto_stream(s,slen,n,k);
+    for (j = 0;j < mlen;++j)
+      if ((s[j] ^ m[j]) != c[j]) return "crypto_stream_xor does not match crypto_stream";
+    for (j = 0;j < mlen;++j) n[j % nlen] ^= m[j];
+    m[mlen] = 0;
+  }
+
+  for (i = 0;i < crypto_stream_KEYBYTES;++i) {
+    checksum[2 * i] = "0123456789abcdef"[15 & (k[i] >> 4)];
+    checksum[2 * i + 1] = "0123456789abcdef"[15 & k[i]];
+  }
+  checksum[2 * i] = 0;
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_stream/wrapper-stream.cpp b/nacl/nacl-20110221/crypto_stream/wrapper-stream.cpp
new file mode 100644
index 00000000..dd10c2f6
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/wrapper-stream.cpp
@@ -0,0 +1,12 @@
+#include <string>
+using std::string;
+#include "crypto_stream.h"
+
+string crypto_stream(size_t clen,const string &n,const string &k)
+{
+  if (n.size() != crypto_stream_NONCEBYTES) throw "incorrect nonce length";
+  if (k.size() != crypto_stream_KEYBYTES) throw "incorrect key length";
+  unsigned char c[clen];
+  crypto_stream(c,clen,(const unsigned char *) n.c_str(),(const unsigned char *) k.c_str());
+  return string((char *) c,clen);
+}
diff --git a/nacl/nacl-20110221/crypto_stream/wrapper-xor.cpp b/nacl/nacl-20110221/crypto_stream/wrapper-xor.cpp
new file mode 100644
index 00000000..8d770d1e
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/wrapper-xor.cpp
@@ -0,0 +1,17 @@
+#include <string>
+using std::string;
+#include "crypto_stream.h"
+
+string crypto_stream_xor(const string &m,const string &n,const string &k)
+{
+  if (n.size() != crypto_stream_NONCEBYTES) throw "incorrect nonce length";
+  if (k.size() != crypto_stream_KEYBYTES) throw "incorrect key length";
+  size_t mlen = m.size();
+  unsigned char c[mlen];
+  crypto_stream_xor(c,
+    (const unsigned char *) m.c_str(),mlen,
+    (const unsigned char *) n.c_str(),
+    (const unsigned char *) k.c_str()
+    );
+  return string((char *) c,mlen);
+}
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/checksum b/nacl/nacl-20110221/crypto_stream/xsalsa20/checksum
new file mode 100644
index 00000000..cae64c0d
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/xsalsa20/checksum
@@ -0,0 +1 @@
+201bc58a96adcb6ed339ca33c188af8ca04a4ce68be1e0953309ee09a0cf8e7a
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/api.h b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/api.h
new file mode 100644
index 00000000..6910a7dc
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/api.h
@@ -0,0 +1,2 @@
+#define CRYPTO_KEYBYTES 32
+#define CRYPTO_NONCEBYTES 24
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/implementors b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/implementors
new file mode 100644
index 00000000..f6fb3c73
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/implementors
@@ -0,0 +1 @@
+Daniel J. Bernstein
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/stream.c b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/stream.c
new file mode 100644
index 00000000..2d710709
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/stream.c
@@ -0,0 +1,22 @@
+/*
+version 20080914
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "crypto_stream.h"
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream(
+        unsigned char *c,unsigned long long clen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char subkey[32];
+  crypto_core_hsalsa20(subkey,n,k,sigma);
+  return crypto_stream_salsa20(c,clen,n + 16,subkey);
+}
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/xor.c b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/xor.c
new file mode 100644
index 00000000..13f3134a
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_stream/xsalsa20/ref/xor.c
@@ -0,0 +1,23 @@
+/*
+version 20080913
+D. J. Bernstein
+Public domain.
+*/
+
+#include "crypto_core_hsalsa20.h"
+#include "crypto_stream_salsa20.h"
+#include "crypto_stream.h"
+
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+int crypto_stream_xor(
+        unsigned char *c,
+  const unsigned char *m,unsigned long long mlen,
+  const unsigned char *n,
+  const unsigned char *k
+)
+{
+  unsigned char subkey[32];
+  crypto_core_hsalsa20(subkey,n,k,sigma);
+  return crypto_stream_salsa20_xor(c,m,mlen,n + 16,subkey);
+}
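
Both files above express XSalsa20 as HSalsa20 followed by Salsa20: crypto_core_hsalsa20 turns the 32-byte key and the first 16 bytes of the 24-byte nonce into a 32-byte subkey, and the stream is then Salsa20 under that subkey with the remaining 8 nonce bytes. A minimal sketch of a check for that relation, assuming the default crypto_stream selection is this xsalsa20 implementation and that the hsalsa20/salsa20 primitives are available as above:

  #include <string.h>
  #include "crypto_core_hsalsa20.h"
  #include "crypto_stream_salsa20.h"
  #include "crypto_stream.h"

  static const unsigned char sigma[16] = "expand 32-byte k";

  /* Returns 0 if the XSalsa20 stream equals Salsa20 under the HSalsa20 subkey. */
  static int xsalsa20_structure_check(const unsigned char *n,const unsigned char *k)
  {
    unsigned char subkey[32];
    unsigned char a[64];
    unsigned char b[64];
    crypto_core_hsalsa20(subkey,n,k,sigma);
    crypto_stream(a,sizeof a,n,k);                    /* XSalsa20: 24-byte nonce n, key k */
    crypto_stream_salsa20(b,sizeof b,n + 16,subkey);  /* Salsa20: last 8 nonce bytes, subkey */
    return memcmp(a,b,sizeof a) ? -1 : 0;
  }
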
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/selected b/nacl/nacl-20110221/crypto_stream/xsalsa20/selected
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_stream/xsalsa20/used b/nacl/nacl-20110221/crypto_stream/xsalsa20/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_verify/16/checksum b/nacl/nacl-20110221/crypto_verify/16/checksum
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/16/checksum
@@ -0,0 +1 @@
+0
diff --git a/nacl/nacl-20110221/crypto_verify/16/ref/api.h b/nacl/nacl-20110221/crypto_verify/16/ref/api.h
new file mode 100644
index 00000000..32be2f97
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/16/ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 16
diff --git a/nacl/nacl-20110221/crypto_verify/16/ref/verify.c b/nacl/nacl-20110221/crypto_verify/16/ref/verify.c
new file mode 100644
index 00000000..d356060c
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/16/ref/verify.c
@@ -0,0 +1,24 @@
+#include "crypto_verify.h"
+
+int crypto_verify(const unsigned char *x,const unsigned char *y)
+{
+  unsigned int differentbits = 0;
+#define F(i) differentbits |= x[i] ^ y[i];
+  F(0)
+  F(1)
+  F(2)
+  F(3)
+  F(4)
+  F(5)
+  F(6)
+  F(7)
+  F(8)
+  F(9)
+  F(10)
+  F(11)
+  F(12)
+  F(13)
+  F(14)
+  F(15)
+  return (1 & ((differentbits - 1) >> 8)) - 1;
+}
diff --git a/nacl/nacl-20110221/crypto_verify/16/used b/nacl/nacl-20110221/crypto_verify/16/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_verify/32/checksum b/nacl/nacl-20110221/crypto_verify/32/checksum
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/32/checksum
@@ -0,0 +1 @@
+0
diff --git a/nacl/nacl-20110221/crypto_verify/32/ref/api.h b/nacl/nacl-20110221/crypto_verify/32/ref/api.h
new file mode 100644
index 00000000..ae8c7f6a
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/32/ref/api.h
@@ -0,0 +1 @@
+#define CRYPTO_BYTES 32
diff --git a/nacl/nacl-20110221/crypto_verify/32/ref/verify.c b/nacl/nacl-20110221/crypto_verify/32/ref/verify.c
new file mode 100644
index 00000000..a0e23afe
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/32/ref/verify.c
@@ -0,0 +1,40 @@
+#include "crypto_verify.h"
+
+int crypto_verify(const unsigned char *x,const unsigned char *y)
+{
+  unsigned int differentbits = 0;
+#define F(i) differentbits |= x[i] ^ y[i];
+  F(0)
+  F(1)
+  F(2)
+  F(3)
+  F(4)
+  F(5)
+  F(6)
+  F(7)
+  F(8)
+  F(9)
+  F(10)
+  F(11)
+  F(12)
+  F(13)
+  F(14)
+  F(15)
+  F(16)
+  F(17)
+  F(18)
+  F(19)
+  F(20)
+  F(21)
+  F(22)
+  F(23)
+  F(24)
+  F(25)
+  F(26)
+  F(27)
+  F(28)
+  F(29)
+  F(30)
+  F(31)
+  return (1 & ((differentbits - 1) >> 8)) - 1;
+}
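
differentbits accumulates the OR of all byte differences, so after the 32 steps it is 0 exactly when the inputs match and otherwise some value in 1..255. Because the value fits in the low 8 bits, (differentbits - 1) >> 8 has its low bit set only in the all-equal case, so the function returns 0 for equal inputs and -1 otherwise, with no data-dependent branches. The same idiom written as a loop over an arbitrary length (a sketch, not part of NaCl):

  /* Constant-time comparison: 0 if x and y agree on the first n bytes, -1 otherwise. */
  static int verify_n(const unsigned char *x,const unsigned char *y,unsigned long long n)
  {
    unsigned int differentbits = 0;
    unsigned long long i;
    for (i = 0;i < n;++i) differentbits |= x[i] ^ y[i];
    return (1 & ((differentbits - 1) >> 8)) - 1;
  }
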
diff --git a/nacl/nacl-20110221/crypto_verify/32/used b/nacl/nacl-20110221/crypto_verify/32/used
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/crypto_verify/measure.c b/nacl/nacl-20110221/crypto_verify/measure.c
new file mode 100644
index 00000000..bbfac4f1
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/measure.c
@@ -0,0 +1,18 @@
+#include "crypto_verify.h"
+
+const char *primitiveimplementation = crypto_verify_IMPLEMENTATION;
+const char *implementationversion = crypto_verify_VERSION;
+const char *sizenames[] = { "inputbytes", 0 };
+const long long sizes[] = { crypto_verify_BYTES };
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+}
+
+void measure(void)
+{
+}
diff --git a/nacl/nacl-20110221/crypto_verify/try.c b/nacl/nacl-20110221/crypto_verify/try.c
new file mode 100644
index 00000000..f555cb4e
--- /dev/null
+++ b/nacl/nacl-20110221/crypto_verify/try.c
@@ -0,0 +1,76 @@
+/*
+ * crypto_verify/try.c version 20090118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "crypto_verify.h"
+
+extern unsigned char *alignedcalloc(unsigned long long);
+
+const char *primitiveimplementation = crypto_verify_IMPLEMENTATION;
+
+static unsigned char *x;
+static unsigned char *y;
+
+void preallocate(void)
+{
+}
+
+void allocate(void)
+{
+  x = alignedcalloc(crypto_verify_BYTES);
+  y = alignedcalloc(crypto_verify_BYTES);
+}
+
+void predoit(void)
+{
+}
+
+void doit(void)
+{
+  crypto_verify(x,y);
+}
+
+static const char *check(void)
+{
+  int r = crypto_verify(x,y);
+  if (r == 0) {
+    if (memcmp(x,y,crypto_verify_BYTES)) return "different strings pass verify";
+  } else if (r == -1) {
+    if (!memcmp(x,y,crypto_verify_BYTES)) return "equal strings fail verify";
+  } else {
+    return "weird return value from verify";
+  }
+  return 0;
+}
+
+char checksum[2];
+
+const char *checksum_compute(void)
+{
+  long long tests;
+  long long i;
+  long long j;
+  const char *c;
+
+  for (tests = 0;tests < 100000;++tests) {
+    for (i = 0;i < crypto_verify_BYTES;++i) x[i] = random();
+    for (i = 0;i < crypto_verify_BYTES;++i) y[i] = random();
+    c = check(); if (c) return c;
+    for (i = 0;i < crypto_verify_BYTES;++i) y[i] = x[i];
+    c = check(); if (c) return c;
+    y[random() % crypto_verify_BYTES] = random();
+    c = check(); if (c) return c;
+    y[random() % crypto_verify_BYTES] = random();
+    c = check(); if (c) return c;
+    y[random() % crypto_verify_BYTES] = random();
+    c = check(); if (c) return c;
+  }
+
+  checksum[0] = '0';
+  checksum[1] = 0;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/crypto_verify/wrapper-empty.cpp b/nacl/nacl-20110221/crypto_verify/wrapper-empty.cpp
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/curvecp/LIBS b/nacl/nacl-20110221/curvecp/LIBS
new file mode 100644
index 00000000..2928c658
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/LIBS
@@ -0,0 +1,31 @@
+blocking.o
+byte_copy.o
+byte_isequal.o
+byte_zero.o
+crypto_block.o
+die.o
+e.o
+hexparse.o
+load.o
+nameparse.o
+nanoseconds.o
+open_cwd.o
+open_lock.o
+open_pipe.o
+open_read.o
+open_write.o
+portparse.o
+randommod.o
+safenonce.o
+savesync.o
+socket_bind.o
+socket_recv.o
+socket_send.o
+socket_udp.o
+uint16_pack.o
+uint16_unpack.o
+uint32_pack.o
+uint32_unpack.o
+uint64_pack.o
+uint64_unpack.o
+writeall.o
diff --git a/nacl/nacl-20110221/curvecp/README b/nacl/nacl-20110221/curvecp/README
new file mode 100644
index 00000000..1048c894
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/README
@@ -0,0 +1,10 @@
+Example of use (with nacl-20110221/build/*/bin in $PATH):
+  curvecpmakekey serverkey
+  curvecpprintkey serverkey > serverkey.hex
+  curvecpserver this.machine.name serverkey \
+    127.0.0.1 10000 31415926535897932384626433832795 \
+      curvecpmessage cat /usr/share/dict/words &
+  curvecpclient this.machine.name `cat serverkey.hex` \
+    127.0.0.1 10000 31415926535897932384626433832795 \
+      curvecpmessage -c sh -c 'nacl-sha512 <&6'
+  nacl-sha512 < /usr/share/dict/words
diff --git a/nacl/nacl-20110221/curvecp/SOURCES b/nacl/nacl-20110221/curvecp/SOURCES
new file mode 100644
index 00000000..3fc29751
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/SOURCES
@@ -0,0 +1,36 @@
+blocking
+byte_copy
+byte_isequal
+byte_zero
+crypto_block
+die
+e
+hexparse
+load
+nameparse
+nanoseconds
+open_cwd
+open_lock
+open_pipe
+open_read
+open_write
+portparse
+randommod
+safenonce
+savesync
+socket_bind
+socket_recv
+socket_send
+socket_udp
+uint16_pack
+uint16_unpack
+uint32_pack
+uint32_unpack
+uint64_pack
+uint64_unpack
+writeall
+curvecpprintkey
+curvecpmakekey
+curvecpclient
+curvecpserver
+curvecpmessage
diff --git a/nacl/nacl-20110221/curvecp/TARGETS b/nacl/nacl-20110221/curvecp/TARGETS
new file mode 100644
index 00000000..ab04272c
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/TARGETS
@@ -0,0 +1,5 @@
+curvecpprintkey
+curvecpmakekey
+curvecpclient
+curvecpserver
+curvecpmessage
diff --git a/nacl/nacl-20110221/curvecp/blocking.c b/nacl/nacl-20110221/curvecp/blocking.c
new file mode 100644
index 00000000..1594259c
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/blocking.c
@@ -0,0 +1,12 @@
+#include <fcntl.h>
+#include "blocking.h"
+
+void blocking_enable(int fd)
+{
+  fcntl(fd,F_SETFL,fcntl(fd,F_GETFL,0) & ~O_NONBLOCK);
+}
+
+void blocking_disable(int fd)
+{
+  fcntl(fd,F_SETFL,fcntl(fd,F_GETFL,0) | O_NONBLOCK);
+}
diff --git a/nacl/nacl-20110221/curvecp/blocking.h b/nacl/nacl-20110221/curvecp/blocking.h
new file mode 100644
index 00000000..9ba08a5e
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/blocking.h
@@ -0,0 +1,7 @@
+#ifndef BLOCKING_H
+#define BLOCKING_H
+
+extern void blocking_enable(int);
+extern void blocking_disable(int);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/byte.h b/nacl/nacl-20110221/curvecp/byte.h
new file mode 100644
index 00000000..5dbfbd96
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/byte.h
@@ -0,0 +1,8 @@
+#ifndef BYTE_H
+#define BYTE_H
+
+extern void byte_zero(void *,long long);
+extern void byte_copy(void *,long long,const void *);
+extern int byte_isequal(const void *,long long,const void *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/byte_copy.c b/nacl/nacl-20110221/curvecp/byte_copy.c
new file mode 100644
index 00000000..55f446a4
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/byte_copy.c
@@ -0,0 +1,8 @@
+#include "byte.h"
+
+void byte_copy(void *yv,long long ylen,const void *xv)
+{
+  char *y = yv;
+  const char *x = xv;
+  while (ylen > 0) { *y++ = *x++; --ylen; }
+}
diff --git a/nacl/nacl-20110221/curvecp/byte_isequal.c b/nacl/nacl-20110221/curvecp/byte_isequal.c
new file mode 100644
index 00000000..625d361e
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/byte_isequal.c
@@ -0,0 +1,10 @@
+#include "byte.h"
+
+int byte_isequal(const void *yv,long long ylen,const void *xv)
+{
+  const unsigned char *y = yv;
+  const unsigned char *x = xv;
+  unsigned char diff = 0;
+  while (ylen > 0) { diff |= (*y++ ^ *x++); --ylen; }
+  return (256 - (unsigned int) diff) >> 8;
+}
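
This is the same constant-time accumulation as crypto_verify, with the opposite return convention: diff stays in 0..255, so 256 - diff lies in 1..256 and shifting right by 8 gives 1 exactly when diff is 0. Callers can therefore combine several checks with a single bitwise & and reject a packet only after all fields have been examined, as the curvecp tools below do. A small usage sketch in that style (the magic string and field offsets are illustrative):

  #include "byte.h"

  /* Accept a packet only if both header fields match; the bitwise & (not &&)
     avoids an early exit, so the work done does not depend on which field differs. */
  static int headers_ok(const unsigned char *packet,const unsigned char *expectedext)
  {
    return byte_isequal(packet,8,"RL3aNMXM") &
           byte_isequal(packet + 8,16,expectedext);
  }
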
diff --git a/nacl/nacl-20110221/curvecp/byte_zero.c b/nacl/nacl-20110221/curvecp/byte_zero.c
new file mode 100644
index 00000000..bdc1f799
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/byte_zero.c
@@ -0,0 +1,7 @@
+#include "byte.h"
+
+void byte_zero(void *yv,long long ylen)
+{
+  char *y = yv;
+  while (ylen > 0) { *y++ = 0; --ylen; }
+}
diff --git a/nacl/nacl-20110221/curvecp/crypto_block.c b/nacl/nacl-20110221/curvecp/crypto_block.c
new file mode 100644
index 00000000..5c7cf35e
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/crypto_block.c
@@ -0,0 +1,35 @@
+#include "crypto_block.h"
+#include "crypto_uint64.h"
+#include "uint64_unpack.h"
+#include "uint64_pack.h"
+
+/*
+TEA with double-size words.
+XXX: Switch to crypto_block_aes256.
+XXX: Build crypto_stream_aes256 on top of crypto_block_aes256.
+*/
+
+int crypto_block(
+  unsigned char *out,
+  const unsigned char *in,
+  const unsigned char *k
+)
+{
+  crypto_uint64 v0 = uint64_unpack(in + 0);
+  crypto_uint64 v1 = uint64_unpack(in + 8);
+  crypto_uint64 k0 = uint64_unpack(k + 0);
+  crypto_uint64 k1 = uint64_unpack(k + 8);
+  crypto_uint64 k2 = uint64_unpack(k + 16);
+  crypto_uint64 k3 = uint64_unpack(k + 24);
+  crypto_uint64 sum = 0;
+  crypto_uint64 delta = 0x9e3779b97f4a7c15;
+  int i;
+  for (i = 0;i < 32;++i) {
+    sum += delta;
+    v0 += ((v1<<7) + k0) ^ (v1 + sum) ^ ((v1>>12) + k1);
+    v1 += ((v0<<16) + k2) ^ (v0 + sum) ^ ((v0>>8) + k3);
+  }
+  uint64_pack(out + 0,v0);
+  uint64_pack(out + 8,v1);
+  return 0;
+}
diff --git a/nacl/nacl-20110221/curvecp/crypto_block.h b/nacl/nacl-20110221/curvecp/crypto_block.h
new file mode 100644
index 00000000..f13620c4
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/crypto_block.h
@@ -0,0 +1,4 @@
+#define crypto_block_BYTES 16
+#define crypto_block_KEYBYTES 32
+
+extern int crypto_block(unsigned char *,const unsigned char *,const unsigned char *);
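
crypto_block is a 16-byte block cipher with a 32-byte key: a TEA-like construction widened to 64-bit words, run for 32 rounds with the 64-bit golden-ratio constant 0x9e3779b97f4a7c15. Per the XXX notes it is a stopgap until an AES-256 block primitive is wired in, and it is only used internally by the curvecp tools. A minimal usage sketch, assuming randombytes as used elsewhere in this directory:

  #include "crypto_block.h"
  #include "randombytes.h"

  /* Encrypt one 16-byte block in place under a fresh 32-byte key (sketch only).
     out and in may alias: crypto_block reads all of in before writing out. */
  static void scramble_block(unsigned char block[crypto_block_BYTES])
  {
    unsigned char key[crypto_block_KEYBYTES];
    randombytes(key,sizeof key);
    crypto_block(block,block,key);
  }
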
diff --git a/nacl/nacl-20110221/curvecp/curvecpclient.c b/nacl/nacl-20110221/curvecp/curvecpclient.c
new file mode 100644
index 00000000..00793f00
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/curvecpclient.c
@@ -0,0 +1,476 @@
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <unistd.h>
+#include "e.h"
+#include "die.h"
+#include "load.h"
+#include "open.h"
+#include "byte.h"
+#include "socket.h"
+#include "uint64_pack.h"
+#include "uint64_unpack.h"
+#include "nanoseconds.h"
+#include "hexparse.h"
+#include "nameparse.h"
+#include "portparse.h"
+#include "writeall.h"
+#include "safenonce.h"
+#include "randommod.h"
+
+long long recent = 0;
+
+#define NUMIP 8
+long long hellowait[NUMIP] = {
+   1000000000
+,  1500000000
+,  2250000000
+,  3375000000
+,  5062500000
+,  7593750000
+, 11390625000
+, 17085937500
+} ;
+
+#include "crypto_box.h"
+#include "randombytes.h"
+#if crypto_box_PUBLICKEYBYTES != 32
+error!
+#endif
+#if crypto_box_NONCEBYTES != 24
+error!
+#endif
+#if crypto_box_BOXZEROBYTES != 16
+error!
+#endif
+#if crypto_box_ZEROBYTES != 32
+error!
+#endif
+#if crypto_box_BEFORENMBYTES != 32
+error!
+#endif
+
+int flagverbose = 1;
+
+#define USAGE "\
+curvecpclient: how to use:\n\
+curvecpclient:   -q (optional): no error messages\n\
+curvecpclient:   -Q (optional): print error messages (default)\n\
+curvecpclient:   -v (optional): print extra information\n\
+curvecpclient:   -c keydir (optional): use this public-key directory\n\
+curvecpclient:   sname: server's name\n\
+curvecpclient:   pk: server's public key\n\
+curvecpclient:   ip: server's IP address\n\
+curvecpclient:   port: server's UDP port\n\
+curvecpclient:   ext: server's extension\n\
+curvecpclient:   prog: run this client\n\
+"
+
+void die_usage(const char *s)
+{
+  if (s) die_4(100,USAGE,"curvecpclient: fatal: ",s,"\n");
+  die_1(100,USAGE);
+}
+
+void die_fatal(const char *trouble,const char *d,const char *fn)
+{
+  /* XXX: clean up? OS can do it much more reliably */
+  if (!flagverbose) die_0(111);
+  if (d) {
+    if (fn) die_9(111,"curvecpclient: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n");
+    die_7(111,"curvecpclient: fatal: ",trouble," ",d,": ",e_str(errno),"\n");
+  }
+  if (errno) die_5(111,"curvecpclient: fatal: ",trouble,": ",e_str(errno),"\n");
+  die_3(111,"curvecpclient: fatal: ",trouble,"\n");
+}
+
+int multiipparse(unsigned char *y,const char *x)
+{
+  long long pos;
+  long long pos2;
+  long long ynum;
+  long long ypos;
+  long long j;
+  long long k;
+  long long d;
+  for (j = 0;j < 4 * NUMIP;++j) y[j] = 0;
+  ynum = 0;
+  while (ynum < 1000) {
+    ++ynum;
+    ypos = randommod(ynum);
+    for (k = 0;k < 4;++k) {
+      pos = ypos * 4 + k;
+      pos2 = (ynum - 1) * 4 + k;
+      if (pos >= 0 && pos < 4 * NUMIP && pos2 >= 0 && pos2 < 4 * NUMIP) y[pos2] = y[pos];
+      d = 0;
+      for (j = 0;j < 3 && x[j] >= '0' && x[j] <= '9';++j) d = d * 10 + (x[j] - '0');
+      if (j == 0) return 0;
+      x += j;
+      if (pos >= 0 && pos < 4 * NUMIP) y[pos] = d;
+      if (k < 3) {
+        if (*x != '.') return 0;
+        ++x;
+      }
+    }
+    if (!*x) break;
+    if (*x != ',') return 0;
+    ++x;
+  }
+  /* if fewer than 8 IP addresses, cycle through them: */
+  pos = 0;
+  pos2 = ynum * 4;
+  while (pos2 < 4 * NUMIP) {
+    if (pos >= 0 && pos < 4 * NUMIP && pos2 >= 0 && pos2 < 4 * NUMIP) y[pos2] = y[pos];
+    ++pos2;
+    ++pos;
+  }
+  return 1;
+}
+
+
+/* routing to the client: */
+unsigned char clientextension[16];
+long long clientextensionloadtime = 0;
+int udpfd = -1;
+
+void clientextension_init(void)
+{
+  if (recent >= clientextensionloadtime) {
+    clientextensionloadtime = recent + 30000000000LL;
+    if (load("/etc/curvecpextension",clientextension,16) == -1)
+      if (errno == ENOENT || errno == ENAMETOOLONG)
+        byte_zero(clientextension,16);
+  }
+}
+
+
+/* client security: */
+char *keydir = 0;
+unsigned char clientlongtermpk[32];
+unsigned char clientlongtermsk[32];
+unsigned char clientshorttermpk[32];
+unsigned char clientshorttermsk[32];
+crypto_uint64 clientshorttermnonce;
+unsigned char vouch[64];
+
+void clientshorttermnonce_update(void)
+{
+  ++clientshorttermnonce;
+  if (clientshorttermnonce) return;
+  errno = EPROTO;
+  die_fatal("nonce space expired",0,0);
+}
+
+/* routing to the server: */
+unsigned char serverip[4 * NUMIP];
+unsigned char serverport[2];
+unsigned char serverextension[16];
+
+/* server security: */
+unsigned char servername[256];
+unsigned char serverlongtermpk[32];
+unsigned char servershorttermpk[32];
+unsigned char servercookie[96];
+
+/* shared secrets: */
+unsigned char clientshortserverlong[32];
+unsigned char clientshortservershort[32];
+unsigned char clientlongserverlong[32];
+
+unsigned char allzero[128] = {0};
+
+unsigned char nonce[24];
+unsigned char text[2048];
+
+unsigned char packet[4096];
+unsigned char packetip[4];
+unsigned char packetport[2];
+crypto_uint64 packetnonce;
+int flagreceivedmessage = 0;
+crypto_uint64 receivednonce = 0;
+
+struct pollfd p[3];
+
+int fdwd = -1;
+
+int tochild[2] = {-1,-1};
+int fromchild[2] = {-1,-1};
+pid_t child = -1;
+int childstatus = 0;
+
+unsigned char childbuf[4096];
+long long childbuflen = 0;
+unsigned char childmessage[2048];
+long long childmessagelen = 0;
+
+int main(int argc,char **argv)
+{
+  long long hellopackets;
+  long long r;
+  long long nextaction;
+
+  signal(SIGPIPE,SIG_IGN);
+
+  if (!argv[0]) die_usage(0);
+  for (;;) {
+    char *x;
+    if (!argv[1]) break;
+    if (argv[1][0] != '-') break;
+    x = *++argv;
+    if (x[0] == '-' && x[1] == 0) break;
+    if (x[0] == '-' && x[1] == '-' && x[2] == 0) break;
+    while (*++x) {
+      if (*x == 'q') { flagverbose = 0; continue; }
+      if (*x == 'Q') { flagverbose = 1; continue; }
+      if (*x == 'v') { if (flagverbose == 2) flagverbose = 3; else flagverbose = 2; continue; }
+      if (*x == 'c') {
+        if (x[1]) { keydir = x + 1; break; }
+        if (argv[1]) { keydir = *++argv; break; }
+      }
+      die_usage(0);
+    }
+  }
+  if (!nameparse(servername,*++argv)) die_usage("sname must be at most 255 bytes, at most 63 bytes between dots");
+  if (!hexparse(serverlongtermpk,32,*++argv)) die_usage("pk must be exactly 64 hex characters");
+  if (!multiipparse(serverip,*++argv)) die_usage("ip must be a comma-separated series of IPv4 addresses");
+  if (!portparse(serverport,*++argv)) die_usage("port must be an integer between 0 and 65535");
+  if (!hexparse(serverextension,16,*++argv)) die_usage("ext must be exactly 32 hex characters");
+  if (!*++argv) die_usage("missing prog");
+
+  for (;;) {
+    r = open_read("/dev/null");
+    if (r == -1) die_fatal("unable to open /dev/null",0,0);
+    if (r > 9) { close(r); break; }
+  }
+
+  if (keydir) {
+    fdwd = open_cwd();
+    if (fdwd == -1) die_fatal("unable to open current working directory",0,0);
+    if (chdir(keydir) == -1) die_fatal("unable to change to directory",keydir,0);
+    if (load("publickey",clientlongtermpk,sizeof clientlongtermpk) == -1) die_fatal("unable to read public key from",keydir,0);
+    if (load(".expertsonly/secretkey",clientlongtermsk,sizeof clientlongtermsk) == -1) die_fatal("unable to read secret key from",keydir,0);
+  } else {
+    crypto_box_keypair(clientlongtermpk,clientlongtermsk);
+  }
+
+  crypto_box_keypair(clientshorttermpk,clientshorttermsk);
+  clientshorttermnonce = randommod(281474976710656LL);
+  crypto_box_beforenm(clientshortserverlong,serverlongtermpk,clientshorttermsk);
+  crypto_box_beforenm(clientlongserverlong,serverlongtermpk,clientlongtermsk);
+
+  udpfd = socket_udp();
+  if (udpfd == -1) die_fatal("unable to create socket",0,0);
+
+  for (hellopackets = 0;hellopackets < NUMIP;++hellopackets) {
+    recent = nanoseconds();
+
+    /* send a Hello packet: */
+
+    clientextension_init();
+
+    clientshorttermnonce_update();
+    byte_copy(nonce,16,"CurveCP-client-H");
+    uint64_pack(nonce + 16,clientshorttermnonce);
+
+    byte_copy(packet,8,"QvnQ5XlH");
+    byte_copy(packet + 8,16,serverextension);
+    byte_copy(packet + 24,16,clientextension);
+    byte_copy(packet + 40,32,clientshorttermpk);
+    byte_copy(packet + 72,64,allzero);
+    byte_copy(packet + 136,8,nonce + 16);
+    crypto_box_afternm(text,allzero,96,nonce,clientshortserverlong);
+    byte_copy(packet + 144,80,text + 16);
+
+    socket_send(udpfd,packet,224,serverip + 4 * hellopackets,serverport);
+
+    nextaction = recent + hellowait[hellopackets] + randommod(hellowait[hellopackets]);
+
+    for (;;) {
+      long long timeout = nextaction - recent;
+      if (timeout <= 0) break;
+      p[0].fd = udpfd;
+      p[0].events = POLLIN;
+      if (poll(p,1,timeout / 1000000 + 1) < 0) p[0].revents = 0;
+
+      do { /* try receiving a Cookie packet: */
+        if (!p[0].revents) break;
+        r = socket_recv(udpfd,packet,sizeof packet,packetip,packetport);
+        if (r != 200) break;
+        if (!(byte_isequal(packetip,4,serverip + 4 * hellopackets) &
+              byte_isequal(packetport,2,serverport) &
+              byte_isequal(packet,8,"RL3aNMXK") &
+              byte_isequal(packet + 8,16,clientextension) &
+              byte_isequal(packet + 24,16,serverextension)
+           )) break;
+        byte_copy(nonce,8,"CurveCPK");
+        byte_copy(nonce + 8,16,packet + 40);
+        byte_zero(text,16);
+        byte_copy(text + 16,144,packet + 56);
+        if (crypto_box_open_afternm(text,text,160,nonce,clientshortserverlong)) break;
+        byte_copy(servershorttermpk,32,text + 32);
+        byte_copy(servercookie,96,text + 64);
+        byte_copy(serverip,4,serverip + 4 * hellopackets);
+        goto receivedcookie;
+      } while (0);
+
+      recent = nanoseconds();
+    }
+  }
+
+  errno = ETIMEDOUT; die_fatal("no response from server",0,0);
+
+  receivedcookie:
+
+  crypto_box_beforenm(clientshortservershort,servershorttermpk,clientshorttermsk);
+
+  byte_copy(nonce,8,"CurveCPV");
+  if (keydir) {
+    if (safenonce(nonce + 8,0) == -1) die_fatal("nonce-generation disaster",0,0);
+  } else {
+    randombytes(nonce + 8,16);
+  }
+
+  byte_zero(text,32);
+  byte_copy(text + 32,32,clientshorttermpk);
+  crypto_box_afternm(text,text,64,nonce,clientlongserverlong);
+  byte_copy(vouch,16,nonce + 8);
+  byte_copy(vouch + 16,48,text + 16);
+
+  /* server is responding, so start child: */
+
+  if (open_pipe(tochild) == -1) die_fatal("unable to create pipe",0,0);
+  if (open_pipe(fromchild) == -1) die_fatal("unable to create pipe",0,0);
+  
+  child = fork();
+  if (child == -1) die_fatal("unable to fork",0,0);
+  if (child == 0) {
+    if (keydir) if (fchdir(fdwd) == -1) die_fatal("unable to chdir to original directory",0,0);
+    close(8);
+    if (dup(tochild[0]) != 8) die_fatal("unable to dup",0,0);
+    close(9);
+    if (dup(fromchild[1]) != 9) die_fatal("unable to dup",0,0);
+    /* XXX: set up environment variables */
+    signal(SIGPIPE,SIG_DFL);
+    execvp(*argv,argv);
+    die_fatal("unable to run",*argv,0);
+  }
+
+  close(fromchild[1]);
+  close(tochild[0]);
+
+
+  for (;;) {
+    p[0].fd = udpfd;
+    p[0].events = POLLIN;
+    p[1].fd = fromchild[0];
+    p[1].events = POLLIN;
+
+    if (poll(p,2,-1) < 0) {
+      p[0].revents = 0;
+      p[1].revents = 0;
+    }
+
+    do { /* try receiving a Message packet: */
+      if (!p[0].revents) break;
+      r = socket_recv(udpfd,packet,sizeof packet,packetip,packetport);
+      if (r < 80) break;
+      if (r > 1152) break;
+      if (r & 15) break;
+      packetnonce = uint64_unpack(packet + 40);
+      if (flagreceivedmessage && packetnonce <= receivednonce) break;
+      if (!(byte_isequal(packetip,4,serverip + 4 * hellopackets) &
+            byte_isequal(packetport,2,serverport) &
+            byte_isequal(packet,8,"RL3aNMXM") &
+            byte_isequal(packet + 8,16,clientextension) &
+            byte_isequal(packet + 24,16,serverextension)
+         )) break;
+      byte_copy(nonce,16,"CurveCP-server-M");
+      byte_copy(nonce + 16,8,packet + 40);
+      byte_zero(text,16);
+      byte_copy(text + 16,r - 48,packet + 48);
+      if (crypto_box_open_afternm(text,text,r - 32,nonce,clientshortservershort)) break;
+
+      if (!flagreceivedmessage) {
+        flagreceivedmessage = 1;
+	randombytes(clientlongtermpk,sizeof clientlongtermpk);
+	randombytes(vouch,sizeof vouch);
+	randombytes(servername,sizeof servername);
+	randombytes(servercookie,sizeof servercookie);
+      }
+
+      receivednonce = packetnonce;
+      text[31] = (r - 64) >> 4;
+      /* child is responsible for reading all data immediately, so we won't block: */
+      if (writeall(tochild[1],text + 31,r - 63) == -1) goto done;
+    } while (0);
+
+    do { /* try receiving data from child: */
+      long long i;
+      if (!p[1].revents) break;
+      r = read(fromchild[0],childbuf,sizeof childbuf);
+      if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break;
+      if (r <= 0) goto done;
+      childbuflen = r;
+      for (i = 0;i < childbuflen;++i) {
+	if (childmessagelen < 0) goto done;
+	if (childmessagelen >= sizeof childmessage) goto done;
+        childmessage[childmessagelen++] = childbuf[i];
+	if (childmessage[0] & 128) goto done;
+	if (childmessagelen == 1 + 16 * (unsigned long long) childmessage[0]) {
+	  clientextension_init();
+	  clientshorttermnonce_update();
+          uint64_pack(nonce + 16,clientshorttermnonce);
+	  if (flagreceivedmessage) {
+	    r = childmessagelen - 1;
+	    if (r < 16) goto done;
+	    if (r > 1088) goto done;
+            byte_copy(nonce,16,"CurveCP-client-M");
+	    byte_zero(text,32);
+	    byte_copy(text + 32,r,childmessage + 1);
+	    crypto_box_afternm(text,text,r + 32,nonce,clientshortservershort);
+	    byte_copy(packet,8,"QvnQ5XlM");
+	    byte_copy(packet + 8,16,serverextension);
+	    byte_copy(packet + 24,16,clientextension);
+	    byte_copy(packet + 40,32,clientshorttermpk);
+	    byte_copy(packet + 72,8,nonce + 16);
+	    byte_copy(packet + 80,r + 16,text + 16);
+            socket_send(udpfd,packet,r + 96,serverip,serverport);
+	  } else {
+	    r = childmessagelen - 1;
+	    if (r < 16) goto done;
+	    if (r > 640) goto done;
+	    byte_copy(nonce,16,"CurveCP-client-I");
+	    byte_zero(text,32);
+	    byte_copy(text + 32,32,clientlongtermpk);
+	    byte_copy(text + 64,64,vouch);
+	    byte_copy(text + 128,256,servername);
+	    byte_copy(text + 384,r,childmessage + 1);
+	    crypto_box_afternm(text,text,r + 384,nonce,clientshortservershort);
+	    byte_copy(packet,8,"QvnQ5XlI");
+	    byte_copy(packet + 8,16,serverextension);
+	    byte_copy(packet + 24,16,clientextension);
+	    byte_copy(packet + 40,32,clientshorttermpk);
+	    byte_copy(packet + 72,96,servercookie);
+	    byte_copy(packet + 168,8,nonce + 16);
+	    byte_copy(packet + 176,r + 368,text + 16);
+            socket_send(udpfd,packet,r + 544,serverip,serverport);
+	  }
+	  childmessagelen = 0;
+	}
+      }
+    } while (0);
+  }
+
+
+  done:
+
+  do {
+    r = waitpid(child,&childstatus,0);
+  } while (r == -1 && errno == EINTR);
+
+  if (!WIFEXITED(childstatus)) { errno = 0; die_fatal("process killed by signal",0,0); }
+  return WEXITSTATUS(childstatus);
+}
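
The Hello phase above builds a fixed 224-byte packet: 8 bytes of magic ("QvnQ5XlH"), 16 bytes each of server and client extension, the 32-byte client short-term public key, 64 zero bytes of padding, the 8-byte nonce counter, and an 80-byte box of 64 zero bytes under the client-short/server-long shared key. The client cycles through up to NUMIP server addresses, and before each retry waits hellowait[i] plus a random extra delay of up to the same amount; the table grows by a factor of 1.5 per attempt. A sketch of that backoff schedule, assuming randommod as declared in this directory:

  #include "randommod.h"

  /* Hello retry delay for attempt 0..7: base grows 1.5x per attempt, plus up to
     100% random jitter (this reproduces the hellowait table in curvecpclient.c). */
  static long long hello_delay(int attempt)
  {
    long long base = 1000000000;   /* 1 second, in nanoseconds */
    int i;
    for (i = 0;i < attempt;++i) base += base / 2;
    return base + randommod(base);
  }
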
diff --git a/nacl/nacl-20110221/curvecp/curvecpmakekey.c b/nacl/nacl-20110221/curvecp/curvecpmakekey.c
new file mode 100644
index 00000000..dfa181b0
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/curvecpmakekey.c
@@ -0,0 +1,57 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "die.h"
+#include "e.h"
+#include "savesync.h"
+#include "randombytes.h"
+#include "crypto_box.h"
+
+void die_usage(void)
+{
+  die_1(111,"curvecpmakekey: usage: curvecpmakekey keydir\n");
+}
+
+void die_fatal(const char *trouble,const char *d,const char *fn)
+{
+  if (fn) die_9(111,"curvecpmakekey: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n");
+  die_7(111,"curvecpmakekey: fatal: ",trouble," ",d,": ",e_str(errno),"\n");
+}
+
+unsigned char pk[crypto_box_PUBLICKEYBYTES];
+unsigned char sk[crypto_box_SECRETKEYBYTES];
+unsigned char lock[1];
+unsigned char noncekey[32];
+unsigned char noncecounter[8];
+
+void create(const char *d,const char *fn,const unsigned char *x,long long xlen)
+{
+  if (savesync(fn,x,xlen) == -1) die_fatal("unable to create",d,fn);
+}
+
+int main(int argc,char **argv)
+{
+  char *d;
+
+  if (!argv[0]) die_usage();
+  if (!argv[1]) die_usage();
+  d = argv[1];
+
+  umask(022);
+  if (mkdir(d,0755) == -1) die_fatal("unable to create directory",d,0);
+  if (chdir(d) == -1) die_fatal("unable to chdir to directory",d,0);
+  if (mkdir(".expertsonly",0700) == -1) die_fatal("unable to create directory",d,".expertsonly");
+
+  crypto_box_keypair(pk,sk);
+  create(d,"publickey",pk,sizeof pk);
+
+  randombytes(noncekey,sizeof noncekey);
+
+  umask(077);
+  create(d,".expertsonly/secretkey",sk,sizeof sk);
+  create(d,".expertsonly/lock",lock,sizeof lock);
+  create(d,".expertsonly/noncekey",noncekey,sizeof noncekey);
+  create(d,".expertsonly/noncecounter",noncecounter,sizeof noncecounter);
+
+  return 0;
+}
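
curvecpmakekey lays out a key directory with a world-readable publickey (written under umask 022) and a mode-0700 .expertsonly/ subdirectory holding secretkey, lock, noncekey and noncecounter, written under umask 077 so only the owner can read them. Programs that take a -c keydir option read the long-term pair back from this layout; a minimal sketch, assuming load as used elsewhere in this directory and no cleanup on failure:

  #include <unistd.h>
  #include "load.h"
  #include "crypto_box.h"

  /* Load a long-term keypair created by curvecpmakekey (sketch only). */
  static int load_keypair(const char *keydir,unsigned char *pk,unsigned char *sk)
  {
    if (chdir(keydir) == -1) return -1;
    if (load("publickey",pk,crypto_box_PUBLICKEYBYTES) == -1) return -1;
    if (load(".expertsonly/secretkey",sk,crypto_box_SECRETKEYBYTES) == -1) return -1;
    return 0;
  }
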
diff --git a/nacl/nacl-20110221/curvecp/curvecpmessage.c b/nacl/nacl-20110221/curvecp/curvecpmessage.c
new file mode 100644
index 00000000..df1e1664
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/curvecpmessage.c
@@ -0,0 +1,654 @@
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <signal.h>
+#include <poll.h>
+#include "open.h"
+#include "blocking.h"
+#include "e.h"
+#include "die.h"
+#include "randommod.h"
+#include "byte.h"
+#include "crypto_uint32.h"
+#include "uint16_pack.h"
+#include "uint32_pack.h"
+#include "uint64_pack.h"
+#include "uint16_unpack.h"
+#include "uint32_unpack.h"
+#include "uint64_unpack.h"
+#include "nanoseconds.h"
+#include "writeall.h"
+
+int flagverbose = 1;
+int flagserver = 1;
+int wantping = 0; /* 1: ping after a second; 2: ping immediately */
+
+#define USAGE "\
+curvecpmessage: how to use:\n\
+curvecpmessage:   -q (optional): no error messages\n\
+curvecpmessage:   -Q (optional): print error messages (default)\n\
+curvecpmessage:   -v (optional): print extra information\n\
+curvecpmessage:   -c (optional): program is a client; server starts first\n\
+curvecpmessage:   -C (optional): program is a client that starts first\n\
+curvecpmessage:   -s (optional): program is a server (default)\n\
+curvecpmessage:   prog: run this program\n\
+"
+
+void die_usage(const char *s)
+{
+  if (s) die_4(100,USAGE,"curvecpmessage: fatal: ",s,"\n");
+  die_1(100,USAGE);
+}
+
+void die_fatal(const char *trouble,const char *d,const char *fn)
+{
+  if (!flagverbose) die_0(111);
+  if (d) {
+    if (fn) die_9(111,"curvecpmessage: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n");
+    die_7(111,"curvecpmessage: fatal: ",trouble," ",d,": ",e_str(errno),"\n");
+  }
+  if (errno) die_5(111,"curvecpmessage: fatal: ",trouble,": ",e_str(errno),"\n");
+  die_3(111,"curvecpmessage: fatal: ",trouble,"\n");
+}
+
+void die_badmessage(void)
+{
+  errno = EPROTO;
+  die_fatal("unable to read from file descriptor 8",0,0);
+}
+
+void die_internalerror(void)
+{
+  errno = EPROTO;
+  die_fatal("internal error",0,0);
+}
+
+
+int tochild[2] = {-1,-1};
+int fromchild[2] = {-1,-1};
+pid_t child = -1;
+int childstatus;
+
+struct pollfd p[3];
+
+long long sendacked = 0; /* number of initial bytes sent and fully acknowledged */
+long long sendbytes = 0; /* number of additional bytes to send */
+unsigned char sendbuf[131072]; /* circular queue with the additional bytes; size must be power of 2 */
+long long sendprocessed = 0; /* within sendbytes, number of bytes absorbed into blocks */
+
+crypto_uint16 sendeof = 0; /* 2048 for normal eof after sendbytes, 4096 for error after sendbytes */
+int sendeofprocessed = 0;
+int sendeofacked = 0;
+
+long long totalblocktransmissions = 0;
+long long totalblocks = 0;
+
+#define OUTGOING 128 /* must be power of 2 */
+long long blocknum = 0; /* number of outgoing blocks being tracked */
+long long blockfirst = 0; /* circular queue */
+long long blockpos[OUTGOING]; /* position of block's first byte within stream */
+long long blocklen[OUTGOING]; /* number of bytes in this block */
+crypto_uint16 blockeof[OUTGOING]; /* 0, 2048, 4096 */
+long long blocktransmissions[OUTGOING];
+long long blocktime[OUTGOING]; /* time of last message sending this block; 0 means acked */
+long long earliestblocktime = 0; /* if nonzero, minimum of active blocktime values */
+crypto_uint32 blockid[OUTGOING]; /* ID of last message sending this block */
+
+#define INCOMING 64 /* must be power of 2 */
+long long messagenum = 0; /* number of messages in incoming queue */
+long long messagefirst = 0; /* position of first message; circular queue */
+unsigned char messagelen[INCOMING]; /* times 16 */
+unsigned char message[INCOMING][1088];
+unsigned char messagetodo[2048];
+long long messagetodolen = 0;
+
+long long receivebytes = 0; /* number of initial bytes fully received */
+long long receivewritten = 0; /* within receivebytes, number of bytes given to child */
+crypto_uint16 receiveeof = 0; /* 0, 2048, 4096 */
+long long receivetotalbytes = 0; /* total number of bytes in stream, if receiveeof */
+unsigned char receivebuf[131072]; /* circular queue beyond receivewritten; size must be power of 2 */
+unsigned char receivevalid[131072]; /* 1 for byte successfully received; XXX: use buddy structure to speed this up */
+
+long long maxblocklen = 512;
+crypto_uint32 nextmessageid = 1;
+
+unsigned char buf[4096];
+
+long long lastblocktime = 0;
+long long nsecperblock = 1000000000;
+long long lastspeedadjustment = 0;
+long long lastedge = 0;
+long long lastdoubling = 0;
+
+long long rtt;
+long long rtt_delta;
+long long rtt_average = 0;
+long long rtt_deviation = 0;
+long long rtt_lowwater = 0;
+long long rtt_highwater = 0;
+long long rtt_timeout = 1000000000;
+long long rtt_seenrecenthigh = 0;
+long long rtt_seenrecentlow = 0;
+long long rtt_seenolderhigh = 0;
+long long rtt_seenolderlow = 0;
+long long rtt_phase = 0;
+
+long long lastpanic = 0;
+
+void earliestblocktime_compute(void) /* XXX: use priority queue */
+{
+  long long i;
+  long long pos;
+  earliestblocktime = 0;
+  for (i = 0;i < blocknum;++i) {
+    pos = (blockfirst + i) & (OUTGOING - 1);
+    if (blocktime[pos]) {
+      if (!earliestblocktime)
+        earliestblocktime = blocktime[pos];
+      else
+        if (blocktime[pos] < earliestblocktime)
+	  earliestblocktime = blocktime[pos];
+    }
+  }
+}
+
+void acknowledged(unsigned long long start,unsigned long long stop)
+{
+  long long i;
+  long long pos;
+  if (stop == start) return;
+  for (i = 0;i < blocknum;++i) {
+    pos = (blockfirst + i) & (OUTGOING - 1);
+    if (blockpos[pos] >= start && blockpos[pos] + blocklen[pos] <= stop) {
+      blocktime[pos] = 0;
+      totalblocktransmissions += blocktransmissions[pos];
+      totalblocks += 1;
+    }
+  }
+  while (blocknum) {
+    pos = blockfirst & (OUTGOING - 1);
+    if (blocktime[pos]) break;
+    sendacked += blocklen[pos];
+    sendbytes -= blocklen[pos];
+    sendprocessed -= blocklen[pos];
+    ++blockfirst;
+    --blocknum;
+  }
+  if (sendeof)
+    if (start == 0)
+      if (stop > sendacked + sendbytes)
+	if (!sendeofacked) {
+          sendeofacked = 1;
+	}
+  earliestblocktime_compute();
+}
+
+int main(int argc,char **argv)
+{
+  long long pos;
+  long long len;
+  long long u;
+  long long r;
+  long long i;
+  long long k;
+  long long recent;
+  long long nextaction;
+  long long timeout;
+  struct pollfd *q;
+  struct pollfd *watch8;
+  struct pollfd *watchtochild;
+  struct pollfd *watchfromchild;
+
+  signal(SIGPIPE,SIG_IGN);
+
+  if (!argv[0]) die_usage(0);
+  for (;;) {
+    char *x;
+    if (!argv[1]) break;
+    if (argv[1][0] != '-') break;
+    x = *++argv;
+    if (x[0] == '-' && x[1] == 0) break;
+    if (x[0] == '-' && x[1] == '-' && x[2] == 0) break;
+    while (*++x) {
+      if (*x == 'q') { flagverbose = 0; continue; }
+      if (*x == 'Q') { flagverbose = 1; continue; }
+      if (*x == 'v') { if (flagverbose == 2) flagverbose = 3; else flagverbose = 2; continue; }
+      if (*x == 'c') { flagserver = 0; wantping = 2; continue; }
+      if (*x == 'C') { flagserver = 0; wantping = 1; continue; }
+      if (*x == 's') { flagserver = 1; wantping = 0; continue; }
+      die_usage(0);
+    }
+  }
+  if (!*++argv) die_usage("missing prog");
+
+  for (;;) {
+    r = open_read("/dev/null");
+    if (r == -1) die_fatal("unable to open /dev/null",0,0);
+    if (r > 9) { close(r); break; }
+  }
+
+  if (open_pipe(tochild) == -1) die_fatal("unable to create pipe",0,0);
+  if (open_pipe(fromchild) == -1) die_fatal("unable to create pipe",0,0);
+
+  blocking_enable(tochild[0]);
+  blocking_enable(fromchild[1]);
+
+  child = fork();
+  if (child == -1) die_fatal("unable to fork",0,0);
+  if (child == 0) {
+    close(8);
+    close(9);
+    if (flagserver) {
+      close(0);
+      if (dup(tochild[0]) != 0) die_fatal("unable to dup",0,0);
+      close(1);
+      if (dup(fromchild[1]) != 1) die_fatal("unable to dup",0,0);
+    } else {
+      close(6);
+      if (dup(tochild[0]) != 6) die_fatal("unable to dup",0,0);
+      close(7);
+      if (dup(fromchild[1]) != 7) die_fatal("unable to dup",0,0);
+    }
+    signal(SIGPIPE,SIG_DFL);
+    execvp(*argv,argv);
+    die_fatal("unable to run",*argv,0);
+  }
+
+  close(tochild[0]);
+  close(fromchild[1]);
+
+  recent = nanoseconds();
+  lastspeedadjustment = recent;
+  if (flagserver) maxblocklen = 1024;
+
+  for (;;) {
+    if (sendeofacked)
+      if (receivewritten == receivetotalbytes)
+        if (receiveeof)
+          if (tochild[1] < 0)
+	    break; /* XXX: to re-ack should enter a TIME-WAIT state here */
+
+    q = p;
+
+    watch8 = q;
+    if (watch8) { q->fd = 8; q->events = POLLIN; ++q; }
+
+    watchtochild = q;
+    if (tochild[1] < 0) watchtochild = 0;
+    if (receivewritten >= receivebytes) watchtochild = 0;
+    if (watchtochild) { q->fd = tochild[1]; q->events = POLLOUT; ++q; }
+
+    watchfromchild = q;
+    if (sendeof) watchfromchild = 0;
+    if (sendbytes + 4096 > sizeof sendbuf) watchfromchild = 0;
+    if (watchfromchild) { q->fd = fromchild[0]; q->events = POLLIN; ++q; }
+
+    nextaction = recent + 60000000000LL;
+    if (wantping == 1) nextaction = recent + 1000000000;
+    if (wantping == 2)
+      if (nextaction > lastblocktime + nsecperblock) nextaction = lastblocktime + nsecperblock;
+    if (blocknum < OUTGOING)
+      if (!(sendeof ? sendeofprocessed : sendprocessed >= sendbytes))
+        if (nextaction > lastblocktime + nsecperblock) nextaction = lastblocktime + nsecperblock;
+    if (earliestblocktime)
+      if (earliestblocktime + rtt_timeout > lastblocktime + nsecperblock)
+        if (earliestblocktime + rtt_timeout < nextaction)
+	  nextaction = earliestblocktime + rtt_timeout;
+
+    if (messagenum)
+      if (!watchtochild)
+        nextaction = 0;
+
+    if (nextaction <= recent)
+      timeout = 0;
+    else
+      timeout = (nextaction - recent) / 1000000 + 1;
+
+    if (poll(p,q - p,timeout) < 0) {
+      watch8 = 0;
+      watchtochild = 0;
+      watchfromchild = 0;
+    } else {
+      if (watch8) if (!watch8->revents) watch8 = 0;
+      if (watchtochild) if (!watchtochild->revents) watchtochild = 0;
+      if (watchfromchild) if (!watchfromchild->revents) watchfromchild = 0;
+    }
+
+    /* XXX: keepalives */
+
+    do { /* try receiving data from child: */
+      if (!watchfromchild) break;
+      if (sendeof) break;
+      if (sendbytes + 4096 > sizeof sendbuf) break;
+
+      pos = (sendacked & (sizeof sendbuf - 1)) + sendbytes;
+      if (pos < sizeof sendbuf) {
+        r = read(fromchild[0],sendbuf + pos,sizeof sendbuf - pos);
+      } else {
+        r = read(fromchild[0],sendbuf + pos - sizeof sendbuf,sizeof sendbuf - sendbytes);
+      }
+      if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break;
+      if (r < 0) { sendeof = 4096; break; }
+      if (r == 0) { sendeof = 2048; break; }
+      sendbytes += r;
+      if (sendbytes >= 1152921504606846976LL) die_internalerror();
+    } while(0);
+
+    recent = nanoseconds();
+
+    do { /* try re-sending an old block: */
+      if (recent < lastblocktime + nsecperblock) break;
+      if (earliestblocktime == 0) break;
+      if (recent < earliestblocktime + rtt_timeout) break;
+
+      for (i = 0;i < blocknum;++i) {
+	pos = (blockfirst + i) & (OUTGOING - 1);
+        if (blocktime[pos] == earliestblocktime) {
+	  if (recent > lastpanic + 4 * rtt_timeout) {
+	    nsecperblock *= 2;
+	    lastpanic = recent;
+	    lastedge = recent;
+	  }
+	  goto sendblock;
+        }
+      }
+    } while(0);
+
+    do { /* try sending a new block: */
+      if (recent < lastblocktime + nsecperblock) break;
+      if (blocknum >= OUTGOING) break;
+      if (!wantping)
+        if (sendeof ? sendeofprocessed : sendprocessed >= sendbytes) break;
+      /* XXX: if any Nagle-type processing is desired, do it here */
+
+      pos = (blockfirst + blocknum) & (OUTGOING - 1);
+      ++blocknum;
+      blockpos[pos] = sendacked + sendprocessed;
+      blocklen[pos] = sendbytes - sendprocessed;
+      if (blocklen[pos] > maxblocklen) blocklen[pos] = maxblocklen;
+      if ((blockpos[pos] & (sizeof sendbuf - 1)) + blocklen[pos] > sizeof sendbuf)
+        blocklen[pos] = sizeof sendbuf - (blockpos[pos] & (sizeof sendbuf - 1));
+	/* XXX: or could have the full block in post-buffer space */
+      sendprocessed += blocklen[pos];
+      blockeof[pos] = 0;
+      if (sendprocessed == sendbytes) {
+        blockeof[pos] = sendeof;
+	if (sendeof) sendeofprocessed = 1;
+      }
+      blocktransmissions[pos] = 0;
+
+      sendblock:
+
+      blocktransmissions[pos] += 1;
+      blocktime[pos] = recent;
+      blockid[pos] = nextmessageid;
+      if (!++nextmessageid) ++nextmessageid;
+
+      /* constraints: u multiple of 16; u >= 16; u <= 1088; u >= 48 + blocklen[pos] */
+      u = 64 + blocklen[pos];
+      if (u <= 192) u = 192;
+      else if (u <= 320) u = 320;
+      else if (u <= 576) u = 576;
+      else if (u <= 1088) u = 1088;
+      else die_internalerror();
+      if (blocklen[pos] < 0 || blocklen[pos] > 1024) die_internalerror();
+
+      byte_zero(buf + 8,u);
+      buf[7] = u / 16;
+      uint32_pack(buf + 8,blockid[pos]);
+      /* XXX: include any acknowledgments that have piled up */
+      uint16_pack(buf + 46,blockeof[pos] | (crypto_uint16) blocklen[pos]);
+      uint64_pack(buf + 48,blockpos[pos]);
+      byte_copy(buf + 8 + u - blocklen[pos],blocklen[pos],sendbuf + (blockpos[pos] & (sizeof sendbuf - 1)));
+
+      if (writeall(9,buf + 7,u + 1) == -1) die_fatal("unable to write descriptor 9",0,0);
+      lastblocktime = recent;
+      wantping = 0;
+
+      earliestblocktime_compute();
+    } while(0);
+
+    do { /* try receiving messages: */
+      if (!watch8) break;
+      r = read(8,buf,sizeof buf);
+      if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break;
+      if (r == 0) die_badmessage();
+      if (r < 0) die_fatal("unable to read from file descriptor 8",0,0);
+      for (k = 0;k < r;++k) {
+        messagetodo[messagetodolen++] = buf[k];
+	u = 16 * (unsigned long long) messagetodo[0];
+	if (u < 16) die_badmessage();
+	if (u > 1088) die_badmessage();
+	if (messagetodolen == 1 + u) {
+	  if (messagenum < INCOMING) {
+	    pos = (messagefirst + messagenum) & (INCOMING - 1);
+	    messagelen[pos] = messagetodo[0];
+	    byte_copy(message[pos],u,messagetodo + 1);
+	    ++messagenum;
+	  } else {
+	    ; /* drop tail */
+	  }
+	  messagetodolen = 0;
+	}
+      }
+    } while(0);
+
+    do { /* try processing a message: */
+      if (!messagenum) break;
+      if (tochild[1] >= 0 && receivewritten < receivebytes) break;
+
+      maxblocklen = 1024;
+
+      pos = messagefirst & (INCOMING - 1);
+      len = 16 * (unsigned long long) messagelen[pos];
+      do { /* handle this message if it's comprehensible: */
+	unsigned long long D;
+	unsigned long long SF;
+	unsigned long long startbyte;
+	unsigned long long stopbyte;
+	crypto_uint32 id;
+	long long i;
+
+        if (len < 48) break;
+        if (len > 1088) break;
+
+	id = uint32_unpack(message[pos] + 4);
+	for (i = 0;i < blocknum;++i) {
+	  k = (blockfirst + i) & (OUTGOING - 1);
+	  if (blockid[k] == id) {
+	    rtt = recent - blocktime[k];
+	    if (!rtt_average) {
+	      nsecperblock = rtt;
+	      rtt_average = rtt;
+	      rtt_deviation = rtt / 2;
+	      rtt_highwater = rtt;
+	      rtt_lowwater = rtt;
+	    }
+
+	    /* Jacobson's retransmission timeout calculation: */
+	    rtt_delta = rtt - rtt_average;
+	    rtt_average += rtt_delta / 8;
+	    if (rtt_delta < 0) rtt_delta = -rtt_delta;
+	    rtt_delta -= rtt_deviation;
+	    rtt_deviation += rtt_delta / 4;
+	    rtt_timeout = rtt_average + 4 * rtt_deviation;
+	    /* adjust for delayed acks with anti-spiking: */
+	    rtt_timeout += 8 * nsecperblock;
+
+	    /* recognizing top and bottom of congestion cycle: */
+	    rtt_delta = rtt - rtt_highwater;
+	    rtt_highwater += rtt_delta / 1024;
+	    rtt_delta = rtt - rtt_lowwater;
+	    if (rtt_delta > 0) rtt_lowwater += rtt_delta / 8192;
+	    else rtt_lowwater += rtt_delta / 256;
+
+	    if (rtt_average > rtt_highwater + 5000000) rtt_seenrecenthigh = 1;
+	    else if (rtt_average < rtt_lowwater) rtt_seenrecentlow = 1;
+
+	    if (recent >= lastspeedadjustment + 16 * nsecperblock) {
+	      if (recent - lastspeedadjustment > 10000000000LL) {
+	        nsecperblock = 1000000000; /* slow restart */
+		nsecperblock += randommod(nsecperblock / 8);
+	      }
+
+	      lastspeedadjustment = recent;
+
+	      if (nsecperblock >= 131072) {
+	        /* additive increase: adjust 1/N by a constant c */
+	        /* rtt-fair additive increase: adjust 1/N by a constant c every nanosecond */
+	        /* approximation: adjust 1/N by cN every N nanoseconds */
+	        /* i.e., N <- 1/(1/N + cN) = N/(1 + cN^2) every N nanoseconds */
+	        if (nsecperblock < 16777216) {
+		  /* N/(1+cN^2) approx N - cN^3 */
+		  u = nsecperblock / 131072;
+	          nsecperblock -= u * u * u;
+	        } else {
+	          double d = nsecperblock;
+	          nsecperblock = d/(1 + d*d / 2251799813685248.0);
+	        }
+	      }
+
+	      if (rtt_phase == 0) {
+	        if (rtt_seenolderhigh) {
+		  rtt_phase = 1;
+		  lastedge = recent;
+	          nsecperblock += randommod(nsecperblock / 4);
+		}
+	      } else {
+	        if (rtt_seenolderlow) {
+		  rtt_phase = 0;
+	        }
+	      }
+
+	      rtt_seenolderhigh = rtt_seenrecenthigh;
+	      rtt_seenolderlow = rtt_seenrecentlow;
+	      rtt_seenrecenthigh = 0;
+	      rtt_seenrecentlow = 0;
+	    }
+
+	    do {
+	      if (recent - lastedge < 60000000000LL) {
+	        if (recent < lastdoubling + 4 * nsecperblock + 64 * rtt_timeout + 5000000000LL) break;
+	      } else {
+	        if (recent < lastdoubling + 4 * nsecperblock + 2 * rtt_timeout) break;
+	      }
+	      if (nsecperblock <= 65535) break;
+
+              nsecperblock /= 2;
+	      lastdoubling = recent;
+	      if (lastedge) lastedge = recent;
+	    } while(0);
+	  }
+	}
+
+	stopbyte = uint64_unpack(message[pos] + 8);
+	acknowledged(0,stopbyte);
+	startbyte = stopbyte + (unsigned long long) uint32_unpack(message[pos] + 16);
+	stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 20);
+	acknowledged(startbyte,stopbyte);
+	startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 22);
+	stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 24);
+	acknowledged(startbyte,stopbyte);
+	startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 26);
+	stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 28);
+	acknowledged(startbyte,stopbyte);
+	startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 30);
+	stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 32);
+	acknowledged(startbyte,stopbyte);
+	startbyte = stopbyte + (unsigned long long) uint16_unpack(message[pos] + 34);
+	stopbyte = startbyte + (unsigned long long) uint16_unpack(message[pos] + 36);
+	acknowledged(startbyte,stopbyte);
+
+	D = uint16_unpack(message[pos] + 38);
+	SF = D & (2048 + 4096);
+	D -= SF;
+	if (D > 1024) break;
+	if (48 + D > len) break;
+
+	startbyte = uint64_unpack(message[pos] + 40);
+	stopbyte = startbyte + D;
+
+	if (stopbyte > receivewritten + sizeof receivebuf) {
+	  break;
+	  /* of course, flow control would avoid this case */
+	}
+
+	if (SF) {
+	  receiveeof = SF;
+	  receivetotalbytes = stopbyte;
+	}
+
+	for (k = 0;k < D;++k) {
+	  unsigned char ch = message[pos][len - D + k];
+	  unsigned long long where = startbyte + k;
+	  if (where >= receivewritten && where < receivewritten + sizeof receivebuf) {
+	    receivevalid[where & (sizeof receivebuf - 1)] = 1;
+	    receivebuf[where & (sizeof receivebuf - 1)] = ch;
+	  }
+	}
+	for (;;) {
+	  if (receivebytes >= receivewritten + sizeof receivebuf) break;
+	  if (!receivevalid[receivebytes & (sizeof receivebuf - 1)]) break;
+	  ++receivebytes;
+	}
+
+	if (!uint32_unpack(message[pos])) break; /* never acknowledge a pure acknowledgment */
+
+	/* XXX: delay acknowledgments */
+	u = 192;
+        byte_zero(buf + 8,u);
+        buf[7] = u / 16;
+	byte_copy(buf + 12,4,message[pos]);
+	if (receiveeof && receivebytes == receivetotalbytes) {
+	  uint64_pack(buf + 16,receivebytes + 1);
+	} else
+	  uint64_pack(buf + 16,receivebytes);
+	/* XXX: incorporate selective acknowledgments */
+  
+        if (writeall(9,buf + 7,u + 1) == -1) die_fatal("unable to write descriptor 9",0,0);
+      } while(0);
+
+      ++messagefirst;
+      --messagenum;
+    } while(0);
+
+    do { /* try sending data to child: */
+      if (!watchtochild) break;
+      if (tochild[1] < 0) { receivewritten = receivebytes; break; }
+      if (receivewritten >= receivebytes) break;
+
+      pos = receivewritten & (sizeof receivebuf - 1);
+      len = receivebytes - receivewritten;
+      if (pos + len > sizeof receivebuf) len = sizeof receivebuf - pos;
+      r = write(tochild[1],receivebuf + pos,len);
+      if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break;
+      if (r <= 0) {
+        close(tochild[1]);
+        tochild[1] = -1;
+	break;
+      }
+      byte_zero(receivevalid + pos,r);
+      receivewritten += r;
+    } while(0);
+
+    do { /* try closing pipe to child: */
+      if (!receiveeof) break;
+      if (receivewritten < receivetotalbytes) break;
+      if (tochild[1] < 0) break;
+
+      if (receiveeof == 4096)
+        ; /* XXX: UNIX doesn't provide a way to signal an error through a pipe */
+      close(tochild[1]);
+      tochild[1] = -1;
+    } while(0);
+
+  }
+
+
+  do {
+    r = waitpid(child,&childstatus,0);
+  } while (r == -1 && errno == EINTR);
+
+  if (!WIFEXITED(childstatus)) { errno = 0; die_fatal("process killed by signal",0,0); }
+  return WEXITSTATUS(childstatus);
+}
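
The congestion control above maintains a smoothed round-trip estimate in the classic Jacobson/Karels style: the average moves 1/8 of the way toward each new sample, the mean deviation moves 1/4 of the way toward the new absolute error, and the retransmission timeout is the average plus four deviations, padded by 8*nsecperblock as anti-spiking slack for delayed acknowledgments. A condensed sketch of just that update, using the same variable names as curvecpmessage.c:

  /* Jacobson-style retransmission-timeout update, as performed per acknowledged block above. */
  static long long rtt_average;
  static long long rtt_deviation;
  static long long rtt_timeout;

  static void rtt_update(long long rtt,long long nsecperblock)
  {
    long long delta;
    if (!rtt_average) { rtt_average = rtt; rtt_deviation = rtt / 2; }
    delta = rtt - rtt_average;
    rtt_average += delta / 8;
    if (delta < 0) delta = -delta;
    delta -= rtt_deviation;
    rtt_deviation += delta / 4;
    rtt_timeout = rtt_average + 4 * rtt_deviation;
    rtt_timeout += 8 * nsecperblock;   /* anti-spiking: allow for delayed acks */
  }
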
diff --git a/nacl/nacl-20110221/curvecp/curvecpprintkey.c b/nacl/nacl-20110221/curvecp/curvecpprintkey.c
new file mode 100644
index 00000000..8fd26bcf
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/curvecpprintkey.c
@@ -0,0 +1,46 @@
+#include <unistd.h>
+#include "die.h"
+#include "e.h"
+#include "load.h"
+#include "writeall.h"
+#include "crypto_box.h"
+
+unsigned char pk[crypto_box_PUBLICKEYBYTES];
+unsigned char out[crypto_box_PUBLICKEYBYTES * 2 + 1];
+
+void die_usage(void)
+{
+  die_1(111,"curvecpprintkey: usage: curvecpprintkey keydir\n");
+}
+
+void die_fatal(const char *trouble,const char *d,const char *fn)
+{
+  if (d) {
+    if (fn) die_9(111,"curvecpmakekey: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n");
+    die_7(111,"curvecpmakekey: fatal: ",trouble," ",d,": ",e_str(errno),"\n");
+  }
+  die_5(111,"curvecpmakekey: fatal: ",trouble,": ",e_str(errno),"\n");
+}
+
+int main(int argc,char **argv)
+{
+  char *d;
+  long long j;
+
+  if (!argv[0]) die_usage();
+  if (!argv[1]) die_usage();
+  d = argv[1];
+
+  if (chdir(d) == -1) die_fatal("unable to chdir to directory",d,0);
+  if (load("publickey",pk,sizeof pk) == -1) die_fatal("unable to read",d,"publickey");
+
+  for (j = 0;j < crypto_box_PUBLICKEYBYTES;++j) {
+    out[2 * j + 0] = "0123456789abcdef"[15 & (int) (pk[j] >> 4)];
+    out[2 * j + 1] = "0123456789abcdef"[15 & (int) (pk[j] >> 0)];
+  }
+  out[2 * j] = '\n';
+
+  if (writeall(1,out,sizeof out) == -1) die_fatal("unable to write output",0,0);
+  
+  return 0;
+}
diff --git a/nacl/nacl-20110221/curvecp/curvecpserver.c b/nacl/nacl-20110221/curvecp/curvecpserver.c
new file mode 100644
index 00000000..82cc6670
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/curvecpserver.c
@@ -0,0 +1,497 @@
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <poll.h>
+#include "e.h"
+#include "die.h"
+#include "byte.h"
+#include "open.h"
+#include "load.h"
+#include "socket.h"
+#include "uint64_pack.h"
+#include "uint64_unpack.h"
+#include "writeall.h"
+#include "nanoseconds.h"
+#include "safenonce.h"
+#include "nameparse.h"
+#include "hexparse.h"
+#include "portparse.h"
+#include "randommod.h"
+
+#include "randombytes.h"
+#include "crypto_box.h"
+#include "crypto_secretbox.h"
+#if crypto_box_PUBLICKEYBYTES != 32
+error!
+#endif
+#if crypto_box_NONCEBYTES != 24
+error!
+#endif
+#if crypto_box_BOXZEROBYTES != 16
+error!
+#endif
+#if crypto_box_ZEROBYTES != 32
+error!
+#endif
+#if crypto_box_BEFORENMBYTES != 32
+error!
+#endif
+#if crypto_secretbox_KEYBYTES != 32
+error!
+#endif
+#if crypto_secretbox_NONCEBYTES != 24
+error!
+#endif
+#if crypto_secretbox_BOXZEROBYTES != 16
+error!
+#endif
+#if crypto_secretbox_ZEROBYTES != 32
+error!
+#endif
+
+int flagverbose = 1;
+
+#define USAGE "\
+curvecpserver: how to use:\n\
+curvecpserver:   -q (optional): no error messages\n\
+curvecpserver:   -Q (optional): print error messages (default)\n\
+curvecpserver:   -v (optional): print extra information\n\
+curvecpserver:   -c n (optional): allow at most n clients at once (default 100)\n\
+curvecpserver:   sname: server's name\n\
+curvecpserver:   keydir: use this public-key directory\n\
+curvecpserver:   ip: server's IP address\n\
+curvecpserver:   port: server's UDP port\n\
+curvecpserver:   ext: server's extension\n\
+curvecpserver:   prog: run this server\n\
+"
+
+void die_usage(const char *s)
+{
+  if (s) die_4(100,USAGE,"curvecpserver: fatal: ",s,"\n");
+  die_1(100,USAGE);
+}
+
+void die_fatal(const char *trouble,const char *d,const char *fn)
+{
+  if (!flagverbose) die_0(111);
+  if (d) {
+    if (fn) die_9(111,"curvecpserver: fatal: ",trouble," ",d,"/",fn,": ",e_str(errno),"\n");
+    die_7(111,"curvecpserver: fatal: ",trouble," ",d,": ",e_str(errno),"\n");
+  }
+  die_5(111,"curvecpserver: fatal: ",trouble,": ",e_str(errno),"\n");
+}
+
+int ipparse(unsigned char *y,const char *x)
+{
+  long long j;
+  long long k;
+  long long d;
+
+  for (k = 0;k < 4;++k) y[k] = 0;
+  for (k = 0;k < 4;++k) {
+    d = 0;
+    for (j = 0;j < 3 && x[j] >= '0' && x[j] <= '9';++j) d = d * 10 + (x[j] - '0');
+    if (j == 0) return 0;
+    x += j;
+    if (k >= 0 && k < 4) y[k] = d;
+    if (k < 3) {
+      if (*x != '.') return 0;
+      ++x;
+    }
+  }
+  if (*x) return 0;
+  return 1;
+}
+
+int maxparse(long long *y,const char *x)
+{
+  long long d;
+  long long j;
+
+  d = 0;
+  for (j = 0;j < 9 && x[j] >= '0' && x[j] <= '9';++j) d = d * 10 + (x[j] - '0');
+  if (x[j]) return 0;
+  if (d < 1) return 0;
+  if (d > 65535) return 0;
+  *y = d;
+  return 1;
+}
+
+/* cookies: */
+long long nextminute;
+unsigned char minutekey[32];
+unsigned char lastminutekey[32];
+
+/* routing to the server: */
+unsigned char serverip[4];
+unsigned char serverport[2];
+unsigned char serverextension[16];
+int udpfd = -1;
+
+/* server security: */
+char *keydir = 0;
+unsigned char servername[256];
+unsigned char serverlongtermsk[32];
+unsigned char servershorttermpk[32];
+unsigned char servershorttermsk[32];
+
+/* routing to the client: */
+unsigned char clientextension[16];
+
+/* client security: */
+unsigned char clientlongtermpk[32];
+unsigned char clientshorttermpk[32];
+
+/* shared secrets: */
+unsigned char clientshortserverlong[32];
+unsigned char clientshortservershort[32];
+unsigned char clientlongserverlong[32];
+
+unsigned char allzero[128] = {0};
+
+unsigned char nonce[24];
+unsigned char text[2048];
+
+unsigned char packetip[4];
+unsigned char packetport[2];
+unsigned char packet[4096];
+crypto_uint64 packetnonce;
+
+#define MESSAGELEN 1104
+
+struct activeclient {
+  unsigned char clientshorttermpk[32];
+  unsigned char clientshortservershort[32];
+  crypto_uint64 receivednonce;
+  crypto_uint64 sentnonce;
+  long long messagelen;
+  pid_t child;
+  int tochild;
+  int fromchild;
+  unsigned char clientextension[16];
+  unsigned char clientip[4];
+  unsigned char clientport[2];
+  unsigned char message[MESSAGELEN];
+} ;
+
+const char *strmaxactiveclients = "100";
+long long maxactiveclients = 0;
+long long numactiveclients = 0;
+struct activeclient *activeclients = 0;
+struct pollfd *p;
+
+int fdwd = -1;
+
+int pi0[2];
+int pi1[2];
+
+unsigned char childbuf[4096];
+long long childbuflen = 0;
+unsigned char childmessage[2048];
+long long childmessagelen = 0;
+
+int main(int argc,char **argv)
+{
+  long long r;
+  long long i;
+  long long k;
+
+  signal(SIGPIPE,SIG_IGN);
+  signal(SIGCHLD,SIG_IGN);
+
+  if (!argv[0]) die_usage(0);
+  for (;;) {
+    char *x;
+    if (!argv[1]) break;
+    if (argv[1][0] != '-') break;
+    x = *++argv;
+    if (x[0] == '-' && x[1] == 0) break;
+    if (x[0] == '-' && x[1] == '-' && x[2] == 0) break;
+    while (*++x) {
+      if (*x == 'q') { flagverbose = 0; continue; }
+      if (*x == 'Q') { flagverbose = 1; continue; }
+      if (*x == 'v') { if (flagverbose == 2) flagverbose = 3; else flagverbose = 2; continue; }
+      if (*x == 'c') {
+        if (x[1]) { strmaxactiveclients = x + 1; break; }
+	if (argv[1]) { strmaxactiveclients = *++argv; break; }
+      }
+      die_usage(0);
+    }
+  }
+  if (!maxparse(&maxactiveclients,strmaxactiveclients)) die_usage("concurrency must be between 1 and 65535");
+  if (!nameparse(servername,*++argv)) die_usage("sname must be at most 255 bytes, at most 63 bytes between dots");
+  keydir = *++argv; if (!keydir) die_usage("missing keydir");
+  if (!ipparse(serverip,*++argv)) die_usage("ip must be an IPv4 address");
+  if (!portparse(serverport,*++argv)) die_usage("port must be an integer between 0 and 65535");
+  if (!hexparse(serverextension,16,*++argv)) die_usage("ext must be exactly 32 hex characters");
+  if (!*++argv) die_usage("missing prog");
+
+  for (;;) {
+    r = open_read("/dev/null");
+    if (r == -1) die_fatal("unable to open /dev/null",0,0);
+    if (r > 9) { close(r); break; }
+  }
+
+  activeclients = malloc(maxactiveclients * sizeof(struct activeclient));
+  if (!activeclients) die_fatal("unable to create activeclients array",0,0);
+  randombytes((void *) activeclients,maxactiveclients * sizeof(struct activeclient));
+  for (i = 0;i < maxactiveclients;++i) {
+    activeclients[i].child = -1;
+    activeclients[i].tochild = -1;
+    activeclients[i].fromchild = -1;
+    activeclients[i].receivednonce = 0;
+    activeclients[i].sentnonce = randommod(281474976710656LL);
+  }
+  
+  p = malloc((1 + maxactiveclients) * sizeof(struct pollfd));
+  if (!p) die_fatal("unable to create poll array",0,0);
+
+  fdwd = open_cwd();
+  if (fdwd == -1) die_fatal("unable to open current directory",0,0);
+
+  if (chdir(keydir) == -1) die_fatal("unable to chdir to",keydir,0);
+  if (load(".expertsonly/secretkey",serverlongtermsk,sizeof serverlongtermsk) == -1) die_fatal("unable to read secret key from",keydir,0);
+
+  udpfd = socket_udp();
+  if (udpfd == -1) die_fatal("unable to create socket",0,0);
+  if (socket_bind(udpfd,serverip,serverport) == -1) die_fatal("unable to bind socket",0,0);
+
+  randombytes(minutekey,sizeof minutekey);
+  randombytes(lastminutekey,sizeof lastminutekey);
+  nextminute = nanoseconds() + 60000000000ULL;
+
+  for (;;) {
+    long long timeout = nextminute - nanoseconds();
+    if (timeout <= 0) {
+      timeout = 60000000000ULL;
+      byte_copy(lastminutekey,sizeof lastminutekey,minutekey);
+      randombytes(minutekey,sizeof minutekey);
+      nextminute = nanoseconds() + timeout;
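+      /* refill every packet-handling buffer and the old short-term key with
+         fresh randomness so that nothing from the previous minute survives */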
+      randombytes(packet,sizeof packet);
+      randombytes(packetip,sizeof packetip);
+      randombytes(packetport,sizeof packetport);
+      randombytes(clientshorttermpk,sizeof clientshorttermpk);
+      randombytes(clientshortserverlong,sizeof clientshortserverlong);
+      randombytes(nonce,sizeof nonce);
+      randombytes(text,sizeof text);
+      randombytes(childbuf,sizeof childbuf);
+      randombytes(childmessage,sizeof childmessage);
+      randombytes(servershorttermpk,sizeof servershorttermpk);
+      randombytes(servershorttermsk,sizeof servershorttermsk);
+    }
+
+    for (i = 0;i < numactiveclients;++i) {
+      p[i].fd = activeclients[i].fromchild;
+      p[i].events = POLLIN;
+    }
+    p[numactiveclients].fd = udpfd;
+    p[numactiveclients].events = POLLIN;
+    if (poll(p,1 + numactiveclients,timeout / 1000000 + 1) < 0) continue;
+
+    do { /* try receiving a packet: */
+      if (!p[numactiveclients].revents) break;
+      r = socket_recv(udpfd,packet,sizeof packet,packetip,packetport);
+      if (r < 80) break;
+      if (r > 1184) break;
+      if (r & 15) break;
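+      /* all client packets share a common header: 8-byte magic "QvnQ5Xl" plus a
+         type letter (H, I or M), 16-byte server extension, 16-byte client
+         extension, then type-specific fields */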
+      if (!(byte_isequal(packet,7,"QvnQ5Xl") & byte_isequal(packet + 8,16,serverextension))) break;
+      byte_copy(clientextension,16,packet + 24);
+      if (packet[7] == 'H') { /* Hello packet: */
+        if (r != 224) break;
+	byte_copy(clientshorttermpk,32,packet + 40);
+	crypto_box_beforenm(clientshortserverlong,clientshorttermpk,serverlongtermsk);
+	byte_copy(nonce,16,"CurveCP-client-H");
+	byte_copy(nonce + 16,8,packet + 136);
+	byte_zero(text,16);
+	byte_copy(text + 16,80,packet + 144);
+	if (crypto_box_open_afternm(text,text,96,nonce,clientshortserverlong)) break;
+
+	/* send Cookie packet: */
+
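+	/* build the cookie: the client's short-term public key and the freshly
+	   generated server short-term secret key, boxed under the rotating minute
+	   key, so the server keeps no per-client state until the client returns
+	   this cookie in an Initiate packet */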
+	crypto_box_keypair(servershorttermpk,servershorttermsk);
+	byte_zero(text + 64,32);
+	byte_copy(text + 96,32,clientshorttermpk);
+	byte_copy(text + 128,32,servershorttermsk);
+	byte_copy(nonce,8,"minute-k");
+	if (safenonce(nonce + 8,1) == -1) die_fatal("nonce-generation disaster",0,0);
+	crypto_secretbox(text + 64,text + 64,96,nonce,minutekey);
+	byte_copy(text + 64,16,nonce + 8);
+
+	byte_zero(text,32);
+	byte_copy(text + 32,32,servershorttermpk);
+	byte_copy(nonce,8,"CurveCPK"); /* reusing the other 16 bytes */
+	crypto_box_afternm(text,text,160,nonce,clientshortserverlong);
+
+	byte_copy(packet,8,"RL3aNMXK");
+	byte_copy(packet + 8,16,clientextension);
+	byte_copy(packet + 24,16,serverextension);
+	byte_copy(packet + 40,16,nonce + 8);
+	byte_copy(packet + 56,144,text + 16);
+
+	socket_send(udpfd,packet,200,packetip,packetport);
+      }
+      if (packet[7] == 'I') { /* Initiate packet: */
+        if (r < 560) break;
+	for (i = 0;i < numactiveclients;++i) /* XXX use better data structure */
+	  if (byte_isequal(activeclients[i].clientshorttermpk,32,packet + 40))
+	    break;
+	if (i < numactiveclients) {
+	  packetnonce = uint64_unpack(packet + 168);
+	  if (packetnonce <= activeclients[i].receivednonce) break;
+	  byte_copy(nonce,16,"CurveCP-client-I");
+	  byte_copy(nonce + 16,8,packet + 168);
+	  byte_zero(text,16);
+  	  byte_copy(text + 16,r - 176,packet + 176);
+	  if (crypto_box_open_afternm(text,text,r - 160,nonce,activeclients[i].clientshortservershort)) break;
+
+	  /* XXX: update clientip, clientextension; but not if client has spoken recently */
+	  activeclients[i].receivednonce = packetnonce;
+	  text[383] = (r - 544) >> 4;
+	  if (writeall(activeclients[i].tochild,text + 383,r - 543) == -1)
+	    ; /* child is gone; will see eof later */
+	  break;
+	}
+	if (i == maxactiveclients) break;
+
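+	/* unknown client: open the cookie under the current minute key, falling
+	   back to the previous minute's key, to recover the server short-term
+	   secret key that was handed out in the Cookie packet */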
+	byte_copy(nonce,8,"minute-k");
+	byte_copy(nonce + 8,16,packet + 72);
+	byte_zero(text,16);
+	byte_copy(text + 16,80,packet + 88);
+	if (crypto_secretbox_open(text,text,96,nonce,minutekey)) {
+	  byte_zero(text,16);
+	  byte_copy(text + 16,80,packet + 88);
+	  if (crypto_secretbox_open(text,text,96,nonce,lastminutekey)) break;
+	}
+	if (!byte_isequal(packet + 40,32,text + 32)) break;
+	byte_copy(servershorttermsk,32,text + 64);
+	byte_copy(clientshorttermpk,32,packet + 40);
+	crypto_box_beforenm(clientshortservershort,clientshorttermpk,servershorttermsk);
+
+	byte_copy(nonce,16,"CurveCP-client-I");
+	byte_copy(nonce + 16,8,packet + 168);
+	byte_zero(text,16);
+	byte_copy(text + 16,r - 176,packet + 176);
+	if (crypto_box_open_afternm(text,text,r - 160,nonce,clientshortservershort)) break;
+
+	if (!byte_isequal(text + 128,256,servername)) break;
+
+	/* XXX skip if client authentication is not desired: */
+	byte_copy(clientlongtermpk,32,text + 32);
+	/* XXX impose policy limitations on clients: known, maxconn */
+	/* XXX for known clients, retrieve shared secret from cache: */
+	crypto_box_beforenm(clientlongserverlong,clientlongtermpk,serverlongtermsk);
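+	/* the vouch: a box from the client's long-term key to the server's
+	   long-term key whose plaintext must equal the client's short-term public
+	   key, proving that the long-term key authorized this connection */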
+	byte_copy(nonce,8,"CurveCPV");
+	byte_copy(nonce + 8,16,text + 64);
+	byte_zero(text + 64,16);
+	if (crypto_box_open_afternm(text + 64,text + 64,64,nonce,clientlongserverlong)) break;
+	if (!byte_isequal(text + 96,32,clientshorttermpk)) break;
+
+	if (open_pipe(pi0) == -1) break; /* XXX: error message */
+	if (open_pipe(pi1) == -1) { close(pi0[0]); close(pi0[1]); break; } /* XXX: error message */
+
+	activeclients[i].child = fork();
+	if (activeclients[i].child == -1) {
+	  close(pi0[0]); close(pi0[1]);
+	  close(pi1[0]); close(pi1[1]);
+	  break; /* XXX: error message */
+	}
+	if (activeclients[i].child == 0) {
+	  if (fchdir(fdwd) == -1) die_fatal("unable to chdir to original directory",0,0);
+	  close(8);
+	  if (dup(pi0[0]) != 8) die_fatal("unable to dup",0,0);
+	  close(9);
+	  if (dup(pi1[1]) != 9) die_fatal("unable to dup",0,0);
+	  /* XXX: set up environment variables */
+	  signal(SIGPIPE,SIG_DFL);
+	  signal(SIGCHLD,SIG_DFL);
+	  execvp(*argv,argv);
+	  die_fatal("unable to run",*argv,0);
+	}
+
+	activeclients[i].tochild = pi0[1]; close(pi0[0]);
+	activeclients[i].fromchild = pi1[0]; close(pi1[1]);
+	activeclients[i].messagelen = 0;
+	byte_copy(activeclients[i].clientshorttermpk,32,clientshorttermpk);
+	byte_copy(activeclients[i].clientshortservershort,32,clientshortservershort);
+	activeclients[i].receivednonce = uint64_unpack(packet + 168);
+	byte_copy(activeclients[i].clientextension,16,clientextension);
+	byte_copy(activeclients[i].clientip,4,packetip);
+	byte_copy(activeclients[i].clientport,2,packetport);
+	++numactiveclients;
+
+	text[383] = (r - 544) >> 4;
+	if (writeall(activeclients[i].tochild,text + 383,r - 543) == -1)
+	  ; /* child is gone; will see eof later */
+      }
+      if (packet[7] == 'M') { /* Message packet: */
+        if (r < 112) break;
+        for (i = 0;i < numactiveclients;++i) /* XXX use better data structure */
+	  if (byte_isequal(activeclients[i].clientshorttermpk,32,packet + 40))
+	    break;
+	if (i < numactiveclients) {
+	  packetnonce = uint64_unpack(packet + 72);
+	  if (packetnonce <= activeclients[i].receivednonce) break;
+          byte_copy(nonce,16,"CurveCP-client-M");
+	  byte_copy(nonce + 16,8,packet + 72);
+	  byte_zero(text,16);
+	  byte_copy(text + 16,r - 80,packet + 80);
+	  if (crypto_box_open_afternm(text,text,r - 64,nonce,activeclients[i].clientshortservershort)) break;
+
+	  /* XXX: update clientip, clientextension */
+	  activeclients[i].receivednonce = packetnonce;
+	  text[31] = (r - 96) >> 4;
+	  if (writeall(activeclients[i].tochild,text + 31,r - 95) == -1)
+	    ; /* child is gone; will see eof later */
+	  break;
+	}
+      }
+    } while (0);
+
+    for (i = numactiveclients - 1;i >= 0;--i) {
+      do {
+        if (!p[i].revents) break;
+	r = read(activeclients[i].fromchild,childbuf,sizeof childbuf);
+	if (r == -1) if (errno == EINTR || errno == EWOULDBLOCK || errno == EAGAIN) break;
+	if (r <= 0) goto endconnection;
+	childbuflen = r;
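+	/* the child prefixes each message with one byte giving its length in
+	   16-byte units; accumulate bytes until the message is complete, then box
+	   it and send it as a server Message packet */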
+	for (k = 0;k < childbuflen;++k) {
+	  r = activeclients[i].messagelen;
+	  if (r < 0) goto endconnection;
+	  if (r >= MESSAGELEN) goto endconnection;
+	  activeclients[i].message[r] = childbuf[k];
+	  if (r == 0) if (childbuf[k] & 128) goto endconnection;
+	  activeclients[i].messagelen = r + 1;
+	  if (r == 16 * (unsigned long long) activeclients[i].message[0]) {
+	    if (r < 16) goto endconnection;
+	    if (r > 1088) goto endconnection;
+	    byte_copy(nonce,16,"CurveCP-server-M");
+	    uint64_pack(nonce + 16,++activeclients[i].sentnonce);
+	    byte_zero(text,32);
+	    byte_copy(text + 32,r,activeclients[i].message + 1);
+	    crypto_box_afternm(text,text,r + 32,nonce,activeclients[i].clientshortservershort);
+	    byte_copy(packet,8,"RL3aNMXM");
+	    byte_copy(packet + 8,16,clientextension);
+	    byte_copy(packet + 24,16,serverextension);
+	    byte_copy(packet + 40,8,nonce + 16);
+	    byte_copy(packet + 48,r + 16,text + 16);
+	    socket_send(udpfd,packet,r + 64,activeclients[i].clientip,activeclients[i].clientport);
+	    activeclients[i].messagelen = 0;
+	  }
+	}
+	break;
+
+	endconnection:
+
+	/* XXX: cache cookie if it's recent */
+	close(activeclients[i].fromchild); activeclients[i].fromchild = -1;
+	close(activeclients[i].tochild); activeclients[i].tochild = -1;
+	--numactiveclients;
+	activeclients[i] = activeclients[numactiveclients];
+	randombytes((void *) &activeclients[numactiveclients],sizeof(struct activeclient));
+      } while (0);
+    }
+  }
+}
diff --git a/nacl/nacl-20110221/curvecp/die.c b/nacl/nacl-20110221/curvecp/die.c
new file mode 100644
index 00000000..2220cf38
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/die.c
@@ -0,0 +1,42 @@
+#include <unistd.h>
+#include "writeall.h"
+#include "die.h"
+
+void die_9(int e
+  ,const char *s0
+  ,const char *s1
+  ,const char *s2
+  ,const char *s3
+  ,const char *s4
+  ,const char *s5
+  ,const char *s6
+  ,const char *s7
+  ,const char *s8
+)
+{
+  const char *s[9];
+  const char *x;
+  char buf[1024];
+  int buflen = 0;
+  int i;
+
+  s[0] = s0;
+  s[1] = s1;
+  s[2] = s2;
+  s[3] = s3;
+  s[4] = s4;
+  s[5] = s5;
+  s[6] = s6;
+  s[7] = s7;
+  s[8] = s8;
+  for (i = 0;i < 9;++i) {
+    x = s[i];
+    if (!x) continue;
+    while (*x) {
+      if (buflen == sizeof buf) { writeall(2,buf,buflen); buflen = 0; }
+      buf[buflen++] = *x++;
+    }
+  }
+  writeall(2,buf,buflen);
+  _exit(e);
+}
diff --git a/nacl/nacl-20110221/curvecp/die.h b/nacl/nacl-20110221/curvecp/die.h
new file mode 100644
index 00000000..52ec7616
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/die.h
@@ -0,0 +1,16 @@
+#ifndef DIE_H
+#define DIE_H
+
+extern void die_9(int,const char *,const char *,const char *,const char *,const char *,const char *,const char *,const char *,const char *);
+
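+/* die_9 writes its non-NULL string arguments to stderr and _exits with the given
+   code; the die_N macros pad the unused arguments with 0 */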
+#define die_8(x,a,b,c,d,e,f,g,h) die_9(x,a,b,c,d,e,f,g,h,0)
+#define die_7(x,a,b,c,d,e,f,g) die_8(x,a,b,c,d,e,f,g,0)
+#define die_6(x,a,b,c,d,e,f) die_7(x,a,b,c,d,e,f,0)
+#define die_5(x,a,b,c,d,e) die_6(x,a,b,c,d,e,0)
+#define die_4(x,a,b,c,d) die_5(x,a,b,c,d,0)
+#define die_3(x,a,b,c) die_4(x,a,b,c,0)
+#define die_2(x,a,b) die_3(x,a,b,0)
+#define die_1(x,a) die_2(x,a,0)
+#define die_0(x) die_1(x,0)
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/e.c b/nacl/nacl-20110221/curvecp/e.c
new file mode 100644
index 00000000..00ff7fd9
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/e.c
@@ -0,0 +1,106 @@
+#include "e.h"
+
+#define X(e,s) if (i == e) return s;
+
+const char *e_str(int i)
+{
+  X(0,"no error");
+  X(EINTR,"interrupted system call")
+  X(ENOMEM,"out of memory")
+  X(ENOENT,"file does not exist")
+  X(ETXTBSY,"text busy")
+  X(EIO,"input/output error")
+  X(EEXIST,"file already exists")
+  X(ETIMEDOUT,"timed out")
+  X(EINPROGRESS,"operation in progress")
+  X(EAGAIN,"temporary failure")
+  X(EWOULDBLOCK,"input/output would block")
+  X(EPIPE,"broken pipe")
+  X(EPERM,"permission denied")
+  X(EACCES,"access denied")
+  X(ENXIO,"device not configured")
+  X(EPROTO,"protocol error")
+  X(EISDIR,"is a directory")
+  X(ESRCH,"no such process")
+  X(E2BIG,"argument list too long")
+  X(ENOEXEC,"exec format error")
+  X(EBADF,"file descriptor not open")
+  X(ECHILD,"no child processes")
+  X(EDEADLK,"operation would cause deadlock")
+  X(EFAULT,"bad address")
+  X(ENOTBLK,"not a block device")
+  X(EBUSY,"device busy")
+  X(EXDEV,"cross-device link")
+  X(ENODEV,"device does not support operation")
+  X(ENOTDIR,"not a directory")
+  X(EINVAL,"invalid argument")
+  X(ENFILE,"system cannot open more files")
+  X(EMFILE,"process cannot open more files")
+  X(ENOTTY,"not a tty")
+  X(EFBIG,"file too big")
+  X(ENOSPC,"out of disk space")
+  X(ESPIPE,"unseekable descriptor")
+  X(EROFS,"read-only file system")
+  X(EMLINK,"too many links")
+  X(EDOM,"input out of range")
+  X(ERANGE,"output out of range")
+  X(EALREADY,"operation already in progress")
+  X(ENOTSOCK,"not a socket")
+  X(EDESTADDRREQ,"destination address required")
+  X(EMSGSIZE,"message too long")
+  X(EPROTOTYPE,"incorrect protocol type")
+  X(ENOPROTOOPT,"protocol not available")
+  X(EPROTONOSUPPORT,"protocol not supported")
+  X(ESOCKTNOSUPPORT,"socket type not supported")
+  X(EOPNOTSUPP,"operation not supported")
+  X(EPFNOSUPPORT,"protocol family not supported")
+  X(EAFNOSUPPORT,"address family not supported")
+  X(EADDRINUSE,"address already used")
+  X(EADDRNOTAVAIL,"address not available")
+  X(ENETDOWN,"network down")
+  X(ENETUNREACH,"network unreachable")
+  X(ENETRESET,"network reset")
+  X(ECONNABORTED,"connection aborted")
+  X(ECONNRESET,"connection reset")
+  X(ENOBUFS,"out of buffer space")
+  X(EISCONN,"already connected")
+  X(ENOTCONN,"not connected")
+  X(ESHUTDOWN,"socket shut down")
+  X(ETOOMANYREFS,"too many references")
+  X(ECONNREFUSED,"connection refused")
+  X(ELOOP,"symbolic link loop")
+  X(ENAMETOOLONG,"file name too long")
+  X(EHOSTDOWN,"host down")
+  X(EHOSTUNREACH,"host unreachable")
+  X(ENOTEMPTY,"directory not empty")
+  X(EPROCLIM,"too many processes")
+  X(EUSERS,"too many users")
+  X(EDQUOT,"disk quota exceeded")
+  X(ESTALE,"stale NFS file handle")
+  X(EREMOTE,"too many levels of remote in path")
+  X(EBADRPC,"RPC structure is bad")
+  X(ERPCMISMATCH,"RPC version mismatch")
+  X(EPROGUNAVAIL,"RPC program unavailable")
+  X(EPROGMISMATCH,"program version mismatch")
+  X(EPROCUNAVAIL,"bad procedure for program")
+  X(ENOLCK,"no locks available")
+  X(ENOSYS,"system call not available")
+  X(EFTYPE,"bad file type")
+  X(EAUTH,"authentication error")
+  X(ENEEDAUTH,"not authenticated")
+  X(ENOSTR,"not a stream device")
+  X(ETIME,"timer expired")
+  X(ENOSR,"out of stream resources")
+  X(ENOMSG,"no message of desired type")
+  X(EBADMSG,"bad message type")
+  X(EIDRM,"identifier removed")
+  X(ENONET,"machine not on network")
+  X(EREMOTE,"object not local")
+  X(ENOLINK,"link severed")
+  X(EADV,"advertise error")
+  X(ESRMNT,"srmount error")
+  X(ECOMM,"communication error")
+  X(EMULTIHOP,"multihop attempted")
+  X(EREMCHG,"remote address changed")
+  return "unknown error";
+}
diff --git a/nacl/nacl-20110221/curvecp/e.h b/nacl/nacl-20110221/curvecp/e.h
new file mode 100644
index 00000000..add0768b
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/e.h
@@ -0,0 +1,438 @@
+#ifndef E_H
+#define E_H
+
+#include <errno.h>
+
+extern const char *e_str(int);
+
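+/* fallback definitions for errno names that some systems lack; the negative
+   placeholder values can never collide with real (positive) errno values */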
+#ifndef EPERM
+#define EPERM (-5001)
+#endif
+#ifndef ENOENT
+#define ENOENT (-5002)
+#endif
+#ifndef ESRCH
+#define ESRCH (-5003)
+#endif
+#ifndef EINTR
+#define EINTR (-5004)
+#endif
+#ifndef EIO
+#define EIO (-5005)
+#endif
+#ifndef ENXIO
+#define ENXIO (-5006)
+#endif
+#ifndef E2BIG
+#define E2BIG (-5007)
+#endif
+#ifndef ENOEXEC
+#define ENOEXEC (-5008)
+#endif
+#ifndef EBADF
+#define EBADF (-5009)
+#endif
+#ifndef ECHILD
+#define ECHILD (-5010)
+#endif
+#ifndef EAGAIN
+#define EAGAIN (-5011)
+#endif
+#ifndef EWOULDBLOCK
+#define EWOULDBLOCK (-7011)
+#endif
+#ifndef ENOMEM
+#define ENOMEM (-5012)
+#endif
+#ifndef EACCES
+#define EACCES (-5013)
+#endif
+#ifndef EFAULT
+#define EFAULT (-5014)
+#endif
+#ifndef ENOTBLK
+#define ENOTBLK (-5015)
+#endif
+#ifndef EBUSY
+#define EBUSY (-5016)
+#endif
+#ifndef EEXIST
+#define EEXIST (-5017)
+#endif
+#ifndef EXDEV
+#define EXDEV (-5018)
+#endif
+#ifndef ENODEV
+#define ENODEV (-5019)
+#endif
+#ifndef ENOTDIR
+#define ENOTDIR (-5020)
+#endif
+#ifndef EISDIR
+#define EISDIR (-5021)
+#endif
+#ifndef EINVAL
+#define EINVAL (-5022)
+#endif
+#ifndef ENFILE
+#define ENFILE (-5023)
+#endif
+#ifndef EMFILE
+#define EMFILE (-5024)
+#endif
+#ifndef ENOTTY
+#define ENOTTY (-5025)
+#endif
+#ifndef ETXTBSY
+#define ETXTBSY (-5026)
+#endif
+#ifndef EFBIG
+#define EFBIG (-5027)
+#endif
+#ifndef ENOSPC
+#define ENOSPC (-5028)
+#endif
+#ifndef ESPIPE
+#define ESPIPE (-5029)
+#endif
+#ifndef EROFS
+#define EROFS (-5030)
+#endif
+#ifndef EMLINK
+#define EMLINK (-5031)
+#endif
+#ifndef EPIPE
+#define EPIPE (-5032)
+#endif
+#ifndef EDOM
+#define EDOM (-5033)
+#endif
+#ifndef ERANGE
+#define ERANGE (-5034)
+#endif
+#ifndef EDEADLK
+#define EDEADLK (-5035)
+#endif
+#ifndef EDEADLOCK
+#define EDEADLOCK (-7035)
+#endif
+#ifndef ENAMETOOLONG
+#define ENAMETOOLONG (-5036)
+#endif
+#ifndef ENOLCK
+#define ENOLCK (-5037)
+#endif
+#ifndef ENOSYS
+#define ENOSYS (-5038)
+#endif
+#ifndef ENOTEMPTY
+#define ENOTEMPTY (-5039)
+#endif
+#ifndef ELOOP
+#define ELOOP (-5040)
+#endif
+#ifndef ENOMSG
+#define ENOMSG (-5042)
+#endif
+#ifndef EIDRM
+#define EIDRM (-5043)
+#endif
+#ifndef ECHRNG
+#define ECHRNG (-5044)
+#endif
+#ifndef EL2NSYNC
+#define EL2NSYNC (-5045)
+#endif
+#ifndef EL3HLT
+#define EL3HLT (-5046)
+#endif
+#ifndef EL3RST
+#define EL3RST (-5047)
+#endif
+#ifndef ELNRNG
+#define ELNRNG (-5048)
+#endif
+#ifndef EUNATCH
+#define EUNATCH (-5049)
+#endif
+#ifndef ENOCSI
+#define ENOCSI (-5050)
+#endif
+#ifndef EL2HLT
+#define EL2HLT (-5051)
+#endif
+#ifndef EBADE
+#define EBADE (-5052)
+#endif
+#ifndef EBADR
+#define EBADR (-5053)
+#endif
+#ifndef EXFULL
+#define EXFULL (-5054)
+#endif
+#ifndef ENOANO
+#define ENOANO (-5055)
+#endif
+#ifndef EBADRQC
+#define EBADRQC (-5056)
+#endif
+#ifndef EBADSLT
+#define EBADSLT (-5057)
+#endif
+#ifndef EBFONT
+#define EBFONT (-5059)
+#endif
+#ifndef ENOSTR
+#define ENOSTR (-5060)
+#endif
+#ifndef ENODATA
+#define ENODATA (-5061)
+#endif
+#ifndef ETIME
+#define ETIME (-5062)
+#endif
+#ifndef ENOSR
+#define ENOSR (-5063)
+#endif
+#ifndef ENONET
+#define ENONET (-5064)
+#endif
+#ifndef ENOPKG
+#define ENOPKG (-5065)
+#endif
+#ifndef EREMOTE
+#define EREMOTE (-5066)
+#endif
+#ifndef ENOLINK
+#define ENOLINK (-5067)
+#endif
+#ifndef EADV
+#define EADV (-5068)
+#endif
+#ifndef ESRMNT
+#define ESRMNT (-5069)
+#endif
+#ifndef ECOMM
+#define ECOMM (-5070)
+#endif
+#ifndef EPROTO
+#define EPROTO (-5071)
+#endif
+#ifndef EMULTIHOP
+#define EMULTIHOP (-5072)
+#endif
+#ifndef EDOTDOT
+#define EDOTDOT (-5073)
+#endif
+#ifndef EBADMSG
+#define EBADMSG (-5074)
+#endif
+#ifndef EOVERFLOW
+#define EOVERFLOW (-5075)
+#endif
+#ifndef ENOTUNIQ
+#define ENOTUNIQ (-5076)
+#endif
+#ifndef EBADFD
+#define EBADFD (-5077)
+#endif
+#ifndef EREMCHG
+#define EREMCHG (-5078)
+#endif
+#ifndef ELIBACC
+#define ELIBACC (-5079)
+#endif
+#ifndef ELIBBAD
+#define ELIBBAD (-5080)
+#endif
+#ifndef ELIBSCN
+#define ELIBSCN (-5081)
+#endif
+#ifndef ELIBMAX
+#define ELIBMAX (-5082)
+#endif
+#ifndef ELIBEXEC
+#define ELIBEXEC (-5083)
+#endif
+#ifndef EILSEQ
+#define EILSEQ (-5084)
+#endif
+#ifndef ERESTART
+#define ERESTART (-5085)
+#endif
+#ifndef ESTRPIPE
+#define ESTRPIPE (-5086)
+#endif
+#ifndef EUSERS
+#define EUSERS (-5087)
+#endif
+#ifndef ENOTSOCK
+#define ENOTSOCK (-5088)
+#endif
+#ifndef EDESTADDRREQ
+#define EDESTADDRREQ (-5089)
+#endif
+#ifndef EMSGSIZE
+#define EMSGSIZE (-5090)
+#endif
+#ifndef EPROTOTYPE
+#define EPROTOTYPE (-5091)
+#endif
+#ifndef ENOPROTOOPT
+#define ENOPROTOOPT (-5092)
+#endif
+#ifndef EPROTONOSUPPORT
+#define EPROTONOSUPPORT (-5093)
+#endif
+#ifndef ESOCKTNOSUPPORT
+#define ESOCKTNOSUPPORT (-5094)
+#endif
+#ifndef EOPNOTSUPP
+#define EOPNOTSUPP (-5095)
+#endif
+#ifndef EPFNOSUPPORT
+#define EPFNOSUPPORT (-5096)
+#endif
+#ifndef EAFNOSUPPORT
+#define EAFNOSUPPORT (-5097)
+#endif
+#ifndef EADDRINUSE
+#define EADDRINUSE (-5098)
+#endif
+#ifndef EADDRNOTAVAIL
+#define EADDRNOTAVAIL (-5099)
+#endif
+#ifndef ENETDOWN
+#define ENETDOWN (-5100)
+#endif
+#ifndef ENETUNREACH
+#define ENETUNREACH (-5101)
+#endif
+#ifndef ENETRESET
+#define ENETRESET (-5102)
+#endif
+#ifndef ECONNABORTED
+#define ECONNABORTED (-5103)
+#endif
+#ifndef ECONNRESET
+#define ECONNRESET (-5104)
+#endif
+#ifndef ENOBUFS
+#define ENOBUFS (-5105)
+#endif
+#ifndef EISCONN
+#define EISCONN (-5106)
+#endif
+#ifndef ENOTCONN
+#define ENOTCONN (-5107)
+#endif
+#ifndef ESHUTDOWN
+#define ESHUTDOWN (-5108)
+#endif
+#ifndef ETOOMANYREFS
+#define ETOOMANYREFS (-5109)
+#endif
+#ifndef ETIMEDOUT
+#define ETIMEDOUT (-5110)
+#endif
+#ifndef ECONNREFUSED
+#define ECONNREFUSED (-5111)
+#endif
+#ifndef EHOSTDOWN
+#define EHOSTDOWN (-5112)
+#endif
+#ifndef EHOSTUNREACH
+#define EHOSTUNREACH (-5113)
+#endif
+#ifndef EALREADY
+#define EALREADY (-5114)
+#endif
+#ifndef EINPROGRESS
+#define EINPROGRESS (-5115)
+#endif
+#ifndef ESTALE
+#define ESTALE (-5116)
+#endif
+#ifndef EUCLEAN
+#define EUCLEAN (-5117)
+#endif
+#ifndef ENOTNAM
+#define ENOTNAM (-5118)
+#endif
+#ifndef ENAVAIL
+#define ENAVAIL (-5119)
+#endif
+#ifndef EISNAM
+#define EISNAM (-5120)
+#endif
+#ifndef EREMOTEIO
+#define EREMOTEIO (-5121)
+#endif
+#ifndef EDQUOT
+#define EDQUOT (-5122)
+#endif
+#ifndef ENOMEDIUM
+#define ENOMEDIUM (-5123)
+#endif
+#ifndef EMEDIUMTYPE
+#define EMEDIUMTYPE (-5124)
+#endif
+#ifndef ECANCELED
+#define ECANCELED (-5125)
+#endif
+#ifndef ENOKEY
+#define ENOKEY (-5126)
+#endif
+#ifndef EKEYEXPIRED
+#define EKEYEXPIRED (-5127)
+#endif
+#ifndef EKEYREVOKED
+#define EKEYREVOKED (-5128)
+#endif
+#ifndef EKEYREJECTED
+#define EKEYREJECTED (-5129)
+#endif
+#ifndef EOWNERDEAD
+#define EOWNERDEAD (-5130)
+#endif
+#ifndef ENOTRECOVERABLE
+#define ENOTRECOVERABLE (-5131)
+#endif
+#ifndef ERFKILL
+#define ERFKILL (-5132)
+#endif
+#ifndef EPROCLIM
+#define EPROCLIM (-6067)
+#endif
+#ifndef EBADRPC
+#define EBADRPC (-6072)
+#endif
+#ifndef ERPCMISMATCH
+#define ERPCMISMATCH (-6073)
+#endif
+#ifndef EPROGUNAVAIL
+#define EPROGUNAVAIL (-6074)
+#endif
+#ifndef EPROGMISMATCH
+#define EPROGMISMATCH (-6075)
+#endif
+#ifndef EPROCUNAVAIL
+#define EPROCUNAVAIL (-6076)
+#endif
+#ifndef EFTYPE
+#define EFTYPE (-6079)
+#endif
+#ifndef EAUTH
+#define EAUTH (-6080)
+#endif
+#ifndef ENEEDAUTH
+#define ENEEDAUTH (-6081)
+#endif
+#ifndef ENOATTR
+#define ENOATTR (-6087)
+#endif
+#ifndef ENOTCAPABLE
+#define ENOTCAPABLE (-6093)
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/hexparse.c b/nacl/nacl-20110221/curvecp/hexparse.c
new file mode 100644
index 00000000..43bfe044
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/hexparse.c
@@ -0,0 +1,25 @@
+#include "hexparse.h"
+
+static int hexdigit(char x)
+{
+  if (x >= '0' && x <= '9') return x - '0';
+  if (x >= 'a' && x <= 'f') return 10 + (x - 'a');
+  if (x >= 'A' && x <= 'F') return 10 + (x - 'A');
+  return -1;
+}
+
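+/* parse exactly 2*len hex digits from x into the len-byte buffer y;
+   returns 1 on success, 0 on malformed or trailing input */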
+int hexparse(unsigned char *y,long long len,const char *x)
+{
+  if (!x) return 0;
+  while (len > 0) {
+    int digit0;
+    int digit1;
+    digit0 = hexdigit(x[0]); if (digit0 == -1) return 0;
+    digit1 = hexdigit(x[1]); if (digit1 == -1) return 0;
+    *y++ = digit1 + 16 * digit0;
+    --len;
+    x += 2;
+  }
+  if (x[0]) return 0;
+  return 1;
+}
diff --git a/nacl/nacl-20110221/curvecp/hexparse.h b/nacl/nacl-20110221/curvecp/hexparse.h
new file mode 100644
index 00000000..4e88e187
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/hexparse.h
@@ -0,0 +1,6 @@
+#ifndef HEXPARSE_H
+#define HEXPARSE_H
+
+extern int hexparse(unsigned char *,long long,const char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/load.c b/nacl/nacl-20110221/curvecp/load.c
new file mode 100644
index 00000000..0cd4e43d
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/load.c
@@ -0,0 +1,33 @@
+#include <unistd.h>
+#include "open.h"
+#include "e.h"
+#include "load.h"
+
+static int readall(int fd,void *x,long long xlen)
+{
+  long long r;
+  while (xlen > 0) {
+    r = xlen;
+    if (r > 1048576) r = 1048576;
+    r = read(fd,x,r);
+    if (r == 0) errno = EPROTO;
+    if (r <= 0) {
+      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) continue;
+      return -1;
+    }
+    x += r;
+    xlen -= r;
+  }
+  return 0;
+}
+
+int load(const char *fn,void *x,long long xlen)
+{
+  int fd;
+  int r;
+  fd = open_read(fn);
+  if (fd == -1) return -1;
+  r = readall(fd,x,xlen);
+  close(fd);
+  return r;
+}
diff --git a/nacl/nacl-20110221/curvecp/load.h b/nacl/nacl-20110221/curvecp/load.h
new file mode 100644
index 00000000..9ff1ab2c
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/load.h
@@ -0,0 +1,6 @@
+#ifndef LOAD_H
+#define LOAD_H
+
+extern int load(const char *,void *,long long);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/nameparse.c b/nacl/nacl-20110221/curvecp/nameparse.c
new file mode 100644
index 00000000..f6386d73
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/nameparse.c
@@ -0,0 +1,19 @@
+#include "nameparse.h"
+
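+/* encode a dotted name into DNS-style wire format in the fixed 256-byte buffer s:
+   each label (at most 63 bytes) is preceded by its length byte, and the result
+   is terminated by a zero byte */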
+int nameparse(unsigned char *s,const char *x)
+{
+  long long pos;
+  long long j;
+  if (!x) return 0;
+  for (pos = 0;pos < 256;++pos) s[pos] = 0;
+  pos = 0;
+  while (*x) {
+    if (*x == '.') { ++x; continue; }
+    for (j = 0;x[j];++j) if (x[j] == '.') break;
+    if (j > 63) return 0;
+    if (pos < 0 || pos >= 256) return 0; s[pos++] = j;
+    while (j > 0) { if (pos < 0 || pos >= 256) return 0; s[pos++] = *x++; --j; }
+  }
+  if (pos < 0 || pos >= 256) return 0; s[pos++] = 0;
+  return 1;
+}
diff --git a/nacl/nacl-20110221/curvecp/nameparse.h b/nacl/nacl-20110221/curvecp/nameparse.h
new file mode 100644
index 00000000..97c56e8b
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/nameparse.h
@@ -0,0 +1,6 @@
+#ifndef NAMEPARSE_H
+#define NAMEPARSE_H
+
+extern int nameparse(unsigned char *,const char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/nanoseconds.c b/nacl/nacl-20110221/curvecp/nanoseconds.c
new file mode 100644
index 00000000..158ff402
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/nanoseconds.c
@@ -0,0 +1,12 @@
+#include <time.h>
+#include "nanoseconds.h"
+
+/* XXX: Y2036 problems; should upgrade to a 128-bit type for this */
+/* XXX: nanosecond granularity limits users to 1 terabyte per second */
+
+long long nanoseconds(void)
+{
+  struct timespec t;
+  if (clock_gettime(CLOCK_REALTIME,&t) != 0) return -1;
+  return t.tv_sec * 1000000000LL + t.tv_nsec;
+}
diff --git a/nacl/nacl-20110221/curvecp/nanoseconds.h b/nacl/nacl-20110221/curvecp/nanoseconds.h
new file mode 100644
index 00000000..eb72ec0f
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/nanoseconds.h
@@ -0,0 +1,6 @@
+#ifndef NANOSECONDS_H
+#define NANOSECONDS_H
+
+extern long long nanoseconds(void);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/open.h b/nacl/nacl-20110221/curvecp/open.h
new file mode 100644
index 00000000..a6ef9ec4
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/open.h
@@ -0,0 +1,10 @@
+#ifndef OPEN_H
+#define OPEN_H
+
+extern int open_read(const char *);
+extern int open_write(const char *);
+extern int open_lock(const char *);
+extern int open_cwd(void);
+extern int open_pipe(int *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/open_cwd.c b/nacl/nacl-20110221/curvecp/open_cwd.c
new file mode 100644
index 00000000..65d53bcd
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/open_cwd.c
@@ -0,0 +1,6 @@
+#include "open.h"
+
+int open_cwd(void)
+{
+  return open_read(".");
+}
diff --git a/nacl/nacl-20110221/curvecp/open_lock.c b/nacl/nacl-20110221/curvecp/open_lock.c
new file mode 100644
index 00000000..898f3b60
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/open_lock.c
@@ -0,0 +1,19 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "open.h"
+
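+/* open fn read-write with the close-on-exec flag set (via O_CLOEXEC where
+   available, otherwise fcntl) and take an exclusive lockf() lock on it */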
+int open_lock(const char *fn)
+{
+#ifdef O_CLOEXEC
+  int fd = open(fn,O_RDWR | O_CLOEXEC);
+  if (fd == -1) return -1;
+#else
+  int fd = open(fn,O_RDWR);
+  if (fd == -1) return -1;
+  fcntl(fd,F_SETFD,1);
+#endif
+  if (lockf(fd,F_LOCK,0) == -1) { close(fd); return -1; }
+  return fd;
+}
diff --git a/nacl/nacl-20110221/curvecp/open_pipe.c b/nacl/nacl-20110221/curvecp/open_pipe.c
new file mode 100644
index 00000000..2fc2b1af
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/open_pipe.c
@@ -0,0 +1,15 @@
+#include <unistd.h>
+#include <fcntl.h>
+#include "open.h"
+#include "blocking.h"
+
+int open_pipe(int *fd)
+{
+  int i;
+  if (pipe(fd) == -1) return -1;
+  for (i = 0;i < 2;++i) {
+    fcntl(fd[i],F_SETFD,1);
+    blocking_disable(fd[i]);
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/curvecp/open_read.c b/nacl/nacl-20110221/curvecp/open_read.c
new file mode 100644
index 00000000..cea667b5
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/open_read.c
@@ -0,0 +1,17 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "open.h"
+
+int open_read(const char *fn)
+{
+#ifdef O_CLOEXEC
+  return open(fn,O_RDONLY | O_NONBLOCK | O_CLOEXEC);
+#else
+  int fd = open(fn,O_RDONLY | O_NONBLOCK);
+  if (fd == -1) return -1;
+  fcntl(fd,F_SETFD,1);
+  return fd;
+#endif
+}
diff --git a/nacl/nacl-20110221/curvecp/open_write.c b/nacl/nacl-20110221/curvecp/open_write.c
new file mode 100644
index 00000000..e23752d1
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/open_write.c
@@ -0,0 +1,17 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "open.h"
+
+int open_write(const char *fn)
+{
+#ifdef O_CLOEXEC
+  return open(fn,O_CREAT | O_WRONLY | O_NONBLOCK | O_CLOEXEC,0644);
+#else
+  int fd = open(fn,O_CREAT | O_WRONLY | O_NONBLOCK,0644);
+  if (fd == -1) return -1;
+  fcntl(fd,F_SETFD,1);
+  return fd;
+#endif
+}
diff --git a/nacl/nacl-20110221/curvecp/portparse.c b/nacl/nacl-20110221/curvecp/portparse.c
new file mode 100644
index 00000000..37e4caca
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/portparse.c
@@ -0,0 +1,14 @@
+#include "portparse.h"
+
+int portparse(unsigned char *y,const char *x)
+{
+  long long d = 0;
+  long long j;
+  for (j = 0;j < 5 && x[j] >= '0' && x[j] <= '9';++j)
+    d = d * 10 + (x[j] - '0');
+  if (j == 0) return 0;
+  if (x[j]) return 0;
+  y[0] = d >> 8;
+  y[1] = d;
+  return 1;
+}
diff --git a/nacl/nacl-20110221/curvecp/portparse.h b/nacl/nacl-20110221/curvecp/portparse.h
new file mode 100644
index 00000000..99a17748
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/portparse.h
@@ -0,0 +1,6 @@
+#ifndef PORTPARSE_H
+#define PORTPARSE_H
+
+extern int portparse(unsigned char *,const char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/randommod.c b/nacl/nacl-20110221/curvecp/randommod.c
new file mode 100644
index 00000000..575a627b
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/randommod.c
@@ -0,0 +1,14 @@
+#include "randombytes.h"
+
+/* XXX: current implementation is limited to n<2^55 */
+
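+/* return a roughly uniform integer in [0,n): reducing 256 random bits mod n
+   leaves only negligible bias for the n < 2^55 supported here */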
+long long randommod(long long n)
+{
+  long long result = 0;
+  long long j;
+  unsigned char r[32];
+  if (n <= 1) return 0;
+  randombytes(r,32);
+  for (j = 0;j < 32;++j) result = (result * 256 + (unsigned long long) r[j]) % n;
+  return result;
+}
diff --git a/nacl/nacl-20110221/curvecp/randommod.h b/nacl/nacl-20110221/curvecp/randommod.h
new file mode 100644
index 00000000..2b8405d6
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/randommod.h
@@ -0,0 +1,6 @@
+#ifndef RANDOMMOD_H
+#define RANDOMMOD_H
+
+extern long long randommod(long long);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/safenonce.c b/nacl/nacl-20110221/curvecp/safenonce.c
new file mode 100644
index 00000000..cfcabcd2
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/safenonce.c
@@ -0,0 +1,74 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "crypto_uint64.h"
+#include "uint64_pack.h"
+#include "uint64_unpack.h"
+#include "savesync.h"
+#include "open.h"
+#include "load.h"
+#include "randombytes.h"
+#include "safenonce.h"
+
+#include "crypto_block.h"
+#if crypto_block_BYTES != 16
+error!
+#endif
+#if crypto_block_KEYBYTES != 32
+error!
+#endif
+
+/*
+Output: 128-bit nonce y[0],...,y[15].
+Reads and writes existing 8-byte file ".expertsonly/noncecounter",
+locked via existing 1-byte file ".expertsonly/lock".
+Also reads existing 32-byte file ".expertsonly/noncekey".
+Not thread-safe.
+
+Invariants:
+This process is free to use counters that are >=counterlow and <counterhigh.
+The 8-byte file contains a counter that is safe to use and >=counterhigh.
+
+XXX: should rewrite file in background, rather than briefly pausing
+*/
+
+static crypto_uint64 counterlow = 0;
+static crypto_uint64 counterhigh = 0;
+
+static unsigned char flagkeyloaded = 0;
+static unsigned char noncekey[32];
+static unsigned char data[16];
+
+int safenonce(unsigned char *y,int flaglongterm)
+{
+  if (!flagkeyloaded) {
+    int fdlock;
+    fdlock = open_lock(".expertsonly/lock");
+    if (fdlock == -1) return -1;
+    if (load(".expertsonly/noncekey",noncekey,sizeof noncekey) == -1) { close(fdlock); return -1; }
+    close(fdlock);
+    flagkeyloaded = 1;
+  }
+  
+  if (counterlow >= counterhigh) {
+    int fdlock;
+    fdlock = open_lock(".expertsonly/lock");
+    if (fdlock == -1) return -1;
+    if (load(".expertsonly/noncecounter",data,8) == -1) { close(fdlock); return -1; }
+    counterlow = uint64_unpack(data);
+    if (flaglongterm)
+      counterhigh = counterlow + 1048576;
+    else
+      counterhigh = counterlow + 1;
+    uint64_pack(data,counterhigh);
+    if (savesync(".expertsonly/noncecounter",data,8) == -1) { close(fdlock); return -1; }
+    close(fdlock);
+  }
+
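+  /* nonce = crypto_block(counter || 64 random bits) under the secret nonce key:
+     the counter guarantees uniqueness and the cipher hides its value */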
+  randombytes(data + 8,8);
+  uint64_pack(data,counterlow++);
+  crypto_block(y,data,noncekey);
+  
+  return 0;
+}
diff --git a/nacl/nacl-20110221/curvecp/safenonce.h b/nacl/nacl-20110221/curvecp/safenonce.h
new file mode 100644
index 00000000..c01271aa
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/safenonce.h
@@ -0,0 +1,6 @@
+#ifndef SAFENONCE_H
+#define SAFENONCE_H
+
+extern int safenonce(unsigned char *,int);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/savesync.c b/nacl/nacl-20110221/curvecp/savesync.c
new file mode 100644
index 00000000..73074a4b
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/savesync.c
@@ -0,0 +1,24 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "open.h"
+#include "savesync.h"
+#include "writeall.h"
+
+static int writesync(int fd,const void *x,long long xlen)
+{
+  if (writeall(fd,x,xlen) == -1) return -1;
+  return fsync(fd);
+}
+
+int savesync(const char *fn,const void *x,long long xlen)
+{
+  int fd;
+  int r;
+  fd = open_write(fn);
+  if (fd == -1) return -1;
+  r = writesync(fd,x,xlen);
+  close(fd);
+  return r;
+}
diff --git a/nacl/nacl-20110221/curvecp/savesync.h b/nacl/nacl-20110221/curvecp/savesync.h
new file mode 100644
index 00000000..4c0cd3d2
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/savesync.h
@@ -0,0 +1,6 @@
+#ifndef SAVESYNC_H
+#define SAVESYNC_H
+
+extern int savesync(const char *,const void *,long long);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/socket.h b/nacl/nacl-20110221/curvecp/socket.h
new file mode 100644
index 00000000..9fab01c7
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/socket.h
@@ -0,0 +1,9 @@
+#ifndef SOCKET_H
+#define SOCKET_H
+
+extern int socket_udp(void);
+extern int socket_bind(int,const unsigned char *,const unsigned char *);
+extern int socket_send(int,const unsigned char *,long long,const unsigned char *,const unsigned char *);
+extern long long socket_recv(int,unsigned char *,long long,unsigned char *,unsigned char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/socket_bind.c b/nacl/nacl-20110221/curvecp/socket_bind.c
new file mode 100644
index 00000000..9e36925d
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/socket_bind.c
@@ -0,0 +1,15 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include "socket.h"
+#include "byte.h"
+
+int socket_bind(int fd,const unsigned char *ip,const unsigned char *port)
+{
+  struct sockaddr_in sa;
+  byte_zero(&sa,sizeof sa);
+  byte_copy(&sa.sin_addr,4,ip);
+  byte_copy(&sa.sin_port,2,port);
+  return bind(fd,(struct sockaddr *) &sa,sizeof sa);
+}
diff --git a/nacl/nacl-20110221/curvecp/socket_recv.c b/nacl/nacl-20110221/curvecp/socket_recv.c
new file mode 100644
index 00000000..8b266ba2
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/socket_recv.c
@@ -0,0 +1,23 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include "socket.h"
+#include "byte.h"
+
+long long socket_recv(int fd,unsigned char *x,long long xlen,unsigned char *ip,unsigned char *port)
+{
+  struct sockaddr_in sa;
+  socklen_t salen;
+  int r;
+
+  if (xlen < 0) { errno = EPROTO; return -1; }
+  if (xlen > 1048576) xlen = 1048576;
+
+  byte_zero(&sa,sizeof sa);
+  salen = sizeof sa;
+  r = recvfrom(fd,x,xlen,0,(struct sockaddr *) &sa,&salen);
+  byte_copy(ip,4,&sa.sin_addr);
+  byte_copy(port,2,&sa.sin_port);
+  return r;
+}
diff --git a/nacl/nacl-20110221/curvecp/socket_send.c b/nacl/nacl-20110221/curvecp/socket_send.c
new file mode 100644
index 00000000..1521384c
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/socket_send.c
@@ -0,0 +1,19 @@
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include "socket.h"
+#include "byte.h"
+
+int socket_send(int fd,const unsigned char *x,long long xlen,const unsigned char *ip,const unsigned char *port)
+{
+  struct sockaddr_in sa;
+
+  if (xlen < 0 || xlen > 1048576) { errno = EPROTO; return -1; }
+
+  byte_zero(&sa,sizeof sa);
+  sa.sin_family = AF_INET;
+  byte_copy(&sa.sin_addr,4,ip);
+  byte_copy(&sa.sin_port,2,port);
+  return sendto(fd,x,xlen,0,(struct sockaddr *) &sa,sizeof sa);
+}
diff --git a/nacl/nacl-20110221/curvecp/socket_udp.c b/nacl/nacl-20110221/curvecp/socket_udp.c
new file mode 100644
index 00000000..f64762f1
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/socket_udp.c
@@ -0,0 +1,36 @@
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include "socket.h"
+#include "blocking.h"
+
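+/* both helpers below ask the kernel to fragment oversized UDP datagrams instead
+   of setting the don't-fragment bit, i.e. they turn path-MTU discovery off */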
+static void enable_bsd_fragmentation(int fd)
+{
+#ifdef IP_DONTFRAG
+  const int x = 0;
+  setsockopt(fd,SOL_IP,IP_DONTFRAG,&x,sizeof x);
+#endif
+}
+
+static void enable_linux_fragmentation(int fd)
+{
+#ifdef IP_MTU_DISCOVER
+#ifdef IP_PMTUDISC_DONT
+  const int x = IP_PMTUDISC_DONT;
+  setsockopt(fd,SOL_IP,IP_MTU_DISCOVER,&x,sizeof x);
+#endif
+#endif
+}
+
+int socket_udp(void)
+{
+  int fd = socket(PF_INET,SOCK_DGRAM,0);
+  if (fd == -1) return -1;
+  fcntl(fd,F_SETFD,1);
+  blocking_disable(fd);
+  enable_bsd_fragmentation(fd);
+  enable_linux_fragmentation(fd);
+  return fd;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint16_pack.c b/nacl/nacl-20110221/curvecp/uint16_pack.c
new file mode 100644
index 00000000..f3761035
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint16_pack.c
@@ -0,0 +1,7 @@
+#include "uint16_pack.h"
+
+void uint16_pack(unsigned char *y,crypto_uint16 x)
+{
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint16_pack.h b/nacl/nacl-20110221/curvecp/uint16_pack.h
new file mode 100644
index 00000000..6c5b65e1
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint16_pack.h
@@ -0,0 +1,8 @@
+#ifndef UINT16_PACK_H
+#define UINT16_PACK_H
+
+#include "crypto_uint16.h"
+
+extern void uint16_pack(unsigned char *,crypto_uint16);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/uint16_unpack.c b/nacl/nacl-20110221/curvecp/uint16_unpack.c
new file mode 100644
index 00000000..b4e74ee4
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint16_unpack.c
@@ -0,0 +1,9 @@
+#include "uint16_unpack.h"
+
+crypto_uint16 uint16_unpack(const unsigned char *x)
+{
+  crypto_uint16 result;
+  result = x[1];
+  result <<= 8; result |= x[0];
+  return result;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint16_unpack.h b/nacl/nacl-20110221/curvecp/uint16_unpack.h
new file mode 100644
index 00000000..3e3aedfc
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint16_unpack.h
@@ -0,0 +1,8 @@
+#ifndef UINT16_UNPACK_H
+#define UINT16_UNPACK_H
+
+#include "crypto_uint16.h"
+
+extern crypto_uint16 uint16_unpack(const unsigned char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/uint32_pack.c b/nacl/nacl-20110221/curvecp/uint32_pack.c
new file mode 100644
index 00000000..d54fe542
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint32_pack.c
@@ -0,0 +1,9 @@
+#include "uint32_pack.h"
+
+void uint32_pack(unsigned char *y,crypto_uint32 x)
+{
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint32_pack.h b/nacl/nacl-20110221/curvecp/uint32_pack.h
new file mode 100644
index 00000000..efdf7919
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint32_pack.h
@@ -0,0 +1,8 @@
+#ifndef UINT32_PACK_H
+#define UINT32_PACK_H
+
+#include "crypto_uint32.h"
+
+extern void uint32_pack(unsigned char *,crypto_uint32);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/uint32_unpack.c b/nacl/nacl-20110221/curvecp/uint32_unpack.c
new file mode 100644
index 00000000..adde6987
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint32_unpack.c
@@ -0,0 +1,11 @@
+#include "uint32_unpack.h"
+
+crypto_uint32 uint32_unpack(const unsigned char *x)
+{
+  crypto_uint32 result;
+  result = x[3];
+  result <<= 8; result |= x[2];
+  result <<= 8; result |= x[1];
+  result <<= 8; result |= x[0];
+  return result;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint32_unpack.h b/nacl/nacl-20110221/curvecp/uint32_unpack.h
new file mode 100644
index 00000000..dd65f365
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint32_unpack.h
@@ -0,0 +1,8 @@
+#ifndef UINT32_UNPACK_H
+#define UINT32_UNPACK_H
+
+#include "crypto_uint32.h"
+
+extern crypto_uint32 uint32_unpack(const unsigned char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/uint64_pack.c b/nacl/nacl-20110221/curvecp/uint64_pack.c
new file mode 100644
index 00000000..898a80a3
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint64_pack.c
@@ -0,0 +1,13 @@
+#include "uint64_pack.h"
+
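+/* store x as 8 bytes, least significant byte first (little-endian) */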
+void uint64_pack(unsigned char *y,crypto_uint64 x)
+{
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+  *y++ = x; x >>= 8;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint64_pack.h b/nacl/nacl-20110221/curvecp/uint64_pack.h
new file mode 100644
index 00000000..be8330fd
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint64_pack.h
@@ -0,0 +1,8 @@
+#ifndef UINT64_PACK_H
+#define UINT64_PACK_H
+
+#include "crypto_uint64.h"
+
+extern void uint64_pack(unsigned char *,crypto_uint64);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/uint64_unpack.c b/nacl/nacl-20110221/curvecp/uint64_unpack.c
new file mode 100644
index 00000000..2d69bf72
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint64_unpack.c
@@ -0,0 +1,15 @@
+#include "uint64_unpack.h"
+
+crypto_uint64 uint64_unpack(const unsigned char *x)
+{
+  crypto_uint64 result;
+  result = x[7];
+  result <<= 8; result |= x[6];
+  result <<= 8; result |= x[5];
+  result <<= 8; result |= x[4];
+  result <<= 8; result |= x[3];
+  result <<= 8; result |= x[2];
+  result <<= 8; result |= x[1];
+  result <<= 8; result |= x[0];
+  return result;
+}
diff --git a/nacl/nacl-20110221/curvecp/uint64_unpack.h b/nacl/nacl-20110221/curvecp/uint64_unpack.h
new file mode 100644
index 00000000..f40e7a8a
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/uint64_unpack.h
@@ -0,0 +1,8 @@
+#ifndef UINT64_UNPACK_H
+#define UINT64_UNPACK_H
+
+#include "crypto_uint64.h"
+
+extern crypto_uint64 uint64_unpack(const unsigned char *);
+
+#endif
diff --git a/nacl/nacl-20110221/curvecp/writeall.c b/nacl/nacl-20110221/curvecp/writeall.c
new file mode 100644
index 00000000..58f93011
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/writeall.c
@@ -0,0 +1,27 @@
+#include <poll.h>
+#include <unistd.h>
+#include "e.h"
+#include "writeall.h"
+
+int writeall(int fd,const void *x,long long xlen)
+{
+  long long w;
+  while (xlen > 0) {
+    w = xlen;
+    if (w > 1048576) w = 1048576;
+    w = write(fd,x,w);
+    if (w < 0) {
+      if (errno == EINTR || errno == EAGAIN || errno == EWOULDBLOCK) {
+        struct pollfd p;
+	p.fd = fd;
+	p.events = POLLOUT | POLLERR;
+	poll(&p,1,-1);
+        continue;
+      }
+      return -1;
+    }
+    x += w;
+    xlen -= w;
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/curvecp/writeall.h b/nacl/nacl-20110221/curvecp/writeall.h
new file mode 100644
index 00000000..92341236
--- /dev/null
+++ b/nacl/nacl-20110221/curvecp/writeall.h
@@ -0,0 +1,6 @@
+#ifndef WRITEALL_H
+#define WRITEALL_H
+
+extern int writeall(int,const void *,long long);
+
+#endif
diff --git a/nacl/nacl-20110221/do b/nacl/nacl-20110221/do
new file mode 100755
index 00000000..f953508d
--- /dev/null
+++ b/nacl/nacl-20110221/do
@@ -0,0 +1,468 @@
+#!/bin/sh
+
+# nacl/do
+# D. J. Bernstein
+# Public domain.
+
+version=`cat version`
+project=nacl
+shorthostname=`hostname | sed 's/\..*//' | tr -cd '[a-z][A-Z][0-9]'`
+
+top="`pwd`/build/$shorthostname"
+bin="$top/bin"
+lib="$top/lib"
+include="$top/include"
+work="$top/work"
+
+PATH="/usr/local/bin:$PATH"
+PATH="/usr/sfw/bin:$PATH"
+PATH="$bin:$PATH"
+export PATH
+
+LD_LIBRARY_PATH="/usr/local/lib/sparcv9:/usr/local/lib:$LD_LIBRARY_PATH"
+LD_LIBRARY_PATH="/usr/sfw/lib/sparcv9:/usr/sfw/lib:$LD_LIBRARY_PATH"
+export LD_LIBRARY_PATH
+
+# and wacky MacOS X
+DYLD_LIBRARY_PATH="/usr/local/lib/sparcv9:/usr/local/lib:$DYLD_LIBRARY_PATH"
+DYLD_LIBRARY_PATH="/usr/sfw/lib/sparcv9:/usr/sfw/lib:$DYLD_LIBRARY_PATH"
+export DYLD_LIBRARY_PATH
+
+# and work around bug in GNU sort
+LANG=C
+export LANG
+
+rm -rf "$top"
+mkdir -p "$top"
+mkdir -p "$bin"
+mkdir -p "$lib"
+mkdir -p "$include"
+
+exec >"$top/log"
+exec 2>&1
+exec 5>"$top/data"
+exec </dev/null
+
+echo "=== `date` === starting"
+
+echo "=== `date` === hostname"
+hostname || :
+echo "=== `date` === uname -a"
+uname -a || :
+echo "=== `date` === uname -M"
+uname -M || :
+echo "=== `date` === uname -F"
+uname -F || :
+echo "=== `date` === /usr/sbin/lscfg | grep proc"
+/usr/sbin/lscfg | grep proc || :
+echo "=== `date` === /usr/sbin/lsattr -El proc0"
+/usr/sbin/lsattr -El proc0 || :
+echo "=== `date` === cat /proc/cpuinfo"
+cat /proc/cpuinfo || :
+echo "=== `date` === cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq"
+cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq || :
+echo "=== `date` === cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq"
+cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_min_freq || :
+echo "=== `date` === cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq"
+cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq || :
+echo "=== `date` === cat /sys/devices/system/cpu/cpu0/clock_tick"
+cat /sys/devices/system/cpu/cpu0/clock_tick || :
+echo "=== `date` === sysctl hw.model"
+sysctl hw.model || :
+echo "=== `date` === sysctl machdep.tsc_freq"
+sysctl machdep.tsc_freq || :
+echo "=== `date` === /usr/sbin/psrinfo -v"
+/usr/sbin/psrinfo -v || :
+
+echo "=== `date` === building okcompilers"
+rm -rf "$work"
+mkdir -p "$work"
+cp -pr okcompilers/* "$work"
+( cd "$work" && sh do )
+cp -p "$work"/bin/* "$bin"
+
+echo "=== `date` === building cpuid"
+rm -rf "$work"
+mkdir -p "$work"
+cp -pr cpuid/* "$work"
+( cd "$work" && sh do )
+cp -pr "$work"/include/* "$include"
+
+echo "=== `date` === building inttypes"
+rm -rf "$work"
+mkdir -p "$work"
+cp -pr inttypes/* "$work"
+( cd "$work" && sh do )
+cp -pr "$work"/include/* "$include"
+
+echo "=== `date` === building cpucycles"
+rm -rf "$work"
+mkdir -p "$work"
+cp -pr cpucycles/* "$work"
+( cd "$work" && sh do )
+cp -pr "$work"/lib/* "$lib"
+cp -pr "$work"/include/* "$include"
+
+echo "=== `date` === building randombytes"
+rm -rf "$work"
+mkdir -p "$work"
+cp -pr randombytes/* "$work"
+( cd "$work" && sh do )
+cp -pr "$work"/lib/* "$lib"
+cp -pr "$work"/include/* "$include"
+
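+# create an initial lib${project}.a for each ABI containing only a dummy object;
+# the per-primitive loop below appends the chosen implementations to it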
+okabi \
+| while read abi
+do
+  rm -rf "$work"
+  mkdir -p "$work"
+  echo 'void crypto_'"$project"'_base(void) { ; }' > "$work/${project}_base.c"
+  okc-$abi \
+  | while read compiler
+  do
+    ( cd "$work" && $compiler -c ${project}_base.c ) && break
+  done
+  okar-$abi cr "$lib/$abi/lib${project}.a" "$work/${project}_base.o"
+  ( ranlib "$lib/$abi/lib${project}.a" || exit 0 )
+done
+
+# loop over operations
+cat OPERATIONS \
+| while read o
+do
+  [ -d "$o" ] || continue
+
+  selected=''
+  [ -f "$o/selected" ] && selected=`cat "$o/selected"`
+
+  # for each operation, loop over primitives
+  ls "$o" \
+  | sort \
+  | while read p
+  do
+    [ -d "$o/$p" ] || continue
+    expectedchecksum=''
+    [ -f "$o/$p/checksum" ] && expectedchecksum=`cat "$o/$p/checksum"`
+    op="${o}_${p}"
+
+    startdate=`date +%Y%m%d`
+
+    # for each operation primitive, loop over abis
+    okabi \
+    | while read abi
+    do
+      echo "=== `date` === $abi $o/$p"
+      libs=`"oklibs-$abi"`
+      libs="$lib/$abi/cpucycles.o $libs"
+      [ -f "$lib/$abi/lib${project}.a" ] && libs="$lib/$abi/lib${project}.a $libs"
+
+      rm -rf "$work"
+      mkdir -p "$work"
+      mkdir -p "$work/best"
+
+      # for each operation primitive abi, loop over implementations
+      find "$o/$p" -follow -name "api.h" \
+      | sort \
+      | while read doth
+      do
+        implementationdir=`dirname $doth`
+	opi=`echo "$implementationdir" | tr ./- ___`
+
+	echo "=== `date` === $abi $implementationdir"
+
+	rm -rf "$work/compile"
+	mkdir -p "$work/compile"
+  
+	cfiles=`ls "$implementationdir" | grep '\.c$' || :`
+	sfiles=`ls "$implementationdir" | grep '\.[sS]$' || :`
+	cppfiles=`ls "$o" | grep '\.cpp$' || :`
+  
+	cp -p "$o"/*.c "$work/compile/"
+	cp -p "$o"/*.cpp "$work/compile/"
+
+	cp -pr "$implementationdir"/* "$work/compile"
+
+	cp -p "try-anything.c" "$work/compile/try-anything.c"
+	cp -p "measure-anything.c" "$work/compile/measure-anything.c"
+
+	cp -p MACROS "$work/compile/MACROS"
+	cp -p PROTOTYPES.c "$work/compile/PROTOTYPES.c"
+	cp -p PROTOTYPES.cpp "$work/compile/PROTOTYPES.cpp"
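+	# generate <operation>.h and <operation>_<primitive>.h wrappers mapping the
+	# generic names listed in MACROS and PROTOTYPES onto this implementation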
+
+	(
+	  cd "$work/compile"
+	  (
+	    echo "#ifndef ${o}_H"
+	    echo "#define ${o}_H"
+	    echo ""
+	    echo "#include \"${op}.h\""
+	    echo ""
+	    egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < MACROS \
+	    | sed "s/$o/$op/" | while read mop
+	    do
+	      echo "#define ${mop} ${mop}" | sed "s/$op/$o/"
+	    done
+	    echo "#define ${o}_PRIMITIVE \"${p}\""
+	    echo "#define ${o}_IMPLEMENTATION ${op}_IMPLEMENTATION"
+	    echo "#define ${o}_VERSION ${op}_VERSION"
+	    echo ""
+	    echo "#endif"
+	  ) > "$o.h"
+	  (
+	    echo "#ifndef ${op}_H"
+	    echo "#define ${op}_H"
+	    echo ""
+	    sed 's/[ 	]CRYPTO_/ '"${opi}"'_/g' < api.h
+	    echo '#ifdef __cplusplus'
+	    echo '#include <string>'
+	    egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < PROTOTYPES.cpp \
+	    | sed "s/$o/$opi/"
+	    echo 'extern "C" {'
+	    echo '#endif'
+	    egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < PROTOTYPES.c \
+	    | sed "s/$o/$opi/"
+	    echo '#ifdef __cplusplus'
+	    echo '}'
+	    echo '#endif'
+	    echo ""
+	    egrep "${o}"'$|'"${o}"'\(|'"${o}"'_' < MACROS \
+	    | sed "s/$o/$opi/" | while read mopi
+	    do
+	      echo "#define ${mopi} ${mopi}" | sed "s/$opi/$op/"
+	    done
+	    echo "#define ${op}_IMPLEMENTATION \"${implementationdir}\""
+	    echo "#ifndef ${opi}_VERSION"
+	    echo "#define ${opi}_VERSION \"-\""
+	    echo "#endif"
+	    echo "#define ${op}_VERSION ${opi}_VERSION"
+	    echo ""
+	    echo "#endif"
+	  ) > "$op.h"
+
+	  okc-$abi \
+	  | while read compiler
+	  do
+	    echo "=== `date` === $abi $implementationdir $compiler"
+	    compilerword=`echo "$compiler" | tr ' ' '_'`
+	    ok=1
+	    for f in $cfiles $sfiles
+	    do
+	      if [ "$ok" = 1 ]
+	      then
+		$compiler \
+		  -I. -I"$include" -I"$include/$abi" \
+		  -c "$f" >../errors 2>&1 || ok=0
+		( if [ `wc -l < ../errors` -lt 25 ]
+		  then
+		    cat ../errors
+		  else
+		    head ../errors
+		    echo ...
+		    tail ../errors
+		  fi
+		) \
+		| while read err
+		do
+		  echo "$version $shorthostname $abi $startdate $o $p fromcompiler $implementationdir $compilerword $f $err" >&5
+		done
+	      fi
+	    done
+
+	    [ "$ok" = 1 ] || continue
+	    okar-$abi cr "$op.a" *.o || continue
+	    ranlib "$op.a"
+
+	    $compiler \
+	      -I. -I"$include" -I"$include/$abi" \
+	      -o try try.c try-anything.c \
+	      "$op.a" $libs >../errors 2>&1 || ok=0
+	    cat ../errors \
+	    | while read err
+	    do
+	      echo "$version $shorthostname $abi $startdate $o $p fromcompiler $implementationdir $compilerword try.c $err" >&5
+	    done
+	    [ "$ok" = 1 ] || continue
+
+	    if sh -c './try || exit $?' >../outputs 2>../errors
+	    then
+	      checksum=`awk '{print $1}' < ../outputs`
+	      cycles=`awk '{print $2}' < ../outputs`
+	      checksumcycles=`awk '{print $3}' < ../outputs`
+	      cyclespersecond=`awk '{print $4}' < ../outputs`
+	      impl=`awk '{print $5}' < ../outputs`
+	    else
+	      echo "$version $shorthostname $abi $startdate $o $p tryfails $implementationdir $compilerword error $?" >&5
+	      cat ../outputs ../errors \
+	      | while read err
+	      do
+	        echo "$version $shorthostname $abi $startdate $o $p tryfails $implementationdir $compilerword $err" >&5
+	      done
+	      continue
+	    fi
+
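+            # compare try's checksum with the one recorded in $o/$p/checksum;
+            # "unknown" means no checksum file exists for this primitive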
+	    checksumok=fails
+	    [ "x$expectedchecksum" = "x$checksum" ] && checksumok=ok
+	    [ "x$expectedchecksum" = "x" ] && checksumok=unknown
+	    echo "$version $shorthostname $abi $startdate $o $p try $checksum $checksumok $cycles $checksumcycles $cyclespersecond $impl $compilerword" >&5
+	    [ "$checksumok" = fails ] && continue
+
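+            # keep only the fastest implementation: skip unless this median
+            # cycle count beats the value stored in ../bestmedian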
+	    [ -s ../bestmedian ] && [ `cat ../bestmedian` -le $cycles ] && continue
+	    echo "$cycles" > ../bestmedian
+
+	    $compiler -D'COMPILER="'"$compiler"'"' \
+	      -DLOOPS=1 \
+	      -I. -I"$include" -I"$include/$abi" \
+	      -o measure measure.c measure-anything.c \
+	      "$op.a" $libs >../errors 2>&1 || ok=0
+	    cat ../errors \
+	    | while read err
+	    do
+	      echo "$version $shorthostname $abi $startdate $o $p fromcompiler $implementationdir $compilerword measure.c $err" >&5
+	    done
+	    [ "$ok" = 1 ] || continue
+
+	    for f in $cppfiles
+	    do
+	      okcpp-$abi \
+	      | while read cppcompiler
+	      do
+	        echo "=== `date` === $abi $implementationdir $cppcompiler"
+	        $cppcompiler \
+		  -I. -I"$include" -I"$include/$abi" \
+		  -c "$f" && break
+	      done
+	    done
+
+	    rm -f ../best/*.o ../best/measure || continue
+	    for f in *.o
+	    do
+	      cp -p "$f" "../best/${opi}-$f"
+	    done
+	    cp -p "$op.h" "../$op.h"
+	    cp -p "$o.h" "../$o.h"
+	    cp -p measure ../best/measure
+	  done
+	)
+      done
+
+      echo "=== `date` === $abi $o/$p measuring"
+
+      "$work/best/measure" \
+      | while read measurement
+      do
+	echo "$version $shorthostname $abi $startdate $o $p $measurement" >&5
+      done
+
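+      # only primitives marked "used" are archived into lib${project}.a and get
+      # $op.h installed; those also marked "selected" install the generic $o.h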
+      [ -f "$o/$p/used" ] \
+      && okar-$abi cr "$lib/$abi/lib${project}.a" "$work/best"/*.o \
+      && ( ranlib "$lib/$abi/lib${project}.a" || exit 0 ) \
+      && cp -p "$work/$op.h" "$include/$abi/$op.h" \
+      && [ -f "$o/$p/selected" ] \
+      && cp -p "$work/$o.h" "$include/$abi/$o.h" \
+      || :
+    done
+  done
+done
+
+for language in c cpp
+do
+  for bintype in commandline tests
+  do
+    ls $bintype \
+    | sed -n 's/\.'$language'$//p' \
+    | sort \
+    | while read cmd
+    do
+      echo "=== `date` === starting $bintype/$cmd"
+    
+      rm -rf "$work"
+      mkdir -p "$work/compile"
+    
+      cp "$bintype/$cmd.$language" "$work/compile/$cmd.$language"
+      [ "$bintype" = tests ] && cp -p "$bintype/$cmd.out" "$work/compile/$cmd.out"
+    
+      okabi \
+      | while read abi
+      do
+        [ -x "$bin/$cmd" ] && break
+    
+        libs=`"oklibs-$abi"`
+        libs="$lib/$abi/cpucycles.o $libs"
+        libs="$libs $lib/$abi/randombytes.o"
+    
+        ok${language}-$abi \
+        | while read compiler
+        do
+          [ -x "$bin/$cmd" ] && break
+    
+          echo "=== `date` === $bintype/$cmd $abi $compiler"
+          (
+            cd "$work/compile"
+            if $compiler \
+              -I"$include" -I"$include/$abi" \
+              -o "$cmd" "$cmd.${language}" \
+              "$lib/$abi/lib${project}.a" $libs
+	    then
+	      case "$bintype" in
+	        commandline) cp -p "$cmd" "$bin/$cmd" ;;
+		tests) "./$cmd" | cmp - "$cmd.out" || "./$cmd" ;;
+	      esac
+	    fi
+          )
+        done
+      done
+    done
+  done
+done
+
+echo "=== `date` === starting curvecp"
+
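+# prefer 64-bit ABIs, then mips32, then everything else; later ABIs and
+# compilers are skipped once $bin/curvecpmessage has been built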
+okabi \
+| awk '
+  { if ($1=="amd64" || $1=="ia64" || $1=="ppc64" || $1=="sparcv9" || $1=="mips64") print 1,$1
+    else if ($1 == "mips32") print 2,$1
+    else print 3,$1
+  }
+' \
+| sort \
+| while read okabipriority abi
+do
+  [ -x "$bin/curvecpmessage" ] && break
+  libs=`"oklibs-$abi"`
+  libs="$lib/$abi/cpucycles.o $libs"
+  libs="$libs $lib/$abi/randombytes.o"
+
+  okc-$abi \
+  | while read compiler
+  do
+    [ -x "$bin/curvecpmessage" ] && break
+
+    echo "=== `date` === curvecp $abi $compiler"
+    rm -rf "$work"
+    mkdir -p "$work/compile"
+    cp curvecp/* "$work/compile"
+    (
+      cd "$work/compile"
+      cat SOURCES \
+      | while read x
+      do
+        $compiler -I"$include" -I"$include/$abi" -c "$x.c"
+      done
+
+      if okar-$abi cr curvecplibs.a `cat LIBS`
+      then
+        cat TARGETS \
+	| while read x
+	do
+	  $compiler -I"$include" -I"$include/$abi" \
+	  -o "$x" "$x.o" \
+	  curvecplibs.a "$lib/$abi/lib${project}.a" $libs \
+	  && cp -p "$x" "$bin/$x"
+	done
+      fi
+    )
+  done
+
+done
+
+echo "=== `date` === finishing"
diff --git a/nacl/nacl-20110221/inttypes/crypto_int16.c b/nacl/nacl-20110221/inttypes/crypto_int16.c
new file mode 100644
index 00000000..bc160669
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_int16.c
@@ -0,0 +1,3 @@
+#include "crypto_int16.h"
+#include "signed.h"
+DOIT(16,crypto_int16)
diff --git a/nacl/nacl-20110221/inttypes/crypto_int32.c b/nacl/nacl-20110221/inttypes/crypto_int32.c
new file mode 100644
index 00000000..520e6822
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_int32.c
@@ -0,0 +1,3 @@
+#include "crypto_int32.h"
+#include "signed.h"
+DOIT(32,crypto_int32)
diff --git a/nacl/nacl-20110221/inttypes/crypto_int64.c b/nacl/nacl-20110221/inttypes/crypto_int64.c
new file mode 100644
index 00000000..77e815bf
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_int64.c
@@ -0,0 +1,3 @@
+#include "crypto_int64.h"
+#include "signed.h"
+DOIT(64,crypto_int64)
diff --git a/nacl/nacl-20110221/inttypes/crypto_int8.c b/nacl/nacl-20110221/inttypes/crypto_int8.c
new file mode 100644
index 00000000..5966c62e
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_int8.c
@@ -0,0 +1,3 @@
+#include "crypto_int8.h"
+#include "signed.h"
+DOIT(8,crypto_int8)
diff --git a/nacl/nacl-20110221/inttypes/crypto_uint16.c b/nacl/nacl-20110221/inttypes/crypto_uint16.c
new file mode 100644
index 00000000..16ce4a69
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_uint16.c
@@ -0,0 +1,3 @@
+#include "crypto_uint16.h"
+#include "unsigned.h"
+DOIT(16,crypto_uint16)
diff --git a/nacl/nacl-20110221/inttypes/crypto_uint32.c b/nacl/nacl-20110221/inttypes/crypto_uint32.c
new file mode 100644
index 00000000..7050b573
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_uint32.c
@@ -0,0 +1,3 @@
+#include "crypto_uint32.h"
+#include "unsigned.h"
+DOIT(32,crypto_uint32)
diff --git a/nacl/nacl-20110221/inttypes/crypto_uint64.c b/nacl/nacl-20110221/inttypes/crypto_uint64.c
new file mode 100644
index 00000000..808055c7
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_uint64.c
@@ -0,0 +1,3 @@
+#include "crypto_uint64.h"
+#include "unsigned.h"
+DOIT(64,crypto_uint64)
diff --git a/nacl/nacl-20110221/inttypes/crypto_uint8.c b/nacl/nacl-20110221/inttypes/crypto_uint8.c
new file mode 100644
index 00000000..61683391
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/crypto_uint8.c
@@ -0,0 +1,3 @@
+#include "crypto_uint8.h"
+#include "unsigned.h"
+DOIT(8,crypto_uint8)
diff --git a/nacl/nacl-20110221/inttypes/do b/nacl/nacl-20110221/inttypes/do
new file mode 100644
index 00000000..af88b26a
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/do
@@ -0,0 +1,47 @@
+#!/bin/sh -e
+
+okabi | (
+  while read abi
+  do
+    (
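+      # candidate C types for each fixed-width integer name, tried in this order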
+      echo 'int8 signed char'
+      echo 'int16 short'
+      echo 'int32 int'
+      echo 'int32 long'
+      echo 'int64 long long'
+      echo 'int64 long'
+      echo 'int64 int __attribute__((__mode__(__DI__)))'
+      echo 'uint8 unsigned char'
+      echo 'uint16 unsigned short'
+      echo 'uint32 unsigned int'
+      echo 'uint32 unsigned long'
+      echo 'uint64 unsigned long long'
+      echo 'uint64 unsigned long'
+      echo 'uint64 unsigned int __attribute__((__mode__(__DI__)))'
+    ) | (
+      while read target source
+      do
+        okc-$abi | (
+          while read c
+	  do
+            [ -f include/$abi/crypto_$target.h ] && continue
+            echo "=== `date` === $abi trying $source as $target under $c..." >&2
+            rm -f crypto_$target crypto_$target.h
+            (
+              echo "#ifndef crypto_${target}_h"
+              echo "#define crypto_${target}_h"
+              echo ""
+              echo "typedef ${source} crypto_${target};"
+              echo ""
+              echo "#endif"
+            ) > crypto_$target.h
+            $c -o crypto_$target crypto_$target.c || continue
+            ./crypto_$target || continue
+            mkdir -p include/$abi
+            cp crypto_$target.h include/$abi/crypto_$target.h
+	  done
+	)
+      done
+    )
+  done
+)
diff --git a/nacl/nacl-20110221/inttypes/signed.h b/nacl/nacl-20110221/inttypes/signed.h
new file mode 100644
index 00000000..92689ff8
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/signed.h
@@ -0,0 +1,17 @@
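+/* DOIT(bits,target) expands to a main() that returns 0 only if target is
+   exactly `bits' bits wide (doubling 1 bits times must wrap to 0) and
+   signed (0 - 1 must not compare greater than 0). */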
+#define DOIT(bits,target) \
+int main() \
+{ \
+  target x; \
+  int i; \
+ \
+  x = 1; \
+  for (i = 0;i < bits;++i) { \
+    if (x == 0) return 100; \
+    x += x; \
+  } \
+  if (x != 0) return 100; \
+  x -= 1; \
+  if (x > 0) return 100; \
+ \
+  return 0; \
+}
diff --git a/nacl/nacl-20110221/inttypes/unsigned.h b/nacl/nacl-20110221/inttypes/unsigned.h
new file mode 100644
index 00000000..31a7a6ea
--- /dev/null
+++ b/nacl/nacl-20110221/inttypes/unsigned.h
@@ -0,0 +1,17 @@
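+/* same width test as signed.h; the final check requires 0 - 1 to be
+   non-negative, which rejects signed types for an unsigned target */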
+#define DOIT(bits,target) \
+int main() \
+{ \
+  target x; \
+  int i; \
+ \
+  x = 1; \
+  for (i = 0;i < bits;++i) { \
+    if (x == 0) return 100; \
+    x += x; \
+  } \
+  if (x != 0) return 100; \
+  x -= 1; \
+  if (x < 0) return 100; \
+ \
+  return 0; \
+}
diff --git a/nacl/nacl-20110221/measure-anything.c b/nacl/nacl-20110221/measure-anything.c
new file mode 100644
index 00000000..32555060
--- /dev/null
+++ b/nacl/nacl-20110221/measure-anything.c
@@ -0,0 +1,225 @@
+/*
+ * measure-anything.c version 20090223
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include "cpucycles.h"
+#include "cpuid.h"
+
+typedef int uint32;
+
+static uint32 seed[32] = { 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5 } ;
+static uint32 in[12];
+static uint32 out[8];
+static int outleft = 0;
+
+#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
+#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));
+
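+/* surf() expands the fixed seed[] and the in[] counter into 8 output words;
+   randombytes() below uses it as a deterministic generator so that test
+   inputs are reproducible across runs */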
+static void surf(void)
+{
+  uint32 t[12]; uint32 x; uint32 sum = 0;
+  int r; int i; int loop;
+
+  for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
+  for (i = 0;i < 8;++i) out[i] = seed[24 + i];
+  x = t[11];
+  for (loop = 0;loop < 2;++loop) {
+    for (r = 0;r < 16;++r) {
+      sum += 0x9e3779b9;
+      MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
+      MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
+      MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
+    }
+    for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
+  }
+}
+
+void randombytes(unsigned char *x,unsigned long long xlen)
+{
+  while (xlen > 0) {
+    if (!outleft) {
+      if (!++in[0]) if (!++in[1]) if (!++in[2]) ++in[3];
+      surf();
+      outleft = 8;
+    }
+    *x = out[--outleft];
+    ++x;
+    --xlen;
+  }
+}
+
+extern const char *primitiveimplementation;
+extern const char *implementationversion;
+extern const char *sizenames[];
+extern const long long sizes[];
+extern void preallocate(void);
+extern void allocate(void);
+extern void measure(void);
+
+static void printword(const char *s)
+{
+  if (!*s) putchar('-');
+  while (*s) {
+    if (*s == ' ') putchar('_');
+    else if (*s == '\t') putchar('_');
+    else if (*s == '\r') putchar('_');
+    else if (*s == '\n') putchar('_');
+    else putchar(*s);
+    ++s;
+  }
+  putchar(' ');
+}
+
+static void printnum(long long x)
+{
+  printf("%lld ",x);
+}
+
+static void fail(const char *why)
+{
+  fprintf(stderr,"measure: fatal: %s\n",why);
+  exit(111);
+}
+
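+/* return at least len zeroed bytes starting at a 64-byte boundary so that
+   timings are not skewed by unlucky alignment */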
+unsigned char *alignedcalloc(unsigned long long len)
+{
+  unsigned char *x = (unsigned char *) calloc(1,len + 128);
+  if (!x) fail("out of memory");
+  /* will never deallocate so shifting is ok */
+  x += 63 & (-(unsigned long) x);
+  return x;
+}
+
+static long long cyclespersecond;
+
+static void printimplementations(void)
+{
+  int i;
+
+  printword("implementation");
+  printword(primitiveimplementation);
+  printword(implementationversion);
+  printf("\n"); fflush(stdout);
+
+  for (i = 0;sizenames[i];++i) {
+    printword(sizenames[i]);
+    printnum(sizes[i]);
+    printf("\n"); fflush(stdout);
+  }
+
+  printword("cpuid");
+  printword(cpuid);
+  printf("\n"); fflush(stdout);
+
+  printword("cpucycles_persecond");
+  printnum(cyclespersecond);
+  printf("\n"); fflush(stdout);
+
+  printword("cpucycles_implementation");
+  printword(cpucycles_implementation);
+  printf("\n"); fflush(stdout);
+
+  printword("compiler");
+  printword(COMPILER);
+#if defined(__VERSION__) && !defined(__ICC)
+  printword(__VERSION__);
+#elif defined(__xlc__)
+  printword(__xlc__);
+#elif defined(__ICC)
+  {
+    char buf[256];
+
+    sprintf(buf, "%d.%d.%d", __ICC/100, __ICC%100,
+            __INTEL_COMPILER_BUILD_DATE);
+    printword(buf);
+  }
+#elif defined(__PGIC__)
+  {
+    char buf[256];
+
+    sprintf(buf, "%d.%d.%d", __PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__);
+    printword(buf);
+  }
+#elif defined(__SUNPRO_C)
+  {
+    char buf[256];
+    int major, minor, micro;
+
+    micro = __SUNPRO_C & 0xf;
+    minor = (__SUNPRO_C >> 4) & 0xf;
+    major = (__SUNPRO_C >> 8) & 0xf;
+
+    if (micro)
+      sprintf(buf, "%d.%d.%d", major, minor, micro);
+    else
+      sprintf(buf, "%d.%d", major, minor);
+    printword(buf);
+  }
+#else
+  printword("unknown compiler version");
+#endif
+  printf("\n"); fflush(stdout);
+}
+
+void printentry(long long mbytes,const char *measuring,long long *m,long long mlen)
+{
+  long long i;
+  long long j;
+  long long belowj;
+  long long abovej;
+
+  printword(measuring);
+  if (mbytes >= 0) printnum(mbytes); else printword("");
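+  /* print a median of m[0..mlen-1] (an entry with fewer than half the
+     values strictly below it and fewer than half strictly above), then
+     every raw measurement */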
+  if (mlen > 0) { 
+    for (j = 0;j + 1 < mlen;++j) { 
+      belowj = 0;
+      for (i = 0;i < mlen;++i) if (m[i] < m[j]) ++belowj;
+      abovej = 0;
+      for (i = 0;i < mlen;++i) if (m[i] > m[j]) ++abovej;
+      if (belowj * 2 < mlen && abovej * 2 < mlen) break;
+    } 
+    printnum(m[j]);
+    if (mlen > 1) { 
+      for (i = 0;i < mlen;++i) printnum(m[i]);
+    } 
+  } 
+  printf("\n"); fflush(stdout);
+}
+
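+/* drop file-descriptor, process and core-dump limits to zero so the
+   benchmark cannot open files, fork or dump core while being measured */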
+void limits()
+{
+#ifdef RLIM_INFINITY
+  struct rlimit r;
+  r.rlim_cur = 0;
+  r.rlim_max = 0;
+#ifdef RLIMIT_NOFILE
+  setrlimit(RLIMIT_NOFILE,&r);
+#endif
+#ifdef RLIMIT_NPROC
+  setrlimit(RLIMIT_NPROC,&r);
+#endif
+#ifdef RLIMIT_CORE
+  setrlimit(RLIMIT_CORE,&r);
+#endif
+#endif
+}
+
+int main()
+{
+  cyclespersecond = cpucycles_persecond();
+  preallocate();
+  limits();
+  printimplementations();
+  allocate();
+  measure();
+  return 0;
+}
diff --git a/nacl/nacl-20110221/okcompilers/abiname.c b/nacl/nacl-20110221/okcompilers/abiname.c
new file mode 100644
index 00000000..38373201
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/abiname.c
@@ -0,0 +1,45 @@
+#include <stdio.h>
+
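+/* map the compiler's predefined architecture macros to the ABI names used
+   by the okcompilers scripts; "default" is the fallback for unknown targets */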
+const char *abi(void)
+{
+#if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64)
+   return "amd64";
+#elif defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
+   return "x86";
+#elif defined(__ia64__) || defined(__IA64__) || defined(__M_IA64)
+   return "ia64";
+#elif defined(__SPU__)
+   return "cellspu";
+#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || defined(_ARCH_PPC64)
+   return "ppc64";
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC)
+   return "ppc32";
+#elif defined(__sparcv9__) || defined(__sparcv9)
+   return "sparcv9";
+#elif defined(__sparc_v8__)
+   return "sparcv8";
+#elif defined(__sparc__) || defined(__sparc)
+   if (sizeof(long) == 4) return "sparcv8";
+   return "sparcv9";
+#elif defined(__ARM_EABI__)
+   return "armeabi";
+#elif defined(__arm__)
+   return "arm";
+#elif defined(__mips__) || defined(__mips) || defined(__MIPS__)
+#  if defined(_ABIO32)
+     return "mipso32";
+#  elif defined(_ABIN32)
+     return "mips32";
+#  else
+     return "mips64";
+#  endif
+#else
+   return "default";
+#endif
+}
+
+int main(int argc,char **argv)
+{
+  printf("%s %s\n",argv[1],abi());
+  return 0;
+}
diff --git a/nacl/nacl-20110221/okcompilers/archivers b/nacl/nacl-20110221/okcompilers/archivers
new file mode 100644
index 00000000..d5851c37
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/archivers
@@ -0,0 +1,2 @@
+ar
+ar -X64
diff --git a/nacl/nacl-20110221/okcompilers/c b/nacl/nacl-20110221/okcompilers/c
new file mode 100644
index 00000000..7218da3a
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/c
@@ -0,0 +1,8 @@
+gcc -m64 -O3 -fomit-frame-pointer -funroll-loops
+gcc -m64 -O -fomit-frame-pointer
+gcc -m64 -fomit-frame-pointer
+gcc -m32 -O3 -fomit-frame-pointer -funroll-loops
+gcc -m32 -O -fomit-frame-pointer
+gcc -m32 -fomit-frame-pointer
+spu-gcc -mstdmain -march=cell -O3 -funroll-loops -fomit-frame-pointer -Drandom=rand -Dsrandom=srand
+spu-gcc -mstdmain -march=cell -O -fomit-frame-pointer -Drandom=rand -Dsrandom=srand
diff --git a/nacl/nacl-20110221/okcompilers/cpp b/nacl/nacl-20110221/okcompilers/cpp
new file mode 100644
index 00000000..d1b9ae6d
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/cpp
@@ -0,0 +1,8 @@
+g++ -m64 -O3 -fomit-frame-pointer -funroll-loops
+g++ -m64 -O -fomit-frame-pointer
+g++ -m64 -fomit-frame-pointer
+g++ -m32 -O3 -fomit-frame-pointer -funroll-loops
+g++ -m32 -O -fomit-frame-pointer
+g++ -m32 -fomit-frame-pointer
+spu-g++ -mstdmain -march=cell -O3 -funroll-loops -fomit-frame-pointer -Drandom=rand -Dsrandom=srand
+spu-g++ -mstdmain -march=cell -O -fomit-frame-pointer -Drandom=rand -Dsrandom=srand
diff --git a/nacl/nacl-20110221/okcompilers/do b/nacl/nacl-20110221/okcompilers/do
new file mode 100755
index 00000000..372b7e00
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/do
@@ -0,0 +1,196 @@
+#!/bin/sh -e
+
+mkdir oldbin
+mkdir bin
+
+for language in c cpp
+do
+  exec <$language
+  exec 9>${language}-works
+  
+  while read c options
+  do
+    echo "=== `date` === checking $c $options" >&2
+    rm -f test*
+    (
+      echo "#!/bin/sh"
+      echo 'PATH="'"$PATH"'"'
+      echo 'export PATH'
+      echo "$c" "$options" '"$@"'
+    ) > test-okc
+    chmod 755 test-okc
+    cat lib.c main.c > test.$language || continue
+    ./test-okc -o test test.$language || continue
+    ./test || continue
+    cp main.c test1.$language || continue
+    cp lib.c test2.$language || continue
+    ./test-okc -c test1.$language || continue
+    ./test-okc -c test2.$language || continue
+    ./test-okc -o test1 test1.o test2.o || continue
+    ./test1 || continue
+    echo "=== `date` === success: $c $options is ok"
+    echo "$c $options" >&9
+  done
+
+  mv ${language}-works $language
+done
+
+exec <c
+
+exec 7>oldbin/okabi
+chmod 755 oldbin/okabi
+echo "#!/bin/sh" >&7
+
+while :
+do
+  exec <c
+  read c options || break
+
+  for language in c cpp
+  do
+    exec 8>${language}-compatible
+    exec 9>${language}-incompatible
+    echo "=== `date` === checking compatibility with $c $options" >&2
+    exec <$language
+    while read c2 options2
+    do
+      echo "=== `date` === checking $c2 $options2" >&2
+      works=1
+      rm -f test*
+      (
+        echo "#!/bin/sh"
+        echo 'PATH="'"$PATH"'"'
+        echo 'export PATH'
+        echo "$c" "$options" '"$@"'
+      ) > test-okc
+      chmod 755 test-okc
+      (
+        echo "#!/bin/sh"
+        echo 'PATH="'"$PATH"'"'
+        echo 'export PATH'
+        echo "$c2" "$options2" '"$@"'
+      ) > test-okc2
+      chmod 755 test-okc2
+      if cp main.c test5.c \
+      && cp main.cpp test5.cpp \
+      && cp lib.c test6.c \
+      && ./test-okc2 -c test5.$language \
+      && ./test-okc -c test6.c \
+      && ./test-okc2 -o test5 test5.o test6.o \
+      && ./test5
+      then
+        echo "=== `date` === success: $c2 $options2 is compatible" >&2
+        echo "$c2 $options2" >&8
+      else
+        echo "$c2 $options2" >&9
+      fi
+    done
+  done
+
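+  # name the ABI after the shortest compatible C compiler line, with spaces
+  # and slashes replaced by underscores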
+  abi=`awk '{print length($0),$0}' < c-compatible \
+  | sort -n | head -1 | sed 's/ *$//' | sed 's/^[^ ]* //' | tr ' /' '__'`
+
+  echo "echo '"$abi"'" >&7
+
+  syslibs=""
+  for i in -lm -lnsl -lsocket -lrt
+  do
+    echo "=== `date` === checking $i" >&2
+    (
+      echo "#!/bin/sh"
+      echo 'PATH="'"$PATH"'"'
+      echo 'export PATH'
+      echo "$c" "$options" '"$@"' "$i" "$syslibs"
+    ) > test-okclink
+    chmod 755 test-okclink
+    cat lib.c main.c > test.c || continue
+    ./test-okclink -o test test.c $i $syslibs || continue
+    ./test || continue
+    syslibs="$i $syslibs"
+    (
+      echo '#!/bin/sh'
+      echo 'echo "'"$syslibs"'"'
+    ) > "oldbin/oklibs-$abi"
+    chmod 755 "oldbin/oklibs-$abi"
+  done
+
+  foundokar=0
+  exec <archivers
+  while read a
+  do
+    echo "=== `date` === checking archiver $a" >&2
+    (
+      echo "#!/bin/sh"
+      echo 'PATH="'"$PATH"'"'
+      echo 'export PATH'
+      echo "$a" '"$@"'
+    ) > test-okar
+    chmod 755 test-okar
+    cp main.c test9.c || continue
+    cp lib.c test10.c || continue
+    ./test-okc -c test10.c || continue
+    ./test-okar cr test10.a test10.o || continue
+    ranlib test10.a || echo "=== `date` === no ranlib; continuing anyway" >&2
+    ./test-okc -o test9 test9.c test10.a || continue
+    ./test9 || continue
+    cp -p test-okar "oldbin/okar-$abi"
+    echo "=== `date` === success: archiver $a is ok" >&2
+    foundokar=1
+    break
+  done
+
+  case $foundokar in
+    0)
+      echo "=== `date` === giving up; no archivers work" >&2
+      exit 111
+    ;;
+  esac
+
+  for language in c cpp
+  do
+    mv ${language}-incompatible ${language}
+    exec <${language}-compatible
+    exec 9>"oldbin/ok${language}-$abi"
+    chmod 755 "oldbin/ok${language}-$abi"
+  
+    echo "#!/bin/sh" >&9
+    while read c2 options2
+    do
+      echo "echo '"$c2 $options2"'" >&9
+    done
+  done
+done
+
+exec 7>/dev/null
+
+oldbin/okabi \
+| while read abi
+do
+  oldbin/okc-$abi \
+  | head -1 \
+  | while read c
+  do
+    $c -o abiname abiname.c \
+    && ./abiname "$abi"
+  done
+done > abinames
+
+numabinames=`awk '{print $2}' < abinames | sort -u | wc -l`
+numabis=`oldbin/okabi | wc -l`
+if [ "$numabis" = "$numabinames" ]
+then
+  exec <abinames
+  exec 7>bin/okabi
+  chmod 755 bin/okabi
+  echo '#!/bin/sh' >&7
+  while read oldabi newabi
+  do
+    mv "oldbin/okc-$oldabi" "bin/okc-$newabi"
+    mv "oldbin/okcpp-$oldabi" "bin/okcpp-$newabi"
+    mv "oldbin/okar-$oldabi" "bin/okar-$newabi"
+    mv "oldbin/oklibs-$oldabi" "bin/oklibs-$newabi"
+    echo "echo $newabi" >&7
+  done
+else
+  cp -p oldbin/* bin
+fi
diff --git a/nacl/nacl-20110221/okcompilers/lib.c b/nacl/nacl-20110221/okcompilers/lib.c
new file mode 100644
index 00000000..cf2e3790
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/lib.c
@@ -0,0 +1,29 @@
+int not3(int n)
+{
+  return n != 3;
+}
+
+int bytes(int n)
+{
+  return (n + 7) / 8;
+}
+
+long long shr32(long long n)
+{
+  return n >> 32;
+}
+
+double double5(void)
+{
+  return 5.0;
+}
+
+int intbytes(void)
+{
+  return sizeof(int);
+}
+
+int longbytes(void)
+{
+  return sizeof(long);
+}
diff --git a/nacl/nacl-20110221/okcompilers/lib.cpp b/nacl/nacl-20110221/okcompilers/lib.cpp
new file mode 100644
index 00000000..ea956244
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/lib.cpp
@@ -0,0 +1,19 @@
+int not3(int n)
+{
+  return n != 3;
+}
+
+int bytes(int n)
+{
+  return (n + 7) / 8;
+}
+
+long long shr32(long long n)
+{
+  return n >> 32;
+}
+
+double double5(void)
+{
+  return 5.0;
+}
diff --git a/nacl/nacl-20110221/okcompilers/main.c b/nacl/nacl-20110221/okcompilers/main.c
new file mode 100644
index 00000000..3b7efa25
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/main.c
@@ -0,0 +1,25 @@
+extern int not3(int);
+extern int bytes(int);
+extern long long shr32(long long);
+extern double double5(void);
+extern int longbytes(void);
+extern int intbytes(void);
+
+int main(int argc,char **argv)
+{
+  if (intbytes() != sizeof(int)) return 100;
+  if (longbytes() != sizeof(long)) return 100;
+
+  if (not3(3)) return 100;
+
+  /* on ppc32, gcc -mpowerpc64 produces SIGILL for >>32 */
+  if (!not3(shr32(1))) return 100;
+
+  /* on pentium 1, gcc -march=pentium2 produces SIGILL for (...+7)/8 */
+  if (bytes(not3(1)) != 1) return 100;
+
+  /* on pentium 1, gcc -march=prescott produces SIGILL for double comparison */
+  if (double5() < 0) return 100;
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/okcompilers/main.cpp b/nacl/nacl-20110221/okcompilers/main.cpp
new file mode 100644
index 00000000..6255102c
--- /dev/null
+++ b/nacl/nacl-20110221/okcompilers/main.cpp
@@ -0,0 +1,22 @@
+extern "C" {
+  extern int not3(int);
+  extern int bytes(int);
+  extern long long shr32(long long);
+  extern double double5(void);
+}
+
+int main(int argc,char **argv)
+{
+  if (not3(3)) return 100;
+
+  /* on ppc32, gcc -mpowerpc64 produces SIGILL for >>32 */
+  if (!not3(shr32(1))) return 100;
+
+  /* on pentium 1, gcc -march=pentium2 produces SIGILL for (...+7)/8 */
+  if (bytes(not3(1)) != 1) return 100;
+
+  /* on pentium 1, gcc -march=prescott produces SIGILL for double comparison */
+  if (double5() < 0) return 100;
+
+  return 0;
+}
diff --git a/nacl/nacl-20110221/randombytes/devurandom.c b/nacl/nacl-20110221/randombytes/devurandom.c
new file mode 100644
index 00000000..f3b8d418
--- /dev/null
+++ b/nacl/nacl-20110221/randombytes/devurandom.c
@@ -0,0 +1,34 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+/* it's really stupid that there isn't a syscall for this */
+
+static int fd = -1;
+
+void randombytes(unsigned char *x,unsigned long long xlen)
+{
+  int i;
+
+  if (fd == -1) {
+    for (;;) {
+      fd = open("/dev/urandom",O_RDONLY);
+      if (fd != -1) break;
+      sleep(1);
+    }
+  }
+
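+  /* read at most 1 MB per call, sleeping and retrying on failed reads */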
+  while (xlen > 0) {
+    if (xlen < 1048576) i = xlen; else i = 1048576;
+
+    i = read(fd,x,i);
+    if (i < 1) {
+      sleep(1);
+      continue;
+    }
+
+    x += i;
+    xlen -= i;
+  }
+}
diff --git a/nacl/nacl-20110221/randombytes/devurandom.h b/nacl/nacl-20110221/randombytes/devurandom.h
new file mode 100644
index 00000000..2e0caf8a
--- /dev/null
+++ b/nacl/nacl-20110221/randombytes/devurandom.h
@@ -0,0 +1,24 @@
+/*
+randombytes/devurandom.h version 20080713
+D. J. Bernstein
+Public domain.
+*/
+
+#ifndef randombytes_devurandom_H
+#define randombytes_devurandom_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void randombytes(unsigned char *,unsigned long long);
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifndef randombytes_implementation
+#define randombytes_implementation "devurandom"
+#endif
+
+#endif
diff --git a/nacl/nacl-20110221/randombytes/do b/nacl/nacl-20110221/randombytes/do
new file mode 100644
index 00000000..42586282
--- /dev/null
+++ b/nacl/nacl-20110221/randombytes/do
@@ -0,0 +1,43 @@
+#!/bin/sh -e
+
+okabi | (
+  while read abi
+  do
+
+    rm -f randombytes.o randombytes.h
+    
+    (
+      echo devurandom
+    ) | (
+      while read n
+      do
+        okc-$abi | (
+          while read c
+          do
+            echo "=== `date` === Trying $n.c with $c..." >&2
+            rm -f test randombytes-impl.o randombytes-impl.h randombytes-impl.c
+            cp $n.c randombytes-impl.c || continue
+            cp $n.h randombytes-impl.h || continue
+            $c -c randombytes-impl.c || continue
+            $c -o test test.c randombytes-impl.o || continue
+            ./test || continue
+            echo "=== `date` === Success. Using $n.c." >&2
+            mkdir -p lib/$abi
+            mv randombytes-impl.o lib/$abi/randombytes.o
+            mkdir -p include/$abi
+            mv randombytes-impl.h include/$abi/randombytes.h
+            exit 0
+          done
+          exit 111
+        ) && exit 0
+      done
+      exit 111
+    ) || (
+      echo ===== Giving up. >&2
+      rm -f test randombytes-impl.o randombytes-impl.h randombytes-impl.c
+      exit 111
+    ) || exit 111
+
+  done
+  exit 0
+) || exit 111
diff --git a/nacl/nacl-20110221/randombytes/test.c b/nacl/nacl-20110221/randombytes/test.c
new file mode 100644
index 00000000..646811ca
--- /dev/null
+++ b/nacl/nacl-20110221/randombytes/test.c
@@ -0,0 +1,15 @@
+#include "randombytes-impl.h"
+
+unsigned char x[65536];
+unsigned long long freq[256];
+
+int main()
+{
+  unsigned long long i;
+
+  randombytes(x,sizeof x);
+  for (i = 0;i < 256;++i) freq[i] = 0;
+  for (i = 0;i < sizeof x;++i) ++freq[255 & (int) x[i]];
+  for (i = 0;i < 256;++i) if (!freq[i]) return 111;
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth.c b/nacl/nacl-20110221/tests/auth.c
new file mode 100644
index 00000000..5086624e
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth.c
@@ -0,0 +1,19 @@
+#include <stdio.h>
+#include "crypto_auth_hmacsha512256.h"
+
+/* "Test Case 2" from RFC 4231 */
+unsigned char key[32] = "Jefe";
+unsigned char c[28] = "what do ya want for nothing?";
+
+unsigned char a[32];
+
+main()
+{
+  int i;
+  crypto_auth_hmacsha512256(a,c,sizeof c,key);
+  for (i = 0;i < 32;++i) {
+    printf(",0x%02x",(unsigned int) a[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth.out b/nacl/nacl-20110221/tests/auth.out
new file mode 100644
index 00000000..35e5909d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth.out
@@ -0,0 +1,4 @@
+,0x16,0x4b,0x7a,0x7b,0xfc,0xf8,0x19,0xe2
+,0xe3,0x95,0xfb,0xe7,0x3b,0x56,0xe0,0xa3
+,0x87,0xbd,0x64,0x22,0x2e,0x83,0x1f,0xd6
+,0x10,0x27,0x0c,0xd7,0xea,0x25,0x05,0x54
diff --git a/nacl/nacl-20110221/tests/auth2.c b/nacl/nacl-20110221/tests/auth2.c
new file mode 100644
index 00000000..ba191de4
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth2.c
@@ -0,0 +1,34 @@
+/* "Test Case AUTH256-4" from RFC 4868 */
+
+#include <stdio.h>
+#include "crypto_auth_hmacsha256.h"
+
+unsigned char key[32] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
+,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20
+} ;
+
+unsigned char c[50] = {
+ 0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd
+} ;
+
+unsigned char a[32];
+
+main()
+{
+  int i;
+  crypto_auth_hmacsha256(a,c,sizeof c,key);
+  for (i = 0;i < 32;++i) {
+    printf(",0x%02x",(unsigned int) a[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth2.out b/nacl/nacl-20110221/tests/auth2.out
new file mode 100644
index 00000000..955951a2
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth2.out
@@ -0,0 +1,4 @@
+,0x37,0x2e,0xfc,0xf9,0xb4,0x0b,0x35,0xc2
+,0x11,0x5b,0x13,0x46,0x90,0x3d,0x2e,0xf4
+,0x2f,0xce,0xd4,0x6f,0x08,0x46,0xe7,0x25
+,0x7b,0xb1,0x56,0xd3,0xd7,0xb3,0x0d,0x3f
diff --git a/nacl/nacl-20110221/tests/auth3.c b/nacl/nacl-20110221/tests/auth3.c
new file mode 100644
index 00000000..b713b388
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth3.c
@@ -0,0 +1,34 @@
+/* "Test Case AUTH256-4" from RFC 4868 */
+
+#include <stdio.h>
+#include "crypto_auth_hmacsha256.h"
+
+unsigned char key[32] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
+,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20
+} ;
+
+unsigned char c[50] = {
+ 0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd
+} ;
+
+unsigned char a[32] = {
+ 0x37,0x2e,0xfc,0xf9,0xb4,0x0b,0x35,0xc2
+,0x11,0x5b,0x13,0x46,0x90,0x3d,0x2e,0xf4
+,0x2f,0xce,0xd4,0x6f,0x08,0x46,0xe7,0x25
+,0x7b,0xb1,0x56,0xd3,0xd7,0xb3,0x0d,0x3f
+} ;
+
+main()
+{
+  printf("%d\n",crypto_auth_hmacsha256_verify(a,c,sizeof c,key));
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth3.out b/nacl/nacl-20110221/tests/auth3.out
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth3.out
@@ -0,0 +1 @@
+0
diff --git a/nacl/nacl-20110221/tests/auth4.cpp b/nacl/nacl-20110221/tests/auth4.cpp
new file mode 100644
index 00000000..a94837d2
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth4.cpp
@@ -0,0 +1,44 @@
+/* "Test Case AUTH256-4" from RFC 4868 */
+
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_auth_hmacsha256.h"
+
+char key_bytes[32] = {
+ 0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08
+,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10
+,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18
+,0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20
+} ;
+
+char c_bytes[50] = {
+ 0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd,0xcd
+,0xcd,0xcd
+} ;
+
+char a_bytes[32] = {
+ 0x37,0x2e,0xfc,0xf9,0xb4,0x0b,0x35,0xc2
+,0x11,0x5b,0x13,0x46,0x90,0x3d,0x2e,0xf4
+,0x2f,0xce,0xd4,0x6f,0x08,0x46,0xe7,0x25
+,0x7b,0xb1,0x56,0xd3,0xd7,0xb3,0x0d,0x3f
+} ;
+
+main()
+{
+  string key(key_bytes,sizeof key_bytes);
+  string c(c_bytes,sizeof c_bytes);
+  string a(a_bytes,sizeof a_bytes);
+  try {
+    crypto_auth_hmacsha256_verify(a,c,key);
+    printf("0\n");
+  } catch(const char *s) {
+    printf("%s\n",s);
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth4.out b/nacl/nacl-20110221/tests/auth4.out
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth4.out
@@ -0,0 +1 @@
+0
diff --git a/nacl/nacl-20110221/tests/auth5.c b/nacl/nacl-20110221/tests/auth5.c
new file mode 100644
index 00000000..d304a073
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth5.c
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto_auth_hmacsha512256.h"
+#include "randombytes.h"
+
+unsigned char key[32];
+unsigned char c[10000];
+unsigned char a[32];
+
+main()
+{
+  int clen;
+  int i;
+  for (clen = 0;clen < 10000;++clen) {
+    randombytes(key,sizeof key);
+    randombytes(c,clen);
+    crypto_auth_hmacsha512256(a,c,clen,key);
+    if (crypto_auth_hmacsha512256_verify(a,c,clen,key) != 0) {
+      printf("fail %d\n",clen);
+      return 100;
+    }
+    if (clen > 0) {
+      c[random() % clen] += 1 + (random() % 255);
+      if (crypto_auth_hmacsha512256_verify(a,c,clen,key) == 0) {
+        printf("forgery %d\n",clen);
+        return 100;
+      }
+      a[random() % sizeof a] += 1 + (random() % 255);
+      if (crypto_auth_hmacsha512256_verify(a,c,clen,key) == 0) {
+        printf("forgery %d\n",clen);
+        return 100;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth5.out b/nacl/nacl-20110221/tests/auth5.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/auth6.cpp b/nacl/nacl-20110221/tests/auth6.cpp
new file mode 100644
index 00000000..dffb6388
--- /dev/null
+++ b/nacl/nacl-20110221/tests/auth6.cpp
@@ -0,0 +1,46 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto_auth_hmacsha512256.h"
+#include "randombytes.h"
+
+main()
+{
+  int clen;
+  int i;
+  for (clen = 0;clen < 10000;++clen) {
+    unsigned char key_bytes[32];
+    randombytes(key_bytes,sizeof key_bytes);
+    string key((char *) key_bytes,sizeof key_bytes);
+    unsigned char c_bytes[clen];
+    randombytes(c_bytes,sizeof c_bytes);
+    string c((char *) c_bytes,sizeof c_bytes);
+    string a = crypto_auth_hmacsha512256(c,key);
+    try {
+      crypto_auth_hmacsha512256_verify(a,c,key);
+    } catch(const char *s) {
+      printf("fail %d %s\n",clen,s);
+      return 100;
+    }
+    if (clen > 0) {
+      size_t pos = random() % clen;
+      c.replace(pos,1,1,c[pos] + 1 + (random() % 255));
+      try {
+        crypto_auth_hmacsha512256_verify(a,c,key);
+	printf("forgery %d\n",clen);
+      } catch(const char *s) {
+        ;
+      }
+      pos = random() % a.size();
+      a.replace(pos,1,1,a[pos] + 1 + (random() % 255));
+      try {
+        crypto_auth_hmacsha512256_verify(a,c,key);
+	printf("forgery %d\n",clen);
+      } catch(const char *s) {
+        ;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/auth6.out b/nacl/nacl-20110221/tests/auth6.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/box.c b/nacl/nacl-20110221/tests/box.c
new file mode 100644
index 00000000..b57a9883
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box.c
@@ -0,0 +1,63 @@
+#include <stdio.h>
+#include "crypto_box_curve25519xsalsa20poly1305.h"
+
+unsigned char alicesk[32] = {
+ 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45
+,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a
+,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+} ;
+
+unsigned char bobpk[32] = {
+ 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4
+,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37
+,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d
+,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+// API requires first 32 bytes to be 0
+unsigned char m[163] = {
+    0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
+} ;
+
+unsigned char c[163];
+
+main()
+{
+  int i;
+  crypto_box_curve25519xsalsa20poly1305(
+    c,m,163,nonce,bobpk,alicesk
+  );
+  for (i = 16;i < 163;++i) {
+    printf(",0x%02x",(unsigned int) c[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box.out b/nacl/nacl-20110221/tests/box.out
new file mode 100644
index 00000000..2b6c51ea
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box.out
@@ -0,0 +1,19 @@
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
diff --git a/nacl/nacl-20110221/tests/box2.c b/nacl/nacl-20110221/tests/box2.c
new file mode 100644
index 00000000..0a531142
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box2.c
@@ -0,0 +1,64 @@
+#include <stdio.h>
+#include "crypto_box_curve25519xsalsa20poly1305.h"
+
+unsigned char bobsk[32] = {
+ 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b
+,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6
+,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd
+,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb
+} ;
+
+unsigned char alicepk[32] = {
+ 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54
+,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a
+,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4
+,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+// API requires first 16 bytes to be 0
+unsigned char c[163] = {
+    0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+unsigned char m[163];
+
+main()
+{
+  int i;
+  if (crypto_box_curve25519xsalsa20poly1305_open(
+       m,c,163,nonce,alicepk,bobsk
+     ) == 0) {
+    for (i = 32;i < 163;++i) {
+      printf(",0x%02x",(unsigned int) m[i]);
+      if (i % 8 == 7) printf("\n");
+    }
+    printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box2.out b/nacl/nacl-20110221/tests/box2.out
new file mode 100644
index 00000000..c61d4557
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box2.out
@@ -0,0 +1,17 @@
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
diff --git a/nacl/nacl-20110221/tests/box3.cpp b/nacl/nacl-20110221/tests/box3.cpp
new file mode 100644
index 00000000..db89dd03
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box3.cpp
@@ -0,0 +1,60 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_box_curve25519xsalsa20poly1305.h"
+
+char alicesk_bytes[32] = {
+ 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45
+,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a
+,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+} ;
+
+char bobpk_bytes[32] = {
+ 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4
+,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37
+,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d
+,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+char m_bytes[131] = {
+ 0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
+} ;
+
+main()
+{
+  int i;
+  string m(m_bytes,sizeof m_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string bobpk(bobpk_bytes,sizeof bobpk_bytes);
+  string alicesk(alicesk_bytes,sizeof alicesk_bytes);
+  string c = crypto_box_curve25519xsalsa20poly1305(m,nonce,bobpk,alicesk);
+  for (i = 0;i < c.size();++i) {
+    printf(",0x%02x",(unsigned int) (unsigned char) c[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box3.out b/nacl/nacl-20110221/tests/box3.out
new file mode 100644
index 00000000..2b6c51ea
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box3.out
@@ -0,0 +1,19 @@
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
diff --git a/nacl/nacl-20110221/tests/box4.cpp b/nacl/nacl-20110221/tests/box4.cpp
new file mode 100644
index 00000000..7f48fcd6
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box4.cpp
@@ -0,0 +1,66 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_box_curve25519xsalsa20poly1305.h"
+
+char bobsk_bytes[32] = {
+ 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b
+,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6
+,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd
+,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb
+} ;
+
+char alicepk_bytes[32] = {
+ 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54
+,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a
+,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4
+,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+char c_bytes[147] = {
+ 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+main()
+{
+  int i;
+  string c(c_bytes,sizeof c_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string alicepk(alicepk_bytes,sizeof alicepk_bytes);
+  string bobsk(bobsk_bytes,sizeof bobsk_bytes);
+  try {
+    string m = crypto_box_curve25519xsalsa20poly1305_open(c,nonce,alicepk,bobsk);
+    for (i = 0;i < m.size();++i) {
+      printf(",0x%02x",(unsigned int) (unsigned char) m[i]);
+      if (i % 8 == 7) printf("\n");
+    }
+    printf("\n");
+  } catch(const char *s) {
+    printf("%s\n",s);
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box4.out b/nacl/nacl-20110221/tests/box4.out
new file mode 100644
index 00000000..c61d4557
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box4.out
@@ -0,0 +1,17 @@
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
diff --git a/nacl/nacl-20110221/tests/box5.cpp b/nacl/nacl-20110221/tests/box5.cpp
new file mode 100644
index 00000000..366e2e30
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box5.cpp
@@ -0,0 +1,30 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_box.h"
+#include "randombytes.h"
+
+main()
+{
+  int mlen;
+  for (mlen = 0;mlen < 1000;++mlen) {
+    string alicesk;
+    string alicepk = crypto_box_keypair(&alicesk);
+    string bobsk;
+    string bobpk = crypto_box_keypair(&bobsk);
+    unsigned char nbytes[crypto_box_NONCEBYTES];
+    randombytes(nbytes,crypto_box_NONCEBYTES);
+    string n((char *) nbytes,crypto_box_NONCEBYTES);
+    unsigned char mbytes[mlen];
+    randombytes(mbytes,mlen);
+    string m((char *) mbytes,mlen);
+    string c = crypto_box(m,n,bobpk,alicesk);
+    try {
+      string m2 = crypto_box_open(c,n,alicepk,bobsk);
+      if (m != m2) printf("bad decryption\n");
+    } catch(const char *s) {
+      printf("%s\n",s);
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box5.out b/nacl/nacl-20110221/tests/box5.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/box6.cpp b/nacl/nacl-20110221/tests/box6.cpp
new file mode 100644
index 00000000..bab18105
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box6.cpp
@@ -0,0 +1,43 @@
+#include <string>
+using std::string;
+#include <stdlib.h>
+#include <stdio.h>
+#include "crypto_box.h"
+#include "randombytes.h"
+
+main()
+{
+  int mlen;
+  for (mlen = 0;mlen < 1000;++mlen) {
+    string alicesk;
+    string alicepk = crypto_box_keypair(&alicesk);
+    string bobsk;
+    string bobpk = crypto_box_keypair(&bobsk);
+    unsigned char nbytes[crypto_box_NONCEBYTES];
+    randombytes(nbytes,crypto_box_NONCEBYTES);
+    string n((char *) nbytes,crypto_box_NONCEBYTES);
+    unsigned char mbytes[mlen];
+    randombytes(mbytes,mlen);
+    string m((char *) mbytes,mlen);
+    string c = crypto_box(m,n,bobpk,alicesk);
+    int caught = 0;
+    while (caught < 10) {
+      c.replace(random() % c.size(),1,1,random());
+      try {
+        string m2 = crypto_box_open(c,n,alicepk,bobsk);
+        if (m != m2) {
+	  printf("forgery\n");
+	  return 100;
+        }
+      } catch(const char *s) {
+	if (string(s) == string("ciphertext fails verification"))
+	  ++caught;
+	else {
+	  printf("%s\n",s);
+	  return 111;
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box6.out b/nacl/nacl-20110221/tests/box6.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/box7.c b/nacl/nacl-20110221/tests/box7.c
new file mode 100644
index 00000000..809301c1
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box7.c
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include "crypto_box.h"
+#include "randombytes.h"
+
+unsigned char alicesk[crypto_box_SECRETKEYBYTES];
+unsigned char alicepk[crypto_box_PUBLICKEYBYTES];
+unsigned char bobsk[crypto_box_SECRETKEYBYTES];
+unsigned char bobpk[crypto_box_PUBLICKEYBYTES];
+unsigned char n[crypto_box_NONCEBYTES];
+unsigned char m[10000];
+unsigned char c[10000];
+unsigned char m2[10000];
+
+main()
+{
+  int mlen;
+  int i;
+
+  for (mlen = 0;mlen < 1000 && mlen + crypto_box_ZEROBYTES < sizeof m;++mlen) {
+    crypto_box_keypair(alicepk,alicesk);
+    crypto_box_keypair(bobpk,bobsk);
+    randombytes(n,crypto_box_NONCEBYTES);
+    randombytes(m + crypto_box_ZEROBYTES,mlen);
+    crypto_box(c,m,mlen + crypto_box_ZEROBYTES,n,bobpk,alicesk);
+    if (crypto_box_open(m2,c,mlen + crypto_box_ZEROBYTES,n,alicepk,bobsk) == 0) {
+      for (i = 0;i < mlen + crypto_box_ZEROBYTES;++i)
+        if (m2[i] != m[i]) {
+	  printf("bad decryption\n");
+	  break;
+	}
+    } else {
+      printf("ciphertext fails verification\n");
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box7.out b/nacl/nacl-20110221/tests/box7.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/box8.c b/nacl/nacl-20110221/tests/box8.c
new file mode 100644
index 00000000..dac676ef
--- /dev/null
+++ b/nacl/nacl-20110221/tests/box8.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include "crypto_box.h"
+#include "randombytes.h"
+
+unsigned char alicesk[crypto_box_SECRETKEYBYTES];
+unsigned char alicepk[crypto_box_PUBLICKEYBYTES];
+unsigned char bobsk[crypto_box_SECRETKEYBYTES];
+unsigned char bobpk[crypto_box_PUBLICKEYBYTES];
+unsigned char n[crypto_box_NONCEBYTES];
+unsigned char m[10000];
+unsigned char c[10000];
+unsigned char m2[10000];
+
+main()
+{
+  int mlen;
+  int i;
+  int caught;
+
+  for (mlen = 0;mlen < 1000 && mlen + crypto_box_ZEROBYTES < sizeof m;++mlen) {
+    crypto_box_keypair(alicepk,alicesk);
+    crypto_box_keypair(bobpk,bobsk);
+    randombytes(n,crypto_box_NONCEBYTES);
+    randombytes(m + crypto_box_ZEROBYTES,mlen);
+    crypto_box(c,m,mlen + crypto_box_ZEROBYTES,n,bobpk,alicesk);
+    caught = 0;
+    while (caught < 10) {
+      c[random() % (mlen + crypto_box_ZEROBYTES)] = random();
+      if (crypto_box_open(m2,c,mlen + crypto_box_ZEROBYTES,n,alicepk,bobsk) == 0) {
+        for (i = 0;i < mlen + crypto_box_ZEROBYTES;++i)
+          if (m2[i] != m[i]) {
+	    printf("forgery\n");
+	    return 100;
+	  }
+      } else {
+        ++caught;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/box8.out b/nacl/nacl-20110221/tests/box8.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/core1.c b/nacl/nacl-20110221/tests/core1.c
new file mode 100644
index 00000000..9a8fc51d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core1.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include "crypto_core_hsalsa20.h"
+
+unsigned char shared[32] = {
+ 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1
+,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25
+,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33
+,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42
+} ;
+
+unsigned char zero[32] = { 0 };
+
+unsigned char c[16] = {
+ 0x65,0x78,0x70,0x61,0x6e,0x64,0x20,0x33
+,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,0x6b
+} ;
+
+unsigned char firstkey[32];
+
+main()
+{
+  int i;
+  crypto_core_hsalsa20(firstkey,zero,shared,c);
+  for (i = 0;i < 32;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) firstkey[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/core1.out b/nacl/nacl-20110221/tests/core1.out
new file mode 100644
index 00000000..715a489d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core1.out
@@ -0,0 +1,4 @@
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
diff --git a/nacl/nacl-20110221/tests/core2.c b/nacl/nacl-20110221/tests/core2.c
new file mode 100644
index 00000000..08402285
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core2.c
@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include "crypto_core_hsalsa20.h"
+
+unsigned char firstkey[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+unsigned char nonceprefix[16] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+} ;
+
+unsigned char c[16] = {
+ 0x65,0x78,0x70,0x61,0x6e,0x64,0x20,0x33
+,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,0x6b
+} ;
+
+unsigned char secondkey[32];
+
+int main()
+{
+  int i;
+  crypto_core_hsalsa20(secondkey,nonceprefix,firstkey,c);
+  for (i = 0;i < 32;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) secondkey[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/core2.out b/nacl/nacl-20110221/tests/core2.out
new file mode 100644
index 00000000..f4682af0
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core2.out
@@ -0,0 +1,4 @@
+ 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9
+,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88
+,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9
+,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4
diff --git a/nacl/nacl-20110221/tests/core3.c b/nacl/nacl-20110221/tests/core3.c
new file mode 100644
index 00000000..4c759a5b
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core3.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include "crypto_core_salsa20.h"
+#include "crypto_hash_sha256.h"
+
+unsigned char secondkey[32] = {
+ 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9
+,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88
+,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9
+,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4
+} ;
+
+unsigned char noncesuffix[8] = {
+ 0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+unsigned char c[16] = {
+ 0x65,0x78,0x70,0x61,0x6e,0x64,0x20,0x33
+,0x32,0x2d,0x62,0x79,0x74,0x65,0x20,0x6b
+} ;
+
+unsigned char in[16] = { 0 } ;
+
+unsigned char output[64 * 256 * 256];
+
+unsigned char h[32];
+
+int main()
+{
+  int i;
+  long long pos = 0;
+  for (i = 0;i < 8;++i) in[i] = noncesuffix[i];
+  do {
+    do {
+      crypto_core_salsa20(output + pos,in,secondkey,c);
+      pos += 64;
+    } while (++in[8]);
+  } while (++in[9]);
+  crypto_hash_sha256(h,output,sizeof output);
+  for (i = 0;i < 32;++i) printf("%02x",h[i]); printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/core3.out b/nacl/nacl-20110221/tests/core3.out
new file mode 100644
index 00000000..5fa208c1
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core3.out
@@ -0,0 +1 @@
+662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2
diff --git a/nacl/nacl-20110221/tests/core4.c b/nacl/nacl-20110221/tests/core4.c
new file mode 100644
index 00000000..1f238c5e
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core4.c
@@ -0,0 +1,33 @@
+#include <stdio.h>
+#include "crypto_core_salsa20.h"
+
+unsigned char k[32] = {
+   1,  2,  3,  4,  5,  6,  7,  8
+,  9, 10, 11, 12, 13, 14, 15, 16
+,201,202,203,204,205,206,207,208
+,209,210,211,212,213,214,215,216
+} ;
+
+unsigned char in[16] = {
+ 101,102,103,104,105,106,107,108
+,109,110,111,112,113,114,115,116
+} ;
+
+unsigned char c[16] = {
+ 101,120,112, 97,110,100, 32, 51
+, 50, 45, 98,121,116,101, 32,107
+} ;
+
+unsigned char out[64];
+
+int main()
+{
+  int i;
+  crypto_core_salsa20(out,in,k,c);
+  for (i = 0;i < 64;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("%3d",(unsigned int) out[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/core4.out b/nacl/nacl-20110221/tests/core4.out
new file mode 100644
index 00000000..d04e5b5e
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core4.out
@@ -0,0 +1,8 @@
+  69, 37, 68, 39, 41, 15,107,193
+,255,139,122,  6,170,233,217, 98
+, 89,144,182,106, 21, 51,200, 65
+,239, 49,222, 34,215,114, 40,126
+,104,197,  7,225,197,153, 31,  2
+,102, 78, 76,176, 84,245,246,184
+,177,160,133,130,  6, 72,149,119
+,192,195,132,236,234,103,246, 74
diff --git a/nacl/nacl-20110221/tests/core5.c b/nacl/nacl-20110221/tests/core5.c
new file mode 100644
index 00000000..6353477d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core5.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include "crypto_core_hsalsa20.h"
+
+unsigned char k[32] = {
+ 0xee,0x30,0x4f,0xca,0x27,0x00,0x8d,0x8c
+,0x12,0x6f,0x90,0x02,0x79,0x01,0xd8,0x0f
+,0x7f,0x1d,0x8b,0x8d,0xc9,0x36,0xcf,0x3b
+,0x9f,0x81,0x96,0x92,0x82,0x7e,0x57,0x77
+} ;
+
+unsigned char in[16] = {
+ 0x81,0x91,0x8e,0xf2,0xa5,0xe0,0xda,0x9b
+,0x3e,0x90,0x60,0x52,0x1e,0x4b,0xb3,0x52
+} ;
+
+unsigned char c[16] = {
+ 101,120,112, 97,110,100, 32, 51
+, 50, 45, 98,121,116,101, 32,107
+} ;
+
+unsigned char out[32];
+
+int main()
+{
+  int i;
+  crypto_core_hsalsa20(out,in,k,c);
+  for (i = 0;i < 32;++i) {
+    printf(",0x%02x",(unsigned int) out[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/core5.out b/nacl/nacl-20110221/tests/core5.out
new file mode 100644
index 00000000..562cf717
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core5.out
@@ -0,0 +1,4 @@
+,0xbc,0x1b,0x30,0xfc,0x07,0x2c,0xc1,0x40
+,0x75,0xe4,0xba,0xa7,0x31,0xb5,0xa8,0x45
+,0xea,0x9b,0x11,0xe9,0xa5,0x19,0x1f,0x94
+,0xe1,0x8c,0xba,0x8f,0xd8,0x21,0xa7,0xcd
diff --git a/nacl/nacl-20110221/tests/core6.c b/nacl/nacl-20110221/tests/core6.c
new file mode 100644
index 00000000..67f35df9
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core6.c
@@ -0,0 +1,47 @@
+#include <stdio.h>
+#include "crypto_core_salsa20.h"
+
+unsigned char k[32] = {
+ 0xee,0x30,0x4f,0xca,0x27,0x00,0x8d,0x8c
+,0x12,0x6f,0x90,0x02,0x79,0x01,0xd8,0x0f
+,0x7f,0x1d,0x8b,0x8d,0xc9,0x36,0xcf,0x3b
+,0x9f,0x81,0x96,0x92,0x82,0x7e,0x57,0x77
+} ;
+
+unsigned char in[16] = {
+ 0x81,0x91,0x8e,0xf2,0xa5,0xe0,0xda,0x9b
+,0x3e,0x90,0x60,0x52,0x1e,0x4b,0xb3,0x52
+} ;
+
+unsigned char c[16] = {
+ 101,120,112, 97,110,100, 32, 51
+, 50, 45, 98,121,116,101, 32,107
+} ;
+
+unsigned char out[64];
+
+void print(unsigned char *x,unsigned char *y)
+{
+  int i;
+  unsigned int borrow = 0;
+  for (i = 0;i < 4;++i) {
+    unsigned int xi = x[i];
+    unsigned int yi = y[i];
+    printf(",0x%02x",255 & (xi - yi - borrow));
+    borrow = (xi < yi + borrow);
+  }
+}
+
+int main()
+{
+  crypto_core_salsa20(out,in,k,c);
+  print(out,c);
+  print(out + 20,c + 4); printf("\n");
+  print(out + 40,c + 8);
+  print(out + 60,c + 12); printf("\n");
+  print(out + 24,in);
+  print(out + 28,in + 4); printf("\n");
+  print(out + 32,in + 8);
+  print(out + 36,in + 12); printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/core6.out b/nacl/nacl-20110221/tests/core6.out
new file mode 100644
index 00000000..562cf717
--- /dev/null
+++ b/nacl/nacl-20110221/tests/core6.out
@@ -0,0 +1,4 @@
+,0xbc,0x1b,0x30,0xfc,0x07,0x2c,0xc1,0x40
+,0x75,0xe4,0xba,0xa7,0x31,0xb5,0xa8,0x45
+,0xea,0x9b,0x11,0xe9,0xa5,0x19,0x1f,0x94
+,0xe1,0x8c,0xba,0x8f,0xd8,0x21,0xa7,0xcd
diff --git a/nacl/nacl-20110221/tests/hash.c b/nacl/nacl-20110221/tests/hash.c
new file mode 100644
index 00000000..8de470aa
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash.c
@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include "crypto_hash.h"
+
+unsigned char x[8] = "testing\n";
+unsigned char h[crypto_hash_BYTES];
+
+int main()
+{
+  int i;
+  crypto_hash(h,x,sizeof x);
+  for (i = 0;i < crypto_hash_BYTES;++i) printf("%02x",(unsigned int) h[i]);
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/hash.out b/nacl/nacl-20110221/tests/hash.out
new file mode 100644
index 00000000..df582172
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash.out
@@ -0,0 +1 @@
+24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28
diff --git a/nacl/nacl-20110221/tests/hash2.cpp b/nacl/nacl-20110221/tests/hash2.cpp
new file mode 100644
index 00000000..6594620d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash2.cpp
@@ -0,0 +1,18 @@
+#include <iostream>
+#include <string>
+using std::string;
+using std::cout;
+using std::hex;
+#include "crypto_hash.h"
+
+int main()
+{
+  string x = "testing\n";
+  string h = crypto_hash(x);
+  for (int i = 0;i < h.size();++i) {
+    cout << hex << (15 & (int) (h[i] >> 4));
+    cout << hex << (15 & (int) h[i]);
+  }
+  cout << "\n";
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/hash2.out b/nacl/nacl-20110221/tests/hash2.out
new file mode 100644
index 00000000..df582172
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash2.out
@@ -0,0 +1 @@
+24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28
diff --git a/nacl/nacl-20110221/tests/hash3.c b/nacl/nacl-20110221/tests/hash3.c
new file mode 100644
index 00000000..10b89b90
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash3.c
@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include "crypto_hash_sha512.h"
+
+unsigned char x[8] = "testing\n";
+unsigned char h[crypto_hash_sha512_BYTES];
+
+int main()
+{
+  int i;
+  crypto_hash_sha512(h,x,sizeof x);
+  for (i = 0;i < crypto_hash_sha512_BYTES;++i) printf("%02x",(unsigned int) h[i]);
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/hash3.out b/nacl/nacl-20110221/tests/hash3.out
new file mode 100644
index 00000000..df582172
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash3.out
@@ -0,0 +1 @@
+24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28
diff --git a/nacl/nacl-20110221/tests/hash4.cpp b/nacl/nacl-20110221/tests/hash4.cpp
new file mode 100644
index 00000000..1d0a3f37
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash4.cpp
@@ -0,0 +1,18 @@
+#include <iostream>
+#include <string>
+using std::string;
+using std::cout;
+using std::hex;
+#include "crypto_hash_sha512.h"
+
+int main()
+{
+  string x = "testing\n";
+  string h = crypto_hash_sha512(x);
+  for (int i = 0;i < h.size();++i) {
+    cout << hex << (15 & (int) (h[i] >> 4));
+    cout << hex << (15 & (int) h[i]);
+  }
+  cout << "\n";
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/hash4.out b/nacl/nacl-20110221/tests/hash4.out
new file mode 100644
index 00000000..df582172
--- /dev/null
+++ b/nacl/nacl-20110221/tests/hash4.out
@@ -0,0 +1 @@
+24f950aac7b9ea9b3cb728228a0c82b67c39e96b4b344798870d5daee93e3ae5931baae8c7cacfea4b629452c38026a81d138bc7aad1af3ef7bfd5ec646d6c28
diff --git a/nacl/nacl-20110221/tests/onetimeauth.c b/nacl/nacl-20110221/tests/onetimeauth.c
new file mode 100644
index 00000000..60a2df14
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth.c
@@ -0,0 +1,42 @@
+#include <stdio.h>
+#include "crypto_onetimeauth_poly1305.h"
+
+unsigned char rs[32] = {
+ 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91
+,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25
+,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65
+,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80
+} ;
+
+unsigned char c[131] = {
+ 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+unsigned char a[16];
+
+int main()
+{
+  int i;
+  crypto_onetimeauth_poly1305(a,c,131,rs);
+  for (i = 0;i < 16;++i) {
+    printf(",0x%02x",(unsigned int) a[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/onetimeauth.out b/nacl/nacl-20110221/tests/onetimeauth.out
new file mode 100644
index 00000000..6d914615
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth.out
@@ -0,0 +1,2 @@
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
diff --git a/nacl/nacl-20110221/tests/onetimeauth2.c b/nacl/nacl-20110221/tests/onetimeauth2.c
new file mode 100644
index 00000000..64c1a9cd
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth2.c
@@ -0,0 +1,40 @@
+#include <stdio.h>
+#include "crypto_onetimeauth_poly1305.h"
+
+unsigned char rs[32] = {
+ 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91
+,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25
+,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65
+,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80
+} ;
+
+unsigned char c[131] = {
+ 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+unsigned char a[16] = {
+ 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+} ;
+
+int main()
+{
+  printf("%d\n",crypto_onetimeauth_poly1305_verify(a,c,131,rs));
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/onetimeauth2.out b/nacl/nacl-20110221/tests/onetimeauth2.out
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth2.out
@@ -0,0 +1 @@
+0
diff --git a/nacl/nacl-20110221/tests/onetimeauth5.cpp b/nacl/nacl-20110221/tests/onetimeauth5.cpp
new file mode 100644
index 00000000..884892ac
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth5.cpp
@@ -0,0 +1,46 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_onetimeauth_poly1305.h"
+
+char rs_bytes[32] = {
+ 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91
+,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25
+,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65
+,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80
+} ;
+
+char c_bytes[131] = {
+ 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+unsigned char a[16];
+
+int main()
+{
+  int i;
+  string c(c_bytes,sizeof c_bytes);
+  string rs(rs_bytes,sizeof rs_bytes);
+  string a = crypto_onetimeauth_poly1305(c,rs);
+  for (i = 0;i < a.size();++i) {
+    printf(",0x%02x",(unsigned int) (unsigned char) a[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/onetimeauth5.out b/nacl/nacl-20110221/tests/onetimeauth5.out
new file mode 100644
index 00000000..6d914615
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth5.out
@@ -0,0 +1,2 @@
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
diff --git a/nacl/nacl-20110221/tests/onetimeauth6.cpp b/nacl/nacl-20110221/tests/onetimeauth6.cpp
new file mode 100644
index 00000000..d79d4613
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth6.cpp
@@ -0,0 +1,50 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_onetimeauth_poly1305.h"
+
+char rs_bytes[32] = {
+ 0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91
+,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25
+,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65
+,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80
+} ;
+
+char c_bytes[131] = {
+ 0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+char a_bytes[16] = {
+ 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+} ;
+
+int main()
+{
+  string rs(rs_bytes,sizeof rs_bytes);
+  string c(c_bytes,sizeof c_bytes);
+  string a(a_bytes,sizeof a_bytes);
+  try {
+    crypto_onetimeauth_poly1305_verify(a,c,rs);
+    printf("0\n");
+  } catch(const char *s) {
+    printf("%s\n",s);
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/onetimeauth6.out b/nacl/nacl-20110221/tests/onetimeauth6.out
new file mode 100644
index 00000000..573541ac
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth6.out
@@ -0,0 +1 @@
+0
diff --git a/nacl/nacl-20110221/tests/onetimeauth7.c b/nacl/nacl-20110221/tests/onetimeauth7.c
new file mode 100644
index 00000000..349b8751
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth7.c
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto_onetimeauth_poly1305.h"
+#include "randombytes.h"
+
+unsigned char key[32];
+unsigned char c[10000];
+unsigned char a[16];
+
+int main()
+{
+  int clen;
+  int i;
+  for (clen = 0;clen < 10000;++clen) {
+    randombytes(key,sizeof key);
+    randombytes(c,clen);
+    crypto_onetimeauth_poly1305(a,c,clen,key);
+    if (crypto_onetimeauth_poly1305_verify(a,c,clen,key) != 0) {
+      printf("fail %d\n",clen);
+      return 100;
+    }
+    if (clen > 0) {
+      c[random() % clen] += 1 + (random() % 255);
+      if (crypto_onetimeauth_poly1305_verify(a,c,clen,key) == 0) {
+        printf("forgery %d\n",clen);
+        return 100;
+      }
+      a[random() % sizeof a] += 1 + (random() % 255);
+      if (crypto_onetimeauth_poly1305_verify(a,c,clen,key) == 0) {
+        printf("forgery %d\n",clen);
+        return 100;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/onetimeauth7.out b/nacl/nacl-20110221/tests/onetimeauth7.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/onetimeauth8.cpp b/nacl/nacl-20110221/tests/onetimeauth8.cpp
new file mode 100644
index 00000000..ce554fb4
--- /dev/null
+++ b/nacl/nacl-20110221/tests/onetimeauth8.cpp
@@ -0,0 +1,46 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto_onetimeauth_poly1305.h"
+#include "randombytes.h"
+
+int main()
+{
+  int clen;
+  int i;
+  for (clen = 0;clen < 10000;++clen) {
+    unsigned char key_bytes[32];
+    randombytes(key_bytes,sizeof key_bytes);
+    string key((char *) key_bytes,sizeof key_bytes);
+    unsigned char c_bytes[clen];
+    randombytes(c_bytes,sizeof c_bytes);
+    string c((char *) c_bytes,sizeof c_bytes);
+    string a = crypto_onetimeauth_poly1305(c,key);
+    try {
+      crypto_onetimeauth_poly1305_verify(a,c,key);
+    } catch(const char *s) {
+      printf("fail %d %s\n",clen,s);
+      return 100;
+    }
+    if (clen > 0) {
+      size_t pos = random() % clen;
+      c.replace(pos,1,1,c[pos] + 1 + (random() % 255));
+      try {
+        crypto_onetimeauth_poly1305_verify(a,c,key);
+        printf("forgery %d\n",clen);
+      } catch(const char *s) {
+        ;
+      }
+      pos = random() % a.size();
+      a.replace(pos,1,1,a[pos] + 1 + (random() % 255));
+      try {
+        crypto_onetimeauth_poly1305_verify(a,c,key);
+        printf("forgery %d\n",clen);
+      } catch(const char *s) {
+        ;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/onetimeauth8.out b/nacl/nacl-20110221/tests/onetimeauth8.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/scalarmult.c b/nacl/nacl-20110221/tests/scalarmult.c
new file mode 100644
index 00000000..d9265954
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include "crypto_scalarmult_curve25519.h"
+
+unsigned char alicesk[32] = {
+ 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45
+,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a
+,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+} ;
+
+unsigned char alicepk[32];
+
+int main()
+{
+  int i;
+  crypto_scalarmult_curve25519_base(alicepk,alicesk);
+  for (i = 0;i < 32;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) alicepk[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult.out b/nacl/nacl-20110221/tests/scalarmult.out
new file mode 100644
index 00000000..ddd130d6
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult.out
@@ -0,0 +1,4 @@
+ 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54
+,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a
+,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4
+,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a
diff --git a/nacl/nacl-20110221/tests/scalarmult2.c b/nacl/nacl-20110221/tests/scalarmult2.c
new file mode 100644
index 00000000..90e6360d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult2.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include "crypto_scalarmult_curve25519.h"
+
+unsigned char bobsk[32] = {
+ 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b
+,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6
+,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd
+,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb
+} ;
+
+unsigned char bobpk[32];
+
+int main()
+{
+  int i;
+  crypto_scalarmult_curve25519_base(bobpk,bobsk);
+  for (i = 0;i < 32;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) bobpk[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult2.out b/nacl/nacl-20110221/tests/scalarmult2.out
new file mode 100644
index 00000000..b5391865
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult2.out
@@ -0,0 +1,4 @@
+ 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4
+,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37
+,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d
+,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f
diff --git a/nacl/nacl-20110221/tests/scalarmult3.cpp b/nacl/nacl-20110221/tests/scalarmult3.cpp
new file mode 100644
index 00000000..4e8fef3d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult3.cpp
@@ -0,0 +1,31 @@
+#include <iostream>
+#include <iomanip>
+#include <string>
+using std::string;
+using std::cout;
+using std::setfill;
+using std::setw;
+using std::hex;
+#include "crypto_scalarmult_curve25519.h"
+
+char alicesk_bytes[32] = {
+ 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45
+,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a
+,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+} ;
+
+int main()
+{
+  int i;
+  cout << setfill('0');
+  string alicesk(alicesk_bytes,sizeof alicesk_bytes);
+  string alicepk = crypto_scalarmult_curve25519_base(alicesk);
+  for (i = 0;i < alicepk.size();++i) {
+    unsigned char c = alicepk[i];
+    if (i > 0) cout << ","; else cout << " ";
+    cout << "0x" << hex << setw(2) << (unsigned int) c;
+    if (i % 8 == 7) cout << "\n";
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult3.out b/nacl/nacl-20110221/tests/scalarmult3.out
new file mode 100644
index 00000000..ddd130d6
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult3.out
@@ -0,0 +1,4 @@
+ 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54
+,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a
+,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4
+,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a
diff --git a/nacl/nacl-20110221/tests/scalarmult4.cpp b/nacl/nacl-20110221/tests/scalarmult4.cpp
new file mode 100644
index 00000000..8e4d64e9
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult4.cpp
@@ -0,0 +1,31 @@
+#include <iostream>
+#include <iomanip>
+#include <string>
+using std::string;
+using std::cout;
+using std::setfill;
+using std::setw;
+using std::hex;
+#include "crypto_scalarmult_curve25519.h"
+
+char bobsk_bytes[32] = {
+ 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b
+,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6
+,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd
+,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb
+} ;
+
+int main()
+{
+  int i;
+  cout << setfill('0');
+  string bobsk(bobsk_bytes,sizeof bobsk_bytes);
+  string bobpk = crypto_scalarmult_curve25519_base(bobsk);
+  for (i = 0;i < bobpk.size();++i) {
+    unsigned char c = bobpk[i];
+    if (i > 0) cout << ","; else cout << " ";
+    cout << "0x" << hex << setw(2) << (unsigned int) c;
+    if (i % 8 == 7) cout << "\n";
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult4.out b/nacl/nacl-20110221/tests/scalarmult4.out
new file mode 100644
index 00000000..b5391865
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult4.out
@@ -0,0 +1,4 @@
+ 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4
+,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37
+,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d
+,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f
diff --git a/nacl/nacl-20110221/tests/scalarmult5.c b/nacl/nacl-20110221/tests/scalarmult5.c
new file mode 100644
index 00000000..14f8159d
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult5.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include "crypto_scalarmult_curve25519.h"
+
+unsigned char alicesk[32] = {
+ 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45
+,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a
+,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+} ;
+
+unsigned char bobpk[32] = {
+ 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4
+,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37
+,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d
+,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f
+} ;
+
+unsigned char k[32];
+
+int main()
+{
+  int i;
+  crypto_scalarmult_curve25519(k,alicesk,bobpk);
+  for (i = 0;i < 32;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) k[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult5.out b/nacl/nacl-20110221/tests/scalarmult5.out
new file mode 100644
index 00000000..bec21130
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult5.out
@@ -0,0 +1,4 @@
+ 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1
+,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25
+,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33
+,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42
diff --git a/nacl/nacl-20110221/tests/scalarmult6.c b/nacl/nacl-20110221/tests/scalarmult6.c
new file mode 100644
index 00000000..89bf9bdd
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult6.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include "crypto_scalarmult_curve25519.h"
+
+unsigned char bobsk[32] = {
+ 0x5d,0xab,0x08,0x7e,0x62,0x4a,0x8a,0x4b
+,0x79,0xe1,0x7f,0x8b,0x83,0x80,0x0e,0xe6
+,0x6f,0x3b,0xb1,0x29,0x26,0x18,0xb6,0xfd
+,0x1c,0x2f,0x8b,0x27,0xff,0x88,0xe0,0xeb
+} ;
+
+unsigned char alicepk[32] = {
+ 0x85,0x20,0xf0,0x09,0x89,0x30,0xa7,0x54
+,0x74,0x8b,0x7d,0xdc,0xb4,0x3e,0xf7,0x5a
+,0x0d,0xbf,0x3a,0x0d,0x26,0x38,0x1a,0xf4
+,0xeb,0xa4,0xa9,0x8e,0xaa,0x9b,0x4e,0x6a
+} ;
+
+unsigned char k[32];
+
+int main()
+{
+  int i;
+  crypto_scalarmult_curve25519(k,bobsk,alicepk);
+  for (i = 0;i < 32;++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) k[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult6.out b/nacl/nacl-20110221/tests/scalarmult6.out
new file mode 100644
index 00000000..bec21130
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult6.out
@@ -0,0 +1,4 @@
+ 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1
+,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25
+,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33
+,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42
diff --git a/nacl/nacl-20110221/tests/scalarmult7.cpp b/nacl/nacl-20110221/tests/scalarmult7.cpp
new file mode 100644
index 00000000..8382d747
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult7.cpp
@@ -0,0 +1,32 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_scalarmult_curve25519.h"
+
+char alicesk_bytes[32] = {
+ 0x77,0x07,0x6d,0x0a,0x73,0x18,0xa5,0x7d
+,0x3c,0x16,0xc1,0x72,0x51,0xb2,0x66,0x45
+,0xdf,0x4c,0x2f,0x87,0xeb,0xc0,0x99,0x2a
+,0xb1,0x77,0xfb,0xa5,0x1d,0xb9,0x2c,0x2a
+} ;
+
+char bobpk_bytes[32] = {
+ 0xde,0x9e,0xdb,0x7d,0x7b,0x7d,0xc1,0xb4
+,0xd3,0x5b,0x61,0xc2,0xec,0xe4,0x35,0x37
+,0x3f,0x83,0x43,0xc8,0x5b,0x78,0x67,0x4d
+,0xad,0xfc,0x7e,0x14,0x6f,0x88,0x2b,0x4f
+} ;
+
+int main()
+{
+  int i;
+  string alicesk(alicesk_bytes,sizeof alicesk_bytes);
+  string bobpk(bobpk_bytes,sizeof bobpk_bytes);
+  string k = crypto_scalarmult_curve25519(alicesk,bobpk);
+  for (i = 0;i < k.size();++i) {
+    if (i > 0) printf(","); else printf(" ");
+    printf("0x%02x",(unsigned int) (unsigned char) k[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/scalarmult7.out b/nacl/nacl-20110221/tests/scalarmult7.out
new file mode 100644
index 00000000..bec21130
--- /dev/null
+++ b/nacl/nacl-20110221/tests/scalarmult7.out
@@ -0,0 +1,4 @@
+ 0x4a,0x5d,0x9d,0x5b,0xa4,0xce,0x2d,0xe1
+,0x72,0x8e,0x3b,0xf4,0x80,0x35,0x0f,0x25
+,0xe0,0x7e,0x21,0xc9,0x47,0xd1,0x9e,0x33
+,0x76,0xf0,0x9b,0x3c,0x1e,0x16,0x17,0x42
diff --git a/nacl/nacl-20110221/tests/secretbox.c b/nacl/nacl-20110221/tests/secretbox.c
new file mode 100644
index 00000000..773f5b62
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include "crypto_secretbox_xsalsa20poly1305.h"
+
+unsigned char firstkey[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+// API requires first 32 bytes to be 0
+unsigned char m[163] = {
+    0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
+} ;
+
+unsigned char c[163];
+
+int main()
+{
+  int i;
+  crypto_secretbox_xsalsa20poly1305(
+    c,m,163,nonce,firstkey
+  );
+  for (i = 16;i < 163;++i) {
+    printf(",0x%02x",(unsigned int) c[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox.out b/nacl/nacl-20110221/tests/secretbox.out
new file mode 100644
index 00000000..2b6c51ea
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox.out
@@ -0,0 +1,19 @@
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
diff --git a/nacl/nacl-20110221/tests/secretbox2.c b/nacl/nacl-20110221/tests/secretbox2.c
new file mode 100644
index 00000000..b6a2a937
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox2.c
@@ -0,0 +1,57 @@
+#include <stdio.h>
+#include "crypto_secretbox_xsalsa20poly1305.h"
+
+unsigned char firstkey[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+// API requires first 16 bytes to be 0
+unsigned char c[163] = {
+    0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+unsigned char m[163];
+
+int main()
+{
+  int i;
+  if (crypto_secretbox_xsalsa20poly1305_open(
+       m,c,163,nonce,firstkey
+      ) == 0) {
+    for (i = 32;i < 163;++i) {
+      printf(",0x%02x",(unsigned int) m[i]);
+      if (i % 8 == 7) printf("\n");
+    }
+    printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox2.out b/nacl/nacl-20110221/tests/secretbox2.out
new file mode 100644
index 00000000..c61d4557
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox2.out
@@ -0,0 +1,17 @@
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
diff --git a/nacl/nacl-20110221/tests/secretbox3.cpp b/nacl/nacl-20110221/tests/secretbox3.cpp
new file mode 100644
index 00000000..39ca7c53
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox3.cpp
@@ -0,0 +1,52 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_secretbox_xsalsa20poly1305.h"
+
+char firstkey_bytes[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+char m_bytes[131] = {
+ 0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
+} ;
+
+int main()
+{
+  int i;
+  string m(m_bytes,sizeof m_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string firstkey(firstkey_bytes,sizeof firstkey_bytes);
+  string c = crypto_secretbox_xsalsa20poly1305(m,nonce,firstkey);
+  for (i = 0;i < c.size();++i) {
+    printf(",0x%02x",(unsigned int) (unsigned char) c[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox3.out b/nacl/nacl-20110221/tests/secretbox3.out
new file mode 100644
index 00000000..2b6c51ea
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox3.out
@@ -0,0 +1,19 @@
+,0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
diff --git a/nacl/nacl-20110221/tests/secretbox4.cpp b/nacl/nacl-20110221/tests/secretbox4.cpp
new file mode 100644
index 00000000..416e4d9e
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox4.cpp
@@ -0,0 +1,54 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_secretbox_xsalsa20poly1305.h"
+
+char firstkey_bytes[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+char c_bytes[147] = {
+ 0xf3,0xff,0xc7,0x70,0x3f,0x94,0x00,0xe5
+,0x2a,0x7d,0xfb,0x4b,0x3d,0x33,0x05,0xd9
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
+} ;
+
+int main()
+{
+  int i;
+  string firstkey(firstkey_bytes,sizeof firstkey_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string c(c_bytes,sizeof c_bytes);
+  string m = crypto_secretbox_xsalsa20poly1305_open(c,nonce,firstkey);
+  for (i = 0;i < m.size();++i) {
+    printf(",0x%02x",(unsigned int) (unsigned char) m[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox4.out b/nacl/nacl-20110221/tests/secretbox4.out
new file mode 100644
index 00000000..c61d4557
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox4.out
@@ -0,0 +1,17 @@
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
diff --git a/nacl/nacl-20110221/tests/secretbox5.cpp b/nacl/nacl-20110221/tests/secretbox5.cpp
new file mode 100644
index 00000000..e8cc0eeb
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox5.cpp
@@ -0,0 +1,29 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_secretbox.h"
+#include "randombytes.h"
+
+int main()
+{
+  int mlen;
+  for (mlen = 0;mlen < 1000;++mlen) {
+    unsigned char kbytes[crypto_secretbox_KEYBYTES];
+    randombytes(kbytes,crypto_secretbox_KEYBYTES);
+    string k((char *) kbytes,crypto_secretbox_KEYBYTES);
+    unsigned char nbytes[crypto_secretbox_NONCEBYTES];
+    randombytes(nbytes,crypto_secretbox_NONCEBYTES);
+    string n((char *) nbytes,crypto_secretbox_NONCEBYTES);
+    unsigned char mbytes[mlen];
+    randombytes(mbytes,mlen);
+    string m((char *) mbytes,mlen);
+    string c = crypto_secretbox(m,n,k);
+    try {
+      string m2 = crypto_secretbox_open(c,n,k);
+      if (m != m2) printf("bad decryption\n");
+    } catch(const char *s) {
+      printf("%s\n",s);
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox5.out b/nacl/nacl-20110221/tests/secretbox5.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/secretbox6.cpp b/nacl/nacl-20110221/tests/secretbox6.cpp
new file mode 100644
index 00000000..e8274006
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox6.cpp
@@ -0,0 +1,42 @@
+#include <string>
+using std::string;
+#include <stdlib.h>
+#include <stdio.h>
+#include "crypto_secretbox.h"
+#include "randombytes.h"
+
+int main()
+{
+  int mlen;
+  for (mlen = 0;mlen < 1000;++mlen) {
+    unsigned char kbytes[crypto_secretbox_KEYBYTES];
+    randombytes(kbytes,crypto_secretbox_KEYBYTES);
+    string k((char *) kbytes,crypto_secretbox_KEYBYTES);
+    unsigned char nbytes[crypto_secretbox_NONCEBYTES];
+    randombytes(nbytes,crypto_secretbox_NONCEBYTES);
+    string n((char *) nbytes,crypto_secretbox_NONCEBYTES);
+    unsigned char mbytes[mlen];
+    randombytes(mbytes,mlen);
+    string m((char *) mbytes,mlen);
+    string c = crypto_secretbox(m,n,k);
+    int caught = 0;
+    while (caught < 10) {
+      c.replace(random() % c.size(),1,1,random());
+      try {
+        string m2 = crypto_secretbox_open(c,n,k);
+        if (m != m2) {
+          printf("forgery\n");
+          return 100;
+        }
+      } catch(const char *s) {
+        if (string(s) == string("ciphertext fails verification"))
+          ++caught;
+        else {
+          printf("%s\n",s);
+          return 111;
+        }
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox6.out b/nacl/nacl-20110221/tests/secretbox6.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/secretbox7.c b/nacl/nacl-20110221/tests/secretbox7.c
new file mode 100644
index 00000000..d4be9b49
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox7.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include "crypto_secretbox.h"
+#include "randombytes.h"
+
+unsigned char k[crypto_secretbox_KEYBYTES];
+unsigned char n[crypto_secretbox_NONCEBYTES];
+unsigned char m[10000];
+unsigned char c[10000];
+unsigned char m2[10000];
+
+int main()
+{
+  int mlen;
+  int i;
+
+  for (mlen = 0;mlen < 1000 && mlen + crypto_secretbox_ZEROBYTES < sizeof m;++mlen) {
+    randombytes(k,crypto_secretbox_KEYBYTES);
+    randombytes(n,crypto_secretbox_NONCEBYTES);
+    randombytes(m + crypto_secretbox_ZEROBYTES,mlen);
+    crypto_secretbox(c,m,mlen + crypto_secretbox_ZEROBYTES,n,k);
+    if (crypto_secretbox_open(m2,c,mlen + crypto_secretbox_ZEROBYTES,n,k) == 0) {
+      for (i = 0;i < mlen + crypto_secretbox_ZEROBYTES;++i)
+        if (m2[i] != m[i]) {
+          printf("bad decryption\n");
+          break;
+        }
+    } else {
+      printf("ciphertext fails verification\n");
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox7.out b/nacl/nacl-20110221/tests/secretbox7.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/secretbox8.c b/nacl/nacl-20110221/tests/secretbox8.c
new file mode 100644
index 00000000..a6c75c23
--- /dev/null
+++ b/nacl/nacl-20110221/tests/secretbox8.c
@@ -0,0 +1,38 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "crypto_secretbox.h"
+#include "randombytes.h"
+
+unsigned char k[crypto_secretbox_KEYBYTES];
+unsigned char n[crypto_secretbox_NONCEBYTES];
+unsigned char m[10000];
+unsigned char c[10000];
+unsigned char m2[10000];
+
+int main()
+{
+  int mlen;
+  int i;
+  int caught;
+
+  for (mlen = 0;mlen < 1000 && mlen + crypto_secretbox_ZEROBYTES < sizeof m;++mlen) {
+    randombytes(k,crypto_secretbox_KEYBYTES);
+    randombytes(n,crypto_secretbox_NONCEBYTES);
+    randombytes(m + crypto_secretbox_ZEROBYTES,mlen);
+    crypto_secretbox(c,m,mlen + crypto_secretbox_ZEROBYTES,n,k);
+    caught = 0;
+    while (caught < 10) {
+      c[random() % (mlen + crypto_secretbox_ZEROBYTES)] = random();
+      if (crypto_secretbox_open(m2,c,mlen + crypto_secretbox_ZEROBYTES,n,k) == 0) {
+        for (i = 0;i < mlen + crypto_secretbox_ZEROBYTES;++i)
+          if (m2[i] != m[i]) {
+            printf("forgery\n");
+            return 100;
+          }
+      } else {
+        ++caught;
+      }
+    }
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/secretbox8.out b/nacl/nacl-20110221/tests/secretbox8.out
new file mode 100644
index 00000000..e69de29b
diff --git a/nacl/nacl-20110221/tests/stream.c b/nacl/nacl-20110221/tests/stream.c
new file mode 100644
index 00000000..ebb39398
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include "crypto_stream_xsalsa20.h"
+#include "crypto_hash_sha256.h"
+
+unsigned char firstkey[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+unsigned char output[4194304];
+
+unsigned char h[32];
+
+int main()
+{
+  int i;
+  crypto_stream_xsalsa20(output,4194304,nonce,firstkey);
+  crypto_hash_sha256(h,output,sizeof output);
+  for (i = 0;i < 32;++i) printf("%02x",h[i]); printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream.out b/nacl/nacl-20110221/tests/stream.out
new file mode 100644
index 00000000..5fa208c1
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream.out
@@ -0,0 +1 @@
+662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2
diff --git a/nacl/nacl-20110221/tests/stream2.c b/nacl/nacl-20110221/tests/stream2.c
new file mode 100644
index 00000000..12f13de4
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream2.c
@@ -0,0 +1,27 @@
+#include <stdio.h>
+#include "crypto_stream_salsa20.h"
+#include "crypto_hash_sha256.h"
+
+unsigned char secondkey[32] = {
+ 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9
+,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88
+,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9
+,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4
+} ;
+
+unsigned char noncesuffix[8] = {
+ 0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+unsigned char output[4194304];
+
+unsigned char h[32];
+
+int main()
+{
+  int i;
+  crypto_stream_salsa20(output,4194304,noncesuffix,secondkey);
+  crypto_hash_sha256(h,output,sizeof output);
+  for (i = 0;i < 32;++i) printf("%02x",h[i]); printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream2.out b/nacl/nacl-20110221/tests/stream2.out
new file mode 100644
index 00000000..5fa208c1
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream2.out
@@ -0,0 +1 @@
+662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2
diff --git a/nacl/nacl-20110221/tests/stream3.c b/nacl/nacl-20110221/tests/stream3.c
new file mode 100644
index 00000000..7798dc18
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream3.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include "crypto_stream_xsalsa20.h"
+
+unsigned char firstkey[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+unsigned char rs[32];
+
+int main()
+{
+  int i;
+  crypto_stream_xsalsa20(rs,32,nonce,firstkey);
+  for (i = 0;i < 32;++i) {
+    printf(",0x%02x",(unsigned int) rs[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream3.out b/nacl/nacl-20110221/tests/stream3.out
new file mode 100644
index 00000000..9cd78798
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream3.out
@@ -0,0 +1,4 @@
+,0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91
+,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25
+,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65
+,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80
diff --git a/nacl/nacl-20110221/tests/stream4.c b/nacl/nacl-20110221/tests/stream4.c
new file mode 100644
index 00000000..84d8c523
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream4.c
@@ -0,0 +1,53 @@
+#include <stdio.h>
+#include "crypto_stream_xsalsa20.h"
+
+unsigned char firstkey[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+unsigned char nonce[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+unsigned char m[163] = {
+    0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
+} ;
+
+unsigned char c[163];
+
+int main()
+{
+  int i;
+  crypto_stream_xsalsa20_xor(c,m,163,nonce,firstkey);
+  for (i = 32;i < 163;++i) {
+    printf(",0x%02x",(unsigned int) c[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream4.out b/nacl/nacl-20110221/tests/stream4.out
new file mode 100644
index 00000000..0d3d8e94
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream4.out
@@ -0,0 +1,17 @@
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
diff --git a/nacl/nacl-20110221/tests/stream5.cpp b/nacl/nacl-20110221/tests/stream5.cpp
new file mode 100644
index 00000000..66f3839b
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream5.cpp
@@ -0,0 +1,29 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_stream_xsalsa20.h"
+#include "crypto_hash_sha256.h"
+
+char firstkey_bytes[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+int main()
+{
+  int i;
+  string firstkey(firstkey_bytes,sizeof firstkey_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string output = crypto_stream_xsalsa20(4194304,nonce,firstkey);
+  string h = crypto_hash_sha256(output);
+  for (i = 0;i < 32;++i) printf("%02x",(unsigned int) (unsigned char) h[i]); printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream5.out b/nacl/nacl-20110221/tests/stream5.out
new file mode 100644
index 00000000..5fa208c1
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream5.out
@@ -0,0 +1 @@
+662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2
diff --git a/nacl/nacl-20110221/tests/stream6.cpp b/nacl/nacl-20110221/tests/stream6.cpp
new file mode 100644
index 00000000..d9ed61f7
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream6.cpp
@@ -0,0 +1,27 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_stream_salsa20.h"
+#include "crypto_hash_sha256.h"
+
+char secondkey_bytes[32] = {
+ 0xdc,0x90,0x8d,0xda,0x0b,0x93,0x44,0xa9
+,0x53,0x62,0x9b,0x73,0x38,0x20,0x77,0x88
+,0x80,0xf3,0xce,0xb4,0x21,0xbb,0x61,0xb9
+,0x1c,0xbd,0x4c,0x3e,0x66,0x25,0x6c,0xe4
+} ;
+
+char noncesuffix_bytes[8] = {
+ 0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+int main()
+{
+  int i;
+  string secondkey(secondkey_bytes,sizeof secondkey_bytes);
+  string noncesuffix(noncesuffix_bytes,sizeof noncesuffix_bytes);
+  string output = crypto_stream_salsa20(4194304,noncesuffix,secondkey);
+  string h = crypto_hash_sha256(output);
+  for (i = 0;i < 32;++i) printf("%02x",(unsigned int) (unsigned char) h[i]); printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream6.out b/nacl/nacl-20110221/tests/stream6.out
new file mode 100644
index 00000000..5fa208c1
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream6.out
@@ -0,0 +1 @@
+662b9d0e3463029156069b12f918691a98f7dfb2ca0393c96bbfc6b1fbd630a2
diff --git a/nacl/nacl-20110221/tests/stream7.cpp b/nacl/nacl-20110221/tests/stream7.cpp
new file mode 100644
index 00000000..d2f106e5
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream7.cpp
@@ -0,0 +1,30 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_stream_xsalsa20.h"
+
+char firstkey_bytes[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+int main()
+{
+  int i;
+  string firstkey(firstkey_bytes,sizeof firstkey_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string rs = crypto_stream_xsalsa20(32,nonce,firstkey);
+  for (i = 0;i < rs.size();++i) {
+    printf(",0x%02x",(unsigned int) (unsigned char) rs[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream7.out b/nacl/nacl-20110221/tests/stream7.out
new file mode 100644
index 00000000..9cd78798
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream7.out
@@ -0,0 +1,4 @@
+,0xee,0xa6,0xa7,0x25,0x1c,0x1e,0x72,0x91
+,0x6d,0x11,0xc2,0xcb,0x21,0x4d,0x3c,0x25
+,0x25,0x39,0x12,0x1d,0x8e,0x23,0x4e,0x65
+,0x2d,0x65,0x1f,0xa4,0xc8,0xcf,0xf8,0x80
diff --git a/nacl/nacl-20110221/tests/stream8.cpp b/nacl/nacl-20110221/tests/stream8.cpp
new file mode 100644
index 00000000..ea95d68f
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream8.cpp
@@ -0,0 +1,56 @@
+#include <string>
+using std::string;
+#include <stdio.h>
+#include "crypto_stream_xsalsa20.h"
+
+char firstkey_bytes[32] = {
+ 0x1b,0x27,0x55,0x64,0x73,0xe9,0x85,0xd4
+,0x62,0xcd,0x51,0x19,0x7a,0x9a,0x46,0xc7
+,0x60,0x09,0x54,0x9e,0xac,0x64,0x74,0xf2
+,0x06,0xc4,0xee,0x08,0x44,0xf6,0x83,0x89
+} ;
+
+char nonce_bytes[24] = {
+ 0x69,0x69,0x6e,0xe9,0x55,0xb6,0x2b,0x73
+,0xcd,0x62,0xbd,0xa8,0x75,0xfc,0x73,0xd6
+,0x82,0x19,0xe0,0x03,0x6b,0x7a,0x0b,0x37
+} ;
+
+char m_bytes[163] = {
+    0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,   0,   0,   0,   0,   0,   0,   0,   0
+,0xbe,0x07,0x5f,0xc5,0x3c,0x81,0xf2,0xd5
+,0xcf,0x14,0x13,0x16,0xeb,0xeb,0x0c,0x7b
+,0x52,0x28,0xc5,0x2a,0x4c,0x62,0xcb,0xd4
+,0x4b,0x66,0x84,0x9b,0x64,0x24,0x4f,0xfc
+,0xe5,0xec,0xba,0xaf,0x33,0xbd,0x75,0x1a
+,0x1a,0xc7,0x28,0xd4,0x5e,0x6c,0x61,0x29
+,0x6c,0xdc,0x3c,0x01,0x23,0x35,0x61,0xf4
+,0x1d,0xb6,0x6c,0xce,0x31,0x4a,0xdb,0x31
+,0x0e,0x3b,0xe8,0x25,0x0c,0x46,0xf0,0x6d
+,0xce,0xea,0x3a,0x7f,0xa1,0x34,0x80,0x57
+,0xe2,0xf6,0x55,0x6a,0xd6,0xb1,0x31,0x8a
+,0x02,0x4a,0x83,0x8f,0x21,0xaf,0x1f,0xde
+,0x04,0x89,0x77,0xeb,0x48,0xf5,0x9f,0xfd
+,0x49,0x24,0xca,0x1c,0x60,0x90,0x2e,0x52
+,0xf0,0xa0,0x89,0xbc,0x76,0x89,0x70,0x40
+,0xe0,0x82,0xf9,0x37,0x76,0x38,0x48,0x64
+,0x5e,0x07,0x05
+} ;
+
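+/* XORs the 163-byte message above (32 leading zero bytes, matching
+   crypto_secretbox_ZEROBYTES, followed by a 131-byte plaintext) with the
+   XSalsa20 keystream under firstkey/nonce and prints the result from offset
+   32 onwards; the expected bytes are in tests/stream8.out. */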
+int main()
+{
+  int i;
+  string firstkey(firstkey_bytes,sizeof firstkey_bytes);
+  string nonce(nonce_bytes,sizeof nonce_bytes);
+  string m(m_bytes,sizeof m_bytes);
+  string c = crypto_stream_xsalsa20_xor(m,nonce,firstkey);
+  for (i = 32;i < c.size();++i) {
+    printf(",0x%02x",(unsigned int) (unsigned char) c[i]);
+    if (i % 8 == 7) printf("\n");
+  }
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/tests/stream8.out b/nacl/nacl-20110221/tests/stream8.out
new file mode 100644
index 00000000..0d3d8e94
--- /dev/null
+++ b/nacl/nacl-20110221/tests/stream8.out
@@ -0,0 +1,17 @@
+,0x8e,0x99,0x3b,0x9f,0x48,0x68,0x12,0x73
+,0xc2,0x96,0x50,0xba,0x32,0xfc,0x76,0xce
+,0x48,0x33,0x2e,0xa7,0x16,0x4d,0x96,0xa4
+,0x47,0x6f,0xb8,0xc5,0x31,0xa1,0x18,0x6a
+,0xc0,0xdf,0xc1,0x7c,0x98,0xdc,0xe8,0x7b
+,0x4d,0xa7,0xf0,0x11,0xec,0x48,0xc9,0x72
+,0x71,0xd2,0xc2,0x0f,0x9b,0x92,0x8f,0xe2
+,0x27,0x0d,0x6f,0xb8,0x63,0xd5,0x17,0x38
+,0xb4,0x8e,0xee,0xe3,0x14,0xa7,0xcc,0x8a
+,0xb9,0x32,0x16,0x45,0x48,0xe5,0x26,0xae
+,0x90,0x22,0x43,0x68,0x51,0x7a,0xcf,0xea
+,0xbd,0x6b,0xb3,0x73,0x2b,0xc0,0xe9,0xda
+,0x99,0x83,0x2b,0x61,0xca,0x01,0xb6,0xde
+,0x56,0x24,0x4a,0x9e,0x88,0xd5,0xf9,0xb3
+,0x79,0x73,0xf6,0x22,0xa4,0x3d,0x14,0xa6
+,0x59,0x9b,0x1f,0x65,0x4c,0xb4,0x5a,0x74
+,0xe3,0x55,0xa5
diff --git a/nacl/nacl-20110221/try-anything.c b/nacl/nacl-20110221/try-anything.c
new file mode 100644
index 00000000..b6847473
--- /dev/null
+++ b/nacl/nacl-20110221/try-anything.c
@@ -0,0 +1,183 @@
+/*
+ * try-anything.c version 20090215
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include "cpucycles.h"
+
+typedef unsigned int uint32;
+
+static uint32 seed[32] = { 3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5 } ;
+static uint32 in[12];
+static uint32 out[8];
+static int outleft = 0;
+
+#define ROTATE(x,b) (((x) << (b)) | ((x) >> (32 - (b))))
+#define MUSH(i,b) x = t[i] += (((x ^ seed[i]) + sum) ^ ROTATE(x,b));
+
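+/* surf() and randombytes() below form a small deterministic generator (the
+   SURF construction) keyed by the fixed seed[] array, with in[0..3] acting
+   as a block counter, so the "random" bytes consumed by the tests are
+   reproducible from run to run. */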
+static void surf(void)
+{
+  uint32 t[12]; uint32 x; uint32 sum = 0;
+  int r; int i; int loop;
+
+  for (i = 0;i < 12;++i) t[i] = in[i] ^ seed[12 + i];
+  for (i = 0;i < 8;++i) out[i] = seed[24 + i];
+  x = t[11];
+  for (loop = 0;loop < 2;++loop) {
+    for (r = 0;r < 16;++r) {
+      sum += 0x9e3779b9;
+      MUSH(0,5) MUSH(1,7) MUSH(2,9) MUSH(3,13)
+      MUSH(4,5) MUSH(5,7) MUSH(6,9) MUSH(7,13)
+      MUSH(8,5) MUSH(9,7) MUSH(10,9) MUSH(11,13)
+    }
+    for (i = 0;i < 8;++i) out[i] ^= t[i + 4];
+  }
+}
+
+void randombytes(unsigned char *x,unsigned long long xlen)
+{
+  while (xlen > 0) {
+    if (!outleft) {
+      if (!++in[0]) if (!++in[1]) if (!++in[2]) ++in[3];
+      surf();
+      outleft = 8;
+    }
+    *x = out[--outleft];
+    ++x;
+    --xlen;
+  }
+}
+
+extern void preallocate(void);
+extern void allocate(void);
+extern void predoit(void);
+extern void doit(void);
+extern char checksum[];
+extern const char *checksum_compute(void);
+extern const char *primitiveimplementation;
+
+static void printword(const char *s)
+{
+  if (!*s) putchar('-');
+  while (*s) {
+    if (*s == ' ') putchar('_');
+    else if (*s == '\t') putchar('_');
+    else if (*s == '\r') putchar('_');
+    else if (*s == '\n') putchar('_');
+    else putchar(*s);
+    ++s;
+  }
+  putchar(' ');
+}
+
+static void printnum(long long x)
+{
+  printf("%lld ",x);
+}
+
+static void fail(const char *why)
+{
+  printf("%s\n",why);
+  exit(111);
+}
+
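+/* Allocates len + 256 bytes, fills them with pseudo-random junk, advances
+   the pointer to a 64-byte-aligned offset at least 64 bytes in, and zeroes
+   the first len bytes of that aligned region; the buffer is never freed. */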
+unsigned char *alignedcalloc(unsigned long long len)
+{
+  unsigned char *x = (unsigned char *) calloc(1,len + 256);
+  long long i;
+  if (!x) fail("out of memory");
+  /* will never deallocate so shifting is ok */
+  for (i = 0;i < len + 256;++i) x[i] = random();
+  x += 64;
+  x += 63 & (-(unsigned long) x);
+  for (i = 0;i < len;++i) x[i] = 0;
+  return x;
+}
+
+#define TIMINGS 63
+static long long cycles[TIMINGS + 1];
+
+void limits()
+{
+#ifdef RLIM_INFINITY
+  struct rlimit r;
+  r.rlim_cur = 0;
+  r.rlim_max = 0;
+#ifdef RLIMIT_NOFILE
+  setrlimit(RLIMIT_NOFILE,&r);
+#endif
+#ifdef RLIMIT_NPROC
+  setrlimit(RLIMIT_NPROC,&r);
+#endif
+#ifdef RLIMIT_CORE
+  setrlimit(RLIMIT_CORE,&r);
+#endif
+#endif
+}
+
+int main()
+{
+  long long i;
+  long long j;
+  long long abovej;
+  long long belowj;
+  long long checksumcycles;
+  long long cyclespersecond;
+  const char *problem;
+
+  cyclespersecond = cpucycles_persecond();
+  preallocate();
+  limits();
+
+  allocate();
+  srandom(getpid());
+
+  cycles[0] = cpucycles();
+  problem = checksum_compute(); if (problem) fail(problem);
+  cycles[1] = cpucycles();
+  checksumcycles = cycles[1] - cycles[0];
+
+  predoit();
+  for (i = 0;i <= TIMINGS;++i) {
+    cycles[i] = cpucycles();
+  }
+  for (i = 0;i <= TIMINGS;++i) {
+    cycles[i] = cpucycles();
+    doit();
+  }
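+  /* Turn the raw timestamps into per-call cycle counts, then pick an index
+     j whose count is a median: fewer than half of the measurements lie
+     strictly below it and fewer than half strictly above it. */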
+  for (i = 0;i < TIMINGS;++i) cycles[i] = cycles[i + 1] - cycles[i];
+  for (j = 0;j < TIMINGS;++j) {
+    belowj = 0;
+    for (i = 0;i < TIMINGS;++i) if (cycles[i] < cycles[j]) ++belowj;
+    abovej = 0;
+    for (i = 0;i < TIMINGS;++i) if (cycles[i] > cycles[j]) ++abovej;
+    if (belowj * 2 < TIMINGS && abovej * 2 < TIMINGS) break;
+  }
+
+  printword(checksum);
+  printnum(cycles[j]);
+  printnum(checksumcycles);
+  printnum(cyclespersecond);
+  printword(primitiveimplementation);
+  printf("\n");
+  return 0;
+}
diff --git a/nacl/nacl-20110221/version b/nacl/nacl-20110221/version
new file mode 100644
index 00000000..97840598
--- /dev/null
+++ b/nacl/nacl-20110221/version
@@ -0,0 +1 @@
+20110221