12 #ifndef CRYPTOPP_IMPORTS
13 #ifndef CRYPTOPP_GENERATE_X64_MASM
16 #if defined(_MSC_VER) && (_MSC_VER < 1400)
17 # pragma optimize("", off)
23 #if defined(CRYPTOPP_DISABLE_GCM_ASM)
24 # undef CRYPTOPP_X86_ASM_AVAILABLE
25 # undef CRYPTOPP_X32_ASM_AVAILABLE
26 # undef CRYPTOPP_X64_ASM_AVAILABLE
27 # undef CRYPTOPP_SSE2_ASM_AVAILABLE
32 #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
35 #if defined(CRYPTOPP_DISABLE_MIXED_ASM)
37 # define USE_MOVD_REG32 1
38 #elif defined(__GNUC__) || defined(_MSC_VER)
40 # define USE_MOVD_REG32_OR_REG64 1
43 # define USE_MOV_REG32_OR_REG64 1
45 #endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
// Pointer-cast helpers: reinterpret any pointer as a (const) __m128i pointer,
// routing the conversion through (const) void* first.
48 #define M128_CAST(x) ((__m128i *)(void *)(x))
49 #define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
// Definitions of GCM_Base's static reduction table (256 word16 entries) and
// of the flag that records whether that table has been populated.
// NOTE(review): the flag is only `volatile`; that alone does not synchronize
// concurrent first-time initialization -- confirm whether callers need more.
51 word16 GCM_Base::s_reductionTable[256];
52 volatile bool GCM_Base::s_reductionTableInitialized =
false;
54 void GCM_Base::GCTR::IncrementCounterBy256()
// Portable fallback: a = b ^ c over one block, handled as two word64 lanes
// (index 0 and index 1 of each buffer).
// All three pointers are reinterpreted as word64*.
// NOTE(review): presumably each buffer must be suitably aligned for word64
// access -- confirm against the callers.
59 static inline void Xor16(
byte *a,
const byte *b,
const byte *c)
// Low lane.
64 ((word64 *)(
void *)a)[0] = ((word64 *)(
void *)b)[0] ^ ((word64 *)(
void *)c)[0];
// High lane.
65 ((word64 *)(
void *)a)[1] = ((word64 *)(
void *)b)[1] ^ ((word64 *)(
void *)c)[1];
// Declarations of the SIMD counterparts of Xor16. Each takes the same
// (a, b, c) arguments as Xor16; the definitions are not part of this view.
// SSE2 variant, declared when SSE2 intrinsics or SSE2 assembly are available.
68 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
71 extern void GCM_Xor16_SSE2(
byte *a,
const byte *b,
const byte *c);
// ARM NEON variant.
74 #if CRYPTOPP_ARM_NEON_AVAILABLE
75 extern void GCM_Xor16_NEON(
byte *a,
const byte *b,
const byte *c);
// POWER8 variant.
78 #if CRYPTOPP_POWER8_AVAILABLE
79 extern void GCM_Xor16_POWER8(
byte *a,
const byte *b,
const byte *c);
// CLMUL build: declarations of the externally defined key-setup, block
// authentication and hash-buffer reversal routines, plus the table size (in
// cipher blocks) that SetKeyWithoutResync multiplies by the block size.
82 #if CRYPTOPP_CLMUL_AVAILABLE
// Builds the multiplication table `mulTable` (tableSize bytes) from hashKey.
83 extern void GCM_SetKeyWithoutResync_CLMUL(
const byte *hashKey,
byte *mulTable,
unsigned int tableSize);
// Hashes `len` bytes of `data` into hbuffer using mtable; the returned
// size_t is passed straight back by GCM_Base::AuthenticateBlocks.
84 extern size_t GCM_AuthenticateBlocks_CLMUL(
const byte *data,
size_t len,
const byte *mtable,
byte *hbuffer);
85 const unsigned int s_cltableSizeInBlocks = 8;
86 extern void GCM_ReverseHashBufferIfNeeded_CLMUL(
byte *hashBuffer);
87 #endif // CRYPTOPP_CLMUL_AVAILABLE
// ARM PMULL build: declarations of the externally defined key-setup, block
// authentication and hash-buffer reversal routines, plus the table size (in
// cipher blocks) that SetKeyWithoutResync multiplies by the block size.
89 #if CRYPTOPP_ARM_PMULL_AVAILABLE
// Builds the multiplication table `mulTable` (tableSize bytes) from hashKey.
90 extern void GCM_SetKeyWithoutResync_PMULL(
const byte *hashKey,
byte *mulTable,
unsigned int tableSize);
// Hashes `len` bytes of `data` into hbuffer using mtable; the returned
// size_t is passed straight back by GCM_Base::AuthenticateBlocks.
91 extern size_t GCM_AuthenticateBlocks_PMULL(
const byte *data,
size_t len,
const byte *mtable,
byte *hbuffer);
92 const unsigned int s_cltableSizeInBlocks = 8;
93 extern void GCM_ReverseHashBufferIfNeeded_PMULL(
byte *hashBuffer);
94 #endif // CRYPTOPP_ARM_PMULL_AVAILABLE
// POWER8 VMULL build: declarations of the externally defined key-setup, block
// authentication and hash-buffer reversal routines, plus the table size (in
// cipher blocks) that SetKeyWithoutResync multiplies by the block size.
96 #if CRYPTOPP_POWER8_VMULL_AVAILABLE
// Builds the multiplication table `mulTable` (tableSize bytes) from hashKey.
97 extern void GCM_SetKeyWithoutResync_VMULL(
const byte *hashKey,
byte *mulTable,
unsigned int tableSize);
// Hashes `len` bytes of `data` into hbuffer using mtable; the returned
// size_t is passed straight back by GCM_Base::AuthenticateBlocks.
98 extern size_t GCM_AuthenticateBlocks_VMULL(
const byte *data,
size_t len,
const byte *mtable,
byte *hbuffer);
99 const unsigned int s_cltableSizeInBlocks = 8;
100 extern void GCM_ReverseHashBufferIfNeeded_VMULL(
byte *hashBuffer);
101 #endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
// Installs a new key and rebuilds the multiplication table used for hashing.
//
// userKey, keylength: key material handed unchanged to the block cipher.
// params: forwarded to the cipher's SetKey and queried for Name::TableSize().
//
// Steps visible here: key the cipher, compare its block size with
// REQUIRED_BLOCKSIZE, choose tableSize, resize m_buffer to
// 3*blockSize + tableSize, clear the hash key, then either delegate table
// construction to the CLMUL/PMULL/VMULL routine or build the 64 KB / 2 KB
// lookup tables in place, filling s_reductionTable the first time it is
// needed.
//
// The third parameter is a reference named `params`; its declarator had been
// corrupted to a pilcrow character, which left `params` undeclared.
103 void GCM_Base::SetKeyWithoutResync(
const byte *userKey,
size_t keylength,
const NameValuePairs &params)
106 blockCipher.
SetKey(userKey, keylength, params);
112 const unsigned int blockSize = blockCipher.
BlockSize();
// Block-size check; the action taken on mismatch is not visible in this view.
114 if (blockCipher.
BlockSize() != REQUIRED_BLOCKSIZE)
117 int tableSize, i, j, k;
// Carry-less-multiply builds use a fixed table of s_cltableSizeInBlocks
// blocks: Name::TableSize() is queried, but the value is then overwritten.
119 #if CRYPTOPP_CLMUL_AVAILABLE
123 (void)params.
GetIntValue(Name::TableSize(), tableSize);
124 tableSize = s_cltableSizeInBlocks * blockSize;
128 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
132 (void)params.
GetIntValue(Name::TableSize(), tableSize);
133 tableSize = s_cltableSizeInBlocks * blockSize;
137 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
141 (void)params.
GetIntValue(Name::TableSize(), tableSize);
142 tableSize = s_cltableSizeInBlocks * blockSize;
// Table-driven build: a requested size of at least 64 KB selects the 64 KB
// table, anything smaller the 2 KB table.
148 if (params.
GetIntValue(Name::TableSize(), tableSize))
149 tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
// Presumably the no-request case (the branch keyword is not visible here):
// the choice follows GetTablesOption().
151 tableSize = (GetTablesOption() ==
GCM_64K_Tables) ? 64*1024 : 2*1024;
// m_buffer is sized for 3*blockSize bytes plus the table.
// NOTE(review): MulTable() and HashKey() presumably point into m_buffer --
// confirm against the class declaration.
159 m_buffer.resize(3*blockSize + tableSize);
160 byte *mulTable = MulTable();
161 byte *hashKey = HashKey();
162 memset(hashKey, 0, REQUIRED_BLOCKSIZE);
// Accelerated builds hand table construction to the external routine.
165 #if CRYPTOPP_CLMUL_AVAILABLE
168 GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
171 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
174 GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
177 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
180 GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
// Software tables: V0/V1 receive the hash key as two words.
187 Block::Get(hashKey)(V0)(V1);
// 64 KB layout: sub-tables of 256 entries * 16 bytes, selected by i/8.
189 if (tableSize == 64*1024)
191 for (i=0; i<128; i++)
194 Block::Put(NULLPTR, mulTable+(i/8)*256*16+(
size_t(1)<<(11-k)))(V0)(V1);
// Shift the 128-bit value V0:V1 right by one bit, folding in 0xe1<<56 when
// `x` (assigned on a line not shown here) is nonzero.
197 V1 = (V1>>1) | (V0<<63);
198 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
// Entry 0 of each sub-table is zero; the other entries are XOR combinations
// of the power-of-two entries, computed with whichever Xor16 variant is built.
203 memset(mulTable+i*256*16, 0, 16);
204 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
206 for (j=2; j<=0x80; j*=2)
208 GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
210 #elif CRYPTOPP_ARM_NEON_AVAILABLE
212 for (j=2; j<=0x80; j*=2)
214 GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
216 #elif CRYPTOPP_POWER8_AVAILABLE
218 for (j=2; j<=0x80; j*=2)
220 GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
223 for (j=2; j<=0x80; j*=2)
225 Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
// One-time fill of the shared reduction table: entries at non-power-of-two
// indices are the XOR of the entries for their two components.
230 if (!s_reductionTableInitialized)
232 s_reductionTable[0] = 0;
235 for (
unsigned int ii=2; ii<=0x80; ii*=2)
239 for (
unsigned int jj=1; jj<ii; jj++)
240 s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
242 s_reductionTableInitialized =
true;
// 2 KB layout: two 1024-byte halves (offsets 0 and 1024), each made of
// 256-byte sub-tables selected by i/32.
245 for (i=0; i<128-24; i++)
249 Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(
size_t(1)<<(7-k)))(V0)(V1);
251 Block::Put(NULLPTR, mulTable+(i/32)*256+(
size_t(1)<<(11-k)))(V0)(V1);
254 V1 = (V1>>1) | (V0<<63);
255 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
// Zero entry 0 of the matching sub-table in both halves, then derive the
// combined entries exactly as in the 64 KB case (j only runs up to 8 here).
260 memset(mulTable+i*256, 0, 16);
261 memset(mulTable+1024+i*256, 0, 16);
262 #if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
264 for (j=2; j<=8; j*=2)
267 GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
268 GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
271 #elif CRYPTOPP_ARM_NEON_AVAILABLE
273 for (j=2; j<=8; j*=2)
276 GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
277 GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
280 #elif CRYPTOPP_POWER8_AVAILABLE
282 for (j=2; j<=8; j*=2)
285 GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
286 GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
290 for (j=2; j<=8; j*=2)
293 Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
294 Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
// Forwards HashBuffer() to the reversal routine of whichever carry-less
// multiply implementation (CLMUL, PMULL or VMULL) this build was compiled
// with. No other code path is visible in this view.
300 inline void GCM_Base::ReverseHashBufferIfNeeded()
302 #if CRYPTOPP_CLMUL_AVAILABLE
305 GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
307 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
310 GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
312 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
315 GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
// Re-initializes the counter-mode state for a new IV.
//
// iv, len: the IV bytes and their count.
//
// The hash buffer is used as scratch space to derive the initial counter
// block, which is then installed in m_ctr; the hash buffer is cleared again
// before returning.
320 void GCM_Base::Resync(
const byte *iv,
size_t len)
323 byte *hashBuffer = HashBuffer();
// Direct form: the IV is copied verbatim, followed by three zero bytes and a
// final byte of 1.
// NOTE(review): the condition guarding this form is not visible here --
// presumably the 96-bit IV case of the GCM specification; confirm.
327 memcpy(hashBuffer, iv, len);
328 memset(hashBuffer+len, 0, 3);
329 hashBuffer[len+3] = 1;
// Hashed form: start from an all-zero hash state and run the IV through
// AuthenticateBlocks.
333 size_t origLen = len;
334 memset(hashBuffer, 0, HASH_BLOCKSIZE);
336 if (len >= HASH_BLOCKSIZE)
// The return value is treated as the count of bytes left over, so iv is
// advanced past the part that was consumed.
338 len = GCM_Base::AuthenticateBlocks(iv, len);
339 iv += (origLen - len);
// Zero-pad the remaining IV bytes to a full block in m_buffer and hash it.
344 memcpy(m_buffer, iv, len);
345 memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
346 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
// One further block from m_buffer is hashed; the code that fills it is not
// visible in this view.
350 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
352 ReverseHashBufferIfNeeded();
// Install the derived block in the counter object: resynchronize when
// m_state is at least State_IVSet, otherwise attach the cipher together with
// the IV.
355 if (m_state >= State_IVSet)
356 m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
358 m_ctr.SetCipherWithIV(cipher, hashBuffer);
// Move the counter stream forward by HASH_BLOCKSIZE bytes.
360 m_ctr.Seek(HASH_BLOCKSIZE);
// Reset the hash state so authentication starts from zero.
362 memset(hashBuffer, 0, HASH_BLOCKSIZE);
368 #if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
370 #elif CRYPTOPP_ARM_NEON_AVAILABLE
372 #elif CRYPTOPP_POWER8_AVAILABLE
378 #if CRYPTOPP_MSC_VERSION
379 # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
382 #endif // Not CRYPTOPP_GENERATE_X64_MASM
// Prototypes for the hash routines used when CRYPTOPP_X64_MASM_AVAILABLE is
// defined. Both take a count of blocks rather than bytes (the caller passes
// len/16).
384 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
// 2 KB-table variant; additionally receives the reduction table.
386 void GCM_AuthenticateBlocks_2K_SSE2(
const byte *data,
size_t blocks, word64 *hashBuffer,
const word16 *reductionTable);
// 64 KB-table variant.
387 void GCM_AuthenticateBlocks_64K_SSE2(
const byte *data,
size_t blocks, word64 *hashBuffer);
391 #ifndef CRYPTOPP_GENERATE_X64_MASM
// Folds whole HASH_BLOCKSIZE-byte blocks of `data` into the running hash
// state held in HashBuffer().
//
// data: input bytes.
// len:  number of input bytes.
//
// Builds with CLMUL/PMULL/VMULL return the result of the matching external
// routine. The other paths visible here walk the precomputed table behind
// MulTable(), one block per loop iteration, while len >= HASH_BLOCKSIZE.
// NOTE(review): the return statement of the table paths is not visible in
// this view; Resync treats the result as the number of bytes left over.
393 size_t GCM_Base::AuthenticateBlocks(
const byte *data,
size_t len)
395 #if CRYPTOPP_CLMUL_AVAILABLE
398 return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
400 #elif CRYPTOPP_ARM_PMULL_AVAILABLE
403 return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
405 #elif CRYPTOPP_POWER8_VMULL_AVAILABLE
408 return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
// Table-driven paths treat the hash state as two word64 values.
413 word64 *hashBuffer = (word64 *)(
void *)HashBuffer();
// Path selector: contributes 2 when m_buffer is at least 64 KB in size. The
// rest of the expression is not visible in this view.
416 switch (2*(m_buffer.size()>=64*1024)
417 #
if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
// Path using table offsets of the form a*1024 + b*256, which matches the
// 2 KB layout built in SetKeyWithoutResync.
426 byte *mulTable = MulTable();
427 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
431 word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
432 Block::Get(data)(y0)(y1);
436 data += HASH_BLOCKSIZE;
437 len -= HASH_BLOCKSIZE;
// Reads one word64 from mulTable at byte offset a*1024 + b*256 + c + d*8.
439 #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
441 #if (CRYPTOPP_LITTLE_ENDIAN)
442 #if CRYPTOPP_BOOL_SLOW_WORD64
// Slow-word64 variant: split the state into four 32-bit words z0..z3 so the
// table index can be extracted with 32-bit shifts.
443 word32 z0 = (word32)x0;
444 word32 z1 = (word32)(x0>>32);
445 word32 z2 = (word32)x1;
446 word32 z3 = (word32)(x1>>32);
447 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
// Variant indexing directly from the 64-bit words x0/x1.
449 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
// Take the top byte of word 1 of a pair, and shift the pair left by one byte
// (word 0's top byte moves into word 1).
451 #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
452 #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
// Alternative definitions working from the low byte and shifting right.
// NOTE(review): the directive selecting them is not visible here --
// presumably the big-endian branch; confirm.
454 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
455 #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
456 #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
// Accumulates eight table reads into the four word pairs a, b, c and d;
// `op` is `=` for the first invocation and `^=` for the following three.
459 #define GF_MUL_32BY128(op, a, b, c) \
460 a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
461 a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
462 b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
463 b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
464 c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
465 c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
466 d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
467 d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
469 GF_MUL_32BY128(=, 0, 0, 0)
470 GF_MUL_32BY128(^=, 0, 1, 1)
471 GF_MUL_32BY128(^=, 1, 0, 2)
472 GF_MUL_32BY128(^=, 1, 1, 3)
// Build the reduction word r from s_reductionTable lookups keyed by the
// selected byte of d, c and b, placed at bit offsets 16, 8 and 0.
474 word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
477 r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
480 r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
// Keep going while at least one full block remains, then store the state.
486 while (len >= HASH_BLOCKSIZE);
488 hashBuffer[0] = x0; hashBuffer[1] = x1;
// Path using table offsets of the form a*256*16, which matches the 64 KB
// layout built in SetKeyWithoutResync.
494 byte *mulTable = MulTable();
495 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
499 word64 y0, y1, a0, a1;
500 Block::Get(data)(y0)(y1);
504 data += HASH_BLOCKSIZE;
505 len -= HASH_BLOCKSIZE;
// Drop the previous path's macros before redefining them for this layout.
507 #undef READ_TABLE_WORD64_COMMON
508 #undef READ_TABLE_WORD64
// Reads one word64 from mulTable at byte offset a*256*16 + c + d*8.
510 #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
512 #if (CRYPTOPP_LITTLE_ENDIAN)
513 #if CRYPTOPP_BOOL_SLOW_WORD64
// Slow-word64 variant: index from the 32-bit pieces z0..z3 of the state.
514 word32 z0 = (word32)x0;
515 word32 z1 = (word32)(x0>>32);
516 word32 z2 = (word32)x1;
517 word32 z3 = (word32)(x1>>32);
518 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
520 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
523 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
// One table lookup (both 64-bit halves) per invocation; sixteen invocations
// follow, with c*4+d running over sub-tables 0..15.
526 #define GF_MUL_8BY128(op, b, c, d) \
527 a0 op READ_TABLE_WORD64(b, c, d, 0);\
528 a1 op READ_TABLE_WORD64(b, c, d, 1);\
530 GF_MUL_8BY128(=, 0, 0, 0)
531 GF_MUL_8BY128(^=, 0, 0, 1)
532 GF_MUL_8BY128(^=, 0, 0, 2)
533 GF_MUL_8BY128(^=, 0, 0, 3)
534 GF_MUL_8BY128(^=, 0, 1, 0)
535 GF_MUL_8BY128(^=, 0, 1, 1)
536 GF_MUL_8BY128(^=, 0, 1, 2)
537 GF_MUL_8BY128(^=, 0, 1, 3)
538 GF_MUL_8BY128(^=, 1, 2, 0)
539 GF_MUL_8BY128(^=, 1, 2, 1)
540 GF_MUL_8BY128(^=, 1, 2, 2)
541 GF_MUL_8BY128(^=, 1, 2, 3)
542 GF_MUL_8BY128(^=, 1, 3, 0)
543 GF_MUL_8BY128(^=, 1, 3, 1)
544 GF_MUL_8BY128(^=, 1, 3, 2)
545 GF_MUL_8BY128(^=, 1, 3, 3)
// Keep going while at least one full block remains, then store the state.
549 while (len >= HASH_BLOCKSIZE);
551 hashBuffer[0] = x0; hashBuffer[1] = x1;
554 #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
// With the MASM routines available, both table variants are handled by an
// external procedure that is given the block count len/16.
556 #ifdef CRYPTOPP_X64_MASM_AVAILABLE
558 GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
561 GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
// SSE2 assembly for the 2 KB-table hash loop. The same instruction text is
// used either as inline assembly (CRYPTOPP_SSE2_ASM_AVAILABLE) or as the body
// of the MASM procedure GCM_AuthenticateBlocks_2K_SSE2
// (CRYPTOPP_GENERATE_X64_MASM).
565 #if CRYPTOPP_SSE2_ASM_AVAILABLE
572 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
574 GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
// Register use: cx = data pointer, dx = length, si = hash buffer.
// The shift by 4 turns dx from a byte count into a count of 16-byte blocks.
582 AS2( mov WORD_REG(cx), data )
583 AS2( mov WORD_REG(dx), len )
584 AS2( mov WORD_REG(si), hashBuffer )
585 AS2( shr WORD_REG(dx), 4 )
588 #
if CRYPTOPP_BOOL_X32
// AS_REG_7 is loaded with the reduction table address (from di, or directly
// from the s_reductionTable symbol on x86).
597 AS2( mov AS_REG_7, WORD_REG(di))
598 #elif CRYPTOPP_BOOL_X86
599 AS2( lea AS_REG_7, s_reductionTable)
// xmm0 holds the running hash state, loaded from the hash buffer.
602 AS2( movdqa xmm0, [WORD_REG(si)] )
// The multiplication table starts 32 bytes past the hash buffer; its second
// half is a further 1024 bytes on. RED_TABLE aliases the reduction table
// register.
604 #define MUL_TABLE_0 WORD_REG(si) + 32
605 #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
606 #define RED_TABLE AS_REG_7
// Load the next input block (unaligned) and XOR it into the state.
609 AS2( movdqu xmm4, [WORD_REG(cx)] )
610 AS2( pxor xmm0, xmm4 )
// ebx receives the low 32 bits of the state, masked to the high nibble of
// each byte; the resulting byte values serve as table offsets.
612 AS2( movd ebx, xmm0 )
613 AS2( mov eax, AS_HEX(f0f0f0f0) )
616 AS2( and ebx, AS_HEX(f0f0f0f0) )
// Seed the four accumulators xmm5, xmm4, xmm3 and xmm2 from MUL_TABLE_1.
618 AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
620 AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
623 AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
625 AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
// SSE2_MUL_32BITS(i): moves on to the next 32 bits of the state and XORs the
// matching MUL_TABLE_0 / MUL_TABLE_1 entries into the four accumulators.
627 #define SSE2_MUL_32BITS(i) \
628 AS2( psrldq xmm0, 4 )\
629 AS2( movd eax, xmm0 )\
630 AS2( and eax, AS_HEX(f0f0f0f0) )\
631 AS2( movzx edi, bh )\
632 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
633 AS2( movzx edi, bl )\
634 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
636 AS2( movzx edi, bh )\
637 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
638 AS2( movzx edi, bl )\
639 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
640 AS2( movd ebx, xmm0 )\
642 AS2( and ebx, AS_HEX(f0f0f0f0) )\
643 AS2( movzx edi, ah )\
644 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
645 AS2( movzx edi, al )\
646 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
648 AS2( movzx edi, ah )\
649 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
650 AS2( movzx edi, al )\
651 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
658 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
660 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
663 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
665 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
// Combine the accumulators: each is shifted left by one byte and XORed into
// the next, while the byte that falls off the top (isolated with psrldq 15)
// indexes the 16-bit reduction table.
667 AS2( movdqa xmm0, xmm3 )
668 AS2( pslldq xmm3, 1 )
669 AS2( pxor xmm2, xmm3 )
670 AS2( movdqa xmm1, xmm2 )
671 AS2( pslldq xmm2, 1 )
672 AS2( pxor xmm5, xmm2 )
674 AS2( psrldq xmm0, 15 )
// The three forms below are alternative ways of moving the isolated byte
// into di; the USE_MOVD_* / USE_MOV_* settings choose between them.
676 AS2( movd edi, xmm0 )
677 #elif USE_MOV_REG32_OR_REG64
678 AS2( mov WORD_REG(di), xmm0 )
680 AS2( movd WORD_REG(di), xmm0 )
682 AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
685 AS2( movdqa xmm0, xmm5 )
686 AS2( pslldq xmm5, 1 )
687 AS2( pxor xmm4, xmm5 )
689 AS2( psrldq xmm1, 15 )
691 AS2( movd edi, xmm1 )
692 #elif USE_MOV_REG32_OR_REG64
693 AS2( mov WORD_REG(di), xmm1 )
695 AS2( movd WORD_REG(di), xmm1 )
697 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
700 AS2( psrldq xmm0, 15 )
702 AS2( movd edi, xmm0 )
703 #elif USE_MOV_REG32_OR_REG64
704 AS2( mov WORD_REG(di), xmm0 )
706 AS2( movd WORD_REG(di), xmm0 )
708 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
// Merge the accumulated reduction value in eax with xmm4 to form the new
// state in xmm0.
710 AS2( movd xmm0, eax )
711 AS2( pxor xmm0, xmm4 )
// Advance to the next input block and count one block down.
713 AS2( add WORD_REG(cx), 16 )
714 AS2( sub WORD_REG(dx), 1 )
// Write the final state back to the hash buffer.
718 AS2( movdqa [WORD_REG(si)], xmm0 )
720 #
if CRYPTOPP_BOOL_X32
// Inline-assembly operand lists: inputs are data (cx), block count len/16
// (dx), hash buffer (si) and reduction table (di); the clobber list names
// memory, the condition codes and eax.
731 :
"c" (data),
"d" (len/16),
"S" (hashBuffer),
"D" (s_reductionTable)
732 :
"memory",
"cc",
"%eax"
733 #
if CRYPTOPP_BOOL_X64
737 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
742 GCM_AuthenticateBlocks_2K_SSE2 ENDP
// SSE2 assembly for the 64 KB-table hash loop. In MASM generation mode it is
// emitted as the procedure GCM_AuthenticateBlocks_64K_SSE2.
753 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
755 GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
// Register use: cx = data pointer, dx = length, si = hash buffer.
// The shift by 4 turns dx from a byte count into a count of 16-byte blocks.
761 AS2( mov WORD_REG(cx), data )
762 AS2( mov WORD_REG(dx), len )
763 AS2( mov WORD_REG(si), hashBuffer )
764 AS2( shr WORD_REG(dx), 4 )
// xmm0 holds the running hash state, loaded from the hash buffer.
767 AS2( movdqa xmm0, [WORD_REG(si)] )
// MUL_TABLE(i,j): address of sub-table number i*4+j. Each sub-table is
// 256*16 bytes, and the table area starts 32 bytes past the hash buffer.
770 #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
// xmm1 = input block XOR current state; xmm0 is cleared to collect the
// product.
773 AS2( movdqu xmm1, [WORD_REG(cx)] )
774 AS2( pxor xmm1, xmm0 )
775 AS2( pxor xmm0, xmm0 )
// Replace the 2 KB variant's macro. This version consumes 32 bits of xmm1
// per use: each byte is doubled and scaled by 8 (a 16-byte stride) to index
// its own sub-table, and the selected entry is XORed into xmm0.
777 #undef SSE2_MUL_32BITS
778 #define SSE2_MUL_32BITS(i) \
779 AS2( movd eax, xmm1 )\
780 AS2( psrldq xmm1, 4 )\
781 AS2( movzx edi, al )\
782 AS2( add WORD_REG(di), WORD_REG(di) )\
783 AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
784 AS2( movzx edi, ah )\
785 AS2( add WORD_REG(di), WORD_REG(di) )\
786 AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
788 AS2( movzx edi, al )\
789 AS2( add WORD_REG(di), WORD_REG(di) )\
790 AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
791 AS2( movzx edi, ah )\
792 AS2( add WORD_REG(di), WORD_REG(di) )\
793 AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
800 AS2( add WORD_REG(cx), 16 )
801 AS2( sub WORD_REG(dx), 1 )
// Write the final state back to the hash buffer.
805 AS2( movdqa [WORD_REG(si)], xmm0 )
// Inline-assembly operand lists: inputs are data (cx), block count len/16
// (dx) and hash buffer (si); the clobber list names memory, the condition
// codes, edi and eax.
810 :
"c" (data),
"d" (len/16),
"S" (hashBuffer)
811 :
"memory",
"cc",
"%edi",
"%eax"
813 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
817 GCM_AuthenticateBlocks_64K_SSE2 ENDP
823 #ifndef CRYPTOPP_GENERATE_X64_MASM
// Flushes buffered header bytes: when m_bufferedDataLength is nonzero, the
// rest of the block in m_buffer is zero-filled, the buffered length is reset
// to 0, and the padded block is hashed.
829 void GCM_Base::AuthenticateLastHeaderBlock()
831 if (m_bufferedDataLength > 0)
// Zero-pad from the end of the buffered bytes up to HASH_BLOCKSIZE.
833 memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
834 m_bufferedDataLength = 0;
835 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
// Flushes any buffered partial block via AuthenticateLastHeaderBlock, then
// hashes one more block taken from m_buffer.
// NOTE(review): the code that fills m_buffer before the final call is not
// visible in this view -- presumably the encoded lengths block; confirm.
839 void GCM_Base::AuthenticateLastConfidentialBlock()
841 GCM_Base::AuthenticateLastHeaderBlock();
843 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
// Produces the authentication tag.
//
// mac:     destination for the tag bytes.
// macSize: number of tag bytes to produce.
//
// The hash buffer is first put through ReverseHashBufferIfNeeded, then
// macSize bytes of it are processed through m_ctr into `mac`.
846 void GCM_Base::AuthenticateLastFooterBlock(
byte *mac,
size_t macSize)
849 ReverseHashBufferIfNeeded();
850 m_ctr.ProcessData(mac, HashBuffer(), macSize);
855 #endif // Not CRYPTOPP_GENERATE_X64_MASM