1 /*------------------------------------------------------------------------
2 / OCB Version 3 Reference Code (Optimized C)     Last modified 12-JUN-2013
3 /-------------------------------------------------------------------------
4 / Copyright (c) 2013 Ted Krovetz.
5 /
6 / Permission to use, copy, modify, and/or distribute this software for any
7 / purpose with or without fee is hereby granted, provided that the above
8 / copyright notice and this permission notice appear in all copies.
9 /
10 / THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 / WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 / MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 / ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 / WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 / ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 / OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 /
18 / Phillip Rogaway holds patents relevant to OCB. See the following for
19 / his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
20 /
21 / Special thanks to Keegan McAllister for suggesting several good improvements
22 /
23 / Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
24 /------------------------------------------------------------------------- */
25 
26 /* ----------------------------------------------------------------------- */
27 /* Usage notes                                                             */
28 /* ----------------------------------------------------------------------- */
29 
30 /* - When AE_PENDING is passed as the 'final' parameter of any function,
31 /    the length parameters must be a multiple of (BPI*16).
32 /  - When available, SSE or AltiVec registers are used to manipulate data.
33 /    So, when on machines with these facilities, all pointers passed to
34 /    any function should be 16-byte aligned.
35 /  - Plaintext and ciphertext pointers may be equal (ie, plaintext gets
36 /    encrypted in-place), but no other pair of pointers may be equal.
37 /  - This code assumes all x86 processors have SSE2 and SSSE3 instructions
38 /    when compiling under MSVC. If untrue, alter the #define.
39 /  - This code is tested for C99 and recent versions of GCC and MSVC.      */
40 
41 /* ----------------------------------------------------------------------- */
42 /* User configuration options                                              */
43 /* ----------------------------------------------------------------------- */
44 
45 /* Set the AES key length to use and length of authentication tag to produce.
46 /  Setting either to 0 requires the value be set at runtime via ae_init().
47 /  Some optimizations occur for each when set to a fixed value.            */
48 #define OCB_KEY_LEN 16 /* 0, 16, 24 or 32. 0 means set in ae_init */
49 #define OCB_TAG_LEN 16 /* 0 to 16. 0 means set in ae_init         */
50 
51 /* This implementation has built-in support for multiple AES APIs. Set any
52 /  one of the following to non-zero to specify which to use.               */
53 #define USE_OPENSSL_AES 1   /* http://openssl.org                      */
54 #define USE_REFERENCE_AES 0 /* Internet search: rijndael-alg-fst.c     */
55 #define USE_AES_NI 0        /* Uses compiler's intrinsics              */
56 
57 /* During encryption and decryption, various "L values" are required.
58 /  The L values can be precomputed during initialization (requiring extra
59 /  space in ae_ctx), generated as needed (slightly slowing encryption and
60 /  decryption), or some combination of the two. L_TABLE_SZ specifies how many
61 /  L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes
62 /  are used for L values in ae_ctx. Plaintext and ciphertexts shorter than
63 /  2^L_TABLE_SZ blocks need no L values calculated dynamically.            */
64 #define L_TABLE_SZ 16
65 
66 /* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts
67 /  will be shorter than 2^(L_TABLE_SZ+4) bytes in length. This results
68 /  in better performance.                                                  */
69 #define L_TABLE_SZ_IS_ENOUGH 1
70 
71 /* ----------------------------------------------------------------------- */
72 /* Includes and compiler specific definitions                              */
73 /* ----------------------------------------------------------------------- */
74 
75 #include <keymaster/key_blob_utils/ae.h>
76 #include <stdlib.h>
77 #include <string.h>
78 
79 /* Define standard sized integers                                          */
80 #if defined(_MSC_VER) && (_MSC_VER < 1600)
81 typedef unsigned __int8 uint8_t;
82 typedef unsigned __int32 uint32_t;
83 typedef unsigned __int64 uint64_t;
84 typedef __int64 int64_t;
85 #else
86 #include <stdint.h>
87 #endif
88 
89 /* Compiler-specific intrinsics and fixes: bswap64, ntz                    */
90 #if _MSC_VER
91 #define inline __inline                           /* MSVC doesn't recognize "inline" in C */
92 #define restrict __restrict                       /* MSVC doesn't recognize "restrict" in C */
93 #define __SSE2__ (_M_IX86 || _M_AMD64 || _M_X64)  /* Assume SSE2  */
94 #define __SSSE3__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSSE3 */
95 #include <intrin.h>
96 #pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy)
97 #define bswap64(x) _byteswap_uint64(x)
/* Count trailing zero bits of x using the MSVC intrinsic.
   _BitScanForward requires an `unsigned long*` destination, so use a
   separate variable instead of aliasing the `unsigned` parameter (the
   original passed &x, which is a pointer-type mismatch under MSVC).
   tz is pre-zeroed so ntz(0) yields 0, matching the table-based
   fallback implementations below. */
static inline unsigned ntz(unsigned x) {
    unsigned long tz = 0;
    _BitScanForward(&tz, x);
    return (unsigned)tz;
}
102 #elif __GNUC__
103 #define inline __inline__                   /* No "inline" in GCC ansi C mode */
104 #define restrict __restrict__               /* No "restrict" in GCC ansi C mode */
105 #define bswap64(x) __builtin_bswap64(x)     /* Assuming GCC 4.3+ */
106 #define ntz(x) __builtin_ctz((unsigned)(x)) /* Assuming GCC 3.4+ */
107 #else /* Assume some C99 features: stdint.h, inline, restrict */
#define bswap32(x)                                                                                 \
    ((((x)&0xff000000u) >> 24) | (((x)&0x00ff0000u) >> 8) | (((x)&0x0000ff00u) << 8) |             \
     (((x)&0x000000ffu) << 24))

/* Reverse the byte order of a 64-bit value: byte-swap each 32-bit half
   with bswap32, then exchange the halves.  Purely value-based, so the
   result is independent of host endianness. */
static inline uint64_t bswap64(uint64_t x) {
    uint32_t upper = (uint32_t)(x >> 32);
    uint32_t lower = (uint32_t)(x & 0xffffffffu);
    return ((uint64_t)bswap32(lower) << 32) | (uint64_t)bswap32(upper);
}
122 
123 #if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH) /* < 2^13 byte texts */
/* Table-driven trailing-zero count, used only when all L values fit the
   precomputed table (texts < 2^(L_TABLE_SZ+4) bytes, L_TABLE_SZ <= 9).
   The table holds one entry per multiple of 4: tz_table[i] == ntz(4*i)
   (and 0 for i == 0), so this assumes x is a multiple of 4 -- block
   indices here advance BPI==4 blocks at a time. TODO(review): confirm
   callers never pass a non-multiple of 4 or x >= 480 (table has 120
   entries, indexed by x/4). */
static inline unsigned ntz(unsigned x) {
    static const unsigned char tz_table[] = {
        0, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2,
        3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2,
        4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 8, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2,
        3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2,
        5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2};
    return tz_table[x / 4];
}
133 #else                                           /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */
/* Portable trailing-zero count for the generic-compiler fallback.
   Returns the index of the lowest set bit of x; by convention ntz(0)
   is 0, matching the de Bruijn table's behavior at index 0. */
static inline unsigned ntz(unsigned x) {
    unsigned count = 0;
    if (x == 0)
        return 0;
    while ((x & 1u) == 0) {
        x >>= 1;
        ++count;
    }
    return count;
}
140 #endif
141 #endif
142 
143 /* ----------------------------------------------------------------------- */
144 /* Define blocks and operations -- Patch if incorrect on your compiler.    */
145 /* ----------------------------------------------------------------------- */
146 
147 #if __SSE2__ && !KEYMASTER_CLANG_TEST_BUILD
148 #include <xmmintrin.h> /* SSE instructions and _mm_malloc */
149 #include <emmintrin.h> /* SSE2 instructions               */
150 typedef __m128i block;
151 #define xor_block(x, y) _mm_xor_si128(x, y)
152 #define zero_block() _mm_setzero_si128()
153 #define unequal_blocks(x, y) (_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) != 0xffff)
154 #if __SSSE3__ || USE_AES_NI
155 #include <tmmintrin.h> /* SSSE3 instructions              */
156 #define swap_if_le(b)                                                                              \
157     _mm_shuffle_epi8(b, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
158 #else
/* Byte-reverse a 128-bit block without SSSE3's pshufb: reverse the four
   32-bit words, swap the 16-bit halves within each word, then exchange
   the two bytes of each 16-bit lane with a shift/xor pair. */
static inline block swap_if_le(block b) {
    block a = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 1, 2, 3));
    a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
    a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
    return _mm_xor_si128(_mm_srli_epi16(a, 8), _mm_slli_epi16(a, 8));
}
165 #endif
/* Form the per-nonce initial offset: the 192-bit stretch in KtopStr
   shifted left by bot bits (0 <= bot < 64), taken via two overlapping
   128-bit loads.  Each 64-bit lane computes (hi << bot) | (lo >> (64-bot));
   the variable-count shift intrinsics yield 0 for a count of 64, so
   bot == 0 is handled without a branch.  The final shuffle converts the
   result to memory-correct (byte-reversed) order. */
static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block hi = _mm_load_si128((__m128i*)(KtopStr + 0));  /* hi = B A */
    block lo = _mm_loadu_si128((__m128i*)(KtopStr + 1)); /* lo = C B */
    __m128i lshift = _mm_cvtsi32_si128(bot);
    __m128i rshift = _mm_cvtsi32_si128(64 - bot);
    lo = _mm_xor_si128(_mm_sll_epi64(hi, lshift), _mm_srl_epi64(lo, rshift));
#if __SSSE3__ || USE_AES_NI
    return _mm_shuffle_epi8(lo, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7));
#else
    return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1, 0, 3, 2)));
#endif
}
/* Double a register-correct block in GF(2^128) (OCB's "double"):
   shift left one bit; if the top bit fell off, xor 0x87 into the low
   byte.  srai replicates each 32-bit lane's sign bit, the mask keeps
   135 for the top lane's carry and 1 for the others, and the shuffle
   routes each lane's carry into the next lane (top lane wraps to the
   bottom) before the shifted value absorbs it. */
static inline block double_block(block bl) {
    const __m128i mask = _mm_set_epi32(135, 1, 1, 1);
    __m128i tmp = _mm_srai_epi32(bl, 31);
    tmp = _mm_and_si128(tmp, mask);
    tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 1, 0, 3));
    bl = _mm_slli_epi32(bl, 1);
    return _mm_xor_si128(bl, tmp);
}
186 #elif __ALTIVEC__
187 #include <altivec.h>
188 typedef vector unsigned block;
189 #define xor_block(x, y) vec_xor(x, y)
190 #define zero_block() vec_splat_u32(0)
191 #define unequal_blocks(x, y) vec_any_ne(x, y)
192 #define swap_if_le(b) (b)
193 #if __PPC64__
gen_offset(uint64_t KtopStr[3],unsigned bot)194 block gen_offset(uint64_t KtopStr[3], unsigned bot) {
195     union {
196         uint64_t u64[2];
197         block bl;
198     } rval;
199     rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
200     rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
201     return rval.bl;
202 }
203 #else
204 /* Special handling: Shifts are mod 32, and no 64-bit types */
/* 32-bit AltiVec gen_offset: align the 192-bit Ktop stretch with
   vec_sld (whole-byte shifts), reduce bot below 32, then finish with
   32-bit vector shifts (vector shift counts are mod 32 here).
   bot == 0 returns hi unshifted, avoiding a zero-count shift.
   NOTE(review): the load at KtopStr + 2 reads 16 bytes but only the
   first 8 lie inside the array; only those 8 are ever consumed (via
   vec_sld), and inside ae_ctx the array is followed by other fields,
   but this is formally an out-of-bounds read -- verify on this
   platform. */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const vector unsigned k32 = {32, 32, 32, 32};
    vector unsigned hi = *(vector unsigned*)(KtopStr + 0);
    vector unsigned lo = *(vector unsigned*)(KtopStr + 2);
    vector unsigned bot_vec;
    if (bot < 32) {
        lo = vec_sld(hi, lo, 4);
    } else {
        vector unsigned t = vec_sld(hi, lo, 4);
        lo = vec_sld(hi, lo, 8);
        hi = t;
        bot = bot - 32;
    }
    if (bot == 0)
        return hi;
    *(unsigned*)&bot_vec = bot; /* only element 0 is written; vec_splat reads just that */
    vector unsigned lshift = vec_splat(bot_vec, 0);
    vector unsigned rshift = vec_sub(k32, lshift);
    hi = vec_sl(hi, lshift);
    lo = vec_sr(lo, rshift);
    return vec_xor(hi, lo);
}
227 #endif
/* Double a big-endian block in GF(2^128): shift every byte left one
   bit, and feed each byte's lost MSB into the byte above it (vec_perm
   rotates the carry vector down one byte).  The carry out of the most
   significant byte is multiplied by 135 (0x87, the field's reduction
   constant) by the mask and wraps around to the last byte. */
static inline block double_block(block b) {
    const vector unsigned char mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    const vector unsigned char perm = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0};
    const vector unsigned char shift7 = vec_splat_u8(7);
    const vector unsigned char shift1 = vec_splat_u8(1);
    vector unsigned char c = (vector unsigned char)b;
    vector unsigned char t = vec_sra(c, shift7); /* replicate each byte's top bit */
    t = vec_and(t, mask);
    t = vec_perm(t, t, perm); /* route carries to the neighboring byte */
    c = vec_sl(c, shift1);
    return (block)vec_xor(c, t);
}
240 #elif __ARM_NEON__
241 #include <arm_neon.h>
242 typedef int8x16_t block __attribute__ ((aligned (16))); /* Yay! Endian-neutral reads! */
243 #define xor_block(x, y) veorq_s8(x, y)
244 #define zero_block() vdupq_n_s8(0)
/* Return nonzero iff blocks a and b differ: xor them as two 64-bit
   lanes and test whether either lane is nonzero. */
static inline int unequal_blocks(block a, block b) {
    int64x2_t t = veorq_s64((int64x2_t)a, (int64x2_t)b);
    return (vgetq_lane_s64(t, 0) | vgetq_lane_s64(t, 1)) != 0;
}
249 #define swap_if_le(b) (b) /* Using endian-neutral int8x16_t */
250 /* KtopStr is reg correct by 64 bits, return mem correct */
/* NEON gen_offset: shift the 192-bit Ktop stretch left by bot bits.
   vshlq with a negative count shifts right; when bot == 0 the right
   count is -64, which yields zero, so the result is just hi -- no
   special case needed.  On a little-endian host each 64-bit lane is
   byte-reversed into memory-correct order before returning. */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1}; /* runtime endianness probe: first byte of 1 is 1 iff LE */
    const int64x2_t k64 = {-64, -64};
    /* Copy hi and lo into local variables to ensure proper alignment */
    uint64x2_t hi = vld1q_u64(KtopStr + 0); /* hi = A B */
    uint64x2_t lo = vld1q_u64(KtopStr + 1); /* lo = B C */
    int64x2_t ls = vdupq_n_s64(bot);
    int64x2_t rs = vqaddq_s64(k64, ls); /* rs = bot - 64: negative => right shift */
    block rval = (block)veorq_u64(vshlq_u64(hi, ls), vshlq_u64(lo, rs));
    if (little.endian)
        rval = vrev64q_s8(rval);
    return rval;
}
/* Double a block in GF(2^128): per-byte left shift with each byte's
   lost top bit routed to the neighboring byte via vextq; the mask
   applies the 135 (0x87) reduction for the carry out of the most
   significant byte, which rotates to the opposite end. */
static inline block double_block(block b) {
    const block mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    block tmp = vshrq_n_s8(b, 7); /* replicate each byte's top (sign) bit */
    tmp = vandq_s8(tmp, mask);
    tmp = vextq_s8(tmp, tmp, 1); /* Rotate high byte to end */
    b = vshlq_n_s8(b, 1);
    return veorq_s8(tmp, b);
}
275 #else
typedef struct { uint64_t l, r; } block;

/* XOR two 128-bit blocks half-by-half and return the result. */
static inline block xor_block(block x, block y) {
    block r;
    r.l = x.l ^ y.l;
    r.r = x.r ^ y.r;
    return r;
}
zero_block(void)282 static inline block zero_block(void) {
283     const block t = {0, 0};
284     return t;
285 }
286 #define unequal_blocks(x, y) ((((x).l ^ (y).l) | ((x).r ^ (y).r)) != 0)
/* Convert between memory-correct and register-correct order: on a
   little-endian host, byte-swap both 64-bit halves; on big-endian the
   block is already in the right order and is returned unchanged. */
static inline block swap_if_le(block b) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1}; /* first byte of the int 1 is 1 iff little-endian */
    if (!little.endian)
        return b;
    b.l = bswap64(b.l);
    b.r = bswap64(b.r);
    return b;
}
300 
301 /* KtopStr is reg correct by 64 bits, return mem correct */
/* Portable gen_offset: the 192-bit Ktop stretch shifted left by bot
   bits (bot < 64).  The bot == 0 case is split out deliberately:
   a >> (64 - 0) shift would be undefined behavior in C.  The result is
   converted to memory-correct byte order before returning. */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block rval;
    if (bot != 0) {
        rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
        rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
    } else {
        rval.l = KtopStr[0];
        rval.r = KtopStr[1];
    }
    return swap_if_le(rval);
}
313 
314 #if __GNUC__ && __arm__
/* Double a block in GF(2^128) on 32-bit ARM: a 128-bit left shift
   built from an add-with-carry chain, running from the least
   significant word (%1, low half of b.r) up to the most significant
   (%H0, high half of b.l).  If the final carry out is set, the least
   significant word is reduced by xoring in 135 (0x87). */
static inline block double_block(block b) {
    __asm__("adds %1,%1,%1\n\t"
            "adcs %H1,%H1,%H1\n\t"
            "adcs %0,%0,%0\n\t"
            "adcs %H0,%H0,%H0\n\t"
            "it cs\n\t"
            "eorcs %1,%1,#135"
            : "+r"(b.l), "+r"(b.r)
            :
            : "cc");
    return b;
}
327 #else
/* Double a register-correct block in GF(2^128): shift the 128-bit
   value (l = high half, r = low half) left one bit, and if the bit
   shifted out of the top was set, xor the low byte with 135 (0x87),
   the field's reduction constant. */
static inline block double_block(block b) {
    const uint64_t msb_mask = (uint64_t)((int64_t)b.l >> 63); /* all-ones iff top bit set */
    block r;
    r.l = (b.l << 1) | (b.r >> 63);
    r.r = (b.r << 1) ^ (msb_mask & 135);
    return r;
}
334 #endif
335 
336 #endif
337 
338 #ifndef __has_attribute
339 #define __has_attribute(x) 0
340 #endif
341 
342 #if __has_attribute(fallthrough)
343 #define __fallthrough __attribute__((__fallthrough__));
344 #else
345 #define __fallthrough
346 #endif
347 
348 /* ----------------------------------------------------------------------- */
349 /* AES - Code uses OpenSSL API. Other implementations get mapped to it.    */
350 /* ----------------------------------------------------------------------- */
351 
352 /*---------------*/
353 #if USE_OPENSSL_AES
354 /*---------------*/
355 
356 #include <openssl/aes.h> /* http://openssl.org/ */
357 
358 /* How to ECB encrypt an array of blocks, in place                         */
/* ECB-encrypt nblks 16-byte blocks in place.  Each block is
   independent in ECB mode, so they are processed front to back. */
static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i;
    for (i = 0; i < nblks; ++i)
        AES_encrypt((unsigned char*)(blks + i), (unsigned char*)(blks + i), key);
}
365 
/* ECB-decrypt nblks 16-byte blocks in place.  Each block is
   independent in ECB mode, so they are processed front to back. */
static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i;
    for (i = 0; i < nblks; ++i)
        AES_decrypt((unsigned char*)(blks + i), (unsigned char*)(blks + i), key);
}
372 
373 #define BPI 4 /* Number of blocks in buffer per ECB call */
374 
375 /*-------------------*/
376 #elif USE_REFERENCE_AES
377 /*-------------------*/
378 
379 #include "rijndael-alg-fst.h" /* Barreto's Public-Domain Code */
380 #if (OCB_KEY_LEN == 0)
381 typedef struct {
382     uint32_t rd_key[60];
383     int rounds;
384 } AES_KEY;
385 #define ROUNDS(ctx) ((ctx)->rounds)
386 #define AES_set_encrypt_key(x, y, z)                                                               \
387     do {                                                                                           \
388         rijndaelKeySetupEnc((z)->rd_key, x, y);                                                    \
389         (z)->rounds = y / 32 + 6;                                                                  \
390     } while (0)
391 #define AES_set_decrypt_key(x, y, z)                                                               \
392     do {                                                                                           \
393         rijndaelKeySetupDec((z)->rd_key, x, y);                                                    \
394         (z)->rounds = y / 32 + 6;                                                                  \
395     } while (0)
396 #else
397 typedef struct { uint32_t rd_key[OCB_KEY_LEN + 28]; } AES_KEY;
398 #define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
399 #define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y)
400 #define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y)
401 #endif
402 #define AES_encrypt(x, y, z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y)
403 #define AES_decrypt(x, y, z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y)
404 
/* ECB-encrypt nblks 16-byte blocks in place using the reference AES.
   Blocks are independent, so they are processed front to back. */
static void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i;
    for (i = 0; i < nblks; ++i)
        AES_encrypt((unsigned char*)(blks + i), (unsigned char*)(blks + i), key);
}
411 
/* ECB-decrypt nblks 16-byte blocks in place using the reference AES.
   Declared static for consistency with the other AES_ecb_* helpers in
   this file (the encrypt twin above and the OpenSSL/AES-NI variants
   are all static) and to keep this file-local helper out of the global
   namespace. */
static void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    while (nblks) {
        --nblks;
        AES_decrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
    }
}
418 
419 #define BPI 4 /* Number of blocks in buffer per ECB call */
420 
421 /*----------*/
422 #elif USE_AES_NI
423 /*----------*/
424 
425 #include <wmmintrin.h>
426 
427 #if (OCB_KEY_LEN == 0)
428 typedef struct {
429     __m128i rd_key[15];
430     int rounds;
431 } AES_KEY;
432 #define ROUNDS(ctx) ((ctx)->rounds)
433 #else
434 typedef struct { __m128i rd_key[7 + OCB_KEY_LEN / 4]; } AES_KEY;
435 #define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
436 #endif
437 
/* One AES key-schedule step: v2 receives AESKEYGENASSIST of v4, the two
   shuffle/xor pairs fold the previous round key v1 into itself (the
   xor-with-shifted-copies of the FIPS-197 schedule), and the selected
   word of v2 is xored in last.  v3 is scratch; shuff_const picks which
   assist word is broadcast; aes_const is the round constant. */
#define EXPAND_ASSIST(v1, v2, v3, v4, shuff_const, aes_const)                                      \
    v2 = _mm_aeskeygenassist_si128(v4, aes_const);                                                 \
    v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 16));         \
    v1 = _mm_xor_si128(v1, v3);                                                                    \
    v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 140));        \
    v1 = _mm_xor_si128(v1, v3);                                                                    \
    v2 = _mm_shuffle_epi32(v2, shuff_const);                                                       \
    v1 = _mm_xor_si128(v1, v2)

/* One AES-192 double-step: the 192-bit schedule advances by 1.5 round
   keys per assist, so each invocation produces three 128-bit round keys
   (kp[idx..idx+2]) from two assists, carrying the leftover half-key in
   tmp/x3 across invocations. */
#define EXPAND192_STEP(idx, aes_const)                                                             \
    EXPAND_ASSIST(x0, x1, x2, x3, 85, aes_const);                                                  \
    x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4));                                                 \
    x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255));                                            \
    kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(x0), 68));   \
    kp[idx + 1] =                                                                                  \
        _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), _mm_castsi128_ps(x3), 78));          \
    EXPAND_ASSIST(x0, x1, x2, x3, 85, (aes_const * 2));                                            \
    x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4));                                                 \
    x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255));                                            \
    kp[idx + 2] = x0;                                                                              \
    tmp = x3
459 
/* Expand a 16-byte user key into the 11 AES-128 round keys kp[0..10].
   The round constants 1,2,4,...,128,27,54 follow FIPS-197; the user
   key may be unaligned (_mm_loadu_si128). */
static void AES_128_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2;
    __m128i* kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 1);
    kp[1] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 2);
    kp[2] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 4);
    kp[3] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 8);
    kp[4] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 16);
    kp[5] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 32);
    kp[6] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 64);
    kp[7] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 128);
    kp[8] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 27);
    kp[9] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 54);
    kp[10] = x0;
}
486 
/* Expand a 24-byte user key into the 13 AES-192 round keys kp[0..12];
   each EXPAND192_STEP emits three round keys.  userkey may be
   unaligned (_mm_loadu_si128). */
static void AES_192_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2, x3, tmp, *kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    tmp = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
    x2 = _mm_setzero_si128();
    EXPAND192_STEP(1, 1);
    EXPAND192_STEP(4, 4);
    EXPAND192_STEP(7, 16);
    EXPAND192_STEP(10, 64);
}
497 
/* Expand a 32-byte user key into the 15 AES-256 round keys kp[0..14].
   The schedule alternates between the two 128-bit key halves: shuffle
   constant 255 selects the rot-word assist path, 170 the plain
   sub-word path, per FIPS-197.  userkey may be unaligned. */
static void AES_256_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2, x3, *kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 1);
    kp[2] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 1);
    kp[3] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 2);
    kp[4] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 2);
    kp[5] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 4);
    kp[6] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 4);
    kp[7] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 8);
    kp[8] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 8);
    kp[9] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 16);
    kp[10] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 16);
    kp[11] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 32);
    kp[12] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 32);
    kp[13] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 64);
    kp[14] = x0;
}
530 
/* Build the encryption key schedule for a 128-, 192- or 256-bit key.
   Always returns 0 (OpenSSL-compatible signature).
   NOTE(review): any other bits value silently leaves the schedule
   uninitialized with no error reported -- confirm callers only pass
   128/192/256. */
static int AES_set_encrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
    if (bits == 128) {
        AES_128_Key_Expansion(userKey, key);
    } else if (bits == 192) {
        AES_192_Key_Expansion(userKey, key);
    } else if (bits == 256) {
        AES_256_Key_Expansion(userKey, key);
    }
#if (OCB_KEY_LEN == 0)
    key->rounds = 6 + bits / 32; /* 10/12/14 rounds for 128/192/256-bit keys */
#endif
    return 0;
}
544 
/* Derive the decryption schedule from an existing encryption schedule
   for use with aesdec (equivalent inverse cipher): round keys are
   copied in reverse order, with InvMixColumns (aesimc) applied to all
   but the first and last.  dkey must not alias ekey, since late reads
   of ekey would otherwise see already-overwritten slots. */
static void AES_set_decrypt_key_fast(AES_KEY* dkey, const AES_KEY* ekey) {
    int j = 0;
    int i = ROUNDS(ekey);
#if (OCB_KEY_LEN == 0)
    dkey->rounds = i;
#endif
    dkey->rd_key[i--] = ekey->rd_key[j++]; /* last decrypt key = first encrypt key, untransformed */
    while (i)
        dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);
    dkey->rd_key[i] = ekey->rd_key[j]; /* first decrypt key = last encrypt key, untransformed */
}
556 
/* Build a decryption schedule directly from the user key: expand the
   encryption schedule into a temporary, then invert it.  Always
   returns 0 (OpenSSL-compatible signature).
   NOTE(review): temp_key holds secret round keys and is not wiped
   before going out of scope; a plain memset could be optimized away --
   consider a secure-zero primitive if this matters here. */
static int AES_set_decrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
    AES_KEY temp_key;
    AES_set_encrypt_key(userKey, bits, &temp_key);
    AES_set_decrypt_key_fast(key, &temp_key);
    return 0;
}
563 
/* Encrypt one 16-byte block with AES-NI.  in and out must be 16-byte
   aligned (_mm_load_si128/_mm_store_si128) and may be equal
   (in-place). */
static inline void AES_encrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
    int j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    __m128i tmp = _mm_load_si128((__m128i*)in);
    tmp = _mm_xor_si128(tmp, sched[0]); /* initial AddRoundKey */
    for (j = 1; j < rnds; j++)
        tmp = _mm_aesenc_si128(tmp, sched[j]);
    tmp = _mm_aesenclast_si128(tmp, sched[j]); /* final round: no MixColumns */
    _mm_store_si128((__m128i*)out, tmp);
}
574 
/* Decrypt one 16-byte block with AES-NI using an equivalent-inverse
   schedule (see AES_set_decrypt_key_fast).  in and out must be 16-byte
   aligned and may be equal (in-place). */
static inline void AES_decrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
    int j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    __m128i tmp = _mm_load_si128((__m128i*)in);
    tmp = _mm_xor_si128(tmp, sched[0]);
    for (j = 1; j < rnds; j++)
        tmp = _mm_aesdec_si128(tmp, sched[j]);
    tmp = _mm_aesdeclast_si128(tmp, sched[j]); /* final round: no InvMixColumns */
    _mm_store_si128((__m128i*)out, tmp);
}
585 
/* ECB-encrypt nblks blocks in place.  Each round key is applied across
   all blocks before advancing, keeping several independent aesenc
   chains in flight to hide the instruction's latency. */
static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i, j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_xor_si128(blks[i], sched[0]);
    for (j = 1; j < rnds; ++j)
        for (i = 0; i < nblks; ++i)
            blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_aesenclast_si128(blks[i], sched[j]);
}
597 
/* ECB-decrypt nblks blocks in place; mirror of AES_ecb_encrypt_blks,
   interleaving aesdec rounds across blocks for pipelining. */
static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i, j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_xor_si128(blks[i], sched[0]);
    for (j = 1; j < rnds; ++j)
        for (i = 0; i < nblks; ++i)
            blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_aesdeclast_si128(blks[i], sched[j]);
}
609 
610 #define BPI 8 /* Number of blocks in buffer per ECB call   */
611 /* Set to 4 for Westmere, 8 for Sandy Bridge */
612 
613 #endif
614 
615 /* ----------------------------------------------------------------------- */
616 /* Define OCB context structure.                                           */
617 /* ----------------------------------------------------------------------- */
618 
619 /*------------------------------------------------------------------------
620 / Each item in the OCB context is stored either "memory correct" or
621 / "register correct". On big-endian machines, this is identical. On
622 / little-endian machines, one must choose whether the byte-string
623 / is in the correct order when it resides in memory or in registers.
624 / It must be register correct whenever it is to be manipulated
625 / arithmetically, but must be memory correct whenever it interacts
626 / with the plaintext or ciphertext.
627 /------------------------------------------------------------------------- */
628 
/* All per-key OCB state: the precomputed L table, running offsets and
   checksums for the message in progress, and both AES key schedules. */
struct _ae_ctx {
    block offset;        /* Memory correct               */
    block checksum;      /* Memory correct               */
    block Lstar;         /* Memory correct               */
    block Ldollar;       /* Memory correct               */
    block L[L_TABLE_SZ]; /* Memory correct               */
    block ad_checksum;   /* Memory correct               */
    block ad_offset;     /* Memory correct               */
    block cached_Top;    /* Memory correct               */
    uint64_t KtopStr[3]; /* Register correct, each item  */
    uint32_t ad_blocks_processed; /* associated-data blocks consumed so far (reset by ae_init) */
    uint32_t blocks_processed;    /* message blocks consumed so far */
    AES_KEY decrypt_key;
    AES_KEY encrypt_key;
#if (OCB_TAG_LEN == 0)
    unsigned tag_len; /* runtime tag length, set by ae_init when not fixed at compile time */
#endif
};
647 
648 /* ----------------------------------------------------------------------- */
649 /* L table lookup (or on-the-fly generation)                               */
650 /* ----------------------------------------------------------------------- */
651 
652 #if L_TABLE_SZ_IS_ENOUGH
653 #define getL(_ctx, _tz) ((_ctx)->L[_tz])
654 #else
/* Return the tz-th L value: straight from the precomputed table when
   tz < L_TABLE_SZ, otherwise derived by repeatedly doubling the last
   table entry in GF(2^128).  The doubling is done in register-correct
   order; the result is converted back to memory-correct order. */
static block getL(const ae_ctx* ctx, unsigned tz) {
    if (tz < L_TABLE_SZ)
        return ctx->L[tz];
    else {
        unsigned i;
        /* Bring L[MAX] into registers, make it register correct */
        block rval = swap_if_le(ctx->L[L_TABLE_SZ - 1]);
        rval = double_block(rval);
        for (i = L_TABLE_SZ; i < tz; i++)
            rval = double_block(rval);
        return swap_if_le(rval); /* To memory correct */
    }
}
668 #endif
669 
670 /* ----------------------------------------------------------------------- */
671 /* Public functions                                                        */
672 /* ----------------------------------------------------------------------- */
673 
674 /* 32-bit SSE2 and Altivec systems need to be forced to allocate memory
675    on 16-byte alignments. (I believe all major 64-bit systems do already.) */
676 
/* Allocate an ae_ctx, forcing 16-byte alignment on platforms where the
   block type is a vector register type (SSE/AltiVec/NEON loads need
   it).  misc is accepted for API compatibility and ignored.  Returns
   NULL on allocation failure; release with ae_free so the deallocator
   matches the allocator branch used here. */
ae_ctx* ae_allocate(void* misc) {
    void* p;
    (void)misc; /* misc unused in this implementation */
#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
    p = _mm_malloc(sizeof(ae_ctx), 16);
#elif ((__ALTIVEC__ && !__PPC64__) || __ARM_NEON__)
    if (posix_memalign(&p, 16, sizeof(ae_ctx)) != 0) p = NULL;
#else
    p = malloc(sizeof(ae_ctx));
#endif
    return (ae_ctx*)p;
}
689 
/* Release a context obtained from ae_allocate.  The #if condition must
   stay in sync with ae_allocate's so _mm_malloc pairs with _mm_free
   and malloc/posix_memalign pair with free. */
void ae_free(ae_ctx* ctx) {
#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
    _mm_free(ctx);
#else
    free(ctx);
#endif
}
697 
698 /* ----------------------------------------------------------------------- */
699 
int ae_clear(ae_ctx* ctx) /* Zero ae_ctx and undo initialization          */
{
    /* NOTE(review): plain memset; the context (and its key schedules) is
       caller-visible so the store cannot legally be elided, but a
       dedicated secure-wipe primitive (explicit_bzero / memset_explicit)
       would make the key-erasure intent explicit — confirm policy. */
    memset(ctx, 0, sizeof(ae_ctx));
    return AE_SUCCESS;
}
705 
/* Size of ae_ctx in bytes, so callers can allocate contexts themselves. */
int ae_ctx_sizeof(void) {
    return (int)sizeof(ae_ctx);
}
709 
710 /* ----------------------------------------------------------------------- */
711 
/* Initialize *ctx for the given key.  key_len is in bytes (overridden
   when OCB_KEY_LEN is fixed at compile time); only 12-byte nonces are
   supported.  Returns AE_SUCCESS, or AE_NOT_SUPPORTED for other nonce
   lengths. */
int ae_init(ae_ctx* ctx, const void* key, int key_len, int nonce_len, int tag_len) {
    unsigned i;
    block tmp_blk;

    if (nonce_len != 12)
        return AE_NOT_SUPPORTED;

/* Initialize encryption & decryption keys */
#if (OCB_KEY_LEN > 0)
    key_len = OCB_KEY_LEN;
#endif
    AES_set_encrypt_key((unsigned char*)key, key_len * 8, &ctx->encrypt_key);
#if USE_AES_NI
    /* AES-NI can derive the decryption schedule from the encryption one. */
    AES_set_decrypt_key_fast(&ctx->decrypt_key, &ctx->encrypt_key);
#else
    AES_set_decrypt_key((unsigned char*)key, (int)(key_len * 8), &ctx->decrypt_key);
#endif

    /* Zero things that need zeroing */
    ctx->cached_Top = ctx->ad_checksum = zero_block();
    ctx->ad_blocks_processed = 0;

    /* Compute key-dependent values.  cached_Top is all-zero here, so this
       sets L* = E_K(0^128); then L$ = double(L*), L[0] = double(L$), and
       L[i] = double(L[i-1]).  Doubling is done in register-correct order
       (swap_if_le in), results stored memory-correct (swap_if_le out). */
    AES_encrypt((unsigned char*)&ctx->cached_Top, (unsigned char*)&ctx->Lstar, &ctx->encrypt_key);
    tmp_blk = swap_if_le(ctx->Lstar);
    tmp_blk = double_block(tmp_blk);
    ctx->Ldollar = swap_if_le(tmp_blk);
    tmp_blk = double_block(tmp_blk);
    ctx->L[0] = swap_if_le(tmp_blk);
    for (i = 1; i < L_TABLE_SZ; i++) {
        tmp_blk = double_block(tmp_blk);
        ctx->L[i] = swap_if_le(tmp_blk);
    }

#if (OCB_TAG_LEN == 0)
    ctx->tag_len = tag_len;
#else
    (void)tag_len; /* Suppress var not used error */
#endif

    return AE_SUCCESS;
}
754 
755 /* ----------------------------------------------------------------------- */
756 
/* Format the 96-bit nonce per RFC 7253, compute (or reuse the cached)
   Ktop = E_K(formatted nonce with low 6 bits zeroed) plus its 64-bit
   "stretch", and return the initial offset for the message. */
static block gen_offset_from_nonce(ae_ctx* ctx, const void* nonce) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1}; /* little.endian is nonzero on little-endian machines */
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    unsigned idx;

/* First word packs taglen (in bits, mod 128) together with the nonce
   format's marker bit; the remaining 12 bytes are the nonce itself. */
#if (OCB_TAG_LEN > 0)
    if (little.endian)
        tmp.u32[0] = 0x01000000 + ((OCB_TAG_LEN * 8 % 128) << 1);
    else
        tmp.u32[0] = 0x00000001 + ((OCB_TAG_LEN * 8 % 128) << 25);
#else
    if (little.endian)
        tmp.u32[0] = 0x01000000 + ((ctx->tag_len * 8 % 128) << 1);
    else
        tmp.u32[0] = 0x00000001 + ((ctx->tag_len * 8 % 128) << 25);
#endif
    /* NOTE(review): reads the caller's nonce through uint32_t* — assumes
       4-byte alignment of the nonce buffer; confirm on strict platforms. */
    tmp.u32[1] = ((uint32_t*)nonce)[0];
    tmp.u32[2] = ((uint32_t*)nonce)[1];
    tmp.u32[3] = ((uint32_t*)nonce)[2];
    idx = (unsigned)(tmp.u8[15] & 0x3f);           /* Get low 6 bits of nonce  */
    tmp.u8[15] = tmp.u8[15] & 0xc0;                /* Zero low 6 bits of nonce */
    if (unequal_blocks(tmp.bl, ctx->cached_Top)) { /* Cached?       */
        ctx->cached_Top = tmp.bl;                  /* Update cache, KtopStr    */
        AES_encrypt(tmp.u8, (unsigned char*)&ctx->KtopStr, &ctx->encrypt_key);
        if (little.endian) { /* Make Register Correct    */
            ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]);
            ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]);
        }
        /* Stretch: third 64-bit word is Ktop ^ (Ktop << 8), per RFC 7253. */
        ctx->KtopStr[2] = ctx->KtopStr[0] ^ (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56);
    }
    /* Offset = the idx-bit rotation window into (Ktop || stretch). */
    return gen_offset(ctx->KtopStr, idx);
}
797 
/* Hash associated data into ctx->ad_checksum.  Callable incrementally:
   non-final calls must pass ad_len that is a multiple of BPI*16 bytes;
   only the final call may pass a ragged length.  Implements the OCB AD
   hash: sum ^= E_K(A_i ^ offset_i) with offset_i = offset_{i-1} ^
   L[ntz(i)]. */
static void process_ad(ae_ctx* ctx, const void* ad, int ad_len, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block ad_offset, ad_checksum;
    const block* adp = (block*)ad;
    unsigned i, k, tz, remaining;

    ad_offset = ctx->ad_offset;
    ad_checksum = ctx->ad_checksum;
    i = ad_len / (BPI * 16);
    if (i) {
        unsigned ad_block_num = ctx->ad_blocks_processed;
        do {
            block ta[BPI], oa[BPI];
            ad_block_num += BPI;
            /* Within a group of BPI blocks only the last offset depends on
               the running block count; the rest use fixed low L values. */
            tz = ntz(ad_block_num);
            oa[0] = xor_block(ad_offset, ctx->L[0]);
            ta[0] = xor_block(oa[0], adp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], adp[1]);
            oa[2] = xor_block(ad_offset, ctx->L[1]);
            ta[2] = xor_block(oa[2], adp[2]);
#if BPI == 4
            ad_offset = xor_block(oa[2], getL(ctx, tz));
            ta[3] = xor_block(ad_offset, adp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], adp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], adp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], adp[5]);
            oa[6] = xor_block(ad_offset, ctx->L[2]);
            ta[6] = xor_block(oa[6], adp[6]);
            ad_offset = xor_block(oa[6], getL(ctx, tz));
            ta[7] = xor_block(ad_offset, adp[7]);
#endif
            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
            ad_checksum = xor_block(ad_checksum, ta[0]);
            ad_checksum = xor_block(ad_checksum, ta[1]);
            ad_checksum = xor_block(ad_checksum, ta[2]);
            ad_checksum = xor_block(ad_checksum, ta[3]);
#if (BPI == 8)
            ad_checksum = xor_block(ad_checksum, ta[4]);
            ad_checksum = xor_block(ad_checksum, ta[5]);
            ad_checksum = xor_block(ad_checksum, ta[6]);
            ad_checksum = xor_block(ad_checksum, ta[7]);
#endif
            adp += BPI;
        } while (--i);
        ctx->ad_blocks_processed = ad_block_num;
        ctx->ad_offset = ad_offset;
        ctx->ad_checksum = ad_checksum;
    }

    if (final) {
        block ta[BPI];

        /* Process remaining associated data, compute its tag contribution */
        remaining = ((unsigned)ad_len) % (BPI * 16);
        if (remaining) {
            k = 0;
#if (BPI == 8)
            if (remaining >= 64) {
                tmp.bl = xor_block(ad_offset, ctx->L[0]);
                ta[0] = xor_block(tmp.bl, adp[0]);
                tmp.bl = xor_block(tmp.bl, ctx->L[1]);
                ta[1] = xor_block(tmp.bl, adp[1]);
                ad_offset = xor_block(ad_offset, ctx->L[1]);
                ta[2] = xor_block(ad_offset, adp[2]);
                ad_offset = xor_block(ad_offset, ctx->L[2]);
                ta[3] = xor_block(ad_offset, adp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                ad_offset = xor_block(ad_offset, getL(ctx, ntz(k + 2)));
                ta[k + 1] = xor_block(ad_offset, adp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                remaining = remaining - 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: pad with 10* and use the L* offset. */
                ad_offset = xor_block(ad_offset, ctx->Lstar);
                tmp.bl = zero_block();
                memcpy(tmp.u8, adp + k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                ta[k] = xor_block(ad_offset, tmp.bl);
                ++k;
            }
            AES_ecb_encrypt_blks(ta, k, &ctx->encrypt_key);
            /* Fold all k encrypted blocks into the checksum (fallthrough). */
            switch (k) {
#if (BPI == 8)
            case 8:
                ad_checksum = xor_block(ad_checksum, ta[7]);
                __fallthrough;
            case 7:
                ad_checksum = xor_block(ad_checksum, ta[6]);
                __fallthrough;
            case 6:
                ad_checksum = xor_block(ad_checksum, ta[5]);
                __fallthrough;
            case 5:
                ad_checksum = xor_block(ad_checksum, ta[4]);
                __fallthrough;
#endif
            case 4:
                ad_checksum = xor_block(ad_checksum, ta[3]);
                __fallthrough;
            case 3:
                ad_checksum = xor_block(ad_checksum, ta[2]);
                __fallthrough;
            case 2:
                ad_checksum = xor_block(ad_checksum, ta[1]);
                __fallthrough;
            case 1:
                ad_checksum = xor_block(ad_checksum, ta[0]);
            }
            ctx->ad_checksum = ad_checksum;
        }
    }
}
931 
932 /* ----------------------------------------------------------------------- */
933 
/* OCB encryption.  A non-NULL nonce starts a new message; subsequent
   AE_PENDING calls (NULL nonce) stream more data in multiples of BPI*16
   bytes; the AE_FINALIZE call completes the message and emits the tag
   (into 'tag' if non-NULL, else appended to 'ct').  Returns the number
   of ciphertext bytes written.  A negative ad_len on a new message
   reuses the previous message's AD checksum. */
int ae_encrypt(ae_ctx* ctx, const void* nonce, const void* pt, int pt_len, const void* ad,
               int ad_len, void* ct, void* tag, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block offset, checksum;
    unsigned i, k;
    block* ctp = (block*)ct;
    const block* ptp = (block*)pt;

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Encrypt plaintext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = pt_len / (BPI * 16);
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI - 1] = offset; /* seed: oa[BPI-1] carries the running offset */
        do {
            block ta[BPI];
            block_num += BPI;
            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ptp[0]);
            checksum = xor_block(checksum, ptp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ptp[1]);
            checksum = xor_block(checksum, ptp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ptp[2]);
            checksum = xor_block(checksum, ptp[2]);
#if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ptp[4]);
            checksum = xor_block(checksum, ptp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ptp[5]);
            checksum = xor_block(checksum, ptp[5]);
            /* oa[7] still holds the previous iteration's final offset here,
               so this is offset ^ L[2] — not a use of garbage. */
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ptp[6]);
            checksum = xor_block(checksum, ptp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ptp[7]);
            checksum = xor_block(checksum, ptp[7]);
#endif
            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
            ctp[0] = xor_block(ta[0], oa[0]);
            ctp[1] = xor_block(ta[1], oa[1]);
            ctp[2] = xor_block(ta[2], oa[2]);
            ctp[3] = xor_block(ta[3], oa[3]);
#if (BPI == 8)
            ctp[4] = xor_block(ta[4], oa[4]);
            ctp[5] = xor_block(ta[5], oa[5]);
            ctp[6] = xor_block(ta[6], oa[6]);
            ctp[7] = xor_block(ta[7], oa[7]);
#endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI - 1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI + 1], oa[BPI];

        /* Process remaining plaintext and compute its tag contribution    */
        unsigned remaining = ((unsigned)pt_len) % (BPI * 16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
#if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ptp[0]);
                checksum = xor_block(checksum, ptp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ptp[1]);
                checksum = xor_block(checksum, ptp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ptp[2]);
                checksum = xor_block(checksum, ptp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ptp[3]);
                checksum = xor_block(checksum, ptp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
                ta[k + 1] = xor_block(offset, ptp[k + 1]);
                checksum = xor_block(checksum, ptp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                /* Partial block: checksum gets 10*-padded plaintext; ta[k]
                   holds offset ^ L*, whose encryption is the Pad stream. */
                tmp.bl = zero_block();
                memcpy(tmp.u8, ptp + k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                checksum = xor_block(checksum, tmp.bl);
                ta[k] = offset = xor_block(offset, ctx->Lstar);
                ++k;
            }
        }
        offset = xor_block(offset, ctx->Ldollar); /* Part of tag gen */
        ta[k] = xor_block(offset, checksum);      /* Part of tag gen */
        AES_ecb_encrypt_blks(ta, k + 1, &ctx->encrypt_key);
        offset = xor_block(ta[k], ctx->ad_checksum); /* Part of tag gen */
        if (remaining) {
            /* C_* = P_* ^ Pad; only 'remaining' bytes are emitted. */
            --k;
            tmp.bl = xor_block(tmp.bl, ta[k]);
            memcpy(ctp + k, tmp.u8, remaining);
        }
        /* Emit ciphertext for the whole 16-byte blocks (fallthrough). */
        switch (k) {
#if (BPI == 8)
        case 7:
            ctp[6] = xor_block(ta[6], oa[6]);
            __fallthrough;
        case 6:
            ctp[5] = xor_block(ta[5], oa[5]);
            __fallthrough;
        case 5:
            ctp[4] = xor_block(ta[4], oa[4]);
            __fallthrough;
        case 4:
            ctp[3] = xor_block(ta[3], oa[3]);
            __fallthrough;
#endif
        case 3:
            ctp[2] = xor_block(ta[2], oa[2]);
            __fallthrough;
        case 2:
            ctp[1] = xor_block(ta[1], oa[1]);
            __fallthrough;
        case 1:
            ctp[0] = xor_block(ta[0], oa[0]);
        }

        /* Tag is placed at the correct location
         */
        if (tag) {
#if (OCB_TAG_LEN == 16)
            *(block*)tag = offset;
#elif(OCB_TAG_LEN > 0)
            memcpy((char*)tag, &offset, OCB_TAG_LEN);
#else
            memcpy((char*)tag, &offset, ctx->tag_len);
#endif
        } else {
            /* No tag buffer: append the tag to the ciphertext. */
#if (OCB_TAG_LEN > 0)
            memcpy((char*)ct + pt_len, &offset, OCB_TAG_LEN);
            pt_len += OCB_TAG_LEN;
#else
            memcpy((char*)ct + pt_len, &offset, ctx->tag_len);
            pt_len += ctx->tag_len;
#endif
        }
    }
    return (int)pt_len;
}
1126 
1127 /* ----------------------------------------------------------------------- */
1128 
1129 /* Compare two regions of memory, taking a constant amount of time for a
1130    given buffer size -- under certain assumptions about the compiler
1131    and machine, of course.
1132 
1133    Use this to avoid timing side-channel attacks.
1134 
1135    Returns 0 for memory regions with equal contents; non-zero otherwise. */
static int constant_time_memcmp(const void* av, const void* bv, size_t n) {
    /* OR together the XOR of every byte pair; the loop always runs the
       full n iterations so timing is independent of where a mismatch
       occurs.  Result is 0 iff the two regions are identical. */
    const uint8_t* x = (const uint8_t*)av;
    const uint8_t* y = (const uint8_t*)bv;
    uint8_t acc = 0;
    size_t idx;

    for (idx = 0; idx < n; idx++)
        acc |= (uint8_t)(x[idx] ^ y[idx]);

    return (int)acc;
}
1150 
/* OCB decryption, mirroring ae_encrypt's incremental calling pattern.
   If 'tag' is NULL the tag is assumed appended to 'ct' and ct_len is
   reduced accordingly.  Returns the plaintext length, or AE_INVALID if
   tag verification fails on the final call.  Note: plaintext is written
   to 'pt' before the tag is checked; callers must discard it on failure. */
int ae_decrypt(ae_ctx* ctx, const void* nonce, const void* ct, int ct_len, const void* ad,
               int ad_len, void* pt, const void* tag, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block offset, checksum;
    unsigned i, k;
    block* ctp = (block*)ct;
    block* ptp = (block*)pt;

    /* Reduce ct_len tag bundled in ct */
    if ((final) && (!tag))
#if (OCB_TAG_LEN > 0)
        ct_len -= OCB_TAG_LEN;
#else
        ct_len -= ctx->tag_len;
#endif

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Encrypt plaintext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = ct_len / (BPI * 16);
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI - 1] = offset; /* seed: oa[BPI-1] carries the running offset */
        do {
            block ta[BPI];
            block_num += BPI;
            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ctp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ctp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ctp[2]);
#if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ctp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ctp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ctp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ctp[5]);
            /* oa[7] still holds the previous iteration's final offset here. */
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ctp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ctp[7]);
#endif
            AES_ecb_decrypt_blks(ta, BPI, &ctx->decrypt_key);
            /* Checksum accumulates plaintext (not ciphertext) blocks. */
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
#if (BPI == 8)
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            ptp[7] = xor_block(ta[7], oa[7]);
            checksum = xor_block(checksum, ptp[7]);
#endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI - 1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI + 1], oa[BPI];

        /* Process remaining plaintext and compute its tag contribution    */
        unsigned remaining = ((unsigned)ct_len) % (BPI * 16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
#if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ctp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ctp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ctp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ctp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ctp[k]);
                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
                ta[k + 1] = xor_block(offset, ctp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ctp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                /* Partial block: Pad = E_K(offset ^ L*); P_* = C_* ^ Pad,
                   and the checksum gets the 10*-padded plaintext. */
                block pad;
                offset = xor_block(offset, ctx->Lstar);
                AES_encrypt((unsigned char*)&offset, tmp.u8, &ctx->encrypt_key);
                pad = tmp.bl;
                memcpy(tmp.u8, ctp + k, remaining);
                tmp.bl = xor_block(tmp.bl, pad);
                tmp.u8[remaining] = (unsigned char)0x80u;
                memcpy(ptp + k, tmp.u8, remaining);
                checksum = xor_block(checksum, tmp.bl);
            }
        }
        /* NOTE(review): k may be 0 here (ct_len a multiple of BPI*16);
           assumes AES_ecb_decrypt_blks tolerates nblks == 0 — confirm. */
        AES_ecb_decrypt_blks(ta, k, &ctx->decrypt_key);
        switch (k) {
#if (BPI == 8)
        case 7:
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            __fallthrough;
        case 6:
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            __fallthrough;
        case 5:
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            __fallthrough;
        case 4:
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
            __fallthrough;
#endif
        case 3:
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            __fallthrough;
        case 2:
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            __fallthrough;
        case 1:
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
        }

        /* Calculate expected tag: E_K(checksum ^ offset ^ L$) ^ AD hash */
        offset = xor_block(offset, ctx->Ldollar);
        tmp.bl = xor_block(offset, checksum);
        AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key);
        tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */

        /* Compare with proposed tag, change ct_len if invalid */
        if ((OCB_TAG_LEN == 16) && tag) {
            if (unequal_blocks(tmp.bl, *(block*)tag))
                ct_len = AE_INVALID;
        } else {
#if (OCB_TAG_LEN > 0)
            int len = OCB_TAG_LEN;
#else
            int len = ctx->tag_len;
#endif
            /* Truncated-tag comparison is done in constant time. */
            if (tag) {
                if (constant_time_memcmp(tag, tmp.u8, len) != 0)
                    ct_len = AE_INVALID;
            } else {
                if (constant_time_memcmp((char*)ct + ct_len, tmp.u8, len) != 0)
                    ct_len = AE_INVALID;
            }
        }
    }
    return ct_len;
}
1350 
1351 /* ----------------------------------------------------------------------- */
1352 /* Simple test program                                                     */
1353 /* ----------------------------------------------------------------------- */
1354 
1355 #if 0
1356 
1357 #include <stdio.h>
1358 #include <time.h>
1359 
1360 #if __GNUC__
1361 #define ALIGN(n) __attribute__((aligned(n)))
1362 #elif _MSC_VER
1363 #define ALIGN(n) __declspec(align(n))
1364 #else /* Not GNU/Microsoft: delete alignment uses.     */
1365 #define ALIGN(n)
1366 #endif
1367 
static void pbuf(void *p, unsigned len, const void *s)
{
    /* Print an optional label s, then len bytes of p as uppercase hex. */
    const unsigned char *bytes = (const unsigned char *)p;
    unsigned i;

    if (s != NULL)
        printf("%s", (const char *)s);
    for (i = 0; i < len; i++)
        printf("%02X", (unsigned)bytes[i]);
    printf("\n");
}
1377 
/* Print encryption results for three (plaintext, AD) length combinations:
   (len,len), (0,len), (len,0), using a fixed 0..11 nonce and 0..127 data. */
static void vectors(ae_ctx *ctx, int len)
{
    ALIGN(16) char pt[128];
    ALIGN(16) char ct[144];
    ALIGN(16) char nonce[] = {0,1,2,3,4,5,6,7,8,9,10,11};
    int i;
    for (i=0; i < 128; i++) pt[i] = i;
    i = ae_encrypt(ctx,nonce,pt,len,pt,len,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",len,len); pbuf(ct, i, NULL);
    i = ae_encrypt(ctx,nonce,pt,0,pt,len,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",0,len); pbuf(ct, i, NULL);
    i = ae_encrypt(ctx,nonce,pt,len,pt,0,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",len,0); pbuf(ct, i, NULL);
}
1392 
/* Self-test: runs the RFC-style iterated vector (encrypting all outputs
   and printing a final tag over them), then an encrypt/decrypt round-trip
   check for message lengths 0..127, exercising the incremental API. */
void validate()
{
    ALIGN(16) char pt[1024];
    ALIGN(16) char ct[1024];
    ALIGN(16) char tag[16];
    ALIGN(16) char nonce[12] = {0,};
    ALIGN(16) char key[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
    ae_ctx ctx;
    char *val_buf, *next;
    int i, len;

    /* Round val_buf up to a 16-byte boundary (original pointer is leaked). */
    val_buf = (char *)malloc(22400 + 16);
    next = val_buf = (char *)(((size_t)val_buf + 16) & ~((size_t)15));

    if (0) {
		ae_init(&ctx, key, 16, 12, 16);
		/* pbuf(&ctx, sizeof(ctx), "CTX: "); */
		vectors(&ctx,0);
		vectors(&ctx,8);
		vectors(&ctx,16);
		vectors(&ctx,24);
		vectors(&ctx,32);
		vectors(&ctx,40);
    }

    memset(key,0,32);
    memset(pt,0,128);
    ae_init(&ctx, key, OCB_KEY_LEN, 12, OCB_TAG_LEN);

    /* RFC Vector test */
    for (i = 0; i < 128; i++) {
        /* Split each message into two AE_PENDING chunks (multiples of
           BPI*16) plus a final ragged chunk. */
        int first = ((i/3)/(BPI*16))*(BPI*16);
        int second = first;
        int third = i - (first + second);

        nonce[11] = i;

        if (0) {
            ae_encrypt(&ctx,nonce,pt,i,pt,i,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,i,pt,0,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,0,pt,i,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,OCB_TAG_LEN);
            next = next+OCB_TAG_LEN;
        } else {
            ae_encrypt(&ctx,nonce,pt,first,pt,first,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first,second,pt+first,second,ct+first,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first+second,third,pt+first+second,third,ct+first+second,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,first,pt,0,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first,second,pt,0,ct+first,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first+second,third,pt,0,ct+first+second,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,0,pt,first,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt,0,pt+first,second,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt,0,pt+first+second,third,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,OCB_TAG_LEN);
            next = next+OCB_TAG_LEN;
        }

    }
    nonce[11] = 0;
    /* Summary tag over the concatenation of all outputs above. */
    ae_encrypt(&ctx,nonce,NULL,0,val_buf,next-val_buf,ct,tag,AE_FINALIZE);
    pbuf(tag,OCB_TAG_LEN,0);


    /* Encrypt/Decrypt test */
    for (i = 0; i < 128; i++) {
        int first = ((i/3)/(BPI*16))*(BPI*16);
        int second = first;
        int third = i - (first + second);

        nonce[11] = i%128;

        if (1) {
            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,tag,AE_FINALIZE);
            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,-1,ct,tag,AE_FINALIZE);
            len = ae_decrypt(&ctx,nonce,ct,len,val_buf,-1,pt,tag,AE_FINALIZE);
            if (len == -1) { printf("Authentication error: %d\n", i); return; }
            if (len != i) { printf("Length error: %d\n", i); return; }
            if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
        } else {
            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,NULL,AE_FINALIZE);
            ae_decrypt(&ctx,nonce,ct,first,val_buf,first,pt,NULL,AE_PENDING);
            ae_decrypt(&ctx,NULL,ct+first,second,val_buf+first,second,pt+first,NULL,AE_PENDING);
            len = ae_decrypt(&ctx,NULL,ct+first+second,len-(first+second),val_buf+first+second,third,pt+first+second,NULL,AE_FINALIZE);
            if (len == -1) { printf("Authentication error: %d\n", i); return; }
            if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
        }

    }
    printf("Decrypt: PASS\n");
}
1495 
/* Entry point for the (normally disabled) self-test build. */
int main()
{
    validate();
    return 0;
}
1501 #endif
1502 
/* Human-readable description of which AES backend was compiled in. */
#if USE_AES_NI
char infoString[] = "OCB3 (AES-NI)";
#elif USE_REFERENCE_AES
char infoString[] = "OCB3 (Reference)";
#elif USE_OPENSSL_AES
char infoString[] = "OCB3 (OpenSSL)";
#endif
1510