/*------------------------------------------------------------------------
/ OCB Version 3 Reference Code (Optimized C) Last modified 12-JUN-2013
/-------------------------------------------------------------------------
/ Copyright (c) 2013 Ted Krovetz.
/
/ Permission to use, copy, modify, and/or distribute this software for any
/ purpose with or without fee is hereby granted, provided that the above
/ copyright notice and this permission notice appear in all copies.
/
/ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
/ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
/ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
/ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
/ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
/ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
/ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
/
/ Phillip Rogaway holds patents relevant to OCB. See the following for
/ his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
/
/ Special thanks to Keegan McAllister for suggesting several good improvements
/
/ Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
/------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------- */
/* Usage notes */
/* ----------------------------------------------------------------------- */

/* - When AE_PENDING is passed as the 'final' parameter of any function,
/ the length parameters must be a multiple of (BPI*16).
/ - When available, SSE or AltiVec registers are used to manipulate data.
/ So, when on machines with these facilities, all pointers passed to
/ any function should be 16-byte aligned.
/ - Plaintext and ciphertext pointers may be equal (i.e., plaintext gets
/ encrypted in-place), but no other pair of pointers may be equal.
/ - This code assumes all x86 processors have SSE2 and SSSE3 instructions
/ when compiling under MSVC. If untrue, alter the #define.
/ - This code is tested for C99 and recent versions of GCC and MSVC. */
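
/* A minimal usage sketch of the streaming interface described above
/ (illustrative only, not compiled; CHUNK and the buffer handling are
/ hypothetical). Every chunk passed with AE_PENDING is a multiple of
/ BPI*16 bytes; only the AE_FINALIZE call may carry a ragged length. */
#if 0
#include <keymaster/key_blob_utils/ae.h>

int encrypt_in_chunks(const unsigned char key[16], const unsigned char nonce[12],
                      const unsigned char* pt, int pt_len, unsigned char* ct) {
    enum { CHUNK = 4 * BPI * 16 }; /* any multiple of BPI*16 works */
    ae_ctx* ctx = ae_allocate(NULL);
    int done = 0;
    if (!ctx || ae_init(ctx, key, 16, 12, 16) != AE_SUCCESS)
        return -1;
    /* The first call supplies the nonce; later calls pass NULL to continue. */
    while (pt_len - done > CHUNK) {
        ae_encrypt(ctx, done ? NULL : nonce, pt + done, CHUNK, NULL, 0, ct + done, NULL,
                   AE_PENDING);
        done += CHUNK;
    }
    /* The final call may have any length; with tag == NULL the tag is
       appended to the ciphertext, so the return value is pt_len + tag len. */
    int ct_len = done + ae_encrypt(ctx, done ? NULL : nonce, pt + done, pt_len - done, NULL, 0,
                                   ct + done, NULL, AE_FINALIZE);
    ae_free(ctx);
    return ct_len;
}
#endif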

/* ----------------------------------------------------------------------- */
/* User configuration options */
/* ----------------------------------------------------------------------- */

/* Set the AES key length to use and length of authentication tag to produce.
/ Setting either to 0 requires the value be set at runtime via ae_init().
/ Some optimizations occur for each when set to a fixed value. */
#define OCB_KEY_LEN 16 /* 0, 16, 24 or 32. 0 means set in ae_init */
#define OCB_TAG_LEN 16 /* 0 to 16. 0 means set in ae_init */

/* This implementation has built-in support for multiple AES APIs. Set any
/ one of the following to non-zero to specify which to use. */
#define USE_OPENSSL_AES 1   /* http://openssl.org */
#define USE_REFERENCE_AES 0 /* Internet search: rijndael-alg-fst.c */
#define USE_AES_NI 0        /* Uses compiler's intrinsics */

/* During encryption and decryption, various "L values" are required.
/ The L values can be precomputed during initialization (requiring extra
/ space in ae_ctx), generated as needed (slightly slowing encryption and
/ decryption), or some combination of the two. L_TABLE_SZ specifies how many
/ L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes
/ are used for L values in ae_ctx. Plaintexts and ciphertexts shorter than
/ 2^L_TABLE_SZ blocks need no L values calculated dynamically. */
#define L_TABLE_SZ 16

/* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts
/ will be shorter than 2^(L_TABLE_SZ+4) bytes in length (for example,
/ with L_TABLE_SZ = 16 that is 2^20 bytes, i.e. 1 MiB). This results
/ in better performance. */
#define L_TABLE_SZ_IS_ENOUGH 1

/* ----------------------------------------------------------------------- */
/* Includes and compiler specific definitions */
/* ----------------------------------------------------------------------- */

#include <keymaster/key_blob_utils/ae.h>
#include <stdlib.h>
#include <string.h>

/* Define standard sized integers */
#if defined(_MSC_VER) && (_MSC_VER < 1600)
typedef unsigned __int8 uint8_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
typedef __int64 int64_t;
#else
#include <stdint.h>
#endif

/* Compiler-specific intrinsics and fixes: bswap64, ntz */
#if _MSC_VER
#define inline __inline     /* MSVC doesn't recognize "inline" in C */
#define restrict __restrict /* MSVC doesn't recognize "restrict" in C */
#define __SSE2__ (_M_IX86 || _M_AMD64 || _M_X64)  /* Assume SSE2 */
#define __SSSE3__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSSE3 */
#include <intrin.h>
#pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy)
#define bswap64(x) _byteswap_uint64(x)
static inline unsigned ntz(unsigned x) {
    _BitScanForward(&x, x);
    return x;
}
#elif __GNUC__
#define inline __inline__                   /* No "inline" in GCC ansi C mode */
#define restrict __restrict__               /* No "restrict" in GCC ansi C mode */
#define bswap64(x) __builtin_bswap64(x)     /* Assuming GCC 4.3+ */
#define ntz(x) __builtin_ctz((unsigned)(x)) /* Assuming GCC 3.4+ */
#else /* Assume some C99 features: stdint.h, inline, restrict */
#define bswap32(x) \
    ((((x)&0xff000000u) >> 24) | (((x)&0x00ff0000u) >> 8) | (((x)&0x0000ff00u) << 8) | \
     (((x)&0x000000ffu) << 24))

static inline uint64_t bswap64(uint64_t x) {
    union {
        uint64_t u64;
        uint32_t u32[2];
    } in, out;
    in.u64 = x;
    out.u32[0] = bswap32(in.u32[1]);
    out.u32[1] = bswap32(in.u32[0]);
    return out.u64;
}

#if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH) /* < 2^13 byte texts */
static inline unsigned ntz(unsigned x) {
    static const unsigned char tz_table[] = {
        0, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2,
        3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2,
        4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 8, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2,
        3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2,
        5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2};
    return tz_table[x / 4];
}
#else /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */
static inline unsigned ntz(unsigned x) {
    /* Isolate the lowest set bit, multiply by a de Bruijn constant, and use
       the top five bits of the product to index a lookup table. For example,
       x = 12 = 0b1100: x & -x = 4, (4 * 0x077CB531u) >> 27 = 3, and
       tz_table[3] = 2 = ntz(12). */
    static const unsigned char tz_table[32] = {0,  1,  28, 2,  29, 14, 24, 3,  30, 22, 20,
                                               15, 25, 17, 4,  8,  31, 27, 13, 23, 21, 19,
                                               16, 7,  26, 12, 18, 6,  11, 5,  10, 9};
    return tz_table[((uint32_t)((x & -x) * 0x077CB531u)) >> 27];
}
#endif
#endif

/* ----------------------------------------------------------------------- */
/* Define blocks and operations -- Patch if incorrect on your compiler. */
/* ----------------------------------------------------------------------- */

#if __SSE2__ && !KEYMASTER_CLANG_TEST_BUILD
#include <xmmintrin.h> /* SSE instructions and _mm_malloc */
#include <emmintrin.h> /* SSE2 instructions */
typedef __m128i block;
#define xor_block(x, y) _mm_xor_si128(x, y)
#define zero_block() _mm_setzero_si128()
#define unequal_blocks(x, y) (_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) != 0xffff)
#if __SSSE3__ || USE_AES_NI
#include <tmmintrin.h> /* SSSE3 instructions */
#define swap_if_le(b) \
    _mm_shuffle_epi8(b, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
#else
static inline block swap_if_le(block b) {
    block a = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 1, 2, 3));
    a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
    a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
    return _mm_xor_si128(_mm_srli_epi16(a, 8), _mm_slli_epi16(a, 8));
}
#endif
static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block hi = _mm_load_si128((__m128i*)(KtopStr + 0));  /* hi = B A */
    block lo = _mm_loadu_si128((__m128i*)(KtopStr + 1)); /* lo = C B */
    __m128i lshift = _mm_cvtsi32_si128(bot);
    __m128i rshift = _mm_cvtsi32_si128(64 - bot);
    lo = _mm_xor_si128(_mm_sll_epi64(hi, lshift), _mm_srl_epi64(lo, rshift));
#if __SSSE3__ || USE_AES_NI
    return _mm_shuffle_epi8(lo,
                            _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7));
#else
    return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1, 0, 3, 2)));
#endif
}
static inline block double_block(block bl) {
    /* Doubling in GF(2^128): shift the 128-bit value left one bit and, if the
       top bit was set, xor 135 (the reduction polynomial) into the low byte.
       SSE2 has no 128-bit shift, so each 32-bit lane is shifted and the
       carries are patched up: broadcast each lane's top bit, keep 1 for the
       lanes whose carry feeds the next lane (135 for the top lane's
       wraparound), rotate the carries into place, then merge. */
    const __m128i mask = _mm_set_epi32(135, 1, 1, 1);
    __m128i tmp = _mm_srai_epi32(bl, 31);
    tmp = _mm_and_si128(tmp, mask);
    tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 1, 0, 3));
    bl = _mm_slli_epi32(bl, 1);
    return _mm_xor_si128(bl, tmp);
}
#elif __ALTIVEC__
#include <altivec.h>
typedef vector unsigned block;
#define xor_block(x, y) vec_xor(x, y)
#define zero_block() vec_splat_u32(0)
#define unequal_blocks(x, y) vec_any_ne(x, y)
#define swap_if_le(b) (b)
#if __PPC64__
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    union {
        uint64_t u64[2];
        block bl;
    } rval;
    rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
    rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
    return rval.bl;
}
#else
/* Special handling: Shifts are mod 32, and no 64-bit types */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const vector unsigned k32 = {32, 32, 32, 32};
    vector unsigned hi = *(vector unsigned*)(KtopStr + 0);
    vector unsigned lo = *(vector unsigned*)(KtopStr + 2);
    vector unsigned bot_vec;
    if (bot < 32) {
        lo = vec_sld(hi, lo, 4);
    } else {
        vector unsigned t = vec_sld(hi, lo, 4);
        lo = vec_sld(hi, lo, 8);
        hi = t;
        bot = bot - 32;
    }
    if (bot == 0)
        return hi;
    *(unsigned*)&bot_vec = bot;
    vector unsigned lshift = vec_splat(bot_vec, 0);
    vector unsigned rshift = vec_sub(k32, lshift);
    hi = vec_sl(hi, lshift);
    lo = vec_sr(lo, rshift);
    return vec_xor(hi, lo);
}
#endif
static inline block double_block(block b) {
    const vector unsigned char mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    const vector unsigned char perm = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0};
    const vector unsigned char shift7 = vec_splat_u8(7);
    const vector unsigned char shift1 = vec_splat_u8(1);
    vector unsigned char c = (vector unsigned char)b;
    vector unsigned char t = vec_sra(c, shift7);
    t = vec_and(t, mask);
    t = vec_perm(t, t, perm);
    c = vec_sl(c, shift1);
    return (block)vec_xor(c, t);
}
#elif __ARM_NEON__
#include <arm_neon.h>
typedef int8x16_t block __attribute__((aligned(16))); /* Yay! Endian-neutral reads! */
#define xor_block(x, y) veorq_s8(x, y)
#define zero_block() vdupq_n_s8(0)
static inline int unequal_blocks(block a, block b) {
    int64x2_t t = veorq_s64((int64x2_t)a, (int64x2_t)b);
    return (vgetq_lane_s64(t, 0) | vgetq_lane_s64(t, 1)) != 0;
}
#define swap_if_le(b) (b) /* Using endian-neutral int8x16_t */
/* KtopStr is reg correct by 64 bits, return mem correct */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1};
    const int64x2_t k64 = {-64, -64};
    /* Copy hi and lo into local variables to ensure proper alignment */
    uint64x2_t hi = vld1q_u64(KtopStr + 0); /* hi = A B */
    uint64x2_t lo = vld1q_u64(KtopStr + 1); /* lo = B C */
    int64x2_t ls = vdupq_n_s64(bot);
    int64x2_t rs = vqaddq_s64(k64, ls);
    block rval = (block)veorq_u64(vshlq_u64(hi, ls), vshlq_u64(lo, rs));
    if (little.endian)
        rval = vrev64q_s8(rval);
    return rval;
}
static inline block double_block(block b) {
    const block mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    block tmp = vshrq_n_s8(b, 7);
    tmp = vandq_s8(tmp, mask);
    tmp = vextq_s8(tmp, tmp, 1); /* Rotate high byte to end */
    b = vshlq_n_s8(b, 1);
    return veorq_s8(tmp, b);
}
#else
typedef struct { uint64_t l, r; } block;
static inline block xor_block(block x, block y) {
    x.l ^= y.l;
    x.r ^= y.r;
    return x;
}
static inline block zero_block(void) {
    const block t = {0, 0};
    return t;
}
#define unequal_blocks(x, y) ((((x).l ^ (y).l) | ((x).r ^ (y).r)) != 0)
static inline block swap_if_le(block b) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1};
    if (little.endian) {
        block r;
        r.l = bswap64(b.l);
        r.r = bswap64(b.r);
        return r;
    } else
        return b;
}

/* KtopStr is reg correct by 64 bits, return mem correct */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block rval;
    if (bot != 0) {
        rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
        rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
    } else {
        rval.l = KtopStr[0];
        rval.r = KtopStr[1];
    }
    return swap_if_le(rval);
}

#if __GNUC__ && __arm__
static inline block double_block(block b) {
    __asm__("adds %1,%1,%1\n\t"
            "adcs %H1,%H1,%H1\n\t"
            "adcs %0,%0,%0\n\t"
            "adcs %H0,%H0,%H0\n\t"
            "it cs\n\t"
            "eorcs %1,%1,#135"
            : "+r"(b.l), "+r"(b.r)
            :
            : "cc");
    return b;
}
#else
static inline block double_block(block b) {
    uint64_t t = (uint64_t)((int64_t)b.l >> 63);
    b.l = (b.l + b.l) ^ (b.r >> 63);
    b.r = (b.r + b.r) ^ (t & 135);
    return b;
}
#endif

#endif

#ifndef __has_attribute
#define __has_attribute(x) 0
#endif

#if __has_attribute(fallthrough)
#define __fallthrough __attribute__((__fallthrough__));
#else
#define __fallthrough
#endif

/* ----------------------------------------------------------------------- */
/* AES - Code uses OpenSSL API. Other implementations get mapped to it. */
/* ----------------------------------------------------------------------- */

/*---------------*/
#if USE_OPENSSL_AES
/*---------------*/

#include <openssl/aes.h> /* http://openssl.org/ */

/* How to ECB encrypt an array of blocks, in place */
static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    while (nblks) {
        --nblks;
        AES_encrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
    }
}

static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    while (nblks) {
        --nblks;
        AES_decrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
    }
}

#define BPI 4 /* Number of blocks in buffer per ECB call */

/*-------------------*/
#elif USE_REFERENCE_AES
/*-------------------*/

#include "rijndael-alg-fst.h" /* Barreto's Public-Domain Code */
#if (OCB_KEY_LEN == 0)
typedef struct {
    uint32_t rd_key[60];
    int rounds;
} AES_KEY;
#define ROUNDS(ctx) ((ctx)->rounds)
#define AES_set_encrypt_key(x, y, z) \
    do { \
        rijndaelKeySetupEnc((z)->rd_key, x, y); \
        (z)->rounds = y / 32 + 6; \
    } while (0)
#define AES_set_decrypt_key(x, y, z) \
    do { \
        rijndaelKeySetupDec((z)->rd_key, x, y); \
        (z)->rounds = y / 32 + 6; \
    } while (0)
#else
typedef struct { uint32_t rd_key[OCB_KEY_LEN + 28]; } AES_KEY;
#define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
#define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y)
#define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y)
#endif
#define AES_encrypt(x, y, z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y)
#define AES_decrypt(x, y, z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y)

static void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    while (nblks) {
        --nblks;
        AES_encrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
    }
}

void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    while (nblks) {
        --nblks;
        AES_decrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
    }
}

#define BPI 4 /* Number of blocks in buffer per ECB call */

/*----------*/
#elif USE_AES_NI
/*----------*/

#include <wmmintrin.h>

#if (OCB_KEY_LEN == 0)
typedef struct {
    __m128i rd_key[15];
    int rounds;
} AES_KEY;
#define ROUNDS(ctx) ((ctx)->rounds)
#else
typedef struct { __m128i rd_key[7 + OCB_KEY_LEN / 4]; } AES_KEY;
#define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
#endif

#define EXPAND_ASSIST(v1, v2, v3, v4, shuff_const, aes_const) \
    v2 = _mm_aeskeygenassist_si128(v4, aes_const); \
    v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 16)); \
    v1 = _mm_xor_si128(v1, v3); \
    v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 140)); \
    v1 = _mm_xor_si128(v1, v3); \
    v2 = _mm_shuffle_epi32(v2, shuff_const); \
    v1 = _mm_xor_si128(v1, v2)

#define EXPAND192_STEP(idx, aes_const) \
    EXPAND_ASSIST(x0, x1, x2, x3, 85, aes_const); \
    x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4)); \
    x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255)); \
    kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(x0), 68)); \
    kp[idx + 1] = \
        _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), _mm_castsi128_ps(x3), 78)); \
    EXPAND_ASSIST(x0, x1, x2, x3, 85, (aes_const * 2)); \
    x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4)); \
    x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255)); \
    kp[idx + 2] = x0; \
    tmp = x3

static void AES_128_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2;
    __m128i* kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 1);
    kp[1] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 2);
    kp[2] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 4);
    kp[3] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 8);
    kp[4] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 16);
    kp[5] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 32);
    kp[6] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 64);
    kp[7] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 128);
    kp[8] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 27);
    kp[9] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 54);
    kp[10] = x0;
}

static void AES_192_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2, x3, tmp, *kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    tmp = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
    x2 = _mm_setzero_si128();
    EXPAND192_STEP(1, 1);
    EXPAND192_STEP(4, 4);
    EXPAND192_STEP(7, 16);
    EXPAND192_STEP(10, 64);
}

static void AES_256_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2, x3, *kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 1);
    kp[2] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 1);
    kp[3] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 2);
    kp[4] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 2);
    kp[5] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 4);
    kp[6] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 4);
    kp[7] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 8);
    kp[8] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 8);
    kp[9] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 16);
    kp[10] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 16);
    kp[11] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 32);
    kp[12] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 32);
    kp[13] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 64);
    kp[14] = x0;
}

static int AES_set_encrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
    if (bits == 128) {
        AES_128_Key_Expansion(userKey, key);
    } else if (bits == 192) {
        AES_192_Key_Expansion(userKey, key);
    } else if (bits == 256) {
        AES_256_Key_Expansion(userKey, key);
    }
#if (OCB_KEY_LEN == 0)
    key->rounds = 6 + bits / 32;
#endif
    return 0;
}

static void AES_set_decrypt_key_fast(AES_KEY* dkey, const AES_KEY* ekey) {
    int j = 0;
    int i = ROUNDS(ekey);
#if (OCB_KEY_LEN == 0)
    dkey->rounds = i;
#endif
    dkey->rd_key[i--] = ekey->rd_key[j++];
    while (i)
        dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);
    dkey->rd_key[i] = ekey->rd_key[j];
}

static int AES_set_decrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
    AES_KEY temp_key;
    AES_set_encrypt_key(userKey, bits, &temp_key);
    AES_set_decrypt_key_fast(key, &temp_key);
    return 0;
}

static inline void AES_encrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
    int j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    __m128i tmp = _mm_load_si128((__m128i*)in);
    tmp = _mm_xor_si128(tmp, sched[0]);
    for (j = 1; j < rnds; j++)
        tmp = _mm_aesenc_si128(tmp, sched[j]);
    tmp = _mm_aesenclast_si128(tmp, sched[j]);
    _mm_store_si128((__m128i*)out, tmp);
}

static inline void AES_decrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
    int j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    __m128i tmp = _mm_load_si128((__m128i*)in);
    tmp = _mm_xor_si128(tmp, sched[0]);
    for (j = 1; j < rnds; j++)
        tmp = _mm_aesdec_si128(tmp, sched[j]);
    tmp = _mm_aesdeclast_si128(tmp, sched[j]);
    _mm_store_si128((__m128i*)out, tmp);
}

static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i, j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_xor_si128(blks[i], sched[0]);
    for (j = 1; j < rnds; ++j)
        for (i = 0; i < nblks; ++i)
            blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_aesenclast_si128(blks[i], sched[j]);
}

static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i, j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_xor_si128(blks[i], sched[0]);
    for (j = 1; j < rnds; ++j)
        for (i = 0; i < nblks; ++i)
            blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_aesdeclast_si128(blks[i], sched[j]);
}

#define BPI 8 /* Number of blocks in buffer per ECB call */
              /* Set to 4 for Westmere, 8 for Sandy Bridge */

#endif

/* ----------------------------------------------------------------------- */
/* Define OCB context structure. */
/* ----------------------------------------------------------------------- */

/*------------------------------------------------------------------------
/ Each item in the OCB context is stored either "memory correct" or
/ "register correct". On big-endian machines, these are identical. On
/ little-endian machines, one must choose whether the byte-string
/ is in the correct order when it resides in memory or in registers.
/ It must be register correct whenever it is to be manipulated
/ arithmetically, but must be memory correct whenever it interacts
/ with the plaintext or ciphertext.
/------------------------------------------------------------------------- */
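
/* An illustrative (not compiled) sketch of the convention above: to operate
/ on a memory-correct field arithmetically, convert it to register correct
/ with swap_if_le, do the arithmetic, and convert back. This is the same
/ pattern ae_init and getL use when deriving L values by doubling. */
#if 0
static block double_memory_correct(block x) {
    block reg = swap_if_le(x); /* memory correct -> register correct */
    reg = double_block(reg);   /* arithmetic happens register correct */
    return swap_if_le(reg);    /* back to memory correct */
}
#endif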

struct _ae_ctx {
    block offset;        /* Memory correct */
    block checksum;      /* Memory correct */
    block Lstar;         /* Memory correct */
    block Ldollar;       /* Memory correct */
    block L[L_TABLE_SZ]; /* Memory correct */
    block ad_checksum;   /* Memory correct */
    block ad_offset;     /* Memory correct */
    block cached_Top;    /* Memory correct */
    uint64_t KtopStr[3]; /* Register correct, each item */
    uint32_t ad_blocks_processed;
    uint32_t blocks_processed;
    AES_KEY decrypt_key;
    AES_KEY encrypt_key;
#if (OCB_TAG_LEN == 0)
    unsigned tag_len;
#endif
};

/* ----------------------------------------------------------------------- */
/* L table lookup (or on-the-fly generation) */
/* ----------------------------------------------------------------------- */

#if L_TABLE_SZ_IS_ENOUGH
#define getL(_ctx, _tz) ((_ctx)->L[_tz])
#else
static block getL(const ae_ctx* ctx, unsigned tz) {
    if (tz < L_TABLE_SZ)
        return ctx->L[tz];
    else {
        unsigned i;
        /* Bring L[MAX] into registers, make it register correct */
        block rval = swap_if_le(ctx->L[L_TABLE_SZ - 1]);
        rval = double_block(rval);
        for (i = L_TABLE_SZ; i < tz; i++)
            rval = double_block(rval);
        return swap_if_le(rval); /* To memory correct */
    }
}
#endif
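
/* An illustrative (not compiled) sketch of the offset recurrence the L table
/ serves: block number i (one-based) folds L[ntz(i)] into the running offset,
/ so e.g. block 8 = 0b1000 uses L[3]. The unrolled loops in process_ad,
/ ae_encrypt, and ae_decrypt compute the same values without calling ntz on
/ every block. */
#if 0
static block next_offset(const ae_ctx* ctx, block offset, unsigned block_num) {
    return xor_block(offset, getL(ctx, ntz(block_num)));
}
#endif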

/* ----------------------------------------------------------------------- */
/* Public functions */
/* ----------------------------------------------------------------------- */

/* 32-bit SSE2 and Altivec systems need to be forced to allocate memory
   on 16-byte alignments. (I believe all major 64-bit systems do already.) */

ae_ctx* ae_allocate(void* misc) {
    void* p;
    (void)misc; /* misc unused in this implementation */
#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
    p = _mm_malloc(sizeof(ae_ctx), 16);
#elif ((__ALTIVEC__ && !__PPC64__) || __ARM_NEON__)
    if (posix_memalign(&p, 16, sizeof(ae_ctx)) != 0)
        p = NULL;
#else
    p = malloc(sizeof(ae_ctx));
#endif
    return (ae_ctx*)p;
}

void ae_free(ae_ctx* ctx) {
#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
    _mm_free(ctx);
#else
    free(ctx);
#endif
}

/* ----------------------------------------------------------------------- */

int ae_clear(ae_ctx* ctx) /* Zero ae_ctx and undo initialization */
{
    memset(ctx, 0, sizeof(ae_ctx));
    return AE_SUCCESS;
}

int ae_ctx_sizeof(void) {
    return (int)sizeof(ae_ctx);
}

/* ----------------------------------------------------------------------- */

int ae_init(ae_ctx* ctx, const void* key, int key_len, int nonce_len, int tag_len) {
    unsigned i;
    block tmp_blk;

    if (nonce_len != 12)
        return AE_NOT_SUPPORTED;

    /* Initialize encryption & decryption keys */
#if (OCB_KEY_LEN > 0)
    key_len = OCB_KEY_LEN;
#endif
    AES_set_encrypt_key((unsigned char*)key, key_len * 8, &ctx->encrypt_key);
#if USE_AES_NI
    AES_set_decrypt_key_fast(&ctx->decrypt_key, &ctx->encrypt_key);
#else
    AES_set_decrypt_key((unsigned char*)key, (int)(key_len * 8), &ctx->decrypt_key);
#endif

    /* Zero things that need zeroing */
    ctx->cached_Top = ctx->ad_checksum = zero_block();
    ctx->ad_blocks_processed = 0;

    /* Compute key-dependent values */
    AES_encrypt((unsigned char*)&ctx->cached_Top, (unsigned char*)&ctx->Lstar, &ctx->encrypt_key);
    tmp_blk = swap_if_le(ctx->Lstar);
    tmp_blk = double_block(tmp_blk);
    ctx->Ldollar = swap_if_le(tmp_blk);
    tmp_blk = double_block(tmp_blk);
    ctx->L[0] = swap_if_le(tmp_blk);
    for (i = 1; i < L_TABLE_SZ; i++) {
        tmp_blk = double_block(tmp_blk);
        ctx->L[i] = swap_if_le(tmp_blk);
    }

#if (OCB_TAG_LEN == 0)
    ctx->tag_len = tag_len;
#else
    (void)tag_len; /* Suppress unused-variable warning */
#endif

    return AE_SUCCESS;
}
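
/* A one-shot usage sketch (illustrative only, not compiled; the buffer sizes
/ and msg_len <= 1024 bound are hypothetical, and real callers should also
/ honor the 16-byte alignment note at the top of this file). With
/ OCB_TAG_LEN == 16 and tag == NULL, ae_encrypt appends the 16-byte tag to
/ the ciphertext, and ae_decrypt verifies it, returning AE_INVALID (-1) on
/ failure. */
#if 0
static int round_trip(const unsigned char key[16], const unsigned char nonce[12],
                      const unsigned char* msg, int msg_len) {
    unsigned char ct[1024 + 16], pt[1024];
    ae_ctx* ctx = ae_allocate(NULL);
    int ct_len, pt_len, ok = -1;
    if (ctx && ae_init(ctx, key, 16, 12, 16) == AE_SUCCESS) {
        ct_len = ae_encrypt(ctx, nonce, msg, msg_len, NULL, 0, ct, NULL, AE_FINALIZE);
        pt_len = ae_decrypt(ctx, nonce, ct, ct_len, NULL, 0, pt, NULL, AE_FINALIZE);
        ok = (pt_len == msg_len && memcmp(pt, msg, (size_t)msg_len) == 0) ? 0 : -1;
    }
    ae_free(ctx);
    return ok;
}
#endif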

/* ----------------------------------------------------------------------- */

static block gen_offset_from_nonce(ae_ctx* ctx, const void* nonce) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1};
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    unsigned idx;

    /* Replace cached nonce Top if needed */
#if (OCB_TAG_LEN > 0)
    if (little.endian)
        tmp.u32[0] = 0x01000000 + ((OCB_TAG_LEN * 8 % 128) << 1);
    else
        tmp.u32[0] = 0x00000001 + ((OCB_TAG_LEN * 8 % 128) << 25);
#else
    if (little.endian)
        tmp.u32[0] = 0x01000000 + ((ctx->tag_len * 8 % 128) << 1);
    else
        tmp.u32[0] = 0x00000001 + ((ctx->tag_len * 8 % 128) << 25);
#endif
    tmp.u32[1] = ((uint32_t*)nonce)[0];
    tmp.u32[2] = ((uint32_t*)nonce)[1];
    tmp.u32[3] = ((uint32_t*)nonce)[2];
    idx = (unsigned)(tmp.u8[15] & 0x3f);           /* Get low 6 bits of nonce */
    tmp.u8[15] = tmp.u8[15] & 0xc0;                /* Zero low 6 bits of nonce */
    if (unequal_blocks(tmp.bl, ctx->cached_Top)) { /* Cached? */
        ctx->cached_Top = tmp.bl;                  /* Update cache, KtopStr */
        AES_encrypt(tmp.u8, (unsigned char*)&ctx->KtopStr, &ctx->encrypt_key);
        if (little.endian) { /* Make Register Correct */
            ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]);
            ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]);
        }
        /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
        ctx->KtopStr[2] = ctx->KtopStr[0] ^ (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56);
    }
    return gen_offset(ctx->KtopStr, idx);
}

static void process_ad(ae_ctx* ctx, const void* ad, int ad_len, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block ad_offset, ad_checksum;
    const block* adp = (block*)ad;
    unsigned i, k, tz, remaining;

    ad_offset = ctx->ad_offset;
    ad_checksum = ctx->ad_checksum;
    i = ad_len / (BPI * 16);
    if (i) {
        unsigned ad_block_num = ctx->ad_blocks_processed;
        do {
            block ta[BPI], oa[BPI];
            ad_block_num += BPI;
            tz = ntz(ad_block_num);
            oa[0] = xor_block(ad_offset, ctx->L[0]);
            ta[0] = xor_block(oa[0], adp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], adp[1]);
            oa[2] = xor_block(ad_offset, ctx->L[1]);
            ta[2] = xor_block(oa[2], adp[2]);
#if BPI == 4
            ad_offset = xor_block(oa[2], getL(ctx, tz));
            ta[3] = xor_block(ad_offset, adp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], adp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], adp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], adp[5]);
            oa[6] = xor_block(ad_offset, ctx->L[2]);
            ta[6] = xor_block(oa[6], adp[6]);
            ad_offset = xor_block(oa[6], getL(ctx, tz));
            ta[7] = xor_block(ad_offset, adp[7]);
#endif
            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
            ad_checksum = xor_block(ad_checksum, ta[0]);
            ad_checksum = xor_block(ad_checksum, ta[1]);
            ad_checksum = xor_block(ad_checksum, ta[2]);
            ad_checksum = xor_block(ad_checksum, ta[3]);
#if (BPI == 8)
            ad_checksum = xor_block(ad_checksum, ta[4]);
            ad_checksum = xor_block(ad_checksum, ta[5]);
            ad_checksum = xor_block(ad_checksum, ta[6]);
            ad_checksum = xor_block(ad_checksum, ta[7]);
#endif
            adp += BPI;
        } while (--i);
        ctx->ad_blocks_processed = ad_block_num;
        ctx->ad_offset = ad_offset;
        ctx->ad_checksum = ad_checksum;
    }

    if (final) {
        block ta[BPI];

        /* Process remaining associated data, compute its tag contribution */
        remaining = ((unsigned)ad_len) % (BPI * 16);
        if (remaining) {
            k = 0;
#if (BPI == 8)
            if (remaining >= 64) {
                tmp.bl = xor_block(ad_offset, ctx->L[0]);
                ta[0] = xor_block(tmp.bl, adp[0]);
                tmp.bl = xor_block(tmp.bl, ctx->L[1]);
                ta[1] = xor_block(tmp.bl, adp[1]);
                ad_offset = xor_block(ad_offset, ctx->L[1]);
                ta[2] = xor_block(ad_offset, adp[2]);
                ad_offset = xor_block(ad_offset, ctx->L[2]);
                ta[3] = xor_block(ad_offset, adp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                ad_offset = xor_block(ad_offset, getL(ctx, ntz(k + 2)));
                ta[k + 1] = xor_block(ad_offset, adp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                remaining = remaining - 16;
                ++k;
            }
            if (remaining) {
                ad_offset = xor_block(ad_offset, ctx->Lstar);
                tmp.bl = zero_block();
                memcpy(tmp.u8, adp + k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                ta[k] = xor_block(ad_offset, tmp.bl);
                ++k;
            }
            AES_ecb_encrypt_blks(ta, k, &ctx->encrypt_key);
            switch (k) {
#if (BPI == 8)
            case 8:
                ad_checksum = xor_block(ad_checksum, ta[7]);
                __fallthrough;
            case 7:
                ad_checksum = xor_block(ad_checksum, ta[6]);
                __fallthrough;
            case 6:
                ad_checksum = xor_block(ad_checksum, ta[5]);
                __fallthrough;
            case 5:
                ad_checksum = xor_block(ad_checksum, ta[4]);
                __fallthrough;
#endif
            case 4:
                ad_checksum = xor_block(ad_checksum, ta[3]);
                __fallthrough;
            case 3:
                ad_checksum = xor_block(ad_checksum, ta[2]);
                __fallthrough;
            case 2:
                ad_checksum = xor_block(ad_checksum, ta[1]);
                __fallthrough;
            case 1:
                ad_checksum = xor_block(ad_checksum, ta[0]);
            }
            ctx->ad_checksum = ad_checksum;
        }
    }
}

/* ----------------------------------------------------------------------- */

int ae_encrypt(ae_ctx* ctx, const void* nonce, const void* pt, int pt_len, const void* ad,
               int ad_len, void* ct, void* tag, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block offset, checksum;
    unsigned i, k;
    block* ctp = (block*)ct;
    const block* ptp = (block*)pt;

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Encrypt plaintext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = pt_len / (BPI * 16);
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI - 1] = offset;
        do {
            block ta[BPI];
            block_num += BPI;
            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ptp[0]);
            checksum = xor_block(checksum, ptp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ptp[1]);
            checksum = xor_block(checksum, ptp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ptp[2]);
            checksum = xor_block(checksum, ptp[2]);
#if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ptp[4]);
            checksum = xor_block(checksum, ptp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ptp[5]);
            checksum = xor_block(checksum, ptp[5]);
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ptp[6]);
            checksum = xor_block(checksum, ptp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ptp[7]);
            checksum = xor_block(checksum, ptp[7]);
#endif
            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
            ctp[0] = xor_block(ta[0], oa[0]);
            ctp[1] = xor_block(ta[1], oa[1]);
            ctp[2] = xor_block(ta[2], oa[2]);
            ctp[3] = xor_block(ta[3], oa[3]);
#if (BPI == 8)
            ctp[4] = xor_block(ta[4], oa[4]);
            ctp[5] = xor_block(ta[5], oa[5]);
            ctp[6] = xor_block(ta[6], oa[6]);
            ctp[7] = xor_block(ta[7], oa[7]);
#endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI - 1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI + 1], oa[BPI];

        /* Process remaining plaintext and compute its tag contribution */
        unsigned remaining = ((unsigned)pt_len) % (BPI * 16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
#if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ptp[0]);
                checksum = xor_block(checksum, ptp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ptp[1]);
                checksum = xor_block(checksum, ptp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ptp[2]);
                checksum = xor_block(checksum, ptp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ptp[3]);
                checksum = xor_block(checksum, ptp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
                ta[k + 1] = xor_block(offset, ptp[k + 1]);
                checksum = xor_block(checksum, ptp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                tmp.bl = zero_block();
                memcpy(tmp.u8, ptp + k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                checksum = xor_block(checksum, tmp.bl);
                ta[k] = offset = xor_block(offset, ctx->Lstar);
                ++k;
            }
        }
        offset = xor_block(offset, ctx->Ldollar); /* Part of tag gen */
        ta[k] = xor_block(offset, checksum);      /* Part of tag gen */
        AES_ecb_encrypt_blks(ta, k + 1, &ctx->encrypt_key);
        offset = xor_block(ta[k], ctx->ad_checksum); /* Part of tag gen */
        if (remaining) {
            --k;
            tmp.bl = xor_block(tmp.bl, ta[k]);
            memcpy(ctp + k, tmp.u8, remaining);
        }
        switch (k) {
#if (BPI == 8)
        case 7:
            ctp[6] = xor_block(ta[6], oa[6]);
            __fallthrough;
        case 6:
            ctp[5] = xor_block(ta[5], oa[5]);
            __fallthrough;
        case 5:
            ctp[4] = xor_block(ta[4], oa[4]);
            __fallthrough;
        case 4:
            ctp[3] = xor_block(ta[3], oa[3]);
            __fallthrough;
#endif
        case 3:
            ctp[2] = xor_block(ta[2], oa[2]);
            __fallthrough;
        case 2:
            ctp[1] = xor_block(ta[1], oa[1]);
            __fallthrough;
        case 1:
            ctp[0] = xor_block(ta[0], oa[0]);
        }

        /* Tag is placed at the correct location */
        if (tag) {
#if (OCB_TAG_LEN == 16)
            *(block*)tag = offset;
#elif (OCB_TAG_LEN > 0)
            memcpy((char*)tag, &offset, OCB_TAG_LEN);
#else
            memcpy((char*)tag, &offset, ctx->tag_len);
#endif
        } else {
#if (OCB_TAG_LEN > 0)
            memcpy((char*)ct + pt_len, &offset, OCB_TAG_LEN);
            pt_len += OCB_TAG_LEN;
#else
            memcpy((char*)ct + pt_len, &offset, ctx->tag_len);
            pt_len += ctx->tag_len;
#endif
        }
    }
    return (int)pt_len;
}

/* ----------------------------------------------------------------------- */

/* Compare two regions of memory, taking a constant amount of time for a
   given buffer size -- under certain assumptions about the compiler
   and machine, of course.

   Use this to avoid timing side-channel attacks.

   Returns 0 for memory regions with equal contents; non-zero otherwise. */
static int constant_time_memcmp(const void* av, const void* bv, size_t n) {
    const uint8_t* a = (const uint8_t*)av;
    const uint8_t* b = (const uint8_t*)bv;
    uint8_t result = 0;
    size_t i;

    for (i = 0; i < n; i++) {
        result |= *a ^ *b;
        a++;
        b++;
    }

    return (int)result;
}

int ae_decrypt(ae_ctx* ctx, const void* nonce, const void* ct, int ct_len, const void* ad,
               int ad_len, void* pt, const void* tag, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block offset, checksum;
    unsigned i, k;
    block* ctp = (block*)ct;
    block* ptp = (block*)pt;

    /* Reduce ct_len if the tag is bundled into the ciphertext */
    if ((final) && (!tag))
#if (OCB_TAG_LEN > 0)
        ct_len -= OCB_TAG_LEN;
#else
        ct_len -= ctx->tag_len;
#endif

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Decrypt ciphertext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = ct_len / (BPI * 16);
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI - 1] = offset;
        do {
            block ta[BPI];
            block_num += BPI;
            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ctp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ctp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ctp[2]);
#if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ctp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ctp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ctp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ctp[5]);
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ctp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ctp[7]);
#endif
            AES_ecb_decrypt_blks(ta, BPI, &ctx->decrypt_key);
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
#if (BPI == 8)
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            ptp[7] = xor_block(ta[7], oa[7]);
            checksum = xor_block(checksum, ptp[7]);
#endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI - 1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI + 1], oa[BPI];

        /* Process remaining ciphertext and compute its tag contribution */
        unsigned remaining = ((unsigned)ct_len) % (BPI * 16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
#if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ctp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ctp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ctp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ctp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ctp[k]);
                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
                ta[k + 1] = xor_block(offset, ctp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ctp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                block pad;
                offset = xor_block(offset, ctx->Lstar);
                AES_encrypt((unsigned char*)&offset, tmp.u8, &ctx->encrypt_key);
                pad = tmp.bl;
                memcpy(tmp.u8, ctp + k, remaining);
                tmp.bl = xor_block(tmp.bl, pad);
                tmp.u8[remaining] = (unsigned char)0x80u;
                memcpy(ptp + k, tmp.u8, remaining);
                checksum = xor_block(checksum, tmp.bl);
            }
        }
        AES_ecb_decrypt_blks(ta, k, &ctx->decrypt_key);
        switch (k) {
#if (BPI == 8)
        case 7:
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            __fallthrough;
        case 6:
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            __fallthrough;
        case 5:
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            __fallthrough;
        case 4:
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
            __fallthrough;
#endif
        case 3:
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            __fallthrough;
        case 2:
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            __fallthrough;
        case 1:
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
        }

        /* Calculate expected tag */
        offset = xor_block(offset, ctx->Ldollar);
        tmp.bl = xor_block(offset, checksum);
        AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key);
        tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */

        /* Compare with proposed tag, change ct_len if invalid */
        if ((OCB_TAG_LEN == 16) && tag) {
            if (unequal_blocks(tmp.bl, *(block*)tag))
                ct_len = AE_INVALID;
        } else {
#if (OCB_TAG_LEN > 0)
            int len = OCB_TAG_LEN;
#else
            int len = ctx->tag_len;
#endif
            if (tag) {
                if (constant_time_memcmp(tag, tmp.u8, len) != 0)
                    ct_len = AE_INVALID;
            } else {
                if (constant_time_memcmp((char*)ct + ct_len, tmp.u8, len) != 0)
                    ct_len = AE_INVALID;
            }
        }
    }
    return ct_len;
}

/* ----------------------------------------------------------------------- */
/* Simple test program */
/* ----------------------------------------------------------------------- */

#if 0

#include <stdio.h>
#include <time.h>

#if __GNUC__
#define ALIGN(n) __attribute__((aligned(n)))
#elif _MSC_VER
#define ALIGN(n) __declspec(align(n))
#else /* Not GNU/Microsoft: delete alignment uses. */
#define ALIGN(n)
#endif

static void pbuf(void *p, unsigned len, const void *s)
{
    unsigned i;
    if (s)
        printf("%s", (char *)s);
    for (i = 0; i < len; i++)
        printf("%02X", (unsigned)(((unsigned char *)p)[i]));
    printf("\n");
}

static void vectors(ae_ctx *ctx, int len)
{
    ALIGN(16) char pt[128];
    ALIGN(16) char ct[144];
    ALIGN(16) char nonce[] = {0,1,2,3,4,5,6,7,8,9,10,11};
    int i;
    for (i=0; i < 128; i++) pt[i] = i;
    i = ae_encrypt(ctx,nonce,pt,len,pt,len,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",len,len); pbuf(ct, i, NULL);
    i = ae_encrypt(ctx,nonce,pt,0,pt,len,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",0,len); pbuf(ct, i, NULL);
    i = ae_encrypt(ctx,nonce,pt,len,pt,0,ct,NULL,AE_FINALIZE);
    printf("P=%d,A=%d: ",len,0); pbuf(ct, i, NULL);
}

void validate()
{
    ALIGN(16) char pt[1024];
    ALIGN(16) char ct[1024];
    ALIGN(16) char tag[16];
    ALIGN(16) char nonce[12] = {0,};
    ALIGN(16) char key[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
    ae_ctx ctx;
    char *val_buf, *next;
    int i, len;

    val_buf = (char *)malloc(22400 + 16);
    next = val_buf = (char *)(((size_t)val_buf + 16) & ~((size_t)15));

    if (0) {
        ae_init(&ctx, key, 16, 12, 16);
        /* pbuf(&ctx, sizeof(ctx), "CTX: "); */
        vectors(&ctx,0);
        vectors(&ctx,8);
        vectors(&ctx,16);
        vectors(&ctx,24);
        vectors(&ctx,32);
        vectors(&ctx,40);
    }

    memset(key,0,32);
    memset(pt,0,128);
    ae_init(&ctx, key, OCB_KEY_LEN, 12, OCB_TAG_LEN);

    /* RFC Vector test */
    for (i = 0; i < 128; i++) {
        int first = ((i/3)/(BPI*16))*(BPI*16);
        int second = first;
        int third = i - (first + second);

        nonce[11] = i;

        if (0) {
            ae_encrypt(&ctx,nonce,pt,i,pt,i,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,i,pt,0,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,0,pt,i,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,OCB_TAG_LEN);
            next = next+OCB_TAG_LEN;
        } else {
            ae_encrypt(&ctx,nonce,pt,first,pt,first,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first,second,pt+first,second,ct+first,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first+second,third,pt+first+second,third,ct+first+second,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,first,pt,0,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first,second,pt,0,ct+first,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt+first+second,third,pt,0,ct+first+second,NULL,AE_FINALIZE);
            memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
            next = next+i+OCB_TAG_LEN;

            ae_encrypt(&ctx,nonce,pt,0,pt,first,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt,0,pt+first,second,ct,NULL,AE_PENDING);
            ae_encrypt(&ctx,NULL,pt,0,pt+first+second,third,ct,NULL,AE_FINALIZE);
            memcpy(next,ct,OCB_TAG_LEN);
            next = next+OCB_TAG_LEN;
        }

    }
    nonce[11] = 0;
    ae_encrypt(&ctx,nonce,NULL,0,val_buf,next-val_buf,ct,tag,AE_FINALIZE);
    pbuf(tag,OCB_TAG_LEN,0);

    /* Encrypt/Decrypt test */
    for (i = 0; i < 128; i++) {
        int first = ((i/3)/(BPI*16))*(BPI*16);
        int second = first;
        int third = i - (first + second);

        nonce[11] = i%128;

        if (1) {
            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,tag,AE_FINALIZE);
            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,-1,ct,tag,AE_FINALIZE);
            len = ae_decrypt(&ctx,nonce,ct,len,val_buf,-1,pt,tag,AE_FINALIZE);
            if (len == -1) { printf("Authentication error: %d\n", i); return; }
            if (len != i) { printf("Length error: %d\n", i); return; }
            if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
        } else {
            len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,NULL,AE_FINALIZE);
            ae_decrypt(&ctx,nonce,ct,first,val_buf,first,pt,NULL,AE_PENDING);
            ae_decrypt(&ctx,NULL,ct+first,second,val_buf+first,second,pt+first,NULL,AE_PENDING);
            len = ae_decrypt(&ctx,NULL,ct+first+second,len-(first+second),val_buf+first+second,third,pt+first+second,NULL,AE_FINALIZE);
            if (len == -1) { printf("Authentication error: %d\n", i); return; }
            if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
        }

    }
    printf("Decrypt: PASS\n");
}

int main()
{
    validate();
    return 0;
}
#endif

#if USE_AES_NI
char infoString[] = "OCB3 (AES-NI)";
#elif USE_REFERENCE_AES
char infoString[] = "OCB3 (Reference)";
#elif USE_OPENSSL_AES
char infoString[] = "OCB3 (OpenSSL)";
#endif