1/* OPENBSD ORIGINAL: lib/libc/crypt/chacha_private.h */
2
3/*
4chacha-merged.c version 20080118
5D. J. Bernstein
6Public domain.
7*/
8
9/* $OpenBSD: chacha_private.h,v 1.3 2022/02/28 21:56:29 dtucker Exp $ */
10
/* Byte-sized aliases: key, IV, message and keystream data. */
typedef unsigned char u8;
typedef unsigned char u_char;

/* Word-sized aliases: cipher state words and loop counters. */
typedef unsigned int u32;
typedef unsigned int u_int;

/*
 * Cipher context: sixteen state words.  As filled in by the setup and
 * encryption routines in this file, words 0-3 hold the constant, words
 * 4-11 the key, words 12-13 the block counter and words 14-15 the IV.
 */
typedef struct {
  u32 input[16]; /* could be compressed */
} chacha_ctx;
20
/* Append the unsigned suffix to an integer literal. */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to its low 8 / low 32 bits. */
#define U8V(v) ((u8)(v) & U8C(0xFF))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate the 32-bit value v left by n bits.  v is evaluated twice.
 * n must satisfy 0 < n < 32: with n == 0 the right-hand shift count
 * becomes 32, which is undefined when u32 is 32 bits wide.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/*
 * Assemble a 32-bit word from the four bytes at p, least significant
 * byte first.  p is evaluated four times.  Each byte is converted to u8
 * before widening so that a plain (possibly signed) char holding a value
 * >= 0x80 is not sign-extended into the upper bits of the word.
 */
#define U8TO32_LITTLE(p) \
  (((u32)(u8)((p)[0])      ) | \
   ((u32)(u8)((p)[1]) <<  8) | \
   ((u32)(u8)((p)[2]) << 16) | \
   ((u32)(u8)((p)[3]) << 24))

/*
 * Store the 32-bit word v into the four bytes at p, least significant
 * byte first.  Both arguments are evaluated four times.
 */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* Word operations used by the quarter round; PLUS wraps modulo 2^32. */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One add-rotate-xor quarter round, updating the four lvalues a, b, c, d
 * in place.  The expansion is a bare sequence of statements that already
 * ends in ';' (call sites in this file invoke it without a trailing
 * semicolon), so it must not be used as the unbraced body of an
 * if/for/while.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
54
55static const char sigma[16] = "expand 32-byte k";
56static const char tau[16] = "expand 16-byte k";
57
58static void
59chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
60{
61  const char *constants;
62
63  x->input[4] = U8TO32_LITTLE(k + 0);
64  x->input[5] = U8TO32_LITTLE(k + 4);
65  x->input[6] = U8TO32_LITTLE(k + 8);
66  x->input[7] = U8TO32_LITTLE(k + 12);
67  if (kbits == 256) { /* recommended */
68    k += 16;
69    constants = sigma;
70  } else { /* kbits == 128 */
71    constants = tau;
72  }
73  x->input[8] = U8TO32_LITTLE(k + 0);
74  x->input[9] = U8TO32_LITTLE(k + 4);
75  x->input[10] = U8TO32_LITTLE(k + 8);
76  x->input[11] = U8TO32_LITTLE(k + 12);
77  x->input[0] = U8TO32_LITTLE(constants + 0);
78  x->input[1] = U8TO32_LITTLE(constants + 4);
79  x->input[2] = U8TO32_LITTLE(constants + 8);
80  x->input[3] = U8TO32_LITTLE(constants + 12);
81}
82
83static void
84chacha_ivsetup(chacha_ctx *x,const u8 *iv)
85{
86  x->input[12] = 0;
87  x->input[13] = 0;
88  x->input[14] = U8TO32_LITTLE(iv + 0);
89  x->input[15] = U8TO32_LITTLE(iv + 4);
90}
91
92static void
93chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
94{
95  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
96  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
97  u8 *ctarget = NULL;
98  u8 tmp[64];
99  u_int i;
100
101  if (!bytes) return;
102
103  j0 = x->input[0];
104  j1 = x->input[1];
105  j2 = x->input[2];
106  j3 = x->input[3];
107  j4 = x->input[4];
108  j5 = x->input[5];
109  j6 = x->input[6];
110  j7 = x->input[7];
111  j8 = x->input[8];
112  j9 = x->input[9];
113  j10 = x->input[10];
114  j11 = x->input[11];
115  j12 = x->input[12];
116  j13 = x->input[13];
117  j14 = x->input[14];
118  j15 = x->input[15];
119
120  for (;;) {
121    if (bytes < 64) {
122      for (i = 0;i < bytes;++i) tmp[i] = m[i];
123      m = tmp;
124      ctarget = c;
125      c = tmp;
126    }
127    x0 = j0;
128    x1 = j1;
129    x2 = j2;
130    x3 = j3;
131    x4 = j4;
132    x5 = j5;
133    x6 = j6;
134    x7 = j7;
135    x8 = j8;
136    x9 = j9;
137    x10 = j10;
138    x11 = j11;
139    x12 = j12;
140    x13 = j13;
141    x14 = j14;
142    x15 = j15;
143    for (i = 20;i > 0;i -= 2) {
144      QUARTERROUND( x0, x4, x8,x12)
145      QUARTERROUND( x1, x5, x9,x13)
146      QUARTERROUND( x2, x6,x10,x14)
147      QUARTERROUND( x3, x7,x11,x15)
148      QUARTERROUND( x0, x5,x10,x15)
149      QUARTERROUND( x1, x6,x11,x12)
150      QUARTERROUND( x2, x7, x8,x13)
151      QUARTERROUND( x3, x4, x9,x14)
152    }
153    x0 = PLUS(x0,j0);
154    x1 = PLUS(x1,j1);
155    x2 = PLUS(x2,j2);
156    x3 = PLUS(x3,j3);
157    x4 = PLUS(x4,j4);
158    x5 = PLUS(x5,j5);
159    x6 = PLUS(x6,j6);
160    x7 = PLUS(x7,j7);
161    x8 = PLUS(x8,j8);
162    x9 = PLUS(x9,j9);
163    x10 = PLUS(x10,j10);
164    x11 = PLUS(x11,j11);
165    x12 = PLUS(x12,j12);
166    x13 = PLUS(x13,j13);
167    x14 = PLUS(x14,j14);
168    x15 = PLUS(x15,j15);
169
170#ifndef KEYSTREAM_ONLY
171    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
172    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
173    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
174    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
175    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
176    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
177    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
178    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
179    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
180    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
181    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
182    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
183    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
184    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
185    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
186    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
187#endif
188
189    j12 = PLUSONE(j12);
190    if (!j12) {
191      j13 = PLUSONE(j13);
192      /* stopping at 2^70 bytes per nonce is user's responsibility */
193    }
194
195    U32TO8_LITTLE(c + 0,x0);
196    U32TO8_LITTLE(c + 4,x1);
197    U32TO8_LITTLE(c + 8,x2);
198    U32TO8_LITTLE(c + 12,x3);
199    U32TO8_LITTLE(c + 16,x4);
200    U32TO8_LITTLE(c + 20,x5);
201    U32TO8_LITTLE(c + 24,x6);
202    U32TO8_LITTLE(c + 28,x7);
203    U32TO8_LITTLE(c + 32,x8);
204    U32TO8_LITTLE(c + 36,x9);
205    U32TO8_LITTLE(c + 40,x10);
206    U32TO8_LITTLE(c + 44,x11);
207    U32TO8_LITTLE(c + 48,x12);
208    U32TO8_LITTLE(c + 52,x13);
209    U32TO8_LITTLE(c + 56,x14);
210    U32TO8_LITTLE(c + 60,x15);
211
212    if (bytes <= 64) {
213      if (bytes < 64) {
214        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
215      }
216      x->input[12] = j12;
217      x->input[13] = j13;
218      return;
219    }
220    bytes -= 64;
221    c += 64;
222#ifndef KEYSTREAM_ONLY
223    m += 64;
224#endif
225  }
226}
227