1/*
2chacha-merged.c version 20080118
3D. J. Bernstein
4Public domain.
5*/
6
7#include <libwebsockets.h>
8#include "lws-ssh.h"
9
10#include <string.h>
11#include <stdlib.h>
12
/*
 * ChaCha cipher state: sixteen words that the code below treats as 32-bit
 * values.  As filled in by chacha_keysetup() and chacha_ivsetup():
 *   input[0..3]   constant ("expand 32-byte k" / "expand 16-byte k")
 *   input[4..11]  key
 *   input[12..13] block counter (low word first)
 *   input[14..15] nonce / IV
 */
struct chacha_ctx {
	u_int input[16];
};
16
/*
 * Size constants for the cipher.
 * NOTE(review): presumably byte counts (64 matches the block size handled by
 * chacha_encrypt_bytes()); none of them is referenced by the code visible in
 * this file, so confirm against users elsewhere.
 */
#define CHACHA_MINKEYLEN 	16
#define CHACHA_NONCELEN		8
#define CHACHA_CTRLEN		8
#define CHACHA_STATELEN		(CHACHA_NONCELEN+CHACHA_CTRLEN)
#define CHACHA_BLOCKLEN		64

/* Short integer aliases used by the cipher code below */
typedef unsigned char u8;
typedef unsigned int u32;

/* Allow the state struct to be named without the struct keyword */
typedef struct chacha_ctx chacha_ctx;
27
/* Append the unsigned suffix to an integer literal */
#define U8C(v) (v##U)
#define U32C(v) (v##U)

/* Truncate a value to its low 8 bits (as u8) / low 32 bits (as u32) */
#define U8V(v) ((u8)((v) & U8C(0xFF)))
#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))

/*
 * Rotate v left by n bits within 32 bits.  v is evaluated twice, and n must
 * be in 1..31 because the right-shift count is 32 - n.
 */
#define ROTL32(v, n) \
  (U32V((v) << (n)) | ((v) >> (32 - (n))))

/* Assemble a 32-bit value from the four bytes at p, least significant first */
#define U8TO32_LITTLE(p) \
  (((u32)((p)[0])      ) | \
   ((u32)((p)[1]) <<  8) | \
   ((u32)((p)[2]) << 16) | \
   ((u32)((p)[3]) << 24))

/*
 * Store v into the four bytes at p, least significant byte first.
 * Both p and v are evaluated four times.
 */
#define U32TO8_LITTLE(p, v) \
  do { \
    (p)[0] = U8V((v)      ); \
    (p)[1] = U8V((v) >>  8); \
    (p)[2] = U8V((v) >> 16); \
    (p)[3] = U8V((v) >> 24); \
  } while (0)

/* 32-bit primitives the round function is built from */
#define ROTATE(v,c) (ROTL32(v,c))
#define XOR(v,w) ((v) ^ (w))
#define PLUS(v,w) (U32V((v) + (w)))
#define PLUSONE(v) (PLUS((v),1))

/*
 * One quarter round: four add / xor / rotate steps on a, b, c, d with
 * rotation counts 16, 12, 8 and 7.  The arguments are assigned to, so they
 * must be lvalues, and each is evaluated several times.  The expansion is a
 * bare sequence of statements that already ends in ';' (it is not wrapped in
 * do { } while (0)), so it must not be used as the unbraced body of an
 * if / for / while.
 */
#define QUARTERROUND(a,b,c,d) \
  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
61
/*
 * Constants loaded into state words 0..3 by chacha_keysetup(): sigma for
 * 256-bit keys, tau otherwise.  Each array is exactly 16 bytes, so the
 * string literal's terminating NUL is not stored; do not treat these as
 * C strings.
 */
static const char sigma[16] = "expand 32-byte k";
static const char tau[16] = "expand 16-byte k";
64
65void
66chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits)
67{
68  const char *constants;
69
70  x->input[4] = U8TO32_LITTLE(k + 0);
71  x->input[5] = U8TO32_LITTLE(k + 4);
72  x->input[6] = U8TO32_LITTLE(k + 8);
73  x->input[7] = U8TO32_LITTLE(k + 12);
74  if (kbits == 256) { /* recommended */
75    k += 16;
76    constants = sigma;
77  } else { /* kbits == 128 */
78    constants = tau;
79  }
80  x->input[8] = U8TO32_LITTLE(k + 0);
81  x->input[9] = U8TO32_LITTLE(k + 4);
82  x->input[10] = U8TO32_LITTLE(k + 8);
83  x->input[11] = U8TO32_LITTLE(k + 12);
84  x->input[0] = U8TO32_LITTLE(constants + 0);
85  x->input[1] = U8TO32_LITTLE(constants + 4);
86  x->input[2] = U8TO32_LITTLE(constants + 8);
87  x->input[3] = U8TO32_LITTLE(constants + 12);
88}
89
90void
91chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter)
92{
93  x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0);
94  x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4);
95  x->input[14] = U8TO32_LITTLE(iv + 0);
96  x->input[15] = U8TO32_LITTLE(iv + 4);
97}
98
99void
100chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes)
101{
102  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
103  u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
104  u8 *ctarget = NULL;
105  u8 tmp[64];
106  u_int i;
107
108  if (!bytes) return;
109
110  j0 = x->input[0];
111  j1 = x->input[1];
112  j2 = x->input[2];
113  j3 = x->input[3];
114  j4 = x->input[4];
115  j5 = x->input[5];
116  j6 = x->input[6];
117  j7 = x->input[7];
118  j8 = x->input[8];
119  j9 = x->input[9];
120  j10 = x->input[10];
121  j11 = x->input[11];
122  j12 = x->input[12];
123  j13 = x->input[13];
124  j14 = x->input[14];
125  j15 = x->input[15];
126
127  for (;;) {
128    if (bytes < 64) {
129      for (i = 0;i < bytes;++i) tmp[i] = m[i];
130      m = tmp;
131      ctarget = c;
132      c = tmp;
133    }
134    x0 = j0;
135    x1 = j1;
136    x2 = j2;
137    x3 = j3;
138    x4 = j4;
139    x5 = j5;
140    x6 = j6;
141    x7 = j7;
142    x8 = j8;
143    x9 = j9;
144    x10 = j10;
145    x11 = j11;
146    x12 = j12;
147    x13 = j13;
148    x14 = j14;
149    x15 = j15;
150    for (i = 20;i > 0;i -= 2) {
151      QUARTERROUND( x0, x4, x8,x12)
152      QUARTERROUND( x1, x5, x9,x13)
153      QUARTERROUND( x2, x6,x10,x14)
154      QUARTERROUND( x3, x7,x11,x15)
155      QUARTERROUND( x0, x5,x10,x15)
156      QUARTERROUND( x1, x6,x11,x12)
157      QUARTERROUND( x2, x7, x8,x13)
158      QUARTERROUND( x3, x4, x9,x14)
159    }
160    x0 = PLUS(x0,j0);
161    x1 = PLUS(x1,j1);
162    x2 = PLUS(x2,j2);
163    x3 = PLUS(x3,j3);
164    x4 = PLUS(x4,j4);
165    x5 = PLUS(x5,j5);
166    x6 = PLUS(x6,j6);
167    x7 = PLUS(x7,j7);
168    x8 = PLUS(x8,j8);
169    x9 = PLUS(x9,j9);
170    x10 = PLUS(x10,j10);
171    x11 = PLUS(x11,j11);
172    x12 = PLUS(x12,j12);
173    x13 = PLUS(x13,j13);
174    x14 = PLUS(x14,j14);
175    x15 = PLUS(x15,j15);
176
177    x0 = XOR(x0,U8TO32_LITTLE(m + 0));
178    x1 = XOR(x1,U8TO32_LITTLE(m + 4));
179    x2 = XOR(x2,U8TO32_LITTLE(m + 8));
180    x3 = XOR(x3,U8TO32_LITTLE(m + 12));
181    x4 = XOR(x4,U8TO32_LITTLE(m + 16));
182    x5 = XOR(x5,U8TO32_LITTLE(m + 20));
183    x6 = XOR(x6,U8TO32_LITTLE(m + 24));
184    x7 = XOR(x7,U8TO32_LITTLE(m + 28));
185    x8 = XOR(x8,U8TO32_LITTLE(m + 32));
186    x9 = XOR(x9,U8TO32_LITTLE(m + 36));
187    x10 = XOR(x10,U8TO32_LITTLE(m + 40));
188    x11 = XOR(x11,U8TO32_LITTLE(m + 44));
189    x12 = XOR(x12,U8TO32_LITTLE(m + 48));
190    x13 = XOR(x13,U8TO32_LITTLE(m + 52));
191    x14 = XOR(x14,U8TO32_LITTLE(m + 56));
192    x15 = XOR(x15,U8TO32_LITTLE(m + 60));
193
194    j12 = PLUSONE(j12);
195    if (!j12)
196      j13 = PLUSONE(j13);
197      /* stopping at 2^70 bytes per nonce is user's responsibility */
198
199    U32TO8_LITTLE(c + 0,x0);
200    U32TO8_LITTLE(c + 4,x1);
201    U32TO8_LITTLE(c + 8,x2);
202    U32TO8_LITTLE(c + 12,x3);
203    U32TO8_LITTLE(c + 16,x4);
204    U32TO8_LITTLE(c + 20,x5);
205    U32TO8_LITTLE(c + 24,x6);
206    U32TO8_LITTLE(c + 28,x7);
207    U32TO8_LITTLE(c + 32,x8);
208    U32TO8_LITTLE(c + 36,x9);
209    U32TO8_LITTLE(c + 40,x10);
210    U32TO8_LITTLE(c + 44,x11);
211    U32TO8_LITTLE(c + 48,x12);
212    U32TO8_LITTLE(c + 52,x13);
213    U32TO8_LITTLE(c + 56,x14);
214    U32TO8_LITTLE(c + 60,x15);
215
216    if (bytes <= 64) {
217      if (bytes < 64) {
218        for (i = 0;i < bytes;++i) ctarget[i] = c[i];
219      }
220      x->input[12] = j12;
221      x->input[13] = j13;
222      return;
223    }
224    bytes -= 64;
225    c += 64;
226    m += 64;
227  }
228}
229
230struct lws_cipher_chacha {
231	struct chacha_ctx ccctx[2];
232};
233
234#define K_1(_keys) &((struct lws_cipher_chacha *)_keys->cipher)->ccctx[0]
235#define K_2(_keys) &((struct lws_cipher_chacha *)_keys->cipher)->ccctx[1]
236
237int
238lws_chacha_activate(struct lws_ssh_keys *keys)
239{
240	if (keys->cipher) {
241		free(keys->cipher);
242		keys->cipher = NULL;
243	}
244
245	keys->cipher = malloc(sizeof(struct lws_cipher_chacha));
246	if (!keys->cipher)
247		return 1;
248
249	memset(keys->cipher, 0, sizeof(struct lws_cipher_chacha));
250
251	/* uses 2 x 256-bit keys, so 512 bits (64 bytes) needed */
252	chacha_keysetup(K_2(keys), keys->key[SSH_KEYIDX_ENC], 256);
253	chacha_keysetup(K_1(keys), &keys->key[SSH_KEYIDX_ENC][32], 256);
254
255	keys->valid = 1;
256	keys->full_length = 1;
257	keys->padding_alignment = 8; // CHACHA_BLOCKLEN;
258	keys->MAC_length = POLY1305_TAGLEN;
259
260	return 0;
261}
262
263void
264lws_chacha_destroy(struct lws_ssh_keys *keys)
265{
266	if (keys->cipher) {
267		free(keys->cipher);
268		keys->cipher = NULL;
269	}
270}
271
/*
 * Recover the plaintext packet length from the first four ciphertext bytes.
 *
 * When receiving a packet the length must be decrypted before anything
 * else.  The four length bytes are decrypted with the K_1 context, using the
 * packet sequence number (written with POKE_U64) as the nonce and a zero
 * block counter.
 *
 * keys - keys whose cipher context has been activated
 * seq  - sequence number of the packet being received
 * in4  - the four encrypted length bytes
 *
 * Returns the decrypted bytes as read back by PEEK_U32.  The value is not
 * authenticated at this point.
 */
uint32_t
lws_chachapoly_get_length(struct lws_ssh_keys *keys, uint32_t seq,
			  const uint8_t *in4)
{
	struct chacha_ctx *len_ctx = K_1(keys);
	uint8_t nonce[8];
	uint8_t plain[4];

	POKE_U64(nonce, seq);
	chacha_ivsetup(len_ctx, nonce, NULL);
	chacha_encrypt_bytes(len_ctx, in4, plain, 4);

	return PEEK_U32(plain);
}
291
292/*
 * chachapoly_crypt() operates as follows:
294 * En/decrypt with header key 'aadlen' bytes from 'src', storing result
295 * to 'dest'. The ciphertext here is treated as additional authenticated
296 * data for MAC calculation.
297 * En/decrypt 'len' bytes at offset 'aadlen' from 'src' to 'dest'. Use
298 * POLY1305_TAGLEN bytes at offset 'len'+'aadlen' as the authentication
299 * tag. This tag is written on encryption and verified on decryption.
300 */
301int
302chachapoly_crypt(struct lws_ssh_keys *keys, u_int seqnr, u_char *dest,
303    const u_char *src, u_int len, u_int aadlen, u_int authlen, int do_encrypt)
304{
305        u_char seqbuf[8];
306        const u_char one[8] = { 1, 0, 0, 0, 0, 0, 0, 0 }; /* NB little-endian */
307        u_char expected_tag[POLY1305_TAGLEN], poly_key[POLY1305_KEYLEN];
308        int r = 1;
309
310        /*
311         * Run ChaCha20 once to generate the Poly1305 key. The IV is the
312         * packet sequence number.
313         */
314        memset(poly_key, 0, sizeof(poly_key));
315        POKE_U64(seqbuf, seqnr);
316        chacha_ivsetup(K_2(keys), seqbuf, NULL);
317        chacha_encrypt_bytes(K_2(keys),
318            poly_key, poly_key, sizeof(poly_key));
319
320        /* If decrypting, check tag before anything else */
321        if (!do_encrypt) {
322                const u_char *tag = src + aadlen + len;
323
324                poly1305_auth(expected_tag, src, aadlen + len, poly_key);
325                if (lws_timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN)) {
326                        r = 2;
327                        goto out;
328                }
329        }
330
331        /* Crypt additional data */
332        if (aadlen) {
333                chacha_ivsetup(K_1(keys), seqbuf, NULL);
334                chacha_encrypt_bytes(K_1(keys), src, dest, aadlen);
335        }
336
337        /* Set Chacha's block counter to 1 */
338        chacha_ivsetup(K_2(keys), seqbuf, one);
339        chacha_encrypt_bytes(K_2(keys), src + aadlen, dest + aadlen, len);
340
341        /* If encrypting, calculate and append tag */
342        if (do_encrypt) {
343                poly1305_auth(dest + aadlen + len, dest, aadlen + len,
344                    poly_key);
345        }
346        r = 0;
347 out:
348        lws_explicit_bzero(expected_tag, sizeof(expected_tag));
349        lws_explicit_bzero(seqbuf, sizeof(seqbuf));
350        lws_explicit_bzero(poly_key, sizeof(poly_key));
351        return r;
352}
353
354int
355lws_chacha_decrypt(struct lws_ssh_keys *keys, uint32_t seq,
356		   const uint8_t *ct, uint32_t len, uint8_t *pt)
357{
358	return chachapoly_crypt(keys, seq, pt, ct, len - POLY1305_TAGLEN - 4, 4,
359			 POLY1305_TAGLEN, 0);
360}
361
/*
 * Encrypt one outgoing packet and append its authentication tag.
 *
 * Despite the parameter names, `ct` is the input (4-byte length field
 * followed by the payload, in the clear) and `pt` receives the output.
 *
 * keys - keys with an activated cipher context
 * seq  - sequence number of the packet
 * ct   - input: 4-byte length field plus payload
 * len  - number of input bytes, including the 4-byte length field
 * pt   - output; must have room for len + POLY1305_TAGLEN bytes, since the
 *        tag is written directly after the encrypted data
 *
 * Returns 0 on success, or 1 if `len` is smaller than the 4-byte length
 * field.  Without that check the payload length computed below would wrap
 * to a huge unsigned value.
 */
int
lws_chacha_encrypt(struct lws_ssh_keys *keys, uint32_t seq,
		   const uint8_t *ct, uint32_t len, uint8_t *pt)
{
	if (len < 4)
		return 1;

	return chachapoly_crypt(keys, seq, pt, ct, len - 4, 4, 0, 1);
}
368
369