1/* 2chacha-merged.c version 20080118 3D. J. Bernstein 4Public domain. 5*/ 6 7#include <libwebsockets.h> 8#include "lws-ssh.h" 9 10#include <string.h> 11#include <stdlib.h> 12 13struct chacha_ctx { 14 u_int input[16]; 15}; 16 17#define CHACHA_MINKEYLEN 16 18#define CHACHA_NONCELEN 8 19#define CHACHA_CTRLEN 8 20#define CHACHA_STATELEN (CHACHA_NONCELEN+CHACHA_CTRLEN) 21#define CHACHA_BLOCKLEN 64 22 23typedef unsigned char u8; 24typedef unsigned int u32; 25 26typedef struct chacha_ctx chacha_ctx; 27 28#define U8C(v) (v##U) 29#define U32C(v) (v##U) 30 31#define U8V(v) ((u8)((v) & U8C(0xFF))) 32#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) 33 34#define ROTL32(v, n) \ 35 (U32V((v) << (n)) | ((v) >> (32 - (n)))) 36 37#define U8TO32_LITTLE(p) \ 38 (((u32)((p)[0]) ) | \ 39 ((u32)((p)[1]) << 8) | \ 40 ((u32)((p)[2]) << 16) | \ 41 ((u32)((p)[3]) << 24)) 42 43#define U32TO8_LITTLE(p, v) \ 44 do { \ 45 (p)[0] = U8V((v) ); \ 46 (p)[1] = U8V((v) >> 8); \ 47 (p)[2] = U8V((v) >> 16); \ 48 (p)[3] = U8V((v) >> 24); \ 49 } while (0) 50 51#define ROTATE(v,c) (ROTL32(v,c)) 52#define XOR(v,w) ((v) ^ (w)) 53#define PLUS(v,w) (U32V((v) + (w))) 54#define PLUSONE(v) (PLUS((v),1)) 55 56#define QUARTERROUND(a,b,c,d) \ 57 a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ 58 c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ 59 a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ 60 c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); 61 62static const char sigma[16] = "expand 32-byte k"; 63static const char tau[16] = "expand 16-byte k"; 64 65void 66chacha_keysetup(chacha_ctx *x,const u8 *k,u32 kbits) 67{ 68 const char *constants; 69 70 x->input[4] = U8TO32_LITTLE(k + 0); 71 x->input[5] = U8TO32_LITTLE(k + 4); 72 x->input[6] = U8TO32_LITTLE(k + 8); 73 x->input[7] = U8TO32_LITTLE(k + 12); 74 if (kbits == 256) { /* recommended */ 75 k += 16; 76 constants = sigma; 77 } else { /* kbits == 128 */ 78 constants = tau; 79 } 80 x->input[8] = U8TO32_LITTLE(k + 0); 81 x->input[9] = U8TO32_LITTLE(k + 4); 82 x->input[10] = U8TO32_LITTLE(k + 8); 83 x->input[11] = U8TO32_LITTLE(k + 12); 84 x->input[0] = U8TO32_LITTLE(constants + 0); 85 x->input[1] = U8TO32_LITTLE(constants + 4); 86 x->input[2] = U8TO32_LITTLE(constants + 8); 87 x->input[3] = U8TO32_LITTLE(constants + 12); 88} 89 90void 91chacha_ivsetup(chacha_ctx *x, const u8 *iv, const u8 *counter) 92{ 93 x->input[12] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 0); 94 x->input[13] = counter == NULL ? 0 : U8TO32_LITTLE(counter + 4); 95 x->input[14] = U8TO32_LITTLE(iv + 0); 96 x->input[15] = U8TO32_LITTLE(iv + 4); 97} 98 99void 100chacha_encrypt_bytes(chacha_ctx *x,const u8 *m,u8 *c,u32 bytes) 101{ 102 u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; 103 u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; 104 u8 *ctarget = NULL; 105 u8 tmp[64]; 106 u_int i; 107 108 if (!bytes) return; 109 110 j0 = x->input[0]; 111 j1 = x->input[1]; 112 j2 = x->input[2]; 113 j3 = x->input[3]; 114 j4 = x->input[4]; 115 j5 = x->input[5]; 116 j6 = x->input[6]; 117 j7 = x->input[7]; 118 j8 = x->input[8]; 119 j9 = x->input[9]; 120 j10 = x->input[10]; 121 j11 = x->input[11]; 122 j12 = x->input[12]; 123 j13 = x->input[13]; 124 j14 = x->input[14]; 125 j15 = x->input[15]; 126 127 for (;;) { 128 if (bytes < 64) { 129 for (i = 0;i < bytes;++i) tmp[i] = m[i]; 130 m = tmp; 131 ctarget = c; 132 c = tmp; 133 } 134 x0 = j0; 135 x1 = j1; 136 x2 = j2; 137 x3 = j3; 138 x4 = j4; 139 x5 = j5; 140 x6 = j6; 141 x7 = j7; 142 x8 = j8; 143 x9 = j9; 144 x10 = j10; 145 x11 = j11; 146 x12 = j12; 147 x13 = j13; 148 x14 = j14; 149 x15 = j15; 150 for (i = 20;i > 0;i -= 2) { 151 QUARTERROUND( x0, x4, x8,x12) 152 QUARTERROUND( x1, x5, x9,x13) 153 QUARTERROUND( x2, x6,x10,x14) 154 QUARTERROUND( x3, x7,x11,x15) 155 QUARTERROUND( x0, x5,x10,x15) 156 QUARTERROUND( x1, x6,x11,x12) 157 QUARTERROUND( x2, x7, x8,x13) 158 QUARTERROUND( x3, x4, x9,x14) 159 } 160 x0 = PLUS(x0,j0); 161 x1 = PLUS(x1,j1); 162 x2 = PLUS(x2,j2); 163 x3 = PLUS(x3,j3); 164 x4 = PLUS(x4,j4); 165 x5 = PLUS(x5,j5); 166 x6 = PLUS(x6,j6); 167 x7 = PLUS(x7,j7); 168 x8 = PLUS(x8,j8); 169 x9 = PLUS(x9,j9); 170 x10 = PLUS(x10,j10); 171 x11 = PLUS(x11,j11); 172 x12 = PLUS(x12,j12); 173 x13 = PLUS(x13,j13); 174 x14 = PLUS(x14,j14); 175 x15 = PLUS(x15,j15); 176 177 x0 = XOR(x0,U8TO32_LITTLE(m + 0)); 178 x1 = XOR(x1,U8TO32_LITTLE(m + 4)); 179 x2 = XOR(x2,U8TO32_LITTLE(m + 8)); 180 x3 = XOR(x3,U8TO32_LITTLE(m + 12)); 181 x4 = XOR(x4,U8TO32_LITTLE(m + 16)); 182 x5 = XOR(x5,U8TO32_LITTLE(m + 20)); 183 x6 = XOR(x6,U8TO32_LITTLE(m + 24)); 184 x7 = XOR(x7,U8TO32_LITTLE(m + 28)); 185 x8 = XOR(x8,U8TO32_LITTLE(m + 32)); 186 x9 = XOR(x9,U8TO32_LITTLE(m + 36)); 187 x10 = XOR(x10,U8TO32_LITTLE(m + 40)); 188 x11 = XOR(x11,U8TO32_LITTLE(m + 44)); 189 x12 = XOR(x12,U8TO32_LITTLE(m + 48)); 190 x13 = XOR(x13,U8TO32_LITTLE(m + 52)); 191 x14 = XOR(x14,U8TO32_LITTLE(m + 56)); 192 x15 = XOR(x15,U8TO32_LITTLE(m + 60)); 193 194 j12 = PLUSONE(j12); 195 if (!j12) 196 j13 = PLUSONE(j13); 197 /* stopping at 2^70 bytes per nonce is user's responsibility */ 198 199 U32TO8_LITTLE(c + 0,x0); 200 U32TO8_LITTLE(c + 4,x1); 201 U32TO8_LITTLE(c + 8,x2); 202 U32TO8_LITTLE(c + 12,x3); 203 U32TO8_LITTLE(c + 16,x4); 204 U32TO8_LITTLE(c + 20,x5); 205 U32TO8_LITTLE(c + 24,x6); 206 U32TO8_LITTLE(c + 28,x7); 207 U32TO8_LITTLE(c + 32,x8); 208 U32TO8_LITTLE(c + 36,x9); 209 U32TO8_LITTLE(c + 40,x10); 210 U32TO8_LITTLE(c + 44,x11); 211 U32TO8_LITTLE(c + 48,x12); 212 U32TO8_LITTLE(c + 52,x13); 213 U32TO8_LITTLE(c + 56,x14); 214 U32TO8_LITTLE(c + 60,x15); 215 216 if (bytes <= 64) { 217 if (bytes < 64) { 218 for (i = 0;i < bytes;++i) ctarget[i] = c[i]; 219 } 220 x->input[12] = j12; 221 x->input[13] = j13; 222 return; 223 } 224 bytes -= 64; 225 c += 64; 226 m += 64; 227 } 228} 229 230struct lws_cipher_chacha { 231 struct chacha_ctx ccctx[2]; 232}; 233 234#define K_1(_keys) &((struct lws_cipher_chacha *)_keys->cipher)->ccctx[0] 235#define K_2(_keys) &((struct lws_cipher_chacha *)_keys->cipher)->ccctx[1] 236 237int 238lws_chacha_activate(struct lws_ssh_keys *keys) 239{ 240 if (keys->cipher) { 241 free(keys->cipher); 242 keys->cipher = NULL; 243 } 244 245 keys->cipher = malloc(sizeof(struct lws_cipher_chacha)); 246 if (!keys->cipher) 247 return 1; 248 249 memset(keys->cipher, 0, sizeof(struct lws_cipher_chacha)); 250 251 /* uses 2 x 256-bit keys, so 512 bits (64 bytes) needed */ 252 chacha_keysetup(K_2(keys), keys->key[SSH_KEYIDX_ENC], 256); 253 chacha_keysetup(K_1(keys), &keys->key[SSH_KEYIDX_ENC][32], 256); 254 255 keys->valid = 1; 256 keys->full_length = 1; 257 keys->padding_alignment = 8; // CHACHA_BLOCKLEN; 258 keys->MAC_length = POLY1305_TAGLEN; 259 260 return 0; 261} 262 263void 264lws_chacha_destroy(struct lws_ssh_keys *keys) 265{ 266 if (keys->cipher) { 267 free(keys->cipher); 268 keys->cipher = NULL; 269 } 270} 271 272uint32_t 273lws_chachapoly_get_length(struct lws_ssh_keys *keys, uint32_t seq, 274 const uint8_t *in4) 275{ 276 uint8_t buf[4], seqbuf[8]; 277 278 /* 279 * When receiving a packet, the length must be decrypted first. When 4 280 * bytes of ciphertext length have been received, they may be decrypted 281 * using the K_1 key, a nonce consisting of the packet sequence number 282 * encoded as a uint64 under the usual SSH wire encoding and a zero 283 * block counter to obtain the plaintext length. 284 */ 285 POKE_U64(seqbuf, seq); 286 chacha_ivsetup(K_1(keys), seqbuf, NULL); 287 chacha_encrypt_bytes(K_1(keys), in4, buf, 4); 288 289 return PEEK_U32(buf); 290} 291 292/* 293 * chachapoly_crypt() operates as following: 294 * En/decrypt with header key 'aadlen' bytes from 'src', storing result 295 * to 'dest'. The ciphertext here is treated as additional authenticated 296 * data for MAC calculation. 297 * En/decrypt 'len' bytes at offset 'aadlen' from 'src' to 'dest'. Use 298 * POLY1305_TAGLEN bytes at offset 'len'+'aadlen' as the authentication 299 * tag. This tag is written on encryption and verified on decryption. 300 */ 301int 302chachapoly_crypt(struct lws_ssh_keys *keys, u_int seqnr, u_char *dest, 303 const u_char *src, u_int len, u_int aadlen, u_int authlen, int do_encrypt) 304{ 305 u_char seqbuf[8]; 306 const u_char one[8] = { 1, 0, 0, 0, 0, 0, 0, 0 }; /* NB little-endian */ 307 u_char expected_tag[POLY1305_TAGLEN], poly_key[POLY1305_KEYLEN]; 308 int r = 1; 309 310 /* 311 * Run ChaCha20 once to generate the Poly1305 key. The IV is the 312 * packet sequence number. 313 */ 314 memset(poly_key, 0, sizeof(poly_key)); 315 POKE_U64(seqbuf, seqnr); 316 chacha_ivsetup(K_2(keys), seqbuf, NULL); 317 chacha_encrypt_bytes(K_2(keys), 318 poly_key, poly_key, sizeof(poly_key)); 319 320 /* If decrypting, check tag before anything else */ 321 if (!do_encrypt) { 322 const u_char *tag = src + aadlen + len; 323 324 poly1305_auth(expected_tag, src, aadlen + len, poly_key); 325 if (lws_timingsafe_bcmp(expected_tag, tag, POLY1305_TAGLEN)) { 326 r = 2; 327 goto out; 328 } 329 } 330 331 /* Crypt additional data */ 332 if (aadlen) { 333 chacha_ivsetup(K_1(keys), seqbuf, NULL); 334 chacha_encrypt_bytes(K_1(keys), src, dest, aadlen); 335 } 336 337 /* Set Chacha's block counter to 1 */ 338 chacha_ivsetup(K_2(keys), seqbuf, one); 339 chacha_encrypt_bytes(K_2(keys), src + aadlen, dest + aadlen, len); 340 341 /* If encrypting, calculate and append tag */ 342 if (do_encrypt) { 343 poly1305_auth(dest + aadlen + len, dest, aadlen + len, 344 poly_key); 345 } 346 r = 0; 347 out: 348 lws_explicit_bzero(expected_tag, sizeof(expected_tag)); 349 lws_explicit_bzero(seqbuf, sizeof(seqbuf)); 350 lws_explicit_bzero(poly_key, sizeof(poly_key)); 351 return r; 352} 353 354int 355lws_chacha_decrypt(struct lws_ssh_keys *keys, uint32_t seq, 356 const uint8_t *ct, uint32_t len, uint8_t *pt) 357{ 358 return chachapoly_crypt(keys, seq, pt, ct, len - POLY1305_TAGLEN - 4, 4, 359 POLY1305_TAGLEN, 0); 360} 361 362int 363lws_chacha_encrypt(struct lws_ssh_keys *keys, uint32_t seq, 364 const uint8_t *ct, uint32_t len, uint8_t *pt) 365{ 366 return chachapoly_crypt(keys, seq, pt, ct, len - 4, 4, 0, 1); 367} 368 369