xref: /third_party/openssl/crypto/sm4/sm4.c (revision e1051a39)
1/*
2 * Copyright 2017-2021 The OpenSSL Project Authors. All Rights Reserved.
3 * Copyright 2017 Ribose Inc. All Rights Reserved.
4 * Ported from Ribose contributions from Botan.
5 *
6 * Licensed under the Apache License 2.0 (the "License").  You may not use
7 * this file except in compliance with the License.  You can obtain a copy
8 * in the file LICENSE in the source distribution or at
9 * https://www.openssl.org/source/license.html
10 */
11
12#include <openssl/e_os2.h>
13#include "crypto/sm4.h"
14
/*
 * SM4 S-box: a 256-entry byte substitution table indexed by the input byte.
 * It is applied one byte at a time by SM4_T_slow() and by the key schedule
 * in ossl_sm4_set_key().
 */
static const uint8_t SM4_S[256] = {
    0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2,
    0x28, 0xFB, 0x2C, 0x05, 0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3,
    0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 0x9C, 0x42, 0x50, 0xF4,
    0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
    0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA,
    0x75, 0x8F, 0x3F, 0xA6, 0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA,
    0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8, 0x68, 0x6B, 0x81, 0xB2,
    0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
    0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B,
    0x01, 0x21, 0x78, 0x87, 0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52,
    0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E, 0xEA, 0xBF, 0x8A, 0xD2,
    0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
    0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30,
    0xF5, 0x8C, 0xB1, 0xE3, 0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60,
    0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F, 0xD5, 0xDB, 0x37, 0x45,
    0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
    0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41,
    0x1F, 0x10, 0x5A, 0xD8, 0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD,
    0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0, 0x89, 0x69, 0x97, 0x4A,
    0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
    0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
    0xD7, 0xCB, 0x39, 0x48
};
39
/*
 * Combined S-box + linear-transform lookup table used by SM4_T().
 *
 * SM4_SBOX_T[j] == L((uint32_t)SM4_S[j] << 24), where L is the linear
 * transform applied at the end of SM4_T_slow():
 *     L(t) = t ^ rotl(t, 2) ^ rotl(t, 10) ^ rotl(t, 18) ^ rotl(t, 24)
 *
 * Each entry therefore holds the contribution of a substituted byte placed
 * in the most significant byte position; SM4_T() rotates the entry to obtain
 * the contribution for the other three byte positions.
 */
static const uint32_t SM4_SBOX_T[256] = {
    0x8ED55B5B, 0xD0924242, 0x4DEAA7A7, 0x06FDFBFB, 0xFCCF3333, 0x65E28787,
    0xC93DF4F4, 0x6BB5DEDE, 0x4E165858, 0x6EB4DADA, 0x44145050, 0xCAC10B0B,
    0x8828A0A0, 0x17F8EFEF, 0x9C2CB0B0, 0x11051414, 0x872BACAC, 0xFB669D9D,
    0xF2986A6A, 0xAE77D9D9, 0x822AA8A8, 0x46BCFAFA, 0x14041010, 0xCFC00F0F,
    0x02A8AAAA, 0x54451111, 0x5F134C4C, 0xBE269898, 0x6D482525, 0x9E841A1A,
    0x1E061818, 0xFD9B6666, 0xEC9E7272, 0x4A430909, 0x10514141, 0x24F7D3D3,
    0xD5934646, 0x53ECBFBF, 0xF89A6262, 0x927BE9E9, 0xFF33CCCC, 0x04555151,
    0x270B2C2C, 0x4F420D0D, 0x59EEB7B7, 0xF3CC3F3F, 0x1CAEB2B2, 0xEA638989,
    0x74E79393, 0x7FB1CECE, 0x6C1C7070, 0x0DABA6A6, 0xEDCA2727, 0x28082020,
    0x48EBA3A3, 0xC1975656, 0x80820202, 0xA3DC7F7F, 0xC4965252, 0x12F9EBEB,
    0xA174D5D5, 0xB38D3E3E, 0xC33FFCFC, 0x3EA49A9A, 0x5B461D1D, 0x1B071C1C,
    0x3BA59E9E, 0x0CFFF3F3, 0x3FF0CFCF, 0xBF72CDCD, 0x4B175C5C, 0x52B8EAEA,
    0x8F810E0E, 0x3D586565, 0xCC3CF0F0, 0x7D196464, 0x7EE59B9B, 0x91871616,
    0x734E3D3D, 0x08AAA2A2, 0xC869A1A1, 0xC76AADAD, 0x85830606, 0x7AB0CACA,
    0xB570C5C5, 0xF4659191, 0xB2D96B6B, 0xA7892E2E, 0x18FBE3E3, 0x47E8AFAF,
    0x330F3C3C, 0x674A2D2D, 0xB071C1C1, 0x0E575959, 0xE99F7676, 0xE135D4D4,
    0x661E7878, 0xB4249090, 0x360E3838, 0x265F7979, 0xEF628D8D, 0x38596161,
    0x95D24747, 0x2AA08A8A, 0xB1259494, 0xAA228888, 0x8C7DF1F1, 0xD73BECEC,
    0x05010404, 0xA5218484, 0x9879E1E1, 0x9B851E1E, 0x84D75353, 0x00000000,
    0x5E471919, 0x0B565D5D, 0xE39D7E7E, 0x9FD04F4F, 0xBB279C9C, 0x1A534949,
    0x7C4D3131, 0xEE36D8D8, 0x0A020808, 0x7BE49F9F, 0x20A28282, 0xD4C71313,
    0xE8CB2323, 0xE69C7A7A, 0x42E9ABAB, 0x43BDFEFE, 0xA2882A2A, 0x9AD14B4B,
    0x40410101, 0xDBC41F1F, 0xD838E0E0, 0x61B7D6D6, 0x2FA18E8E, 0x2BF4DFDF,
    0x3AF1CBCB, 0xF6CD3B3B, 0x1DFAE7E7, 0xE5608585, 0x41155454, 0x25A38686,
    0x60E38383, 0x16ACBABA, 0x295C7575, 0x34A69292, 0xF7996E6E, 0xE434D0D0,
    0x721A6868, 0x01545555, 0x19AFB6B6, 0xDF914E4E, 0xFA32C8C8, 0xF030C0C0,
    0x21F6D7D7, 0xBC8E3232, 0x75B3C6C6, 0x6FE08F8F, 0x691D7474, 0x2EF5DBDB,
    0x6AE18B8B, 0x962EB8B8, 0x8A800A0A, 0xFE679999, 0xE2C92B2B, 0xE0618181,
    0xC0C30303, 0x8D29A4A4, 0xAF238C8C, 0x07A9AEAE, 0x390D3434, 0x1F524D4D,
    0x764F3939, 0xD36EBDBD, 0x81D65757, 0xB7D86F6F, 0xEB37DCDC, 0x51441515,
    0xA6DD7B7B, 0x09FEF7F7, 0xB68C3A3A, 0x932FBCBC, 0x0F030C0C, 0x03FCFFFF,
    0xC26BA9A9, 0xBA73C9C9, 0xD96CB5B5, 0xDC6DB1B1, 0x375A6D6D, 0x15504545,
    0xB98F3636, 0x771B6C6C, 0x13ADBEBE, 0xDA904A4A, 0x57B9EEEE, 0xA9DE7777,
    0x4CBEF2F2, 0x837EFDFD, 0x55114444, 0xBDDA6767, 0x2C5D7171, 0x45400505,
    0x631F7C7C, 0x50104040, 0x325B6969, 0xB8DB6363, 0x220A2828, 0xC5C20707,
    0xF531C4C4, 0xA88A2222, 0x31A79696, 0xF9CE3737, 0x977AEDED, 0x49BFF6F6,
    0x992DB4B4, 0xA475D1D1, 0x90D34343, 0x5A124848, 0x58BAE2E2, 0x71E69797,
    0x64B6D2D2, 0x70B2C2C2, 0xAD8B2626, 0xCD68A5A5, 0xCB955E5E, 0x624B2929,
    0x3C0C3030, 0xCE945A5A, 0xAB76DDDD, 0x867FF9F9, 0xF1649595, 0x5DBBE6E6,
    0x35F2C7C7, 0x2D092424, 0xD1C61717, 0xD66FB9B9, 0xDEC51B1B, 0x94861212,
    0x78186060, 0x30F3C3C3, 0x897CF5F5, 0x5CEFB3B3, 0xD23AE8E8, 0xACDF7373,
    0x794C3535, 0xA0208080, 0x9D78E5E5, 0x56EDBBBB, 0x235E7D7D, 0xC63EF8F8,
    0x8BD45F5F, 0xE7C82F2F, 0xDD39E4E4, 0x68492121 };
87
/*
 * Rotate the 32-bit value |a| left by |n| bits.
 *
 * The rotation count is reduced modulo 32, so n == 0 (and any multiple of
 * 32) returns |a| unchanged.  Both shift amounts are masked into the range
 * 0..31: shifting a 32-bit value by 32 or more is undefined behaviour in C,
 * which the unmasked form (a >> (32 - n)) would hit for n == 0.
 */
static ossl_inline uint32_t rotl(uint32_t a, uint8_t n)
{
    const unsigned int s = n & 31u;

    /* (32 - s) & 31 maps s == 0 to a shift of 0 rather than 32. */
    return (a << s) | (a >> ((32u - s) & 31u));
}
92
/*
 * Read the |n|-th 32-bit big-endian word from the byte array |b|, i.e. the
 * four bytes b[4n] .. b[4n + 3] with b[4n] as the most significant byte.
 */
static ossl_inline uint32_t load_u32_be(const uint8_t *b, uint32_t n)
{
    const uint8_t *word = b + 4 * n;
    uint32_t value = word[0];

    /* Shift in the remaining bytes, most significant first. */
    value = (value << 8) | word[1];
    value = (value << 8) | word[2];
    value = (value << 8) | word[3];

    return value;
}
100
/*
 * Write |v| to b[0] .. b[3] in big-endian order (most significant byte at
 * b[0]).
 */
static ossl_inline void store_u32_be(uint32_t v, uint8_t *b)
{
    int i;

    /* Peel off the low byte each time, filling the buffer back to front. */
    for (i = 3; i >= 0; --i) {
        b[i] = (uint8_t)v;
        v >>= 8;
    }
}
108
109static ossl_inline uint32_t SM4_T_slow(uint32_t X)
110{
111    uint32_t t = 0;
112
113    t |= ((uint32_t)SM4_S[(uint8_t)(X >> 24)]) << 24;
114    t |= ((uint32_t)SM4_S[(uint8_t)(X >> 16)]) << 16;
115    t |= ((uint32_t)SM4_S[(uint8_t)(X >> 8)]) << 8;
116    t |= SM4_S[(uint8_t)X];
117
118    /*
119     * L linear transform
120     */
121    return t ^ rotl(t, 2) ^ rotl(t, 10) ^ rotl(t, 18) ^ rotl(t, 24);
122}
123
124static ossl_inline uint32_t SM4_T(uint32_t X)
125{
126    return SM4_SBOX_T[(uint8_t)(X >> 24)] ^
127           rotl(SM4_SBOX_T[(uint8_t)(X >> 16)], 24) ^
128           rotl(SM4_SBOX_T[(uint8_t)(X >> 8)], 16) ^
129           rotl(SM4_SBOX_T[(uint8_t)X], 8);
130}
131
132int ossl_sm4_set_key(const uint8_t *key, SM4_KEY *ks)
133{
134    /*
135     * Family Key
136     */
137    static const uint32_t FK[4] =
138        { 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc };
139
140    /*
141     * Constant Key
142     */
143    static const uint32_t CK[32] = {
144        0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269,
145        0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9,
146        0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249,
147        0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9,
148        0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229,
149        0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299,
150        0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209,
151        0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
152    };
153
154    uint32_t K[4];
155    int i;
156
157    K[0] = load_u32_be(key, 0) ^ FK[0];
158    K[1] = load_u32_be(key, 1) ^ FK[1];
159    K[2] = load_u32_be(key, 2) ^ FK[2];
160    K[3] = load_u32_be(key, 3) ^ FK[3];
161
162    for (i = 0; i != SM4_KEY_SCHEDULE; ++i) {
163        uint32_t X = K[(i + 1) % 4] ^ K[(i + 2) % 4] ^ K[(i + 3) % 4] ^ CK[i];
164        uint32_t t = 0;
165
166        t |= ((uint32_t)SM4_S[(uint8_t)(X >> 24)]) << 24;
167        t |= ((uint32_t)SM4_S[(uint8_t)(X >> 16)]) << 16;
168        t |= ((uint32_t)SM4_S[(uint8_t)(X >> 8)]) << 8;
169        t |= SM4_S[(uint8_t)X];
170
171        t = t ^ rotl(t, 13) ^ rotl(t, 23);
172        K[i % 4] ^= t;
173        ks->rk[i] = K[i % 4];
174    }
175
176    return 1;
177}
178
/*
 * Perform four consecutive cipher rounds on the state words B0..B3.
 *
 * The macro expects uint32_t variables B0, B1, B2, B3 and a pointer `ks`
 * with an `rk` array to be in scope at the point of use.  k0..k3 are the
 * round-key indices for the four rounds and F is the round transform
 * (SM4_T or SM4_T_slow).  Each round updates one state word from the other
 * three, so the four statements must run in this order.
 */
#define SM4_RNDS(k0, k1, k2, k3, F)              \
    do {                                         \
        B0 ^= F(ks->rk[k0] ^ B1 ^ B2 ^ B3);      \
        B1 ^= F(ks->rk[k1] ^ B0 ^ B2 ^ B3);      \
        B2 ^= F(ks->rk[k2] ^ B0 ^ B1 ^ B3);      \
        B3 ^= F(ks->rk[k3] ^ B0 ^ B1 ^ B2);      \
    } while (0)
186
187void ossl_sm4_encrypt(const uint8_t *in, uint8_t *out, const SM4_KEY *ks)
188{
189    uint32_t B0 = load_u32_be(in, 0);
190    uint32_t B1 = load_u32_be(in, 1);
191    uint32_t B2 = load_u32_be(in, 2);
192    uint32_t B3 = load_u32_be(in, 3);
193
194    /*
195     * Uses byte-wise sbox in the first and last rounds to provide some
196     * protection from cache based side channels.
197     */
198    SM4_RNDS( 0,  1,  2,  3, SM4_T_slow);
199    SM4_RNDS( 4,  5,  6,  7, SM4_T);
200    SM4_RNDS( 8,  9, 10, 11, SM4_T);
201    SM4_RNDS(12, 13, 14, 15, SM4_T);
202    SM4_RNDS(16, 17, 18, 19, SM4_T);
203    SM4_RNDS(20, 21, 22, 23, SM4_T);
204    SM4_RNDS(24, 25, 26, 27, SM4_T);
205    SM4_RNDS(28, 29, 30, 31, SM4_T_slow);
206
207    store_u32_be(B3, out);
208    store_u32_be(B2, out + 4);
209    store_u32_be(B1, out + 8);
210    store_u32_be(B0, out + 12);
211}
212
213void ossl_sm4_decrypt(const uint8_t *in, uint8_t *out, const SM4_KEY *ks)
214{
215    uint32_t B0 = load_u32_be(in, 0);
216    uint32_t B1 = load_u32_be(in, 1);
217    uint32_t B2 = load_u32_be(in, 2);
218    uint32_t B3 = load_u32_be(in, 3);
219
220    SM4_RNDS(31, 30, 29, 28, SM4_T_slow);
221    SM4_RNDS(27, 26, 25, 24, SM4_T);
222    SM4_RNDS(23, 22, 21, 20, SM4_T);
223    SM4_RNDS(19, 18, 17, 16, SM4_T);
224    SM4_RNDS(15, 14, 13, 12, SM4_T);
225    SM4_RNDS(11, 10,  9,  8, SM4_T);
226    SM4_RNDS( 7,  6,  5,  4, SM4_T);
227    SM4_RNDS( 3,  2,  1,  0, SM4_T_slow);
228
229    store_u32_be(B3, out);
230    store_u32_be(B2, out + 4);
231    store_u32_be(B1, out + 8);
232    store_u32_be(B0, out + 12);
233}
234