xref: /third_party/python/Modules/md5module.c (revision 7db96d56)
1/* MD5 module */
2
3/* This module provides an interface to the MD5 algorithm */
4
5/* See below for information about the original code this module was
6   based upon. Additional work performed by:
7
8   Andrew Kuchling (amk@amk.ca)
9   Greg Stein (gstein@lyra.org)
10   Trevor Perrin (trevp@trevp.net)
11
12   Copyright (C) 2005-2007   Gregory P. Smith (greg@krypto.org)
13   Licensed to PSF under a Contributor Agreement.
14
15*/
16
17/* MD5 objects */
18#ifndef Py_BUILD_CORE_BUILTIN
19#  define Py_BUILD_CORE_MODULE 1
20#endif
21
22#include "Python.h"
23#include "hashlib.h"
24#include "pycore_strhex.h"        // _Py_strhex()
25
26/*[clinic input]
27module _md5
28class MD5Type "MD5object *" "&PyType_Type"
29[clinic start generated code]*/
30/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6e5261719957a912]*/
31
32/* Some useful types */
33
#if SIZEOF_INT == 4
/* 32-bit unsigned word used for the chaining state and message words */
typedef unsigned int MD5_INT32;
/* 64-bit message-length counter, in bits.  Kept unsigned so that the
   additions wrap modulo 2**64 and the right shifts in STORE64L are fully
   defined for every value. */
typedef unsigned long long MD5_INT64;
#else
/* Neither type is defined when int is not 4 bytes wide, so the first use
   of MD5_INT32 / MD5_INT64 below stops the build. */
#endif
40
41/* The MD5 block size and message digest sizes, in bytes */
42
43#define MD5_BLOCKSIZE    64
44#define MD5_DIGESTSIZE   16
45
46/* The structure for storing MD5 info */
47
48struct md5_state {
49    MD5_INT64 length;
50    MD5_INT32 state[4], curlen;
51    unsigned char buf[MD5_BLOCKSIZE];
52};
53
54typedef struct {
55    PyObject_HEAD
56
57    struct md5_state hash_state;
58} MD5object;
59
60#include "clinic/md5module.c.h"
61
62/* ------------------------------------------------------------------------
63 *
64 * This code for the MD5 algorithm was noted as public domain. The
65 * original headers are pasted below.
66 *
67 * Several changes have been made to make it more compatible with the
68 * Python environment and desired interface.
69 *
70 */
71
72/* LibTomCrypt, modular cryptographic library -- Tom St Denis
73 *
74 * LibTomCrypt is a library that provides various cryptographic
75 * algorithms in a highly modular and flexible manner.
76 *
77 * The library is free for all purposes without any express
78 * guarantee it works.
79 *
80 * Tom St Denis, tomstdenis@gmail.com, https://www.libtom.net
81 */
82
/* Rotate the low 32 bits of x left by (y mod 32) positions; the result is
 * always masked to 32 bits.  The right-shift count is reduced modulo 32 as
 * well, so a rotation by 0 (or 32) never shifts by the full width of a
 * 32-bit unsigned long, which would be undefined behaviour.
 * (platform optimizations could be done)
 */
#define ROLc(x, y)                                                              \
    ( ( ((unsigned long)(x) << (unsigned long)((y) & 31)) |                     \
        (((unsigned long)(x) & 0xFFFFFFFFUL) >>                                 \
             (unsigned long)((32 - ((y) & 31)) & 31)) )                         \
      & 0xFFFFFFFFUL )
85
86/* Endian Neutral macros that work on all platforms */
87
/* Each macro expands to a single statement (do { } while (0)), so it can be
 * used safely as the body of an unbraced if/else.  Arguments are evaluated
 * more than once: pass side-effect-free expressions only.
 */

/* Write the low 32 bits of x to y[0..3], least significant byte first. */
#define STORE32L(x, y)                                  \
    do {                                                \
        (y)[3] = (unsigned char)(((x)>>24)&255);        \
        (y)[2] = (unsigned char)(((x)>>16)&255);        \
        (y)[1] = (unsigned char)(((x)>>8)&255);         \
        (y)[0] = (unsigned char)((x)&255);              \
    } while (0)

/* Read y[0..3] as a little-endian 32-bit value and assign it to x. */
#define LOAD32L(x, y)                                   \
    do {                                                \
        (x) = ((unsigned long)((y)[3] & 255)<<24) |     \
              ((unsigned long)((y)[2] & 255)<<16) |     \
              ((unsigned long)((y)[1] & 255)<<8)  |     \
              ((unsigned long)((y)[0] & 255));          \
    } while (0)

/* Write the 64-bit value x to y[0..7], least significant byte first. */
#define STORE64L(x, y)                                  \
    do {                                                \
        (y)[7] = (unsigned char)(((x)>>56)&255);        \
        (y)[6] = (unsigned char)(((x)>>48)&255);        \
        (y)[5] = (unsigned char)(((x)>>40)&255);        \
        (y)[4] = (unsigned char)(((x)>>32)&255);        \
        (y)[3] = (unsigned char)(((x)>>24)&255);        \
        (y)[2] = (unsigned char)(((x)>>16)&255);        \
        (y)[1] = (unsigned char)(((x)>>8)&255);         \
        (y)[0] = (unsigned char)((x)&255);              \
    } while (0)
103
104
105/* MD5 macros */
106
/* The four MD5 round functions.  Every argument is parenthesized so that
   expression arguments (e.g. `p | q`) are combined as a unit. */
#define F(x,y,z)  ((z) ^ ((x) & ((y) ^ (z))))
#define G(x,y,z)  ((y) ^ ((z) & ((y) ^ (x))))
#define H(x,y,z)  ((x) ^ (y) ^ (z))
#define I(x,y,z)  ((y) ^ ((x) | (~(z))))
111
/* One MD5 step for each of the four rounds:
 *     a = b + ROL(a + f(b,c,d) + M + t, s)
 * M (message word) and t (step constant) are parenthesized so expression
 * arguments are added as a unit.  Each macro expands to two complete
 * statements including the trailing semicolon, so call sites are written
 * WITHOUT a semicolon; `a` must be a modifiable lvalue.
 */
#define FF(a,b,c,d,M,s,t) \
    a = (a + F(b,c,d) + (M) + (t)); a = ROLc(a, s) + b;

#define GG(a,b,c,d,M,s,t) \
    a = (a + G(b,c,d) + (M) + (t)); a = ROLc(a, s) + b;

#define HH(a,b,c,d,M,s,t) \
    a = (a + H(b,c,d) + (M) + (t)); a = ROLc(a, s) + b;

#define II(a,b,c,d,M,s,t) \
    a = (a + I(b,c,d) + (M) + (t)); a = ROLc(a, s) + b;
123
124
125static void md5_compress(struct md5_state *md5, const unsigned char *buf)
126{
127    MD5_INT32 i, W[16], a, b, c, d;
128
129    assert(md5 != NULL);
130    assert(buf != NULL);
131
132    /* copy the state into 512-bits into W[0..15] */
133    for (i = 0; i < 16; i++) {
134        LOAD32L(W[i], buf + (4*i));
135    }
136
137    /* copy state */
138    a = md5->state[0];
139    b = md5->state[1];
140    c = md5->state[2];
141    d = md5->state[3];
142
143    FF(a,b,c,d,W[0],7,0xd76aa478UL)
144    FF(d,a,b,c,W[1],12,0xe8c7b756UL)
145    FF(c,d,a,b,W[2],17,0x242070dbUL)
146    FF(b,c,d,a,W[3],22,0xc1bdceeeUL)
147    FF(a,b,c,d,W[4],7,0xf57c0fafUL)
148    FF(d,a,b,c,W[5],12,0x4787c62aUL)
149    FF(c,d,a,b,W[6],17,0xa8304613UL)
150    FF(b,c,d,a,W[7],22,0xfd469501UL)
151    FF(a,b,c,d,W[8],7,0x698098d8UL)
152    FF(d,a,b,c,W[9],12,0x8b44f7afUL)
153    FF(c,d,a,b,W[10],17,0xffff5bb1UL)
154    FF(b,c,d,a,W[11],22,0x895cd7beUL)
155    FF(a,b,c,d,W[12],7,0x6b901122UL)
156    FF(d,a,b,c,W[13],12,0xfd987193UL)
157    FF(c,d,a,b,W[14],17,0xa679438eUL)
158    FF(b,c,d,a,W[15],22,0x49b40821UL)
159    GG(a,b,c,d,W[1],5,0xf61e2562UL)
160    GG(d,a,b,c,W[6],9,0xc040b340UL)
161    GG(c,d,a,b,W[11],14,0x265e5a51UL)
162    GG(b,c,d,a,W[0],20,0xe9b6c7aaUL)
163    GG(a,b,c,d,W[5],5,0xd62f105dUL)
164    GG(d,a,b,c,W[10],9,0x02441453UL)
165    GG(c,d,a,b,W[15],14,0xd8a1e681UL)
166    GG(b,c,d,a,W[4],20,0xe7d3fbc8UL)
167    GG(a,b,c,d,W[9],5,0x21e1cde6UL)
168    GG(d,a,b,c,W[14],9,0xc33707d6UL)
169    GG(c,d,a,b,W[3],14,0xf4d50d87UL)
170    GG(b,c,d,a,W[8],20,0x455a14edUL)
171    GG(a,b,c,d,W[13],5,0xa9e3e905UL)
172    GG(d,a,b,c,W[2],9,0xfcefa3f8UL)
173    GG(c,d,a,b,W[7],14,0x676f02d9UL)
174    GG(b,c,d,a,W[12],20,0x8d2a4c8aUL)
175    HH(a,b,c,d,W[5],4,0xfffa3942UL)
176    HH(d,a,b,c,W[8],11,0x8771f681UL)
177    HH(c,d,a,b,W[11],16,0x6d9d6122UL)
178    HH(b,c,d,a,W[14],23,0xfde5380cUL)
179    HH(a,b,c,d,W[1],4,0xa4beea44UL)
180    HH(d,a,b,c,W[4],11,0x4bdecfa9UL)
181    HH(c,d,a,b,W[7],16,0xf6bb4b60UL)
182    HH(b,c,d,a,W[10],23,0xbebfbc70UL)
183    HH(a,b,c,d,W[13],4,0x289b7ec6UL)
184    HH(d,a,b,c,W[0],11,0xeaa127faUL)
185    HH(c,d,a,b,W[3],16,0xd4ef3085UL)
186    HH(b,c,d,a,W[6],23,0x04881d05UL)
187    HH(a,b,c,d,W[9],4,0xd9d4d039UL)
188    HH(d,a,b,c,W[12],11,0xe6db99e5UL)
189    HH(c,d,a,b,W[15],16,0x1fa27cf8UL)
190    HH(b,c,d,a,W[2],23,0xc4ac5665UL)
191    II(a,b,c,d,W[0],6,0xf4292244UL)
192    II(d,a,b,c,W[7],10,0x432aff97UL)
193    II(c,d,a,b,W[14],15,0xab9423a7UL)
194    II(b,c,d,a,W[5],21,0xfc93a039UL)
195    II(a,b,c,d,W[12],6,0x655b59c3UL)
196    II(d,a,b,c,W[3],10,0x8f0ccc92UL)
197    II(c,d,a,b,W[10],15,0xffeff47dUL)
198    II(b,c,d,a,W[1],21,0x85845dd1UL)
199    II(a,b,c,d,W[8],6,0x6fa87e4fUL)
200    II(d,a,b,c,W[15],10,0xfe2ce6e0UL)
201    II(c,d,a,b,W[6],15,0xa3014314UL)
202    II(b,c,d,a,W[13],21,0x4e0811a1UL)
203    II(a,b,c,d,W[4],6,0xf7537e82UL)
204    II(d,a,b,c,W[11],10,0xbd3af235UL)
205    II(c,d,a,b,W[2],15,0x2ad7d2bbUL)
206    II(b,c,d,a,W[9],21,0xeb86d391UL)
207
208    md5->state[0] = md5->state[0] + a;
209    md5->state[1] = md5->state[1] + b;
210    md5->state[2] = md5->state[2] + c;
211    md5->state[3] = md5->state[3] + d;
212}
213
214
215/**
216   Initialize the hash state
217   @param md5   The hash state you wish to initialize
218*/
219static void
220md5_init(struct md5_state *md5)
221{
222    assert(md5 != NULL);
223    md5->state[0] = 0x67452301UL;
224    md5->state[1] = 0xefcdab89UL;
225    md5->state[2] = 0x98badcfeUL;
226    md5->state[3] = 0x10325476UL;
227    md5->curlen = 0;
228    md5->length = 0;
229}
230
231/**
232   Process a block of memory though the hash
233   @param md5   The hash state
234   @param in     The data to hash
235   @param inlen  The length of the data (octets)
236*/
237static void
238md5_process(struct md5_state *md5, const unsigned char *in, Py_ssize_t inlen)
239{
240    Py_ssize_t n;
241
242    assert(md5 != NULL);
243    assert(in != NULL);
244    assert(md5->curlen <= sizeof(md5->buf));
245
246    while (inlen > 0) {
247        if (md5->curlen == 0 && inlen >= MD5_BLOCKSIZE) {
248           md5_compress(md5, in);
249           md5->length    += MD5_BLOCKSIZE * 8;
250           in             += MD5_BLOCKSIZE;
251           inlen          -= MD5_BLOCKSIZE;
252        } else {
253           n = Py_MIN(inlen, (Py_ssize_t)(MD5_BLOCKSIZE - md5->curlen));
254           memcpy(md5->buf + md5->curlen, in, (size_t)n);
255           md5->curlen    += (MD5_INT32)n;
256           in             += n;
257           inlen          -= n;
258           if (md5->curlen == MD5_BLOCKSIZE) {
259              md5_compress(md5, md5->buf);
260              md5->length += 8*MD5_BLOCKSIZE;
261              md5->curlen = 0;
262           }
263       }
264    }
265}
266
267/**
268   Terminate the hash to get the digest
269   @param md5  The hash state
270   @param out [out] The destination of the hash (16 bytes)
271*/
272static void
273md5_done(struct md5_state *md5, unsigned char *out)
274{
275    int i;
276
277    assert(md5 != NULL);
278    assert(out != NULL);
279    assert(md5->curlen < sizeof(md5->buf));
280
281    /* increase the length of the message */
282    md5->length += md5->curlen * 8;
283
284    /* append the '1' bit */
285    md5->buf[md5->curlen++] = (unsigned char)0x80;
286
287    /* if the length is currently above 56 bytes we append zeros
288     * then compress.  Then we can fall back to padding zeros and length
289     * encoding like normal.
290     */
291    if (md5->curlen > 56) {
292        while (md5->curlen < 64) {
293            md5->buf[md5->curlen++] = (unsigned char)0;
294        }
295        md5_compress(md5, md5->buf);
296        md5->curlen = 0;
297    }
298
299    /* pad up to 56 bytes of zeroes */
300    while (md5->curlen < 56) {
301        md5->buf[md5->curlen++] = (unsigned char)0;
302    }
303
304    /* store length */
305    STORE64L(md5->length, md5->buf+56);
306    md5_compress(md5, md5->buf);
307
308    /* copy output */
309    for (i = 0; i < 4; i++) {
310        STORE32L(md5->state[i], out+(4*i));
311    }
312}
313
314/* .Source: /cvs/libtom/libtomcrypt/src/hashes/md5.c,v $ */
315/* .Revision: 1.10 $ */
316/* .Date: 2007/05/12 14:25:28 $ */
317
318/*
319 * End of copied MD5 code.
320 *
321 * ------------------------------------------------------------------------
322 */
323
324typedef struct {
325    PyTypeObject* md5_type;
326} MD5State;
327
328static inline MD5State*
329md5_get_state(PyObject *module)
330{
331    void *state = PyModule_GetState(module);
332    assert(state != NULL);
333    return (MD5State *)state;
334}
335
336static MD5object *
337newMD5object(MD5State * st)
338{
339    MD5object *md5 = (MD5object *)PyObject_GC_New(MD5object, st->md5_type);
340    PyObject_GC_Track(md5);
341    return md5;
342}
343
344/* Internal methods for a hash object */
345static int
346MD5_traverse(PyObject *ptr, visitproc visit, void *arg)
347{
348    Py_VISIT(Py_TYPE(ptr));
349    return 0;
350}
351
352static void
353MD5_dealloc(PyObject *ptr)
354{
355    PyTypeObject *tp = Py_TYPE(ptr);
356    PyObject_GC_UnTrack(ptr);
357    PyObject_GC_Del(ptr);
358    Py_DECREF(tp);
359}
360
361
362/* External methods for a hash object */
363
364/*[clinic input]
365MD5Type.copy
366
367    cls: defining_class
368
369Return a copy of the hash object.
370[clinic start generated code]*/
371
372static PyObject *
373MD5Type_copy_impl(MD5object *self, PyTypeObject *cls)
374/*[clinic end generated code: output=bf055e08244bf5ee input=d89087dcfb2a8620]*/
375{
376    MD5State *st = PyType_GetModuleState(cls);
377
378    MD5object *newobj;
379    if ((newobj = newMD5object(st))==NULL)
380        return NULL;
381
382    newobj->hash_state = self->hash_state;
383    return (PyObject *)newobj;
384}
385
386/*[clinic input]
387MD5Type.digest
388
389Return the digest value as a bytes object.
390[clinic start generated code]*/
391
392static PyObject *
393MD5Type_digest_impl(MD5object *self)
394/*[clinic end generated code: output=eb691dc4190a07ec input=bc0c4397c2994be6]*/
395{
396    unsigned char digest[MD5_DIGESTSIZE];
397    struct md5_state temp;
398
399    temp = self->hash_state;
400    md5_done(&temp, digest);
401    return PyBytes_FromStringAndSize((const char *)digest, MD5_DIGESTSIZE);
402}
403
404/*[clinic input]
405MD5Type.hexdigest
406
407Return the digest value as a string of hexadecimal digits.
408[clinic start generated code]*/
409
410static PyObject *
411MD5Type_hexdigest_impl(MD5object *self)
412/*[clinic end generated code: output=17badced1f3ac932 input=b60b19de644798dd]*/
413{
414    unsigned char digest[MD5_DIGESTSIZE];
415    struct md5_state temp;
416
417    /* Get the raw (binary) digest value */
418    temp = self->hash_state;
419    md5_done(&temp, digest);
420
421    return _Py_strhex((const char*)digest, MD5_DIGESTSIZE);
422}
423
424/*[clinic input]
425MD5Type.update
426
427    obj: object
428    /
429
430Update this hash object's state with the provided string.
431[clinic start generated code]*/
432
433static PyObject *
434MD5Type_update(MD5object *self, PyObject *obj)
435/*[clinic end generated code: output=f6ad168416338423 input=6e1efcd9ecf17032]*/
436{
437    Py_buffer buf;
438
439    GET_BUFFER_VIEW_OR_ERROUT(obj, &buf);
440
441    md5_process(&self->hash_state, buf.buf, buf.len);
442
443    PyBuffer_Release(&buf);
444    Py_RETURN_NONE;
445}
446
447static PyMethodDef MD5_methods[] = {
448    MD5TYPE_COPY_METHODDEF
449    MD5TYPE_DIGEST_METHODDEF
450    MD5TYPE_HEXDIGEST_METHODDEF
451    MD5TYPE_UPDATE_METHODDEF
452    {NULL,        NULL}         /* sentinel */
453};
454
455static PyObject *
456MD5_get_block_size(PyObject *self, void *closure)
457{
458    return PyLong_FromLong(MD5_BLOCKSIZE);
459}
460
461static PyObject *
462MD5_get_name(PyObject *self, void *closure)
463{
464    return PyUnicode_FromStringAndSize("md5", 3);
465}
466
467static PyObject *
468md5_get_digest_size(PyObject *self, void *closure)
469{
470    return PyLong_FromLong(MD5_DIGESTSIZE);
471}
472
473static PyGetSetDef MD5_getseters[] = {
474    {"block_size",
475     (getter)MD5_get_block_size, NULL,
476     NULL,
477     NULL},
478    {"name",
479     (getter)MD5_get_name, NULL,
480     NULL,
481     NULL},
482    {"digest_size",
483     (getter)md5_get_digest_size, NULL,
484     NULL,
485     NULL},
486    {NULL}  /* Sentinel */
487};
488
489static PyType_Slot md5_type_slots[] = {
490    {Py_tp_dealloc, MD5_dealloc},
491    {Py_tp_methods, MD5_methods},
492    {Py_tp_getset, MD5_getseters},
493    {Py_tp_traverse, MD5_traverse},
494    {0,0}
495};
496
497static PyType_Spec md5_type_spec = {
498    .name = "_md5.md5",
499    .basicsize =  sizeof(MD5object),
500    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION |
501              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_HAVE_GC),
502    .slots = md5_type_slots
503};
504
/* The single module-level function: md5() */
506
507/*[clinic input]
508_md5.md5
509
510    string: object(c_default="NULL") = b''
511    *
512    usedforsecurity: bool = True
513
514Return a new MD5 hash object; optionally initialized with a string.
515[clinic start generated code]*/
516
517static PyObject *
518_md5_md5_impl(PyObject *module, PyObject *string, int usedforsecurity)
519/*[clinic end generated code: output=587071f76254a4ac input=7a144a1905636985]*/
520{
521    MD5object *new;
522    Py_buffer buf;
523
524    if (string)
525        GET_BUFFER_VIEW_OR_ERROUT(string, &buf);
526
527    MD5State *st = md5_get_state(module);
528    if ((new = newMD5object(st)) == NULL) {
529        if (string)
530            PyBuffer_Release(&buf);
531        return NULL;
532    }
533
534    md5_init(&new->hash_state);
535
536    if (PyErr_Occurred()) {
537        Py_DECREF(new);
538        if (string)
539            PyBuffer_Release(&buf);
540        return NULL;
541    }
542    if (string) {
543        md5_process(&new->hash_state, buf.buf, buf.len);
544        PyBuffer_Release(&buf);
545    }
546
547    return (PyObject *)new;
548}
549
550
551/* List of functions exported by this module */
552
553static struct PyMethodDef MD5_functions[] = {
554    _MD5_MD5_METHODDEF
555    {NULL,      NULL}            /* Sentinel */
556};
557
558static int
559_md5_traverse(PyObject *module, visitproc visit, void *arg)
560{
561    MD5State *state = md5_get_state(module);
562    Py_VISIT(state->md5_type);
563    return 0;
564}
565
566static int
567_md5_clear(PyObject *module)
568{
569    MD5State *state = md5_get_state(module);
570    Py_CLEAR(state->md5_type);
571    return 0;
572}
573
574static void
575_md5_free(void *module)
576{
577    _md5_clear((PyObject *)module);
578}
579
580/* Initialize this module. */
581static int
582md5_exec(PyObject *m)
583{
584    MD5State *st = md5_get_state(m);
585
586    st->md5_type = (PyTypeObject *)PyType_FromModuleAndSpec(
587        m, &md5_type_spec, NULL);
588
589    if (st->md5_type == NULL) {
590        return -1;
591    }
592
593    Py_INCREF((PyObject *)st->md5_type);
594    if (PyModule_AddObject(m, "MD5Type", (PyObject *)st->md5_type) < 0) {
595         Py_DECREF(st->md5_type);
596        return -1;
597    }
598
599    return 0;
600}
601
602static PyModuleDef_Slot _md5_slots[] = {
603    {Py_mod_exec, md5_exec},
604    {0, NULL}
605};
606
607
608static struct PyModuleDef _md5module = {
609        PyModuleDef_HEAD_INIT,
610        .m_name = "_md5",
611        .m_size = sizeof(MD5State),
612        .m_methods = MD5_functions,
613        .m_slots = _md5_slots,
614        .m_traverse = _md5_traverse,
615        .m_clear = _md5_clear,
616        .m_free = _md5_free,
617};
618
619PyMODINIT_FUNC
620PyInit__md5(void)
621{
622    return PyModuleDef_Init(&_md5module);
623}
624