C implementation of MD5 gives wrong hash

0

Recently, I wrote my own C implementation of the MD5 algorithm. I know that open-source libraries exist, but I wanted to do it for practice. My implementation returns the correct digest for the empty string "", but it does not return the correct digest for "The quick brown fox jumps over the lazy dog". Since it works with the empty string, I assume the problem has to do either with the padding or with appending the length to the end of the block.

Here is my code:

/*
 * md5.c
 *
 *  Created on: Mar 25, 2020
 */

#include "checksum_algorithms.h"
#include "math.h"
#include "stdlib.h"
#include <stdint.h>
#include <string.h>

/**************************************************************************
 * Local variables
 *************************************************************************/

/*
 * Per-step additive constants of the MD5 compression function
 * (the 64-entry table of RFC 1321, section 3.4).
 * Read-only lookup data, so it is kept private to this file and immutable.
 */
static const uint32_t K[64] =
{
    /* steps 0..15 */
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    /* steps 16..31 */
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    /* steps 32..47 */
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    /* steps 48..63 */
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
};

/*
 * Left-rotation amount used by each of the 64 MD5 steps
 * (RFC 1321, section 3.4): four amounts per round, repeated four times.
 * Read-only lookup data, so it is kept private to this file and immutable.
 */
static const uint32_t s[64] =
{
    7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,   /* steps 0..15  */
    5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,   /* steps 16..31 */
    4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,   /* steps 32..47 */
    6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21    /* steps 48..63 */
};

/* MD5 chaining words A, B, C and D, initialised to the values given in RFC 1321, section 3.3. */
uint32_t a0 = 0x67452301u;
uint32_t b0 = 0xefcdab89u;
uint32_t c0 = 0x98badcfeu;
uint32_t d0 = 0x10325476u;

/**************************************************************************
 * Local functions
 *************************************************************************/

/*
 * Returns x with its four bytes in reverse order
 * (byte 0 <-> byte 3, byte 1 <-> byte 2).
 */
static uint32_t SWAPWORD32(uint32_t x)
{
    uint32_t reversed = 0;
    int n;

    /* Peel bytes off the low end of x and push them onto the low end of the result. */
    for (n = 0; n < 4; n++)
    {
        reversed = (reversed << 8) | (x & 0xffu);
        x >>= 8;
    }
    return reversed;
}

/*
 * Rotates the 32-bit value x left by N bit positions.
 *
 * The count is reduced modulo 32, so N == 0 and N >= 32 are well defined
 * (a plain "x >> (32 - N)" would shift by the full type width for N == 0,
 * which is undefined behaviour in C).
 */
static uint32_t leftrotate(uint32_t x, uint16_t N)
{
    const unsigned count = N & 31u;

    /* For count == 0 both shifts are by 0 and the result is simply x. */
    return (uint32_t)((x << count) | (x >> ((32u - count) & 31u)));
}

/*
 * Bitwise select: each result bit is taken from y where x has a 1 bit
 * and from z where x has a 0 bit. Used for steps 0..15.
 */
static uint32_t f(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t differs = y ^ z;

    /* Start from z and flip to y only in the positions that x selects. */
    return z ^ (x & differs);
}

/*
 * Bitwise select keyed on z: each result bit is taken from x where z has
 * a 1 bit and from y where z has a 0 bit. Used for steps 16..31.
 */
static uint32_t g(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t differs = x ^ y;

    /* Start from y and flip to x only in the positions that z selects. */
    return y ^ (z & differs);
}

/*
 * Bitwise parity: the exclusive OR of the three inputs. Used for steps 32..47.
 */
static uint32_t h(uint32_t x, uint32_t y, uint32_t z)
{
    uint32_t parity = x;

    parity ^= y;
    parity ^= z;
    return parity;
}

/*
 * Returns y XOR (x OR NOT z), computed bitwise. Used for steps 48..63.
 */
static uint32_t i(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t inverted_z = ~z;
    const uint32_t merged = x | inverted_z;

    return y ^ merged;
}

/*
 * Appends the MD5 padding and length trailer (RFC 1321, sections 3.1 and 3.2)
 * to a message, in place.
 *
 * pWords  - buffer holding the message. It must be writable and have room
 *           for up to 72 bytes after the last complete message byte.
 * bitSize - length of the message in bits.
 * padSize - receives the padded length in bits; always a multiple of 512.
 *
 * Layout written after the message: a single 1 bit, then zero bits up to
 * 56 bytes modulo 64, then bitSize as a 64-bit little-endian integer.
 * At least one padding bit is always added, so a message that already ends
 * at 56 bytes modulo 64 grows by a whole extra block instead of having its
 * marker overwritten by the length field.
 */
static void length_padding(uint8_t* pWords, uint64_t bitSize, uint64_t* padSize)
{
    const uint64_t fullBytes = bitSize >> 3;
    const unsigned tailBits = (unsigned)(bitSize & 7u);
    /* Zero bytes needed after the marker byte to reach 56 (mod 64). */
    const uint64_t zeroBytes = (119u - (fullBytes & 63u)) & 63u;
    uint8_t* pCursor = pWords + fullBytes;
    unsigned n;

    if (tailBits == 0)
    {
        *pCursor = 0x80;
    }
    else
    {
        /* Message bits fill a byte from its most significant end: keep them,
         * set the next lower bit as the marker and clear everything below it. */
        *pCursor = (uint8_t)((*pCursor & (0xffu << (8u - tailBits))) | (0x80u >> tailBits));
    }
    pCursor++;

    memset(pCursor, 0, (size_t)zeroBytes);
    pCursor += zeroBytes;

    /* Store the length byte by byte so the result is little-endian on any host. */
    for (n = 0; n < 8; n++)
    {
        *pCursor++ = (uint8_t)(bitSize >> (8u * n));
    }

    *padSize = (fullBytes + 1u + zeroBytes + 8u) << 3;
}

/**************************************************************************
 * Global functions
 *************************************************************************/

/*
 * Runs the 64-step MD5 compression function over one 64-byte block and
 * adds the result into state[0..3].
 *
 * The sixteen message words are decoded from bytes explicitly (little-endian),
 * so the block needs no particular alignment and the host byte order is
 * irrelevant.
 */
static void md5_process_block(uint32_t* state, const uint8_t* block)
{
    uint32_t M[16];
    uint32_t A = state[0];
    uint32_t B = state[1];
    uint32_t C = state[2];
    uint32_t D = state[3];
    uint32_t F, temp;
    unsigned G, j;

    for (j = 0; j < 16; j++)
    {
        M[j] = (uint32_t)block[4*j]
             | ((uint32_t)block[4*j + 1] << 8)
             | ((uint32_t)block[4*j + 2] << 16)
             | ((uint32_t)block[4*j + 3] << 24);
    }

    for (j = 0; j < 64; j++)
    {
        /* Each group of 16 steps uses its own mixing function and its own
         * order of visiting the message words. */
        if (j < 16)
        {
            F = f(B, C, D);
            G = j;
        }
        else if (j < 32)
        {
            F = g(B, C, D);
            G = (5*j + 1)%16;
        }
        else if (j < 48)
        {
            F = h(B, C, D);
            G = (3*j + 5)%16;
        }
        else
        {
            F = i(B, C, D);
            G = (7*j)%16;
        }
        temp = D;
        D = C;
        C = B;
        B += leftrotate(A + F + K[j] + M[G], (uint16_t)s[j]);
        A = temp;
    }

    state[0] += A;
    state[1] += B;
    state[2] += C;
    state[3] += D;
}

/*
 * Computes the MD5 digest of a message.
 *
 * pData   - the message bytes; only read, never modified, and no spare room
 *           or alignment is required.
 * bitSize - length of the message in bits.
 * digest  - receives four 32-bit words. Each word is the byte-swapped state
 *           word, so printing digest[0..3] as four 8-digit hexadecimal
 *           numbers gives the conventional digest string.
 *
 * The running state lives in local variables seeded from a0..d0, which are
 * left untouched, so the function can be called any number of times.
 * Complete 64-byte blocks are hashed straight from pData; the remaining
 * bytes are copied into a local scratch buffer and padded there.
 */
void calculate_md5(void* pData, uint64_t bitSize, uint32_t* digest)
{
    const uint8_t* pMsg = (const uint8_t*)pData;
    const uint64_t fullBlocks = bitSize / 512;
    const uint64_t tailBits = bitSize % 512;
    const size_t tailBytes = (size_t)((tailBits + 7) >> 3);
    uint8_t tail[2 * MD5_BUF_LEN];
    uint32_t state[4];
    uint64_t padSize = 0;
    uint64_t k;
    size_t lenPos;
    unsigned n;

    state[0] = a0;
    state[1] = b0;
    state[2] = c0;
    state[3] = d0;

    for (k = 0; k < fullBlocks; k++)
    {
        md5_process_block(state, pMsg + (size_t)k * MD5_BUF_LEN);
    }

    /* Pad a private copy of the last partial block (possibly empty). */
    memset(tail, 0, sizeof tail);
    if (tailBytes > 0)
    {
        memcpy(tail, pMsg + (size_t)fullBlocks * MD5_BUF_LEN, tailBytes);
    }
    length_padding(tail, tailBits, &padSize);

    /* length_padding recorded the bit count it was given, i.e. only the tail;
     * the trailer must carry the length of the whole message, little-endian. */
    lenPos = (size_t)(padSize / 8) - 8;
    for (n = 0; n < 8; n++)
    {
        tail[lenPos + n] = (uint8_t)(bitSize >> (8u * n));
    }

    for (k = 0; k < padSize/512; k++)
    {
        md5_process_block(state, tail + (size_t)k * MD5_BUF_LEN);
    }

    for (n = 0; n < 4; n++)
    {
        digest[n] = SWAPWORD32(state[n]);
    }
}

The header file "checksum_algorithms.h" just contains a few function prototypes, and definitions. MD5_BUF_LEN is defined as 64. I then call the function in main() as follows:

String md5_test = "The quick brown fox jumps over the lazy dog";
uint32_t digest[4] = {0, 0, 0, 0};
calculate_md5(&md5_test, strlen(md5_test)*8, digest);

Does anybody see where the problem originates? I know that this is a very basic example, but I am grateful for any tips, including any feedback on coding style. As a side note: I am running this example on a 32-bit, little-endian processor. Thank you in advance!

c
hash
md5
asked on Stack Overflow Mar 28, 2020 by LittleCoderGirl

0 Answers

Nobody has answered this question yet.


User contributions licensed under CC BY-SA 3.0