Wrong SHA-1 hash

1

I'm planning to use AVR-Crypto's SHA-1 implementation for HMAC. However, I can't seem to generate the correct SHA-1 sum.

For instance, if I call the function with the following

  unsigned char sha1sum[20];
  char *msg = "FFFFFFFFFF";

  sha1( sha1sum, msg, strlen(msg));

I get 000000000000000000002C002312290000000029 rather than the expected c1bb92851109fe950a2655fa1d4ba1d04719f6fb. Does anyone know what might be wrong? Here's the AVR-Crypto's implementation

#include <string.h> /* memcpy & co */
#include <stdint.h>
#include "config.h"
#include "debug.h"
#include "sha1.h"

#ifdef DEBUG
#  undef DEBUG
#endif

#include "cli.h"

#define LITTLE_ENDIAN

/********************************************************************************************************/

/**
 * \brief initialises given SHA-1 context
 *
 */
void sha1_init(sha1_ctx_t *state){
    DEBUG_S("\r\nSHA1_INIT");
    state->h[0] = 0x67452301;
    state->h[1] = 0xefcdab89;
    state->h[2] = 0x98badcfe;
    state->h[3] = 0x10325476;
    state->h[4] = 0xc3d2e1f0;
    state->length = 0;
}

/********************************************************************************************************/
/* some helping functions */
uint32_t rotl32(uint32_t n, uint8_t bits){
    return ((n<<bits) | (n>>(32-bits)));
}

uint32_t change_endian32(uint32_t x){
    return (((x)<<24) | ((x)>>24) | (((x)& 0x0000ff00)<<8) | (((x)& 0x00ff0000)>>8));
}


/* three SHA-1 inner functions */
uint32_t ch(uint32_t x, uint32_t y, uint32_t z){
    DEBUG_S("\r\nCH");
    return ((x&y)^((~x)&z));
}

uint32_t maj(uint32_t x, uint32_t y, uint32_t z){
    DEBUG_S("\r\nMAJ");
    return ((x&y)^(x&z)^(y&z));
}

uint32_t parity(uint32_t x, uint32_t y, uint32_t z){
    DEBUG_S("\r\nPARITY");
    return ((x^y)^z);
}

/********************************************************************************************************/
/**
 * \brief "add" a block to the hash
 * This is the core function of the hash algorithm. To understand how it's working
 * and what thoese variables do, take a look at FIPS-182. This is an "alternativ" implementation
 */

#define MASK 0x0000000f

typedef uint32_t (*pf_t)(uint32_t x, uint32_t y, uint32_t z);

void sha1_nextBlock (sha1_ctx_t *state, const void *block){
    uint32_t a[5];
    uint32_t w[16];
    uint32_t temp;
    uint8_t t,s,fi, fib;
    pf_t f[] = {ch,parity,maj,parity};
    uint32_t k[4]={ 0x5a827999,
                    0x6ed9eba1,
                    0x8f1bbcdc,
                    0xca62c1d6};

    /* load the w array (changing the endian and so) */
    for(t=0; t<16; ++t){
        w[t] = change_endian32(((uint32_t*)block)[t]);
    }

#if DEBUG
    uint8_t dbgi;
    for(dbgi=0; dbgi<16; ++dbgi){
        /*
        DEBUG_S("\n\rBlock:");
        DEBUG_B(dbgi);
        DEBUG_C(':');
        */
        cli_putstr_P(PSTR("\r\nBlock:"));
        cli_hexdump(&dbgi, 1);
        cli_putc(':');
        cli_hexdump(&(w[dbgi]) ,4);
    }
#endif

    /* load the state */
    memcpy(a, state->h, 5*sizeof(uint32_t));


    /* the fun stuff */
    for(fi=0,fib=0,t=0; t<=79; ++t){
        s = t & MASK;
        if(t>=16){
            #if DEBUG
             DEBUG_S("\r\n ws = "); cli_hexdump(&(w[s]), 4);
            #endif
            w[s] = rotl32( w[(s+13)&MASK] ^ w[(s+8)&MASK] ^
                 w[(s+ 2)&MASK] ^ w[s] ,1);
            #ifdef DEBUG
             DEBUG_S(" --> ws = "); cli_hexdump(&(w[s]), 4);
            #endif
        }

        uint32_t dtemp;
        temp = rotl32(a[0],5) + (dtemp=f[fi](a[1],a[2],a[3])) + a[4] + k[fi] + w[s];
        memmove(&(a[1]), &(a[0]), 4*sizeof(uint32_t)); /* e=d; d=c; c=b; b=a; */
        a[0] = temp;
        a[2] = rotl32(a[2],30); /* we might also do rotr32(c,2) */
        fib++;
        if(fib==20){
            fib=0;
            fi = (fi+1)%4;
        }
        #if DEBUG
        /* debug dump */
        DEBUG_S("\r\nt = "); DEBUG_B(t);
        DEBUG_S("; a[]: ");
         cli_hexdump(a, 5*4);
        DEBUG_S("; k = ");
         cli_hexdump(&(k[t/20]), 4);
        DEBUG_S("; f(b,c,d) = ");
         cli_hexdump(&dtemp, 4);
        #endif
    }

    /* update the state */
    for(t=0; t<5; ++t){
        state->h[t] += a[t];
    }
    state->length += 512;
}

/********************************************************************************************************/

void sha1_lastBlock(sha1_ctx_t *state, const void *block, uint16_t length){
    uint8_t lb[SHA1_BLOCK_BYTES]; /* local block */
    while(length>=SHA1_BLOCK_BITS){
        sha1_nextBlock(state, block);
        length -= SHA1_BLOCK_BITS;
        block = (uint8_t*)block + SHA1_BLOCK_BYTES;
    }
    state->length += length;
    memset(lb, 0, SHA1_BLOCK_BYTES);
    memcpy (lb, block, (length+7)>>3);

    /* set the final one bit */
    lb[length>>3] |= 0x80>>(length & 0x07);

    if (length>512-64-1){ /* not enouth space for 64bit length value */
        sha1_nextBlock(state, lb);
        state->length -= 512;
        memset(lb, 0, SHA1_BLOCK_BYTES);
    }
    /* store the 64bit length value */
#if defined LITTLE_ENDIAN
        /* this is now rolled up */
    uint8_t i;
    for (i=0; i<8; ++i){
        lb[56+i] = ((uint8_t*)&(state->length))[7-i];
    }
#elif defined BIG_ENDIAN
    *((uint64_t)&(lb[56])) = state->length;
#endif
    sha1_nextBlock(state, lb);
}

/********************************************************************************************************/

void sha1_ctx2hash (void *dest, sha1_ctx_t *state){
#if defined LITTLE_ENDIAN
    uint8_t i;
    for(i=0; i<5; ++i){
        ((uint32_t*)dest)[i] = change_endian32(state->h[i]);
    }
#elif BIG_ENDIAN
    if (dest != state->h)
        memcpy(dest, state->h, SHA1_HASH_BITS/8);
#else
# error unsupported endian type!
#endif
}

/********************************************************************************************************/
/**
 *
 *
 */
void sha1 (void *dest, const void *msg, uint32_t length){
    sha1_ctx_t s;
    DEBUG_S("\r\nBLA BLUB");
    sha1_init(&s);
    while(length & (~0x0001ff)){ /* length>=512 */
        DEBUG_S("\r\none block");
        sha1_nextBlock(&s, msg);
        msg = (uint8_t*)msg + SHA1_BLOCK_BITS/8; /* increment pointer to next block */
        length -= SHA1_BLOCK_BITS;
    }
    sha1_lastBlock(&s, msg, length);
    sha1_ctx2hash(dest, &s);
}

Here's the header:

#ifndef SHA1_H_
#define SHA1_H_

#include "stdint.h"
/** \def SHA1_HASH_BITS
 * definees the size of a SHA-1 hash in bits 
 */

/** \def SHA1_HASH_BYTES
 * definees the size of a SHA-1 hash in bytes 
 */

/** \def SHA1_BLOCK_BITS
 * definees the size of a SHA-1 input block in bits 
 */

/** \def SHA1_BLOCK_BYTES
 * definees the size of a SHA-1 input block in bytes 
 */
#define SHA1_HASH_BITS  160
#define SHA1_HASH_BYTES (SHA1_HASH_BITS/8)
#define SHA1_BLOCK_BITS 512
#define SHA1_BLOCK_BYTES (SHA1_BLOCK_BITS/8)

/** \typedef sha1_ctx_t
 * \brief SHA-1 context type
 * 
 * A vatiable of this type may hold the state of a SHA-1 hashing process
 */
typedef struct {
    uint32_t h[5];
//  uint64_t length;
    uint8_t length;
} sha1_ctx_t;

/** \typedef sha1_hash_t
 * \brief hash value type
 * A variable of this type may hold a SHA-1 hash value 
 */
/*
typedef uint8_t sha1_hash_t[SHA1_HASH_BITS/8];
*/

/** \fn sha1_init(sha1_ctx_t *state)
 * \brief initializes a SHA-1 context
 * This function sets a ::sha1_ctx_t variable to the initialization vector
 * for SHA-1 hashing.
 * \param state pointer to the SHA-1 context variable
 */
void sha1_init(sha1_ctx_t *state);

/** \fn sha1_nextBlock(sha1_ctx_t *state, const void *block)
 *  \brief process one input block
 * This function processes one input block and updates the hash context 
 * accordingly
 * \param state pointer to the state variable to update
 * \param block pointer to the message block to process
 */
void sha1_nextBlock (sha1_ctx_t *state, const void *block);

/** \fn sha1_lastBlock(sha1_ctx_t *state, const void *block, uint16_t length_b)
 * \brief processes the given block and finalizes the context
 * This function processes the last block in a SHA-1 hashing process.
 * The block should have a maximum length of a single input block.
 * \param state pointer to the state variable to update and finalize
 * \param block pointer to themessage block to process
 * \param length_b length of the message block in bits  
 */
void sha1_lastBlock (sha1_ctx_t *state, const void *block, uint16_t length_b);

/** \fn sha1_ctx2hash(sha1_hash_t *dest, sha1_ctx_t *state)
 * \brief convert a state variable into an actual hash value
 * Writes the hash value corresponding to the state to the memory pointed by dest.
 * \param dest pointer to the hash value destination
 * \param state pointer to the hash context
 */ 
void sha1_ctx2hash (void *dest, sha1_ctx_t *state);

/** \fn sha1(sha1_hash_t *dest, const void *msg, uint32_t length_b)
 * \brief hashing a message which in located entirely in RAM
 * This function automatically hashes a message which is entirely in RAM with
 * the SHA-1 hashing algorithm.
 * \param dest pointer to the hash value destination
 * \param msg  pointer to the message which should be hashed
 * \param length_b length of the message in bits
 */ 
void sha1(void *dest, const void *msg, uint32_t length_b);



#endif /*SHA1_H_*/

UPDATE If I initialise sha1sum with unsigned char sha1sum[20] = 0;, the resulting sum is all 0x00.

c
encryption
cryptography
sha
asked on Stack Overflow Nov 3, 2015 by Kar • edited Nov 4, 2015 by Kar

1 Answer

6

There are at least two bugs in the question's code (detailed below), but neither can explain the result shown, and the added fact that unsigned char sha1sum[20] = {0} in the calling code changes the result. Something is wrong with the translation from the C source code that we read into machine code! Most likely, sha1_ctx2hash did not write where it should.

Problem could be in a header not in the question, a compiler bug... Since we are on a 8051, that could be/have been a problem with pointer types, especially in the pointer casts that MUST be to a pointer of the same size.

Also, is it dead sure the 8051 compiler is little-endian? It seems the common Keil C51 uses big-endian convention. That's an arbitrary choice of the compiler+support library, since on the original 8051 there is no multi-byte data-related instruction, the closest thing is LCALL which stack pushes are little-endian, but LJMP and MOV DPTR,# code is big-endian. Update: We are told the compiler is by IAR. According to IAR's documentation, version 5 was big-endian, and that changed to little-endian in version 6.

Update: we found another critical issue (beyond likely unsafe pointer casting, and the two bugs discussed below). At some point in the hunt, replacing the code with a single procedure having no endianness dependency or pointer cast, the output became 0000eb1700007f3d000004f0000059290000fc21 and that suggested would-be-32-bit values are truncated to 16-bit. Indeed, the OP revealed:

I have this in my stdint.h:   typedef unsigned uint32_t;

This is correct only on compilers where unsigned int is exactly 32-bit, when the only insurance given by the C standard is that it is at least 16-bit, and that minimum is used by most C compilers for less-than-32-bit CPUs (for efficiency reasons; some even have an option to disable promotion of byte operands to integer, and are even happy with 80+80+96 being 0).


Bug in the test code: sha1( sha1sum, msg, strlen(msg)) should be sha1( sha1sum, msg, strlen(msg)*8) or the like, because the length parameters is in bits.

Bug in the sha1_lastBlock w.r.t. header file: the code reading

for (i=0; i<8; ++i){
    lb[56+i] = ((uint8_t*)&(state->length))[7-i];
}

assumes that state->length is 8 bytes, when it is not, because uint64_t length was changed to uint8_t length in the header (it is common that uint64_t is not available on 8051 compilers). The code for the big-endian case (not presently compiled) is affected too.

If indeed uint8_t length and thus a restriction to length of 31 bytes at most is acceptable, both the little-endian and big-endian cases reduce to lb[SHA1_BLOCK_BYTES-1] = state->length; (without a loop).

Or, for whatever unsigned type and endianness length might use:

for (i = SHA1_BLOCK_BYTES; state->length != 0; state->length >>= 8)
    lb[--i] = (uint8_t)(state->length);

Note: The code *((uint64_t*)&(lb[56])) = state->length was writing the 8 bytes of length to the end of array lb[], but is correct only on big-endian machines with proper uint64_t.


The code has as potential extra problem when (length+7)%8 < 6: at least one bit in the last byte to hash is not masked, and if set it enters the hash and makes it wrong. That won't harm in the use case where full bytes are hashed.


The original code might be correct (except for the above potential extra problem), but is needlessly complex given that the objective is hashing in-memory data with a single call (what sha1 does), and neither compact nor readable. Among other issues:

  • there is (rightly) a block loop in sha1_lastBlock, thus the restriction worded in the header The block should have a maximum length of a single input block does not exist;
  • that makes the other block loop in sha1 redundant;
  • both of these loops can be removed if using uint8_t length, or otherwise hashing less than 56 bytes;
  • the round loop is likely slowed by a memmove of 16 bytes, and a function call in a vector from an indexed table;
  • endianness conversion in the little-endian case is rather inefficient;
  • in sha1_ctx2hash, the #elif BIG_ENDIAN triggers an error in my mental compiler, since BIG_ENDIAN seems undefined and #elif is supposed to have an argument; that should be #elif defined BIG_ENDIAN (as used a few lines above);
  • pf_t f[] = {ch,parity,maj,parity}; is a good candidate for const, perhaps static: every C compiler for 8051 that I ever used won't recognize that the array is not changed after setup, and thus can be carved in code;
  • with such compilers, needlessly using function pointers (as above) is a tried and test method to harm performance, or worse; at the very least it blocks the analysis of the call tree, needed to allocate automatic variables at static addresses with overlay, which in turn improves performance and code size significantly.

If you are after speed, the code you start from is inadequate, and nothing will quite match assembly language. Like two decades ago I wrote SHA-1 for some 8051 toolchain, and assembly tuneups gave huge savings compared to C only (IIRC: mostly because 32-bit rotations where abyssal from a performance standpoint).


Updated: Here is illustrative code to hash a short message, in an endian-neutral way, without any pointer cast, and without depending on <stdint.h> (which turns out to be inadequate for the compiler used). Beware that the length parameter is in bytes (not bits), with a limit of 55 bytes that does NOT allow implementing HMAC-SHA-1 on top. That's to keep the code simple: over that limit we need several iterations of the compression function, thus either heavy code duplication, at least two functions, or some sort of state machine.

#include <limits.h> // for UCHAR_MAX, UINT_MAX, ULONG_MAX

// Compute the SHA-1 hash of a short msg, of length at most 55 bytes
// Result hash must be 20 bytes; it can overlap msg.
// CAUTION: if length>55 the result is wrong, and if length>59
// we loose second-preimage resistance, thus collision-resistance.
void sha1upto55bytes(
          unsigned char *hash,  // result, 20 bytes
    const unsigned char *msg,   // bytes to hash
          unsigned char length  // length of msg in bytes, maximum 55
    )
    {
    // We locally (re)define uint8_t and uint32_t so as not to depend of <stdint.h>
    // which is not available on some old C compilers for embedded systems.
#if 255==UCHAR_MAX
    typedef unsigned char uint8_t;
#endif
#if 16383==UINT_MAX>>9>>9
    typedef unsigned int uint32_t;
#elif  16383==ULONG_MAX>>9>>9
    typedef unsigned long uint32_t;
#endif

    // Internal buffer (64 bytes)
    // We require 8-bit uint8_t, 32-bit uint32_t, and integer promotion; otherwise,
    // we try to abort compilation on the following declaration.
    uint32_t w[
        99==(uint8_t)355              &&  // check uint8_t
        4303==(uint32_t)(-1)/999u/999 &&  // check uint32_t
        440==(uint8_t)55<<3               // check integer promotion
        ? 16 : -1];                       // negative index if error

    // Type for state, so that we can use struct copy for that
    typedef struct state_t { uint32_t q[5]; } state_t;

    // Initial state; use single quotes if the compiler barks
    const state_t s = {{ 0x67452301,0xefcdab89,0x98badcfe,0x10325476,0xc3d2e1f0 }};

    // Active state (20 bytes); on 8051 should be in internal RAM for best performance
    state_t h = s;  // initialize the state using a struct copy

   // Workhorse temporary; on 8051 should be in internal RAM for best performance
    uint32_t x;

    // Workhorse index; on 8051 should be a register for performance
    uint8_t  j;

    // Prepare the single block to hash; this code works regardless of endianness,
    // and does not perform misaligned memory accesses if msg is misaligned.
    x = 0;  // This is only to prevent a bogus compiler warning
    j = 0;
    do
        {   // for each block byte, up to and including high 4 bytes of length
        x <<= 8;
        if (j < length)
            x |= *msg++;    // message byte
        else
            if (j == length)
                x |= 0x80;  // padding byte
        if ((j&3)==3)
            w[j >> 2] = x;
        }
    while (++j!=60);
    w[15] = length << 3;    // length in bits, needs integer promotion for length>31

    // Hash that block
    j = 0;
    do {        // round loop, run 80 times
        do {        // dummy loop (avoid a goto)
            if (j<40) {
                if (j<20) {             // for rounds 0..19
                    x = (((h.q[2] ^ h.q[3])&h.q[1]) ^ h.q[3]) + 0x5A827999;
                    break;  // out of dummy loop
                    }
                else
                    x = 0x6ED9EBA1;     // for rounds 20..39
                }
            else {
                if (j<60) {             // for rounds 40..59
                    x = (h.q[1] | h.q[2])&h.q[3];
                    x |= h.q[1] & h.q[2];
                    x += 0x8F1BBCDC;
                    break;
                    }
                else
                    x = 0xCA62C1D6;     // for rounds 60..79
                }
            // for rounds 20..39 and 60..79
            x += h.q[1] ^ h.q[2] ^ h.q[3];
            }
        while (0);      // end of of dummy loop
        // for all rounds
        x += (h.q[0] << 5) | (h.q[0] >> 27);
        x += h.q[4];
        h.q[4] = h.q[3];
        h.q[3] = h.q[2];
        h.q[2] = (h.q[1] << 30) | (h.q[1] >> 2);
        h.q[1] = h.q[0];
        h.q[0] = x;
        x = w[j & 15];
        if (j>=16) {    // rounds 16..79
            x ^= w[(j + 2) & 15];
            x ^= w[(j + 8) & 15];
            x ^= w[(j + 13) & 15];
            w[j & 15] = x = (x << 1) | (x >> 31);
            }
        h.q[0] += x;    // for all rounds
        }
    while (++j != 80);
    // The five final 32-bit modular additions are made in the next loop, and
    // reuse the constants (rather than a RAM copy), saving code and RAM.

    // Final addition and store result; this code works regardless of endianness,
    // and does not perform misaligned memory accesses if hash is misaligned.
    j = 0;
    do
        {
        x = h.q[j] + s.q[j];    // final 32-bit modular additions
        *hash++ = (uint8_t)(x>>24);
        *hash++ = (uint8_t)(x>>16);
        *hash++ = (uint8_t)(x>> 8);
        *hash++ = (uint8_t)(x    );
        }
    while (++j != 5);
    }
answered on Stack Overflow Nov 3, 2015 by fgrieu • edited Nov 6, 2015 by fgrieu

User contributions licensed under CC BY-SA 3.0