C program: [Done] exited with code=3221225477 in 2.322 seconds (Calloc / Free / Segmentation Error?)

1

I'm working on a C program to manipulate large volumes of CSV data. It was running fine in development with a smallish test file, but as the size of the file increases, it starts to fail. Depending on whether I compile it with gcc or MinGW's gcc, it fails with a segmentation fault or 3221225477 / 0xC0000005 at different places, always at either:

if (fclose(fp)) { 
    printf("Error closing file: %s, %s, %d.\n", fileName, __func__, __LINE__);
    exit(300); 
}

Note it doesn't get past the fclose(). Or one of these:

data_PE_T12         = calloc(width_T*dataDepthDay, sizeof(*data_PE_T12));

It's long, so I'll try to show the relevant parts. First the Main function:

#include <stdio.h>
#include <string.h> // strtok
#include <stdlib.h> // atoi & atof
#include <time.h>   // time functions
#include <math.h>   // expf()


...
// Array Sizes
static int dataDepth;       // set in grabDepth(): (toTime - fromTime)/900
static int dataDepthDay;    // set in grabDepth(): dataDepth/96
static int fromTime;        // read in grabDepth() from column 2 of the "From" row
static int toTime;          // read in grabDepth() from column 4 of the "From" row
static int width;           // counted in grabDepth() from the "TimeStamp" row
static int width_T;         // multiplied by dataDepthDay to size the *_T* arrays
static int width_H;         // multiplied by dataDepthDay to size the *_H* arrays
static int width_C;         // presumably the same for the *_C* arrays -- TODO confirm


// Array Pointers
static int *timeArray;
static int *timeArrayDay;
static int *timeArrayPE;
static struct sensorHeader_t *headerArray;
static struct sensorHeader_t *headerArray_T;
static struct sensorHeader_t *headerArray_H;
static struct sensorHeader_t *headerArray_C;

// of depth dataDepthDay
static float *data_E_T25;
static float *data_E_T30;
static float *data_E_T12;
static float *data_E_T18;
static float *data_E_H60;
static float *data_E_H70;
static float *data_E_C1500;
static float *data_PE_T12;
static float *data_PE_T18;
static float *data_PE_H60;
static float *data_PE_H70;
static float *data_PE_C1500;
... plus loads more.

// functions
void  grabDepth(void);                  // OK
void  grabPayload(void);                // OK
... plus loads more.


/*
 * Program entry point.
 *
 * Builds the input path in rawFile ("in/" + argv[1] when exactly one
 * argument is given, otherwise the development default), then runs the
 * parse step (grabDepth, grabPayload) followed by the three generators.
 *
 * Returns 0; the called functions exit() on their own errors.
 */
int main(int argc, char **argv)
{

// Grab Input File Name
    // NOTE(review): strcpy/strcat are unbounded and rawFile's size is not
    // visible here -- a long argv[1] can overflow it. Bound the copy once
    // the declaration of rawFile is known.
    if (argc == 2) {
        strcpy(rawFile, "in/");
        strcat(rawFile, argv[1]);
    } else { // dev
        strcpy(rawFile, "in/sensor_report.csv");
    }

    // __SIZE_MAX__ has the type/width of size_t, so "%d" was a format
    // mismatch (undefined behaviour). Widen explicitly and print with
    // %llu, which both glibc and the Windows C runtime accept.
    printf("size max = %llu", (unsigned long long)__SIZE_MAX__);

// Parse and Copy File
    grabDepth();
    grabPayload();

// Run functions
    genRawData();       // Raw T, H & C files
    genExposureE();     //
    genExposureAPE();   //


    return 0;
}

Next the first function that is called. This one opens the main input file and pulls out a number of array widths and depths that are used to calloc for arrays already declared as static pointers. The idea is that this will make the memory handling nice and flexible as the file size increases...

/*
 * grabDepth - scan the raw CSV file for its time span and column count.
 *
 * Reads rawFile line by line:
 *   - on the row whose first field is "From", column 2 is stored in
 *     fromTime, column 4 in toTime, and dataDepth = (toTime - fromTime)/900;
 *   - on the row whose first field is "TimeStamp", the remaining fields are
 *     counted into width (starting from -2) and reading stops.
 * Afterwards dataDepthDay = dataDepth/96 (rounded down) and the results are
 * printed.
 *
 * Exits with 100 if the file cannot be opened, 200 if the "TimeStamp" line
 * did not fit into the line buffer, 300 if the file cannot be closed.
 */
void grabDepth(void)
{
// 1. Open File
    FILE *fp = fopen(rawFile, "r");
    char buf[15000]; // Big enough to deal with lots of devices.

    if (!fp) {
        printf("Can't open the file: %s: %s, %d.\n", rawFile, __func__, __LINE__);
        exit(100);
    }

    while (fgets(buf, sizeof(buf), fp)) {
        size_t lineLen = strlen(buf);
        // A full buffer only means truncation when the newline is missing.
        // Evaluate this before strtok() starts overwriting delimiters.
        int truncated = (lineLen == sizeof(buf) - 1) && (buf[lineLen - 1] != '\n');
        int colNum = 1;
        char *field = strtok(buf, ",");

// 2. Grab the time span from the "From" row
        if (field && strcmp(field, "From") == 0) {
            while (field) {
                if (colNum == 2) {
                    fromTime = atof(field);
                }

                if (colNum == 4) {
                    toTime = atof(field);
                }
                field = strtok(NULL, ",");
                colNum++;
            }

            // presumably 900 s = one 15-minute sample -- TODO confirm
            dataDepth = (toTime - fromTime) / 900;
            continue; // to next iteration.
        }

// 3. Grab file width from line 10 (commsType) Check if buf Overruns too
        if (field && strcmp(field, "TimeStamp") == 0) {
            // First Check Line is long enough!
            if (truncated) { // buf has overrun!
                // three arguments need three conversions; the old format had
                // only "%s, %d" and fed __func__ (a char *) to %d
                printf("File Read-Line Overrun: %s: %s, %d.\n", rawFile, __func__, __LINE__);
                exit(200);
            }
            width = -2; // ignore timestamps : I ballsed up the commas in csv file (-2 instead of -1)
            while ((field = strtok(NULL, ",")) != NULL) {
                width++;
            }
            break; // out of loop!
        }
    }

    // rounds down; use dataDepth/96 + (dataDepth % 96 != 0) to round up instead
    dataDepthDay = dataDepth / 96;
    printf("\n 1. grabDepth() Results\n");
    printf(  "------------------------\n");
    printf("Raw Data Width     = %d\n", width);
    printf("Raw Data Depth     = %d\n", dataDepth);
    printf("dataDepthDay Depth = %d\n\n", dataDepthDay);

    if (fclose(fp)) {
        printf("Error closing file: %s, %s, %d.\n", rawFile, __func__, __LINE__);
        exit(300);
    }
}

After that, it's just calling one function after another, all of which follow the general pattern of:

/*
 * _genRawData - create out/<sensorType>_raw.csv and allocate the per-day arrays.
 *
 * sensorType  : name fragment used to build the output file name.
 * sensorHeader, dataArray, timeArray, dataDepth, width : not used in the
 *               portion of the function shown here.
 *
 * NOTE: the parameters timeArray, dataDepth and width shadow the file-scope
 * variables of the same names inside this function. The allocations below
 * are sized from the file-scope dataDepthDay, width_T and width_H, and their
 * results are stored in file-scope pointers.
 *
 * Returns early (after printing a message) if the output file cannot be
 * created; exits with 300 if it cannot be closed.
 */
void _genRawData(char* sensorType, struct sensorHeader_t *sensorHeader, float *dataArray, int *timeArray, size_t dataDepth, size_t width) {
    FILE *fp;
    strcpy(fileName, "out/");
    strcat(fileName, sensorType);
    strcat(fileName, "_raw.csv");
    fp = fopen(fileName, "w");

    // check file opened OK.
    if (fp == NULL) {
        printf("Error! Couldn't Create file: %s\n", fileName);
        return;
    }
    printf("building file : %s\n", fileName);


    // Allocate Memory
    // NOTE(review): none of these results are checked, and every index into
    // them must stay below the element count passed to calloc here.
    timeArrayDay    = calloc(dataDepthDay, sizeof(*timeArrayDay));
    timeArrayPE     = calloc(dataDepthDay, sizeof(*timeArrayPE)); // xxxx same array as day time array!?
    data_E_T12      = calloc(width_T*dataDepthDay, sizeof(*data_E_T12));
    data_E_T18      = calloc(width_T*dataDepthDay, sizeof(*data_E_T18));
    data_E_H60      = calloc(width_H*dataDepthDay, sizeof(*data_E_H60));
    data_E_H70      = calloc(width_H*dataDepthDay, sizeof(*data_E_H70));

    // do stuff and build new arrays up and put into files...



    if (fclose(fp)) {
        // report the file that was actually opened (fileName, not rawFile),
        // with one conversion per argument
        printf("Error closing file: %s, %s, %d.\n", fileName, __func__, __LINE__);
        exit(300);
    }
}

I've only called calloc once on each 2-D array, and for the sake of debugging I've removed the free() calls.

I figure I'm doing something wrong with memory management, which is biting me when the array sizes grow past a certain point, but I can't figure out what's wrong. I've tried to make sure the memory I access has been allocated correctly and working on a big powerful actual computer (I'm an embedded person usually), I wouldn't expect any issues with OS handing out data? Isn't there plenty to go around!?

c
gcc
mingw
asked on Stack Overflow Aug 28, 2020 by monkey

1 Answer

0

In case the outcome is of use to others. I suspected there was an issue with the calloc and subsequent use of the allocated memory. So I tried 2 things:

1: Checked the memory usage in the code:

// Add Values & Write Line on new Day & Reset Accumulator
for (i=0; i < dataDepth; i++) {
    for (j=0; j < width; j++) {
            if (newDay) {
                fprintf(fp, ",%.2f", APE_Accum[j]);
                data_E_Array[(data_E_Index-1)*width+j] = APE_Accum[j];
                if ((data_E_Index-1)*width+j+1 > (width_T*dataDepthDay)) {
                    printf("Oh bugger...\n");
                    printf("width_T*dataDepthDay = %d\n", width_T*dataDepthDay);
                    printf("data_E_Index-1 = %d\n", data_E_Index-1);
                    printf("width = %d\n", width);
                    printf("dataDepthDay = %d\n", dataDepthDay);
                    printf("width_T = %d\n", width_T);
                    printf("j = %d\n\n", j);

Really messy code, so you can understand how I lost track of the array bounds. Basically, it became obvious that I'd messed up my calculation of the size of the calloc. It was possible to find the problem like this, but I don't think it's a viable answer to my question, since it wouldn't scale to larger or even more convoluted code.

2: Valgrind. Following @dbush's advice, I moved over to Ubuntu, installed Valgrind, and recompiled...

$ sudo apt install valgrind
$ ps aux | grep -i apt
$ gcc -o graphomatic ./graphomatic.c -lm -g
$ valgrind --leak-check=full --show-leak-kinds=all --verbose --track-origins=yes --log-file=valgrind-log ./graphomatic
$ less valgrind-log

And Bob's your uncle. The problems jumped right out. I needed to add -lm to link to the math library, and -g to make sure line numbers were included in the Valgrind output.

==15878== Invalid write of size 4
==15878==    at 0x4038EA: _genExposureE (graphomatic.c:867)
==15878==    by 0x404A0C: genExposureE (graphomatic.c:1235)
==15878==    by 0x400EAA: main (graphomatic.c:122)
==15878==  Address 0x75cd604 is 0 bytes after a block of size 660 alloc'd
==15878==    at 0x4C2FB55: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==15878==    by 0x404911: genExposureE (graphomatic.c:1222)
==15878==    by 0x400EAA: main (graphomatic.c:122)
answered on Stack Overflow Aug 31, 2020 by monkey

User contributions licensed under CC BY-SA 3.0