Note: This article is reproduced from stackoverflow.com.
c io strtok

Reading a CSV file in C using fgets and strtok only reads first column

Published on 2020-03-27 10:22:03

I have a .csv file that looks like

Config,Prob,MAN,ATL,CVERT,TVERT,LVERT,PELV,SAC,RIB,SCAP,PHUM,DHUM,PRAD,DRAD,CARP,PMC,DMC,PHX,PFEM,DFEM,PTIB,DTIB,TARS,PMT,DMT
LH,1,2,2,7,13,6,2,1,13,2,2,2,1,1,6,2,2,24,2,2,2,2,8,2,2
LH,1,0,0,0,0,0,0,0,9,1,2,2,2,2,12,2,2,18,1,1,1,1,4,1,1
LH,1,2,2,7,3,0,2,1,3,1,1,1,1,1,6,1,1,6,0,0,0,0,0,0,0
LH,1,0,0,0,13,6,2,1,8,0,2,2,1,1,0,0,0,0,0,0,0,0,0,0,0
LH,1,2,2,4,13,6,2,1,18,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0
LH,1,0,0,0,13,6,2,1,18,2,2,2,0,0,0,0,0,0,2,2,0,0,0,0,0
LH,3,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
LH,1,0,0,0,13,6,2,1,24,2,2,2,2,2,12,2,2,24,2,2,2,2,8,2,2
LH,1,0,0,1,13,3,2,1,15,2,0,0,2,2,0,0,0,6,2,2,2,2,0,0,0
LH,1,0,0,0,0,0,0,0,10,0,1,1,0,0,0,0,0,18,0,0,0,0,0,0,0
LH,1,0,2,7,3,0,0,0,7,2,2,2,2,2,12,2,2,24,2,2,2,2,8,2,2
LH,1,0,0,2,0,0,0,0,14,1,2,2,2,2,0,0,0,18,2,2,2,2,0,0,0
LK,1,0,0,0,0,0,0,0,13,0,0,0,1,1,6,0,0,0,0,0,0,0,0,0,0
LK,1,2,2,7,13,6,2,1,17,1,0,0,0,0,0,0,0,6,1,1,1,1,4,1,1
LK,1,0,0,0,10,6,0,0,23,1,1,1,1,1,6,1,1,18,2,2,2,2,8,2,2
LK,1,2,2,7,0,0,0,0,18,2,0,0,1,1,12,2,2,24,2,2,2,2,8,2,2
LK,1,0,0,3,0,0,0,0,8,0,0,0,2,2,12,2,2,24,2,2,2,2,8,2,2
LK,1,2,2,7,0,0,0,0,8,0,0,0,2,2,12,2,2,24,0,0,2,2,8,2,2
LK,3,2,2,7,13,6,2,1,22,2,2,2,2,2,12,2,2,24,2,2,2,2,8,2,2
LK,1,2,2,7,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
LK,1,2,2,6,0,3,0,0,11,0,2,2,0,0,12,2,2,18,0,0,0,0,8,2,2
LK,1,2,2,7,13,6,2,1,16,2,1,1,2,2,12,2,2,6,2,2,2,2,8,2,2
LK,1,2,0,0,10,6,2,1,19,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
LK,1,2,2,5,13,6,2,1,12,1,0,0,0,0,12,2,2,6,0,0,0,0,8,2,2

I would like to ignore the text and just grab the numbers. Here is my code:

    int arr[rows][columns]; /* rows/columns of .csv file */
    char buf[1000];
    int r = 0;

    while (fgets(buf,1000,ifp)) {
        char read = 'N';
        const char *tok;
        int ret =0;
        int c = 0;
        int count = 0;
        char *ptr;
        printf("%s \n", buf);

        for (tok = strtok(buf, ","); tok && *tok; tok = strtok(NULL, ",\n")){
                printf("%s ", buf); /* replace buf with tok */
                if(isNumber(tok)==1){
                    read = 'Y';
                    ret = strtol(tok,&ptr,10);
                    arr[r][c] = ret;
                    c++;
                    printf("Entered ");
                    //printf("%ld ", arr[r][c]);
                }
                if(strtok(NULL,"\n") && read == 'N')
                    count++;
        }
        r++;
        //r -= count;
    }

isNumber is essentially an extension of isdigit and works as intended. However, when I print out tok on each iteration, it stops after the first comma. Sample output: Config LH LH LH LH LH LH LH LH LH LH LH LH LK LK LK LK LK LK LK LK LK LK LK LK. It seems to be reading the input just fine using fgets, as it prints out each line of the .csv file. So it seems like the issue lies with my for loop: I appear to be advancing my token incorrectly.

Attempting with another example:

    char alph[] = "a-b-c-e";
    for (const char *tok = strtok(alph, "-"); tok && *tok; tok = strtok(NULL, "-\0")){
        printf("%s ", tok);
    }

yields: a b c e, which is the correct result. Therefore, I feel like I am missing something here with parsing my file. Any help is appreciated. Thank you.

Questioner
hkj447
Viewed
377
Jonathan Leffler 2019-07-03 22:24

Your loop is:

 for (tok = strtok(buf, ","); tok && *tok; tok = strtok(NULL, ",\n")) 
 {
     …omitted…
     if (strtok(NULL, "\n") && read == 'N')
         count++;
 }

You have three calls to strtok(), and the one in the if eats up everything to the newline. I'm not sure what you were thinking there. It looks like you should omit that strtok() — but I'm not sure what you need to replace it with.

Because the second call in the loop control reads up to newline or comma (strtok(NULL, ",\n")), you can't tell when you reach the end of the line except that tok is set to NULL to indicate no more tokens.