Path: utzoo!utgpu!watmath!watdragon!violet!ajmyrvold
From: ajmyrvold@violet.waterloo.edu (Alan Myrvold)
Newsgroups: comp.software-eng
Subject: Re: C source lines in file
Message-ID: <15957@watdragon.waterloo.edu>
Date: 18 Aug 89 01:44:31 GMT
References: <35120@ccicpg.UUCP>
Sender: daemon@watdragon.waterloo.edu
Reply-To: alanm@cognos.uucp (Alan Myrvold)
Distribution: world
Organization: Cognos Inc.
Lines: 333
Keywords:

In article <35120@ccicpg.UUCP> swonk@ccicpg.UUCP (Glen Swonk) writes:
>-Does anyone have a program or a method of determing
>-the number of C source lines in a source file?

Ok. First off, sources don't really belong in comp.software-eng ...
so I feel a bit guilty, but here's a reasonably portable C program
to count :

     NCSL - non-commentary source lines
     LINES - source lines
     COMMENTS - C comments
     NCC - non-contiguous comments

It will even run on systems where (heaven forbid) the argv[] list
isn't as convenient to use as Unix's. And should compile with
either a K&R or ANSI-style compiler.

One known bug in the program is that VAX CC (and others) allow:

#include "foo.c""

Which confuses the string parsing part of my program.
Obfuscated C contest winners may also foul the program.

Flames and comments to alanm@cognos.uucp, please.

                                 - Alan
--- cut here ---
/* LOC.C count C lines of code, comments                                  */
/* For each c file, produces
     NCSL - non-commentary source lines
     LINES - source lines
     COMMENTS - C comments
     NCC - non-contiguous comments

If invoked with no arguments and the file "cfiles.lis" does not
exist, input is taken from stdin, output goes to stdout.

If invoked with no arguments and "cfiles.lis" does exist,
the filenames are assumed to be in "cfiles.lis", and the
output is written to BOTH stdout and "cfiles.out".

If invoked with arguments, the args are taken as filenames, and
output is written to stdout.

*/

/* Alan Myrvold          3755 Riverside Dr.  uunet!mitel!sce!cognos!alanm */
/* Cognos Incorporated   P.O. Box 9707       alanm@cognos.uucp            */
/* (613) 738-1440 x5530  Ottawa, Ontario                                  */
/*                       CANADA  K1G 3Z4                                  */

#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* Tokenizer modes and token kinds.  NORM is used only as a mode of the
   tokenizer; COMMENT, STRING and CHAR are both modes and token kinds;
   ID, SPECIAL and WHITE are used only as token kinds handed to the
   per-token callback. */
#define NORM    0
#define COMMENT 1
#define STRING  2
#define CHAR    3
#define ID      4
#define SPECIAL 5
#define WHITE   6

/* Counters shared between the tokenizer and the counting callback. */
static long LINES_OF_CODE;   /* lines credited with at least one code token */
static long LAST_LINE;       /* line number most recently credited         */
static long CURRENT_LINE;    /* number of newlines read so far             */
static long COMMENTS;        /* number of comment tokens seen              */
static long NCC;             /* comments not directly following a comment  */
static long IS_CONTIG;       /* nonzero while the last token was a comment */

/* Character classes: id1 accepts the first character of an identifier,
   id2 any later character ('$' is allowed), is_white blank/tab/newline. */
#define id1(c) (((c) == '_') || isalpha(c))
#define id2(c) (id1(c) || isdigit(c) || ((c) == '$'))
#define is_white(c) (((c) == '\n') || ((c) == '\t') || ((c) == ' '))

/* echo_fn: token callback that copies the token text S to stdout
   unchanged.  The token kind K is accepted only so the function has the
   same shape as the other callbacks; it is not looked at. */
void echo_fn(k,s)
int k;
char *s;
{
   (void) k;
   (void) fputs(s,stdout);
}

/* dump_white: write the string S to stdout with its white space made
   visible: a tab is printed as the two characters \t, a newline as \n,
   and a blank as an underscore.  Any other character is printed as is. */
void dump_white(s)
char *s;
{
   char *p;

   for (p = s; *p != '\0'; p++) {
      if (*p == '\t') {
         fputs("\\t",stdout);
      } else if (*p == '\n') {
         fputs("\\n",stdout);
      } else if (*p == ' ') {
         putchar('_');
      } else {
         putchar(*p);
      }
   }
}

/* dump_fn: token callback for debugging.  Prints one line per token to
   stdout: a label naming the token kind K followed by the token text S.
   White-space tokens are printed through dump_white so that blanks,
   tabs and newlines are visible; a kind that is not recognised is
   labelled "unknown". */
void dump_fn(k,s)
int k;
char *s;
{
   char *label;

   /* White space is the only kind whose text is not printed verbatim. */
   if (k == WHITE) {
      fputs("WHITE ",stdout);
      dump_white(s);
      putchar('\n');
      return;
   }

   if (k == ID) {
      label = "ID";
   } else if (k == COMMENT) {
      label = "COMMENT";
   } else if (k == SPECIAL) {
      label = "SPECIAL";
   } else if (k == STRING) {
      label = "STRING";
   } else if (k == CHAR) {
      label = "CHAR";
   } else {
      label = "unknown";
   }
   printf("%s %s\n",label,s);
}

/* count_fn: token callback that maintains the line and comment counters.
   K is the token kind; the token text S is not used.

   A code token (ID, SPECIAL, STRING or CHAR) credits the current line to
   LINES_OF_CODE unless that line has already been credited, and ends any
   run of contiguous comments.  A COMMENT token always bumps COMMENTS and
   bumps NCC only when it does not directly follow another comment.  Any
   other kind (white space in particular) leaves every counter alone, so
   comments separated only by white space count as contiguous. */
void count_fn(k,s)
int k;
char *s;
{
   if (k == COMMENT) {
      COMMENTS++;
      if (!IS_CONTIG) {
         IS_CONTIG = 1;
         NCC++;
      }
      return;
   }

   if ((k != ID) && (k != SPECIAL) && (k != STRING) && (k != CHAR)) {
      return;
   }

   /* Code token: count each source line at most once. */
   if (CURRENT_LINE != LAST_LINE) {
      LINES_OF_CODE++;
      LAST_LINE = CURRENT_LINE;
   }
   IS_CONTIG = 0;
}

/* Beware trespassers of this code ... it is rather obtuse... */
/* but it SEEMS to work */
void tokenize(f,t)
FILE *f;
void (*t)();
{
   int skip_next,in_id,in_white,bptr,mode,c,old_c,retain;
   static char buffer[8000];

   IS_CONTIG = NCC = LINES_OF_CODE = COMMENTS = CURRENT_LINE = 0;
   LAST_LINE = -1;
   bptr = 0;
   mode = NORM;
   old_c = ' ';
   skip_next = in_id = in_white = 0;
   while (old_c != EOF) {
      c = getc(f);
      if (c == '\n') CURRENT_LINE++;
      retain = 0;

      /* Now, in NORM mode, we read one too many
         characters before deciding to start a new
         token */
      if (mode == NORM) {

         /* already in id mode */
         if (in_id) {
            if (id2(c)) {
               /* stay in mode */
               retain = 1;
               buffer[bptr++] = c;
            } else {
               /* send off identifier */
               buffer[bptr] = 0;
               t(ID,buffer);
               in_id = bptr = 0;
            }
         }

         /* already in white mode */
         if (in_white) {
            if (is_white(c)) {
               /* stay in mode */
               retain = 1;
               buffer[bptr++] = c;
            } else {
               /* send off white space */
               buffer[bptr] = 0;
               t(WHITE,buffer);
               in_white = bptr = 0;
            }
         }

         /* Check if we are going to change modes now */

         if (!in_white && is_white(c)) {
            /* start white mode */
            retain = 1;
            buffer[bptr++] = c;
            in_white = 1;
         }

         if (!in_id && id1(c)) {
            /* start id mode */
            retain = 1;
            in_id = 1;
            buffer[bptr++] = c;
         }

         /* start other modes */
         switch (c) {
            case '/'  : 
               /* look ahead 1 character */
               if (ungetc(getc(f),f) == '*') {
                  retain = 1;
                  mode = COMMENT; 
               }
            break;
            case '\'' : retain = 1; mode = CHAR; break;
            case '\"' : retain = 1; mode = STRING; break;
        }
      }

      /* Now deal with the modes where we know when we are done */
      switch (mode) {
         case COMMENT : 
           retain = 1;
           buffer[bptr++] = c;
           if ((c == '/') && (old_c == '*')) {
              mode = NORM; 
              buffer[bptr] = 0;
              t(COMMENT,buffer);
              bptr = 0;
           }
         break;
         case CHAR :
           retain = 1;
           buffer[bptr++] = c;
           if (skip_next) {
              skip_next = 0;
           } else {
              skip_next = (c == '\\');
              if ((bptr > 1) && (c == '\'')) {
                 mode = NORM; 
                 buffer[bptr] = 0;
                 t(CHAR,buffer);
                 bptr = 0;
              }
           }
           break;
         case STRING :
           retain = 1;
           buffer[bptr++] = c;
           if (skip_next) {
              skip_next = 0;
           } else {
              skip_next = (c == '\\');
              if ((bptr > 1) && (c == '\"')) {
                 mode = NORM; 
                 buffer[bptr] = 0;
                 t(STRING,buffer);
                 bptr = 0;
              }
           }
           break;
      }

      /* one-character token */
      if (!retain) {
         buffer[0] = c;
         buffer[1] = 0;
         if (c != EOF) t(SPECIAL,buffer);
      }

      /* save previous character */
      old_c = c;
   }
}


/* count_main: count lines and comments for a set of C source files.

   With no arguments, or with the single argument "-":
     - if "cfiles.lis" can be opened, each white-space separated word in
       it is taken as a file name, and one result line per file is
       written to both stdout and "cfiles.out";
     - otherwise stdin is tokenized and one result line (without a file
       name) is written to stdout.
   With any other arguments each argument is taken as a file name and one
   result line per file is written to stdout.

   A result line holds, in order: lines of code, total lines, number of
   comments, number of non-contiguous comments.

   Returns 0 on success and 1 if any input file could not be opened, if
   "cfiles.out" could not be created, or if closing "cfiles.out" failed.
   Files that cannot be opened are skipped silently.

   Names read from "cfiles.lis" are limited to 79 characters, the
   capacity of the name buffer; a longer word is read in pieces rather
   than overrunning the buffer. */
int count_main(argc,argv)
int argc;
char *argv[];
{
   int i,ier;
   FILE *fout,*mas,*f;
   char fbuf[80];

   ier = 0;
   if ((argc < 2) || ((argc == 2) && (strcmp(argv[1],"-") == 0))) {
      mas = fopen("cfiles.lis","rt");
      if (mas) {
         fout = fopen("cfiles.out","wt");
         if (!fout) {
            /* Report the failure through the normal return path instead
               of calling exit(0), which claimed success on systems where
               a zero exit status means success and left mas open. */
            fclose(mas);
            return 1;
         }
         /* The field width keeps a long word from overflowing fbuf. */
         while (fscanf(mas,"%79s",fbuf) == 1) {
            f = fopen(fbuf,"rt");
            if (!f) {
               ier = 1;
            } else {
               tokenize(f,count_fn);
               printf("%s %ld %ld %ld %ld\n",fbuf,
                      LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC);
               fprintf(fout,"%s %ld %ld %ld %ld\n",fbuf,
                      LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC);
               fclose(f);
            }
         }
         fclose(mas);
         /* fclose flushes the report; a failure here means it is
            incomplete. */
         if (fclose(fout) != 0) ier = 1;
      } else {
         tokenize(stdin,count_fn);
         printf("%ld %ld %ld %ld\n",LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC);
      }
   } else {
      for (i = 1; i < argc; i++) {
          f = fopen(argv[i],"rt");
          if (!f) {
             ier = 1;
          } else {
             tokenize(f,count_fn);
             printf("%s %ld %ld %ld %ld\n",argv[i],
                    LINES_OF_CODE,CURRENT_LINE,COMMENTS,NCC);
             fclose(f);
          }
      }
   }
   return ier;
}

/* main: run count_main on the command-line arguments and use its result
   as the exit status.  When the preprocessor symbol VAX is nonzero the
   result is logically inverted before being returned. */
int main(argc,argv)
int argc;
char *argv[];
{
   int status;

   status = count_main(argc,argv);
#if VAX
   status = !status;
#endif
   return status;
}
--- cut here ---