CS 3723 Programming Languages
Finite-state Automaton to
recognize C-style comments

C comment

Recognizing C-style comments:


C source programs that recognize C-style comments -- with and without gotos: Notice that only the file scan.c has been changed (it becomes scan2.c) -- the other files and the output are unchanged. Boldface = differences.

C Program With gotos C Program Without gotos
/* scan.h: scanner header file */
#ifndef SCAN_H
#define SCAN_H

/* scan: store in s the text of the next C-style comment read from
 * standard input, including its opening and closing delimiters.
 * The caller supplies the buffer; its size is not passed in, so it
 * must be large enough for the whole comment plus the final '\0'.
 */
void scan(char *s);

#endif /* SCAN_H */

/* scan.c: scanner implementation. * Recognizes C-type comments * Based on a finite-state automaton. * Uses gotos for implementation. */ #include <stdio.h> #include <string.h> #include <stdlib.h> #include "scan.h" char * makestr(char ch); void scan(char *s) {   char ch; strcpy(s, "/*");     L0: ch = getchar(); if (ch == '/') goto L1; else goto L0; L1: ch = getchar(); if (ch == '*') goto L2; else goto L0; L2: ch = getchar(); strcat(s, makestr(ch)); if (ch == '*') goto L3; else goto L2; L3: ch = getchar(); strcat(s, makestr(ch)); if (ch == '*') goto L3; else if (ch == '/') goto L4; else goto L2; L4: return;     } /* makestr: convert a char to a string */ char *makestr(char ch) { char *r = (char *) malloc(2); r[0] = ch; r[1] = '\0'; return r; }
/* comment.c: driver to test comment scanner */ #include <stdio.h> #include <string.h> #include "scan.h"   void main(void) { char s[50]; for ( ; ; ) { scan(s); printf("Next comment:\"%s\"\n", s); if (strcmp(s, "/*quit*/") == 0) break; } }
/* scan.h: scanner header file */
#ifndef SCAN_H
#define SCAN_H

/* scan: store in s the text of the next C-style comment read from
 * standard input, including its opening and closing delimiters.
 * The caller supplies the buffer; its size is not passed in, so it
 * must be large enough for the whole comment plus the final '\0'.
 */
void scan(char *s);

#endif /* SCAN_H */

/* scan2.c: scanner implementation. * Recognizes C-type comments * Based on a finite-state automaton. * No gotos here; uses state variable. */ #include <stdio.h> #include <string.h> #include <stdlib.h> #include "scan.h" char * makestr(char ch); void scan(char *s) { int state = 0; char ch; strcpy(s, "/*"); while (state != 4) { switch (state) { case 0: ch = getchar(); if (ch == '/') state = 1; else state = 0; break; case 1: ch = getchar(); if (ch == '*') state = 2; else state = 0; break; case 2: ch = getchar(); strcat(s, makestr(ch)); if (ch == '*') state = 3; else state = 2; break; case 3: ch = getchar(); strcat(s, makestr(ch)); if (ch == '*') state = 3; else if (ch == '/') state = 4; else state = 2; break; case 4: return; break; } /* end of switch */ } /* end of while */ } /* makestr: convert a char to a string */ char *makestr(char ch) { char *r = (char *) malloc(2); r[0] = ch; r[1] = '\0'; return r; }
/* comment.c: driver to test comment scanner */ #include <stdio.h> #include <string.h> #include "scan.h"   void main(void) { char s[50]; for ( ; ; ) { scan(s); printf("Next comment:\"%s\"\n", s); if (strcmp(s, "/*quit*/") == 0) break; } }
Run and Output Run and Output
% cc -o comment comment.c scan.c
% cat comment.source
 x = 47 / 13 * 12; /**/
 /***/ /****/ /*/ ?? * / */
 /* abc; */    /* over
  two lines */ JUNK! 3.14159; /*quit*/
% comment < comment.source
Next comment:"/**/"
Next comment:"/***/"
Next comment:"/****/"
Next comment:"/*/ ?? * / */"
Next comment:"/* abc; */"
Next comment:"/* over
  two lines */"
Next comment:"/*quit*/"
% cc -o comment2 comment.c scan2.c
% cat comment.source
 x = 47 / 13 * 12; /**/
 /***/ /****/ /*/ ?? * / */
 /* abc; */    /* over
  two lines */ JUNK! 3.14159; /*quit*/
% comment2 < comment.source
Next comment:"/**/"
Next comment:"/***/"
Next comment:"/****/"
Next comment:"/*/ ?? * / */"
Next comment:"/* abc; */"
Next comment:"/* over
  two lines */"
Next comment:"/*quit*/"


Java source program that recognizes C-style comments: The program below is just a translation of the one on the right above, with a little extra concerning redirection. Java Sources (without the "package" line needed in JBuilder): Scan.java, ScanTest.java

Java Program to Recognize Comments
/* Scan.java: scanner implementation.
 * Recognizes C-type comments
 * Based on a finite-state automaton.
 */
import java.io.*;
/**
 * Scanner that extracts C-style comments from a character stream using
 * a five-state finite automaton.  Input comes from the named file, or
 * from standard input when no file name was given.
 */
public class Scan {
   // value returned by Reader.read() once the input is exhausted
   private static final int EOF = -1;

   Reader in; // input stream; opened on the first read
   boolean fileOpen = false; // is the file open yet?
   String fileName; // name of input file; "" means standard input

   // Scan(): constructor providing no input file name
   public Scan() {
      fileName = "";
   }

   // Scan(String ): constructor providing input file name
   public Scan(String f) {
      fileName = f;
   }

   /**
    * Recognizes and returns the next comment in the input.
    *
    * States: 0 = outside a comment, 1 = '/' seen, 2 = inside the
    * comment body, 3 = inside the body with '*' just seen,
    * 4 = closing '/' seen (accepting).
    *
    * At end-of-file a farewell message is printed and the program is
    * ended with System.exit(0), so this method does not return then.
    *
    * @return the comment text, including the opening and closing
    *         delimiters
    */
   public String getComment() {
      int state = 0; // state for the finite automaton
      StringBuilder s = new StringBuilder("/*");
      while (state != 4) {
         // every non-accepting state consumes exactly one character
         int c = getNextChar();
         if (c == EOF) { // tested on the int, before narrowing to char
            System.out.println("Th-th-th-th-that's all folks");
            System.exit(0);
         }
         char ch = (char) c;
         switch (state) {
         case 0:
            state = (ch == '/') ? 1 : 0;
            break;
         case 1:
            if (ch == '*') state = 2;
            else if (ch == '/') state = 1; // this '/' may itself open a comment: "//*"
            else state = 0;
            break;
         case 2:
            s.append(ch);
            state = (ch == '*') ? 3 : 2;
            break;
         case 3:
            s.append(ch);
            if (ch == '*') state = 3;
            else if (ch == '/') state = 4;
            else state = 2;
            break;
         } // end of switch
      } // end of while
      return s.toString();
   }

   /**
    * Fetches the next character, opening the input on the first call.
    * An I/O failure is reported on standard output and ends the
    * program with exit status 1.
    *
    * @return the character read, or -1 at end of input
    */
   private int getNextChar() {
      if (!fileOpen) {
         try {
            fileOpen = true;
            // compare contents, not references: a "" that did not come
            // from a literal (e.g. a command-line argument) is still empty
            if (fileName == null || fileName.length() == 0)
               in = new BufferedReader(new InputStreamReader(System.in));
            else
               in = new BufferedReader(new FileReader(fileName));
         } catch (IOException e) {
            System.out.println("Exception opening " + fileName);
            System.exit(1);
         }
      }
      int c = EOF;
      try {
         c = in.read();
      } catch (IOException e) {
         System.out.println("Exception reading character");
         System.exit(1);
      }
      return c;
   }

}

// ScanTest: Test a scanner for C-style comments import java.io.*; public class ScanTest { public static void main(String[] args) { Scan scanner; // the scanner String s; // pass an input file name if present on command line if (args.length > 0) scanner = new Scan(args[0]); else scanner = new Scan(); while (true) { // fetch and print the next comment // end-of-file is detected inside Scan.java s = scanner.getComment(); System.out.println("Next comment: " + "\"" + s + "\""); } } }
Java Run and Output
% javac Scan.java
% javac ScanTest.java
% cat -n comment.source
     1	 x = 47 / 13 * 12; /**/
     2	 /***/ /****/ /*/ ?? * / */
     3	 /* abc; */    /* over
     4	  two lines */ JUNK! 3.14159; /*quit*/
% java ScanTest        (the file comment.source pasted here)
 x = 47 / 13 * 12; /**/
 /***/ /****/ /*/ ?? * / */
 /* abc; */    /* over
  two lines */ JUNK! 3.14159; /*quit*/  
Next comment: "/**/"
Next comment: "/***/"
Next comment: "/****/"
Next comment: "/*/ ?? * / */"
Next comment: "/* abc; */"
Next comment: "/* over
  two lines */"
Next comment: "/*quit*/"              (control-D typed here)
Th-th-th-th-that's all folks
% java ScanTest < comment.source    (using redirected input)
Next comment: "/**/"
Next comment: "/***/"
Next comment: "/****/"
Next comment: "/*/ ?? * / */"
Next comment: "/* abc; */"
Next comment: "/* over
  two lines */"
Next comment: "/*quit*/"
Th-th-th-th-that's all folks
% java ScanTest comment.source     (using named source file)
Next comment: "/**/"
Next comment: "/***/"
Next comment: "/****/"
Next comment: "/*/ ?? * / */"
Next comment: "/* abc; */"
Next comment: "/* over
  two lines */"
Next comment: "/*quit*/"
Th-th-th-th-that's all folks


Revision date: 2003-12-30. (Please use ISO 8601, the International Standard Date and Time Notation.)