|
 |
CS 3723
Programming Languages |
Simulating DFAs:
C-style comments |
Recognizing C-style comments.
A C-style comment starts with "/*" and ends with "*/".
Anything besides "*/" is allowed in between, including
a newline, but not end-of-file. Here is a deterministic finite
automaton (DFA) for these comments:

DFA for C-style Comments
- The programs below
(two in C and one in Java)
recognize a C-style comment and return the text of successive
comments, ignoring all other input.
Here "recognize" means to examine each input character in turn and
decide whether a legal comment has been processed.
In contrast, a scanner for C, C++, or Java
needs to recognize comments in order to discard them, while paying
attention to all other input.
(The FSM above and programs below do not handle an input of "//"
properly, assuming that one allowed a C++/Java-style comment on input.)
- Each program below is based almost exactly on the FSM shown above.
It is possible to simplify the code and eliminate some of the states.
It is also possible to write a completely different and somewhat
simpler program. For example, one can read two characters ahead,
first looking for "/*" and after that looking for "*/".
Sometimes students who look for such a simpler program end up
with one that doesn't work correctly. (Beware. Common mistakes
are to accept the non-comment "/*/"
or to reject the comment "/***/".)
C source programs that recognize
C-style comments -- with and without gotos:
Notice that only the file scan.c has
been changed -- the other files and the output are unchanged.
Boldface = differences.
C Program With gotos |
C Program Without
gotos |
/* scan.h: scanner header file */
void scan(char *s);
/* scan.c: scanner implementation.
* Recognizes C-type comments
* Based on a finite-state automaton.
* Uses gotos for implementation.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "scan.h"
char * makestr(char ch);
/* scan: skip input until the next C-style comment has been read, then
 * leave the text of that comment, delimiters included, in s as a
 * NUL-terminated string.
 *
 * s must point to a buffer large enough for the whole comment plus the
 * terminator; this function is not told the capacity and cannot check it.
 *
 * At end of input the program is terminated with status 0, because the
 * function has no return value with which to report "no more comments".
 *
 * The labels L0..L4 are the states of the finite-state automaton.
 */
void scan(char *s) {
   int ch;           /* int, not char: getchar() reports EOF out of band */
   size_t len = 2;   /* number of characters stored in s so far */

   strcpy(s, "/*");  /* states 0 and 1 consume the opener without storing it */

L0: ch = getchar();             /* state 0: outside a comment */
   if (ch == EOF) goto Leof;
   if (ch == '/') goto L1;
   else goto L0;

L1: ch = getchar();             /* state 1: previous character was '/' */
   if (ch == EOF) goto Leof;
   if (ch == '*') goto L2;
   else goto L0;                /* any other character, even a second '/', restarts */

L2: ch = getchar();             /* state 2: inside the comment */
   if (ch == EOF) goto Leof;
   s[len++] = (char) ch;        /* append in place: no per-character malloc */
   if (ch == '*') goto L3;
   else goto L2;

L3: ch = getchar();             /* state 3: inside, previous character was '*' */
   if (ch == EOF) goto Leof;
   s[len++] = (char) ch;
   if (ch == '*') goto L3;
   else if (ch == '/') goto L4;
   else goto L2;

L4: s[len] = '\0';              /* state 4: comment complete */
   return;

Leof: exit(0);                  /* input exhausted before a comment was completed */
}
/* makestr: convert a char to a string.
 * Returns a newly allocated, NUL-terminated string whose only character
 * is ch.  The caller owns the storage and must free() it.
 * Terminates the program with a failure status if allocation fails.
 */
char *makestr(char ch) {
   char *r = (char *) malloc(2);   /* one character plus the terminator */
   if (r == NULL) {
      fprintf(stderr, "makestr: out of memory\n");
      exit(EXIT_FAILURE);
   }
   r[0] = ch;
   r[1] = '\0';
   return r;
}
/* comment.c: comment scanner driver */
#include <stdio.h>
#include <string.h>
#include "scan.h"
/* main: print every comment that scan() finds on standard input and
 * stop after printing the comment whose body is exactly "quit".
 * Returns 0 to the environment (standard C requires main to return int).
 */
int main(void) {
   /* scan() receives only the pointer, not this size, so the input must
    * not contain a comment longer than 49 characters including delimiters. */
   char s[50];

   for ( ; ; ) {
      scan(s);
      printf("Next comment:\"%s\"\n", s);
      if (strcmp(s, "/*quit*/") == 0)
         break;
   }
   return 0;
}
|
/* scan.h: scanner header file */
void scan(char *s);
/* scan2.c: scanner implementation.
* Recognizes C-type comments
* Based on a finite-state automaton.
* No gotos here; uses state variable.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "scan.h"
char * makestr(char ch);
/* scan: skip input until the next C-style comment has been read, then
 * leave the text of that comment, delimiters included, in s as a
 * NUL-terminated string.
 *
 * s must point to a buffer large enough for the whole comment plus the
 * terminator; this function is not told the capacity and cannot check it.
 *
 * At end of input the program is terminated with status 0, because the
 * function has no return value with which to report "no more comments".
 *
 * The variable state holds the current state (0..4) of the finite-state
 * automaton; state 4 is the accepting state.
 */
void scan(char *s) {
   int state = 0;
   int ch;           /* int, not char: getchar() reports EOF out of band */
   size_t len = 2;   /* number of characters stored in s so far */

   strcpy(s, "/*");  /* states 0 and 1 consume the opener without storing it */

   while (state != 4) {
      /* every non-accepting state consumes exactly one character */
      ch = getchar();
      if (ch == EOF)
         exit(0);    /* input exhausted before a comment was completed */

      switch (state) {
      case 0:        /* outside a comment */
         if (ch == '/') state = 1;
         else state = 0;
         break;
      case 1:        /* previous character was '/' */
         if (ch == '*') state = 2;
         else state = 0;   /* any other character, even a second '/', restarts */
         break;
      case 2:        /* inside the comment */
         s[len++] = (char) ch;   /* append in place: no per-character malloc */
         if (ch == '*') state = 3;
         else state = 2;
         break;
      case 3:        /* inside, previous character was '*' */
         s[len++] = (char) ch;
         if (ch == '*') state = 3;
         else if (ch == '/') state = 4;
         else state = 2;
         break;
      } /* end of switch */
   } /* end of while */

   s[len] = '\0';
}
/* makestr: convert a char to a string.
 * Returns a newly allocated, NUL-terminated string whose only character
 * is ch.  The caller owns the storage and must free() it.
 * Terminates the program with a failure status if allocation fails.
 */
char *makestr(char ch) {
   char *r = (char *) malloc(2);   /* one character plus the terminator */
   if (r == NULL) {
      fprintf(stderr, "makestr: out of memory\n");
      exit(EXIT_FAILURE);
   }
   r[0] = ch;
   r[1] = '\0';
   return r;
}
/* comment.c: comment scanner driver */
#include <stdio.h>
#include <string.h>
#include "scan.h"
/* main: print every comment that scan() finds on standard input and
 * stop after printing the comment whose body is exactly "quit".
 * Returns 0 to the environment (standard C requires main to return int).
 */
int main(void) {
   /* scan() receives only the pointer, not this size, so the input must
    * not contain a comment longer than 49 characters including delimiters. */
   char s[50];

   for ( ; ; ) {
      scan(s);
      printf("Next comment:\"%s\"\n", s);
      if (strcmp(s, "/*quit*/") == 0)
         break;
   }
   return 0;
}
| Run and Output |
Run and Output |
% cc -o comment comment.c scan.c
% cat comment.source
x = 47 / 13 * 12; /**/
/***/ /****/ /*/ ?? * / */
/* abc; */ /* over
two lines */ JUNK! 3.14159; /*quit*/
% comment < comment.source
Next comment:"/**/"
Next comment:"/***/"
Next comment:"/****/"
Next comment:"/*/ ?? * / */"
Next comment:"/* abc; */"
Next comment:"/* over
two lines */"
Next comment:"/*quit*/"
|
% cc -o comment2 comment.c scan2.c
% cat comment.source
x = 47 / 13 * 12; /**/
/***/ /****/ /*/ ?? * / */
/* abc; */ /* over
two lines */ JUNK! 3.14159; /*quit*/
% comment2 < comment.source
Next comment:"/**/"
Next comment:"/***/"
Next comment:"/****/"
Next comment:"/*/ ?? * / */"
Next comment:"/* abc; */"
Next comment:"/* over
two lines */"
Next comment:"/*quit*/"
|
Java source program that
recognizes C-style comments:
The program below is just a translation of the one on the right above, with
a little extra concerning redirection.
Java Sources:
Scan.java,
ScanTest.java
Java Program to Recognize Comments |
Java Driver Program, plus Run and Output |
/* Scan.java: scanner implementation.
* Recognizes C-type comments
* Based on a finite-state automaton.
*/
import java.io.*;
// Scan: returns the text of successive C-style comments found in a file or
// on standard input, ignoring all other input.  The recognizer is a
// five-state DFA driven by a state variable.
public class Scan {
   // Value Reader.read() returns at end of input.  Kept as an int so that
   // it can never be confused with a real character such as U+FFFF.
   private static final int EOF = -1;

   Reader in;                 // input stream, opened on the first read
   boolean fileOpen = false;  // has the input stream been opened?
   String fileName;           // input file; "" (or null) means standard input

   // Scan(): constructor with no input filename; reads standard input
   public Scan() {
      fileName = "";
   }

   // Scan(String): constructor with filename; an empty name means
   // standard input
   public Scan(String f) {
      fileName = f;
   }

   // getComment: recognize and return the next comment, including its
   // "/*" and "*/" delimiters.  At end of input this prints a farewell
   // message and terminates the JVM with status 0, so it never returns
   // in that case.
   public String getComment() {
      int state = 0;  // start state for DFA
      // States 0 and 1 consume the opening "/*" without storing it, so
      // the result starts out holding it.
      StringBuilder text = new StringBuilder("/*");

      while (state != 4) {
         // every non-accepting state consumes exactly one character
         int ch = getNextChar();
         if (ch == EOF) {
            System.out.println(
               "Th-th-th-th-that's all folks");
            System.exit(0);
         }
         switch (state) {
            case 0:  // outside a comment
               state = (ch == '/') ? 1 : 0;
               break;
            case 1:  // previous character was '/'
               // any other character, even a second '/', goes back to state 0
               state = (ch == '*') ? 2 : 0;
               break;
            case 2:  // inside the comment
               text.append((char) ch);
               state = (ch == '*') ? 3 : 2;
               break;
            default: // state 3: inside, previous character was '*'
               text.append((char) ch);
               if (ch == '*') {
                  state = 3;
               } else if (ch == '/') {
                  state = 4;
               } else {
                  state = 2;
               }
               break;
         } // end of switch
      } // end of while
      return text.toString();
   }

   // getNextChar: fetches the next character, opening the input on the
   // first call.  Returns EOF (-1) at end of input.  On an I/O error it
   // prints a message and terminates the JVM with status 1.
   private int getNextChar() {
      if (!fileOpen) {
         try {
            fileOpen = true;
            // compare by content, not by reference
            if (fileName == null || fileName.isEmpty()) {
               in = new InputStreamReader(System.in);
            } else {
               in = new FileReader(fileName);
            }
         } catch (IOException e) {
            System.out.println("Exception opening " +
               fileName);
            System.exit(1);
         }
      }
      int ch = EOF;  // keep compiler happy
      try {
         ch = in.read();
      } catch (IOException e) {
         System.out.println("Exception reading char");
         System.exit(1);
      }
      return ch;
   }
}
// ScanTest: Test a scanner for C-style comments
import java.io.*;
public class ScanTest {
   // main: prints every comment the scanner finds.  The input file name
   // is taken from the first command-line argument when one is given;
   // otherwise the no-argument Scan constructor is used.
   public static void main(String[] args) {
      Scan commentScanner =
         (args.length > 0) ? new Scan(args[0]) : new Scan();

      // The loop has no exit of its own: this driver relies on
      // getComment() to end the program at end-of-file.
      for (;;) {
         String comment = commentScanner.getComment();
         System.out.println("Next comment: \"" + comment + "\"");
      }
   }
}
| Run and Output |
% javac Scan.java
% javac ScanTest.java
% cat -n comment.source
1 x = 47 / 13 * 12; /**/
2 /***/ /****/ /*/ ?? * / */
3 /* abc; */ /* over
4 two lines */ JUNK! 3.14159; /*quit*/
% java ScanTest (comment.source pasted)
x = 47 / 13 * 12; /**/
/***/ /****/ /*/ ?? * / */
/* abc; */ /* over
two lines */ JUNK! 3.14159; /*quit*/
Next comment: "/**/"
Next comment: "/***/"
Next comment: "/****/"
Next comment: "/*/ ?? * / */"
Next comment: "/* abc; */"
Next comment: "/* over
two lines */"
Next comment: "/*quit*/" (control-D typed)
Th-th-th-th-that's all folks
|
% java ScanTest < comment.source (redirected)
Next comment: "/**/"
Next comment: "/***/"
Next comment: "/****/"
Next comment: "/*/ ?? * / */"
Next comment: "/* abc; */"
Next comment: "/* over
two lines */"
Next comment: "/*quit*/"
Th-th-th-th-that's all folks
% java ScanTest comment.source (source file)
Next comment: "/**/"
Next comment: "/***/"
Next comment: "/****/"
Next comment: "/*/ ?? * / */"
Next comment: "/* abc; */"
Next comment: "/* over
two lines */"
Next comment: "/*quit*/"
Th-th-th-th-that's all folks
|
Revision date: 2012-12-25.
(Please use ISO 8601,
the International Standard Date and Time Notation.)
|