CS 3723 REs: Grammar & Parser

	CS 3723 Programming Languages
REs: Grammar & Parser

Grammar: Regular Expressions (without explicit concatenation operator +)
M ---> P '$' P ---> Q { '\|' Q } Q ---> R { '+' R } Q ---> R { R } (replacement) *R ---> S '' \| S S ---> '(' P ')' \| '@' \| '.' \| lower-case**

Regular Expressions: Parser and Translator to RPN

/* Re.java: reg. expr.,simple parser,
 * convert to RPN. ignore whitespace.
 * grammar:
 *   M ---> P '$'
 *   P ---> Q { '|' Q }
 *   Q ---> R { R }
 *   R ---> S { '*' }
 *   S ---> '(' P ')' | '@' | '.' | lc
 */
public class Re {
   private GetChar getChar;
   private char next;
   private String reRPN;

   public Re() {
      getChar = new GetChar();
      reRPN = "";
   }

   public String M() {
      scan();
      P();
      if (next != '$') error(2);
      // note: have end marker in Q
      else {
         gen('$');
         gen('\n');
      }
      return reRPN;
   }

   private void P() {
      Q();
      while (next == '|') {
         scan();
         Q();
         gen('|');
      }
   }

   private void Q() {
      R();
      while (next == '+') {
         scan();
         R();
         gen('+');
      }
   }

   private void R() {
      S();
      if (next == '*') {
         scan();
         gen('*');
         if (next == '*') error(1);
      }
   }

   private void S() {
      if (next == '(') {
         scan();
         P();
         if (next == ')') scan();
         else error(3);
      }
      else if (next == '@' || next == '.'){
         gen(next);
         scan();
      }
      else if (Character.isLetter(next)) {
         gen(next);
         scan();
      }
      else {
         error(4);
      }
   }

   private void scan() {
      while (Character.isWhitespace(next =
            getChar.getNextChar()))
         ;
   }

   private void error(int n) {
      System.out.println("ERR: " + n +
         ", next: " + next);
      System.exit(1);
   }

   private void gen(char ch) {
      reRPN = reRPN + ch;
      // System.out.println("\n" + reRPN);
   }

   public static void main(String[] args) {
      Re re = new Re();
      String res = re.M();
      System.out.println(res);
   }
}
   private void Q() {
      R();
      while (next != '*' && next != ')' &&
             next != '$' && next != '|') {
         R();
         gen('+');
      }
   }
   private void Q() {
      R();
      while (next == '(' || next == '.' ||
             Character.isLetter(next)) {
         R();
         gen('+');
      }
   }
// GetChar.java: fetch next char 
import java.io.*;
public class GetChar { 
   private Reader in; // input file
  
   // GetChar: constructor  
   public GetChar () { 
      in = new
         InputStreamReader(System.in);
   }

   // getNextChar: fetches next char
   public char getNextChar() {
      char ch = ' '; // for compiler
      try {
         ch = (char)in.read();
      } catch (IOException e) {
         System.out.println("Except.");
      }
      return ch;
   }
}
Output:
% java Re
(ab)*$
ab+*$

% java Re
(a|b)*abb$
ab|*a+b+b+$

% java Re
a*b*c*$
a*b*+c*+$

% java Re
.*(ab|aac)$
.*ab+aa+c+|+$

% java Re
.*(ab|ac)(ab|ac)*d$
.*ab+ac+|+ab+ac+|*+d+$

% java Re
(a|b)*(aa|bb)(a|b)*$
ab|*aa+bb+|+ab|*+$

% java Re
(ab)**$
ERR: 1, next: *

Revision date: 2014-04-14. (Please use ISO 8601, the International Standard.)