CS 3723
 Programming Languages 
   REs: Grammar & Parser 


Grammar: Regular Expressions
(without explicit concatenation operator +)
   M ---> P '$'
   P ---> Q { '|' Q }
   Q ---> R { '+' R }
   Q ---> R { R }  (replacement)
   R ---> S  '*'  |  S
   S ---> '(' P ')' | '@' | '.' | lower-case 

Regular Expressions: Parser and Translator to RPN
/* Re.java: reg. expr.,simple parser,
 * convert to RPN. ignore whitespace.
 * grammar:
 *   M ---> P '$'
 *   P ---> Q { '|' Q }
 *   Q ---> R { R }
 *   R ---> S { '*' }
 *   S ---> '(' P ')' | '@' | '.' | lc
 */
public class Re {
   private GetChar getChar;
   private char next;
   private String reRPN;

   public Re() {
      getChar = new GetChar();
      reRPN = "";
   }

   public String M() {
      scan();
      P();
      if (next != '$') error(2);
      // note: have end marker in Q
      else {
         gen('$');
         gen('\n');
      }
      return reRPN;
   }

   private void P() {
      Q();
      while (next == '|') {
         scan();
         Q();
         gen('|');
      }
   }

   private void Q() {
      R();
      while (next == '+') {
         scan();
         R();
         gen('+');
      }
   }

   private void R() {
      S();
      if (next == '*') {
         scan();
         gen('*');
         if (next == '*') error(1);
      }
   }

   private void S() {
      if (next == '(') {
         scan();
         P();
         if (next == ')') scan();
         else error(3);
      }
      else if (next == '@' || next == '.'){
         gen(next);
         scan();
      }
      else if (Character.isLetter(next)) {
         gen(next);
         scan();
      }
      else {
         error(4);
      }
   }

   private void scan() {
      while (Character.isWhitespace(next =
            getChar.getNextChar()))
         ;
   }
   private void error(int n) {
      System.out.println("ERR: " + n +
         ", next: " + next);
      System.exit(1);
   }

   private void gen(char ch) {
      reRPN = reRPN + ch;
      // System.out.println("\n" + reRPN);
   }

   public static void main(String[] args) {
      Re re = new Re();
      String res = re.M();
      System.out.println(res);
   }
}

private void Q() { R(); while (next != '*' && next != ')' && next != '$' && next != '|') { R(); gen('+'); } }
private void Q() { R(); while (next == '(' || next == '.' || Character.isLetter(next)) { R(); gen('+'); } }

// GetChar.java: fetch next char import java.io.*; public class GetChar { private Reader in; // input file // GetChar: constructor public GetChar () { in = new InputStreamReader(System.in); } // getNextChar: fetches next char public char getNextChar() { char ch = ' '; // for compiler try { ch = (char)in.read(); } catch (IOException e) { System.out.println("Except."); } return ch; } }
Output: % java Re (ab)*$ ab+*$ % java Re (a|b)*abb$ ab|*a+b+b+$ % java Re a*b*c*$ a*b*+c*+$ % java Re .*(ab|aac)$ .*ab+aa+c+|+$ % java Re .*(ab|ac)(ab|ac)*d$ .*ab+ac+|+ab+ac+|*+d+$ % java Re (a|b)*(aa|bb)(a|b)*$ ab|*aa+bb+|+ab|*+$ % java Re (ab)**$ ERR: 1, next: *


Revision date: 2014-04-14. (Please use ISO 8601, the International Standard.)