CS 3723
 Programming Languages 
 RE to NFA-ε 
"ReTrans" Programs

Java Automata Program
/* Re.java: regular expressions
 * simple parser -- convert to RPN
 * input ignores whitespace
 *
 * grammar:
 *   M ---> '/' P '/'
 *   P ---> Q { '|' T }
 *   Q ---> R { R }
 *   R ---> S { '*' }
 *   S ---> '(' M ')' | '@' | '.' | lower-case
 */
public class Re {
   private GetChar getChar;
   private char next;
   private String reRPN;

   public Re() {
      getChar = new GetChar();
      reRPN = "";
   }

   public String M() {
      scan();
      if (next == '/') {
         scan();
         P();
      }
      else error(1);
      if (next != '/') error(2);
      else {
         gen('$');
         gen('\n');
      }
      return reRPN;
   }

   private void P() {
      Q();
      while (next == '|') {
         scan();
         Q();
         gen('|');
      }
   }

   private void Q() {
      R();
      while (next != '*' && next != ')' &&
             next != '/' && next != '|') {
         R();
         gen('+');
      }
   }

   private void R() {
      S();
      while (next == '*') {
         scan();
         gen('*');
      }
   }

   private void S() {
      if (next == '(') {
         scan();
         P();
         if (next == ')') scan();
         else error(3);
      }
      else if (next == '@' || next == '.') {
         gen(next);
         scan();
      }
      else if (Character.isLetter(next)) {
         gen(next);
         scan();
      }
      else {
         error(4);
      }
   }

   private void scan() {
      while (Character.isWhitespace(next =
            getChar.getNextChar()))
         ;
   }

   private void error(int n) {
      System.out.println("*** ERROR: " + n +
         ", next: " + next);
      System.exit(1);
   }

   private void gen(char ch) {
      reRPN = reRPN + ch;
   }

   public static void main(String[] args) {
      Re re = new Re();
      String res = re.M();
      System.out.println(res);
   }
}

// GetChar: fetch next char import java.io.*; public class GetChar { private Reader in; // input stream // GetChar: constructor public GetChar () { in = new InputStreamReader(System.in); } // getNextChar: fetches next char public char getNextChar() { char ch = ' '; // for compiler try { ch = (char)in.read(); } catch (IOException e) { System.out.println("Read Except"); } return ch; } }
// ST.java: // holds pair: start, term public class ST { int s; // start int t; // terminal public ST (int start, int terminal) { s = start; t = terminal; } }
// ReTrans.java: input RE in RPN form
public class ReTrans {
   private Re re; // trans. input RE to RPN
   public String reRPN; // RE in RPN
   private GetChar getChar;
   private Auto auto;
   private ST st;
   private ST st1;
   private ST st2;
   private STstack sts; // stack for pairs
   private char ch;
   private int currNode; // to build NFA

   public ReTrans() {
      re = new Re();
      reRPN = re.M(); // input reg expr in RPN
      getChar = new GetChar();
      auto = new Auto(100); // NFA
      sts = new STstack(30); // stack of pairs
      currNode = 0;
   }

   public void trans() { // trans RPN to NFA-eps
      // input from RE in RPN form: reRPN
      int w, x, y, z, u, v;
      int loc = 0;  // move through reRPN
      while ((ch = reRPN.charAt(loc)) != '$') {
         if (Character.isLetter(ch) || 
              ch == '@' || ch == '.') {
            auto.insert(currNode,ch,currNode+1);
            sts.push(currNode, currNode + 1);
            currNode += 2;
         }
         else if (ch == '+') {
            st2 = sts.pop();
            st1 = sts.pop();
            w = st1.s; 
            x = st1.t;
            y = st2.s;
            z = st2.t;
            auto.insert(x, '@', y);
            sts.push(w, z);
         }
         else if (ch == '|') {
            st2 = sts.pop();
            st1 = sts.pop();
            w = st1.s; 
            x = st1.t;
            y = st2.s;
            z = st2.t;
            u = currNode++;
            v = currNode++;
            auto.insert(u, '@', w);
            auto.insert(u, '@', y);
            auto.insert(x, '@', v);
            auto.insert(z, '@', v);
            sts.push(u, v);
         }
         else if (ch == '*') {
            st1 = sts.pop();
            w = st1.s; 
            x = st1.t;
            u = currNode++;
            v = currNode++;
            auto.insert(u, '@', w);
            auto.insert(u, '@', v);
            auto.insert(x, '@', v);
            auto.insert(x, '@', w);
            sts.push(u, v);
         }
         loc++;
      }
      System.out.println();
      auto.setStart(sts.getStart(0));
      auto.setTerminal(sts.getTerm(0));
      auto.setMaxStates(currNode);
      auto.printA(currNode);
      sts.printStack();
      System.out.println("Auto: start: " +
         sts.getStart(0) + ", term: " +
         sts.getTerm(0));
      System.out.println("Total states: " +
         currNode);
      String inNFA = "";
      while ((ch = getChar.getNextChar())!='$')
         inNFA = inNFA + ch;
      inNFA = inNFA + '$';
      auto.execNFA(inNFA);
   }
            
   private char scan() { // for match string
      char next;
      while (Character.isWhitespace(next =
            getChar.getNextChar()))
         ;
      return next;
   }

   public static void main(String[] args) {
      ReTrans tr = new ReTrans();
      System.out.println(tr.reRPN);
      tr.trans();
   }
}
// STstack.java: // stack of pairs public class STstack { private int size; private ST[] ststack; // main data structure private int top; public STstack(int s) { size = s; ststack = new ST[size]; top = 0; } public void push(ST st) { ststack[top++] = st; } public void push(int s, int t) { ST st = new ST(s, t); ststack[top++] = st; } public ST pop() { if (top == 0) return null; return ststack[--top]; } public int size() { return top; } public int getStart(int i) { return ststack[i].s; } public int getTerm(int i) { return ststack[i].t; } public void printST(ST st) { System.out.println("[" + st.s + "," + st.t + "]"); } public void printStack() { for (int i = 0; i < top; i++) { System.out.print(i + " "); System.out.println("[" + ststack[i].s + "," + ststack[i].t + "]"); } } }

Runs of the Program (Red is user input)
/.*(ab|aac)/
.*ab+aa+c+|+$

   0 --> [., 1]
   1 --> [@, 0] --> [@, 3]
s  2 --> [@, 3] --> [@, 0]
   3 --> [@,14]
   4 --> [a, 5]
   5 --> [@, 6]
   6 --> [b, 7]
   7 --> [@,15]
   8 --> [a, 9]
   9 --> [@,10]
  10 --> [a,11]
  11 --> [@,12]
  12 --> [c,13]
  13 --> [@,15]
  14 --> [@, 8] --> [@, 4]
t 15 --> 

0 [2,15]
Auto: start: 2, term: 15
Total states: 16
cabcaaacabac$
execNFA: execStr: cabcaaacabac$, states: 100
 |                     1 1 1 1 1 1 
 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 
 | - - - - - - - - - - - - - - - - 
 : 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0  s
 : 1 0 1 1 1 0 0 0 1 0 0 0 0 0 1 0  s
c: 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 0  
b: 1 1 0 1 1 0 0 1 1 0 0 0 0 0 1 1  t
c: 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0  
c: 1 1 0 1 1 0 0 0 1 0 0 0 0 1 1 1  t
a: 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 0  
b: 1 1 0 1 1 0 0 1 1 0 0 0 0 0 1 1  t
a: 1 1 0 1 1 1 1 0 1 1 1 0 0 0 1 0  
c: 1 1 0 1 1 0 0 0 1 0 0 0 0 0 1 0  
/.*(ab|ac)(ab|ac)*d/
.*ab+ac+|+ab+ac+|*+d+$

   0 --> [., 1]
   1 --> [@, 0] --> [@, 3]
s  2 --> [@, 3] --> [@, 0]
   3 --> [@,12]
   4 --> [a, 5]
   5 --> [@, 6]
   6 --> [b, 7]
   7 --> [@,13]
   8 --> [a, 9]
   9 --> [@,10]
  10 --> [c,11]
  11 --> [@,13]
  12 --> [@, 8] --> [@, 4]
  13 --> [@,24]
  14 --> [a,15]
  15 --> [@,16]
  16 --> [b,17]
  17 --> [@,23]
  18 --> [a,19]
  19 --> [@,20]
  20 --> [c,21]
  21 --> [@,23]
  22 --> [@,18] --> [@,14]
  23 --> [@,22] --> [@,25]
  24 --> [@,25] --> [@,22]
  25 --> [@,26]
  26 --> [d,27]
t 27 --> 

0 [2,27]
Auto: start: 2, term: 27
Total states: 28
aabacacdccacabddaadcad$
execNFA: execStr: aabacacdccacabddaadcad$, states: 100
 |                     1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 
 | 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 
 | - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
 : 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  s
 : 1 0 1 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  s
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
b: 1 1 0 1 1 0 0 1 1 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 1 1 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0  
c: 1 1 0 1 1 0 0 0 1 0 0 1 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0  
c: 1 1 0 1 1 0 0 0 1 0 0 1 1 1 1 0 0 0 1 0 0 1 1 1 1 1 1 0  
d: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1  t
c: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
c: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
c: 1 1 0 1 1 0 0 0 1 0 0 1 1 1 1 0 0 0 1 0 0 0 1 0 1 1 1 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0  
b: 1 1 0 1 1 0 0 1 1 0 0 0 1 1 1 0 0 1 1 0 0 0 1 1 1 1 1 0  
d: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1  t
d: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
d: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
c: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
a: 1 1 0 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  
d: 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  



Revision date: 2013-08-01. (Please use ISO 8601, the International Standard Date and Time Notation.)