CS 3723 -- Writing and Reading Binary Trees

CS 3723 Programming Languages
Writing and Reading Binary Trees
Using Separate Programs

Overview: This example illustrates the use of a recursive descent parser to handle character input representing a binary search tree as a sequence of characters, using a "flattened" form similar to the syntax of Lisp. This is a toy example because the nodes of the tree are single characters (to keep the code simple). These single characters are the characters encountered in an input file. Nevertheless, the example shows how semantic actions can reconstruct the internal binary tree from the flattened form. In the initial creation of the binary search tree, the code creates the flattened form from the internal linked list form. The example would scale up to more complex input to form the initial binary search tree.

Details of the code: This example consists of two separate C programs:

search_tree.c:
- Reads input consisting of individual characters.
- Creates a binary search tree with each new character a new node of the tree. (If a character is encountered a second time, it is found in the tree and ignored.) The tree created is in internal linked list form.
- Outputs the tree using a lisp-like representation as a stream of characters. A null tree is represented as '0'. Other trees have the form: '(' character leftsubstree rightsubtree ')'
traverse_tree.c:
- Reads in a binary tree represented in a lisp-like representation as a stream of characters (see above).
- Converts the stream of character input back to the internal linked list representation of the binary tree.
- Uses an inorder traversal to print the nodes in inorder (that is, alphabetic order).

First the two programs are executed separately. Then the output of the first is piped into the second as input. This example shows how a program (search_tree.c above) can create a binary tree, "flatten" it and output it as a stream of characters. This output can then be piped into a second program which decodes the binary tree, that is, "unflattens" it. Finally an inorder traversal is carried out on this reconstructed binary tree.

Part of the point of this example is to illustrate how much better it would be if one could pipe a whole data structure from one process to another.

The programs: Here are the two programs: search_tree.c on the left and traverse_tree.c on the right:

search_tree.c: Construct binary tree, output traverse_tree.c: Reconstruct binary tree

`search_tree.c`: Construct binary tree, output	`traverse_tree.c`: Reconstruct binary tree
/* search_tree.c: read chars, store in binary / / tree and output the tree / #include <stdio.h> #include <ctype.h> #include <string.h> #include <stdlib.h> struct tnode { char ch; struct tnode left; struct tnode right; }; void addtree(struct tnode , char ); struct tnode newnode(char ); struct tnode talloc(void); void treeprint(struct tnode p); int main(void) { struct tnode root; char c; root = NULL; while ((c = getchar()) != '\n') { if (root == NULL) root = newnode(c); else addtree(root, c); } treeprint(root); printf("#\n"); return 0; } /* addtree: add a node, non-recursive / void addtree(struct tnode p, char c) { for (;;) { if ((p -> ch) == c) return; /* ignore / else if ((p -> ch) > c) { / go left / if ((p -> left) == NULL) { p -> left = newnode(c); return; } else p = p -> left; } else if ((p -> ch) < c) { / go right / if ((p -> right) == NULL) { p -> right = newnode(c); return; } else p = p -> right; } } } / newnode: fix up a new node / struct tnode newnode(char c) { struct tnode p = talloc(); p -> ch = c; p -> left = p -> right = NULL; return p; } / talloc: make a tnode / struct tnode talloc(void) { return (struct tnode ) malloc(sizeof(struct tnode)); } / treeprint: print tree in lisp-like form / void treeprint(struct tnode p) { if (p == NULL) printf("0"); else { printf("( %c ", p -> ch); treeprint(p -> left); printf(" "); treeprint(p -> right); printf(" )"); } }	/* traverse_tree.c: read in binary tree. * Grammar for input form: * bintree ---> tree '#' * tree ---> '0' \| '(' letter tree tree ')' * letter ---> 'a' \| 'b' \| 'c' \| ... / #include <stdio.h> #include <stdlib.h> #include <ctype.h> char next; struct tnode { char ch; struct tnode left; struct tnode right; }; struct tnode tree(void); void error(int); void scan(void); void inorder_traversal(struct tnode ); int main(void) { struct tnode root; scan(); root = tree(); /* read in binary tree / if (next != '#') error(1); / print in alpha order using traversal / inorder_traversal(root); printf("\n"); return 0; } struct tnode tree(void) { struct tnode p ; struct tnode q; struct tnode r; if (next == '0') { scan(); p = NULL; return p; } else { p = (struct tnode ) malloc(sizeof(struct tnode)); if (next == '(') scan(); else error(2); if (isalpha(next)) { p -> ch = next; scan(); } else error(3); q = tree(); r = tree(); if (next == ')') scan(); else error(4); p -> left = q; p -> right = r; return p; } } void scan(void) { while (isspace(next = getchar())) ; } void error(int n) { printf("\n*** ERROR: %i\n", n); exit(1); } /* inordertraversal: inorder print of tree p / void inorder_traversal(struct tnode p) { if (p != NULL) { inorder_traversal(p -> left); printf("%c", p -> ch); inorder_traversal(p -> right); } }


/* search_tree.c: read chars, store in binary */
/* tree and output the tree */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>

struct tnode {
   char ch;
   struct tnode *left;
   struct tnode *right;
};
void          addtree(struct tnode *, char );
struct tnode *newnode(char );
struct tnode *talloc(void);
void          treeprint(struct tnode *p);

int main(void) {
   struct tnode *root;
   char c;
   root = NULL;
   while ((c = getchar()) != '\n') {
      if (root == NULL) root = newnode(c);
      else addtree(root, c);
   }
   treeprint(root);
   printf("#\n");
   return 0;
}

/* addtree: add a node, non-recursive */
void addtree(struct tnode *p, char c) {
   for (;;) {
      if ((p -> ch) == c) return; /* ignore */
      else if ((p -> ch) > c) { /* go left */
         if ((p -> left) == NULL) {
            p -> left = newnode(c);
            return;
         }
         else p = p -> left;
      }
      else if ((p -> ch) < c) { /* go right */
         if ((p -> right) == NULL) {
            p -> right = newnode(c);
            return;
         }
         else p = p -> right;
      }
   }
}

/* newnode: fix up a new node */
struct tnode *newnode(char c) {
   struct tnode *p = talloc();
   p -> ch = c;
   p -> left = p -> right = NULL;
   return p;
}

/* talloc: make a tnode */
struct tnode *talloc(void) {
   return (struct tnode *)
        malloc(sizeof(struct tnode));
}

/* treeprint: print tree in lisp-like form */
void treeprint(struct tnode *p) {
   if (p == NULL) printf("0");
   else {
      printf("( %c ", p -> ch);
      treeprint(p -> left);
      printf(" ");
      treeprint(p -> right);
      printf(" )");
   }
}


/* traverse_tree.c: read in binary tree. 
 * Grammar for input form:
 *   bintree --->  tree '#'
 *   tree ---> '0' | '(' letter tree tree ')'
 *   letter ---> 'a' | 'b' | 'c' | ...
 */
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
char next;
struct tnode {
   char ch;
   struct tnode *left;
   struct tnode *right;
};

struct tnode *tree(void);
void error(int);
void scan(void);
void inorder_traversal(struct tnode *);

int main(void) {
   struct tnode *root;
   scan();
   root = tree(); /* read in binary tree */
   if (next != '#') error(1);
   /* print in alpha order using traversal */
   inorder_traversal(root); 
   printf("\n");
   return 0;
}

struct tnode *tree(void) {
   struct tnode *p ;
   struct tnode *q;
   struct tnode *r;
   if (next == '0') {
      scan();
      p = NULL;
      return p;
   }
   else {
      p = (struct tnode *)
         malloc(sizeof(struct tnode));
      if (next == '(')
         scan();
      else error(2);
      if (isalpha(next)) {
         p -> ch = next;
         scan();
      }
      else error(3);
      q = tree();
      r = tree();
      if (next == ')')
         scan();
      else error(4);
      p -> left = q;
      p -> right = r;
      return p;
   }
}

void scan(void) {
   while (isspace(next = getchar()))
      ;
}
void error(int n) {
   printf("\n*** ERROR: %i\n", n);
   exit(1);
}

/* inordertraversal: inorder print of tree p */
void inorder_traversal(struct tnode *p) {
   if (p != NULL) {
      inorder_traversal(p -> left);
      printf("%c", p -> ch);
      inorder_traversal(p -> right);
   }
}

Output of sample runs:

% cc -o search_tree search_tree.c
% cc -o traverse_tree traverse_tree.c
% search_tree
mtwtfssjfmamjjasond    (first letters of days and months -- dups ignored)
( m ( f ( a 0 ( d 0 0 ) ) ( j 0 0 ) ) ( t ( s ( o ( n 0 0 ) 0 ) 0 ) ( w 0 0 ) ) )#
% traverse_tree
( m ( f ( a 0 ( d 0 0 ) ) ( j 0 0 ) ) ( t ( s ( o ( n 0 0 ) 0 ) 0 ) ( w 0 0 ) ) )#
adfjmnostw
% search_tree | traverse_tree
mtwtfssjfmamjjasond
adfjmnostw

Here the programs have been redone so that no blanks are output and blanks are not skipped over on input. This will handle most input characters, including '0', ' ', '(', and ')', but not '#'. (It could also be fixed to not require '#' at the end.) The input routine in the second program is just next = getchar(); and the second program has dropped the isalpha(next) requirement.

Notice that on output, only actual input characters follow a left paren that is used to represent the tree. This is how the representation can handle any Ascii character, including '0', ' ', '(', and ')', and even a newline as shown at the end of this writeup.

The output routine in the first program is now:


/* treeprint: print tree in lisp-like form */
void treeprint(struct tnode *p) {
   if (p == NULL) printf("0");
   else {
      printf("(%c", p -> ch);
      treeprint(p -> left);
      treeprint(p -> right);
      printf(")");
   }
}

Here are the new results of test runs:

% cc -o search_tree2 search_tree2.c
% cc -o traverse_tree2 traverse_tree2.c
% search_tree2
dt$ *0)xy(8
(d($( 00)(*()((00)0)(00(800))))(t0(x0(y00))))#
% traverse_tree2
(d($( 00)(*()((00)0)(00(800))))(t0(x0(y00))))#
 $()*08dtxy
% search_tree2 | traverse_tree2
dt$ *0)xy(8
 $()*08dtxy

Finally, the search_tree2.c program has been modified to continue reading until end-of-file (using while ((c = getchar()) != EOF) { .... The resulting program uses as input the program search_tree2.c itself. Notice that the newline character is also present and is processed. (In the alphabetic order at the end, newline comes first, there are no other control characters, then comes a blank, followed by !"# -- all in the Ascii order. Also \ and _ come between the upper-case letters and lower-case letters, along with [, ], and ^ which are not in the input.)

% search_tree2 < search_tree2.c
(/(*( (
00)(#(!0("00))(((%00)()00))))(.(,0(-00))0))(s(e(a(_(:(000)(<(;00)(>(=00)(N(L(E0(F00))0)(U(O00)(\00))))))0)(c(b00)(d00)))(r(h(g(f00)0)(o(i0(n(l(k00)(m00))0))(p00)))0))(t0(y(u0(v0(w0(x00))))({(z00)(}00))))))#
% search_tree2 < search_tree2.c | traverse_tree2

 !"#%()*,-./0:;<=>EFLNOU\_abcdefghiklmnoprstuvwxyz{}

Revision date: 2004-11-02. (Please use ISO 8601, the International Standard.)

CS 3723 Programming Languages Writing and Reading Binary Trees Using Separate Programs

CS 3723 Programming Languages
Writing and Reading Binary Trees
Using Separate Programs