GENERATING BYTE CODE FROM JAVA ASTs

An example AST for an integer arithmetic expression:

    y + x * (y - 3)

        ADD
       /   \
     VAR   MUL
      y    / \
          /   \
        VAR   SUB
         x    / \
             /   \
           VAR   CONST
            y      3

Possible C representation of abstract syntax for integer expressions.

typedef enum {ADD, SUB, MUL, DIV, LOCVAR, CONST} expkind;

typedef struct {
  expkind kind;
  union {
    struct { Expression leftchild; Expression rightchild; } binary_e;
    struct { String id; } locvar_e;
    struct { int val; } const_e;
  };
} *Expression;

Possible C-like code for walking an expression tree and emitting JVM
instructions. We assume a global environment mapping local variable ids
to slot numbers. For clarity we emit the instructions as strings; in
reality they would be emitted as numeric bytecodes. This is pseudo-code:
in this and the following fragments, the break that ends each case and the
union member names in field accesses (e.g. e->binary_e.leftchild) are
omitted for brevity; no fall-through between cases is intended.

translate(Expression e) {
  switch (e->kind) {
    case ADD: {
      translate(e->leftchild);
      translate(e->rightchild);
      emit("iadd");
    }
    case SUB: {
      translate(e->leftchild);
      translate(e->rightchild);
      emit("isub");
    }
    ...
    case LOCVAR: {
      index = lookup(environment, e->id);
      emit("iload");
      emit(toString(index));
    }
    case CONST: {
      emit("ldc");
      emit(toString(e->val));
    }
  }
}

An example AST involving statements:

    while (x < 5) {
      x = 3 - x;
      y++;
    }
    return y;

              SEQ
             /   \
            /     \
       WHILE       \
       /   \        \
     LT     \        \
    /  \     \     RETURN
  VAR CONST   \        \
   x    5      \       VAR
               SEQ      y
              /   \
          ASGN    INCR
           x       y
           |
          SUB
         /   \
        /     \
    CONST     VAR
      3        x

typedef enum {SEQ, WHILE, ASGN, INCR, RETURN} stmtkind;

typedef struct {
  stmtkind kind;
  union {
    struct { Stmt first; Stmt second; } seq_s;
    struct { Boolexp control; Stmt body; } while_s;
    struct { String var; Expression rhs; } asgn_s;
    struct { String var; } incr_s;
    struct { Expression retval; } return_s;
  };
} *Stmt;

translate(Stmt s) {
  switch (s->kind) {
    case SEQ: {
      translate(s->first);
      translate(s->second);
    }
    case WHILE: {
      top = newlabel();
      bottom = newlabel();
      emit("goto");
      emit(bottom);
      emit(top + ":");
      translate(s->body);
      emit(bottom + ":");
      translate_boolexp(s->control, top);
    }
    case ASGN: {
      index = lookup(environment, s->var);
      translate(s->rhs);
      emit("istore");
      emit(toString(index));
    }
    case INCR: {
      index = lookup(environment, s->var);
      emit("iinc");
      emit(toString(index));
      emit("1");
    }
    case RETURN: {
      translate(s->retval);
      emit("ireturn");
    }
  }
}

Boolean expressions with exits

    if (x < y && !b) ...

        AND
       /   \
     LT    NOT
    /  \    |
  exp  exp BVAR
            b

typedef enum {EQ, NE, LE, LT, GE, GT, AND, OR, NOT, BVAR} bexpkind;

typedef struct {
  bexpkind kind;
  union {
    struct { Expression leftchild; Expression rightchild; } relational_b;
    struct { Boolexp leftchild; Boolexp rightchild; } binary_b;
    struct { Boolexp child; } unary_b;
    struct { String var; } bvar_b;
  };
} *Boolexp;

Convention: branch to true_target if true; otherwise "fall through".

translate_boolexp(Boolexp e, Label true_target) {
  switch (e->kind) {
    case EQ: {
      translate(e->leftchild);
      translate(e->rightchild);
      emit("if_icmpeq");
      emit(true_target);
    }
    ...
    case OR: {
      translate_boolexp(e->leftchild, true_target);
      translate_boolexp(e->rightchild, true_target);
    }
    case AND: {
      middle = newlabel();
      bottom = newlabel();
      translate_boolexp(e->leftchild, middle);
      emit("goto");
      emit(bottom);
      emit(middle + ":");
      translate_boolexp(e->rightchild, true_target);
      emit(bottom + ":");
    }
    case NOT: {
      lab = newlabel();
      translate_boolexp(e->child, lab);
      emit("goto");
      emit(true_target);
      emit(lab + ":");
    }
    case BVAR: {
      index = lookup(environment, e->var);
      emit("iload");
      emit(toString(index));
      emit("iconst_1");
      emit("if_icmpeq");
      emit(true_target);
    }
  }
}

Other Issues:

- Full Java has lots more statement and expression forms.
- Labels must be converted to actual numeric code offsets by backpatching.
- Declarations are processed by modifying the var-to-offset environment.
- Actual javac translation is sensitive to the exact form of the source.
  E.g., "i++" translates the same way as "i+=1" but not the same as
  "i = i + 1".
- For the source of full javac, see Sun's community source release or
  IBM's Jikes.