DIRECTORY Rope USING [ROPE]; IntCodeDefs: CEDAR DEFINITIONS = BEGIN Align: TYPE = NAT; Offset: TYPE = INT; Count: TYPE = INT; LogicalId: TYPE = INT; nullLogicalId: LogicalId = FIRST[LogicalId]; ByteSequence: TYPE = Rope.ROPE; nullByteSequence: ByteSequence = NIL; Word: TYPE = PACKED ARRAY [0..bitsPerWord) OF BOOL; bitsPerWord: NAT = 32; zerosWord: Word = LOOPHOLE[INT[0]]; onesWord: Word = LOOPHOLE[INT[-1]]; VariableId: TYPE = INT; nullVariableId: VariableId = 0; FileId: TYPE = INT; nullFileId: FileId = 0; Node: TYPE = REF NodeRep; nullNode: Node = NIL; NodeRep: TYPE = RECORD [ bits: Count 0, details: SELECT kind: NodeKind FROM var => [ flags: VariableFlags nullVariableFlags, -- flags for the variable id: VariableId nullVariableId, -- the unique id location: Location NIL], -- the location (NIL if not yet known) const => [ data: SELECT kind: ConstKind FROM word => [word: Word], bytes => [align: Align, bytes: ByteSequence], refLiteral => [litKind: RefLitKind, contents: ByteSequence], numLiteral => [class: ArithClass, contents: ByteSequence], ENDCASE], block => [nodes: NodeList], decl => [var: Var, init: Node], enable => [handle: Handler, scope: NodeList], assign => [lhs: Var, rhs: Node], cond => [cases: CaseList], label => [label: Label], goto => [dest: Label, backwards: BOOL FALSE], apply => [ proc: Node, -- designates the procedure to be applied args: NodeList NIL, -- the values to which the lambda is applied handler: Handler NIL], -- the handler to call on an error lambda => [ parent: Label, -- the proc parent (NIL if outer level) descBody: Var, -- the proc desc body (NIL if outer level or catch phrase) kind: LambdaKind, -- the kind of procedure bitsOut: INT, -- bits returned formalArgs: VarList, -- the formal argument values body: NodeList], -- the body that yields a value (via return nodes) return => [rets: NodeList], oper => [oper: Oper], machineCode => [bytes: ByteSequence], module => [vars: VarList, procs: NodeList], source => [source: SourceRange, nodes: NodeList], comment => [bytes: ByteSequence], ENDCASE]; NodeKind: TYPE = { var, const, block, decl, enable, assign, cond, label, goto, apply, lambda, return, oper, machineCode, module, source, comment}; ConstKind: TYPE = {word, bytes, refLiteral, numLiteral}; LambdaKind: TYPE = { outer, -- normal outer-level procedure inner, -- normal inner-level procedure install, -- installation procedure init, -- initialization procedure catch, -- catch procedure scope, -- enable scope procedure fork, -- fork base procedure unknown}; -- unknown kind of procedure NodeList: TYPE = REF NodeListRep; nullNodeList: NodeList = NIL; NodeListRep: TYPE = RECORD [ first: Node NIL, -- the first node in execution order rest: NodeList NIL]; -- the rest of the nodes in this block VarList: TYPE = REF VarListRep; nullVarList: VarList = NIL; VarListRep: TYPE = RECORD [first: Var, rest: VarList]; Var: TYPE = REF VarRep; nullVar: Var = NIL; VarRep: TYPE = NodeRep.var; VariableFlags: TYPE = PACKED ARRAY VariableFlag OF BOOL; nullVariableFlags: VariableFlags = ALL[FALSE]; VariableFlag: TYPE = { frequent, -- TRUE iff the var is used "frequently" used, -- TRUE iff the var is used outside of a decl constant, -- TRUE iff the var cannot change its value addressed, -- TRUE iff this var referenced by @ assigned, -- TRUE iff the var is assigned after declaration upLevel, -- TRUE iff the var is referenced up level notRegister, -- TRUE iff the var is forced into memory named -- TRUE iff the var is named }; Handler: TYPE = REF HandlerRep; HandlerRep: TYPE = RECORD [context: Node, proc: Node]; Label: TYPE = REF LabelRep; nullLabel: Label = NIL; LabelRep: TYPE = PACKED RECORD [ id: LogicalId, -- a unique id for the label node: Node, -- the node that defines the label backTarget: BOOL, -- TRUE if this label is the target of a backwards goto node jumpedTo: BOOL, -- TRUE if this label is used by a goto used: BOOL -- TRUE if this label is used by anything (other than a label node) ]; Location: TYPE = REF LocationRep; nullLocation: Location = NIL; LocationRep: TYPE = RECORD [ SELECT kind: LocationKind FROM system => [id: LogicalId], globalVar => [id: LogicalId], localVar => [id: LogicalId, parent: Label], register => [id: LogicalId], link => [id: LogicalId], stack => [offset: Offset], deref => [addr: Node, align: Align], indexed => [base: Node, index: Node], field => [base: Node, start: Offset, cross: BOOL FALSE], upLevel => [link: Var, reg: Var, format: LogicalId 0], composite => [parts: NodeList], escape => [id: LogicalId, base: Node, offset: Offset], dummy => NULL, ENDCASE]; LocationKind: TYPE = {system, globalVar, localVar, register, link, stack, deref, indexed, field, upLevel, composite, escape, dummy}; CaseList: TYPE = REF CaseListRep; nullCaseList: CaseList = NIL; CaseListRep: TYPE = RECORD [ tests: NodeList, -- NIL => ELSE or ENDCASE body: Node, -- the node to execute when the test is true rest: CaseList]; -- the rest of the cases (NIL if none) SourceRange: TYPE = RECORD [ start: Offset, -- the first character index for the range chars: Count, -- the number of characters for the range file: FileId]; -- the file where the source originated nullSourceRange: SourceRange = [0, 0, 0]; RefLitKind: TYPE = {rope, atom, refText, other}; Oper: TYPE = REF OperRep; nullOper: Oper = NIL; OperRep: TYPE = PACKED RECORD [ SELECT kind: OperKind FROM code => [label: Label, offset: Offset, direct: BOOL], arith => [class: ArithClass, select: ArithSelector], boolean => [class: BoolClass, bits: Count 1], convert => [to: ArithClass, from: ArithClass], check => [class: ArithClass, sense: Comparator], compare => [class: ArithClass, sense: Comparator], mesa => [mesa: MesaSelector, info: INT], cedar => [cedar: CedarSelector, info: INT], escape => [escape: LogicalId, info: INT], ENDCASE]; OperKind: TYPE = {code, arith, boolean, convert, check, compare, mesa, cedar, escape}; ArithSelector: TYPE = {add, sub, mul, div, mod, pow, abs, neg, min, max}; ArithClass: TYPE = PACKED RECORD [kind: ArithClassKind, checked: BOOL, precision: ArithPrecision]; ArithClassKind: TYPE = MACHINE DEPENDENT { signed (0), unsigned (1), address (2), real (3), firstExtension (4), lastExtension (15)}; ArithPrecision: TYPE = [0..256]; BoolClass: TYPE = {and, not, or, xor}; Comparator: TYPE = {eq, lt, le, ne, ge, gt}; MesaSelector: TYPE = { addr, all, equal, notEqual, nilck, alloc, free, fork, join, monitorEntry, monitorExit, notify, broadcast, wait, unnamedError, unwindError, abortedError, uncaughtError, boundsError, narrowFault, signal, error, unwind, resume, reject, copyGlobal, startGlobal, restartGlobal, stopGlobal, checkInit, globalFrame }; CedarSelector: TYPE = { simpleAssign, simpleAssignInit, complexAssign, complexAssignInit, new, code, narrow, referentType, procCheck }; ConstNode: TYPE = REF NodeRep.const; WordConstNode: TYPE = REF NodeRep.const.word; BytesConstNode: TYPE = REF NodeRep.const.bytes; RefLiteralConstNode: TYPE = REF NodeRep.const.refLiteral; BlockNode: TYPE = REF NodeRep.block; DeclNode: TYPE = REF NodeRep.decl; EnableNode: TYPE = REF NodeRep.enable; AssignNode: TYPE = REF NodeRep.assign; CondNode: TYPE = REF NodeRep.cond; LabelNode: TYPE = REF NodeRep.label; GotoNode: TYPE = REF NodeRep.goto; ApplyNode: TYPE = REF NodeRep.apply; LambdaNode: TYPE = REF NodeRep.lambda; ReturnNode: TYPE = REF NodeRep.return; OperNode: TYPE = REF NodeRep.oper; MachineCodeNode: TYPE = REF NodeRep.machineCode; ModuleNode: TYPE = REF NodeRep.module; SourceNode: TYPE = REF NodeRep.source; CommentNode: TYPE = REF NodeRep.comment; SystemLocation: TYPE = REF LocationRep.system; GlobalVarLocation: TYPE = REF LocationRep.globalVar; LocalVarLocation: TYPE = REF LocationRep.localVar; RegisterLocation: TYPE = REF LocationRep.register; LinkLocation: TYPE = REF LocationRep.link; StackLocation: TYPE = REF LocationRep.stack; DerefLocation: TYPE = REF LocationRep.deref; IndexedLocation: TYPE = REF LocationRep.indexed; FieldLocation: TYPE = REF LocationRep.field; UpLevelLocation: TYPE = REF LocationRep.upLevel; CompositeLocation: TYPE = REF LocationRep.composite; EscapeLocation: TYPE = REF LocationRep.escape; DummyLocation: TYPE = REF LocationRep.dummy; CodeOper: TYPE = REF OperRep.code; ArithOper: TYPE = REF OperRep.arith; BooleanOper: TYPE = REF OperRep.boolean; ConvertOper: TYPE = REF OperRep.convert; CheckOper: TYPE = REF OperRep.check; CompareOper: TYPE = REF OperRep.compare; MesaOper: TYPE = REF OperRep.mesa; CedarOper: TYPE = REF OperRep.cedar; EscapeOper: TYPE = REF OperRep.escape; END. ; IntCodeDefs.mesa Copyright 1985, 1986, 1987, 1988, 1989, 1991, 1993 by Xerox Corporation. All rights reserved. Russ Atkinson (RRA) November 20, 1989 6:19:56 pm PST Christian Jacobi, May 4, 1993 6:46 pm PDT IntCodeDefs Mimosa intermediate code definitions These definitions define a code graph that is easy to generate given the abstract syntax tree. The graph becomes simplified as the code generation progresses. For example, it starts out with nested procedures, but those could be turned into simple procedures by adding the static link as an implicit argument. Comments in this smaller font are meant to indicate fine points. Types from external sources & simple equates gives the required alignment in bits (0 => don't care) may be negative may NOT be negative may be negative This would be LONG STRING in the XDE world. For target values that are 32 bits or less. Types from previous phases of the compiler A VariableId uniquely identifies a variable. The code generator is free to place the variable wherever it is necessary, but must report the location(s) through to the output for debugging purposes. A VariableId is useful for identifying which variable is being reported. A FileId designates a file used in the compilation. The nullFileId indicates the source file being compiled. Node: the basic building block Nodes are used to represent modules, declarations, statements and expressions. NIL is used to represent no value, or no destination, or nothing to do. bits for the value (0 if no value) Var nodes represent variables, parts of variables, or even composite variables. For constants that are 32 bits (or less) For constants that are more than 32 bits (with specified alignment) For ROPE, ATOM, or REF TEXT literals For number literals (where conversion is target dependent) The value of a block node is the value of the last node in the node list. All local variables declared inside of a block disappear at the end of the block. This is a fast special case for live/dead variable analysis, and may be the only one supported for naive code generation. A decl node declares a variable in the current context (local or global) and assigns it from the initialization. The variable is assigned in the same way that assignment is performed in the assign node. For Dragon (and maybe some other machines) a local variable can be reserved and initialized with the same instruction. Using the init field can save us some analysis to discover this fact. This declaration lasts until the end of the enclosing block (there must be one). Whenever a signal or error is raised in the given scope the node given by the handle is called via the signaller (the node should be a label node for a procedure). The scope list may contain decls, since it is treated as a block. All declarations local to the scope disappear at the end of the scope. An assignment moves the value calculated by rhs into the variable named by lhs. This node is not for counted assignments, which are handled by cedar oper nodes. The cond node is used for conditional statements and expressions, including those resulting from IF, SELECT, AND, OR. To simplify the encoding for IF statements and Mesa SELECT statements, there is an implicit jump to the node after the cond node from the end of every case in the cases list. Labels are the only nodes that can introduce cycles into the program graph.The value of a label node is the value of label.label.node. The backwards field is used to indicate a LOOP or RETRY, which is information we know early and might like to retain to help the code generator. It does not necessarily denote the direction of the branch in the machine code, since the code generators are free to emit code in any legitimate order. The jump indicated by a goto node can cross block boundaries, but not frame boundaries. Uplevel GO TO statements are currently limited to signal handlers, and must use the unwind primitive to cause the signaller to remove the frames. If we allow uplevel GO TO statements to be outside of signal handlers, then we will probably implement them through signals anyway. The apply node is used to apply procedures or system primitives to arguments. The handler has a scope that only applies to the procedure call itself, not to the argument evaluation. A few special cases do not fully evaluate their arguments before the application. Lambda nodes are used to represent procedure bodies. If a procedure is nested (parent # NIL) then it uses its last argument (the closure link) as a means to get to the static link (by subtracting the offset at entry). If not nested (parent = NIL), a procedure will discard the closure link if called through its indirect entry, and will have no closure link if called directly. procedure return {no value} The rets list always specifies all words being returned. the given primitive operation as a value {value is applicable} Oper exprs are used to encode system primitives, including arithmetic. They are usually applied, only a few may have any implementation as first-class values. specify the machine code directly {value is applicable, but not storable} MACHINE CODE procedures look much like INLINE procedures. They don't exist as first-class values. They expect their arguments uniformly as values, and in standard positions (e.g. on the stack for PrincOps & Dragon). Applying a machine code node causes the arguments to be put in a standard place and the returns taken from a standard place (as if for a procedure call). A machine code node occuring in any other place just causes the bytes to be emitted verbatim. For the C target the bytes specify the name of the C procedure to call. defines a module {value is a PROGRAM value} A module node is used for a PROGRAM or MONITOR. The vars are the variables in the global frame. The procs list is a sequence of outermost procedure declarations (the first one is always the initialization procedure). nodes associated with a given source range {value is value of last node} If we choose to retain only the granularity of current Cedar/Mesa then we only need one of these around every leaf-level statement/declaration. We use a list of nodes since a single leaf-level source statement may expand into several nodes. a comment (no value) This is useful during the debugging phase, since we can add arbitrary comments as debugging information in the tree and still get them printed out. For now, comments only occur in lambda bodies, module proc lists, and block lists. Specifies the handler for an error Locations A location denotes a logical or physical address. At various points in code generation a variable may have various locations, depending on how machine-specific the code generation has become. For example, a local variable may start out with a localVar kind of location, and wind up with a register kind of location. Many location variants have been chosen to encode the various kinds of left-hand-sides possible for assignments. In particular, local variables, global variables, pointer dereference, array indexing, field selection, and external links have been included. in system vector (id denotes which item) A system vector location specifies the start of some variable at some logical place in the system vector. For some targets, such as PrincOps, the id will be the physical offset from some known location or register. For other targets, such as Dragon, the system vector need not even be contiguous, and the loader has the responsibility of fixing up the address. in global frame (id denotes which segment) A globalVar location specifies the start of some contiguous segment of global frame. Other tables in the system must indicate the usage statistics, the offset from global frame start, and so forth. in local frame (id denotes which segment) A localVar location specifies the start of some contiguous segment of local frame. Other tables in the system must indicate the lexical level, the usage statistics, the offset from local frame start, and so forth. The parent gives the label for the procedure that defines the variable. a machine register (id denotes which register) A register location specifies the start of some contiguous segment of register space. For some targets this may not be relevant. link to external value (e.g. an interface record) A link location specifies that a variable is found through some external linkage. This will be particularly true for links to items in interfaces, but it may also be to something that the loader sets up, such as a literal table. in local argument stack (offset is in AUs from logical base) A stack location specifies a location in the stack for the local frame. For PrincOps this will denote the eval stack. For Dragon it will denote the EU register stack (aliased with the local registers). roughly addr^ A deref location specifies dereferencing through a pointer, and is used in REF & POINTER dereferencing. The align parameter describes the alignment that can be assumed for the pointer. This is useful on machines where the alignment constraint can determine the load or store operation(s) used. roughly (@base+index*((bits+bitsPerWord-1)/bitsPerWord))^ An indexed location specifies a location that is the result of array or sequence indexing. The bits field in the Node object specifies the field width. The indexed variant is used for array and sequence indexing, either packed and unpacked. sub-field of the base (start is a bit offset) A field location specifies a record component, although it is also used for array indexing by constants. The "cross" bool value is added to express the xfield variant used in the little endian compiler. It is NOT used by todays big endian compiler. At this time I do not understand its real usage but include the functionality to allow to incrementally merge the two compiler versions. ChJ, May 4, 1993 reg indicates a particular register in the bank addressed by link An upLevel location is used to represent up-level indexing before the final assignment of the register number is known. The link is an address, and the reg is the offset (the bit offset depends on the width of the registers). The format is present to cover the distinct possibility that different registers sets may be stored in different ways. composite location A composite location indicates a location that need not be contiguous. This is useful for record constructors (V _ [a: E1, b: E2, ...]) and extractors ([a: V1, b: V2, ...] _ E). An escape location is a means for generating code in the event that the other cases do not quite cover certain machines. As such it resembles a kitchen sink. for a dummy source, the value is 0 for a dummy destination, the value is flushed Supporting Cast Kind of REF Literal specifiable via a sequence of bytes. In some (possibly all) implementations REF literals are handled specially by the loader to put these contents into objects of the target machine format. The compiler should know nothing about the prefixes of objects, or quantization of object sizes, or similar details of the SafeStorage implementation. The value of a code oper node is the given code address. This kind of oper is typically used to denote a procedure in the module being compiled. If the label is for a procedure and direct = FALSE, then the address is for the procedure variable entry point. If the label is for a procedure and direct = TRUE, then the address is for the direct entry point. If the label is not for a procedure then direct is not meaningful. For the built-in arithmetic operators. For the various boolean operations. For conversion between the various arithmetic classes. For bounds checking. This operator takes either two arguments, and tests according to the sense. A bounds check is raised when the sense is FALSE, and the first argument is returned when the sense is TRUE. For arithmetic comparisons with a given class. This operator takes either two arguments or three arguments according to the sense. A boolean is returned according to the sense of the comparison. For Mesa system operations (see below). For Cedar system operations (see below). The escape clause is for special cases and extensions. ArithSelector denotes various basic arithmetic operations. kind: indicates the kind of arithmetic (signed, unsigned, real, ...) checked: TRUE => perform overflow checking, FALSE => no overflow checking precision: number of bits of precision For MESA arithmetic For other languages signed is for INT, unsigned is for CARD, address is for POINTER, real is for REAL Precision is given in bits. 0 denotes arbitrary-precision arithmetic (e.g. Lisp bignums). In the descriptions, arg1 is the argument at index 0, arg2 is the argument at index 1, and so on. returns address of arg1 returns ALL[arg1] (count is given by arg2) primitive equality of arg1 & arg2, (info gives # of bits) raise fault if arg1 = NIL allocation of frame extension (arg1 gives # of words) free of frame extension (arg1 gives FX address) PROCESS primitives (FORK & JOIN) MONITOR primitives (ENTRY procs, NOTIFY, BROADCAST, WAIT) built-in errors (for unnamed ERROR, UNWIND, ABORTED, UNCAUGHT, bounds check, narrow fault) (note: not applicable) signaller primitives (SIGNAL, ERROR, (CONTINUE, RETRY, GO TO), RESUME, REJECT) global frame primitives (NEW, START, RESTART, STOP, start traps) the address of the current global frame (not applicable) A CedarSelector is usually applied by name to allow for special implementations. It should also be possible to apply it by reference. The basic Cedar operators are separate to make it easier to omit these features for XDE versions of the Mimosa compiler. lhs _ rhs increments source RCs, decrements dest RCs, and moves the word lhs is first argument, and should be the address of the destination rhs is second argument lhs _ rhs like simpleAssign, but the destination RCs are not decremented lhs _ rhs increments source RCs, decrements dest RCs, and moves the words lhs is first argument, and should be the address of the destination rhs is second argument type of lhs is given by the third argument # of bits given by fourth argument lhs _ rhs like complexAssign, but the destination RCs are not decremented NEW[T] T is first arg, nWords is second arg (only used for zone-less NEW) for CODE[T] info gives the internal type index for T for NARROW[ref, T] ref is first arg, T is second arg; This operation raises a NarrowRefFault if ref # NIL and referentType[ref] # T. get the referent type from a REF (first arg) This operation is used in WITH ref SELECT ... statements and ISTYPE expressions. NIL has a reserved referent type that never matches the type of an object. check a procedure (first arg) for assignability in the safe language A procedure descriptor must contain enough information to be able to check for safe assignment. In systems that do not support retained frames all nested procedures are not safely assignable to locations that may be longer lived than the procedures. For Dragon, a procedure descriptor is always a pointer to the starting PC of the procedure. The word following the PC could be used to indicate the assignability of the procedure. Abbreviations Abbreviations for Node variants Abbreviations for locations Abbreviations for Oper objects NewlineDelimiter (cedarcode) styleStyleDefBeginStyle (Cedar) AttachStyle (note) "for fine points" { quote 8 bp size 12 bp topLeading 10 bp leading 16 bp bottomLeading } StyleRule EndStylecodeK eU`K4K)KKk title Isubtitle$head  &JJInote@@I pagebreak,,bJ6JJJJJ K ,JJ K!%J+J 3K K#K#J++K** K,,OKJmmPNOH K2.5 in tabStopsK2.5 in tabStops1.2 in tabStops2.0 in tabStopsJ2.0 in tabStops""1.0 in tabStops #2.0 in tabStopsK3.5 in tabStops*cCK3.5 in tabStops!13.5 in tabStopsckAO2.0 in tabStopsOO2.0 in tabStops 1.2 in tabStops!1.2 in tabStopsJ1.2 in tabStops(1.2 in tabStops--J1.2 in tabStopsCC1.2 in tabStops<>O2.0 in tabStops1.0 in tabStops%%2.0 in tabStopsIIO2.0 in tabStopsO2.0 in tabStopsO2.0 in tabStopsGG1.0 in tabStops++1.0 in tabStops+O1.0 in tabStops1.0 in tabStops111.0 in tabStopsHHO1.0 in tabStops1.0 in tabStops!!1.0 in tabStopsO1.0 in tabStopsK1.0 in tabStops K1.0 in tabStopsK )8K K&K&K "K!KK KK &KK1.0 in tabStops !K2.5 in tabStopsK2.5 in tabStops1.0 in tabStops K1.0 in tabStops$7K1.0 in tabStops&=J1.0 in tabStops KKK 6KKKKK 8K#.K2.0 in tabStops K2.0 in tabStops !2K2.0 in tabStops&3K2.0 in tabStops $5K2.0 in tabStops /K2.0 in tabStops *;K2.0 in tabStops #3K2.0 in tabStops "6K2.0 in tabStops"K2.0 in tabStopsK2.0 in tabStopsK  6J""K KK1.6 in tabStops K1.6 in tabStops+K1.6 in tabStops ".K1.6 in tabStops K @ K8KK O  O O?? O O??OBB O((Orr  ,O DDOOKP  $K2.5 in tabStops-K2.5 in tabStops/K2.5 in tabStops9K $K "K &K &K "K $K "K $K &K &K "K0K &K &K (K2.5 in tabStopsn.K2.5 in tabStops4K2.5 in tabStops2K2.5 in tabStops2K2.5 in tabStops *K2.5 in tabStops ,K2.5 in tabStops ,K2.5 in tabStops0K2.5 in tabStops ,K2.5 in tabStops0K2.5 in tabStops4K2.5 in tabStops.K2.5 in tabStops ,K2.5 in tabStops "K2.5 in tabStops $K2.5 in tabStops (K2.5 in tabStops (K2.5 in tabStops $K2.5 in tabStops (K2.5 in tabStops "K2.5 in tabStops $K2.5 in tabStops &NP!h}>