;;; **********************************************************************
;;; This code was written as part of the Spice Lisp project at
;;; Carnegie-Mellon University, and has been placed in the public domain.
;;; Spice Lisp is currently incomplete and under active development.
;;; If you want to use this code or any part of Spice Lisp, please contact
;;; Scott Fahlman (FAHLMAN@CMUC). 
;;; **********************************************************************
;;;
;;; The  DISASSEMBLE function as described in the Common Lisp manual.
;;; 
;;; Written by Don Mathis
;;;
;;;
;;; Modified 11/83 by Robert Rose to put an asterisk before lines
;;;  that are branched to.
;;;
;;; Heavily Modified 1/84 to use new instruction set.
;;;
;;;
;;; **********************************************************************

;;;;   The Main function, DISASSEMBLE

(defun disassemble (function &optional (*standard-output* *standard-output*))
  "The argument should be either a function object, a lambda expression, or
   a symbol with a function definition. If the relevant function is not a
   compiled function, it is first compiled. In any case, the compiled code
   is then 'reverse assembled' and printed out in a symbolic format."
  (let ((fun-obj function))
    (cond ((and (atom fun-obj)
		(not (compiled-function-p fun-obj)))
	   (disassemble (symbol-function fun-obj)))
	  (t (cond ((and (not (compiled-function-p fun-obj))
			 (> (length fun-obj) 1))
		    ;; The argument is an uncompiled function or macro.
		    (cond ((eq (car fun-obj) 'MACRO)
			   ;; the argument is an uncompiled macro.
			   (setq fun-obj (third fun-obj))
			   (compile fun-obj)
			   (disassemble fun-obj))
			  ((eq (car fun-obj) 'NAMED-LAMBDA)
			   ;; The argument is a uncompiled function.
			   (setq fun-obj (second fun-obj))
			   (compile fun-obj)
			   (disassemble fun-obj))
			  ((eq (car fun-obj) 'LAMBDA)
			   ;; the argument is an uncompiled lambda.
			   (disassemble (compile nil fun-obj)))
			  (t (format t "Unknown arg for Disassemble~%"))))
		   (t (cond ((and (listp fun-obj)
				  (eq (car fun-obj) 'MACRO))
			     ;; The argument is a compiled macro.
			     (disassemble (cdr fun-obj)))
			    (t ;; The argument is a compiled function object.
			     (format t "~%Disassembly of ~a.~%"
				     (%primitive header-ref fun-obj
						 %function-name-slot))
			     (prin-prelim-info fun-obj)
			     (Output-macro-instructions
			           fun-obj
				   (branch-list fun-obj) )))))))))






;;;   PRIN-PRELIM-INFO takes a function object and extracts from it and
;;; prints out the following information:
;;;   - The argument list of the function.
;;;   - The number of Locals allocated by the function.
;;;   - Whether the function does or does not evaluate its arguments.

(defun prin-prelim-info (function)
  (let ((arglist (%primitive header-ref function %function-arg-names-slot)))
    (if arglist
	(format t "~%Its arg list is: ~S." arglist)
	(format t "~%It takes no arguments."))
    (format t "~%It uses ~S local variable~:P."
	    (1- (ldb %function-locals-byte
		     (%primitive header-ref function %function-locals-slot))))
    (format t "~%It ~a its arguments.~%"
	    (cond ((= 1 (ldb %function-fexpr-byte
			     (%primitive header-ref function
					 %function-fexpr-slot)))
		   "does not evaluate any of")
		  (t "evaluates all of")))))
   


;;;   OUTPUT-MACRO-INSTRUCTIONS takes a function object and prints out the
;;; corresponding macro. (Not executable macro, just macro that looks good!)
 
(defun Output-Macro-Instructions (function branches)
  (let* ((byte-vector (%primitive header-ref function %function-code-slot))
	 (vector-length (length byte-vector))
	 (max-args (+ (ldb %function-max-args-byte
			   (%primitive header-ref function
				       %function-max-args-slot))
		      (if (minusp (%primitive header-ref function
					      %function-rest-arg-slot))
			  1
			  0))))
    (do ((*disassembler-index* 0))
	((= *disassembler-index* vector-length))
      (let* ((symbol (find-symbol-name byte-vector *disassembler-index*)))
	(print-instruction (construct-instruction function symbol max-args 
						  *disassembler-index*
						  byte-vector)
			   branches)
	(setq *disassembler-index* (+ *disassembler-index* 
				      (get symbol '%instruction-length)))))))



(defun find-symbol-name (vector dis-index)
  (let ((byte (aref vector dis-index)))
    (cond ((eq byte 254)
	   (svref *2byte-instruction-table*
		  (aref vector (1+ dis-index))))		  
	  (t (svref *1byte-instruction-table* byte)))))		  
		  
		  
		  

;;;   PRINT-INSTRUCTION takes the list returned by CONSTRUCT-INSTRUCTION
;;; and prints out its contents in an understandable form,
;;; with an asterisk (*) before the line if it is jumped to.

(defun print-instruction (arg branches)
  (format t "~a ~a  ~s~%"
		  (cond ((member (car arg) branches) '*) ;Line is accesed
			(t '| |))                        ;Line is not accesed
		  (car arg) (cdr arg)))))



;;;   BRANCH-LIST is very much like output-macro-instructions,
;;;   but instead of actually creating all the instructions it just
;;;   creates the branch instruction labels.  A list of these labels
;;;   is returned.


(defun branch-list (function)
  (let* ((byte-vector (%primitive header-ref function %function-code-slot))
	 (vector-length (length byte-vector))
	 (max-args
	  (ldb %function-max-args-byte
	       (%primitive header-ref function
		      %function-max-args-slot)))
	 (branches nil))
    (do ((*disassembler-index* 0))
	((= *disassembler-index* vector-length))
      (let* ((symbol (find-symbol-name byte-vector *disassembler-index*))
             (ins-type (get symbol '%Instruction-Type))
             (byte (aref byte-vector *disassembler-index*))
             (start-place (cond ((eq 254 byte) (+ 2 *disassembler-index*))
                                (t (1+ *disassembler-index*)))))

        (cond ((memq ins-type '(SHORT-BRANCH-FORWARD SHORT-BRANCH-BACKWARD))
               (let ((label (get-sb-offset ins-type
                                           (aref byte-vector start-place)
                                           start-place)))
                 (cond ((member label branches) nil)
                       (t (setq branches (cons label branches))))))
              ((memq ins-type '(LONG-BRANCH-FORWARD LONG-BRANCH-BACKWARD))
               (let ((label (get-bb-offset ins-type
                                           (aref byte-vector start-place)
                                           (aref byte-vector
                                                 (1+ start-place))
                                           start-place)))
                 (cond ((not (member label branches))
                        (setq branches (cons label branches)))))))
        (setq *disassembler-index* (+ *disassembler-index* 
                                      (get symbol '%instruction-length)))))
    branches))

;;;   CONSTRUCT-INSTRUCTION takes a function, a vector of bytes, and the max
;;; number of args to the function (and other implicit parameters) and returns
;;; a small structure which contains the information needed to print out the
;;; instruction with a line number.

(defun Construct-instruction (function symbol max-args this-inst vector)
  (let* ((ins-group (cond ((eq nil (get symbol '%instruction-group)) symbol)
                          (t (get symbol '%instruction-group))))
         (ins-type (get symbol '%instruction-type)))
    (list this-inst 
          ins-group
          (cond ((member ins-type '(READ WRITE READ-MODIFY-WRITE))
                 (construct-read-write function symbol max-args this-inst vector))
                (t (let* ((byte (aref vector this-inst))
                          (start-place
                          (cond ((eq 254 byte) (+ 2 this-inst))
                                (t (1+ this-inst)))))
                                
                     (cond ((memq ins-type '(SHORT-BRANCH-FORWARD SHORT-BRANCH-BACKWARD))
                            (get-sb-offset ins-type
                                           (aref vector start-place)
                                           start-place))
                           (t (get-bb-offset ins-type
                                             (aref vector start-place)
                                             (aref vector (1+ start-place))
                                             start-place)))))))))



(defun construct-read-write (func-obj symbol max-args this-byte vector)
  (let ((ins-op (get symbol '%instruction-operand)))
    (if (memq ins-op '(STACK IGNORE))
	ins-op
	(let ((offset (or (get symbol '%instruction-offset)
			  (if (memq ins-op '(LONG-AL LONG-CONSTANT LONG-SYMBOL))
			      (+ (aref vector (+ this-byte 1))
				 (ash (aref vector (+ this-byte 2)) 8))
			      (aref vector (+ this-byte 1))))))
	  (case ins-op
	    ((AL LONG-AL)
	     (if (>= offset max-args)
		 (list 'LOCAL (- offset max-args))
		 (list 'ARG offset)))
	    (PSIC
	     (list 'SHORT-CONST offset))
	    (NSIC
	     (list 'SHORT-CONST (- offset 256)))
	    ((CONSTANT LONG-CONSTANT)
	     (list 'CONSTANT (%primitive header-ref func-obj
					 (+ %function-arg-names-slot 1 offset))))
	    ((SYMBOL LONG-SYMBOL)
	     (list 'SYMBOL (%primitive header-ref func-obj
				       (+ %function-arg-names-slot 1 offset)))))))))

(defun get-sb-offset (ins-type this-byte curr-instr)
  (let ((frame-off (ldb (byte 3 0) this-byte))
        (word-off  (ldb (byte 5 3) this-byte))
        (cur-frame (floor curr-instr 8)))
    (cond ((eq ins-type 'short-branch-backward)
           (- frame-off (* (- word-off cur-frame) 8)))
          (t (+ frame-off (* (+ word-off cur-frame) 8))))))


(defun get-bb-offset (ins-type this-byte next-byte curr-instr)
  (let ((frame-off (ldb (byte 3 0) this-byte))
        (word-off  (ldb (byte 5 3) this-byte))
        (cur-frame (floor curr-instr 8)))
    (cond ((eq ins-type 'long-branch-backward)
           (- frame-off 
              (* (- (+ (* next-byte 32) word-off)
                    cur-frame)
                 8)))
          (t (+ frame-off
                (* (+ (+ (* next-byte 32) word-off)
                      cur-frame) 
                   8))))))