From d8ff03d384ed72a3520c311de6cd8807061eb458 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sun, 1 Nov 2009 22:36:55 -0500 Subject: [PATCH] Fix various immediate operands in x86-64 LAP generation. Implement an abstraction for yielding an operand representing some immediate value, either by yielding an actual immediate operand or by loading an immediate into a temporary register with MOV (which takes 64-bit immediate operands, unlike every other instruction) and by yielding a reference to the temporary register for the operand. Use this to simplify LOAD-DISPLACED-REGISTER, and reduce the code it generates. Use more appropriate names for procedures that generate code to move objects into registers (`load') and to move objects into memory (`store'). Fix various other places that generate LAP with immediates to use the new abstraction to ensure that they always fit in the relevant instructions' operands. --- src/compiler/machines/x86-64/lapgen.scm | 151 +++++++++++++++--------- src/compiler/machines/x86-64/rules1.scm | 125 +++++++++----------- src/compiler/machines/x86-64/rules3.scm | 47 ++++++-- src/compiler/machines/x86-64/rules4.scm | 4 +- 4 files changed, 187 insertions(+), 140 deletions(-) diff --git a/src/compiler/machines/x86-64/lapgen.scm b/src/compiler/machines/x86-64/lapgen.scm index 98f5c2f45..84cfbf320 100644 --- a/src/compiler/machines/x86-64/lapgen.scm +++ b/src/compiler/machines/x86-64/lapgen.scm @@ -281,9 +281,8 @@ USA. (add-pseudo-register-alias! rtl-reg machine-reg)))) (define (object->machine-register! object mreg) - ;; This funny ordering allows load-constant to use a pc value in mreg! - ;; [TRC 20091025: Does this matter, given PC-relative addressing?] - (let ((code (load-constant->register (INST-EA (R ,mreg)) object))) + ;; This ordering allows LOAD-CONSTANT to use MREG as a temporary. + (let ((code (load-constant (INST-EA (R ,mreg)) object))) (require-register! mreg) code)) @@ -314,88 +313,126 @@ USA. (compare/reference*literal register (non-pointer->literal non-pointer))) (define (compare/reference*literal reference literal) - (if (fits-in-signed-long? literal) - (LAP (CMP Q ,reference (&U ,literal))) - (let ((temp (temporary-register-reference))) - (LAP (MOV Q ,temp (&U ,literal)) - (CMP Q ,reference ,temp))))) + (with-unsigned-immediate-operand literal + (lambda (operand) + (LAP (CMP Q ,reference ,operand))))) ;;;; Literals and Constants ;;; These are slightly tricky because most instructions don't admit ;;; 64-bit operands. -(define (convert-object/constant->register target object conversion) +(define (load-converted-constant target object conversion) (let ((target (target-register-reference target))) (if (non-pointer-object? object) - ;; Is this correct if conversion is object->address ? - (load-non-pointer-constant->register target object) - (LAP ,@(load-pointer-constant->register target object) + ;; Assumption: CONVERSION fetches the datum of the object, + ;; which is the same as the address of the object. + (load-non-pointer target 0 (careful-object-datum object)) + (LAP ,@(load-pointer-constant target object) ,@(conversion target))))) -(define (load-constant->register register object) +(define (load-constant register object) (if (non-pointer-object? object) - (load-non-pointer-constant->register register object) - (load-pointer-constant->register register object))) + (load-non-pointer-constant register object) + (load-pointer-constant register object))) -(define (load-pointer-constant->register register object) +(define (load-pointer-constant register object) (LAP (MOV Q ,register (@PCR ,(constant->label object))))) -(define (load-non-pointer-constant->register register object) - (load-non-pointer-literal->register register (non-pointer->literal object))) +(define (load-non-pointer-constant register object) + (load-non-pointer-literal register (non-pointer->literal object))) -(define (load-non-pointer-constant->offset register object) - (load-non-pointer-literal->offset register (non-pointer->literal object))) +(define (load-non-pointer register type datum) + (load-non-pointer-literal register (make-non-pointer-literal type datum))) -(define (load-non-pointer->register register type datum) - (load-non-pointer-literal->register register - (make-non-pointer-literal type datum))) +(define (load-non-pointer-literal register literal) + (load-unsigned-immediate register literal)) -(define (load-non-pointer->offset register type datum) - (load-non-pointer-literal->offset register - (make-non-pointer-literal type datum))) +(define (store-non-pointer-constant register object) + (store-non-pointer-literal register (non-pointer->literal object))) -(define (load-non-pointer-literal->register register literal) - (load-unsigned-immediate->register register literal)) +(define (store-non-pointer offset type datum) + (store-non-pointer-literal offset (make-non-pointer-literal type datum))) -(define (load-non-pointer-literal->offset register literal) - (load-unsigned-immediate->offset register literal)) +(define (store-non-pointer-literal offset literal) + (store-unsigned-immediate offset literal)) (define (non-pointer->literal object) (make-non-pointer-literal (object-type object) (careful-object-datum object))) -(define (load-signed-immediate->register target immediate) - (cond ((zero? immediate) +(define (load-signed-immediate target value) + (cond ((zero? value) (LAP (XOR Q ,target ,target))) - ((fits-in-signed-quad? immediate) - (LAP (MOV Q ,target (& ,immediate)))) + ((fits-in-signed-quad? value) + (LAP (MOV Q ,target (& ,value)))) (else - (error "Signed immediate too large:" immediate)))) + (error "Signed immediate too large:" value)))) -(define (load-unsigned-immediate->register target immediate) - (cond ((zero? immediate) +(define (load-unsigned-immediate target value) + (cond ((zero? value) (LAP (XOR Q ,target ,target))) - ((fits-in-unsigned-quad? immediate) - (LAP (MOV Q ,target (&U ,immediate)))) + ((fits-in-unsigned-quad? value) + (LAP (MOV Q ,target (&U ,value)))) (else - (error "Unsigned immediate too large:" immediate)))) - -(define (load-signed-immediate->offset offset immediate) - (if (fits-in-signed-long? immediate) - (LAP (MOV Q ,(offset->reference! offset) (& ,immediate))) - (let* ((temporary (temporary-register-reference)) - (target (offset->reference! offset))) - (LAP ,@(load-signed-immediate->register temporary immediate) - (MOV Q ,target ,temporary))))) - -(define (load-unsigned-immediate->offset offset immediate) - (if (fits-in-unsigned-long? immediate) - (LAP (MOV Q ,(offset->reference! offset) (&U ,immediate))) - (let* ((temporary (temporary-register-reference)) - (target (offset->reference! offset))) - (LAP ,@(load-unsigned-immediate->register temporary immediate) - (MOV Q ,target ,temporary))))) + (error "Unsigned immediate too large:" value)))) + +(define (store-signed-immediate offset value) + (with-signed-immediate-operand value + (lambda (operand) + (LAP (MOV Q ,(offset->reference! offset) ,operand))))) + +(define (store-unsigned-immediate offset value) + (with-unsigned-immediate-operand value + (lambda (operand) + (LAP (MOV Q ,(offset->reference! offset) ,operand))))) + +(define (with-signed-immediate-operand value receiver) + (receive (temp prefix operand) + (signed-immediate-operand value temporary-register-reference) + temp ;ignore + (LAP ,@prefix + ,@(receiver operand)))) + +(define (with-unsigned-immediate-operand value receiver) + (receive (temp prefix operand) + (unsigned-immediate-operand value temporary-register-reference) + temp ;ignore + (LAP ,@prefix + ,@(receiver operand)))) + +;;; SIGNED-IMMEDIATE-OPERAND and UNSIGNED-IMMEDIATE-OPERAND abstract +;;; the pattern of performing an operation with an instruction that +;;; takes an immediate operand of 32 bits, but using a value that may +;;; exceed 32 bits and thus may require a temporary register (possibly +;;; reused from something else). Some instructions take immediates +;;; differently, and cannot use this; e.g., IMUL. These return the +;;; temporary register reference if a temporary was necessary, an +;;; instruction prefix to load the value into the temporary register, +;;; and the operand to pass to the desired instruction, either a +;;; 32-bit immediate operand or a register reference. Except where +;;; reusing the temporary register is useful, it is generally enough +;;; to use WITH-(UN)SIGNED-IMMEDIATE-OPERAND above. + +(define (signed-immediate-operand value temporary-reference) + (let ((operand (INST-EA (& ,value)))) + (cond ((fits-in-signed-long? value) + (values #f (LAP) operand)) + ((fits-in-signed-quad? value) + (let ((temp (temporary-reference))) + (values temp (LAP (MOV Q ,temp ,operand)) temp))) + (else + (error "Signed immediate value too large:" value))))) + +(define (unsigned-immediate-operand value temporary-reference) + (let ((operand (INST-EA (&U ,value)))) + (cond ((fits-in-unsigned-long? value) + (values #f (LAP) operand)) + ((fits-in-unsigned-quad? value) + (let ((temp (temporary-reference))) + (values temp (LAP (MOV Q ,temp ,operand)) temp))) + (else + (error "Unsigned immediate value too large:" value))))) (define (target-register target) (delete-dead-registers!) @@ -624,7 +661,7 @@ USA. (load-machine-register! (rtl:register-number expression) register)) ((CONS-POINTER) (LAP ,@(clear-registers! register) - ,@(load-non-pointer->register + ,@(load-non-pointer target (rtl:machine-constant-value (rtl:cons-pointer-type expression)) (rtl:machine-constant-value diff --git a/src/compiler/machines/x86-64/rules1.scm b/src/compiler/machines/x86-64/rules1.scm index cbd5902fb..bb1eccbca 100644 --- a/src/compiler/machines/x86-64/rules1.scm +++ b/src/compiler/machines/x86-64/rules1.scm @@ -51,7 +51,7 @@ USA. (ASSIGN (REGISTER (? target)) (OFFSET-ADDRESS (REGISTER (? source)) (MACHINE-CONSTANT (? n)))) - (load-displaced-register target source (* address-units-per-object n))) + (load-displaced-register target source n address-units-per-object)) (define-rule statement (ASSIGN (REGISTER (? target)) @@ -63,7 +63,7 @@ USA. (ASSIGN (REGISTER (? target)) (BYTE-OFFSET-ADDRESS (REGISTER (? source)) (MACHINE-CONSTANT (? n)))) - (load-displaced-register target source n)) + (load-displaced-register target source n 1)) (define-rule statement (ASSIGN (REGISTER (? target)) @@ -75,7 +75,7 @@ USA. (ASSIGN (REGISTER (? target)) (FLOAT-OFFSET-ADDRESS (REGISTER (? source)) (MACHINE-CONSTANT (? n)))) - (load-displaced-register target source (* address-units-per-float n))) + (load-displaced-register target source n address-units-per-float)) (define-rule statement ;; This is an intermediate rule -- not intended to produce code. @@ -83,17 +83,15 @@ USA. (CONS-POINTER (MACHINE-CONSTANT (? type)) (OFFSET-ADDRESS (REGISTER (? source)) (MACHINE-CONSTANT (? n))))) - (load-displaced-register/typed target - source - type - (* address-units-per-object n))) + (load-displaced-register/typed target source type n + address-units-per-object)) (define-rule statement (ASSIGN (REGISTER (? target)) (CONS-POINTER (MACHINE-CONSTANT (? type)) (BYTE-OFFSET-ADDRESS (REGISTER (? source)) (MACHINE-CONSTANT (? n))))) - (load-displaced-register/typed target source type n)) + (load-displaced-register/typed target source type n 1)) (define-rule statement (ASSIGN (REGISTER (? target)) (OBJECT->TYPE (REGISTER (? source)))) @@ -113,11 +111,7 @@ USA. (assign-register->register target datum) (let* ((datum (source-register-reference datum)) (target (target-register-reference target))) - ;; We could use a single MOV instruction with a 64-bit - ;; immediate, most of whose bytes are zero, but this three- - ;; instruction sequence uses fewer bytes. - (LAP (MOV B ,target (&U ,type)) - (SHL Q ,target (&U ,scheme-datum-width)) + (LAP (MOV Q ,target (&U ,(make-non-pointer-literal type 0))) (OR Q ,target ,datum))))) #| This doesn't work because immediate operands aren't big enough to @@ -159,17 +153,17 @@ USA. (define-rule statement (ASSIGN (REGISTER (? target)) (CONSTANT (? object))) - (load-constant->register (target-register-reference target) object)) + (load-constant (target-register-reference target) object)) (define-rule statement (ASSIGN (REGISTER (? target)) (MACHINE-CONSTANT (? n))) - (load-signed-immediate->register (target-register-reference target) n)) + (load-signed-immediate (target-register-reference target) n)) (define-rule statement (ASSIGN (REGISTER (? target)) (CONS-POINTER (MACHINE-CONSTANT (? type)) (MACHINE-CONSTANT (? datum)))) - (load-non-pointer->register (target-register-reference target) type datum)) + (load-non-pointer (target-register-reference target) type datum)) (define-rule statement (ASSIGN (REGISTER (? target)) (ENTRY:PROCEDURE (? label))) @@ -211,11 +205,11 @@ USA. (define-rule statement (ASSIGN (REGISTER (? target)) (OBJECT->DATUM (CONSTANT (? constant)))) - (convert-object/constant->register target constant object->datum)) + (load-converted-constant target constant object->datum)) (define-rule statement (ASSIGN (REGISTER (? target)) (OBJECT->ADDRESS (CONSTANT (? constant)))) - (convert-object/constant->register target constant object->address)) + (load-converted-constant target constant object->address)) ;;;; Transfers from Memory @@ -239,13 +233,13 @@ USA. (define-rule statement (ASSIGN (? expression rtl:simple-offset?) (CONSTANT (? object))) (QUALIFIER (non-pointer-object? object)) - (load-non-pointer-constant->offset expression object)) + (store-non-pointer-constant expression object)) (define-rule statement (ASSIGN (? expression rtl:simple-offset?) (CONS-POINTER (MACHINE-CONSTANT (? type)) (MACHINE-CONSTANT (? datum)))) - (load-non-pointer->offset expression type datum)) + (store-non-pointer expression type datum)) (define-rule statement (ASSIGN (? expression rtl:simple-offset?) @@ -253,7 +247,9 @@ USA. (MACHINE-CONSTANT (? n)))) (if (zero? n) (LAP) - (LAP (ADD Q ,(offset->reference! expression) (& ,n))))) + (with-signed-immediate-operand n + (lambda (operand) + (LAP (ADD Q ,(offset->reference! expression) ,operand)))))) ;;;; Consing @@ -277,20 +273,17 @@ USA. (define-rule statement (ASSIGN (PRE-INCREMENT (REGISTER 4) -1) (CONSTANT (? value))) (QUALIFIER (non-pointer-object? value)) - (push-non-pointer-literal (non-pointer->literal value))) + (with-unsigned-immediate-operand (non-pointer->literal value) + (lambda (operand) + (LAP (PUSH Q ,operand))))) (define-rule statement (ASSIGN (PRE-INCREMENT (REGISTER 4) -1) (CONS-POINTER (MACHINE-CONSTANT (? type)) (MACHINE-CONSTANT (? datum)))) - (push-non-pointer-literal (make-non-pointer-literal type datum))) - -(define (push-non-pointer-literal literal) - (if (fits-in-unsigned-word? literal) - (LAP (PUSH Q (&U ,literal))) - (let ((temp (temporary-register-reference))) - (LAP (MOV Q ,temp (&U ,literal)) - (PUSH Q ,temp))))) + (with-unsigned-immediate-operand (make-non-pointer-literal type datum) + (lambda (operand) + (LAP (PUSH Q ,operand))))) ;;;; CHAR->ASCII/BYTE-OFFSET @@ -349,53 +342,50 @@ USA. ;;;; Utilities specific to rules1 -(define (load-displaced-register/internal target source n signed?) +(define (load-displaced-register/internal target source n scale signed?) (cond ((zero? n) (assign-register->register target source)) ((and (= target source) + ;; Why this condition? (= target rsp)) - (let ((addend (if signed? (INST-EA (& ,n)) (INST-EA (&U ,n))))) - (if (fits-in-signed-long? n) - (LAP (ADD Q (R ,rsp) ,addend)) - (begin - (need-register! rsp) - (let ((temp (temporary-register-reference))) - (LAP (MOV Q ,temp ,addend) - (ADD Q (R ,rsp) ,temp))))))) + ((if signed? + with-signed-immediate-operand + with-unsigned-immediate-operand) + (* n scale) + (lambda (operand) + (LAP (ADD Q (R ,rsp) ,operand))))) (else (receive (reference! referenceable?) (if signed? (values indirect-byte-reference! byte-offset-referenceable?) (values indirect-unsigned-byte-reference! byte-unsigned-offset-referenceable?)) - (define (with-address n suffix) - (let* ((source (reference! source n)) - (target (target-register-reference target))) - (LAP (LEA Q ,target ,source) - ,@(suffix target)))) - (if (referenceable? n) - (with-address n (lambda (target) target (LAP))) - (let ((division (integer-divide n #x80000000))) - (let ((q (integer-divide-quotient division)) - (r (integer-divide-remainder division))) - (with-address r - (lambda (target) - (let ((temp (temporary-register-reference))) - (LAP (MOV Q ,temp (&U ,q)) - (SHL Q ,temp (&U #x20)) - (ADD Q ,target ,temp)))))))))))) - -(define-integrable (load-displaced-register target source n) - (load-displaced-register/internal target source n true)) - -(define-integrable (load-displaced-register/typed target source type n) - (load-displaced-register/internal target - source - (if (zero? type) - n + (let ((n-scaled (* n scale))) + (if (referenceable? n-scaled) + (let* ((source (reference! source n-scaled)) + (target (target-register-reference target))) + (LAP (LEA Q ,target ,source))) + (let ((temp (allocate-temporary-register! 'GENERAL)) + (source (allocate-indirection-register! source))) + (let ((target (target-register-reference target))) + (LAP (MOV Q (R ,temp) + ,(if signed? + (INST-EA (& ,n)) + (INST-EA (&U ,n)))) + (LEA Q ,target (@RI ,source ,temp ,scale))))))))))) + +(define-integrable (load-displaced-register target source n scale) + (load-displaced-register/internal target source n scale #t)) + +(define (load-displaced-register/typed target source type n scale) + (if (zero? type) + (load-displaced-register/internal target source n scale #f) + (load-displaced-register/internal target + source (+ (make-non-pointer-literal type 0) - n)) - false)) + (* n scale)) + 1 + #f))) (define (load-indexed-register target source index scale) (let* ((source (indexed-ea source index scale 0)) @@ -431,10 +421,9 @@ USA. (define (load-char-into-register type source target) (let ((target (target-register-reference target))) (cond ((zero? type) - ;; No faster, but smaller (LAP (MOVZX B ,target ,source))) (else - (LAP ,@(load-non-pointer->register target type 0) + (LAP ,@(load-non-pointer target type 0) (MOV B ,target ,source)))))) (define (indirect-unsigned-byte-reference! register offset) diff --git a/src/compiler/machines/x86-64/rules3.scm b/src/compiler/machines/x86-64/rules3.scm index 16424b910..99bef0f7b 100644 --- a/src/compiler/machines/x86-64/rules3.scm +++ b/src/compiler/machines/x86-64/rules3.scm @@ -261,23 +261,31 @@ USA. (cond ((zero? how-far) (LAP)) ((zero? frame-size) - (LAP (ADD Q (R ,rsp) (&U ,(* address-units-per-object how-far))))) + (with-signed-immediate-operand (* address-units-per-object how-far) + (lambda (addend) + (LAP (ADD Q (R ,rsp) ,addend))))) ((= frame-size 1) (let ((temp (temporary-register-reference))) (LAP (MOV Q ,temp (@R ,rsp)) - (ADD Q (R ,rsp) (&U ,(* address-units-per-object offset))) + ,@(with-signed-immediate-operand + (* address-units-per-object offset) + (lambda (addend) + (LAP (ADD Q (R ,rsp) ,addend)))) (PUSH Q ,temp)))) ((= frame-size 2) (let ((temp1 (temporary-register-reference)) (temp2 (temporary-register-reference))) (LAP (MOV Q ,temp2 (@RO B ,rsp ,address-units-per-object)) (MOV Q ,temp1 (@R ,rsp)) - (ADD Q (R ,rsp) (&U ,(* address-units-per-object offset))) + ,@(with-signed-immediate-operand + (* address-units-per-object offset) + (lambda (addend) + (LAP (ADD Q (R ,rsp) ,addend)))) (PUSH Q ,temp2) (PUSH Q ,temp1)))) (else (error "INVOCATION-PREFIX:MOVE-FRAME-UP: Incorrectly invoked!"))))) - + (define-rule statement (INVOCATION-PREFIX:MOVE-FRAME-UP (? frame-size) (REGISTER (? reg))) (generate/move-frame-up* frame-size @@ -468,7 +476,9 @@ USA. ;; Load the address of the entry instruction into TARGET. (LEA Q ,target (@RO B ,regnum:free-pointer ,pc-offset)) ;; Bump FREE. - (ADD Q (R ,regnum:free-pointer) (&U ,free-offset))))) + ,@(with-signed-immediate-operand free-offset + (lambda (addend) + (LAP (ADD Q (R ,regnum:free-pointer) ,addend))))))) (define (generate/cons-multiclosure target nentries size entries) (let* ((mtarget (target-register target)) @@ -486,16 +496,19 @@ USA. (first-format-offset (+ data-offset address-units-per-closure-entry-count)) (first-pc-offset - (+ first-format-offset address-units-per-entry-format-code))) + (+ first-format-offset address-units-per-entry-format-code)) + (free-offset + (+ first-format-offset + (* nentries address-units-per-closure-entry) + (* size address-units-per-object)))) (LAP (MOV Q ,temp (&U ,(make-multiclosure-manifest nentries size))) (MOV Q (@R ,regnum:free-pointer) ,temp) (MOV L (@RO ,regnum:free-pointer ,data-offset) (&U ,nentries)) ,@(generate-entries entries first-format-offset) (LEA Q ,target (@RO B ,regnum:free-pointer ,first-pc-offset)) - (ADD Q (R ,regnum:free-pointer) - ,(+ first-format-offset - (* nentries address-units-per-closure-entry) - (* size address-units-per-object))))))) + ,@(with-signed-immediate-operand free-offset + (lambda (addend) + (LAP (ADD Q (R ,regnum:free-pointer) ,addend)))))))) (define (generate-closure-entry label min max offset temp) (let* ((procedure-label (rtl-procedure/external-label (label->object label))) @@ -597,8 +610,10 @@ USA. size))) (MOV Q (@R ,regnum:free-pointer) ,target) (MOV Q ,target (R ,regnum:free-pointer)) - (ADD Q (R ,regnum:free-pointer) - (& ,(* address-units-per-object (1+ size))))))) + ,@(with-signed-immediate-operand + (* address-units-per-object (1+ size)) + (lambda (addend) + (LAP (ADD Q (R ,regnum:free-pointer) ,addend))))))) ((1) (let ((entry (vector-ref entries 0))) (generate/cons-closure target @@ -695,7 +710,13 @@ USA. (generate-label)) ;; Increment counter and loop (ADD Q (@R ,rsp) (&U 1)) - (CMP Q (@R ,rsp) (&U ,n-blocks)) + ,@(receive (temp prefix comparand) + ;; Choose an arbitrary temporary register that is not + ;; in use in this sequence. + (unsigned-immediate-operand n-blocks (lambda () r11)) + temp ;ignore + (LAP ,@prefix + (CMP Q (@R ,rsp) ,comparand))) (JL (@PCR ,loop)) (JMP (@PCR ,end)) diff --git a/src/compiler/machines/x86-64/rules4.scm b/src/compiler/machines/x86-64/rules4.scm index 735c1daca..785b55abc 100644 --- a/src/compiler/machines/x86-64/rules4.scm +++ b/src/compiler/machines/x86-64/rules4.scm @@ -110,7 +110,7 @@ USA. (interpreter-call-argument->machine-register! environment rdx))) (LAP ,@set-environment ,@(clear-map (clear-map!)) - ,@(load-constant->register (INST-EA (R ,rbx)) name) + ,@(load-constant (INST-EA (R ,rbx)) name) ,@(invoke-interface/call code)))) (define-rule statement @@ -135,5 +135,5 @@ USA. ,@set-value ,@(clear-map!) (MOV Q ,reg:utility-arg-4 (R ,rax)) - ,@(load-constant->register (INST-EA (R ,rbx)) name) + ,@(load-constant (INST-EA (R ,rbx)) name) ,@(invoke-interface/call code)))) \ No newline at end of file -- 2.25.1