From: Taylor R Campbell Date: Sun, 16 Mar 2014 14:50:30 +0000 (+0000) Subject: Open-code non-2^n quotients and remainders by multiplication on amd64. X-Git-Tag: release-9.2.0~27 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=92222b040ef2a17cbd6034f2b9efda678176ffeb;p=mit-scheme.git Open-code non-2^n quotients and remainders by multiplication on amd64. --- diff --git a/src/compiler/machines/x86-64/rulfix.scm b/src/compiler/machines/x86-64/rulfix.scm index 1dd930666..0b239087c 100644 --- a/src/compiler/machines/x86-64/rulfix.scm +++ b/src/compiler/machines/x86-64/rulfix.scm @@ -88,6 +88,26 @@ USA. (integer-power-of-2? (abs constant)))) (fixnum-2-args/register*constant operator target source constant overflow?)) +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FIXNUM-2-ARGS FIXNUM-QUOTIENT + (REGISTER (? source)) + (OBJECT->FIXNUM (CONSTANT (? d))) + (? overflow?))) + (QUALIFIER (not (or (zero? d) (integer-power-of-2? d)))) + overflow? ;ignore + (fixnum-quotient/constant target source d)) + +(define-rule statement + (ASSIGN (REGISTER (? target)) + (FIXNUM-2-ARGS FIXNUM-REMAINDER + (REGISTER (? source)) + (OBJECT->FIXNUM (CONSTANT (? d))) + (? overflow?))) + (QUALIFIER (not (or (zero? d) (integer-power-of-2? d)))) + overflow? ;ignore + (fixnum-remainder/constant target source d)) + (define-rule statement (ASSIGN (REGISTER (? target)) (FIXNUM-2-ARGS (? operator) @@ -661,6 +681,110 @@ USA. (else (error "Fixnum-remainder/constant: Bad value" n)))))) +;;;; Fast division by multiplication + +(define (fast-divide-prepare divisor width) + (let ((zeros (integer-length (- divisor 1)))) + (values + (+ 1 + (quotient (shift-left (- (shift-left 1 zeros) divisor) width) divisor)) + (if (> zeros 1) 1 zeros) + (if (= zeros 0) 0 (- zeros 1))))) + +;;; For reference, this is what the code generated below computes. + +(define (fast-quotient n d width multiplier s1 s2) + d ;ignore + (let ((t (shift-right (* n multiplier) width))) + (shift-right (+ t (shift-right (- n t) s1)) s2))) + +(define (fast-remainder n d width multiplier s1 s2) + (- n (* d (fast-quotient n d width multiplier s1 s2)))) + +(define (fast-divide-prepare/signed divisor width) + (fast-divide-prepare (abs divisor) width)) + +(define (fast-quotient/signed n d width multiplier s1 s2) + (define (do-unsigned n d) + (fast-quotient n d width multiplier s1 s2)) + (if (negative? d) + (if (negative? n) + (do-unsigned (- 0 n) (- 0 d)) + (- 0 (do-unsigned n (- 0 d)))) + (if (negative? n) + (- 0 (do-unsigned (- 0 n) d)) + (do-unsigned n d)))) + +(define (fast-remainder/signed n d width multiplier s1 s2) + (- n (* d (fast-quotient/signed n d width multiplier s1 s2)))) + +(define (fast-division d get-target get-source finish) + (flush-register! rax) + (need-register! rax) + (flush-register! rdx) + (need-register! rdx) + (receive (multiplier s1 s2) (fast-divide-prepare (abs d) scheme-object-width) + (let* ((subroutine (generate-label 'QUOTIENT)) + (if-negative1 (generate-label 'QUO-NEGATIVE-1)) + (if-negative2 (generate-label 'QUO-NEGATIVE-2)) + (merge1 (generate-label 'QUO-MULTIPLY)) + (merge2 (generate-label 'QUO-RESULT)) + (source (get-source)) + (target (get-target))) + (LAP ;; Divide by 2^t so that factor doesn't mess us up. + (SAR Q ,target (&U ,scheme-type-width)) + ;; No need to CMP; SAR sets the SF bit for us to detect + ;; whether the input is negative. + (JS B (@PCR ,if-negative1)) + (JMP (@PCR ,merge1)) + (LABEL ,if-negative1) + (NEG Q ,target) + (LABEL ,merge1) + ;; MUL takes argument in rax, so put it there. + (MOV Q (R ,rax) ,target) + ;; Load the multiplier into rdx, which is free until we MUL. + (MOV Q (R ,rdx) (&U ,multiplier)) + ;; Compute the 128-bit product rax * multiplier, storing the + ;; high 64 bits in rdx and the low 64 bits in rax. We are + ;; not interested in the low 64 bits, so rax is now free for + ;; reuse. + (MUL Q ((R ,rdx) : (R ,rax)) (R ,rdx)) + ;; Compute ((((n - p) >> s1) + p) >> s2) where p is the high + ;; 64 bits of the product, in rdx. + (SUB Q ,target (R ,rdx)) + (SHR Q ,target (&U ,s1)) + (ADD Q ,target (R ,rdx)) + (SHR Q ,target (&U ,s2)) + ;; Reapply the sign. + (CMP Q ,source (& 0)) + (JL B (@PCR ,if-negative2)) + ,@(if (negative? d) (LAP (NEG Q ,target)) (LAP)) + (JMP (@PCR ,merge2)) + (LABEL ,if-negative2) + ,@(if (negative? d) (LAP) (LAP (NEG Q ,target))) + (LABEL ,merge2) + ;; Convert back to fixnum representation with low zero bits. + (SAL Q ,target (&U ,scheme-type-width)) + ,@(finish target source (INST-EA (R ,rax))))))) + +(define (fixnum-quotient/constant target source d) + (fast-division d + (lambda () (standard-move-to-target! source target)) + (lambda () (standard-move-to-temporary! source)) + (lambda (quotient numerator temp) + quotient numerator temp ;ignore + (LAP)))) + +(define (fixnum-remainder/constant target source d) + (fast-division d + (lambda () (standard-move-to-temporary! source)) + (lambda () (standard-move-to-target! source target)) + (lambda (quotient numerator temp) + ;; Compute n - d q. + (LAP (MOV Q ,temp (& ,d)) + (IMUL Q ,quotient ,temp) + (SUB Q ,numerator ,quotient))))) + (define (fixnum-predicate/unary->binary predicate) (case predicate ((ZERO-FIXNUM?) 'EQUAL-FIXNUM?) diff --git a/src/compiler/machines/x86-64/rulrew.scm b/src/compiler/machines/x86-64/rulrew.scm index 3838549ef..74608c224 100644 --- a/src/compiler/machines/x86-64/rulrew.scm +++ b/src/compiler/machines/x86-64/rulrew.scm @@ -197,9 +197,9 @@ USA. (QUALIFIER (and (memq operator '(FIXNUM-QUOTIENT FIXNUM-REMAINDER)) (rtl:register? operand-1) - (rtl:constant-fixnum-test operand-2 - (lambda (n) - (integer-power-of-2? (abs n)))))) + (rtl:constant-fixnum-test operand-2 + (lambda (value) + (not (zero? value)))))) (rtl:make-fixnum-2-args operator operand-1 operand-2 overflow?)) (define-rule rewriting