From c9030e405581c1220f10c065aee3e458f86eae13 Mon Sep 17 00:00:00 2001 From: "Guillermo J. Rozas" Date: Fri, 28 Feb 1992 20:19:58 +0000 Subject: [PATCH] Add short-circuit handlers for common arithmetic. --- v7/src/microcode/cmpauxmd/i386.m4 | 452 +++++++++++++++++++++++++++++- 1 file changed, 446 insertions(+), 6 deletions(-) diff --git a/v7/src/microcode/cmpauxmd/i386.m4 b/v7/src/microcode/cmpauxmd/i386.m4 index fce7152db..9b292d6af 100644 --- a/v7/src/microcode/cmpauxmd/i386.m4 +++ b/v7/src/microcode/cmpauxmd/i386.m4 @@ -1,6 +1,6 @@ ### -*-Midas-*- ### -### $Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/cmpauxmd/i386.m4,v 1.10 1992/02/19 19:00:06 jinx Exp $ +### $Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/cmpauxmd/i386.m4,v 1.11 1992/02/28 20:19:58 jinx Exp $ ### ### Copyright (c) 1992 Massachusetts Institute of Technology ### @@ -111,10 +111,21 @@ define(define_debugging_label, $1:') define(HEX, `0x$1') -define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 8)) -define(ADDRESS_MASK, eval(((2 ** (32 - TC_LENGTH)) - 1))) define(IMMEDIATE, `$$1') +define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 8)) +define(DATUM_LENGTH, eval(32 - TC_LENGTH)) +define(DATUM_SHIFT, eval((2 ** DATUM_LENGTH))) +define(ADDRESS_MASK, eval((DATUM_SHIFT - 1))) +define(TAG, ($2 + ($1 * DATUM_SHIFT))) + +define(TC_FALSE,0) +define(TC_FLONUM,6) +define(TC_TRUE,8) +define(TC_FIXNUM,26) +define(TC_MANIFEST_NM_VECTOR,39) +define(TC_COMPILED_ENTRY,40) + define(REGBLOCK_VAL,8) define(REGBLOCK_COMPILER_TEMP,16) define(REGBLOCK_LEXPR_ACTUALS,28) @@ -124,6 +135,17 @@ define(REGBLOCK_CLOSURE_FREE,36) define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP) define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE) +define(COMPILER_REGBLOCK_N_FIXED,16) +define(COMPILER_REGBLOCK_N_HOOKS,80) +define(COMPILER_REGBLOCK_N_TEMPS,256) +define(COMPILER_FIXED_SIZE,1) +define(COMPILER_HOOK_SIZE,1) +define(COMPILER_TEMP_SIZE,3) +define(REGBLOCK_SIZE_IN_OBJECTS, + eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE) + +(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE) + +(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE))) + define(regs,%esi) define(rfree,%edi) define(rmask,%ebp) @@ -134,14 +156,23 @@ use_external(Ext_Stack_Pointer) .file "cmpaux-i386.s" +.data + .align 2 + .globl C_Stack_Pointer .comm C_Stack_Pointer,4 .globl C_Frame_Pointer .comm C_Frame_Pointer,4 + +define_debugging_label(Regstart) + .space 128 +define_c_label(Registers) + .space eval(REGBLOCK_SIZE_IN_OBJECTS*4) .text .align 2 + define_c_label(interface_initialize) pushl %ebp movl %esp,%ebp @@ -210,6 +241,13 @@ define_c_label(interface_to_scheme) jmp *%edx define_c_label(interface_to_C) + ffree %st (0) # Free floating "regs" + ffree %st (1) + ffree %st (2) + ffree %st (3) + ffree %st (4) + ffree %st (5) + ffree %st (6) movl %edx,%eax # Set up result popl %ebx # Restore callee-saves popl %esi # registers @@ -217,14 +255,19 @@ define_c_label(interface_to_C) leave ret +### Assembly language hooks used to reduce code size. +### There is no time advantage to using these over using +### scheme_to_interface (or scheme_to_interface_call), but the +### code generated by the compiler can be somewhat smaller. + define(define_jump_indirection, `define_c_label(asm_$1) - movl IMMEDIATE(HEX($2)),%eax + movb IMMEDIATE(HEX($2)),%al jmp scheme_to_interface') define(define_call_indirection, `define_c_label(asm_$1) - movl IMMEDIATE(HEX($2)),%eax + movb IMMEDIATE(HEX($2)),%al jmp scheme_to_interface_call') define_call_indirection(interrupt_procedure,1a) @@ -233,5 +276,402 @@ define_jump_indirection(interrupt_closure,18) define_c_label(asm_interrupt_dlink) movl REGBLOCK_DLINK()(regs),%edx - movl IMMEDIATE(HEX(19)),%eax + movb IMMEDIATE(HEX(19)),%al jmp scheme_to_interface_call + +### +### This sames even more instructions than primitive_apply +### When the PC is not available. Instead of jumping here, +### a call instruction is used, and the longword offset to +### the primitive object follows the call instruction. +### This code loads the primitive object and merges with +### apply_primitive +### + + .align 2 +define_c_label(asm_short_primitive_apply) + popl %edx # offset pointer + movl (%edx),%ecx # offset + movl (%edx,%ecx),%ecx # Primitive object + jmp external_reference(asm_primitive_apply) # Merge + + .align 2 +define_jump_indirection(primitive_apply,12) + +define_jump_indirection(primitive_lexpr_apply,13) +define_jump_indirection(error,15) +define_call_indirection(link,17) +define_call_indirection(assignment_trap,1d) +define_call_indirection(reference_trap,1f) +define_call_indirection(safe_reference_trap,20) +define_call_indirection(primitive_error,36) + +### Assembly language hooks used to increase speed. + +# define_jump_indirection(shortcircuit_apply,14) +# +# define(define_apply_fixed_size, +# `define_c_label(asm_shortcircuit_apply_size_$1) +# movl IMMEDIATE($1),%edx +# movb IMMEDIATE(HEX(14)),%eax +# jmp scheme_to_interface') + + .align 2 +define_c_label(asm_shortcircuit_apply) + movl %ecx,%eax # Copy for type code + movl %ecx,%ebx # Copy for address + shrl IMMEDIATE(DATUM_LENGTH),%eax # Select type code + andl rmask,%ebx # Select datum + cmpb IMMEDIATE(TC_COMPILED_ENTRY),%al + jne asm_shortcircuit_apply_generic + movsbl -4(%ebx),%eax # Extract frame size + cmpl %eax,%edx # Compare to nargs+1 + jne asm_shortcircuit_apply_generic + jmp *%ebx # Invoke + +define_debugging_label(asm_shortcircuit_apply_generic) + movl IMMEDIATE(HEX(14)),%eax + jmp scheme_to_interface + +define(define_apply_fixed_size, +` .align 2 +define_c_label(asm_shortcircuit_apply_size_$1) + movl %ecx,%eax # Copy for type code + movl %ecx,%ebx # Copy for address + shrl IMMEDIATE(DATUM_LENGTH),%eax # Select type code + andl rmask,%ebx # Select datum + cmpb IMMEDIATE(TC_COMPILED_ENTRY),%al + jne asm_shortcircuit_apply_generic_$1 + cmpb IMMEDIATE($1),-4(%ebx) # Compare frame size + jne asm_shortcircuit_apply_generic_$1 # to nargs+1 + jmp *%ebx + +asm_shortcircuit_apply_generic_$1: + movl IMMEDIATE($1),%edx + movb IMMEDIATE(HEX(14)),%eax + jmp scheme_to_interface') + +define_apply_fixed_size(1) +define_apply_fixed_size(2) +define_apply_fixed_size(3) +define_apply_fixed_size(4) +define_apply_fixed_size(5) +define_apply_fixed_size(6) +define_apply_fixed_size(7) +define_apply_fixed_size(8) + +### The following code is used by generic arithmetic +### whether the fixnum case is open-coded in line or not. +### This takes care of fixnums and flonums so that the common +### numeric types are much faster than the rare ones +### (bignums, ratnums, recnums) + + .align 2 +asm_generic_flonum_result: + movl IMMEDIATE(eval(TAG(TC_MANIFEST_NM_VECTOR,2))),(rfree) + movl rfree,%eax + fstpl 4(rfree) # fstpd + orl IMMEDIATE(eval(TAG(TC_FLONUM,0))),%eax + andl rmask,(%esp) + addl IMMEDIATE(12),rfree + movl %eax,REGBLOCK_VAL()(regs) + ret + + .align 2 +asm_generic_fixnum_result: + andl rmask,(%esp) + orb IMMEDIATE(TC_FIXNUM),%al + rorl IMMEDIATE(TC_LENGTH),%eax + movl %eax,REGBLOCK_VAL()(regs) + ret + + .align 2 +asm_generic_return_sharp_t: + andl rmask,(%esp) + movl IMMEDIATE(eval(TAG(TC_TRUE,0))),REGBLOCK_VAL()(regs) + ret + + .align 2 +asm_generic_return_sharp_f: + andl rmask,(%esp) + movl IMMEDIATE(eval(TAG(TC_FALSE,0))),REGBLOCK_VAL()(regs) + ret + +define(define_unary_operation, +` .align 2 +define_c_label(asm_generic_$1) + popl %edx + movl %edx,%eax + shrl IMMEDIATE(DATUM_LENGTH),%eax + cmpb IMMEDIATE(TC_FIXNUM),%al + je asm_generic_$1_fix + cmpb IMMEDIATE(TC_FLONUM),%al + jne asm_generic_$1_fail + andl rmask,%edx + fld1 + $4 4(%edx) + jmp asm_generic_flonum_result + +asm_generic_$1_fix: + movl %edx,%eax + shll IMMEDIATE(TC_LENGTH),%eax + $3 IMMEDIATE(eval(2 ** TC_LENGTH)),%eax + jno asm_generic_fixnum_result + +asm_generic_$1_fail: + pushl %edx + movb IMMEDIATE(HEX($2)),%al + jmp scheme_to_interface') + +define(define_unary_predicate, +` .align 2 +define_c_label(asm_generic_$1) + popl %edx + movl %edx,%eax + shrl IMMEDIATE(DATUM_LENGTH),%eax + cmpb IMMEDIATE(TC_FIXNUM),%al + je asm_generic_$1_fix + cmpb IMMEDIATE(TC_FLONUM),%al + jne asm_generic_$1_fail + andl rmask,%edx + fldl 4(%edx) + ftst + fstsw %ax + fstp %st (0) + sahf + $4 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fix: + movl %edx,%eax + shll IMMEDIATE(TC_LENGTH),%eax + cmpl IMMEDIATE(0),%eax + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fail: + pushl %edx + movb IMMEDIATE(HEX($2)),%al + jmp scheme_to_interface') + +define(define_binary_operation, +` .align 2 +define_c_label(asm_generic_$1) + popl %edx + popl %ebx + movl %edx,%eax + movl %ebx,%ecx + shrl IMMEDIATE(DATUM_LENGTH),%eax + shrl IMMEDIATE(DATUM_LENGTH),%ecx + cmpb IMMEDIATE(TC_FIXNUM),%al + je asm_generic_$1_fix + cmpb IMMEDIATE(TC_FLONUM),%al + jne asm_generic_$1_fail + cmpb IMMEDIATE(TC_FLONUM),%cl + je asm_generic_$1_flo_flo + cmpb IMMEDIATE(TC_FIXNUM),%cl + jne asm_generic_$1_fail + shll IMMEDIATE(TC_LENGTH),%ebx + andl rmask,%edx + sarl IMMEDIATE(TC_LENGTH),%ebx + fldl 4(%edx) # fldd + movl %ebx,(rfree) + $5 (rfree) # fisubl + jmp asm_generic_flonum_result + +asm_generic_$1_fix: + cmpb IMMEDIATE(TC_FLONUM),%cl + je asm_generic_$1_fix_flo + cmpb IMMEDIATE(TC_FIXNUM),%cl + jne asm_generic_$1_fail + movl %edx,%eax + movl %ebx,%ecx + shll IMMEDIATE(TC_LENGTH),%eax + shll IMMEDIATE(TC_LENGTH),%ecx + $3 %ecx,%eax # subl + jno asm_generic_fixnum_result + +asm_generic_$1_fail: + pushl %ebx + pushl %edx + movb IMMEDIATE(HEX($2)),%al + jmp scheme_to_interface + +asm_generic_$1_flo_flo: + andl rmask,%edx + andl rmask,%ebx + fldl 4(%edx) # fldd + $6 4(%ebx) # fsubl + jmp asm_generic_flonum_result + +asm_generic_$1_fix_flo: + shll IMMEDIATE(TC_LENGTH),%edx + andl rmask,%ebx + sarl IMMEDIATE(TC_LENGTH),%edx + fldl 4(%ebx) # fldd + movl %edx,(rfree) + $4 (rfree) # fisubrl + jmp asm_generic_flonum_result') + + .align 2 +define_c_label(asm_generic_divide) + popl %edx + popl %ebx + movl %edx,%eax + movl %ebx,%ecx + shrl IMMEDIATE(DATUM_LENGTH),%eax + shrl IMMEDIATE(DATUM_LENGTH),%ecx + cmpb IMMEDIATE(TC_FIXNUM),%al + je asm_generic_divide_fix + cmpb IMMEDIATE(TC_FLONUM),%al + jne asm_generic_divide_fail + cmpb IMMEDIATE(TC_FLONUM),%cl + je asm_generic_divide_flo_flo + cmpb IMMEDIATE(TC_FIXNUM),%cl + jne asm_generic_divide_fail + movl %ebx,%ecx + shll IMMEDIATE(TC_LENGTH),%ecx + je asm_generic_divide_fail + andl rmask,%edx + sarl IMMEDIATE(TC_LENGTH),%ecx + fldl 4(%edx) # fldd + movl %ecx,(rfree) + fidivl (rfree) + jmp asm_generic_flonum_result + +asm_generic_divide_fix: + cmpb IMMEDIATE(TC_FLONUM),%cl + jne asm_generic_divide_fail + movl %edx,%ecx + shll IMMEDIATE(TC_LENGTH),%ecx + je asm_generic_divide_fail + andl rmask,%ebx + sarl IMMEDIATE(TC_LENGTH),%ecx + fldl 4(%ebx) # fldd + movl %ecx,(rfree) + fidivrl (rfree) + jmp asm_generic_flonum_result + +asm_generic_divide_flo_flo: + movl %ebx,%ecx + andl rmask,%ecx + fldl 4(%ecx) # fldd + ftst + fstsw %ax + sahf + je asm_generic_divide_by_zero + andl rmask,%edx + fdivrl 4(%edx) + jmp asm_generic_flonum_result + +asm_generic_divide_by_zero: + fstp %st (0) # Pop second arg + +asm_generic_divide_fail: + pushl %ebx + pushl %edx + movb IMMEDIATE(HEX(23)),%al + jmp scheme_to_interface + +define(define_binary_predicate, +` .align 2 +define_c_label(asm_generic_$1) + popl %edx + popl %ebx + movl %edx,%eax + movl %ebx,%ecx + shrl IMMEDIATE(DATUM_LENGTH),%eax + shrl IMMEDIATE(DATUM_LENGTH),%ecx + cmpb IMMEDIATE(TC_FIXNUM),%al + je asm_generic_$1_fix + cmpb IMMEDIATE(TC_FLONUM),%al + jne asm_generic_$1_fail + cmpb IMMEDIATE(TC_FLONUM),%cl + je asm_generic_$1_flo_flo + cmpb IMMEDIATE(TC_FIXNUM),%cl + jne asm_generic_$1_fail + shll IMMEDIATE(TC_LENGTH),%ebx + andl rmask,%edx + sarl IMMEDIATE(TC_LENGTH),%ebx + fldl 4(%edx) # fldd + movl %ebx,(rfree) + ficompl (rfree) + fstsw %ax + sahf + $5 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fix: + cmpb IMMEDIATE(TC_FLONUM),%cl + je asm_generic_$1_fix_flo + cmpb IMMEDIATE(TC_FIXNUM),%cl + jne asm_generic_$1_fail + shll IMMEDIATE(TC_LENGTH),%edx + shll IMMEDIATE(TC_LENGTH),%ebx + cmpl %ebx,%edx + $3 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_flo_flo: + andl rmask,%edx + andl rmask,%ebx + fldl 4(%edx) # fldd + fcompl 4(%ebx) + fstsw %ax + sahf + $6 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fix_flo: + shll IMMEDIATE(TC_LENGTH),%edx + andl rmask,%ebx + sarl IMMEDIATE(TC_LENGTH),%edx + movl %edx,(rfree) + fildl (rfree) + fcompl 4(%ebx) + fstsw %ax + sahf + $4 asm_generic_return_sharp_t + jmp asm_generic_return_sharp_f + +asm_generic_$1_fail: + pushl %ebx + pushl %edx + movb IMMEDIATE(HEX($2)),%al + jmp scheme_to_interface') + +# define_jump_indirection(generic_decrement,22) +# define_jump_indirection(generic_divide,23) +# define_jump_indirection(generic_equal,24) +# define_jump_indirection(generic_greater,25) +# define_jump_indirection(generic_increment,26) +# define_jump_indirection(generic_less,27) +# define_jump_indirection(generic_subtract,28) +# define_jump_indirection(generic_multiply,29) +# define_jump_indirection(generic_negative,2a) +# define_jump_indirection(generic_add,2b) +# define_jump_indirection(generic_positive,2c) +# define_jump_indirection(generic_zero,2d) +define_jump_indirection(generic_quotient,37) +define_jump_indirection(generic_remainder,38) +define_jump_indirection(generic_modulo,39) + +define_unary_operation(decrement,22,subl,fsubrl) +define_unary_operation(increment,26,addl,faddl) + +define_unary_predicate(negative,2a,jl,jb) +define_unary_predicate(positive,2c,jg,ja) +define_unary_predicate(zero,2d,je,je) + +# define_binary_operation(name,index,fix*fix,fix*flo,flo*fix,flo*flo) +# define_binary_operation( $1, $2, $3, $4, $5, $6) +define_binary_operation(add,2b,addl,fiaddl,fiaddl,faddl) +define_binary_operation(subtract,28,subl,fisubrl,fisubl,fsubl) +define_binary_operation(multiply,29,imull,fimull,fimull,fmull) +# Divide needs to check for 0, so we can't really use the following +# define_binary_operation(divide,23,NONE,fidivrl,fidivl,fdivl) + +# define_binary_predicate(name,index,fix*fix,fix*flo,flo*fix,flo*flo) +define_binary_predicate(equal,24,je,je,je,je) +define_binary_predicate(greater,25,jg,ja,ja,ja) +define_binary_predicate(less,27,jl,jb,jb,jb) -- 2.25.1