Add short-circuit handlers for common arithmetic.
authorGuillermo J. Rozas <edu/mit/csail/zurich/gjr>
Fri, 28 Feb 1992 20:19:58 +0000 (20:19 +0000)
committerGuillermo J. Rozas <edu/mit/csail/zurich/gjr>
Fri, 28 Feb 1992 20:19:58 +0000 (20:19 +0000)
v7/src/microcode/cmpauxmd/i386.m4

index fce7152db38f229cd3fd0f6408b6c6bcb96c4285..9b292d6afb37237218eee79c5d1fe5205d94768c 100644 (file)
@@ -1,6 +1,6 @@
 ### -*-Midas-*-
 ###
-###    $Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/cmpauxmd/i386.m4,v 1.10 1992/02/19 19:00:06 jinx Exp $
+###    $Header: /Users/cph/tmp/foo/mit-scheme/mit-scheme/v7/src/microcode/cmpauxmd/i386.m4,v 1.11 1992/02/28 20:19:58 jinx Exp $
 ###
 ###    Copyright (c) 1992 Massachusetts Institute of Technology
 ###
@@ -111,10 +111,21 @@ define(define_debugging_label,
 $1:')
 
 define(HEX, `0x$1')
-define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 8))
-define(ADDRESS_MASK, eval(((2 ** (32 - TC_LENGTH)) - 1)))
 define(IMMEDIATE, `$$1')
 
+define(TC_LENGTH, ifdef(`TYPE_CODE_LENGTH', TYPE_CODE_LENGTH, 8))
+define(DATUM_LENGTH, eval(32 - TC_LENGTH))
+define(DATUM_SHIFT, eval((2 ** DATUM_LENGTH)))
+define(ADDRESS_MASK, eval((DATUM_SHIFT - 1)))
+define(TAG, ($2 + ($1 * DATUM_SHIFT)))
+
+define(TC_FALSE,0)
+define(TC_FLONUM,6)
+define(TC_TRUE,8)
+define(TC_FIXNUM,26)
+define(TC_MANIFEST_NM_VECTOR,39)
+define(TC_COMPILED_ENTRY,40)
+
 define(REGBLOCK_VAL,8)
 define(REGBLOCK_COMPILER_TEMP,16)
 define(REGBLOCK_LEXPR_ACTUALS,28)
@@ -124,6 +135,17 @@ define(REGBLOCK_CLOSURE_FREE,36)
 define(REGBLOCK_DLINK,REGBLOCK_COMPILER_TEMP)
 define(REGBLOCK_UTILITY_ARG4,REGBLOCK_CLOSURE_FREE)
 
+define(COMPILER_REGBLOCK_N_FIXED,16)
+define(COMPILER_REGBLOCK_N_HOOKS,80)
+define(COMPILER_REGBLOCK_N_TEMPS,256)
+define(COMPILER_FIXED_SIZE,1)
+define(COMPILER_HOOK_SIZE,1)
+define(COMPILER_TEMP_SIZE,3)
+define(REGBLOCK_SIZE_IN_OBJECTS,
+       eval((COMPILER_REGBLOCK_N_FIXED*COMPILER_FIXED_SIZE)
+           +(COMPILER_REGBLOCK_N_HOOKS*COMPILER_HOOK_SIZE)
+           +(COMPILER_REGBLOCK_N_TEMPS*COMPILER_TEMP_SIZE)))
+
 define(regs,%esi)
 define(rfree,%edi)
 define(rmask,%ebp)
@@ -134,14 +156,23 @@ use_external(Ext_Stack_Pointer)
 
        .file   "cmpaux-i386.s"
 
+.data
+       .align 2
+
 .globl C_Stack_Pointer
 .comm C_Stack_Pointer,4
 
 .globl C_Frame_Pointer
 .comm C_Frame_Pointer,4
+
+define_debugging_label(Regstart)
+       .space  128
+define_c_label(Registers)
+       .space  eval(REGBLOCK_SIZE_IN_OBJECTS*4)
 \f
 .text
        .align 2
+
 define_c_label(interface_initialize)
        pushl   %ebp
        movl    %esp,%ebp
@@ -210,6 +241,13 @@ define_c_label(interface_to_scheme)
        jmp     *%edx
 
 define_c_label(interface_to_C)
+       ffree   %st (0)                                 # Free floating "regs"
+       ffree   %st (1)
+       ffree   %st (2)
+       ffree   %st (3)
+       ffree   %st (4)
+       ffree   %st (5)
+       ffree   %st (6)
        movl    %edx,%eax                               # Set up result
        popl    %ebx                                    # Restore callee-saves
        popl    %esi                                    #  registers
@@ -217,14 +255,19 @@ define_c_label(interface_to_C)
        leave
        ret
 \f
+###    Assembly language hooks used to reduce code size.
+###    There is no time advantage to using these over using
+###    scheme_to_interface (or scheme_to_interface_call), but the
+###    code generated by the compiler can be somewhat smaller.
+
 define(define_jump_indirection,
 `define_c_label(asm_$1)
-       movl    IMMEDIATE(HEX($2)),%eax
+       movb    IMMEDIATE(HEX($2)),%al
        jmp     scheme_to_interface')
        
 define(define_call_indirection,
 `define_c_label(asm_$1)
-       movl    IMMEDIATE(HEX($2)),%eax
+       movb    IMMEDIATE(HEX($2)),%al
        jmp     scheme_to_interface_call')
        
 define_call_indirection(interrupt_procedure,1a)
@@ -233,5 +276,402 @@ define_jump_indirection(interrupt_closure,18)
 
 define_c_label(asm_interrupt_dlink)
        movl    REGBLOCK_DLINK()(regs),%edx
-       movl    IMMEDIATE(HEX(19)),%eax
+       movb    IMMEDIATE(HEX(19)),%al
        jmp     scheme_to_interface_call
+
+###
+###    This sames even more instructions than primitive_apply
+###    When the PC is not available.  Instead of jumping here,
+###    a call instruction is used, and the longword offset to
+###    the primitive object follows the call instruction.
+###    This code loads the primitive object and merges with
+###    apply_primitive
+###
+
+       .align  2
+define_c_label(asm_short_primitive_apply)
+       popl    %edx                                    # offset pointer
+       movl    (%edx),%ecx                             # offset
+       movl    (%edx,%ecx),%ecx                        # Primitive object
+       jmp     external_reference(asm_primitive_apply) # Merge
+
+       .align  2
+define_jump_indirection(primitive_apply,12)
+
+define_jump_indirection(primitive_lexpr_apply,13)
+define_jump_indirection(error,15)
+define_call_indirection(link,17)
+define_call_indirection(assignment_trap,1d)
+define_call_indirection(reference_trap,1f)
+define_call_indirection(safe_reference_trap,20)
+define_call_indirection(primitive_error,36)
+\f
+###    Assembly language hooks used to increase speed.
+
+# define_jump_indirection(shortcircuit_apply,14)
+# 
+# define(define_apply_fixed_size,
+# `define_c_label(asm_shortcircuit_apply_size_$1)
+#      movl    IMMEDIATE($1),%edx
+#      movb    IMMEDIATE(HEX(14)),%eax
+#      jmp     scheme_to_interface')
+
+       .align  2
+define_c_label(asm_shortcircuit_apply)
+       movl    %ecx,%eax                               # Copy for type code
+       movl    %ecx,%ebx                               # Copy for address
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax            # Select type code
+       andl    rmask,%ebx                              # Select datum
+       cmpb    IMMEDIATE(TC_COMPILED_ENTRY),%al
+       jne     asm_shortcircuit_apply_generic
+       movsbl  -4(%ebx),%eax                           # Extract frame size
+       cmpl    %eax,%edx                               # Compare to nargs+1
+       jne     asm_shortcircuit_apply_generic
+       jmp     *%ebx                                   # Invoke
+
+define_debugging_label(asm_shortcircuit_apply_generic)
+       movl    IMMEDIATE(HEX(14)),%eax
+       jmp     scheme_to_interface     
+
+define(define_apply_fixed_size,
+`      .align  2
+define_c_label(asm_shortcircuit_apply_size_$1)
+       movl    %ecx,%eax                               # Copy for type code
+       movl    %ecx,%ebx                               # Copy for address
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax            # Select type code
+       andl    rmask,%ebx                              # Select datum
+       cmpb    IMMEDIATE(TC_COMPILED_ENTRY),%al
+       jne     asm_shortcircuit_apply_generic_$1
+       cmpb    IMMEDIATE($1),-4(%ebx)                  # Compare frame size
+       jne     asm_shortcircuit_apply_generic_$1       # to nargs+1
+       jmp     *%ebx
+
+asm_shortcircuit_apply_generic_$1:
+       movl    IMMEDIATE($1),%edx
+       movb    IMMEDIATE(HEX(14)),%eax
+       jmp     scheme_to_interface')
+
+define_apply_fixed_size(1)
+define_apply_fixed_size(2)
+define_apply_fixed_size(3)
+define_apply_fixed_size(4)
+define_apply_fixed_size(5)
+define_apply_fixed_size(6)
+define_apply_fixed_size(7)
+define_apply_fixed_size(8)
+\f
+###    The following code is used by generic arithmetic
+###    whether the fixnum case is open-coded in line or not.
+###    This takes care of fixnums and flonums so that the common
+###    numeric types are much faster than the rare ones
+###    (bignums, ratnums, recnums)
+
+       .align  2
+asm_generic_flonum_result:
+       movl    IMMEDIATE(eval(TAG(TC_MANIFEST_NM_VECTOR,2))),(rfree)
+       movl    rfree,%eax
+       fstpl   4(rfree)                                # fstpd
+       orl     IMMEDIATE(eval(TAG(TC_FLONUM,0))),%eax
+       andl    rmask,(%esp)
+       addl    IMMEDIATE(12),rfree
+       movl    %eax,REGBLOCK_VAL()(regs)
+       ret
+
+       .align  2
+asm_generic_fixnum_result:
+       andl    rmask,(%esp)
+       orb     IMMEDIATE(TC_FIXNUM),%al
+       rorl    IMMEDIATE(TC_LENGTH),%eax
+       movl    %eax,REGBLOCK_VAL()(regs)
+       ret
+
+       .align  2
+asm_generic_return_sharp_t:
+       andl    rmask,(%esp)
+       movl    IMMEDIATE(eval(TAG(TC_TRUE,0))),REGBLOCK_VAL()(regs)
+       ret
+
+       .align  2
+asm_generic_return_sharp_f:
+       andl    rmask,(%esp)
+       movl    IMMEDIATE(eval(TAG(TC_FALSE,0))),REGBLOCK_VAL()(regs)
+       ret
+\f
+define(define_unary_operation,
+`      .align  2
+define_c_label(asm_generic_$1)
+       popl    %edx
+       movl    %edx,%eax
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax
+       cmpb    IMMEDIATE(TC_FIXNUM),%al
+       je      asm_generic_$1_fix
+       cmpb    IMMEDIATE(TC_FLONUM),%al
+       jne     asm_generic_$1_fail
+       andl    rmask,%edx
+       fld1
+       $4      4(%edx)
+       jmp     asm_generic_flonum_result
+
+asm_generic_$1_fix:
+       movl    %edx,%eax
+       shll    IMMEDIATE(TC_LENGTH),%eax
+       $3      IMMEDIATE(eval(2 ** TC_LENGTH)),%eax
+       jno     asm_generic_fixnum_result
+
+asm_generic_$1_fail:
+       pushl   %edx
+       movb    IMMEDIATE(HEX($2)),%al
+       jmp     scheme_to_interface')
+
+define(define_unary_predicate,
+`      .align  2
+define_c_label(asm_generic_$1)
+       popl    %edx
+       movl    %edx,%eax
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax
+       cmpb    IMMEDIATE(TC_FIXNUM),%al
+       je      asm_generic_$1_fix
+       cmpb    IMMEDIATE(TC_FLONUM),%al
+       jne     asm_generic_$1_fail
+       andl    rmask,%edx
+       fldl    4(%edx)
+       ftst
+       fstsw   %ax
+       fstp    %st (0)
+       sahf
+       $4      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
+asm_generic_$1_fix:
+       movl    %edx,%eax
+       shll    IMMEDIATE(TC_LENGTH),%eax
+       cmpl    IMMEDIATE(0),%eax
+       $3      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
+asm_generic_$1_fail:
+       pushl   %edx
+       movb    IMMEDIATE(HEX($2)),%al
+       jmp     scheme_to_interface')
+\f
+define(define_binary_operation,
+`      .align  2
+define_c_label(asm_generic_$1)
+       popl    %edx
+       popl    %ebx
+       movl    %edx,%eax
+       movl    %ebx,%ecx
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax
+       shrl    IMMEDIATE(DATUM_LENGTH),%ecx
+       cmpb    IMMEDIATE(TC_FIXNUM),%al
+       je      asm_generic_$1_fix
+       cmpb    IMMEDIATE(TC_FLONUM),%al
+       jne     asm_generic_$1_fail
+       cmpb    IMMEDIATE(TC_FLONUM),%cl
+       je      asm_generic_$1_flo_flo
+       cmpb    IMMEDIATE(TC_FIXNUM),%cl
+       jne     asm_generic_$1_fail
+       shll    IMMEDIATE(TC_LENGTH),%ebx
+       andl    rmask,%edx
+       sarl    IMMEDIATE(TC_LENGTH),%ebx
+       fldl    4(%edx)                                 # fldd
+       movl    %ebx,(rfree)
+       $5      (rfree)                                 # fisubl
+       jmp     asm_generic_flonum_result
+
+asm_generic_$1_fix:
+       cmpb    IMMEDIATE(TC_FLONUM),%cl
+       je      asm_generic_$1_fix_flo
+       cmpb    IMMEDIATE(TC_FIXNUM),%cl
+       jne     asm_generic_$1_fail
+       movl    %edx,%eax
+       movl    %ebx,%ecx
+       shll    IMMEDIATE(TC_LENGTH),%eax
+       shll    IMMEDIATE(TC_LENGTH),%ecx
+       $3      %ecx,%eax                               # subl
+       jno     asm_generic_fixnum_result
+
+asm_generic_$1_fail:
+       pushl   %ebx
+       pushl   %edx
+       movb    IMMEDIATE(HEX($2)),%al
+       jmp     scheme_to_interface
+
+asm_generic_$1_flo_flo:
+       andl    rmask,%edx
+       andl    rmask,%ebx
+       fldl    4(%edx)                                 # fldd
+       $6      4(%ebx)                                 # fsubl
+       jmp     asm_generic_flonum_result       
+
+asm_generic_$1_fix_flo:
+       shll    IMMEDIATE(TC_LENGTH),%edx
+       andl    rmask,%ebx
+       sarl    IMMEDIATE(TC_LENGTH),%edx
+       fldl    4(%ebx)                                 # fldd
+       movl    %edx,(rfree)
+       $4      (rfree)                                 # fisubrl
+       jmp     asm_generic_flonum_result')
+\f
+       .align  2
+define_c_label(asm_generic_divide)
+       popl    %edx
+       popl    %ebx
+       movl    %edx,%eax
+       movl    %ebx,%ecx
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax
+       shrl    IMMEDIATE(DATUM_LENGTH),%ecx
+       cmpb    IMMEDIATE(TC_FIXNUM),%al
+       je      asm_generic_divide_fix
+       cmpb    IMMEDIATE(TC_FLONUM),%al
+       jne     asm_generic_divide_fail
+       cmpb    IMMEDIATE(TC_FLONUM),%cl
+       je      asm_generic_divide_flo_flo
+       cmpb    IMMEDIATE(TC_FIXNUM),%cl
+       jne     asm_generic_divide_fail
+       movl    %ebx,%ecx
+       shll    IMMEDIATE(TC_LENGTH),%ecx
+       je      asm_generic_divide_fail
+       andl    rmask,%edx
+       sarl    IMMEDIATE(TC_LENGTH),%ecx
+       fldl    4(%edx)                                 # fldd
+       movl    %ecx,(rfree)
+       fidivl  (rfree)
+       jmp     asm_generic_flonum_result
+
+asm_generic_divide_fix:
+       cmpb    IMMEDIATE(TC_FLONUM),%cl
+       jne     asm_generic_divide_fail
+       movl    %edx,%ecx
+       shll    IMMEDIATE(TC_LENGTH),%ecx
+       je      asm_generic_divide_fail
+       andl    rmask,%ebx
+       sarl    IMMEDIATE(TC_LENGTH),%ecx
+       fldl    4(%ebx)                                 # fldd
+       movl    %ecx,(rfree)
+       fidivrl (rfree)
+       jmp     asm_generic_flonum_result
+
+asm_generic_divide_flo_flo:
+       movl    %ebx,%ecx
+       andl    rmask,%ecx
+       fldl    4(%ecx)                                 # fldd
+       ftst
+       fstsw   %ax
+       sahf
+       je      asm_generic_divide_by_zero
+       andl    rmask,%edx
+       fdivrl  4(%edx)
+       jmp     asm_generic_flonum_result       
+
+asm_generic_divide_by_zero:
+       fstp    %st (0)                                 # Pop second arg
+
+asm_generic_divide_fail:
+       pushl   %ebx
+       pushl   %edx
+       movb    IMMEDIATE(HEX(23)),%al
+       jmp     scheme_to_interface
+\f
+define(define_binary_predicate,
+`      .align  2
+define_c_label(asm_generic_$1)
+       popl    %edx
+       popl    %ebx
+       movl    %edx,%eax
+       movl    %ebx,%ecx
+       shrl    IMMEDIATE(DATUM_LENGTH),%eax
+       shrl    IMMEDIATE(DATUM_LENGTH),%ecx
+       cmpb    IMMEDIATE(TC_FIXNUM),%al
+       je      asm_generic_$1_fix
+       cmpb    IMMEDIATE(TC_FLONUM),%al
+       jne     asm_generic_$1_fail
+       cmpb    IMMEDIATE(TC_FLONUM),%cl
+       je      asm_generic_$1_flo_flo
+       cmpb    IMMEDIATE(TC_FIXNUM),%cl
+       jne     asm_generic_$1_fail
+       shll    IMMEDIATE(TC_LENGTH),%ebx
+       andl    rmask,%edx
+       sarl    IMMEDIATE(TC_LENGTH),%ebx
+       fldl    4(%edx)                                 # fldd
+       movl    %ebx,(rfree)
+       ficompl (rfree)
+       fstsw   %ax
+       sahf
+       $5      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
+asm_generic_$1_fix:
+       cmpb    IMMEDIATE(TC_FLONUM),%cl
+       je      asm_generic_$1_fix_flo
+       cmpb    IMMEDIATE(TC_FIXNUM),%cl
+       jne     asm_generic_$1_fail
+       shll    IMMEDIATE(TC_LENGTH),%edx
+       shll    IMMEDIATE(TC_LENGTH),%ebx
+       cmpl    %ebx,%edx
+       $3      asm_generic_return_sharp_t      
+       jmp     asm_generic_return_sharp_f
+
+asm_generic_$1_flo_flo:
+       andl    rmask,%edx
+       andl    rmask,%ebx
+       fldl    4(%edx)                                 # fldd
+       fcompl  4(%ebx)
+       fstsw   %ax
+       sahf
+       $6      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
+asm_generic_$1_fix_flo:
+       shll    IMMEDIATE(TC_LENGTH),%edx
+       andl    rmask,%ebx
+       sarl    IMMEDIATE(TC_LENGTH),%edx
+       movl    %edx,(rfree)
+       fildl   (rfree)
+       fcompl  4(%ebx)
+       fstsw   %ax
+       sahf
+       $4      asm_generic_return_sharp_t
+       jmp     asm_generic_return_sharp_f
+
+asm_generic_$1_fail:
+       pushl   %ebx
+       pushl   %edx
+       movb    IMMEDIATE(HEX($2)),%al
+       jmp     scheme_to_interface')
+\f
+# define_jump_indirection(generic_decrement,22)
+# define_jump_indirection(generic_divide,23)
+# define_jump_indirection(generic_equal,24)
+# define_jump_indirection(generic_greater,25)
+# define_jump_indirection(generic_increment,26)
+# define_jump_indirection(generic_less,27)
+# define_jump_indirection(generic_subtract,28)
+# define_jump_indirection(generic_multiply,29)
+# define_jump_indirection(generic_negative,2a)
+# define_jump_indirection(generic_add,2b)
+# define_jump_indirection(generic_positive,2c)
+# define_jump_indirection(generic_zero,2d)
+define_jump_indirection(generic_quotient,37)
+define_jump_indirection(generic_remainder,38)
+define_jump_indirection(generic_modulo,39)
+
+define_unary_operation(decrement,22,subl,fsubrl)
+define_unary_operation(increment,26,addl,faddl)
+
+define_unary_predicate(negative,2a,jl,jb)
+define_unary_predicate(positive,2c,jg,ja)
+define_unary_predicate(zero,2d,je,je)
+
+# define_binary_operation(name,index,fix*fix,fix*flo,flo*fix,flo*flo)
+# define_binary_operation(  $1,   $2,     $3,     $4,     $5,     $6)
+define_binary_operation(add,2b,addl,fiaddl,fiaddl,faddl)
+define_binary_operation(subtract,28,subl,fisubrl,fisubl,fsubl)
+define_binary_operation(multiply,29,imull,fimull,fimull,fmull)
+# Divide needs to check for 0, so we can't really use the following
+# define_binary_operation(divide,23,NONE,fidivrl,fidivl,fdivl)
+
+# define_binary_predicate(name,index,fix*fix,fix*flo,flo*fix,flo*flo)
+define_binary_predicate(equal,24,je,je,je,je)
+define_binary_predicate(greater,25,jg,ja,ja,ja)
+define_binary_predicate(less,27,jl,jb,jb,jb)