From 3ecdecd3fe9e8050f75e7eecab1011896ea9c601 Mon Sep 17 00:00:00 2001
From: Taylor R Campbell
Date: Sun, 26 Jun 2011 19:39:09 +0000
Subject: [PATCH] Move interrupt branch from start to end of block, on i386.

This makes it a forward branch rather than a reverse branch, so that
the CPU's branch predictor will predict it not taken rather than taken
by default.

Reduces time to compile the whole system by about 20%.  This
performance increase will apply to any compiled code, independent of
what sort of computation it does.
---
Review note (not part of the commit message): this copy amends the
original change.  The end-of-block jump is now queued only when a
heap/interrupt or stack comparison is actually emitted, and
interrupt-check gained a header comment.  The diffstat and hunk header
below were updated to match; the index line still names the original
post-image blob.  Line breaks and indentation were reconstructed from
whitespace-collapsed text, so check them against the tree before
applying.

 src/compiler/machines/i386/rules3.scm | 37 +++++++++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/src/compiler/machines/i386/rules3.scm b/src/compiler/machines/i386/rules3.scm
index a6da4e2da..c9da18f50 100644
--- a/src/compiler/machines/i386/rules3.scm
+++ b/src/compiler/machines/i386/rules3.scm
@@ -398,14 +398,35 @@ USA.
 
 (define (interrupt-check interrupt-label checks)
   ;; This always does interrupt checks in line.
-  (LAP ,@(if (or (memq 'INTERRUPT checks) (memq 'HEAP checks))
-             (LAP (CMP W (R ,regnum:free-pointer) ,reg:compiled-memtop)
-                  (JGE (@PCR ,interrupt-label)))
-             (LAP))
-       ,@(if (memq 'STACK checks)
-             (LAP (CMP W (R ,regnum:stack-pointer) ,reg:stack-guard)
-                  (JL (@PCR ,interrupt-label)))
-             (LAP))))
+  ;;
+  ;; INTERRUPT-LABEL is the label that control reaches when a check
+  ;; fails.  CHECKS is a list of symbols: INTERRUPT or HEAP requests
+  ;; the free-pointer comparison, STACK requests the stack-pointer
+  ;; comparison.  The value is the in-line LAP, which is empty when
+  ;; neither comparison is requested.
+  (let ((branch-target (generate-label 'INTERRUPT))
+        (heap-check? (or (memq 'INTERRUPT checks) (memq 'HEAP checks)))
+        (stack-check? (memq 'STACK checks)))
+    ;; Put the interrupt check branch target after the branch so that
+    ;; it is a forward branch, which Intel and AMD CPUs will predict
+    ;; not taken by default, in the absence of dynamic branch
+    ;; prediction profile data.
+    ;;
+    ;; Queue the end-of-block jump only when some comparison branches
+    ;; to it; otherwise it is an unreferenced label plus a dead JMP.
+    (if (or heap-check? stack-check?)
+        (add-end-of-block-code!
+         (lambda ()
+           (LAP (LABEL ,branch-target)
+                (JMP (@PCR ,interrupt-label))))))
+    (LAP ,@(if heap-check?
+               (LAP (CMP W (R ,regnum:free-pointer) ,reg:compiled-memtop)
+                    (JGE (@PCR ,branch-target)))
+               (LAP))
+         ,@(if stack-check?
+               (LAP (CMP W (R ,regnum:stack-pointer) ,reg:stack-guard)
+                    (JL (@PCR ,branch-target)))
+               (LAP)))))
 
 (define (simple-procedure-header code-word label entry)
   (let ((checks (get-entry-interrupt-checks)))
-- 
2.25.1