From 15cddda2ab341f1bb69f4af9afe344ea50fedd5d Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 14 Oct 2018 18:46:32 -0700 Subject: [PATCH] Fix bug where the result of char-upcase is multibyte. This was previously hidden because the compiler isn't doing range checking on the third argument to bytevector-u8-set!. --- src/runtime/rgxcmp.scm | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/runtime/rgxcmp.scm b/src/runtime/rgxcmp.scm index f4ffe02cc..158ecc696 100644 --- a/src/runtime/rgxcmp.scm +++ b/src/runtime/rgxcmp.scm @@ -189,7 +189,8 @@ USA. (let ((result (make-bytevector 2))) (bytevector-u8-set! result 0 re-code:exact-1) (bytevector-u8-set! result 1 - (char->integer (if case-fold? (char-upcase char) char))) + (char->integer + (if case-fold? (char-sort-of-upcase char) char))) (make-compiled-regexp result case-fold?))) (define re-compile-string @@ -201,7 +202,7 @@ USA. (get-byte (if case-fold? (lambda (i) - (char->integer (char-upcase (string-ref string i)))) + (char->integer (char-sort-of-upcase (string-ref string i)))) (lambda (i) (char->integer (string-ref string i)))))) (let ((copy! @@ -267,6 +268,13 @@ USA. (byte-stream #f read-only #t) (translation-table #f read-only #t)) +;; Needed so that we stay within ISO 8859-1. +(define (char-sort-of-upcase char) + (let ((c (char-upcase char))) + (if (fix:>= (char->integer c) #x100) + char + c))) + (define re-translation-table (let ((normal-table (make-bytevector #x100)) (upcase-table (make-bytevector #x100))) @@ -274,7 +282,8 @@ USA. ((not (fix:< i #x100))) (bytevector-u8-set! normal-table i i) (bytevector-u8-set! upcase-table i - (char->integer (char-upcase (integer->char i))))) + (char->integer + (char-sort-of-upcase (integer->char i))))) (lambda (case-fold?) (if case-fold? upcase-table normal-table)))) -- 2.25.1