From 466bb04785c67f610a2fa7ef0b1c7f4bbb3a2e01 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Mon, 20 Feb 2017 22:04:55 -0800 Subject: [PATCH] Minimize inter-package deps around legacy strings. Also optimize handling of ascii for symbol names. --- src/runtime/bytevector.scm | 4 +- src/runtime/runtime.pkg | 8 +--- src/runtime/symbol.scm | 97 ++++++++++++++++++++++++++------------ src/runtime/ustring.scm | 15 ------ 4 files changed, 71 insertions(+), 53 deletions(-) diff --git a/src/runtime/bytevector.scm b/src/runtime/bytevector.scm index ced0b4ee4..4a984a857 100644 --- a/src/runtime/bytevector.scm +++ b/src/runtime/bytevector.scm @@ -45,7 +45,9 @@ USA. (bytevector-length 1) (bytevector-u8-ref 2) (bytevector-u8-set! 3) - (bytevector? 1)) + (bytevector? 1) + (legacy-string-allocate string-allocate 1) + (legacy-string? string? 1)) (define (make-bytevector k #!optional byte) (let ((bytevector (allocate-bytevector k))) diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index f326541e7..a92891bc3 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1173,16 +1173,10 @@ USA. string>? string? vector->string) - (export (runtime bytevector) - legacy-string-allocate - legacy-string? - ustring->legacy-string) (export (runtime predicate-metadata) register-ustring-predicates!) (export (runtime symbol) - %string* - legacy-string-downcase - legacy-string?)) + %string*)) (define-package (runtime bytevector) (files "bytevector") diff --git a/src/runtime/symbol.scm b/src/runtime/symbol.scm index f8aab276f..0cb6d10d7 100644 --- a/src/runtime/symbol.scm +++ b/src/runtime/symbol.scm @@ -54,52 +54,34 @@ USA. (define (symbol->string symbol) (guarantee symbol? symbol 'symbol->string) - (utf8->string - (let ((name (system-pair-car symbol))) - (cond ((bytevector? name) name) - ((legacy-string? name) (%legacy-string->bytevector name)) - (else (error "Illegal symbol name:" name)))))) + (let ((s (system-pair-car symbol))) + (cond ((maybe-ascii s)) + ((bytevector? s) (utf8->string s)) + ((legacy-string? s) (utf8->string (%legacy-string->bytevector s))) + (else (error "Illegal symbol name:" s))))) (define (string-head->symbol string end) - (string->symbol (string-copy string 0 end))) + (string->symbol (string-slice string 0 end))) (define (string-tail->symbol string start) - (string->symbol (string-copy string start))) + (string->symbol (string-slice string start))) (define (symbol . objects) (string->symbol (%string* objects 'symbol))) (define (intern string) - (string->symbol (cold-load-foldcase string))) + ((ucode-primitive string->symbol) (foldcase->utf8 string))) (define (intern-soft string) - ((ucode-primitive find-symbol) (string->utf8 (cold-load-foldcase string)))) - -(define (cold-load-foldcase string) - (if (ascii-string? string) - ;; Needed during cold load. - (legacy-string-downcase string) - (string-foldcase string))) + ((ucode-primitive find-symbol) (foldcase->utf8 string))) (define (symbol-name symbol) (if (not (symbol? symbol)) (error:not-a symbol? symbol 'symbol-name)) - (let* ((bytes (system-pair-car symbol)) - (string (object-new-type (ucode-type string) bytes))) - (if (ascii-string? string) - ;; Needed during cold load. - string + (let ((bytes (system-pair-car symbol))) + (or (maybe-ascii bytes) (utf8->string bytes)))) -(define (ascii-string? string) - (and ((ucode-primitive string?) string) - (let ((end ((ucode-primitive string-length) string))) - (let loop ((i 0)) - (if (fix:< i end) - (and (fix:< ((ucode-primitive vector-8b-ref) string i) #x80) - (loop (fix:+ i 1))) - #t))))) - (define (symbol-hash symbol #!optional modulus) (string-hash (symbol-name symbol) modulus)) @@ -107,4 +89,59 @@ USA. (string? x y) - (stringutf8 string) + (if (ascii-string? string) + ;; Needed during cold load. + (%legacy-string->bytevector (ascii-string-foldcase string)) + (string->utf8 (string-foldcase string)))) + +(define (ascii-string? string) + (and (legacy-string? string) + (let ((end (legacy-string-length string))) + (let loop ((i 0)) + (if (fix:< i end) + (and (fix:< (vector-8b-ref string i) #x80) + (loop (fix:+ i 1))) + #t))))) + +(define (ascii-string-foldcase string) + (let ((end (legacy-string-length string))) + (if (let loop ((i 0)) + (if (fix:< i end) + (and (not (ascii-changes-when-case-folded? + (vector-8b-ref string i))) + (loop (fix:+ i 1))) + #t)) + string + (let ((string* (legacy-string-allocate end))) + (do ((i 0 (fix:+ i 1))) + ((fix:= i end)) + (vector-8b-set! string* + i + (ascii-foldcase (vector-8b-ref string i)))) + string*)))) + +(define (ascii-changes-when-case-folded? code) + (and (fix:>= code (char->integer #\A)) + (fix:<= code (char->integer #\Z)))) + +(define (ascii-foldcase code) + (if (ascii-changes-when-case-folded? code) + (fix:+ (char->integer #\a) + (fix:- code (char->integer #\A))) + code)) \ No newline at end of file diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 08e4ebd17..4df0a2053 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -777,12 +777,6 @@ USA. (define (burst-string string delimiter allow-runs?) ((string-splitter delimiter allow-runs?) string)) -(define (ustring->legacy-string string) - (if (legacy-string? string) - string - (and (string-8-bit? string) - (string-copy string)))) - (define (string-8-bit? string) (receive (string start end) (translate-slice string 0 (string-length string)) (if (legacy-string? string) @@ -809,15 +803,6 @@ USA. (else (error:not-a string? string 'string-for-primitive)))) -(define (legacy-string-downcase string) - (let ((end (legacy-string-length string))) - (let ((string* (legacy-string-allocate end))) - (do ((i 0 (fix:+ i 1))) - ((fix:= i end)) - (legacy-string-set! string* i - (char-downcase (legacy-string-ref string i)))) - string*))) - (define-integrable (copy-loop to-set! to at from-ref from start end) (do ((i start (fix:+ i 1)) (j at (fix:+ j 1))) -- 2.25.1