From 5b40758ac6a4afa0f60d86bbfe160352db5cda65 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sat, 22 Apr 2017 22:57:42 -0700 Subject: [PATCH] Change symbol->string and symbol-name to return immutable strings. --- src/runtime/bytevector.scm | 5 +-- src/runtime/runtime.pkg | 11 +++++-- src/runtime/symbol.scm | 65 +++++++++++++++++--------------------- src/runtime/ustring.scm | 25 +++++++++++++++ 4 files changed, 63 insertions(+), 43 deletions(-) diff --git a/src/runtime/bytevector.scm b/src/runtime/bytevector.scm index dd34c0719..4cbecc02c 100644 --- a/src/runtime/bytevector.scm +++ b/src/runtime/bytevector.scm @@ -63,10 +63,7 @@ USA. string (begin (guarantee legacy-string? string 'legacy-string->bytevector) - (%legacy-string->bytevector string)))) - -(define-integrable (%legacy-string->bytevector string) - (object-new-type (ucode-type bytevector) string)) + (object-new-type (ucode-type bytevector) string)))) (define (bytevector-append . bytevectors) (let* ((k diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 27bf50883..0d30485dc 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -1056,7 +1056,14 @@ USA. substring? vector->string) (export (runtime predicate-metadata) - register-ustring-predicates!)) + register-ustring-predicates!) + (export (runtime symbol) + %ascii-ustring! + %ascii-ustring-allocate + %ustring1? + cp1-ref + cp1-set! + ustring-length)) (define-package (runtime bytevector) (files "bytevector") @@ -1107,8 +1114,6 @@ USA. vector->bytevector) (export (runtime predicate-metadata) register-mit-bytevector-predicates!) - (export (runtime symbol) - %legacy-string->bytevector) (export (runtime ucd-tables) vector->bytevector-u16be)) diff --git a/src/runtime/symbol.scm b/src/runtime/symbol.scm index 9cd84c9f3..34e20396c 100644 --- a/src/runtime/symbol.scm +++ b/src/runtime/symbol.scm @@ -55,11 +55,12 @@ USA. (define (symbol->string symbol) (if (not (symbol? symbol)) (error:not-a symbol? symbol 'symbol->string)) - (let ((s (system-pair-car symbol))) - (cond ((maybe-ascii s)) - ((bytevector? s) (utf8->string s)) - ((legacy-string? s) (utf8->string (%legacy-string->bytevector s))) - (else (error "Illegal symbol name:" s))))) + (symbol-name symbol)) + +(define (symbol-name symbol) + (let ((bytes (->bytes (system-pair-car symbol)))) + (or (maybe-ascii bytes) + (utf8->string bytes)))) (define (symbol . objects) (string->symbol (string* objects))) @@ -70,11 +71,6 @@ USA. (define (intern-soft string) ((ucode-primitive find-symbol) (foldcase->utf8 string))) -(define (symbol-name symbol) - (let ((bytes (system-pair-car symbol))) - (or (maybe-ascii bytes) - (utf8->string bytes)))) - (define (symbol-hash symbol #!optional modulus) (string-hash (symbol-name symbol) modulus)) @@ -84,49 +80,46 @@ USA. (define (symbol>? x y) (stringbytes maybe-string) + (object-new-type (ucode-type bytevector) maybe-string)) (define (maybe-ascii bytes) ;; Needed during cold load. - (let ((string (object-new-type (ucode-type string) bytes))) - (and (ascii-string? string) - string))) + (let ((string (object-new-type (ucode-type unicode-string) bytes))) + (and (ustring-ascii? string) + (begin + (%ascii-ustring! string) + string)))) (define (foldcase->utf8 string) - (if (ascii-string? string) + (if (and (%ustring1? string) + (ustring-ascii? string)) ;; Needed during cold load. - (%legacy-string->bytevector (ascii-string-foldcase string)) + (->bytes (ascii-string-foldcase string)) (string->utf8 (string-foldcase string)))) -(define (ascii-string? string) - (and (legacy-string? string) - (let ((end (legacy-string-length string))) - (let loop ((i 0)) - (if (fix:< i end) - (and (fix:< (vector-8b-ref string i) #x80) - (loop (fix:+ i 1))) - #t))))) +(define (ustring-ascii? string) + (let ((end (ustring-length string))) + (let loop ((i 0)) + (if (fix:< i end) + (and (fix:< (cp1-ref string i) #x80) + (loop (fix:+ i 1))) + #t)))) (define (ascii-string-foldcase string) - (let ((end (legacy-string-length string))) + (let ((end (ustring-length string))) (if (let loop ((i 0)) (if (fix:< i end) - (and (not (ascii-changes-when-case-folded? - (vector-8b-ref string i))) + (and (not (ascii-changes-when-case-folded? (cp1-ref string i))) (loop (fix:+ i 1))) #t)) string - (let ((string* (legacy-string-allocate end))) + (let ((string* (%ascii-ustring-allocate end))) (do ((i 0 (fix:+ i 1))) ((fix:= i end)) - (vector-8b-set! string* - i - (ascii-foldcase (vector-8b-ref string i)))) + (cp1-set! string* + i + (ascii-foldcase (cp1-ref string i)))) string*)))) (define (ascii-changes-when-case-folded? code) diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index b84c3a5ab..b468023ee 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -126,6 +126,11 @@ USA. (define (%ustring-cp-size string) (fix:and #x03 (%ustring-flags string))) +(define (%set-ustring-cp-size! string cp-size) + (%set-ustring-flags! string + (fix:or (fix:andc (%ustring-flags string) #x03) + cp-size))) + (define (%ustring-mutable? string) (fix:= 0 (%ustring-cp-size string))) @@ -219,6 +224,26 @@ USA. (else (%ustring-allocate (fix:* 3 n) n 3)))) +;;; Used during cold load. +(define (%ustring1? object) + (or (and (ustring? object) + (fix:= 1 (%ustring-cp-size object))) + (legacy-string? object))) + +;;; Used during cold load. +(define (%ascii-ustring! string) + (%set-ustring-cp-size! string 1) + (ustring-in-nfc! string) + (ustring-in-nfd! string)) + +;;; Used during cold load. +(define (%ascii-ustring-allocate n) + (let ((s (%ustring-allocate (fix:+ n 1) n 1))) + (ustring-in-nfc! s) + (ustring-in-nfd! s) + (ustring1-set! s n #\null) ;zero-terminate for C + s)) + (define (ustring-ref string index) (case (ustring-cp-size string) ((1) (ustring1-ref string index)) -- 2.25.1