From e14354dc5b3bb648227816db94e83e85fabe820c Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Tue, 7 Feb 2017 22:29:17 -0800 Subject: [PATCH] Add value conversions to the UCD property code generator. This translates the string values into something more sensible for Scheme. --- src/etc/ucd-converter.scm | 110 ++++++++++++++++++++++++++++++++++- src/runtime/ucd-table-gc.scm | 58 +++++++++--------- 2 files changed, 136 insertions(+), 32 deletions(-) diff --git a/src/etc/ucd-converter.scm b/src/etc/ucd-converter.scm index 2a238b7d3..30d83cef7 100644 --- a/src/etc/ucd-converter.scm +++ b/src/etc/ucd-converter.scm @@ -415,12 +415,13 @@ USA. (let ((maker (entries-maker)) (entry-count 0) (unique-entry-count 0) - (byte-count 0)) + (byte-count 0) + (convert-value (value-converter prop-name))) (define (make-value-code value) (lambda (offsets-name sv-name table-name) offsets-name - (values #f #f `(,sv-name ,table-name ,value)))) + (values #f #f `(,sv-name ,table-name ,(convert-value value))))) (define (make-node-code n-bits offset indexes) (receive (bytes-per-entry offsets-expr coder) @@ -750,4 +751,107 @@ USA. (values (cons (make-cpr (cpr-start cpr) cp) value) (cons (make-cpr cp (cpr-end cpr)) value)) (values (cons cpr value) - #f))) \ No newline at end of file + #f))) + +;;;; Value conversions + +;;; Converted values must be constant expressions, so quoting may be needed. + +(define (value-converter prop-name) + (cond ((string=? prop-name "CI") converter:boolean) + ((string=? prop-name "CWCF") converter:boolean) + ((string=? prop-name "CWCM") converter:boolean) + ((string=? prop-name "CWKCF") converter:boolean) + ((string=? prop-name "CWL") converter:boolean) + ((string=? prop-name "CWT") converter:boolean) + ((string=? prop-name "CWU") converter:boolean) + ((string=? prop-name "Cased") converter:boolean) + ((string=? prop-name "Lower") converter:boolean) + ((string=? prop-name "NFKC_CF") converter:zero-or-more-code-points) + ((string=? prop-name "OLower") converter:boolean) + ((string=? prop-name "OUpper") converter:boolean) + ((string=? prop-name "Upper") converter:boolean) + ((string=? prop-name "cf") converter:one-or-more-code-points) + ((string=? prop-name "gc") converter:category) + ((string=? prop-name "lc") converter:one-or-more-code-points) + ((string=? prop-name "scf") converter:single-code-point) + ((string=? prop-name "slc") converter:single-code-point) + ((string=? prop-name "stc") converter:single-code-point) + ((string=? prop-name "suc") converter:single-code-point) + ((string=? prop-name "tc") converter:one-or-more-code-points) + ((string=? prop-name "uc") converter:one-or-more-code-points) + (else (error "Unsupported property:" prop-name)))) + +(define (converter:boolean value) + (cond ((not value) (default-object)) + ((string=? value "N") #f) + ((string=? value "Y") #t) + (else (error "Illegal boolean value:" value)))) + +(define (converter:category value) + (cond ((not value) value) + ((string=? value "Lu") ''letter:uppercase) + ((string=? value "Ll") ''letter:lowercase) + ((string=? value "Lt") ''letter:titlecase) + ((string=? value "Lm") ''letter:modifier) + ((string=? value "Lo") ''letter:other) + ((string=? value "Mn") ''mark:nonspacing) + ((string=? value "Mc") ''mark:spacing-combining) + ((string=? value "Me") ''mark:enclosing) + ((string=? value "Nd") ''number:decimal-digit) + ((string=? value "Nl") ''number:letter) + ((string=? value "No") ''number:other) + ((string=? value "Pc") ''punctuation:connector) + ((string=? value "Pd") ''punctuation:dash) + ((string=? value "Ps") ''punctuation:open) + ((string=? value "Pe") ''punctuation:close) + ((string=? value "Pi") ''punctuation:initial-quote) + ((string=? value "Pf") ''punctuation:final-quote) + ((string=? value "Po") ''punctuation:other) + ((string=? value "Sm") ''symbol:math) + ((string=? value "Sc") ''symbol:currency) + ((string=? value "Sk") ''symbol:modifier) + ((string=? value "So") ''symbol:other) + ((string=? value "Zs") ''separator:space) + ((string=? value "Zl") ''separator:line) + ((string=? value "Zp") ''separator:paragraph) + ((string=? value "Cc") ''other:control) + ((string=? value "Cf") ''other:format) + ((string=? value "Cs") ''other:surrogate) + ((string=? value "Co") ''other:private-use) + ((string=? value "Cn") ''other:not-assigned) + (else (error "Illegal category value:" value)))) + +(define (converter:single-code-point value) + (cond ((not value) (default-object)) + ((string=? value "#") #f) + ((string->number value 16) + => (lambda (cp) + (if (not (unicode-code-point? cp)) + (error "Illegal code-point value:" value)) + cp)) + (else (error "Illegal code-point value:" value)))) + +(define (converter:zero-or-more-code-points value) + (convert-code-points value #t)) + +(define (converter:one-or-more-code-points value) + (convert-code-points value #f)) + +(define (convert-code-points value zero-ok?) + (cond ((not value) (default-object)) + ((string=? value "#") #f) + ((string=? value "") + (if (not zero-ok?) + (error "At least one code point required:" value)) + ''()) + (else + `',(map (lambda (part) + (let ((cp (string->number part 16 #t))) + (if (not (unicode-code-point? cp)) + (error "Illegal code-points value:" value)) + cp)) + (code-points-splitter value))))) + +(define code-points-splitter + (string-splitter #\space #f)) \ No newline at end of file diff --git a/src/runtime/ucd-table-gc.scm b/src/runtime/ucd-table-gc.scm index 0e224d27e..8d7745149 100644 --- a/src/runtime/ucd-table-gc.scm +++ b/src/runtime/ucd-table-gc.scm @@ -26,7 +26,7 @@ USA. ;;;; UCD property: gc -;;; Generated from Unicode 9.0.0 UCD at 2017-02-07T20:26:42-08 +;;; Generated from Unicode 9.0.0 UCD at 2017-02-07T22:28:43-08 (declare (usual-integrations)) @@ -39,112 +39,112 @@ USA. (define (ucd-gc-entry-0 sv table) sv table - "Cc") + 'other:control) (define (ucd-gc-entry-1 sv table) sv table - "Zs") + 'separator:space) (define (ucd-gc-entry-2 sv table) sv table - "Po") + 'punctuation:other) (define (ucd-gc-entry-3 sv table) sv table - "Sc") + 'symbol:currency) (define (ucd-gc-entry-4 sv table) sv table - "Ps") + 'punctuation:open) (define (ucd-gc-entry-5 sv table) sv table - "Pe") + 'punctuation:close) (define (ucd-gc-entry-6 sv table) sv table - "Sm") + 'symbol:math) (define (ucd-gc-entry-7 sv table) sv table - "Pd") + 'punctuation:dash) (define (ucd-gc-entry-8 sv table) sv table - "Nd") + 'number:decimal-digit) (define (ucd-gc-entry-9 sv table) sv table - "Lu") + 'letter:uppercase) (define (ucd-gc-entry-10 sv table) sv table - "Sk") + 'symbol:modifier) (define (ucd-gc-entry-11 sv table) sv table - "Pc") + 'punctuation:connector) (define (ucd-gc-entry-12 sv table) sv table - "Ll") + 'letter:lowercase) (define (ucd-gc-entry-13 sv table) sv table - "So") + 'symbol:other) (define (ucd-gc-entry-14 sv table) sv table - "Lo") + 'letter:other) (define (ucd-gc-entry-15 sv table) sv table - "Pi") + 'punctuation:initial-quote) (define (ucd-gc-entry-16 sv table) sv table - "Cf") + 'other:format) (define (ucd-gc-entry-17 sv table) sv table - "No") + 'number:other) (define (ucd-gc-entry-18 sv table) sv table - "Pf") + 'punctuation:final-quote) (define (ucd-gc-entry-19 sv table) sv table - "Lt") + 'letter:titlecase) (define (ucd-gc-entry-20 sv table) sv table - "Lm") + 'letter:modifier) (define (ucd-gc-entry-21 sv table) sv table - "Mn") + 'mark:nonspacing) (define (ucd-gc-entry-22 sv table) sv @@ -154,32 +154,32 @@ USA. (define (ucd-gc-entry-23 sv table) sv table - "Me") + 'mark:enclosing) (define (ucd-gc-entry-24 sv table) sv table - "Mc") + 'mark:spacing-combining) (define (ucd-gc-entry-25 sv table) sv table - "Nl") + 'number:letter) (define (ucd-gc-entry-26 sv table) sv table - "Zl") + 'separator:line) (define (ucd-gc-entry-27 sv table) sv table - "Zp") + 'separator:paragraph) (define (ucd-gc-entry-28 sv table) sv table - "Co") + 'other:private-use) ;;; (1 2 2 2 3 2 2 2 4 5 2 6 2 7 2 2) (define-deferred ucd-gc-entry-29 -- 2.25.1