From ab9322c0437ba7b35b62d10e221116259c9d7c93 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sat, 18 Feb 2017 01:14:09 -0800 Subject: [PATCH] Refactor the converter to separate the value mapping from the dispatcher. --- src/etc/ucd-converter.scm | 433 ++++++++++++++++++++--------------- src/runtime/ucd-table-gc.scm | 90 +++----- 2 files changed, 273 insertions(+), 250 deletions(-) diff --git a/src/etc/ucd-converter.scm b/src/etc/ucd-converter.scm index 8dc166644..d12deca16 100644 --- a/src/etc/ucd-converter.scm +++ b/src/etc/ucd-converter.scm @@ -455,8 +455,9 @@ USA. (define (metadata->code-generator metadata) (let ((name (metadata-name metadata)) (type-spec (metadata-type-spec metadata))) - (cond ((string=? name "NFC_QC") code-generator:qc) - ((string=? name "NFKC_QC") code-generator:qc) + (cond ((string=? name "NFC_QC") code-generator:nfc-qc) + ((string=? name "NFKC_QC") code-generator:nfc-qc) + ((string=? name "gc") code-generator:gc) ((string=? name "nt") code-generator:nt) ((eq? type-spec 'boolean) code-generator:boolean) ((eq? type-spec 'byte) code-generator:byte) @@ -464,7 +465,6 @@ USA. ((eq? type-spec 'code-point*) code-generator:code-point*) ((eq? type-spec 'code-point+) code-generator:code-point+) ((eq? type-spec 'rational-or-nan) code-generator:rational-or-nan) - ((mapped-enum-type? type-spec) code-generator:mapped-enum) (else (error "Unsupported metadata:" metadata))))) (define (code-generator:boolean prop-name metadata prop-alist proc-name) @@ -479,20 +479,129 @@ USA. (and (equal? "Y" (cdr value-map)) (car value-map))) prop-alist)))))) + +(define (code-generator:byte prop-name metadata prop-alist proc-name) + ((hashed-code-generator value-manager:byte) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:code-point prop-name metadata prop-alist proc-name) + ((hashed-code-generator value-manager:code-point) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:code-point* prop-name metadata prop-alist proc-name) + ((hashed-code-generator value-manager:code-points) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:code-point+ prop-name metadata prop-alist proc-name) + ((hashed-code-generator value-manager:code-points) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:gc prop-name metadata prop-alist proc-name) + ((trie-code-generator (mapped-enum-value-manager #f metadata) '(5 8 8)) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:nfc-qc prop-name metadata prop-alist proc-name) + ((hashed-code-generator (mapped-enum-value-manager "Y" metadata)) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:nt prop-name metadata prop-alist proc-name) + ((hashed-code-generator (mapped-enum-value-manager "None" metadata)) + prop-name metadata prop-alist proc-name)) + +(define (code-generator:rational-or-nan prop-name metadata prop-alist proc-name) + ((hashed-code-generator value-manager:rational-or-nan) + prop-name metadata prop-alist proc-name)) + +(define (value-manager default-string converter + #!optional runtime-default runtime-converter) + (make-value-manager default-string + converter + (if (default-object? runtime-default) + (let ((value + (and default-string + (converter default-string)))) + (lambda (char-expr) + char-expr + value)) + runtime-default) + (if (default-object? runtime-converter) + (lambda (sv-expr) sv-expr) + runtime-converter))) + +(define-record-type + (make-value-manager default-string + converter + runtime-default + runtime-converter) + value-manager? + (default-string value-manager-default-string) + (converter value-manager-converter) + (runtime-default value-manager-runtime-default) + (runtime-converter value-manager-runtime-converter)) + +(define (string->cp string) + (let ((cp (string->number string 16))) + (if (not (unicode-code-point? cp)) + (error "Illegal code-point value:" string)) + cp)) + +(define value-manager:code-point + (value-manager "#" + string->cp + (lambda (char-expr) char-expr) + (lambda (sv-expr) `(integer->char ,sv-expr)))) + +(define value-manager:code-points + (value-manager "#" + (let ((splitter (string-splitter #\space #f))) + (lambda (value) + (if (ustring=? "" value) + '() + (map string->cp (splitter value))))) + (lambda (char-expr) `(list ,char-expr)) + (lambda (svs-expr) `(map integer->char ,svs-expr)))) -(define (hashed-code-generator default-string value-converter - #!optional default-value runtime-value-converter) - (let ((default-value - (if (default-object? default-value) - (let ((value (value-converter default-string))) - (lambda (char-expr) - char-expr - value)) - default-value)) - (runtime-value-converter - (if (default-object? runtime-value-converter) - (lambda (sv-expr) sv-expr) - runtime-value-converter))) +(define value-manager:byte + (value-manager "0" + (lambda (string) + (let ((n (string->number string 10))) + (if (not (and (index-fixnum? n) (fix:<= n 254))) + (error "Illegal ccc value:" string)) + n)))) + +(define value-manager:rational-or-nan + (value-manager "NaN" + (lambda (string) + (if (string=? string "NaN") + #f + (let ((n (string->number string 10))) + (if (not (exact-rational? n)) + (error "Illegal rational value:" string)) + n))))) + +(define (converter:mapped-enum metadata) + (let ((name (symbol->string (metadata-full-name metadata))) + (translations + (mapped-enum-type-translations (metadata-type-spec metadata)))) + (lambda (value) + (if value + (let ((p + (find (lambda (p) + (ustring=? value (car p))) + translations))) + (if (not p) + (error (ustring-append "Illegal " name " value:") value)) + (cdr p)) + (default-object))))) + +(define (mapped-enum-value-manager default-string metadata) + (value-manager default-string (converter:mapped-enum metadata))) + +(define (hashed-code-generator value-manager) + (let ((default-string (value-manager-default-string value-manager)) + (value-converter (value-manager-converter value-manager)) + (default-value (value-manager-runtime-default value-manager)) + (runtime-converter (value-manager-runtime-converter value-manager))) (lambda (prop-name metadata prop-alist proc-name) (let ((table-name (symbol "char-map:" (metadata-full-name metadata))) (mapping @@ -502,7 +611,8 @@ USA. (cons cp value))) (expand-cpr (car p)))) (remove (lambda (p) - (ustring=? default-string (cdr p))) + (and default-string + (ustring=? default-string (cdr p)))) prop-alist)))) (with-notification (lambda (port) @@ -518,182 +628,91 @@ USA. (for-each (lambda (p) (hash-table-set! table (integer->char (car p)) - ,(runtime-value-converter '(cdr p)))) + ,(runtime-converter '(cdr p)))) ',mapping) table))))))) -(define (string->cp string) - (let ((cp (string->number string 16 #t))) - (if (not (unicode-code-point? cp)) - (error "Illegal code-point value:" string)) - cp)) - -(define (string->cps value) - (if (ustring=? "" value) - '() - (map string->cp (code-points-splitter value)))) - -(define code-points-splitter - (string-splitter #\space #f)) - -(define code-generator:code-point - (hashed-code-generator "#" - string->cp - (lambda (char-expr) char-expr) - (lambda (sv-expr) `(integer->char ,sv-expr)))) - -(define (code-points-default char-expr) - `(list ,char-expr)) - -(define (code-points-converter svs-expr) - `(map integer->char ,svs-expr)) - -(define code-generator:code-point* - (hashed-code-generator "#" - string->cps - code-points-default - code-points-converter)) - -(define code-generator:code-point+ - (hashed-code-generator "#" - string->cps - code-points-default - code-points-converter)) - -(define code-generator:byte - (hashed-code-generator "0" - (lambda (string) - (let ((n (string->number string 10 #t))) - (if (not (and (fix:<= 0 n) (fix:<= n 254))) - (error "Illegal ccc value:" string)) - n)))) - -(define code-generator:rational-or-nan - (hashed-code-generator "NaN" - (lambda (string) - (if (string=? string "NaN") - #f - (let ((n (string->number string 10 #t))) - (if (not (exact-rational? n)) - (error "Illegal rational value:" string)) - n))))) - -(define (converter:mapped-enum metadata) - (let ((name (symbol->string (metadata-full-name metadata))) - (translations - (mapped-enum-type-translations (metadata-type-spec metadata)))) - (lambda (value) - (if value - (let ((p - (find (lambda (p) - (ustring=? value (car p))) - translations))) - (if (not p) - (error (ustring-append "Illegal " name " value:") value)) - (cdr p)) - (default-object))))) - -(define code-generator:nt - (hashed-code-generator "None" (converter:mapped-enum (prop-metadata "nt")))) - -(define code-generator:qc - (hashed-code-generator "Y" (converter:mapped-enum (prop-metadata "NFC_QC")))) - -(define (code-generator:mapped-enum prop-name metadata prop-alist proc-name) - (let ((maker (entries-maker)) - (entry-count 0) - (unique-entry-count 0) - (byte-count 0) - (convert-value (converter:mapped-enum metadata))) - - (define (make-value-code value) - (let ((value* - (let ((converted (convert-value value))) - (if (or (symbol? converted) - (list? converted) - (vector? converted)) - `',converted - converted)))) - (lambda (offsets-name sv-name table-name) - offsets-name - (values #f #f `(,sv-name ,table-name ,value*))))) - - (define (make-node-code n-bits offset indexes) - (receive (bytes-per-entry offsets-expr coder) - (or (try-linear indexes) - (try-8-bit-direct indexes) - (try-8-bit-spread indexes) - (try-16-bit-direct indexes) - (try-16-bit-spread indexes) - (error "Dispatch won't fit in 16 bits:" indexes)) - (count-entries! indexes bytes-per-entry) - (lambda (offsets-name sv-name table-name) - (values indexes - offsets-expr - `(((vector-ref ,table-name - ,(coder offsets-name - (lambda (shift) - `(fix:and ,(* (expt 2 shift) - (- (expt 2 n-bits) 1)) - ,(code:rsh sv-name - (- offset shift)))))) - ,sv-name - ,table-name)))))) - - (define (count-entries! indexes bytes-per-entry) - (let ((n (length indexes)) - (u (length (delete-duplicates indexes eqv?)))) - (set! entry-count (+ entry-count n)) - (set! unique-entry-count (+ unique-entry-count u)) - (set! byte-count (+ byte-count (* n bytes-per-entry)))) - unspecific) - - (let ((table (make-equal-hash-table)) - (make-entry (maker 'make-entry))) - - ;; Make sure that the leaf nodes are at the beginning of the table. - (for-each (lambda (value) - (hash-table/intern! table value - (lambda () - (make-entry (make-value-code value))))) - (map cdr prop-alist)) - - (let loop - ((entries (expand-ranges (slice-prop-alist prop-alist '(5 8 8)))) - (n-max 21)) - (hash-table/intern! table entries - (lambda () - (make-entry - (let* ((n-bits (car entries)) - (n-max* (- n-max n-bits))) - (make-node-code n-bits n-max* - (map (lambda (entry) - (loop entry n-max*)) - (cdr entries))))))))) - - (let ((root-entry ((maker 'get-root-entry))) - (table-entries ((maker 'get-table-entries)))) - (report-table-statistics prop-name entry-count unique-entry-count - byte-count (length table-entries)) - (generate-top-level (ustring-downcase prop-name) - root-entry table-entries proc-name)))) +(define (trie-code-generator value-manager slices) + (let ((default-string (value-manager-default-string value-manager)) + (value-converter (value-manager-converter value-manager)) + (default-value (value-manager-runtime-default value-manager)) + (runtime-converter (value-manager-runtime-converter value-manager))) + (lambda (prop-name metadata prop-alist proc-name) + (let ((maker (entries-maker)) + (stats (trie-stats))) + + (define (make-value-code value) + (if (and default-string (string=? value default-string)) + (lambda (offsets-name sv-name table-name) + offsets-name + (values #f #f + `((declare (ignore ,table-name)) + ,(default-value sv-name)))) + (let ((value* + (let ((converted (value-converter value))) + (if (or (symbol? converted) + (list? converted) + (vector? converted)) + `',converted + converted)))) + (lambda (offsets-name sv-name table-name) + offsets-name + (values #f #f + `((declare (ignore ,sv-name ,table-name)) + ,(runtime-converter value*))))))) + + (define (make-node-code n-bits offset indexes) + (receive (bytes-per-entry offsets-expr coder) + (or (try-linear indexes) + (try-8-bit-direct indexes) + (try-8-bit-spread indexes) + (try-16-bit-direct indexes) + (try-16-bit-spread indexes) + (error "Dispatch won't fit in 16 bits:" indexes)) + ((stats 'record!) indexes bytes-per-entry) + (lambda (offsets-name sv-name table-name) + (values indexes + offsets-expr + `(((vector-ref ,table-name + ,(coder offsets-name + (lambda (shift) + `(fix:and ,(* (expt 2 shift) + (- (expt 2 n-bits) 1)) + ,(code:rsh sv-name + (- offset + shift)))))) + ,sv-name + ,table-name)))))) + + (let ((table (make-equal-hash-table)) + (make-entry (maker 'make-entry))) + + ;; Make sure that the leaf nodes are at the beginning of the table. + (for-each (lambda (value) + (hash-table/intern! table value + (lambda () + (make-entry (make-value-code value))))) + (map cdr prop-alist)) + + (let loop + ((entries (expand-ranges (slice-prop-alist prop-alist slices))) + (n-max 21)) + (hash-table/intern! table entries + (lambda () + (make-entry + (let* ((n-bits (car entries)) + (n-max* (- n-max n-bits))) + (make-node-code n-bits n-max* + (map (lambda (entry) + (loop entry n-max*)) + (cdr entries))))))))) + + (let ((root-entry ((maker 'get-root-entry))) + (table-entries ((maker 'get-table-entries)))) + ((stats 'report) prop-name (length table-entries)) + (generate-top-level (ustring-downcase prop-name) + root-entry table-entries proc-name)))))) -(define (report-table-statistics prop-name entry-count unique-entry-count - byte-count n-entries) - (with-notification - (lambda (port) - (write-string "UCD property " port) - (write-string prop-name port) - (write-string ": dispatch tables = " port) - (write entry-count port) - (write-string "/" port) - (write unique-entry-count port) - (write-string " entries, " port) - (write byte-count port) - (write-string " bytes; object table = " port) - (write n-entries port) - (write-string " words" port)))) - (define (generate-top-level prop-name root-entry table-entries proc-name) (let ((table-name (symbol "ucd-" prop-name "-entries")) (entry-names @@ -716,7 +735,7 @@ USA. (define ,table-name) ,@(generate-table-initializers table-name entry-names)))) - + (define (generate-entry-definition name entry sv-name table-name arg-names wrap-body) (receive (comment offsets-expr body) (entry 'offsets sv-name table-name) @@ -870,6 +889,40 @@ USA. ((get-table-entries) (lambda () (reverse (cdr entries)))) ((get-root-entry) (lambda () (car entries))) (else (error "Unknown operator:" operator)))))) + +(define (trie-stats) + (let ((entry-count 0) + (unique-entry-count 0) + (byte-count 0)) + + (define (record! indexes bytes-per-entry) + (let ((n (length indexes)) + (u (length (delete-duplicates indexes eqv?)))) + (set! entry-count (+ entry-count n)) + (set! unique-entry-count (+ unique-entry-count u)) + (set! byte-count (+ byte-count (* n bytes-per-entry)))) + unspecific) + + (define (report prop-name n-entries) + (with-notification + (lambda (port) + (write-string "UCD property " port) + (write-string prop-name port) + (write-string ": dispatch tables = " port) + (write entry-count port) + (write-string "/" port) + (write unique-entry-count port) + (write-string " entries, " port) + (write byte-count port) + (write-string " bytes; object table = " port) + (write n-entries port) + (write-string " words" port)))) + + (lambda (operator) + (case operator + ((record!) record!) + ((report) report) + (else (error "Unknown operator:" operator)))))) (define (expand-ranges stratified) (if (list? stratified) diff --git a/src/runtime/ucd-table-gc.scm b/src/runtime/ucd-table-gc.scm index 3e371f4fa..852a396f7 100644 --- a/src/runtime/ucd-table-gc.scm +++ b/src/runtime/ucd-table-gc.scm @@ -37,153 +37,123 @@ USA. ((vector-ref ucd-gc-entries (bytevector-u8-ref offsets (fix:and 31 (fix:lsh sv -16)))) sv ucd-gc-entries))))) (define (ucd-gc-entry-0 sv table) - sv - table + (declare (ignore sv table)) 'other:control) (define (ucd-gc-entry-1 sv table) - sv - table + (declare (ignore sv table)) 'separator:space) (define (ucd-gc-entry-2 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:other) (define (ucd-gc-entry-3 sv table) - sv - table + (declare (ignore sv table)) 'symbol:currency) (define (ucd-gc-entry-4 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:open) (define (ucd-gc-entry-5 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:close) (define (ucd-gc-entry-6 sv table) - sv - table + (declare (ignore sv table)) 'symbol:math) (define (ucd-gc-entry-7 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:dash) (define (ucd-gc-entry-8 sv table) - sv - table + (declare (ignore sv table)) 'number:decimal-digit) (define (ucd-gc-entry-9 sv table) - sv - table + (declare (ignore sv table)) 'letter:uppercase) (define (ucd-gc-entry-10 sv table) - sv - table + (declare (ignore sv table)) 'symbol:modifier) (define (ucd-gc-entry-11 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:connector) (define (ucd-gc-entry-12 sv table) - sv - table + (declare (ignore sv table)) 'letter:lowercase) (define (ucd-gc-entry-13 sv table) - sv - table + (declare (ignore sv table)) 'symbol:other) (define (ucd-gc-entry-14 sv table) - sv - table + (declare (ignore sv table)) 'letter:other) (define (ucd-gc-entry-15 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:initial-quote) (define (ucd-gc-entry-16 sv table) - sv - table + (declare (ignore sv table)) 'other:format) (define (ucd-gc-entry-17 sv table) - sv - table + (declare (ignore sv table)) 'number:other) (define (ucd-gc-entry-18 sv table) - sv - table + (declare (ignore sv table)) 'punctuation:final-quote) (define (ucd-gc-entry-19 sv table) - sv - table + (declare (ignore sv table)) 'letter:titlecase) (define (ucd-gc-entry-20 sv table) - sv - table + (declare (ignore sv table)) 'letter:modifier) (define (ucd-gc-entry-21 sv table) - sv - table + (declare (ignore sv table)) 'mark:nonspacing) (define (ucd-gc-entry-22 sv table) - sv - table + (declare (ignore sv table)) 'other:not-assigned) (define (ucd-gc-entry-23 sv table) - sv - table + (declare (ignore sv table)) 'mark:enclosing) (define (ucd-gc-entry-24 sv table) - sv - table + (declare (ignore sv table)) 'mark:spacing-combining) (define (ucd-gc-entry-25 sv table) - sv - table + (declare (ignore sv table)) 'number:letter) (define (ucd-gc-entry-26 sv table) - sv - table + (declare (ignore sv table)) 'separator:line) (define (ucd-gc-entry-27 sv table) - sv - table + (declare (ignore sv table)) 'separator:paragraph) (define (ucd-gc-entry-28 sv table) - sv - table + (declare (ignore sv table)) 'other:surrogate) (define (ucd-gc-entry-29 sv table) - sv - table + (declare (ignore sv table)) 'other:private-use) (define-deferred ucd-gc-entry-30 -- 2.25.1