From 85761b267d2a230a1ef6583b4d17c5ed7e91cc0c Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sun, 1 Dec 2019 20:49:07 -0800 Subject: [PATCH] Reorganization in preparation for char-set refactor. --- src/runtime/char-set.scm | 452 ++++++++++++++++++++------------------- 1 file changed, 228 insertions(+), 224 deletions(-) diff --git a/src/runtime/char-set.scm b/src/runtime/char-set.scm index ae86ffa3a..848b6d8bf 100644 --- a/src/runtime/char-set.scm +++ b/src/runtime/char-set.scm @@ -179,84 +179,28 @@ USA. (%high-set! high i (car ilist))) high))) -(define (char-set->ilist char-set) - (reverse! - (%high->ilist (%char-set-high char-set) - (%low->ilist (%char-set-low char-set))))) - -(define (%low->ilist low) - (let ((low-limit (%low-limit low))) - - (define (find-start i result) - (if (fix:< i low-limit) - (if (%low-ref low i) - (find-end i result) - (find-start (fix:+ i 1) result)) - result)) - - (define (find-end start result) - (let loop ((i (fix:+ start 1))) - (if (fix:< i low-limit) - (if (%low-ref low i) - (loop (fix:+ i 1)) - (find-start i (reverse-ilist-cons start i result))) - (reverse-ilist-cons start low-limit result)))) - - (find-start 0 '()))) - -(define (%high->ilist high result) - (let ((n (%high-limit high))) +(define (ilist-invert ilist) - (define (loop i result) - (if (fix:< i n) - (loop (fix:+ i 1) - (cons (%high-ref high i) result)) - result)) + (define (loop start ilist inverse) + (if (pair? ilist) + (loop (cadr ilist) + (cddr ilist) + (reverse-ilist-cons start (car ilist) inverse)) + (reverse! + (if (fix:< start #x110000) + (reverse-ilist-cons start #x110000 inverse) + inverse)))) - (if (and (fix:> n 0) - (pair? result) - (fix:= (%high-ref high 0) (car result))) - (loop 1 (cdr result)) - (loop 0 result)))) + (if (or (not (pair? ilist)) + (fix:< 0 (car ilist))) + (loop 0 ilist '()) + (loop (cadr ilist) (cddr ilist) '()))) (define-integrable (ilist-cons start end ilist) (cons start (cons end ilist))) (define-integrable (reverse-ilist-cons start end ilist) (cons end (cons start ilist))) - -(define (char-set-size char-set) - (fix:+ (%low-size (%char-set-low char-set)) - (%high-size (%char-set-high char-set)))) - -(define (%low-size low) - (let ((low-limit (%low-limit low))) - - (define (find-start i size) - (if (fix:< i low-limit) - (if (%low-ref low i) - (let ((end (find-end (fix:+ i 1)))) - (find-start end (fix:+ size (fix:- end i)))) - (find-start (fix:+ i 1) size)) - size)) - - (define (find-end i) - (if (fix:< i low-limit) - (if (%low-ref low i) - (find-end (fix:+ i 1)) - i) - low-limit)) - - (find-start 0 0))) - -(define (%high-size high) - (let ((end (%high-limit high))) - (do ((index 0 (fix:+ index 2)) - (size 0 - (fix:+ size - (fix:- (%high-ref high (fix:+ index 1)) - (%high-ref high index))))) - ((not (fix:< index end)) size)))) (define (ilist-combiner combine) @@ -310,43 +254,57 @@ USA. (lambda (il1 il2) (loop 0 0 il1 il2 '()))) + +(define ilist-union + (ilist-combiner (lambda (a b) (or a b)))) + +(define ilist-intersection + (ilist-combiner (lambda (a b) (and a b)))) + +(define ilist-difference + (ilist-combiner (lambda (a b) (and a (not b))))) -;;;; Constructors +;;;; Ranges -(define (char-set . chars) - (char-set* chars)) +(define (range? object) + (or (and (pair? object) + (index-fixnum? (car object)) + (index-fixnum? (cdr object)) + (fix:<= (cdr object) #x110000) + (fix:<= (car object) (cdr object))) + (unicode-code-point? object))) -(define (char-set* cpl) - (guarantee-list-of cpl-element? cpl 'char-set*) - (char-set-union* (%cpl->char-sets cpl))) +(define (make-range start end) + (if (fix:= (fix:- end start) 1) + start + (cons start end))) -(define (%cpl->char-sets cpl) - (let loop ((cpl cpl) (ranges '()) (char-sets '())) - (cond ((not (pair? cpl)) - (cons (ranges->char-set (normalize-ranges ranges)) - char-sets)) - ((%cpl-element->ranges (car cpl)) - => (lambda (ranges*) - (loop (cdr cpl) - (append ranges* ranges) - char-sets))) - ((char-set? (car cpl)) - (loop (cdr cpl) - ranges - (cons (car cpl) char-sets))) - ((name->char-set (car cpl)) - => (lambda (char-set) - (loop (cdr cpl) - ranges - (cons char-set char-sets)))) - (else - (error:not-a cpl-element? (car cpl)))))) +(define (range-start range) + (if (pair? range) + (car range) + range)) -(define (%cpl-element->ranges elt) - (cond ((range? elt) (list elt)) - ((char? elt) (list (char-code elt))) - ((string? elt) (map char->integer (string->list elt))) - (else #f))) +(define (range-end range) + (if (pair? range) + (cdr range) + (fix:+ range 1))) + +(define (rangechar-set ranges) + (let loop ((ranges ranges) (ilist '())) + (if (pair? ranges) + (loop (cdr ranges) + (reverse-ilist-cons (range-start (car ranges)) + (range-end (car ranges)) + ilist)) + (ilist->char-set (reverse! ilist))))) (define (normalize-ranges ranges) (let ((ranges @@ -368,28 +326,6 @@ USA. (set-cdr! ranges (cddr ranges)) (loop ranges))))))) ranges)) - -(define (string->char-set string) - (char-set* (map char->integer (string->list string)))) - -(define (compute-char-set procedure) - - (define (find-start cp end ilist) - (if (fix:< cp end) - (if (procedure cp) - (find-end (fix:+ cp 1) end cp ilist) - (find-start (fix:+ cp 1) end ilist)) - ilist)) - - (define (find-end cp end start ilist) - (if (fix:< cp end) - (if (procedure cp) - (find-end (fix:+ cp 1) end start ilist) - (find-start (fix:+ cp 1) end (ilist-cons cp start ilist))) - (ilist-cons end start ilist))) - - (ilist->char-set - (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '()))))) ;;;; Code-point lists @@ -425,47 +361,176 @@ USA. ((whitespace white space) char-set:whitespace) (else #f))) -(define (range? object) - (or (and (pair? object) - (index-fixnum? (car object)) - (index-fixnum? (cdr object)) - (fix:<= (cdr object) #x110000) - (fix:<= (car object) (cdr object))) - (unicode-code-point? object))) +(define (%cpl->char-sets cpl) + (let loop ((cpl cpl) (ranges '()) (char-sets '())) + (cond ((not (pair? cpl)) + (cons (ranges->char-set (normalize-ranges ranges)) + char-sets)) + ((%cpl-element->ranges (car cpl)) + => (lambda (ranges*) + (loop (cdr cpl) + (append ranges* ranges) + char-sets))) + ((char-set? (car cpl)) + (loop (cdr cpl) + ranges + (cons (car cpl) char-sets))) + ((name->char-set (car cpl)) + => (lambda (char-set) + (loop (cdr cpl) + ranges + (cons char-set char-sets)))) + (else + (error:not-a cpl-element? (car cpl)))))) -(define (make-range start end) - (if (fix:= (fix:- end start) 1) - start - (cons start end))) +(define (%cpl-element->ranges elt) + (cond ((range? elt) (list elt)) + ((char? elt) (list (char-code elt))) + ((string? elt) (map char->integer (string->list elt))) + (else #f))) + +;;;; Predicates -(define (range-start range) - (if (pair? range) - (car range) - range)) +(define (char-set= char-set . char-sets) + (every (lambda (char-set*) + (and (bytevector=? (%char-set-low char-set*) + (%char-set-low char-set)) + (bytevector=? (%char-set-high char-set*) + (%char-set-high char-set)))) + char-sets)) -(define (range-end range) - (if (pair? range) - (cdr range) - (fix:+ range 1))) +(define (char-set-hash char-set #!optional modulus) + (let ((hash + (primitive-object-hash-2 (%char-set-low char-set) + (%char-set-high char-set)))) + (if (default-object? modulus) + hash + (begin + (guarantee positive-fixnum? modulus 'char-set-hash) + (fix:remainder hash modulus))))) -(define (rangechar-set ranges) - (let loop ((ranges ranges) (ilist '())) - (if (pair? ranges) - (loop (cdr ranges) - (reverse-ilist-cons (range-start (car ranges)) - (range-end (car ranges)) - ilist)) - (ilist->char-set (reverse! ilist))))) +(define (char-sets-disjoint? char-set . char-sets) + (every (lambda (char-set*) + (char-set-empty? (char-set-intersection char-set char-set*))) + char-sets)) -;;;; Accessors +;;;; Constructors + +(define (char-set . cpl) + (char-set* cpl)) + +(define (char-set* cpl) + (guarantee-list-of cpl-element? cpl 'char-set*) + (char-set-union* (%cpl->char-sets cpl))) + +(define (string->char-set string) + (char-set* (map char->integer (string->list string)))) + +(define (compute-char-set procedure) + + (define (find-start cp end ilist) + (if (fix:< cp end) + (if (procedure cp) + (find-end (fix:+ cp 1) end cp ilist) + (find-start (fix:+ cp 1) end ilist)) + ilist)) + + (define (find-end cp end start ilist) + (if (fix:< cp end) + (if (procedure cp) + (find-end (fix:+ cp 1) end start ilist) + (find-start (fix:+ cp 1) end (ilist-cons cp start ilist))) + (ilist-cons end start ilist))) + + (ilist->char-set + (reverse! (find-start #xE000 #x110000 (find-start 0 #xD800 '()))))) + +;;;; Queries + +(define (char-set->ilist char-set) + (reverse! + (%high->ilist (%char-set-high char-set) + (%low->ilist (%char-set-low char-set))))) + +(define (%low->ilist low) + (let ((low-limit (%low-limit low))) + + (define (find-start i result) + (if (fix:< i low-limit) + (if (%low-ref low i) + (find-end i result) + (find-start (fix:+ i 1) result)) + result)) + + (define (find-end start result) + (let loop ((i (fix:+ start 1))) + (if (fix:< i low-limit) + (if (%low-ref low i) + (loop (fix:+ i 1)) + (find-start i (reverse-ilist-cons start i result))) + (reverse-ilist-cons start low-limit result)))) + + (find-start 0 '()))) + +(define (%high->ilist high result) + (let ((n (%high-limit high))) + + (define (loop i result) + (if (fix:< i n) + (loop (fix:+ i 1) + (cons (%high-ref high i) result)) + result)) + + (if (and (fix:> n 0) + (pair? result) + (fix:= (%high-ref high 0) (car result))) + (loop 1 (cdr result)) + (loop 0 result)))) + +(define (char-set->code-points char-set) + (let loop ((ilist (char-set->ilist char-set)) (ranges '())) + (if (pair? ilist) + (loop (cddr ilist) + (cons (make-range (car ilist) (cadr ilist)) + ranges)) + (reverse! ranges)))) + +(define (char-set-size char-set) + (fix:+ (%low-size (%char-set-low char-set)) + (%high-size (%char-set-high char-set)))) + +(define (%low-size low) + (let ((low-limit (%low-limit low))) + + (define (find-start i size) + (if (fix:< i low-limit) + (if (%low-ref low i) + (let ((end (find-end (fix:+ i 1)))) + (find-start end (fix:+ size (fix:- end i)))) + (find-start (fix:+ i 1) size)) + size)) + + (define (find-end i) + (if (fix:< i low-limit) + (if (%low-ref low i) + (find-end (fix:+ i 1)) + i) + low-limit)) + + (find-start 0 0))) + +(define (%high-size high) + (let ((end (%high-limit high))) + (do ((index 0 (fix:+ index 2)) + (size 0 + (fix:+ size + (fix:- (%high-ref high (fix:+ index 1)) + (%high-ref high index))))) + ((not (fix:< index end)) size)))) (define (char-set-contains? char-set char) (guarantee char? char 'char-set-contains?) @@ -491,67 +556,12 @@ USA. (loop (fix:+ i 2) upper)) (else #t))) #f))))) - -(define (char-set-table char-set) - (force (%char-set-table char-set))) - -(define (char-set= char-set . char-sets) - (every (lambda (char-set*) - (and (bytevector=? (%char-set-low char-set*) - (%char-set-low char-set)) - (bytevector=? (%char-set-high char-set*) - (%char-set-high char-set)))) - char-sets)) - -(define (char-set-empty? cs) - (and (fix:= 0 (bytevector-length (%char-set-low cs))) - (fix:= 0 (bytevector-length (%char-set-high cs))))) - -(define (char-set-hash char-set #!optional modulus) - (let ((hash - (primitive-object-hash-2 (%char-set-low char-set) - (%char-set-high char-set)))) - (if (default-object? modulus) - hash - (begin - (guarantee positive-fixnum? modulus 'char-set-hash) - (fix:remainder hash modulus))))) - -(define (char-set->code-points char-set) - (let loop ((ilist (char-set->ilist char-set)) (ranges '())) - (if (pair? ilist) - (loop (cddr ilist) - (cons (make-range (car ilist) (cadr ilist)) - ranges)) - (reverse! ranges)))) - -(define (char-sets-disjoint? char-set . char-sets) - (every (lambda (char-set*) - (char-set-empty? (char-set-intersection char-set char-set*))) - char-sets)) -;;;; Combinations +;;;; Algebra (define (char-set-invert char-set) (ilist->char-set (ilist-invert (char-set->ilist char-set)))) -(define (ilist-invert ilist) - - (define (loop start ilist inverse) - (if (pair? ilist) - (loop (cadr ilist) - (cddr ilist) - (reverse-ilist-cons start (car ilist) inverse)) - (reverse! - (if (fix:< start #x110000) - (reverse-ilist-cons start #x110000 inverse) - inverse)))) - - (if (or (not (pair? ilist)) - (fix:< 0 (car ilist))) - (loop 0 ilist '()) - (loop (cadr ilist) (cddr ilist) '()))) - (define (char-set-union . char-sets) (char-set-union* char-sets)) @@ -578,15 +588,6 @@ USA. (fold-left ilist-difference (char-set->ilist char-set) (map char-set->ilist char-sets)))) - -(define ilist-union - (ilist-combiner (lambda (a b) (or a b)))) - -(define ilist-intersection - (ilist-combiner (lambda (a b) (and a b)))) - -(define ilist-difference - (ilist-combiner (lambda (a b) (and a (not b))))) ;;;; Char-Set Compiler @@ -625,6 +626,9 @@ USA. (values (loop (cdr pattern) '()) #t) (values (loop pattern '()) #f)))) +(define (char-set-table char-set) + (force (%char-set-table char-set))) + ;;;; Miscellaneous character sets (define char-ctl?) -- 2.25.1