(make-state (matcher-initial-node matcher)
(initial-ctx index
(if (matcher-need-gcb? matcher)
- (string-gcb-stream string start end)
+ (grapheme-cluster-breaks
+ string start end)
'())
capture?))))
(trace-matcher (lambda (port) (write (list 'initial-state initial) port)))
(define (chase-gcbs index gcbs)
(if (and (pair? gcbs) (fix:< (car gcbs) index))
- (chase-gcbs index (force (cdr gcbs)))
+ (chase-gcbs index (cdr gcbs))
gcbs))
(define (all-groups string index ctx)
grapheme-cluster-breaks
grapheme-cluster-length
grapheme-cluster-slice
- string->grapheme-clusters
- string-gcb-fold
- string-gcb-fold-right
- string-gcb-stream))
+ string->grapheme-clusters))
(define-package (runtime ucd-segmentation word)
(files "ucd-word")
(parent (runtime ucd-segmentation))
(export ()
- string-wb-fold
- string-wb-fold-right
- string-wb-stream
- string-word-breaks)
- (export (runtime string)
- find-word-breaks))
+ string-word-breaks))
(define-package (runtime character-set)
(files "char-set")
(define (string-titlecase string)
(let ((builder (string-builder)))
- (find-word-breaks string 0
- (lambda (end start)
- (maybe-titlecase string start end builder)
- end))
+ (fold (lambda (end start)
+ (maybe-titlecase string start end builder)
+ end)
+ 0
+ (string-word-breaks string))
(builder)))
(define (maybe-titlecase string start end builder)
(add-boot-deps! '(runtime ucd-glue) '(runtime ucd-segmentation))
\f
+(define evolver)
+(define grapheme-cluster-breaks)
+(add-boot-init!
+ (lambda ()
+ (set! evolver
+ (make-evolver codes abbrevs extra-states ucd-gcb+ep-value rules))
+ (set! grapheme-cluster-breaks (evolver-interpreter evolver))
+ unspecific))
+
(define codes
'(control
carriage-return
(ri _ ri ri*2)
(any / any)))
-
-(define evolver)
-(define string-gcb-fold)
-(define string-gcb-fold-right)
-(define string-gcb-stream)
-(define string->gcb-names)
-(define show-transitions)
-(add-boot-init!
- (lambda ()
- (set! evolver
- (make-evolver codes abbrevs extra-states ucd-gcb+ep-value rules))
- (set! string-gcb-fold (folder evolver 'string-gcb-fold))
- (set! string-gcb-fold-right (right-folder evolver 'string-gcb-fold-right))
- (set! string-gcb-stream (streamer evolver 'string-gcb-stream))
- (set! string->gcb-names (evolver-string->code-names evolver))
- (set! show-transitions (evolver-show-transitions evolver))
- unspecific))
\f
(define (string->grapheme-clusters string #!optional start end)
- (string-gcb-fold-right (lambda (break prev-break acc)
- (if prev-break
- (cons (substring string prev-break break)
- acc)
- acc))
- '()
- string start end))
+ (let ((breaks (grapheme-cluster-breaks string start end)))
+ (if (pair? breaks)
+ (let loop ((breaks (cdr breaks)) (prev-break (car breaks)))
+ (if (pair? breaks)
+ (cons (substring string prev-break (car breaks))
+ (loop (cdr breaks) (car breaks)))
+ '()))
+ '())))
(define (grapheme-cluster-length string)
- (string-gcb-fold (lambda (break prev-break count)
- (declare (ignore break))
- (if prev-break
- (fix:+ count 1)
- count))
- 0
- string))
+ (gclength (grapheme-cluster-breaks string)))
(define (grapheme-cluster-slice string start end)
;; START and END refer to the cluster breaks, they must be <= the number of
;; clusters in STRING.
- (guarantee index-fixnum? start 'grapheme-cluster-slice)
- (guarantee index-fixnum? end 'grapheme-cluster-slice)
- (if (not (fix:<= start end))
- (error:bad-range-argument start 'grapheme-cluster-slice))
(let ((breaks (grapheme-cluster-breaks string)))
- (string-slice string
- (list-ref breaks start)
- (list-ref breaks end))))
-
-(define (grapheme-cluster-breaks string #!optional start end)
- (let loop ((stream (string-gcb-stream string start end)))
- (if (pair? stream)
- (cons (car stream) (loop (force (cdr stream))))
- '())))
\ No newline at end of file
+ (let ((end (fix:end-index end (gclength breaks) 'grapheme-cluster-slice))
+ (start (fix:start-index start end 'grapheme-cluster-slice)))
+ (string-slice string
+ (list-ref breaks start)
+ (list-ref breaks end)))))
+
+(define (gclength breaks)
+ (let ((n (length breaks)))
+ (if (fix:> n 0)
+ (fix:- n 1)
+ n)))
\ No newline at end of file
(add-boot-deps! '(runtime dynamic))
\f
-(define (folder evolver caller)
- (let ((interpreter (evolver-interpreter evolver)))
- (lambda (kons knil string #!optional start end)
- (let ((end (fix:end-index end (string-length string) caller)))
- (fold kons
- knil
- (interpreter string
- (fix:start-index start end caller)
- end))))))
-
-(define (right-folder evolver caller)
- (let ((interpreter (evolver-interpreter evolver)))
- (lambda (kons knil string #!optional start end)
- (let ((end (fix:end-index end (string-length string) caller)))
- (fold-right kons
- knil
- (interpreter string
- (fix:start-index start end caller)
- end))))))
-
-(define (streamer evolver caller)
- (let ((interpreter (evolver-interpreter evolver)))
- (lambda (string #!optional start end)
- (let ((end (fix:end-index end (string-length string) caller)))
- (list->stream (interpreter string
- (fix:start-index start end caller)
- end))))))
-
-;;; Debugging support:
-
-(define (evolver-string->code-names evolver)
- (let ((char->code-name (evolver-char->code-name evolver)))
- (lambda (string)
- (map char->code-name (string->list string)))))
-
-(define (evolver-char->code-name evolver)
- (let ((codes (evolver-codes evolver))
- (char->code (evolver-char->code evolver)))
- (lambda (char)
- (vector-ref codes (char->code char)))))
-
-(define (evolver-show-transitions evolver)
- (let ((interpreter (evolver-interpreter evolver)))
- (lambda (string)
- (parameterize ((trace-interpreter? #t))
- (interpreter string 0 (string-length string))))))
-\f
(define (make-evolver codes abbrevs extra-states char->code rules)
(let-values (((transitions new-states)
(convert-to-transitions
(define-integrable state-index car)
(define-integrable state-breaks cdr)
- (lambda (string start end)
- (let loop ((i start) (states (list (make-state sot-index '()))))
- (if (fix:< i end)
- (loop (fix:+ i 1)
- (evolve-states states (char->code (string-ref string i)) i))
- (let ((states (evolve-states states eot-code i)))
- (if (not (and (pair? states) (null? (cdr states))))
- (error "Interpretation didn't converge:" states))
- (reverse (state-breaks (car states)))))))))
+ (lambda (string #!optional start end)
+ (let* ((end (fix:end-index end (string-length string)))
+ (start (fix:start-index start end)))
+ (let loop ((i start) (states (list (make-state sot-index '()))))
+ (if (fix:< i end)
+ (loop (fix:+ i 1)
+ (evolve-states states (char->code (string-ref string i)) i))
+ (let ((states (evolve-states states eot-code i)))
+ (if (not (and (pair? states) (null? (cdr states))))
+ (error "Interpretation didn't converge:" states))
+ (reverse (state-breaks (car states))))))))))
\f
(define (create-state-vector state-diagram)
(cdr state-entry))))
state-diagram)))
+;;; Debugging support:
+
+(define (evolver-string->code-names evolver)
+ (let ((char->code-name (evolver-char->code-name evolver)))
+ (lambda (string)
+ (map char->code-name (string->list string)))))
+
+(define (evolver-char->code-name evolver)
+ (let ((codes (evolver-codes evolver))
+ (char->code (evolver-char->code evolver)))
+ (lambda (char)
+ (vector-ref codes (char->code char)))))
+
+(define (evolver-show-transitions evolver)
+ (let ((interpreter (evolver-interpreter evolver)))
+ (lambda (string #!optional start end)
+ (parameterize ((trace-interpreter? #t))
+ (interpreter string start end)))))
+
(define-deferred trace-interpreter?
(make-settable-parameter #f))
\ No newline at end of file
(add-boot-deps! '(runtime ucd-glue) '(runtime ucd-segmentation))
\f
+(define evolver)
+(define string-word-breaks)
+(add-boot-init!
+ (lambda ()
+ (set! evolver
+ (make-evolver codes abbrevs extra-states ucd-wb+ep-value rules))
+ (set! string-word-breaks (evolver-interpreter evolver))
+ unspecific))
+
(define codes
'(carriage-return
double-quote
(ri efz* _ ri ri*2)
(any / any)))
-\f
-(define evolver)
-(define string-wb-fold)
-(define string-wb-fold-right)
-(define string-wb-stream)
-(define string->wb-names)
-(define show-transitions)
-(add-boot-init!
- (lambda ()
- (set! evolver
- (make-evolver codes abbrevs extra-states ucd-wb+ep-value rules))
- (set! string-wb-fold (folder evolver 'string-wb-fold))
- (set! string-wb-fold-right (right-folder evolver 'string-wb-fold-right))
- (set! string-wb-stream (streamer evolver 'string-wb-stream))
- (set! string->wb-names (evolver-string->code-names evolver))
- (set! show-transitions (evolver-show-transitions evolver))
- unspecific))
-
-(define (string-word-breaks string)
- (let loop ((stream (string-wb-stream string)))
- (if (pair? stream)
- (cons (car stream) (loop (force (cdr stream))))
- '())))
-
-(define (find-word-breaks string knil kons)
- (string-wb-fold (lambda (break prev-break acc)
- (declare (ignore prev-break))
- (kons break acc))
- knil
- string))