From d9a966f26a00c9722f78f00dc471099102c99af8 Mon Sep 17 00:00:00 2001 From: Chris Hanson Date: Sat, 18 Mar 2017 14:34:15 -0700 Subject: [PATCH] Rework the character parser to handle backslash reasonably. --- src/runtime/char.scm | 45 +++++++++++++++++++++---------------------- src/runtime/parse.scm | 44 ++++++++++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 35 deletions(-) diff --git a/src/runtime/char.scm b/src/runtime/char.scm index ee9a86b56..f1c6a23ca 100644 --- a/src/runtime/char.scm +++ b/src/runtime/char.scm @@ -215,30 +215,29 @@ USA. (define (name->char string #!optional fold-case?) (let ((fold-case? (if (default-object? fold-case?) #t fold-case?)) - (parse-hex - (lambda (string start) - (let ((n (string->number string 16 #f start))) - (and (exact-nonnegative-integer? n) - n)))) - (lose (lambda () (error:bad-range-argument string 'NAME->CHAR)))) - (receive (string bits) (match-bucky-bits-prefix string fold-case?) - (let ((end (string-length string))) - (if (fix:= 0 end) - (lose)) - (if (fix:= 1 end) - (let ((char (string-ref string 0))) - (if (not (char-graphic? char)) - (lose)) - (make-char (char-code char) bits)) - (make-char (or (match-named-code string fold-case?) - ;; R7RS syntax (not sure if -ci is right) - (and (string-prefix-ci? "x" string) - (parse-hex string 1)) + (lose (lambda () (error:bad-range-argument string 'name->char)))) + (let ((parse-hex + (lambda (string start) + (let ((cp (string->number string 16 #t start))) + (if (not (unicode-code-point? cp)) + (lose)) + cp)))) + (receive (string bits) (match-bucky-bits-prefix string fold-case?) + (let ((end (string-length string))) + (if (fix:= 0 end) + (lose)) + (make-char (cond ((fix:= 1 end) + (char-code (string-ref string 0))) + ;; R7RS syntax + ((char=? #\x (string-ref string 0)) + (parse-hex string 1)) ;; Non-standard syntax (Unicode style) - (and (string-prefix-ci? "u+" string) - (parse-hex string 2)) - (lose)) - bits)))))) + ((and (char-ci=? #\u (string-ref string 0)) + (char=? #\+ (string-ref string 1))) + (parse-hex string 2)) + ((match-named-code string fold-case?)) + (else (lose))) + bits)))))) (define (char->name char) (let ((bits (char-bits char)) diff --git a/src/runtime/parse.scm b/src/runtime/parse.scm index 168ebec73..94cdbfaff 100644 --- a/src/runtime/parse.scm +++ b/src/runtime/parse.scm @@ -733,21 +733,41 @@ USA. (define (handler:char db ctx char1 char2) ctx char1 char2 - (let ((char (%read-char/no-eof db))) + (let ((char (%read-char/no-eof db)) + (at-end? + (lambda () + (let ((char (%peek-char db))) + (or (eof-object? char) + (atom-delimiter? char)))))) (cond ((or (atom-delimiter? char) - (let ((char (%peek-char db))) - (or (eof-object? char) - (atom-delimiter? char)))) + (at-end?)) char) - ((char-ci=? char #\x) - (let* ((string (parse-atom db '())) - (cp (string->number string 16 #t))) - (if (not (unicode-code-point? cp)) - (error:illegal-code-point string)) - (integer->char cp))) + ((char=? char #\x) + (let ((builder (string-builder))) + (let loop () + (if (not (at-end?)) + (begin + (builder (%read-char db)) + (loop)))) + (let* ((string (builder)) + (cp (string->number string 16 #t))) + (if (not (unicode-code-point? cp)) + (error:illegal-code-point string)) + (integer->char cp)))) (else - (name->char (parse-atom db (list char)) - (db-fold-case? db)))))) + (let ((builder (string-builder))) + (builder char) + (let loop () + (if (not (at-end?)) + (begin + (builder + (let ((char (%read-char db))) + (if (char=? #\\ char) + (%read-char/no-eof db) + char))) + (loop)))) + (name->char (builder) + (db-fold-case? db))))))) (define (handler:named-constant db ctx char1 char2) ctx char1 char2 -- 2.25.1