From: Chris Hanson Date: Wed, 1 Mar 2017 09:42:28 +0000 (-0800) Subject: Implement dumb Unicode string search, and eliminate old implementation. X-Git-Tag: mit-scheme-pucked-9.2.12~198^2~7 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=6536686f38595f63d4a33ae1757d2ea081d44cc4;p=mit-scheme.git Implement dumb Unicode string search, and eliminate old implementation. It looks like the KMP string-search algorithm is better for Unicode than BM, so that will need to be implemented soon-ish. --- diff --git a/src/runtime/httpio.scm b/src/runtime/httpio.scm index e735ea3a1..a12228488 100644 --- a/src/runtime/httpio.scm +++ b/src/runtime/httpio.scm @@ -85,7 +85,7 @@ USA. (guarantee-http-headers headers caller) (if body (begin - (guarantee-string body caller) + (guarantee string? body caller) (let ((n (%get-content-length headers)) (m (vector-8b-length body))) (if n @@ -116,7 +116,7 @@ USA. (define-guarantee simple-http-response "simple HTTP response") (define (make-simple-http-response body) - (guarantee-string body 'MAKE-SIMPLE-HTTP-RESPONSE) + (guarantee string? body 'MAKE-SIMPLE-HTTP-RESPONSE) (%make-http-response #f 200 (http-status-description 200) '() body)) (define (http-message? object) diff --git a/src/runtime/output.scm b/src/runtime/output.scm index 21b3bd148..6e47d49e7 100644 --- a/src/runtime/output.scm +++ b/src/runtime/output.scm @@ -167,10 +167,10 @@ USA. (error:wrong-type-argument strings "list of strings" 'WRITE-STRINGS-IN-COLUMNS)) (guarantee textual-output-port? port 'WRITE-STRINGS-IN-COLUMNS) - (guarantee-exact-positive-integer min-minor 'WRITE-STRINGS-IN-COLUMNS) - (guarantee-string left-margin 'WRITE-STRINGS-IN-COLUMNS) - (guarantee-string col-sep 'WRITE-STRINGS-IN-COLUMNS) - (guarantee-string right-margin 'WRITE-STRINGS-IN-COLUMNS) + (guarantee exact-positive-integer? min-minor 'WRITE-STRINGS-IN-COLUMNS) + (guarantee string? left-margin 'WRITE-STRINGS-IN-COLUMNS) + (guarantee string? col-sep 'WRITE-STRINGS-IN-COLUMNS) + (guarantee string? right-margin 'WRITE-STRINGS-IN-COLUMNS) (let ((n-strings (length strings)) (max-width (output-port/x-size port)) (lm-width (string-length left-margin)) @@ -292,9 +292,9 @@ USA. (error:wrong-type-argument strings "non-empty list of strings" 'WRITE-STRINGS-IN-PARAGRAPH)) (guarantee textual-output-port? port 'WRITE-STRINGS-IN-PARAGRAPH) - (guarantee-exact-positive-integer width 'WRITE-STRINGS-IN-PARAGRAPH) - (guarantee-exact-nonnegative-integer indent 'WRITE-STRINGS-IN-PARAGRAPH) - (guarantee-exact-nonnegative-integer first 'WRITE-STRINGS-IN-PARAGRAPH) + (guarantee exact-positive-integer? width 'WRITE-STRINGS-IN-PARAGRAPH) + (guarantee exact-nonnegative-integer? indent 'WRITE-STRINGS-IN-PARAGRAPH) + (guarantee exact-nonnegative-integer? first 'WRITE-STRINGS-IN-PARAGRAPH) (if (< width (+ indent first (string-length (car strings)))) (error:bad-range-argument width 'WRITE-STRINGS-IN-PARAGRAPH)) diff --git a/src/runtime/pgsql.scm b/src/runtime/pgsql.scm index a747479d6..7b1b13183 100644 --- a/src/runtime/pgsql.scm +++ b/src/runtime/pgsql.scm @@ -311,7 +311,7 @@ USA. (pq-unescape-bytea string)) (define (exec-pgsql-query connection query) - (guarantee-string query 'EXEC-PGSQL-QUERY) + (guarantee string? query 'EXEC-PGSQL-QUERY) (let ((result (let ((handle (connection->handle connection))) (make-gc-finalized-object diff --git a/src/runtime/runtime.pkg b/src/runtime/runtime.pkg index 756737b5d..1f49ebc59 100644 --- a/src/runtime/runtime.pkg +++ b/src/runtime/runtime.pkg @@ -958,35 +958,21 @@ USA. (files "string") (parent (runtime)) (export () deprecated:string - (guarantee-vector-8b guarantee-string) - (vector-8b-length string-length) - (vector-8b? string?) - error:not-string - guarantee-string - guarantee-string-index + (vector-8b? legacy-string?) + legacy-string? make-legacy-string make-vector-8b - vector-8b-fill! - vector-8b-find-next-char - vector-8b-find-next-char-ci - vector-8b-find-previous-char - vector-8b-find-previous-char-ci + vector-8b-length vector-8b-ref - vector-8b-set!) - (export () - string-search-all - string-search-backward - string-search-forward - substring-search-all - substring-search-backward - substring-search-forward - substring?)) + vector-8b-set!)) (define-package (runtime ustring) (files "ustring") (parent (runtime)) (export () deprecated:ustring (string-hash-mod string-hash) + (string-search-all string-find-all-matches) + (string-search-forward string-find-first-match) (substring->list string->list) (substring-move-left! substring-move!) (substring-move-right! substring-move!) @@ -1002,6 +988,7 @@ USA. string-move! string-pad-left string-pad-right + string-search-backward string-trim string-trim-left string-trim-right @@ -1018,6 +1005,9 @@ USA. substring-move! substring-prefix-ci? substring-prefix? + substring-search-all + substring-search-backward + substring-search-forward substring-suffix-ci? substring-suffix? substring-upper-case? @@ -1052,8 +1042,11 @@ USA. string-downcase string-every string-fill! + string-find-all-matches string-find-first-index + string-find-first-match string-find-last-index + string-find-last-match string-foldcase string-for-each string-for-primitive ;export to (runtime) after 9.3 @@ -1090,6 +1083,7 @@ USA. string>=? string>? string? + substring? vector->string) (export (runtime predicate-metadata) register-ustring-predicates!) diff --git a/src/runtime/string.scm b/src/runtime/string.scm index c39010c99..c04c55601 100644 --- a/src/runtime/string.scm +++ b/src/runtime/string.scm @@ -24,40 +24,18 @@ USA. |# -;;;; Character String Operations +;;;; Legacy Strings ;;; package: (runtime string) -;;; This file is designed to be compiled with type and range checking -;;; turned off. The advertised user-visible procedures all explicitly -;;; check their arguments. -;;; -;;; Many of the procedures are split into several user versions that -;;; just validate their arguments and pass them on to an internal -;;; version (prefixed with `%') that assumes all arguments have been -;;; checked. This avoids repeated argument checks. - -(declare (usual-integrations) - (integrate-external "char") - (integrate-external "chrset")) - -;;;; Primitives +(declare (usual-integrations)) (define-primitives (string-allocate 1) - (string-length 1) - (string-ref 2) - (string-set! 3) - (string? 1) - vector-8b-fill! - vector-8b-find-next-char - vector-8b-find-next-char-ci - vector-8b-find-previous-char - vector-8b-find-previous-char-ci + (legacy-string? string? 1) + (vector-8b-length string-length 1) (vector-8b-ref 2) (vector-8b-set! 3)) -;;;; Basic Operations - (define (make-legacy-string k #!optional char) (let ((string (string-allocate k))) (if (not (default-object? char)) @@ -70,296 +48,4 @@ USA. (make-legacy-string length (if (default-object? ascii) ascii - (integer->char ascii)))) - -;;;; String search - -(define (substring? pattern text) - (and (string-search-forward pattern text) #t)) - -(define (string-search-forward pattern text) - (guarantee-string pattern 'STRING-SEARCH-FORWARD) - (guarantee-string text 'STRING-SEARCH-FORWARD) - (%substring-search-forward text 0 (string-length text) - pattern 0 (string-length pattern))) - -(define (substring-search-forward pattern text tstart tend) - (guarantee-string pattern 'SUBSTRING-SEARCH-FORWARD) - (guarantee-substring text tstart tend 'SUBSTRING-SEARCH-FORWARD) - (%substring-search-forward text tstart tend - pattern 0 (string-length pattern))) - -(define (string-search-backward pattern text) - (guarantee-string pattern 'STRING-SEARCH-BACKWARD) - (guarantee-string text 'STRING-SEARCH-BACKWARD) - (%substring-search-backward text 0 (string-length text) - pattern 0 (string-length pattern))) - -(define (substring-search-backward pattern text tstart tend) - (guarantee-string pattern 'SUBSTRING-SEARCH-BACKWARD) - (guarantee-substring text tstart tend 'SUBSTRING-SEARCH-BACKWARD) - (%substring-search-backward text tstart tend - pattern 0 (string-length pattern))) - -(define (string-search-all pattern text) - (guarantee-string pattern 'STRING-SEARCH-ALL) - (guarantee-string text 'STRING-SEARCH-ALL) - (%substring-search-all text 0 (string-length text) - pattern 0 (string-length pattern))) - -(define (substring-search-all pattern text tstart tend) - (guarantee-string pattern 'SUBSTRING-SEARCH-ALL) - (guarantee-substring text tstart tend 'SUBSTRING-SEARCH-ALL) - (%substring-search-all text tstart tend - pattern 0 (string-length pattern))) - -(define (%substring-search-forward text tstart tend pattern pstart pend) - ;; Returns index of first matched char, or #F. - (if (fix:< (fix:- pend pstart) 4) - (%dumb-substring-search-forward text tstart tend pattern pstart pend) - (%bm-substring-search-forward text tstart tend pattern pstart pend))) - -(define (%dumb-substring-search-forward text tstart tend pattern pstart pend) - (if (fix:= pstart pend) - 0 - (let* ((leader (string-ref pattern pstart)) - (plen (fix:- pend pstart)) - (tend (fix:- tend plen))) - (let loop ((tstart tstart)) - (let ((tstart - (let find-leader ((tstart tstart)) - (and (fix:<= tstart tend) - (if (char=? leader (string-ref text tstart)) - tstart - (find-leader (fix:+ tstart 1))))))) - (and tstart - (if (substring=? text (fix:+ tstart 1) (fix:+ tstart plen) - pattern (fix:+ pstart 1) pend) - tstart - (loop (fix:+ tstart 1))))))))) - -(define (%substring-search-backward text tstart tend pattern pstart pend) - ;; Returns index following last matched char, or #F. - (if (fix:< (fix:- pend pstart) 4) - (%dumb-substring-search-backward text tstart tend pattern pstart pend) - (%bm-substring-search-backward text tstart tend pattern pstart pend))) - -(define (%dumb-substring-search-backward text tstart tend pattern pstart pend) - (if (fix:= pstart pend) - 0 - (let* ((pend-1 (fix:- pend 1)) - (trailer (string-ref pattern pend-1)) - (plen (fix:- pend pstart)) - (tstart+plen (fix:+ tstart plen))) - (let loop ((tend tend)) - (let ((tend - (let find-trailer ((tend tend)) - (and (fix:<= tstart+plen tend) - (if (char=? trailer (string-ref text (fix:- tend 1))) - tend - (find-trailer (fix:- tend 1))))))) - (and tend - (if (substring=? text (fix:- tend plen) (fix:- tend 1) - pattern pstart pend-1) - tend - (loop (fix:- tend 1))))))))) - -(define (%substring-search-all text tstart tend pattern pstart pend) - (let ((plen (fix:- pend pstart))) - (cond ((fix:= plen 1) - (let ((c (string-ref pattern pstart))) - (let loop ((ti tend) (occurrences '())) - (let ((index (substring-find-previous-char text tstart ti c))) - (if index - (loop index (cons index occurrences)) - occurrences))))) - #; ;This may not be worthwhile -- I have no measurements. - ((fix:< plen 4) - (let loop ((ti tend) (occurrences '())) - (let ((index - (%dumb-substring-search-backward text tstart ti - pattern pstart pend))) - (if index - (loop (fix:+ index (fix:- plen 1)) (cons index occurrences)) - occurrences)))) - (else - (%bm-substring-search-all text tstart tend pattern pstart pend))))) - -;;;; Boyer-Moore String Search - -;;; Cormen, Leiserson, and Rivest, "Introduction to Algorithms", -;;; Chapter 34, "String Matching". - -(define (%bm-substring-search-forward text tstart tend pattern pstart pend) - (let ((m (fix:- pend pstart)) - (pstart-1 (fix:- pstart 1)) - (pend-1 (fix:- pend 1)) - (lambda* (compute-last-occurrence-function pattern pstart pend)) - (gamma - (compute-good-suffix-function pattern pstart pend - (compute-gamma0 pattern pstart pend)))) - (let ((tend-m (fix:- tend m)) - (m-1 (fix:- m 1))) - (let outer ((s tstart)) - (and (fix:<= s tend-m) - (let inner ((pj pend-1) (tj (fix:+ s m-1))) - (if (fix:= (vector-8b-ref pattern pj) (vector-8b-ref text tj)) - (if (fix:= pstart pj) - s - (inner (fix:- pj 1) (fix:- tj 1))) - (outer - (fix:+ s - (fix:max (fix:- (fix:- pj pstart-1) - (lambda* (vector-8b-ref text tj))) - (gamma (fix:- pj pstart)))))))))))) - -(define (%bm-substring-search-backward text tstart tend pattern pstart pend) - (let ((m (fix:- pend pstart)) - (pend-1 (fix:- pend 1)) - (rpattern (reverse-pattern pattern pstart pend))) - (let ((tstart+m (fix:+ tstart m)) - (lambda* (compute-last-occurrence-function rpattern 0 m)) - (gamma - (compute-good-suffix-function rpattern 0 m - (compute-gamma0 rpattern 0 m)))) - (let outer ((s tend)) - (and (fix:>= s tstart+m) - (let inner ((pj pstart) (tj (fix:- s m))) - (if (fix:= (vector-8b-ref pattern pj) (vector-8b-ref text tj)) - (if (fix:= pend-1 pj) - s - (inner (fix:+ pj 1) (fix:+ tj 1))) - (outer - (fix:- s - (fix:max (fix:- (fix:- pend pj) - (lambda* (vector-8b-ref text tj))) - (gamma (fix:- pend-1 pj)))))))))))) - -(define (%bm-substring-search-all text tstart tend pattern pstart pend) - (let ((m (fix:- pend pstart)) - (pstart-1 (fix:- pstart 1)) - (pend-1 (fix:- pend 1)) - (lambda* (compute-last-occurrence-function pattern pstart pend)) - (gamma0 (compute-gamma0 pattern pstart pend))) - (let ((gamma (compute-good-suffix-function pattern pstart pend gamma0)) - (tend-m (fix:- tend m)) - (m-1 (fix:- m 1))) - (let outer ((s tstart) (occurrences '())) - (if (fix:<= s tend-m) - (let inner ((pj pend-1) (tj (fix:+ s m-1))) - (if (fix:= (vector-8b-ref pattern pj) (vector-8b-ref text tj)) - (if (fix:= pstart pj) - (outer (fix:+ s gamma0) (cons s occurrences)) - (inner (fix:- pj 1) (fix:- tj 1))) - (outer (fix:+ s - (fix:max (fix:- (fix:- pj pstart-1) - (lambda* - (vector-8b-ref text tj))) - (gamma (fix:- pj pstart)))) - occurrences))) - (reverse! occurrences)))))) - -(define (compute-last-occurrence-function pattern pstart pend) - (let ((lam (make-vector 256 0))) - (do ((j pstart (fix:+ j 1))) - ((fix:= j pend)) - (vector-set! lam - (vector-8b-ref pattern j) - (fix:+ (fix:- j pstart) 1))) - (lambda (symbol) - (vector-ref lam symbol)))) - -(define (compute-good-suffix-function pattern pstart pend gamma0) - (let ((m (fix:- pend pstart))) - (let ((pi - (compute-prefix-function (reverse-pattern pattern pstart pend) - 0 - m)) - (gamma (make-vector m gamma0)) - (m-1 (fix:- m 1))) - (do ((l 0 (fix:+ l 1))) - ((fix:= l m)) - (let ((j (fix:- m-1 (vector-ref pi l))) - (k (fix:- (fix:+ 1 l) (vector-ref pi l)))) - (if (fix:< k (vector-ref gamma j)) - (vector-set! gamma j k)))) - (lambda (index) - (vector-ref gamma index))))) - -(define (compute-gamma0 pattern pstart pend) - (let ((m (fix:- pend pstart))) - (fix:- m - (vector-ref (compute-prefix-function pattern pstart pend) - (fix:- m 1))))) - -(define (compute-prefix-function pattern pstart pend) - (let* ((m (fix:- pend pstart)) - (pi (make-vector m))) - (vector-set! pi 0 0) - (let outer ((k 0) (q 1)) - (if (fix:< q m) - (let ((k - (let ((pq (vector-8b-ref pattern (fix:+ pstart q)))) - (let inner ((k k)) - (cond ((fix:= pq (vector-8b-ref pattern (fix:+ pstart k))) - (fix:+ k 1)) - ((fix:= k 0) - k) - (else - (inner (vector-ref pi (fix:- k 1))))))))) - (vector-set! pi q k) - (outer k (fix:+ q 1))))) - pi)) - -(define (reverse-pattern pattern pstart pend) - (let ((builder (string-builder))) - (do ((i (fix:- pend 1) (fix:- i 1))) - ((not (fix:>= i pstart))) - (builder (string-ref pattern i))) - (builder))) - -;;;; Guarantors -;; -;; The guarantors are integrated. Most are structured as combination of -;; simple tests which the compiler can open-code, followed by a call to a -;; GUARANTEE-.../FAIL version which does the tests again to signal a -;; meaningful message. Structuring the code this way significantly -;; reduces code bloat from large integrated procedures. - -(declare (integrate-operator guarantee-string)) -(define-guarantee string "string") - -(define-integrable (guarantee-string-index object caller) - (if (not (index-fixnum? object)) - (error:wrong-type-argument object "string index" caller))) - -(define-integrable (guarantee-substring string start end caller) - (if (not (and (string? string) - (index-fixnum? start) - (index-fixnum? end) - (fix:<= start end) - (fix:<= end (string-length string)))) - (guarantee-substring/fail string start end caller))) - -(define (guarantee-substring/fail string start end caller) - (guarantee-string string caller) - (guarantee-substring-end-index end (string-length string) caller) - (guarantee-substring-start-index start end caller)) - -(define-integrable (guarantee-substring-end-index end length caller) - (guarantee-string-index end caller) - (if (not (fix:<= end length)) - (error:bad-range-argument end caller)) - end) - -(define-integrable (guarantee-substring-start-index start end caller) - (guarantee-string-index start caller) - (if (not (fix:<= start end)) - (error:bad-range-argument start caller)) - start) - -(define-integrable (guarantee-2-substrings string1 start1 end1 - string2 start2 end2 - procedure) - (guarantee-substring string1 start1 end1 procedure) - (guarantee-substring string2 start2 end2 procedure)) \ No newline at end of file + (integer->char ascii)))) \ No newline at end of file diff --git a/src/runtime/ustring.scm b/src/runtime/ustring.scm index 3923307b7..1c2cfbae9 100644 --- a/src/runtime/ustring.scm +++ b/src/runtime/ustring.scm @@ -143,7 +143,7 @@ USA. (register-predicate! 8-bit-string? '8-bit-string '<= string?) (register-predicate! ->string-component? '->string-component)) -;;;; Strings +;;;; Basic operations (define (string? object) (or (legacy-string? object) @@ -212,6 +212,8 @@ USA. start (fix:- end start)))))) +;;;; Streaming build + (define (string-builder) (let ((builder (make-sequence-builder (lambda () (full-string-allocate 16)) @@ -243,6 +245,8 @@ USA. (string-copy! result i (caar parts) 0 (cdar parts))) result)) +;;;; Copy + (define (string-copy! to at from #!optional start end) (let* ((end (fix:end-index end (string-length from) 'string-copy!)) (start (fix:start-index start end 'string-copy!))) @@ -487,6 +491,8 @@ USA. (loop (fix:+ i 1))) #t)))) +;;;; Normalization + (define (string->nfd string) (if (or (string-ascii? string) ;ASCII unaffected by normalization (string-in-nfd? string)) @@ -549,7 +555,6 @@ USA. (scan-for-non-starter 0)) string) -#| (define (quick-check string qc-value) (let ((n (string-length string))) (let loop ((i 0) (last-ccc 0) (result #t)) @@ -565,8 +570,9 @@ USA. (loop (fix:+ i 1) ccc check) (loop (fix:+ i 1) ccc result)))))) result)))) -|# +;;;; Grapheme clusters + (define (grapheme-cluster-length string) (let ((breaks (find-grapheme-cluster-breaks string @@ -600,8 +606,6 @@ USA. (if (not end-index) (error:bad-range-argument end 'grapheme-cluster-slice)) (string-slice string start-index end-index))) - -;;;; Grapheme-cluster breaks (define (find-grapheme-cluster-breaks string initial-ctx break) (let ((n (string-length string))) @@ -622,7 +626,7 @@ USA. (if (fix:> n 0) (transition (get-gcb 0) 0 (break 0 initial-ctx)) initial-ctx))) - + (define gcb-names '#(control carriage-return @@ -917,6 +921,70 @@ USA. (make-!selector wb-names '(emoji-base-gaz glue-after-zwj))) ))))) +;;;; Search + +(define-integrable (string-matcher caller matcher) + (lambda (pattern text) + (guarantee string? pattern caller) + (guarantee string? text caller) + (let ((pend (string-length pattern))) + (if (fix:= 0 pend) + (error:bad-range-argument pend caller)) + (matcher pattern pend text (fix:- (string-length text) pend))))) + +(define string-find-first-match + (string-matcher 'string-find-first-match + %dumb-string-find-first-match)) + +(define string-find-last-match + (string-matcher 'string-find-last-match + %dumb-string-find-last-match)) + +(define string-find-all-matches + (string-matcher 'string-find-all-matches + %dumb-string-find-all-matches)) + +(define (%dumb-string-find-first-match pattern pend text tlast) + (and (fix:>= tlast 0) + (let find-match ((tstart 0)) + (and (fix:<= tstart tlast) + (let match ((pi 0) (ti tstart)) + (if (fix:< pi pend) + (if (char=? (string-ref pattern pi) + (string-ref text ti)) + (match (fix:+ pi 1) (fix:+ ti 1)) + (find-match (fix:+ tstart 1))) + tstart)))))) + +(define (%dumb-string-find-last-match pattern pend text tlast) + (and (fix:>= tlast 0) + (let find-match ((tstart tlast)) + (and (fix:>= tstart 0) + (let match ((pi 0) (ti tstart)) + (if (fix:< pi pend) + (if (char=? (string-ref pattern pi) + (string-ref text ti)) + (match (fix:+ pi 1) (fix:+ ti 1)) + (find-match (fix:- tstart 1))) + tstart)))))) + +(define (%dumb-string-find-all-matches pattern pend text tlast) + (if (fix:>= tlast 0) + (let find-match ((tstart tlast) (matches '())) + (if (fix:>= tstart 0) + (find-match (fix:- tstart 1) + (let match ((pi 0) (ti tstart)) + (if (fix:< pi pend) + (if (char=? (string-ref pattern pi) + (string-ref text ti)) + (match (fix:+ pi 1) (fix:+ ti 1)) + matches) + (cons tstart matches)))) + matches)) + '())) + +;;;; Sequence converters + (define (list->string chars) (if (every char-8-bit? chars) (let ((string (legacy-string-allocate (length chars)))) @@ -971,6 +1039,8 @@ USA. %full-string-ref string start end) to))))) +;;;; Append and general constructor + (define (string-append . strings) (%string-append* strings)) @@ -1026,6 +1096,8 @@ USA. (number? object) (uri? object))) +;;;; Mapping + (define (mapper-values proc string strings) (cond ((null? strings) (values (string-length string) @@ -1068,7 +1140,7 @@ USA. ((not (fix:< i n))) (builder (proc i))) (builder)))) - + (define (string-count proc string . strings) (receive (n proc) (mapper-values proc string strings) (let loop ((i 0) (count 0)) @@ -1078,7 +1150,7 @@ USA. (fix:+ count 1) count)) count)))) - + (define (string-any proc string . strings) (receive (n proc) (mapper-values proc string strings) (let loop ((i 0)) @@ -1110,31 +1182,9 @@ USA. (if (proc i) i (loop (fix:- i 1))))))) - -(define (string-fill! string char #!optional start end) - (guarantee bitless-char? char 'string-fill!) - (let* ((end (fix:end-index end (string-length string) 'string-fill!)) - (start (fix:start-index start end 'string-fill!))) - (receive (string start end) (translate-slice string start end) - (if (legacy-string? string) - (do ((index start (fix:+ index 1))) - ((not (fix:< index end)) unspecific) - (legacy-string-set! string index char)) - (let ((bytes (%full-string-cp-vector string)) - (cp (char->integer char))) - (do ((i start (fix:+ i 1))) - ((not (fix:< i end))) - (cp-vector-set! bytes i cp))))))) - -(define (string-hash string #!optional modulus) - (let ((string* (string-for-primitive string))) - (if (default-object? modulus) - ((ucode-primitive string-hash) string*) - ((ucode-primitive string-hash-mod) string* modulus)))) - -(define (string-ci-hash string #!optional modulus) - (string-hash (string-foldcase string) modulus)) +;;;; Joiner/splitter + (define (string-joiner infix #!optional prefix suffix) (let ((joiner (string-joiner* prefix infix suffix))) (lambda strings @@ -1211,6 +1261,8 @@ USA. (if (char=? char char1) char2 char)) string)) +;;;; Trimmer/padder + (define (string-trimmer . options) (receive (where copy? trim-char?) (string-trimmer-options options 'string-trimmer) @@ -1281,6 +1333,32 @@ USA. (list 'fill-with grapheme-cluster-string? " ") (list 'clip? boolean? #t)))) +;;;; Miscellaneous + +(define (string-fill! string char #!optional start end) + (guarantee bitless-char? char 'string-fill!) + (let* ((end (fix:end-index end (string-length string) 'string-fill!)) + (start (fix:start-index start end 'string-fill!))) + (receive (string start end) (translate-slice string start end) + (if (legacy-string? string) + (do ((index start (fix:+ index 1))) + ((not (fix:< index end)) unspecific) + (legacy-string-set! string index char)) + (let ((bytes (%full-string-cp-vector string)) + (cp (char->integer char))) + (do ((i start (fix:+ i 1))) + ((not (fix:< i end))) + (cp-vector-set! bytes i cp))))))) + +(define (string-hash string #!optional modulus) + (let ((string* (string-for-primitive string))) + (if (default-object? modulus) + ((ucode-primitive string-hash) string*) + ((ucode-primitive string-hash-mod) string* modulus)))) + +(define (string-ci-hash string #!optional modulus) + (string-hash (string-foldcase string) modulus)) + (define (8-bit-string? object) (and (string? object) (string-8-bit? object))) @@ -1324,6 +1402,8 @@ USA. (loop (fix:+ i 1))) #t))) +;;;;Backwards compatibility + (define (string-find-next-char string char) (string-find-first-index (char=-predicate char) string)) @@ -1366,6 +1446,35 @@ USA. (define substring-find-previous-char-in-set (substring-find-maker string-find-previous-char-in-set)) + +(define (substring? pattern text) + (and (or (fix:= 0 (string-length pattern)) + (string-find-first-match pattern text)) + #t)) + +(define (string-search-backward pattern text) + (let ((index (string-find-last-match pattern text))) + (and index + (fix:+ index (string-length pattern))))) + +(define-integrable (substring-search-maker string-search) + (lambda (pattern text tstart tend) + (let* ((slice (string-slice text tstart tend)) + (index (string-search pattern slice))) + (and index + (fix:+ tstart index))))) + +(define substring-search-forward + (substring-search-maker string-find-first-match)) + +(define substring-search-backward + (substring-search-maker string-search-backward)) + +(define (substring-search-all pattern text tstart tend) + (let ((slice (string-slice text tstart tend))) + (map (lambda (index) + (fix:+ tstart index)) + (string-find-all-matches pattern slice)))) (define (string-move! string1 string2 start2) (string-copy! string2 start2 string1)) diff --git a/src/ssp/mod-lisp.scm b/src/ssp/mod-lisp.scm index e4a0b4406..e8e5d57f3 100644 --- a/src/ssp/mod-lisp.scm +++ b/src/ssp/mod-lisp.scm @@ -601,8 +601,8 @@ USA. (reverse! strings)))) (define (http-response-header keyword datum #!optional overwrite?) - (guarantee-symbol keyword 'HTTP-RESPONSE-HEADER) - (guarantee-string datum 'HTTP-RESPONSE-HEADER) + (guarantee symbol? keyword 'HTTP-RESPONSE-HEADER) + (guarantee string? datum 'HTTP-RESPONSE-HEADER) (if (memq keyword '(STATUS CONTENT-LENGTH)) (error "Illegal header keyword:" keyword)) (if (or (eq? keyword 'CONTENT-TYPE) @@ -617,7 +617,7 @@ USA. (maybe-set-entity *current-request* *current-response* entity)) (define (http-status-response code . extra) - (guarantee-exact-nonnegative-integer code 'HTTP-STATUS-RESPONSE) + (guarantee exact-nonnegative-integer? code 'HTTP-STATUS-RESPONSE) (status-response! *current-response* code extra)) ;;;; MIME stuff @@ -758,7 +758,7 @@ USA. 'handler handler)) (define (define-url-bindings url . klist) - (guarantee-keyword-list klist 'define-url-bindings) + (guarantee keyword-list? klist 'define-url-bindings) (let* ((binding (find-matching-item url-bindings (lambda (binding) diff --git a/src/win32/clipbrd.scm b/src/win32/clipbrd.scm index 1f30c6a42..cbabca895 100644 --- a/src/win32/clipbrd.scm +++ b/src/win32/clipbrd.scm @@ -52,7 +52,10 @@ USA. (copy-memory s ptr maxlen) (global-unlock mem) (close-clipboard) - (substring s 0 (vector-8b-find-next-char s 0 maxlen 0)))))) + (string-copy s + 0 + (or (string-find-first-index (char=-predicate #\null) s) + maxlen)))))) (define (win32-screen-width) (get-system-metrics SM_CXSCREEN))