(declare (usual-integrations))
-;;; This file will parse "file attributes line" found in the first
+;;; This code will parse "file attributes line" found in the first
;;; or second line of file and delimited by the special -*- sequence.
-;;;
-;;; It currently contains just a stub function that the parser will
-;;; call when the delimiter is recognized within a comment.
-
-(define (parse-file-attributes-line port db multiline)
- (declare (ignore port db multiline))
- unspecific)
-
-(define (initialize-package!)
- unspecific)
;;; Here are sample attribute lines taken from various files
;;; found in the wild. They won't be parsed because they are not
;;; in the first two lines.
+\f
+;;; ---------------
+#| -*-Scheme-*-
+This file is part of MIT/GNU Scheme.
+|#
#||-*- mode:lisp;
package:(FOOBAR :USE (GLOBAL BAZ)
base:10
-*- ||#
-;;; -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
-
-;;; -*- Mode: C; tab-width: 4; -*- */
+;;; -*- Mode: Java; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-;;; -*-mode:C;tab-width:3-*-
+;;; -*- Mode: C; tab-width: 4; -*-
;;; For Emacs: -*- mode:cperl; mode:folding -*-
;;; -*-mode:C;tab-width:3-*-
-;;; -*-mode:c; c-style:k&r; c-basic-offset:4; -*- */
+;;; -*-mode:c; c-style:k&r; c-basic-offset:4; -*-
;;;-*-Mode:LISP;Syntax: Common-Lisp;Package:ib;Base:10-*-
;;; It should surprise no one that the following comes from a python file.
;;; -*-*- encoding: utf-8 -*-*-
+\f
+;;; ---------------
+
+;;; The most general case is a series of key/value pairs where the key
+;;; is followed by a colon and the pairs are separated or delimited by
+;;; semicolons. Whitespace is optional and cannot be relied upon to
+;;; delimit the end of a key or a value.
+
+;;; If the parser used the standard atom delimiters and the system
+;;; global parser table, a file attributes line like
+;;; "-*-mode:lisp;parser:read-*-" would be interpreted as the symbol
+;;; '-*-mode:lisp followed by a comment. We therefore need to run the
+;;; parser with modified settings.
+
+;;; We need two modes. The first is the mode where we are expecting
+;;; the key of a key/value pair. Since the key is delimited by a
+;;; colon, or by whitespace followed by a colon, we need the colon
+;;; character to be atom-delimiter.
+
+;;; The second mode is when we are reading the value of a key/value
+;;; pair. The value is read as an ordinary lisp object. This is
+;;; slightly different from the standard settings of the Scheme
+;;; reader.
+
+;;; The actual way we parse the mode line is to stay in the first mode
+;;; until we read a colon character. At that point, we switch to the
+;;; second mode in order to read a single value and return to the
+;;; first mode immediately afterwards.
+\f
+;; These are the char-sets and parser table for use in the mode where
+;; we are parsing anything but a value. (mode 1)
+(define char-set/file-attributes-atom-delimiters)
+(define char-set/file-attributes-constituents)
+(define file-attributes-parser-table)
+
+(define (parse-file-attributes-item parse port)
+ ;; Prepare the parser for first mode.
+ (fluid-let ((*parser-associate-positions?* #f)
+ (*parser-atom-delimiters*
+ char-set/file-attributes-atom-delimiters)
+ (*parser-canonicalize-symbols?* #f)
+ (*parser-constituents* char-set/file-attributes-constituents)
+ (*parser-enable-file-attributes-parsing?* #f) ; no recursion!
+ (*parser-keyword-style* #f)
+ (*parser-radix* 10)
+ (*parser-table* file-attributes-parser-table))
+ (parse port system-global-environment)))
+
+(define (parse-file-attributes-value parse port)
+ ;; Prepare the parser for second mode.
+ (fluid-let ((*parser-associate-positions?* #f)
+ (*parser-atom-delimiters* char-set/atom-delimiters)
+ (*parser-canonicalize-symbols?* #f)
+ (*parser-constituents* char-set/constituents)
+ (*parser-enable-file-attributes-parsing?* #f) ; no recursion!
+ ;; enable prefix keywords
+ (*parser-keyword-style* 'prefix)
+ (*parser-radix* 10)
+ (*parser-table* system-global-parser-table))
+ (parse port system-global-environment)))
+
+(define (parse-file-attributes-line port db multiline)
+ (declare (ignore db))
+ (tokens->alist
+ (tokenize-file-attributes-line port multiline)))
+\f
+;; If we don't see a COLON or a SEMICOLON often enough, we'll assume
+;; that we're confused by an ill-formed attributes line and abandon
+;; the parsing.
+(define file-attributes-confusion-limit 3)
+
+(define (tokenize-file-attributes-line port multiline)
+ (let ((parser (top-level-parser port)))
+
+ (define (tokenize confusion-count tokens)
+ (if (> confusion-count file-attributes-confusion-limit)
+ (begin
+ (warn "Ill-formed file attributes list.")
+ #f)
+
+ (let ((token (if (and (pair? tokens)
+ (eq? (car tokens) colon-token))
+ (parse-file-attributes-value parser port)
+ (parse-file-attributes-item parser port))))
+
+ (cond ((eof-object? token) (if multiline
+ (error:premature-eof port)
+ token))
+
+ ((or (eq? token colon-token)
+ (eq? token semicolon-token))
+ ;; saw a colon or semicolon, we're still on track.
+ (tokenize 0 (cons token tokens)))
+
+ ((eq? token newline-token)
+ (if multiline
+ ;; discard if multiline
+ (tokenize (+ confusion-count 1) tokens)
+ ;; If we hit the end of line while parsing a single
+ ;; line, then the file attributes line is ill-formed.
+ (begin
+ (warn "Ill-formed file attributes line.")
+ #f)))
+
+ ((symbol? token)
+ (let ((token* (if (null? tokens)
+ (trim-initial-token token)
+ token)))
+ (cond ((not token*) (tokenize confusion-count tokens))
+ ((string-suffix? "-*-" (symbol-name token*))
+ (let ((token** (trim-final-token token*)))
+ (if token**
+ (reverse (cons token** tokens))
+ (reverse tokens))))
+ (else (tokenize (+ confusion-count 1)
+ (cons token* tokens))))))
+
+ (else (tokenize (+ confusion-count 1)
+ (cons token tokens)))))))
+
+ (tokenize 0 '())))
+\f
+;;; In the case where the file attributes line has spurious *-
+;;; characters, and perhaps is not whitespace delimited, these
+;;; characters will end up being the first token or prepended to the
+;;; first token. Examples:
+;;; -*-*- encoding: utf-8 -*-*-
+;;; -*-*-*-logrus-*-*-*-
+;;; -*-*- coding: latin-1 -*-*-
+
+(define (trim-initial-token sym)
+ (if (string-prefix? "*-" (symbol-name sym))
+ (do ((token-string (symbol-name sym) (string-tail token-string 2)))
+ ((not (string-prefix? "*-" token-string))
+ (if (zero? (string-length token-string))
+ #f
+ (string->symbol token-string)))))
+ sym)
+
+;;; If the final token is a symbol that is not whitespace delimited,
+;;; then the end marker will be attached to the token. Furthermore,
+;;; if there are spurious -* characters, these will have been attached
+;;; as well. Examples:
+;;; -*-Scheme-*-
+;;; -*-outline-*-*-
+(define (trim-final-token sym)
+ (do ((token-string
+ (let ((s (symbol-name sym)))
+ (string-head s (- (string-length s) 3)))
+ (string-head token-string (- (string-length token-string) 2))))
+ ((not (string-suffix? "-*" token-string))
+ (if (zero? (string-length token-string))
+ #f
+ (string->symbol token-string)))))
+\f
+;;; Given a list of tokens, create an alist of keys and values.
+(define (tokens->alist tokens)
+
+ ;; A single token is a mode indicator
+ (define (parse-mode mode-token)
+ (list (cons 'MODE mode-token)))
+
+ ;; An attribute consists of a key, colon, value
+ ;; triplet. The key must be a symbol.
+ (define (parse-attribute tokens)
+ (let ((key (car tokens))
+ (t1 (cdr tokens)))
+ (if (or (not (symbol? key))
+ (not (pair? t1)))
+ (ill-formed)
+ (let ((colon (car t1))
+ (t2 (cdr t1)))
+ (if (or (not (eq? colon colon-token))
+ (not (pair? t2)))
+ (ill-formed)
+ (let ((value (car t2))
+ (t3 (cdr t2)))
+ (if (not (null? t3))
+ (ill-formed)
+ (cons key value))))))))
+
+ (define (parse-attributes-alist tokens)
+ (define (group alist accum tail)
+ (if (pair? tail)
+ (let ((token (car tail)))
+ (if (eq? token semicolon-token)
+ (let ((entry (parse-attribute (reverse accum))))
+ (group (if entry
+ (cons entry alist)
+ alist)
+ '()
+ (cdr tail)))
+ (group alist (cons token accum) (cdr tail))))
+ (if (null? accum)
+ (reverse alist)
+ (reverse (let ((entry (parse-attribute (reverse accum))))
+ (if entry
+ (cons entry alist)
+ alist))))))
+ (group '() '() tokens))
+
+ (define (ill-formed)
+ (warn "Ill-formed file attributes list.")
+ #f)
+
+ (if (pair? tokens)
+ (cond ((memq semicolon-token tokens)
+ (parse-attributes-alist tokens))
+
+ ((memq colon-token tokens)
+ (list (parse-attribute tokens)))
+
+ ((null? (cdr tokens))
+ (parse-mode (car tokens)))
+
+ (else (list tokens)))
+ #f))
+\f
+(define (initialize-package!)
+ (let* ((constituents char-set/constituents)
+ (atom-delimiters
+ (char-set-union char-set:whitespace
+ ;; Note that colon is a delimiter!
+ (string->char-set "()[]{}\":;'`,")
+ (char-set #\U+00AB #\U+00BB)))
+ (symbol-leaders
+ (char-set-difference constituents
+ (char-set-union atom-delimiters
+ char-set/number-leaders)))
+ (special-number-leaders
+ (string->char-set "bBoOdDxXiIeEsSlL"))
+ (store-char (lambda (v c h) (vector-set! v (char->integer c) h)))
+ (store-char-set
+ (lambda (v c h)
+ (for-each (lambda (c) (store-char v c h))
+ (char-set-members c)))))
+ (let ((initial (make-vector #x100 #f))
+ (special (make-vector #x100 #f)))
+ (store-char-set initial char-set:whitespace handler:whitespace)
+ (store-char initial #\newline handler:newline)
+ (store-char-set initial char-set/number-leaders handler:atom)
+ (store-char-set initial symbol-leaders handler:symbol)
+ (store-char-set special special-number-leaders handler:number)
+ (store-char initial #\( handler:list)
+ (store-char special #\( handler:vector)
+ (store-char initial #\) handler:close-parenthesis)
+ (store-char initial #\: handler:colon)
+ (store-char initial #\; handler:semicolon)
+ (store-char initial #\' handler:quote)
+ (store-char initial #\` handler:quasiquote)
+ (store-char initial #\, handler:unquote)
+ (store-char initial #\" handler:string)
+ (store-char initial #\# handler:special)
+ (store-char special #\f handler:false)
+ (store-char special #\F handler:false)
+ (store-char special #\t handler:true)
+ (store-char special #\T handler:true)
+ (store-char special #\\ handler:char)
+; (store-char special #\! handler:named-constant)
+ (set! file-attributes-parser-table (make-parser-table initial special))
+ )
+ (set! char-set/file-attributes-atom-delimiters atom-delimiters)
+ (set! char-set/file-attributes-constituents constituents))
+ unspecific)
+\f
+(define (handler:newline port db ctx char)
+ (declare (ignore port db ctx char))
+ newline-token)
+
+(define (handler:colon port db ctx char)
+ (declare (ignore port db ctx char))
+ colon-token)
+
+(define (handler:semicolon port db ctx char)
+ (declare (ignore port db ctx char))
+ semicolon-token)
+
+(define colon-token (list 'COLON))
+(define newline-token (list 'NEWLINE))
+(define semicolon-token (list 'SEMICOLON))
+
(read-finish (port/operation port 'READ-FINISH)))
(lambda (port environment)
(if read-start (read-start port))
- (let ((db (initial-db port environment)))
- (let ((object (dispatch port db 'TOP-LEVEL)))
- (if read-finish (read-finish port))
- (finish-parsing object db)))))))
+ (let restart ()
+ (let* ((db (initial-db port environment))
+ (object (dispatch port db 'TOP-LEVEL)))
+ (if (eq? object restart-parsing)
+ (restart)
+ (begin
+ (if read-finish (read-finish port))
+ (finish-parsing object db)))))))))
(define (read-in-context port db ctx)
(let ((object (dispatch port db ctx)))
- (if (eof-object? object)
- (error:premature-eof port))
- object))
+ (cond ((eof-object? object) (error:premature-eof port))
+ ((eq? object restart-parsing) (error:unexpected-restart port))
+ (else object))))
(define-integrable (read-object port db)
(read-in-context port db 'OBJECT))
(if (eof-object? char)
char
(let ((object ((get-handler char handlers) port db ctx char)))
- (if (eq? object continue-parsing)
- (loop)
- (begin
- (record-object-position! position object db)
- object))))))))
-
+ (cond ((eq? object continue-parsing) (loop))
+ ((eq? object restart-parsing) object)
+ (else
+ (record-object-position! position object db)
+ object))))))))
+
+;; Causes the dispatch to be re-run.
+;; Used to discard things like whitespace and comments.
(define continue-parsing
(list 'CONTINUE-PARSING))
+;; Causes the dispatch to finish, but the top-level parser will return
+;; back into the dispatch after re-initializing the db. This is used
+;; to reset the parser when changing read syntax as specified by the
+;; file attributes list.
+(define restart-parsing
+ (list 'RESTART-PARSING))
+
(define (handler:special port db ctx char1)
(let ((char2 (%read-char/no-eof port db)))
((get-handler char2 (parser-table/special (db-parser-table db)))
(case char
((#\newline) unspecific)
((#\-)
- (parse-file-attributes-line port db false)
- (discard))
+ (process-file-attributes
+ (parse-file-attributes-line port db false) port)
+ (discard restart-parsing))
(else (scan))))))
((#\-) (dash))
(else (scan))))))
- (define (discard)
+ (define (discard action)
(let ((char (%read-char port db)))
(cond ((eof-object? char) char)
- ((char=? char #\newline) unspecific)
- (else (discard)))))
+ ((char=? char #\newline) action)
+ (else (discard action)))))
;; If we're past the second line, just discard.
(if (and (< (current-line port db) 2)
(db-enable-file-attributes-parsing db))
(scan)
- (discard))
-
- continue-parsing)
+ (discard continue-parsing)))
\f
(define (handler:multi-line-comment
port db ctx char1 char2)
(define (scan)
(case (%read-char/no-eof port db)
- ((#\newline) (discard 0))
+ ((#\newline) (discard 0 continue-parsing))
((#\#) (sharp))
((#\-) (dash))
((#\|) (vbar))
(define (sharp)
(case (%read-char/no-eof port db)
- ((#\newline) (discard 0))
+ ((#\newline) (discard 0 continue-parsing))
((#\#) (sharp))
((#\-) (dash))
- ((#\|) (discard 1)) ; nested comment
+ ((#\|) (discard 1 continue-parsing)) ; nested comment
(else (scan))))
(define (vbar)
(case (%read-char/no-eof port db)
- ((#\newline) (discard 0))
+ ((#\newline) (discard 0 continue-parsing))
((#\#) unspecific) ; end of comment
((#\-) (dash))
((#\|) (vbar))
(define (dash)
(case (%read-char/no-eof port db)
- ((#\newline) (discard 0))
+ ((#\newline) (discard 0 continue-parsing))
((#\#) (sharp))
((#\*) (dash-star))
((#\-) (dash))
(define (dash-star)
(case (%read-char/no-eof port db)
- ((#\newline) (discard 0))
+ ((#\newline) (discard 0 continue-parsing))
((#\#) (sharp))
- ((#\-) (parse-file-attributes-line port db true) (discard 0))
+ ((#\-)
+ (process-file-attributes (parse-file-attributes-line port db true) port)
+ (discard 0 restart-parsing))
((#\|) (vbar))
(else (scan))))
;; just track the nesting level and discard stuff.
;; We don't look for the file-attribute marker.
- (define (discard depth)
+ (define (discard depth action)
(case (%read-char/no-eof port db)
- ((#\#) (discard-sharp depth))
- ((#\|) (discard-vbar depth))
- (else (discard depth))))
+ ((#\#) (discard-sharp depth action))
+ ((#\|) (discard-vbar depth action))
+ (else (discard depth action))))
- (define (discard-sharp depth)
+ (define (discard-sharp depth action)
(case (%read-char/no-eof port db)
- ((#\#) (discard-sharp depth))
- ((#\|) (discard (+ depth 1))) ; push
- (else (discard depth))))
+ ((#\#) (discard-sharp depth action))
+ ((#\|) (discard (+ depth 1) action)) ; push
+ (else (discard depth action))))
- (define (discard-vbar depth)
+ (define (discard-vbar depth action)
(case (%read-char/no-eof port db)
((#\#) (if (> depth 0)
- (discard (- depth 1)) ; pop
- unspecific))
- ((#\|) (discard-vbar depth))
- (else (discard depth))))
+ (discard (- depth 1) action) ; pop
+ action))
+ ((#\|) (discard-vbar depth action))
+ (else (discard depth action))))
;; Start the machine.
;; If we're past the second line, just discard.
(if (and (< (current-line port db) 2)
(db-enable-file-attributes-parsing db))
(scan)
- (discard 0))
+ (discard 0 continue-parsing)))
- continue-parsing)
\f
;; It would be better if we could skip over the object without
;; creating it, but for now this will work.
(if (eof-object? char)
(error:premature-eof port))
char))
-
+\f
(define-structure db
(associate-positions? #f read-only #t)
(atom-delimiters #f read-only #t)
(guarantee-char-set constituents #f)
(make-db (environment-lookup environment '*PARSER-ASSOCIATE-POSITIONS?*)
atom-delimiters
- (environment-lookup environment '*PARSER-CANONICALIZE-SYMBOLS?*)
+ (overridable-value
+ port environment '*PARSER-CANONICALIZE-SYMBOLS?*)
constituents
- (environment-lookup environment
- '*PARSER-ENABLE-FILE-ATTRIBUTES-PARSING?*)
- (environment-lookup environment '*PARSER-KEYWORD-STYLE*)
+ (overridable-value
+ port environment '*PARSER-ENABLE-FILE-ATTRIBUTES-PARSING?*)
+ (overridable-value port environment '*PARSER-KEYWORD-STYLE*)
(environment-lookup environment '*PARSER-RADIX*)
(environment-lookup environment '*PARSER-TABLE*)
(make-shared-objects)
(port/operation port 'READ-CHAR)
'())))
+(define (overridable-value port environment name)
+ ;; Check the port property list for the name, and then the
+ ;; environment. This way a port can override the default.
+ (port/get-property port name (environment-lookup environment name)))
+
(define (position-operation port environment)
(let ((default (lambda (port) port #f)))
(if (environment-lookup environment '*PARSER-ASSOCIATE-POSITIONS?*)
default)
default)))
-(define-integrable (current-position port db)
- ((db-get-position db) port))
-
(define-integrable (current-line port db)
((db-input-line db) port))
+(define-integrable (current-position port db)
+ ((db-get-position db) port))
+
(define-integrable (record-object-position! position object db)
(if (and position (object-pointer? object))
(set-db-position-mapping! db
(cons object (db-position-mapping db))
object))
\f
+(define (process-file-attributes file-attribute-alist port)
+ (if file-attribute-alist
+ (begin
+ ;; Disable further attributes parsing.
+ (port/set-property! port '*PARSER-ENABLE-FILE-ATTRIBUTES-PARSING?* #f)
+ (process-keyword-attribute file-attribute-alist port)
+ (process-mode-attribute file-attribute-alist port)
+ (process-studly-case-attribute file-attribute-alist port))))
+
+(define (lookup-file-attribute file-attribute-alist attribute)
+ (assoc attribute file-attribute-alist
+ (lambda (left right)
+ (string-ci=? (symbol-name left) (symbol-name right)))))
+
+;;; Look for keyword-style: prefix or keyword-style: suffix
+(define (process-keyword-attribute file-attribute-alist port)
+ (let ((keyword-entry
+ (lookup-file-attribute file-attribute-alist 'KEYWORD-STYLE)))
+ (if (pair? keyword-entry)
+ (let ((value (cdr keyword-entry)))
+ (cond ((and (symbol value)
+ (string-ci=? (symbol-name value) "prefix"))
+ (port/set-property! port '*PARSER-KEYWORD-STYLE* 'PREFIX))
+ ((and (symbol value)
+ (string-ci=? (symbol-name value) "suffix"))
+ (port/set-property! port '*PARSER-KEYWORD-STYLE* 'SUFFIX))
+ (else
+ (warn "Unrecognized value for keyword-style" value)))))))
+
+;;; Don't do anything with the mode, but warn if it isn't scheme.
+(define (process-mode-attribute file-attribute-alist port)
+ (declare (ignore port))
+ (let ((mode-entry
+ (lookup-file-attribute file-attribute-alist 'KEYWORD-STYLE)))
+ (if (pair? mode-entry)
+ (let ((value (cdr mode-entry)))
+ (if (or (not (symbol value))
+ (not (string-ci=? (symbol-name value) "scheme")))
+ (warn "Unexpected file mode:" (if (symbol? value)
+ (symbol-name value)
+ value)))))))
+
+;; If you want to turn on studly case, then the attribute must be
+;; exactly "sTuDly-case" and the value must be exactly "True". After
+;; all, case is important. If you want to turn it off, the case of
+;; the attribute and the value don't matter.
+(define (process-studly-case-attribute file-attribute-alist port)
+ (let ((studly-case-entry
+ (lookup-file-attribute file-attribute-alist 'STUDLY-CASE)))
+ (if (pair? studly-case-entry)
+ (let ((value (cdr studly-case-entry)))
+ (cond ((or (eq? value #t)
+ (and (symbol? value)
+ (string-ci=? (symbol-name value) "true")))
+ ;; STricTly cHeck thE case.
+ (cond ((not (string=? (symbol-name (car studly-case-entry))
+ "sTuDly-case"))
+ (warn "Attribute name mismatch. Expected sTuDly-case.")
+ #f)
+ ((and (symbol? value)
+ (not (string=? (symbol-name value) "True")))
+ (warn "Attribute value mismatch. Expected True.")
+ #f)
+ (else
+ (port/set-property!
+ port '*PARSER-CANONICALIZE-SYMBOLS?* #f))))
+ ((or (not value)
+ (and (symbol? value)
+ (string-ci=? (symbol-name value) "false")))
+ (port/set-property! port '*PARSER-CANONICALIZE-SYMBOLS?* #t))
+ (else (warn "Unrecognized value for sTuDly-case" value)))))))
+
+\f
(define-syntax define-parse-error
(sc-macro-transformer
(lambda (form environment)
STANDARD-ERROR-HANDLER)))))
(ill-formed-syntax form)))))
-(define condition-type:parse-error)
(define condition-type:illegal-bit-string)
(define condition-type:illegal-boolean)
(define condition-type:illegal-char)
(define condition-type:illegal-named-constant)
(define condition-type:illegal-number)
(define condition-type:illegal-unhash)
-(define condition-type:undefined-hash)
(define condition-type:no-quoting-allowed)
+(define condition-type:non-shared-object)
+(define condition-type:parse-error)
(define condition-type:premature-eof)
(define condition-type:re-shared-object)
-(define condition-type:non-shared-object)
(define condition-type:unbalanced-close)
+(define condition-type:undefined-hash)
+(define condition-type:unexpected-restart)
(define error:illegal-bit-string)
(define error:illegal-boolean)
(define error:illegal-char)
(define error:illegal-named-constant)
(define error:illegal-number)
(define error:illegal-unhash)
-(define error:undefined-hash)
(define error:no-quoting-allowed)
+(define error:non-shared-object)
(define error:premature-eof)
(define error:re-shared-object)
-(define error:non-shared-object)
(define error:unbalanced-close)
+(define error:undefined-hash)
+(define error:unexpected-restart)
\f
(define (initialize-condition-types!)
(set! condition-type:parse-error
(lambda (char port)
(write-string "Unbalanced close parenthesis: " port)
(write char port)))
+ (define-parse-error (unexpected-restart port)
+ (lambda (port* port)
+ (write-string "Unexpected parse restart on: " port)
+ (write port* port)))
unspecific)
\ No newline at end of file