From: Chris Hanson Date: Wed, 7 Jun 2000 18:37:33 +0000 (+0000) Subject: Complete redesign of the MIME codecs. This design is mostly working, X-Git-Tag: 20090517-FFI~3593 X-Git-Url: https://birchwood-abbey.net/git?a=commitdiff_plain;h=6089b52021e498a94ffcb920053d08c1b0c8f1db;p=mit-scheme.git Complete redesign of the MIME codecs. This design is mostly working, except for the quoted-printable decoder, which is too complicated and still has some subtle bugs. --- diff --git a/v7/src/imail/imail.pkg b/v7/src/imail/imail.pkg index 8eed5d22b..45175a48f 100644 --- a/v7/src/imail/imail.pkg +++ b/v7/src/imail/imail.pkg @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: imail.pkg,v 1.47 2000/06/05 20:56:45 cph Exp $ +;;; $Id: imail.pkg,v 1.48 2000/06/07 18:37:33 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -288,9 +288,15 @@ (files "mime-codec") (parent (edwin imail)) (export (edwin imail) - decode-base64-binary-string - decode-base64-binary-substring - decode-base64-text-string - decode-base64-text-substring - decode-quoted-printable-string - decode-quoted-printable-substring)) \ No newline at end of file + decode-base64:finalize + decode-base64:initialize + decode-base64:update + decode-quoted-printable:finalize + decode-quoted-printable:initialize + decode-quoted-printable:update + encode-base64:finalize + encode-base64:initialize + encode-base64:update + encode-quoted-printable:finalize + encode-quoted-printable:initialize + encode-quoted-printable:update)) \ No newline at end of file diff --git a/v7/src/imail/mime-codec.scm b/v7/src/imail/mime-codec.scm index 6bb2a0aa1..0e6ce4a8c 100644 --- a/v7/src/imail/mime-codec.scm +++ b/v7/src/imail/mime-codec.scm @@ -1,6 +1,6 @@ ;;; -*-Scheme-*- ;;; -;;; $Id: mime-codec.scm,v 1.7 2000/06/03 01:58:32 cph Exp $ +;;; $Id: mime-codec.scm,v 1.8 2000/06/07 18:37:25 cph Exp $ ;;; ;;; Copyright (c) 2000 Massachusetts Institute of Technology ;;; @@ -22,59 +22,255 @@ (declare (usual-integrations)) +;;;; Encode quoted-printable + +;;; Hair from two things: (1) delaying the decision to encode trailing +;;; whitespace until we see what comes after it on the line; and (2) +;;; an incremental line-breaking algorithm. + +(define-structure (qp-encoding-context + (conc-name qp-encoding-context/) + (constructor encode-quoted-printable:initialize + (port text?))) + (port #f read-only #t) + (text? #f read-only #t) + ;; Either #F, or an LWSP input that may or may not need to be + ;; encoded, depending on subsequent input. + (pending-lwsp #f) + ;; An exact integer between 0 and 75 inclusive, recording the number + ;; of characters that have been written on the current output line. + (column 0) + ;; Either #F, or an output string that may or may not fit on the + ;; current output line, depending on subsequent output. + (pending-output #f)) + +(define (encode-quoted-printable:finalize context) + (encode-qp-pending-lwsp context #f 'INPUT-END) + (write-qp-pending-output context #t)) + +(define (encode-quoted-printable:update context string start end) + (if (qp-encoding-context/text? context) + (let loop ((start start)) + (let ((i (substring-find-next-char string start end #\newline))) + (if i + (begin + (encode-qp context string start i 'LINE-END) + (loop (fix:+ i 1))) + (encode-qp context string start end 'PARTIAL)))) + (encode-qp context string start end 'PARTIAL))) + +(define (encode-qp context string start end type) + (encode-qp-pending-lwsp context (fix:< start end) type) + (let ((port (qp-encoding-context/port context)) + (text? (qp-encoding-context/text? context))) + (let loop ((start start)) + (cond ((fix:< start end) + (let ((char (string-ref string start)) + (start (fix:+ start 1))) + (cond ((not (char-lwsp? char)) + (if (char-set-member? char-set:qp-encoded char) + (write-qp-encoded context char) + (write-qp-clear context char)) + (loop start)) + ((and (eq? type 'PARTIAL) + (not (fix:< start end))) + (set-qp-encoding-context/pending-lwsp! context char)) + (else + (if (fix:< start end) + (write-qp-clear context char) + (write-qp-encoded context char)) + (loop start))))) + ((eq? type 'LINE-END) + (write-qp-hard-break context)))))) + +(define (encode-qp-pending-lwsp context packet-not-empty? type) + (let ((pending (qp-encoding-context/pending-lwsp context))) + (if pending + (cond (packet-not-empty? + (set-qp-encoding-context/pending-lwsp! context #f) + (write-qp-clear context pending)) + ((not (eq? type 'PARTIAL)) + (set-qp-encoding-context/pending-lwsp! context #f) + (write-qp-encoded context pending)))))) + +(define (write-qp-clear context char) + (write-qp-pending-output context #f) + (let ((port (qp-encoding-context/port context)) + (column (qp-encoding-context/column context))) + (cond ((fix:< column 75) + (write-char char port) + (set-qp-encoding-context/column! context (fix:+ column 1))) + ((not (qp-encoding-context/text? context)) + (write-qp-soft-break context) + (write-char char port) + (set-qp-encoding-context/column! context 1)) + (else + (set-qp-encoding-context/pending-output! context (string char)))))) + +(define (write-qp-encoded context char) + (write-qp-pending-output context #f) + (let ((port (qp-encoding-context/port context)) + (column (qp-encoding-context/column context)) + (d (char->integer char))) + (let ((c1 (digit->char (fix:lsh d -4) 16)) + (c2 (digit->char (fix:and d #x0F) 16))) + (if (fix:= column 73) + (set-qp-encoding-context/pending-output! context (string #\= c1 c2)) + (begin + (if (fix:> column 73) + (write-qp-soft-break context)) + (write-char #\= port) + (write-char c1 port) + (write-char c2 port) + (set-qp-encoding-context/column! + context + (fix:+ (qp-encoding-context/column context) 3))))))) + +(define (write-qp-hard-break context) + (write-qp-pending-output context #t) + (newline (qp-encoding-context/port context)) + (set-qp-encoding-context/column! context 0)) + +(define (write-qp-pending-output context newline?) + (let ((pending (qp-encoding-context/pending-output context))) + (if pending + (begin + (if (not newline?) + (write-qp-soft-break context)) + (write-string pending (qp-encoding-context/port context)) + (set-qp-encoding-context/pending-output! context #f) + (set-qp-encoding-context/column! + context + (fix:+ (qp-encoding-context/column context) + (string-length pending))))))) + +(define (write-qp-soft-break context) + (let ((port (qp-encoding-context/port context))) + (write-char #\= port) + (newline port)) + (set-qp-encoding-context/column! context 0)) + ;;;; Decode quoted-printable -(define (decode-quoted-printable-string string port) - (decode-quoted-printable-substring string 0 (string-length string) port)) +;;; This decoder is unbelievably hairy. The hair is due to the fact +;;; that the input to the decoder is arbitrarily packetized, and the +;;; encoder really wants to operate on units of input lines. The +;;; strategy is that we process as much of the input packet as +;;; possible, then save enough state to continue when the next packet +;;; comes along. -(define (decode-quoted-printable-substring string start end port) +(define-structure (qp-decoding-context + (conc-name qp-decoding-context/) + (constructor decode-quoted-printable:initialize + (port text?))) + (port #f read-only #t) + (text? #f read-only #t) + ;; Either #F, or a string. If a string, the string will entirely + ;; consist of LWSP characters. This is whitespace that appeared at + ;; the end of an input packet. We are waiting to see if it is + ;; followed by a newline, meaning it is to be discarded, or + ;; otherwise is part of the output. + (pending #f) + ;; Either #F, 'EQUALS, or a character. If not #F, it indicates that + ;; a packet ended with an unfinished = sequence that we can't decode + ;; until we get more characters. The symbol 'EQUALS means we saw + ;; the equals sign but nothing else. A character means we saw the + ;; equals sign and that character. + (partial #f)) + +(define (decode-quoted-printable:finalize context) + (decode-qp-pending context 'INPUT-END)) + +(define (decode-quoted-printable:update context string start end) (let loop ((start start)) - (let ((i (substring-find-next-char string start end #\newline))) - (if i - (begin - (if (decode-quoted-printable-line string start i port) - (newline port)) - (loop (fix:+ i 1))) - (decode-quoted-printable-line string start end port))))) + (if (fix:< start end) + (let ((i (substring-find-next-char string start end #\newline))) + (if i + (begin + (let ((i (skip-lwsp-backwards string start i))) + (cond ((fix:< start i) + (decode-qp-pending context 'PARTIAL) + (decode-qp context string start i 'LINE-END)) + ((not (decode-qp-pending context 'LINE-END)) + (decode-qp context "" 0 0 'LINE-END)))) + (loop (fix:+ i 1))) + (let ((end* (skip-lwsp-backwards string start end))) + (if (fix:< start end*) + (begin + (decode-qp-pending context 'PARTIAL) + (decode-qp context string start end* 'PARTIAL))) + (if (fix:< end* end) + (set-qp-decoding-context/pending! + context + (let ((string (substring string end* end)) + (pending (qp-decoding-context/pending context))) + (if pending + (string-append pending string) + string)))))))))) -(define (decode-quoted-printable-line string start end port) - (let ((end (skip-lwsp-backwards string start end))) - (let loop ((start start)) +(define (decode-qp-pending context type) + (let ((pending (qp-decoding-context/pending context))) + (and pending + (begin + (set-qp-decoding-context/pending! context #f) + (decode-qp context pending 0 + (if (eq? type 'PARTIAL) (string-length pending) 0) + type) + #t)))) + +(define (decode-qp context string start end type) + (let ((port (qp-decoding-context/port context))) + (let loop ((start (decode-qp-partial context string start end type))) (let ((i (substring-find-next-char-in-set string start end char-set:qp-encoded))) (if i (begin - (write-substring string start i port) - (cond ((decode-qp-hex-octet string i end) - => (lambda (char) - (write-char char port) - (loop (fix:+ i 3)))) - ((char=? (string-ref string i) #\=) - (if (fix:< (fix:+ i 1) end) - ;; This case is illegal. RFC 2045 recommends - ;; leaving it unconverted. - (begin - (write-char (string-ref string i) port) - (write-char (string-ref string (fix:+ i 1)) port) - (loop (fix:+ i 2))) - ;; Soft line break. - #f)) - (else + (if (fix:< start i) + (write-substring string start i port)) + (cond ((not (char=? (string-ref string i) #\=)) ;; This case is illegal. RFC 2045 recommends ;; dropping the char altogether. - (loop (fix:+ i 1))))) + (loop (fix:+ i 1))) + ((fix:< (fix:+ i 2) end) + (loop + (fix:+ (fix:+ i 1) + (decode-qp-hex-octet + context + (string-ref string (fix:+ i 1)) + (string-ref string (fix:+ i 2)))))) + ((eq? type 'PARTIAL) + (set-qp-decoding-context/partial! + context + (if (fix:< (fix:+ i 1) end) + (string-ref string (fix:+ i 1)) + 'EQUALS))) + ((fix:< (fix:+ i 1) end) + ;; This case is illegal. RFC 2045 recommends + ;; leaving it unconverted. + (write-char #\= port) + (write-char (string-ref string (fix:+ i 1)) port)) + ((eq? type 'INPUT-END) + ;; This case is illegal. RFC 2045 recommends + ;; leaving it unconverted. + (write-char #\= port)) + (else + ;; This is a soft line break. + unspecific))) (begin - (write-substring string start end port) - ;; Hard line break. - #t)))))) - -(define (decode-qp-hex-octet string start end) - (and (fix:<= (fix:+ start 3) end) - (let ((d1 (char->digit (string-ref string (fix:+ start 1)) 16)) - (d2 (char->digit (string-ref string (fix:+ start 2)) 16))) - (and d1 d2 - (integer->char (fix:+ (fix:* 16 d1) d2)))))) + (if (fix:< start end) + (write-substring string start end port)) + (if (eq? type 'LINE-END) + (if (qp-decoding-context/text? context) + (if (eq? (qp-decoding-context/partial context) 'EQUALS) + ;; This is a soft line break. + (set-qp-decoding-context/partial! context #f) + ;; This is a hard line break. + (newline port)) + ;; I think this is illegal (RFC 2045 doesn't + ;; say). Most sensible thing to do is treat it + ;; like a soft line break. + unspecific)))))))) (define char-set:qp-encoded (char-set-invert @@ -82,103 +278,254 @@ (char-set #\=)) char-set:lwsp))) +(define (decode-qp-partial context string start end type) + (let ((partial (qp-decoding-context/partial context))) + (cond ((not (and partial (fix:< start end))) + (if (and partial (not (eq? type 'PARTIAL))) + (let ((port (qp-decoding-context/port context))) + ;; If PARTIAL is a character, this is illegal. + ;; Otherwise, this is a soft line break. + (cond ((char? partial) + ;; Illegal. + (write-char #\= port) + (write-char partial port) + (set-qp-decoding-context/partial! context #f)) + ((eq? type 'INPUT-END) + ;; Illegal. + (write-char #\= port) + (set-qp-decoding-context/partial! context #f)) + (else + ;; Soft line break. + unspecific)))) + start) + ((eq? partial 'EQUALS) + (if (fix:< (fix:+ start 1) end) + (begin + (set-qp-decoding-context/partial! context #f) + (fix:+ start + (decode-qp-hex-octet + context + (string-ref string start) + (string-ref string (fix:+ start 1))))) + (begin + (set-qp-decoding-context/partial! context + (string-ref string start)) + (fix:+ start 1)))) + (else + (set-qp-decoding-context/partial! context #f) + (fix:+ start + (fix:- (decode-qp-hex-octet context + partial + (string-ref string start)) + 1)))))) + +(define (decode-qp-hex-octet context c1 c2) + (let ((port (qp-decoding-context/port context))) + (let ((char + (let ((d1 (char->digit c1 16)) + (d2 (char->digit c2 16))) + (and d1 d2 + (integer->char (fix:or (fix:lsh d1 4) d2)))))) + (if char + (begin + (write-char char port) + 2) + ;; This case is illegal. RFC 2045 recommends + ;; leaving it unconverted. + (begin + (write-char #\= port) + (write-char c1 port) + 1))))) + +;;;; Encode BASE64 + +(define-structure (base64-encoding-context + (conc-name base64-encoding-context/) + (constructor encode-base64:initialize (port text?))) + (port #f read-only #t) + (text? #f read-only #t) + (buffer (make-string 48) read-only #t) + (index 0)) + +(define (encode-base64:finalize context) + (write-base64-line context)) + +(define (encode-base64:update context string start end) + (if (base64-encoding-context/text? context) + (let loop ((start start)) + (let ((index (substring-find-next-char string start end #\newline))) + (if index + (begin + (encode-base64 context string start index) + (encode-base64 context "\r\n" 0 2) + (loop (fix:+ index 1))) + (encode-base64 context string start end)))) + (encode-base64 context string start end))) + +(define (encode-base64 context string start end) + (let ((buffer (base64-encoding-context/buffer context))) + (let loop ((start start)) + (if (fix:< start end) + (let ((i (base64-encoding-context/index context))) + (let ((start* (fix:min end (fix:+ start (fix:- 48 i))))) + (let ((i (substring-move! string start start* buffer i))) + (set-base64-encoding-context/index! context i) + (if (fix:= i 48) + (write-base64-line context))) + (loop start*))))))) + +(define (write-base64-line context) + (let ((buffer (base64-encoding-context/buffer context)) + (end (base64-encoding-context/index context)) + (port (base64-encoding-context/port context))) + (if (fix:> end 0) + (begin + (let ((write-digit + (lambda (d) + (write-char (string-ref base64-digit-table (fix:and #x3F d)) + port)))) + (let loop ((start 0)) + (let ((n (fix:- end start))) + (cond ((fix:>= n 3) + (let ((d1 (vector-8b-ref buffer start)) + (d2 (vector-8b-ref buffer (fix:+ start 1))) + (d3 (vector-8b-ref buffer (fix:+ start 2)))) + (write-digit (fix:lsh d1 -2)) + (write-digit (fix:or (fix:lsh d1 4) (fix:lsh d2 -4))) + (write-digit (fix:or (fix:lsh d2 2) (fix:lsh d3 -6))) + (write-digit d3)) + (loop (fix:+ start 3))) + ((fix:= n 2) + (let ((d1 (vector-8b-ref buffer start)) + (d2 (vector-8b-ref buffer (fix:+ start 1)))) + (write-digit (fix:lsh d1 -2)) + (write-digit (fix:or (fix:lsh d1 4) (fix:lsh d2 -4))) + (write-digit (fix:lsh d2 2))) + (write-char #\= port)) + ((fix:= n 1) + (let ((d1 (vector-8b-ref buffer start))) + (write-digit (fix:lsh d1 -2)) + (write-digit (fix:lsh d1 4))) + (write-char #\= port) + (write-char #\= port)))))) + (newline port) + (set-base64-encoding-context/index! context 0))))) + ;;;; Decode BASE64 -(define (decode-base64-binary-string string port) - (decode-base64-binary-substring string 0 (string-length string) port)) - -(define (decode-base64-binary-substring string start end port) - (decode-base64-internal string start end - (lambda (char) (write-char char port)))) - -(define (decode-base64-text-string string pending-return? port) - (decode-base64-text-substring string 0 (string-length string) - pending-return? port)) - -(define (decode-base64-text-substring string start end pending-return? port) - (decode-base64-internal string start end - (lambda (char) - (if pending-return? - (case char - ((#\linefeed) - (set! pending-return? #f) - (newline port)) - ((#\return) - (write-char #\return port)) - (else - (set! pending-return? #f) - (write-char #\return port))) - (if (char=? char #\return) - (set! pending-return? #t) - (write-char char port))))) - pending-return?) - -(define (decode-base64-internal string start end output) - (let ((input - (let ((port (string->input-port string start end))) - (lambda (index) - (let loop () - (let ((char (read-char port))) - (cond ((eof-object? char) - (if (not (fix:= index 0)) - (error "Premature EOF from BASE64 port.")) - #f) - ((let ((digit - (vector-8b-ref base64-char-table - (char->integer char)))) - (and (fix:< digit #x40) - digit))) - ((char=? char #\=) - (if (not (or (fix:= index 2) (fix:= index 3))) - (error "Misplaced #\= from BASE64 port.")) - #f) - (else (loop))))))))) - (let loop () - (if (decode-base64-quantum input output) - (loop))))) +(define-structure (base64-decoding-context + (conc-name base64-decoding-context/) + (constructor decode-base64:initialize (port text?))) + (port #f read-only #t) + (text? #f read-only #t) + (input-buffer (make-string 4) read-only #t) + (input-index 0) + (output-buffer (make-string 3) read-only #t) + (pending-return? #f)) + +(define (decode-base64:finalize context) + (if (fix:> (base64-decoding-context/input-index context) 0) + (error "BASE64 input length is not a multiple of 4.")) + (if (base64-decoding-context/pending-return? context) + (write-char #\return (base64-decoding-context/port context)))) + +(define (decode-base64:update context string start end) + (let ((buffer (base64-decoding-context/input-buffer context))) + (let loop + ((start start) + (index (base64-decoding-context/input-index context))) + (if (fix:< start end) + (let ((char (string-ref string start)) + (start (fix:+ start 1))) + (if (or (char=? char #\=) + (fix:< (vector-8b-ref base64-char-table + (char->integer char)) + #x40)) + (begin + (string-set! buffer index char) + (if (fix:< index 3) + (loop start (fix:+ index 1)) + (begin + (decode-base64-quantum context) + (loop start 0)))) + (loop start index))) + (set-base64-decoding-context/input-index! context index))))) -(define (decode-base64-quantum input output) - (let ((d1 (input 0)) - (output - (lambda (n) - (output (integer->char (fix:and n #xff)))))) - (and d1 - (let* ((d2 (input 1)) - (d3 (input 2)) - (d4 (input 3))) - (if d4 - (if d3 - (let ((n - (fix:+ (fix:+ (fix:lsh d1 18) - (fix:lsh d2 12)) - (fix:+ (fix:lsh d3 6) - d4)))) - (output (fix:lsh n -16)) - (output (fix:lsh n -8)) - (output n) - #t) - (error "Misplaced #\= from BASE64 port.")) - (begin - (if d3 - (let ((n - (fix:+ (fix:+ (fix:lsh d1 10) - (fix:lsh d2 4)) - (fix:lsh d3 -2)))) - (output (fix:lsh n -8)) - (output n)) - (output (fix:+ (fix:lsh d1 2) - (fix:lsh d2 -4)))) - #f)))))) - -(define base64-char-table - (let ((table (make-string 256 (integer->char #xff)))) - (define (do-range low high value) - (vector-8b-set! table low value) - (if (fix:< low high) - (do-range (fix:+ low 1) high (fix:+ value 1)))) - (do-range (char->integer #\A) (char->integer #\Z) 0) - (do-range (char->integer #\a) (char->integer #\z) 26) - (do-range (char->integer #\0) (char->integer #\9) 52) - (vector-8b-set! table (char->integer #\+) 62) - (vector-8b-set! table (char->integer #\/) 63) - table)) \ No newline at end of file +(define (decode-base64-quantum context) + (let ((input (base64-decoding-context/input-buffer context)) + (output (base64-decoding-context/output-buffer context)) + (port (base64-decoding-context/port context))) + (let ((n (decode-base64-quantum-1 input output))) + (if (base64-decoding-context/text? context) + (let loop + ((index 0) + (pending? (base64-decoding-context/pending-return? context))) + (if (fix:< index n) + (let ((char (string-ref output index))) + (if pending? + (if (char=? char #\linefeed) + (begin + (newline port) + (loop (fix:+ index 1) #f)) + (begin + (write-char #\return port) + (loop index #f))) + (if (char=? char #\return) + (loop (fix:+ index 1) #t) + (begin + (write-char char port) + (loop (fix:+ index 1) #f))))) + (set-base64-decoding-context/pending-return?! context + pending?))) + (write-substring output 0 n port))))) + +(define (decode-base64-quantum-1 input output) + (let ((d1 (decode-base64-char input 0)) + (d2 (decode-base64-char input 1))) + (cond ((not (char=? (string-ref input 3) #\=)) + (let ((n + (fix:+ (fix:+ (fix:lsh d1 18) + (fix:lsh d2 12)) + (fix:+ (fix:lsh (decode-base64-char input 2) 6) + (decode-base64-char input 3))))) + (vector-8b-set! output 0 (fix:lsh n -16)) + (vector-8b-set! output 1 (fix:and #xFF (fix:lsh n -8))) + (vector-8b-set! output 2 (fix:and #xFF n)) + 3)) + ((not (char=? (string-ref input 2) #\=)) + (let ((n + (fix:+ (fix:+ (fix:lsh d1 10) (fix:lsh d2 4)) + (fix:lsh (decode-base64-char input 2) -2)))) + (vector-8b-set! output 0 (fix:lsh n -8)) + (vector-8b-set! output 1 (fix:and #xFF n))) + 2) + (else + (vector-8b-set! output 0 (fix:+ (fix:lsh d1 2) (fix:lsh d2 -4))) + 1)))) + +(define (decode-base64-char input index) + (let ((digit (vector-8b-ref base64-char-table (vector-8b-ref input index)))) + (if (fix:> digit #x40) + (error "Misplaced #\= in BASE64 input.")) + digit)) + +(define base64-char-table) +(define base64-digit-table) +(let ((char-table (make-string 256 (integer->char #xff))) + (digit-table (make-string 64))) + (define (do-range low high value) + (do-char low value) + (if (fix:< low high) + (do-range (fix:+ low 1) high (fix:+ value 1)))) + (define (do-char code value) + (vector-8b-set! char-table code value) + (vector-8b-set! digit-table value code)) + + (do-range (char->integer #\A) (char->integer #\Z) 0) + (do-range (char->integer #\a) (char->integer #\z) 26) + (do-range (char->integer #\0) (char->integer #\9) 52) + (do-char (char->integer #\+) 62) + (do-char (char->integer #\/) 63) + (set! base64-char-table char-table) + (set! base64-digit-table digit-table) + unspecific) \ No newline at end of file