-
Notifications
You must be signed in to change notification settings - Fork 10
/
rfc2388.lisp
473 lines (401 loc) · 17.7 KB
/
rfc2388.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
;;;; Copyright (c) 2003 Janis Dzerins
;;;; Modifications for TBNL Copyright (c) 2004 Michael Weber and Dr. Edmund Weitz
;;;; Further modifications for Toot Copyright (c) 2011, Peter Seibel. All rights reserved.
;;;;
;;;; Redistribution and use in source and binary forms, with or
;;;; without modification, are permitted provided that the following
;;;; conditions are met:
;;;;
;;;; 1. Redistributions of source code must retain the above copyright
;;;; notice, this list of conditions and the following disclaimer.
;;;;
;;;; 2. Redistributions in binary form must reproduce the above
;;;; copyright notice, this list of conditions and the following
;;;; disclaimer in the documentation and/or other materials
;;;; provided with the distribution.
;;;;
;;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
;;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
;;;; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
;;;; FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
;;;; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
;;;; OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
;;;; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;;;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
;;;; USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
;;;; DAMAGE.
(in-package :toot)
;;; Utility functions
(defun lwsp-char-p (char)
"Returns true if CHAR is a linear-whitespace-char (LWSP-char). Either
space or tab, in short."
(or (char= char #\space)
(char= char #\tab)))
;;; *** This actually belongs to RFC2046
;;;
(defun read-until-next-boundary (stream boundary &optional discard out-stream)
"Reads from STREAM up to the next boundary. Returns two values: read
data (nil if DISCARD is true), and true if the boundary is not last
(i.e., there's more data)."
;; Read until [CRLF]--boundary[--][transport-padding]CRLF
;; States: 1 2 345 67 8 9 10
;;
;; *** This will WARN like crazy on some bad input -- should only do each
;; warning once.
(let ((length (length boundary)))
(unless (<= 1 length 70)
(warn "Boundary has invalid length -- must be between 1 and 70, but is: ~S" length))
(when (lwsp-char-p (schar boundary (1- length)))
(warn "Boundary has trailing whitespace: ~S" boundary)))
(flet ((run (result)
"This one writes everything up to a boundary to RESULT stream,
and returns false if the closing delimiter has been read, and
true otherwise."
(let ((state 1)
(boundary-index 0)
(boundary-length (length boundary))
(closed nil)
(queued-chars (make-string 4))
(queue-index 0)
char
(leave-char nil))
(flet ((write-queued-chars ()
(dotimes (i queue-index)
(write-char (schar queued-chars i) result))
(setf queue-index 0))
(enqueue-char ()
(setf (schar queued-chars queue-index) char)
(incf queue-index)))
(loop
(if leave-char
(setq leave-char nil)
(setq char (read-char stream nil nil)))
(unless char
(setq closed t)
(return))
#-(and)
(format t "~&S:~D QI:~D BI:~2,'0D CH:~:[~;*~]~S~%"
state queue-index boundary-index leave-char char)
(case state
(1 ;; optional starting CR
(cond ((char= char #\return)
(enqueue-char)
(setq state 2))
((char= char #\-)
(setq leave-char t
state 3))
(t
(write-char char result))))
(2 ;; optional starting LF
(cond ((char= char #\linefeed)
(enqueue-char)
(setq state 3))
(t
(write-queued-chars)
(setq leave-char t
state 1))))
(3 ;; first dash in dash-boundary
(cond ((char= char #\-)
(enqueue-char)
(setq state 4))
(t
(write-queued-chars)
(setq leave-char t
state 1))))
(4 ;; second dash in dash-boundary
(cond ((char= char #\-)
(enqueue-char)
(setq state 5))
(t
(write-queued-chars)
(setq leave-char t
state 1))))
(5 ;; boundary
(cond ((char= char (schar boundary boundary-index))
(incf boundary-index)
(when (= boundary-index boundary-length)
(setq state 6)))
(t
(write-queued-chars)
(write-sequence boundary result :end boundary-index)
(setq boundary-index 0
leave-char t
state 1))))
(6 ;; first dash in close-delimiter
(cond ((char= char #\-)
(setq state 7))
(t
(setq leave-char t)
(setq state 8))))
(7 ;; second dash in close-delimiter
(cond ((char= char #\-)
(setq closed t
state 8))
(t
;; this is a strange situation -- only two dashes, linear
;; whitespace or CR is allowed after boundary, but there was
;; a single dash... One thing is clear -- this is not a
;; close-delimiter. Hence this is garbage what we're looking
;; at!
(warn "Garbage where expecting close-delimiter!")
(setq leave-char t)
(setq state 8))))
(8 ;; transport-padding (LWSP* == [#\space #\tab]*)
(cond ((lwsp-char-p char)
;; ignore these
)
(t
(setq leave-char t)
(setq state 9))))
(9 ;; CR
(cond ((char= char #\return)
(setq state 10))
(t
(warn "Garbage where expecting CR!"))))
(10 ;; LF
(cond ((char= char #\linefeed)
;; the end
(return))
(t
(warn "Garbage where expecting LF!")))))))
(not closed))))
(if discard
(let ((stream (make-broadcast-stream)))
(values nil (run stream)))
(let* ((stream (or out-stream (make-string-output-stream)))
(closed (run stream)))
(values (or out-stream (get-output-stream-string stream))
closed)))))
;;; Header parsing
(defstruct (header (:type list)
(:constructor make-header (name value parameters)))
name
value
parameters)
(defun skip-linear-whitespace (string &key (start 0) end)
"Returns the position of first non-linear-whitespace character in STRING
bound by START and END."
(position-if-not #'lwsp-char-p string :start start :end end))
(defgeneric parse-header (source &optional start-state)
(:documentation "Parses SOURCE and returs a single MIME header.
Header is a list of the form (NAME VALUE PARAMETERS), PARAMETERS
is a list of (NAME . VALUE)"))
(defmethod parse-header ((source string) &optional (start-state :name))
(with-input-from-string (in source)
(parse-header in start-state)))
;;; *** I don't like this parser -- it will have to be rewritten when I
;;; make my state-machine parser-generator macro!
;;;
(defmethod parse-header ((stream stream) &optional (start-state :name))
"Returns a MIME part header, or NIL, if there is no header. Header is
terminated by CRLF."
(let ((state (ecase start-state
(:name 1)
(:value 2)
(:parameters 3)))
(result (make-string-output-stream))
char
(leave-char nil)
name
value
parameter-name
parameters)
(labels ((skip-lwsp (next-state)
(loop
do (setq char (read-char stream nil nil))
while (and char (lwsp-char-p char)))
(setq leave-char t
state next-state))
(collect-parameter ()
(push (cons parameter-name
(get-output-stream-string result))
parameters)
(setq parameter-name nil)
(skip-lwsp 3))
(token-end-char-p (char)
(or (char= char #\;)
(lwsp-char-p char))))
(loop
(if leave-char
(setq leave-char nil)
(setq char (read-char stream nil nil)))
;; end of stream
(unless char
(return))
(when (char= #\return char)
(setq char (read-char stream nil nil))
(cond ((or (null char)
(char= #\linefeed char))
;; CRLF ends the input
(return))
(t
(warn "LINEFEED without RETURN in header.")
(write-char #\return result)
(setq leave-char t))))
#-(and)
(format t "~&S:~,'0D CH:~:[~;*~]~S~%"
state leave-char char)
(ecase state
(1 ;; NAME
(cond ((char= char #\:)
;; end of name
(setq name (get-output-stream-string result))
(skip-lwsp 2))
(t
(write-char char result))))
(2 ;; VALUE
(cond ((token-end-char-p char)
(setq value (get-output-stream-string result))
(skip-lwsp 3))
(t
(write-char char result))))
(3 ;; PARAMETER name
(cond ((char= #\= char)
(setq parameter-name (get-output-stream-string result)
state 4))
(t
(write-char char result))))
(4 ;; PARAMETER value start
(cond ((char= #\" char)
(setq state 5))
(t
(setq leave-char t
state 7))))
(5 ;; Quoted PARAMETER value
(cond ((char= #\" char)
(setq state 6))
(t
(write-char char result))))
(6 ;; End of quoted PARAMETER value
(cond ((token-end-char-p char)
(collect-parameter))
(t
;; no space or semicolon after quoted parameter value
(setq leave-char t
state 3))))
(7 ;; Unquoted PARAMETER value
(cond ((token-end-char-p char)
(collect-parameter))
(t
(write-char char result))))))
(case state
(1
(setq name (get-output-stream-string result)))
(2
(setq value (get-output-stream-string result)))
((3 4)
(let ((name (get-output-stream-string result)))
(unless (zerop (length name))
(warn "Parameter without value in header.")
(push (cons name nil) parameters))))
((5 6 7)
(push (cons parameter-name (get-output-stream-string result)) parameters))))
(if (and (or (null name)
(zerop (length name)))
(null value)
(null parameters))
nil
(make-header name value parameters))))
;;; _The_ MIME parsing
(defgeneric parse-mime (source boundary tmp-filename-generator)
(:documentation
"Parses MIME entities, returning them as a list. Each element in the
list is of form: (body headers), where BODY is the contents of MIME
part, and HEADERS are all headers for that part. BOUNDARY is a string
used to separate MIME entities."))
(defstruct (content-type (:type list)
(:constructor make-content-type (super sub)))
super
sub)
(defun parse-content-type (string)
"Returns content-type which is parsed from STRING."
(let ((sep-offset (position #\/ string))
(type (array-element-type string)))
(if (numberp sep-offset)
(make-content-type (make-array sep-offset
:element-type type
:displaced-to string)
(make-array (- (length string) (incf sep-offset))
:element-type type
:displaced-to string
:displaced-index-offset sep-offset))
(make-content-type string nil))))
(defun unparse-content-type (ct)
"Returns content-type CT in string representation."
(let ((super (content-type-super ct))
(sub (content-type-sub ct)))
(cond ((and super sub)
(concatenate 'string super "/" sub))
(t (or super "")))))
(defstruct (mime-part (:type list)
(:constructor make-mime-part (contents headers)))
contents
headers)
(defmethod parse-mime ((input string) separator tmp-filename-generator)
(with-input-from-string (stream input)
(parse-mime stream separator tmp-filename-generator)))
(defmethod parse-mime ((input stream) boundary tmp-filename-generator)
;; Find the first boundary. Return immediately if it is also the last
;; one.
(unless (nth-value 1 (read-until-next-boundary input boundary t))
(return-from parse-mime nil))
(let ((result ()))
(loop
(let ((headers (loop
for header = (parse-header input)
while header
when (string-equal "CONTENT-TYPE" (header-name header))
do (setf (header-value header) (parse-content-type (header-value header)))
collect header)))
(let ((file-name (get-file-name headers)))
(cond ((and tmp-filename-generator file-name)
(let ((temp-file (funcall tmp-filename-generator)))
(multiple-value-bind (text more)
(with-open-file (out-file (ensure-directories-exist temp-file)
:direction :output
;; external format for faithful I/O
;; see <http://cl-cookbook.sourceforge.net/io.html#faith>
#+(or :sbcl :lispworks :allegro :openmcl)
:external-format
#+sbcl :latin-1
#+:lispworks '(:latin-1 :eol-style :lf)
#+:allegro (excl:crlf-base-ef :latin1)
#+:openmcl '(:character-encoding :iso-8859-1
:line-termination :unix))
(read-until-next-boundary input boundary nil out-file))
(declare (ignore text))
(when (and (stringp file-name) (plusp (length file-name)))
(push (make-mime-part temp-file headers) result))
(when (not more)
(return)))))
(t
(multiple-value-bind (text more)
(read-until-next-boundary input boundary)
(push (make-mime-part text headers) result)
(when (not more)
(return))))))))
(nreverse result)))
(defun find-header (label headers)
"Find header by label from set of headers."
(find label headers :key #'header-name :test #'string-equal))
(defun find-parameter (name params)
"Find header parameter by name from set of parameters."
(assoc name params :test #'string-equal))
(defun mime-content-type (part &key as-string)
"Returns the Content-Type header of mime-part PART."
(let ((header (find-header "CONTENT-TYPE" (mime-part-headers part))))
(if header
(if as-string
(or (unparse-content-type (header-value header)) "")
(header-value header))
(when as-string ""))))
(defun find-content-disposition-header (headers)
(find-if (lambda (header)
(and (string-equal "CONTENT-DISPOSITION" (header-name header))
(string-equal "FORM-DATA" (header-value header))))
headers))
(defun get-file-name (headers)
(cdr (find-parameter "FILENAME"
(header-parameters (find-content-disposition-header headers)))))