Main Page   Class Hierarchy   Alphabetical List   Compound List   Examples  
qp.h
1/***************************************************************************
2 copyright : (C) 2002-2008 by Stefano Barbato
3 email : stefano@codesink.org
4
5 $Id: qp.h,v 1.20 2008-10-07 11:06:26 tat Exp $
6 ***************************************************************************/
7#ifndef _MIMETIC_CODEC_QP_H_
8#define _MIMETIC_CODEC_QP_H_
9#include <iostream>
10#include <string>
11#include <sstream>
12#include <cassert>
13#include <mimetic/libconfig.h>
14#include <mimetic/utils.h>
15#include <mimetic/circular_buffer.h>
16#include <mimetic/codec/codec_base.h>
17#include <mimetic/codec/codec_chain.h>
18
19namespace mimetic
20{
21
22class QP
23{
24 friend class test_qp;
25 enum { LF = 0xA, CR = 0xD, NL = LF, TAB = 9, SP = 32 };
26 enum { default_maxlen = 76 };
27 enum {
28 printable, /* print as-is */
29 tab, /* print if !isBinary */
30 sp, /* ' ' */
31 newline, /* cr or lf; encode if isBinary*/
32 binary, /* rest of the ascii map */
33 unsafe /* "!\"#$@[]\\^`{}|~" */
34 };
35 static char sTb[256];
36
37public:
38
39/// quoted-printable encoder
40/*!
41
42 \sa encode decode
43 */
44class Encoder: public buffered_codec, public chainable_codec<Encoder>
45{
46 enum { laBufSz = 5 }; // look-ahead buffer
47 size_t m_pos, m_maxlen;
48 bool m_binary;
49 circular_buffer<char_type> m_cbuf;
50
51 template<typename OutIt>
52 void hardLineBrk(OutIt& out)
53 {
54 *out = NL; ++out;
55 m_pos = 1;
56 }
57 template<typename OutIt>
58 void softLineBrk(OutIt& out)
59 {
60 *out = '='; ++out;
61 hardLineBrk(out);
62 }
63 template<typename OutIt>
64 void write(char_type ch, OutIt& out)
65 {
66 bool is_last_ch = m_cbuf.empty();
67 if(!is_last_ch && m_pos == m_maxlen)
68 softLineBrk(out);
69 *out = ch; ++out;
70 m_pos++;
71 }
72 template<typename OutIt>
73 void writeHex(char_type ch, OutIt& out)
74 {
75 static char_type hexc[] =
76 {
77 '0', '1', '2', '3', '4', '5' ,'6', '7', '8', '9',
78 'A', 'B', 'C', 'D', 'E', 'F'
79 };
80 bool is_last_ch = m_cbuf.empty();
81 if(m_pos + (is_last_ch ? 1 : 2) >= m_maxlen)
82 softLineBrk(out);
83 // write out =HH
84 *out = '='; ++out;
85 *out = hexc[ch >> 4]; ++out;
86 *out = hexc[ch & 0xf]; ++out;
87 m_pos += 3;
88 }
89 template<typename OutIt>
90 void encodeChar(char_type c, OutIt& out)
91 {
92 int cnt = m_cbuf.count();
93 switch(sTb[c])
94 {
95 case printable:
96 if(m_pos == 1)
97 {
98 switch(c)
99 {
100 case 'F': // hex enc on "^From .*"
101 if(cnt>=4 && m_cbuf.compare(0,4,"rom "))
102 {
103 writeHex(c,out);
104 return;
105 }
106 break;
107 case '.': // hex encode if "^.[\r\n]" or on eof
108 if(!cnt || sTb[ m_cbuf[0] ] == newline)
109 {
110 writeHex(c,out);
111 return;
112 }
113 break;
114 }
115 }
116 write(c,out);
117 break;
118 case tab:
119 case sp:
120 // on binary encoding, or last input ch or newline
121 if(m_binary || !cnt || sTb[ m_cbuf[0] ] == newline)
122 writeHex(c,out);
123 else
124 write(c,out);
125 break;
126 case newline:
127 if(m_binary)
128 writeHex(c, out);
129 else {
130 if(cnt && m_cbuf[0] == (c == CR ? LF : CR))
131 m_cbuf.pop_front(); // eat it
132 hardLineBrk(out);
133 }
134 break;
135 case binary:
136 if(!m_binary) m_binary = 1; // switch to binary mode
137 writeHex(c, out);
138 break;
139 case unsafe:
140 writeHex(c, out);
141 break;
142 }
143 }
144public:
145 /*! return the multiplier of the required (max) size of the output buffer
146 * when encoding */
147 double codeSizeMultiplier() const
148 {
149 // worse case is *3 but we'll use the (euristic) average value of 1.5.
150 // this may decrease performance when encoding messages with many
151 // non-ASCII (> 127) characters
152 return 1.5;
153 }
154 /*!
155 Constructor
156 \param isBinary if true all space and newline characters will be
157 treated like binary chars and will be hex encoded (useful if you
158 want to encode a binary file).
159 */
160 Encoder(bool isBinary = false)
161 : m_pos(1), m_maxlen(default_maxlen),
162 m_binary(isBinary), m_cbuf(laBufSz)
163 {
164 }
165 /*! Returns the name of the codec ("Quoted-Printable") */
166 const char* name() const { return "Quoted-Printable"; }
167 /*! Returns the max line length */
168 size_t maxlen()
169 {
170 return m_maxlen;
171 }
172 /*!
173 Set the max line length. No more then \p i chars will be
174 printed on one line.
175 */
176 void maxlen(size_t i)
177 {
178 m_maxlen = i;
179 }
180 /*!
181 Encodes [\p bit,\p eit) and write any encoded char to \p out.
182 */
183 template<typename InIt, typename OutIt>
184 void process(InIt bit, InIt eit, OutIt out)
185 {
186 for(; bit != eit; ++bit)
187 process(*bit, out);
188 flush(out);
189 }
190 /*!
191 Encodes \p ic and write any encoded output char to \p out.
192 \warning You must call flush() when all chars have been
193 processed by the encode funcion.
194 \n
195 \code
196 while( (c = getchar()) != EOF )
197 qp.process(c, out);
198 qp.flush();
199 \endcode
200 \n
201 \sa flush()
202 */
203 template<typename OutIt>
204 void process(char_type ic, OutIt& out)
205 {
206 m_cbuf.push_back(ic);
207 if(m_cbuf.count() < laBufSz)
208 return;
209 char_type c = m_cbuf.front();
210 m_cbuf.pop_front();
211 encodeChar(c, out);
212 }
213 /*!
214 Write to \p out any buffered encoded char.
215 */
216 template<typename OutIt>
217 void flush(OutIt& out)
218 {
219 char_type c;
220 while(!m_cbuf.empty())
221 {
222 c = m_cbuf.front();
223 m_cbuf.pop_front();
224 encodeChar(c, out);
225 }
226 }
227};
228
229/// quoted-printable decoder
230/*!
231
232 \sa encode decode
233 */
234class Decoder: public buffered_codec, public chainable_codec<Encoder>
235{
236 enum { laBufSz = 80 }; // look-ahead buffer
237 enum {
238 sWaitingChar,
239 sAfterEq,
240 sWaitingFirstHex,
241 sWaitingSecondHex,
242 sBlank,
243 sNewline,
244 sOtherChar
245 };
246 size_t m_pos, m_maxlen;
247
248
249 int m_state, m_nl;
250 std::string m_prev;
251
252 template<typename OutIt>
253 void hardLineBrk(OutIt& out) const
254 {
255 *out = NL; ++out;
256 }
257 template<typename OutIt>
258 void write(char_type ch, OutIt& out) const
259 {
260 *out = ch; ++out;
261 }
262 bool isnl(char_type c) const
263 {
264 return (c == CR || c == LF);
265 }
266 template<typename OutIt>
267 void flushPrev(OutIt& out)
268 {
269 copy(m_prev.begin(), m_prev.end(), out);
270 m_prev.clear();
271 }
272 int hex_to_int(char_type c) const
273 {
274 if( c >= '0' && c <='9') return c - '0';
275 else if( c >= 'A' && c <='F') return c - 'A' + 10;
276 else if( c >= 'a' && c <='f') return c - 'a' + 10;
277 else return 0;
278 }
279 bool ishex(char_type c) const
280 {
281 return (c >= '0' && c <= '9') ||
282 (c >= 'A' && c <= 'F') ||
283 (c >= 'a' && c <= 'f');
284 }
285 template<typename OutIt>
286 void decodeChar(char_type c, OutIt& out)
287 {
288 for(;;)
289 {
290 switch(m_state)
291 {
292 case sBlank:
293 if(isblank(c))
294 m_prev.append(1,c);
295 else if(isnl(c)) {
296 // soft linebrk & ignore trailing blanks
297 m_prev.clear();
298 m_state = sWaitingChar;
299 } else {
300 flushPrev(out);
301 m_state = sWaitingChar;
302 continue;
303 }
304 return;
305 case sAfterEq:
306 if(isblank(c))
307 m_prev.append(1,c);
308 else if(isnl(c)) {
309 // soft linebrk
310 m_state = sNewline;
311 continue;
312 } else {
313 if(m_prev.length() > 1)
314 {
315 // there're blanks after =
316 flushPrev(out);
317 m_state = sWaitingChar;
318 } else
319 m_state = sWaitingFirstHex;
320 continue;
321 }
322 return;
323 case sWaitingFirstHex:
324 if(!ishex(c))
325 {
326 // malformed: =[not-hexch]
327 flushPrev(out);
328 write(c, out);
329 m_state = sWaitingChar;
330 return;
331 } else {
332 m_prev.append(1,c);
333 m_state = sWaitingSecondHex;
334 }
335 return;
336 case sWaitingSecondHex:
337 if(!ishex(c))
338 { // malformed (=[hexch][not-hexch])
339 flushPrev(out);
340 write(c, out);
341 } else {
342 char_type oc, last;
343 assert(m_prev.length());
344 last = m_prev[m_prev.length()-1];
345 oc = hex_to_int(last) << 4 |
346 hex_to_int(c) ;
347 write(oc,out);
348 m_prev.clear();
349 }
350 m_state = sWaitingChar;
351 return;
352 case sNewline:
353 if(m_nl == 0)
354 {
355 m_nl = c;
356 return;
357 } else {
358 int len = m_prev.length();
359 if(!len || m_prev[0] != '=')
360 hardLineBrk(out);
361 m_prev.clear();
362 m_state = sWaitingChar;
363 bool is2Ch;
364 is2Ch = (c == (m_nl == CR ? LF : CR));
365 m_nl = 0;
366 if(is2Ch)
367 return;
368 continue;
369 }
370 case sWaitingChar:
371 if(isblank(c))
372 {
373 m_state = sBlank;
374 continue;
375 } else if(isnl(c)) {
376 m_state = sNewline;
377 continue;
378 } else if(c == '=') {
379 m_state = sAfterEq;
380 m_prev.append(1, c);
381 return;
382 } else {
383 // WARNING: NOT ignoring chars > 126
384 // as suggested in rfc2045 6.7 note 4
385 if(c < 32 && c != TAB)
386 {
387 // malformed, CTRL ch found
388 // ignore (rfc2045 6.7 note 4)
389 return;
390 }
391 write(c,out);
392 }
393 return;
394 }
395 }
396 }
397public:
398 /*! Constructor */
400 : m_state(sWaitingChar), m_nl(0)
401 {
402 }
403 /*! Returns the name of the codec ("Quoted-Printable") */
404 const char* name() const { return "Quoted-Printable"; }
405 /*! Returns the max line length */
406 size_t maxlen()
407 {
408 return m_maxlen;
409 }
410 /*!
411 Set the max line length. No more then \p i chars will be
412 printed on one line.
413 */
414 void maxlen(size_t i)
415 {
416 m_maxlen = i;
417 }
418 /*!
419 Decodes [\p bit,\p eit) and write any decoded char to \p out.
420 */
421 template<typename InIt, typename OutIt>
422 void process(InIt bit, InIt eit, OutIt out)
423 {
424 for(;bit != eit; ++bit)
425 decodeChar(*bit, out);
426 flush(out);
427 }
428 /*!
429 Decodes \p ic and write any decoded output char to \p out.
430
431 \warning You must call flush() when all chars have been
432 processed by the code(...) funcion.
433 \n
434 \code
435 while( (c = getchar()) != EOF )
436 qp.process(c, out);
437 qp.flush();
438 \endcode
439 \n
440 \sa flush()
441 */
442 template<typename OutIt>
443 void process(char_type ic, OutIt& out)
444 {
445 decodeChar(ic, out);
446 }
447 /*!
448 Write to \p out any buffered decoded char.
449 */
450 template<typename OutIt>
451 void flush(OutIt& out)
452 {
453 /* m_prev can be (regex):
454 empty:
455 ok
456 '=' :
457 malformed, '=' is last stream char, print as is
458 (rfc2045 6.7 note 3)
459 '=[a-zA-Z]'
460 malformed, print as is
461 (rfc2045 6.7 note 2)
462 '= +'
463 malformed, just print '=' and ignore trailing
464 blanks (rfc2045 6.7 (3) )
465 */
466 int len = m_prev.length();
467 if(len)
468 {
469 if(len == 1)
470 {
471 /* malformed if m_prev[0] == '=' */
472 write('=', out);
473 } else {
474 write('=', out);
475 if(m_prev[1] != ' ')
476 write(m_prev[1], out);
477 }
478 } else if(m_nl != 0) // stream ends with newline
479 hardLineBrk(out);
480
481 }
482};
483
484};
485
486
487} // namespace
488
489#endif
490
quoted-printable decoder
Definition qp.h:235
size_t maxlen()
Definition qp.h:406
const char * name() const
Definition qp.h:404
void process(InIt bit, InIt eit, OutIt out)
Definition qp.h:422
void process(char_type ic, OutIt &out)
Definition qp.h:443
void maxlen(size_t i)
Definition qp.h:414
Decoder()
Definition qp.h:399
void flush(OutIt &out)
Definition qp.h:451
quoted-printable encoder
Definition qp.h:45
size_t maxlen()
Definition qp.h:168
double codeSizeMultiplier() const
Definition qp.h:147
const char * name() const
Definition qp.h:166
Encoder(bool isBinary=false)
Definition qp.h:160
void process(InIt bit, InIt eit, OutIt out)
Definition qp.h:184
void process(char_type ic, OutIt &out)
Definition qp.h:204
void maxlen(size_t i)
Definition qp.h:176
void flush(OutIt &out)
Definition qp.h:217
Definition body.h:18
Base class for buffered codecs.
Definition codec_base.h:48