utf7.js 9.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. "use strict";
  2. var Buffer = require("buffer").Buffer;
  3. // UTF-7 codec, according to https://tools.ietf.org/html/rfc2152
  4. // See also below a UTF-7-IMAP codec, according to http://tools.ietf.org/html/rfc3501#section-5.1.3
  5. exports.utf7 = Utf7Codec;
  6. exports.unicode11utf7 = 'utf7'; // Alias UNICODE-1-1-UTF-7
  7. function Utf7Codec(codecOptions, iconv) {
  8. this.iconv = iconv;
  9. };
  10. Utf7Codec.prototype.encoder = Utf7Encoder;
  11. Utf7Codec.prototype.decoder = Utf7Decoder;
  12. Utf7Codec.prototype.bomAware = true;
  13. // -- Encoding
  14. var nonDirectChars = /[^A-Za-z0-9'\(\),-\.\/:\? \n\r\t]+/g;
  15. function Utf7Encoder(options, codec) {
  16. this.iconv = codec.iconv;
  17. }
  18. Utf7Encoder.prototype.write = function(str) {
  19. // Naive implementation.
  20. // Non-direct chars are encoded as "+<base64>-"; single "+" char is encoded as "+-".
  21. return new Buffer(str.replace(nonDirectChars, function(chunk) {
  22. return "+" + (chunk === '+' ? '' :
  23. this.iconv.encode(chunk, 'utf16-be').toString('base64').replace(/=+$/, ''))
  24. + "-";
  25. }.bind(this)));
  26. }
  27. Utf7Encoder.prototype.end = function() {
  28. }
  29. // -- Decoding
  30. function Utf7Decoder(options, codec) {
  31. this.iconv = codec.iconv;
  32. this.inBase64 = false;
  33. this.base64Accum = '';
  34. }
  35. var base64Regex = /[A-Za-z0-9\/+]/;
  36. var base64Chars = [];
  37. for (var i = 0; i < 256; i++)
  38. base64Chars[i] = base64Regex.test(String.fromCharCode(i));
  39. var plusChar = '+'.charCodeAt(0),
  40. minusChar = '-'.charCodeAt(0),
  41. andChar = '&'.charCodeAt(0);
  42. Utf7Decoder.prototype.write = function(buf) {
  43. var res = "", lastI = 0,
  44. inBase64 = this.inBase64,
  45. base64Accum = this.base64Accum;
  46. // The decoder is more involved as we must handle chunks in stream.
  47. for (var i = 0; i < buf.length; i++) {
  48. if (!inBase64) { // We're in direct mode.
  49. // Write direct chars until '+'
  50. if (buf[i] == plusChar) {
  51. res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
  52. lastI = i+1;
  53. inBase64 = true;
  54. }
  55. } else { // We decode base64.
  56. if (!base64Chars[buf[i]]) { // Base64 ended.
  57. if (i == lastI && buf[i] == minusChar) {// "+-" -> "+"
  58. res += "+";
  59. } else {
  60. var b64str = base64Accum + buf.slice(lastI, i).toString();
  61. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  62. }
  63. if (buf[i] != minusChar) // Minus is absorbed after base64.
  64. i--;
  65. lastI = i+1;
  66. inBase64 = false;
  67. base64Accum = '';
  68. }
  69. }
  70. }
  71. if (!inBase64) {
  72. res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
  73. } else {
  74. var b64str = base64Accum + buf.slice(lastI).toString();
  75. var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
  76. base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
  77. b64str = b64str.slice(0, canBeDecoded);
  78. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  79. }
  80. this.inBase64 = inBase64;
  81. this.base64Accum = base64Accum;
  82. return res;
  83. }
  84. Utf7Decoder.prototype.end = function() {
  85. var res = "";
  86. if (this.inBase64 && this.base64Accum.length > 0)
  87. res = this.iconv.decode(new Buffer(this.base64Accum, 'base64'), "utf16-be");
  88. this.inBase64 = false;
  89. this.base64Accum = '';
  90. return res;
  91. }
  92. // UTF-7-IMAP codec.
  93. // RFC3501 Sec. 5.1.3 Modified UTF-7 (http://tools.ietf.org/html/rfc3501#section-5.1.3)
  94. // Differences:
  95. // * Base64 part is started by "&" instead of "+"
  96. // * Direct characters are 0x20-0x7E, except "&" (0x26)
  97. // * In Base64, "," is used instead of "/"
  98. // * Base64 must not be used to represent direct characters.
  99. // * No implicit shift back from Base64 (should always end with '-')
  100. // * String must end in non-shifted position.
  101. // * "-&" while in base64 is not allowed.
  102. exports.utf7imap = Utf7IMAPCodec;
  103. function Utf7IMAPCodec(codecOptions, iconv) {
  104. this.iconv = iconv;
  105. };
  106. Utf7IMAPCodec.prototype.encoder = Utf7IMAPEncoder;
  107. Utf7IMAPCodec.prototype.decoder = Utf7IMAPDecoder;
  108. Utf7IMAPCodec.prototype.bomAware = true;
  109. // -- Encoding
  110. function Utf7IMAPEncoder(options, codec) {
  111. this.iconv = codec.iconv;
  112. this.inBase64 = false;
  113. this.base64Accum = new Buffer(6);
  114. this.base64AccumIdx = 0;
  115. }
  116. Utf7IMAPEncoder.prototype.write = function(str) {
  117. var inBase64 = this.inBase64,
  118. base64Accum = this.base64Accum,
  119. base64AccumIdx = this.base64AccumIdx,
  120. buf = new Buffer(str.length*5 + 10), bufIdx = 0;
  121. for (var i = 0; i < str.length; i++) {
  122. var uChar = str.charCodeAt(i);
  123. if (0x20 <= uChar && uChar <= 0x7E) { // Direct character or '&'.
  124. if (inBase64) {
  125. if (base64AccumIdx > 0) {
  126. bufIdx += buf.write(base64Accum.slice(0, base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
  127. base64AccumIdx = 0;
  128. }
  129. buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
  130. inBase64 = false;
  131. }
  132. if (!inBase64) {
  133. buf[bufIdx++] = uChar; // Write direct character
  134. if (uChar === andChar) // Ampersand -> '&-'
  135. buf[bufIdx++] = minusChar;
  136. }
  137. } else { // Non-direct character
  138. if (!inBase64) {
  139. buf[bufIdx++] = andChar; // Write '&', then go to base64 mode.
  140. inBase64 = true;
  141. }
  142. if (inBase64) {
  143. base64Accum[base64AccumIdx++] = uChar >> 8;
  144. base64Accum[base64AccumIdx++] = uChar & 0xFF;
  145. if (base64AccumIdx == base64Accum.length) {
  146. bufIdx += buf.write(base64Accum.toString('base64').replace(/\//g, ','), bufIdx);
  147. base64AccumIdx = 0;
  148. }
  149. }
  150. }
  151. }
  152. this.inBase64 = inBase64;
  153. this.base64AccumIdx = base64AccumIdx;
  154. return buf.slice(0, bufIdx);
  155. }
  156. Utf7IMAPEncoder.prototype.end = function() {
  157. var buf = new Buffer(10), bufIdx = 0;
  158. if (this.inBase64) {
  159. if (this.base64AccumIdx > 0) {
  160. bufIdx += buf.write(this.base64Accum.slice(0, this.base64AccumIdx).toString('base64').replace(/\//g, ',').replace(/=+$/, ''), bufIdx);
  161. this.base64AccumIdx = 0;
  162. }
  163. buf[bufIdx++] = minusChar; // Write '-', then go to direct mode.
  164. this.inBase64 = false;
  165. }
  166. return buf.slice(0, bufIdx);
  167. }
  168. // -- Decoding
  169. function Utf7IMAPDecoder(options, codec) {
  170. this.iconv = codec.iconv;
  171. this.inBase64 = false;
  172. this.base64Accum = '';
  173. }
  174. var base64IMAPChars = base64Chars.slice();
  175. base64IMAPChars[','.charCodeAt(0)] = true;
  176. Utf7IMAPDecoder.prototype.write = function(buf) {
  177. var res = "", lastI = 0,
  178. inBase64 = this.inBase64,
  179. base64Accum = this.base64Accum;
  180. // The decoder is more involved as we must handle chunks in stream.
  181. // It is forgiving, closer to standard UTF-7 (for example, '-' is optional at the end).
  182. for (var i = 0; i < buf.length; i++) {
  183. if (!inBase64) { // We're in direct mode.
  184. // Write direct chars until '&'
  185. if (buf[i] == andChar) {
  186. res += this.iconv.decode(buf.slice(lastI, i), "ascii"); // Write direct chars.
  187. lastI = i+1;
  188. inBase64 = true;
  189. }
  190. } else { // We decode base64.
  191. if (!base64IMAPChars[buf[i]]) { // Base64 ended.
  192. if (i == lastI && buf[i] == minusChar) { // "&-" -> "&"
  193. res += "&";
  194. } else {
  195. var b64str = base64Accum + buf.slice(lastI, i).toString().replace(/,/g, '/');
  196. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  197. }
  198. if (buf[i] != minusChar) // Minus may be absorbed after base64.
  199. i--;
  200. lastI = i+1;
  201. inBase64 = false;
  202. base64Accum = '';
  203. }
  204. }
  205. }
  206. if (!inBase64) {
  207. res += this.iconv.decode(buf.slice(lastI), "ascii"); // Write direct chars.
  208. } else {
  209. var b64str = base64Accum + buf.slice(lastI).toString().replace(/,/g, '/');
  210. var canBeDecoded = b64str.length - (b64str.length % 8); // Minimal chunk: 2 quads -> 2x3 bytes -> 3 chars.
  211. base64Accum = b64str.slice(canBeDecoded); // The rest will be decoded in future.
  212. b64str = b64str.slice(0, canBeDecoded);
  213. res += this.iconv.decode(new Buffer(b64str, 'base64'), "utf16-be");
  214. }
  215. this.inBase64 = inBase64;
  216. this.base64Accum = base64Accum;
  217. return res;
  218. }
  219. Utf7IMAPDecoder.prototype.end = function() {
  220. var res = "";
  221. if (this.inBase64 && this.base64Accum.length > 0)
  222. res = this.iconv.decode(new Buffer(this.base64Accum, 'base64'), "utf16-be");
  223. this.inBase64 = false;
  224. this.base64Accum = '';
  225. return res;
  226. }