626 lines
23 KiB
JavaScript
626 lines
23 KiB
JavaScript
/* eslint no-control-regex:0 */
|
||
|
||
'use strict';
|
||
|
||
const base64 = require('../base64');
|
||
const qp = require('../qp');
|
||
const mimeTypes = require('./mime-types');
|
||
|
||
module.exports = {
|
||
/**
|
||
* Checks if a value is plaintext string (uses only printable 7bit chars)
|
||
*
|
||
* @param {String} value String to be tested
|
||
* @returns {Boolean} true if it is a plaintext string
|
||
*/
|
||
isPlainText(value, isParam) {
|
||
const re = isParam ? /[\x00-\x08\x0b\x0c\x0e-\x1f"\u0080-\uFFFF]/ : /[\x00-\x08\x0b\x0c\x0e-\x1f\u0080-\uFFFF]/;
|
||
if (typeof value !== 'string' || re.test(value)) {
|
||
return false;
|
||
} else {
|
||
return true;
|
||
}
|
||
},
|
||
|
||
/**
|
||
* Checks if a multi line string containes lines longer than the selected value.
|
||
*
|
||
* Useful when detecting if a mail message needs any processing at all –
|
||
* if only plaintext characters are used and lines are short, then there is
|
||
* no need to encode the values in any way. If the value is plaintext but has
|
||
* longer lines then allowed, then use format=flowed
|
||
*
|
||
* @param {Number} lineLength Max line length to check for
|
||
* @returns {Boolean} Returns true if there is at least one line longer than lineLength chars
|
||
*/
|
||
hasLongerLines(str, lineLength) {
|
||
if (str.length > 128 * 1024) {
|
||
// do not test strings longer than 128kB
|
||
return true;
|
||
}
|
||
return new RegExp('^.{' + (lineLength + 1) + ',}', 'm').test(str);
|
||
},
|
||
|
||
/**
|
||
* Encodes a string or an Buffer to an UTF-8 MIME Word (rfc2047)
|
||
*
|
||
* @param {String|Buffer} data String to be encoded
|
||
* @param {String} mimeWordEncoding='Q' Encoding for the mime word, either Q or B
|
||
* @param {Number} [maxLength=0] If set, split mime words into several chunks if needed
|
||
* @return {String} Single or several mime words joined together
|
||
*/
|
||
encodeWord(data, mimeWordEncoding, maxLength) {
|
||
mimeWordEncoding = (mimeWordEncoding || 'Q').toString().toUpperCase().trim().charAt(0);
|
||
maxLength = maxLength || 0;
|
||
|
||
let encodedStr;
|
||
let toCharset = 'UTF-8';
|
||
|
||
if (maxLength && maxLength > 7 + toCharset.length) {
|
||
maxLength -= 7 + toCharset.length;
|
||
}
|
||
|
||
if (mimeWordEncoding === 'Q') {
|
||
// https://tools.ietf.org/html/rfc2047#section-5 rule (3)
|
||
encodedStr = qp.encode(data).replace(/[^a-z0-9!*+\-/=]/gi, chr => {
|
||
let ord = chr.charCodeAt(0).toString(16).toUpperCase();
|
||
if (chr === ' ') {
|
||
return '_';
|
||
} else {
|
||
return '=' + (ord.length === 1 ? '0' + ord : ord);
|
||
}
|
||
});
|
||
} else if (mimeWordEncoding === 'B') {
|
||
encodedStr = typeof data === 'string' ? data : base64.encode(data);
|
||
maxLength = maxLength ? Math.max(3, ((maxLength - (maxLength % 4)) / 4) * 3) : 0;
|
||
}
|
||
|
||
if (maxLength && (mimeWordEncoding !== 'B' ? encodedStr : base64.encode(data)).length > maxLength) {
|
||
if (mimeWordEncoding === 'Q') {
|
||
encodedStr = this.splitMimeEncodedString(encodedStr, maxLength).join('?= =?' + toCharset + '?' + mimeWordEncoding + '?');
|
||
} else {
|
||
// RFC2047 6.3 (2) states that encoded-word must include an integral number of characters, so no chopping unicode sequences
|
||
let parts = [];
|
||
let lpart = '';
|
||
for (let i = 0, len = encodedStr.length; i < len; i++) {
|
||
let chr = encodedStr.charAt(i);
|
||
|
||
if (/[\ud83c\ud83d\ud83e]/.test(chr) && i < len - 1) {
|
||
// composite emoji byte, so add the next byte as well
|
||
chr += encodedStr.charAt(++i);
|
||
}
|
||
|
||
// check if we can add this character to the existing string
|
||
// without breaking byte length limit
|
||
if (Buffer.byteLength(lpart + chr) <= maxLength || i === 0) {
|
||
lpart += chr;
|
||
} else {
|
||
// we hit the length limit, so push the existing string and start over
|
||
parts.push(base64.encode(lpart));
|
||
lpart = chr;
|
||
}
|
||
}
|
||
if (lpart) {
|
||
parts.push(base64.encode(lpart));
|
||
}
|
||
|
||
if (parts.length > 1) {
|
||
encodedStr = parts.join('?= =?' + toCharset + '?' + mimeWordEncoding + '?');
|
||
} else {
|
||
encodedStr = parts.join('');
|
||
}
|
||
}
|
||
} else if (mimeWordEncoding === 'B') {
|
||
encodedStr = base64.encode(data);
|
||
}
|
||
|
||
return '=?' + toCharset + '?' + mimeWordEncoding + '?' + encodedStr + (encodedStr.substr(-2) === '?=' ? '' : '?=');
|
||
},
|
||
|
||
/**
|
||
* Finds word sequences with non ascii text and converts these to mime words
|
||
*
|
||
* @param {String} value String to be encoded
|
||
* @param {String} mimeWordEncoding='Q' Encoding for the mime word, either Q or B
|
||
* @param {Number} [maxLength=0] If set, split mime words into several chunks if needed
|
||
* @param {Boolean} [encodeAll=false] If true and the value needs encoding then encodes entire string, not just the smallest match
|
||
* @return {String} String with possible mime words
|
||
*/
|
||
encodeWords(value, mimeWordEncoding, maxLength, encodeAll) {
|
||
maxLength = maxLength || 0;
|
||
|
||
let encodedValue;
|
||
|
||
// find first word with a non-printable ascii or special symbol in it
|
||
let firstMatch = value.match(/(?:^|\s)([^\s]*["\u0080-\uFFFF])/);
|
||
if (!firstMatch) {
|
||
return value;
|
||
}
|
||
|
||
if (encodeAll) {
|
||
// if it is requested to encode everything or the string contains something that resebles encoded word, then encode everything
|
||
|
||
return this.encodeWord(value, mimeWordEncoding, maxLength);
|
||
}
|
||
|
||
// find the last word with a non-printable ascii in it
|
||
let lastMatch = value.match(/(["\u0080-\uFFFF][^\s]*)[^"\u0080-\uFFFF]*$/);
|
||
if (!lastMatch) {
|
||
// should not happen
|
||
return value;
|
||
}
|
||
|
||
let startIndex =
|
||
firstMatch.index +
|
||
(
|
||
firstMatch[0].match(/[^\s]/) || {
|
||
index: 0
|
||
}
|
||
).index;
|
||
let endIndex = lastMatch.index + (lastMatch[1] || '').length;
|
||
|
||
encodedValue =
|
||
(startIndex ? value.substr(0, startIndex) : '') +
|
||
this.encodeWord(value.substring(startIndex, endIndex), mimeWordEncoding || 'Q', maxLength) +
|
||
(endIndex < value.length ? value.substr(endIndex) : '');
|
||
|
||
return encodedValue;
|
||
},
|
||
|
||
/**
|
||
* Joins parsed header value together as 'value; param1=value1; param2=value2'
|
||
* PS: We are following RFC 822 for the list of special characters that we need to keep in quotes.
|
||
* Refer: https://www.w3.org/Protocols/rfc1341/4_Content-Type.html
|
||
* @param {Object} structured Parsed header value
|
||
* @return {String} joined header value
|
||
*/
|
||
buildHeaderValue(structured) {
|
||
let paramsArray = [];
|
||
|
||
Object.keys(structured.params || {}).forEach(param => {
|
||
// filename might include unicode characters so it is a special case
|
||
// other values probably do not
|
||
let value = structured.params[param];
|
||
if (!this.isPlainText(value, true) || value.length >= 75) {
|
||
this.buildHeaderParam(param, value, 50).forEach(encodedParam => {
|
||
if (!/[\s"\\;:/=(),<>@[\]?]|^[-']|'$/.test(encodedParam.value) || encodedParam.key.substr(-1) === '*') {
|
||
paramsArray.push(encodedParam.key + '=' + encodedParam.value);
|
||
} else {
|
||
paramsArray.push(encodedParam.key + '=' + JSON.stringify(encodedParam.value));
|
||
}
|
||
});
|
||
} else if (/[\s'"\\;:/=(),<>@[\]?]|^-/.test(value)) {
|
||
paramsArray.push(param + '=' + JSON.stringify(value));
|
||
} else {
|
||
paramsArray.push(param + '=' + value);
|
||
}
|
||
});
|
||
|
||
return structured.value + (paramsArray.length ? '; ' + paramsArray.join('; ') : '');
|
||
},
|
||
|
||
/**
|
||
* Encodes a string or an Buffer to an UTF-8 Parameter Value Continuation encoding (rfc2231)
|
||
* Useful for splitting long parameter values.
|
||
*
|
||
* For example
|
||
* title="unicode string"
|
||
* becomes
|
||
* title*0*=utf-8''unicode
|
||
* title*1*=%20string
|
||
*
|
||
* @param {String|Buffer} data String to be encoded
|
||
* @param {Number} [maxLength=50] Max length for generated chunks
|
||
* @param {String} [fromCharset='UTF-8'] Source sharacter set
|
||
* @return {Array} A list of encoded keys and headers
|
||
*/
|
||
buildHeaderParam(key, data, maxLength) {
|
||
let list = [];
|
||
let encodedStr = typeof data === 'string' ? data : (data || '').toString();
|
||
let encodedStrArr;
|
||
let chr, ord;
|
||
let line;
|
||
let startPos = 0;
|
||
let i, len;
|
||
|
||
maxLength = maxLength || 50;
|
||
|
||
// process ascii only text
|
||
if (this.isPlainText(data, true)) {
|
||
// check if conversion is even needed
|
||
if (encodedStr.length <= maxLength) {
|
||
return [
|
||
{
|
||
key,
|
||
value: encodedStr
|
||
}
|
||
];
|
||
}
|
||
|
||
encodedStr = encodedStr.replace(new RegExp('.{' + maxLength + '}', 'g'), str => {
|
||
list.push({
|
||
line: str
|
||
});
|
||
return '';
|
||
});
|
||
|
||
if (encodedStr) {
|
||
list.push({
|
||
line: encodedStr
|
||
});
|
||
}
|
||
} else {
|
||
if (/[\uD800-\uDBFF]/.test(encodedStr)) {
|
||
// string containts surrogate pairs, so normalize it to an array of bytes
|
||
encodedStrArr = [];
|
||
for (i = 0, len = encodedStr.length; i < len; i++) {
|
||
chr = encodedStr.charAt(i);
|
||
ord = chr.charCodeAt(0);
|
||
if (ord >= 0xd800 && ord <= 0xdbff && i < len - 1) {
|
||
chr += encodedStr.charAt(i + 1);
|
||
encodedStrArr.push(chr);
|
||
i++;
|
||
} else {
|
||
encodedStrArr.push(chr);
|
||
}
|
||
}
|
||
encodedStr = encodedStrArr;
|
||
}
|
||
|
||
// first line includes the charset and language info and needs to be encoded
|
||
// even if it does not contain any unicode characters
|
||
line = 'utf-8\x27\x27';
|
||
let encoded = true;
|
||
startPos = 0;
|
||
|
||
// process text with unicode or special chars
|
||
for (i = 0, len = encodedStr.length; i < len; i++) {
|
||
chr = encodedStr[i];
|
||
|
||
if (encoded) {
|
||
chr = this.safeEncodeURIComponent(chr);
|
||
} else {
|
||
// try to urlencode current char
|
||
chr = chr === ' ' ? chr : this.safeEncodeURIComponent(chr);
|
||
// By default it is not required to encode a line, the need
|
||
// only appears when the string contains unicode or special chars
|
||
// in this case we start processing the line over and encode all chars
|
||
if (chr !== encodedStr[i]) {
|
||
// Check if it is even possible to add the encoded char to the line
|
||
// If not, there is no reason to use this line, just push it to the list
|
||
// and start a new line with the char that needs encoding
|
||
if ((this.safeEncodeURIComponent(line) + chr).length >= maxLength) {
|
||
list.push({
|
||
line,
|
||
encoded
|
||
});
|
||
line = '';
|
||
startPos = i - 1;
|
||
} else {
|
||
encoded = true;
|
||
i = startPos;
|
||
line = '';
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
|
||
// if the line is already too long, push it to the list and start a new one
|
||
if ((line + chr).length >= maxLength) {
|
||
list.push({
|
||
line,
|
||
encoded
|
||
});
|
||
line = chr = encodedStr[i] === ' ' ? ' ' : this.safeEncodeURIComponent(encodedStr[i]);
|
||
if (chr === encodedStr[i]) {
|
||
encoded = false;
|
||
startPos = i - 1;
|
||
} else {
|
||
encoded = true;
|
||
}
|
||
} else {
|
||
line += chr;
|
||
}
|
||
}
|
||
|
||
if (line) {
|
||
list.push({
|
||
line,
|
||
encoded
|
||
});
|
||
}
|
||
}
|
||
|
||
return list.map((item, i) => ({
|
||
// encoded lines: {name}*{part}*
|
||
// unencoded lines: {name}*{part}
|
||
// if any line needs to be encoded then the first line (part==0) is always encoded
|
||
key: key + '*' + i + (item.encoded ? '*' : ''),
|
||
value: item.line
|
||
}));
|
||
},
|
||
|
||
/**
|
||
* Parses a header value with key=value arguments into a structured
|
||
* object.
|
||
*
|
||
* parseHeaderValue('content-type: text/plain; CHARSET='UTF-8'') ->
|
||
* {
|
||
* 'value': 'text/plain',
|
||
* 'params': {
|
||
* 'charset': 'UTF-8'
|
||
* }
|
||
* }
|
||
*
|
||
* @param {String} str Header value
|
||
* @return {Object} Header value as a parsed structure
|
||
*/
|
||
parseHeaderValue(str) {
|
||
let response = {
|
||
value: false,
|
||
params: {}
|
||
};
|
||
let key = false;
|
||
let value = '';
|
||
let type = 'value';
|
||
let quote = false;
|
||
let escaped = false;
|
||
let chr;
|
||
|
||
for (let i = 0, len = str.length; i < len; i++) {
|
||
chr = str.charAt(i);
|
||
if (type === 'key') {
|
||
if (chr === '=') {
|
||
key = value.trim().toLowerCase();
|
||
type = 'value';
|
||
value = '';
|
||
continue;
|
||
}
|
||
value += chr;
|
||
} else {
|
||
if (escaped) {
|
||
value += chr;
|
||
} else if (chr === '\\') {
|
||
escaped = true;
|
||
continue;
|
||
} else if (quote && chr === quote) {
|
||
quote = false;
|
||
} else if (!quote && chr === '"') {
|
||
quote = chr;
|
||
} else if (!quote && chr === ';') {
|
||
if (key === false) {
|
||
response.value = value.trim();
|
||
} else {
|
||
response.params[key] = value.trim();
|
||
}
|
||
type = 'key';
|
||
value = '';
|
||
} else {
|
||
value += chr;
|
||
}
|
||
escaped = false;
|
||
}
|
||
}
|
||
|
||
if (type === 'value') {
|
||
if (key === false) {
|
||
response.value = value.trim();
|
||
} else {
|
||
response.params[key] = value.trim();
|
||
}
|
||
} else if (value.trim()) {
|
||
response.params[value.trim().toLowerCase()] = '';
|
||
}
|
||
|
||
// handle parameter value continuations
|
||
// https://tools.ietf.org/html/rfc2231#section-3
|
||
|
||
// preprocess values
|
||
Object.keys(response.params).forEach(key => {
|
||
let actualKey, nr, match, value;
|
||
if ((match = key.match(/(\*(\d+)|\*(\d+)\*|\*)$/))) {
|
||
actualKey = key.substr(0, match.index);
|
||
nr = Number(match[2] || match[3]) || 0;
|
||
|
||
if (!response.params[actualKey] || typeof response.params[actualKey] !== 'object') {
|
||
response.params[actualKey] = {
|
||
charset: false,
|
||
values: []
|
||
};
|
||
}
|
||
|
||
value = response.params[key];
|
||
|
||
if (nr === 0 && match[0].substr(-1) === '*' && (match = value.match(/^([^']*)'[^']*'(.*)$/))) {
|
||
response.params[actualKey].charset = match[1] || 'iso-8859-1';
|
||
value = match[2];
|
||
}
|
||
|
||
response.params[actualKey].values[nr] = value;
|
||
|
||
// remove the old reference
|
||
delete response.params[key];
|
||
}
|
||
});
|
||
|
||
// concatenate split rfc2231 strings and convert encoded strings to mime encoded words
|
||
Object.keys(response.params).forEach(key => {
|
||
let value;
|
||
if (response.params[key] && Array.isArray(response.params[key].values)) {
|
||
value = response.params[key].values.map(val => val || '').join('');
|
||
|
||
if (response.params[key].charset) {
|
||
// convert "%AB" to "=?charset?Q?=AB?="
|
||
response.params[key] =
|
||
'=?' +
|
||
response.params[key].charset +
|
||
'?Q?' +
|
||
value
|
||
// fix invalidly encoded chars
|
||
.replace(/[=?_\s]/g, s => {
|
||
let c = s.charCodeAt(0).toString(16);
|
||
if (s === ' ') {
|
||
return '_';
|
||
} else {
|
||
return '%' + (c.length < 2 ? '0' : '') + c;
|
||
}
|
||
})
|
||
// change from urlencoding to percent encoding
|
||
.replace(/%/g, '=') +
|
||
'?=';
|
||
} else {
|
||
response.params[key] = value;
|
||
}
|
||
}
|
||
});
|
||
|
||
return response;
|
||
},
|
||
|
||
/**
|
||
* Returns file extension for a content type string. If no suitable extensions
|
||
* are found, 'bin' is used as the default extension
|
||
*
|
||
* @param {String} mimeType Content type to be checked for
|
||
* @return {String} File extension
|
||
*/
|
||
detectExtension: mimeType => mimeTypes.detectExtension(mimeType),
|
||
|
||
/**
|
||
* Returns content type for a file extension. If no suitable content types
|
||
* are found, 'application/octet-stream' is used as the default content type
|
||
*
|
||
* @param {String} extension Extension to be checked for
|
||
* @return {String} File extension
|
||
*/
|
||
detectMimeType: extension => mimeTypes.detectMimeType(extension),
|
||
|
||
/**
|
||
* Folds long lines, useful for folding header lines (afterSpace=false) and
|
||
* flowed text (afterSpace=true)
|
||
*
|
||
* @param {String} str String to be folded
|
||
* @param {Number} [lineLength=76] Maximum length of a line
|
||
* @param {Boolean} afterSpace If true, leave a space in th end of a line
|
||
* @return {String} String with folded lines
|
||
*/
|
||
foldLines(str, lineLength, afterSpace) {
|
||
str = (str || '').toString();
|
||
lineLength = lineLength || 76;
|
||
|
||
let pos = 0,
|
||
len = str.length,
|
||
result = '',
|
||
line,
|
||
match;
|
||
|
||
while (pos < len) {
|
||
line = str.substr(pos, lineLength);
|
||
if (line.length < lineLength) {
|
||
result += line;
|
||
break;
|
||
}
|
||
if ((match = line.match(/^[^\n\r]*(\r?\n|\r)/))) {
|
||
line = match[0];
|
||
result += line;
|
||
pos += line.length;
|
||
continue;
|
||
} else if ((match = line.match(/(\s+)[^\s]*$/)) && match[0].length - (afterSpace ? (match[1] || '').length : 0) < line.length) {
|
||
line = line.substr(0, line.length - (match[0].length - (afterSpace ? (match[1] || '').length : 0)));
|
||
} else if ((match = str.substr(pos + line.length).match(/^[^\s]+(\s*)/))) {
|
||
line = line + match[0].substr(0, match[0].length - (!afterSpace ? (match[1] || '').length : 0));
|
||
}
|
||
|
||
result += line;
|
||
pos += line.length;
|
||
if (pos < len) {
|
||
result += '\r\n';
|
||
}
|
||
}
|
||
|
||
return result;
|
||
},
|
||
|
||
/**
|
||
* Splits a mime encoded string. Needed for dividing mime words into smaller chunks
|
||
*
|
||
* @param {String} str Mime encoded string to be split up
|
||
* @param {Number} maxlen Maximum length of characters for one part (minimum 12)
|
||
* @return {Array} Split string
|
||
*/
|
||
splitMimeEncodedString: (str, maxlen) => {
|
||
let curLine,
|
||
match,
|
||
chr,
|
||
done,
|
||
lines = [];
|
||
|
||
// require at least 12 symbols to fit possible 4 octet UTF-8 sequences
|
||
maxlen = Math.max(maxlen || 0, 12);
|
||
|
||
while (str.length) {
|
||
curLine = str.substr(0, maxlen);
|
||
|
||
// move incomplete escaped char back to main
|
||
if ((match = curLine.match(/[=][0-9A-F]?$/i))) {
|
||
curLine = curLine.substr(0, match.index);
|
||
}
|
||
|
||
done = false;
|
||
while (!done) {
|
||
done = true;
|
||
// check if not middle of a unicode char sequence
|
||
if ((match = str.substr(curLine.length).match(/^[=]([0-9A-F]{2})/i))) {
|
||
chr = parseInt(match[1], 16);
|
||
// invalid sequence, move one char back anc recheck
|
||
if (chr < 0xc2 && chr > 0x7f) {
|
||
curLine = curLine.substr(0, curLine.length - 3);
|
||
done = false;
|
||
}
|
||
}
|
||
}
|
||
|
||
if (curLine.length) {
|
||
lines.push(curLine);
|
||
}
|
||
str = str.substr(curLine.length);
|
||
}
|
||
|
||
return lines;
|
||
},
|
||
|
||
encodeURICharComponent: chr => {
|
||
let res = '';
|
||
let ord = chr.charCodeAt(0).toString(16).toUpperCase();
|
||
|
||
if (ord.length % 2) {
|
||
ord = '0' + ord;
|
||
}
|
||
|
||
if (ord.length > 2) {
|
||
for (let i = 0, len = ord.length / 2; i < len; i++) {
|
||
res += '%' + ord.substr(i, 2);
|
||
}
|
||
} else {
|
||
res += '%' + ord;
|
||
}
|
||
|
||
return res;
|
||
},
|
||
|
||
safeEncodeURIComponent(str) {
|
||
str = (str || '').toString();
|
||
|
||
try {
|
||
// might throw if we try to encode invalid sequences, eg. partial emoji
|
||
str = encodeURIComponent(str);
|
||
} catch (E) {
|
||
// should never run
|
||
return str.replace(/[^\x00-\x1F *'()<>@,;:\\"[\]?=\u007F-\uFFFF]+/g, '');
|
||
}
|
||
|
||
// ensure chars that are not handled by encodeURICompent are converted as well
|
||
return str.replace(/[\x00-\x1F *'()<>@,;:\\"[\]?=\u007F-\uFFFF]/g, chr => this.encodeURICharComponent(chr));
|
||
}
|
||
};
|