diff --git a/automation/build-bin.ts b/automation/build-bin.ts index bbc39da5..376f41b4 100644 --- a/automation/build-bin.ts +++ b/automation/build-bin.ts @@ -99,7 +99,6 @@ async function buildPkg() { // [platform, [source path], [destination path]] ['*', ['open', 'xdg-open'], ['xdg-open']], ['darwin', ['denymount', 'bin', 'denymount'], ['denymount']], - ['win32', ['mmmagic', 'magic', 'magic.mgc'], ['mmmagic', 'magic.mgc']], ]; await Bluebird.map(paths, ([platform, source, dest]) => { if (platform === '*' || platform === process.platform) { diff --git a/lib/utils/eol-conversion.ts b/lib/utils/eol-conversion.ts index 25c7fff0..b337df28 100644 --- a/lib/utils/eol-conversion.ts +++ b/lib/utils/eol-conversion.ts @@ -15,35 +15,19 @@ * limitations under the License. */ +import { fs } from 'mz'; +import Logger = require('./logger'); + +const globalLogger = Logger.getLogger(); + // Define file size threshold (bytes) over which analysis/conversion is not performed. const LARGE_FILE_THRESHOLD = 10 * 1000 * 1000; -// The list of encodings to convert is intentionally conservative for now +// Note that `convertEolInPlace()` only works with UTF-8 or single-byte encodings const CONVERTIBLE_ENCODINGS = ['ascii', 'utf-8']; -/** - * Attempt to detect the encoding of a data buffer - * @param data - */ -async function detectEncoding(data: Buffer): Promise { - const mmmagic = await import('mmmagic'); - // Instantiate mmmagic for mime encoding analysis - const magic = new mmmagic.Magic(mmmagic.MAGIC_MIME_ENCODING); - - // Promisify magic.detect - // For some reason, got 'Illegal Invocation' when using: - // const detectEncoding = promisify(magic.detect); - return new Promise((resolve, reject) => { - magic.detect(data, (err, encoding) => { - if (err) { - return reject(err); - } - // mmmagic reports ascii as 'us-ascii', but node Buffer uses 'ascii' - encoding = encoding === 'us-ascii' ? 
'ascii' : encoding; - return resolve(encoding); - }); - }); -} +// Maximum number of bytes to consider when detecting the file encoding +const DETECT_MAX_BYTES = 1024; /** * Convert EOL (CRLF → LF) in place, i.e. modifying the input buffer. @@ -90,10 +74,7 @@ export async function readFileWithEolConversion( filepath: string, convertEol: boolean, ): Promise { - const { fs } = await import('mz'); const fileBuffer = await fs.readFile(filepath); - const Logger = await import('./logger'); - const globalLogger = Logger.getLogger(); // Skip processing of very large files const fileStats = await fs.stat(filepath); @@ -111,7 +92,7 @@ export async function readFileWithEolConversion( } // Skip further processing of files that don't contain CRLF - if (!fileBuffer.includes('\r\n', 0, encoding)) { + if (!fileBuffer.includes('\r\n')) { return fileBuffer; } @@ -136,3 +117,122 @@ export async function readFileWithEolConversion( return fileBuffer; } } + +/** + * Attempt to detect the encoding of a data buffer. + * Code copied and modified from the npm package 'isbinaryfile' (MIT licence) + * https://github.com/gjtorikian/isBinaryFile/blob/master/src/index.ts + * + * @returns one of the possible values: '' (empty file), 'utf-8', 'utf-16', + * 'utf-32', 'gb-18030', 'pdf', and 'binary'. + * + * Note: pure ASCII data is identified as 'utf-8' (ASCII is indeed a subset + * of UTF-8). 
/** Returns true when `byte` is a UTF-8 continuation byte (0b10xxxxxx). */
function isUtf8ContinuationByte(byte: number | undefined): boolean {
  return byte !== undefined && byte >= 0x80 && byte <= 0xbf;
}

/**
 * Returns the total length (2, 3 or 4) of the UTF-8 sequence introduced by
 * `lead`, or 0 when `lead` cannot start a multi-byte sequence.
 * 0xC0/0xC1 (overlong 2-byte forms) and 0xF5+ (beyond U+10FFFF) are rejected.
 */
function utf8SequenceLength(lead: number): number {
  if (lead >= 0xc2 && lead <= 0xdf) {
    return 2;
  }
  if (lead >= 0xe0 && lead <= 0xef) {
    return 3;
  }
  if (lead >= 0xf0 && lead <= 0xf4) {
    return 4;
  }
  return 0;
}

/**
 * Attempt to detect the encoding of a data buffer.
 * Code copied and modified from the npm package 'isbinaryfile' (MIT licence)
 * https://github.com/gjtorikian/isBinaryFile/blob/master/src/index.ts
 *
 * Detection runs in two steps:
 *  1. Well-known signatures at the start of the data (UTF-8/16/32 and
 *     GB 18030 byte order marks, and the '%PDF-' magic).
 *  2. A scan of at most `maxBytes` leading bytes: a NUL byte or any byte
 *     that is neither plain text nor part of a well-formed UTF-8 multi-byte
 *     sequence (2, 3 or 4 bytes long) classifies the data as 'binary'.
 *
 * A multi-byte character that starts inside the sampled window but ends
 * beyond it is validated against the bytes that follow the window, so the
 * sampling limit never splits a character. A sequence cut short by the end
 * of the data itself is still reported as 'binary'.
 *
 * Note: pure ASCII data is identified as 'utf-8' (ASCII is indeed a subset
 * of UTF-8).
 *
 * @param fileBuffer File contents whose encoding should be detected
 * @param bytesRead Optional "file size" if smaller than the buffer size.
 * Values larger than the buffer are clamped to the buffer length.
 * @param maxBytes Optional maximum number of bytes to scan in step 2
 * (defaults to DETECT_MAX_BYTES)
 * @returns one of the possible values: '' (empty file), 'utf-8', 'utf-16',
 * 'utf-32', 'gb-18030', 'pdf', and 'binary'.
 */
export async function detectEncoding(
  fileBuffer: Buffer,
  bytesRead = fileBuffer.length,
  maxBytes = DETECT_MAX_BYTES,
): Promise<string> {
  // Never trust `bytesRead` beyond what the buffer actually holds
  const available = Math.max(0, Math.min(bytesRead, fileBuffer.length));

  // empty file
  if (available === 0) {
    return '';
  }

  const startsWith = (...signature: number[]): boolean =>
    available >= signature.length &&
    signature.every((byte, idx) => fileBuffer[idx] === byte);

  // UTF-8 BOM
  if (startsWith(0xef, 0xbb, 0xbf)) {
    return 'utf-8';
  }

  // UTF-32 BE / LE BOM. Must be tested before UTF-16: the UTF-32 LE BOM
  // begins with the UTF-16 LE BOM.
  if (
    startsWith(0x00, 0x00, 0xfe, 0xff) ||
    startsWith(0xff, 0xfe, 0x00, 0x00)
  ) {
    return 'utf-32';
  }

  // GB BOM (https://en.wikipedia.org/wiki/GB_18030)
  if (startsWith(0x84, 0x31, 0x95, 0x33)) {
    return 'gb-18030';
  }

  // '%PDF-' magic. PDF is binary.
  if (startsWith(0x25, 0x50, 0x44, 0x46, 0x2d)) {
    return 'pdf';
  }

  // UTF-16 BE / LE BOM
  if (startsWith(0xfe, 0xff) || startsWith(0xff, 0xfe)) {
    return 'utf-16';
  }

  const sampleEnd = Math.min(available, Math.max(0, maxBytes));
  let i = 0;
  while (i < sampleEnd) {
    const c = fileBuffer[i];
    if (c === 0) {
      // NULL byte
      return 'binary';
    }

    // Plain text: ESC (ANSI colour sequences in log files), the control
    // characters 7..14 (BEL, BS, TAB, LF, VT, FF, CR, SO) and 32..127
    if (c === 27 || (c >= 7 && c <= 14) || (c >= 32 && c <= 127)) {
      i++;
      continue;
    }

    // UTF-8 detection
    const sequenceLength = utf8SequenceLength(c);
    if (sequenceLength === 0 || i + sequenceLength > available) {
      // Not a lead byte, or the data ends in the middle of a character
      return 'binary';
    }
    const second = fileBuffer[i + 1];
    if ((c === 0xf0 && second < 0x90) || (c === 0xf4 && second > 0x8f)) {
      // Overlong 4-byte form, or a code point beyond U+10FFFF
      return 'binary';
    }
    for (let offset = 1; offset < sequenceLength; offset++) {
      if (!isUtf8ContinuationByte(fileBuffer[i + offset])) {
        return 'binary';
      }
    }
    i += sequenceLength;
  }

  return 'utf-8';
}
b/package.json @@ -112,7 +112,6 @@ "@types/lodash": "4.14.112", "@types/mixpanel": "2.14.0", "@types/mkdirp": "0.5.2", - "@types/mmmagic": "0.4.16-alpha", "@types/mocha": "^5.2.7", "@types/mz": "0.0.32", "@types/net-keepalive": "^0.4.0", @@ -208,7 +207,6 @@ "minimatch": "^3.0.4", "mixpanel": "^0.10.3", "mkdirp": "^0.5.1", - "mmmagic": "^0.5.3", "moment": "^2.24.0", "moment-duration-format": "^2.3.2", "mz": "^2.7.0", diff --git a/patches/mmmagic+0.5.3.patch b/patches/mmmagic+0.5.3.patch deleted file mode 100644 index f3308218..00000000 --- a/patches/mmmagic+0.5.3.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/node_modules/mmmagic/lib/index.js b/node_modules/mmmagic/lib/index.js -index bd18a4b..a19a149 100644 ---- a/node_modules/mmmagic/lib/index.js -+++ b/node_modules/mmmagic/lib/index.js -@@ -1,5 +1,8 @@ - var Magic = require('../build/Release/magic'); --var fbpath = require('path').join(__dirname, '..', 'magic', 'magic'); -+var path = require('path'); -+var fbpath = process.pkg -+ ? 
// Spec for detectEncoding(): checks a handful of files expected to exist
// under node_modules (native add-ons are binary, .bin launchers are text).
describe('detectEncoding() function', function() {
  it('should correctly detect the encoding of a few selected files', async () => {
    // Compiled native add-ons, given relative to the node_modules folder
    const nativeAddons = [
      'ext2fs/build/Release/bindings.node',
      'drivelist/build/Release/drivelist.node',
      'resin-cli-visuals/node_modules/drivelist/build/Release/drivelist.node',
      '@balena.io/usb/build/Release/usb_bindings.node',
      'xxhash/build/Release/hash.node',
      'mountutils/build/Release/MountUtils.node',
    ];
    // Launcher scripts, given relative to the current working directory
    const launcherScripts = [
      'node_modules/.bin/etcher-image-write',
      'node_modules/.bin/mocha',
      'node_modules/.bin/rimraf',
      'node_modules/.bin/gulp',
      'node_modules/.bin/prettier',
      'node_modules/.bin/coffeelint',
      'node_modules/.bin/tsc',
      'node_modules/.bin/resin-lint',
      'node_modules/.bin/balena-preload',
      'node_modules/.bin/catch-uncommitted',
    ];

    // One [file path, expected encoding] pair per sample, binaries first
    const expectations: Array<[string, string]> = [
      ...nativeAddons.map((relPath): [string, string] => [
        path.join('node_modules', relPath),
        'binary',
      ]),
      ...launcherScripts.map((relPath): [string, string] => [relPath, 'utf-8']),
    ];

    // Files are read and checked one at a time, in list order
    for (const [filePath, expected] of expectations) {
      const contents = await fs.readFile(filePath);
      const detected = await detectEncoding(contents);
      expect(detected).to.equal(expected);
    }
  });
});