Fix balena push "Segmentation fault" on Windows (replace 'mmmagic' with 'isBinaryFile')

Connects-to: #1611
Change-type: patch
This commit is contained in:
Paulo Castro 2020-02-11 18:07:25 +00:00
parent 1a74dcf4cf
commit 88a1e413a3
6 changed files with 170 additions and 63 deletions

View File

@ -99,7 +99,6 @@ async function buildPkg() {
// [platform, [source path], [destination path]]
['*', ['open', 'xdg-open'], ['xdg-open']],
['darwin', ['denymount', 'bin', 'denymount'], ['denymount']],
['win32', ['mmmagic', 'magic', 'magic.mgc'], ['mmmagic', 'magic.mgc']],
];
await Bluebird.map(paths, ([platform, source, dest]) => {
if (platform === '*' || platform === process.platform) {

View File

@ -15,35 +15,19 @@
* limitations under the License.
*/
import { fs } from 'mz';
import Logger = require('./logger');
const globalLogger = Logger.getLogger();
// Define file size threshold (bytes) over which analysis/conversion is not performed.
const LARGE_FILE_THRESHOLD = 10 * 1000 * 1000;
// The list of encodings to convert is intentionally conservative for now
// Note that `convertEolInPlace()` only works with UTF-8 or single-byte encodings
const CONVERTIBLE_ENCODINGS = ['ascii', 'utf-8'];
/**
 * Ask libmagic (through the 'mmmagic' package) for the MIME encoding of a
 * data buffer.
 *
 * @param data Bytes whose encoding should be identified
 * @returns The encoding name reported by mmmagic, except that 'us-ascii' is
 * renamed to 'ascii' so that it matches the name used by node's Buffer API.
 * The promise is rejected with the error passed by mmmagic, if any.
 */
async function detectEncoding(data: Buffer): Promise<string> {
  // 'mmmagic' is loaded lazily, only when an encoding must be detected
  const { Magic, MAGIC_MIME_ENCODING } = await import('mmmagic');
  const detector = new Magic(MAGIC_MIME_ENCODING);
  // The callback API is wrapped by hand: `promisify(magic.detect)` was
  // observed to fail with 'Illegal Invocation'.
  return new Promise((resolve, reject) => {
    detector.detect(data, (failure, detected) => {
      if (failure) {
        reject(failure);
        return;
      }
      resolve(detected === 'us-ascii' ? 'ascii' : detected);
    });
  });
}
// Maximum number of bytes to consider when detecting the file encoding
const DETECT_MAX_BYTES = 1024;
/**
* Convert EOL (CRLF → LF) in place, i.e. modifying the input buffer.
@ -90,10 +74,7 @@ export async function readFileWithEolConversion(
filepath: string,
convertEol: boolean,
): Promise<Buffer> {
const { fs } = await import('mz');
const fileBuffer = await fs.readFile(filepath);
const Logger = await import('./logger');
const globalLogger = Logger.getLogger();
// Skip processing of very large files
const fileStats = await fs.stat(filepath);
@ -111,7 +92,7 @@ export async function readFileWithEolConversion(
}
// Skip further processing of files that don't contain CRLF
if (!fileBuffer.includes('\r\n', 0, encoding)) {
if (!fileBuffer.includes('\r\n')) {
return fileBuffer;
}
@ -136,3 +117,122 @@ export async function readFileWithEolConversion(
return fileBuffer;
}
}
/**
 * Byte signatures recognised at the very start of the data, in the order in
 * which they are tested. The UTF-32 LE mark must be tested before the
 * UTF-16 LE mark because the latter is a prefix of the former.
 */
const ENCODING_SIGNATURES = [
  { encoding: 'utf-8', bytes: [0xef, 0xbb, 0xbf] }, // UTF-8 BOM
  { encoding: 'utf-32', bytes: [0x00, 0x00, 0xfe, 0xff] }, // UTF-32 BE BOM
  { encoding: 'utf-32', bytes: [0xff, 0xfe, 0x00, 0x00] }, // UTF-32 LE BOM
  // GB BOM (https://en.wikipedia.org/wiki/GB_18030)
  { encoding: 'gb-18030', bytes: [0x84, 0x31, 0x95, 0x33] },
  { encoding: 'utf-16', bytes: [0xfe, 0xff] }, // UTF-16 BE BOM
  { encoding: 'utf-16', bytes: [0xff, 0xfe] }, // UTF-16 LE BOM
];

/**
 * Number of continuation bytes that must follow a UTF-8 lead byte, or 0 when
 * the byte cannot start a multi-byte UTF-8 sequence.
 */
function utf8ContinuationCount(leadByte: number): number {
  if (leadByte > 193 && leadByte < 224) {
    return 1; // 0xC2-0xDF: two-byte sequence
  }
  if (leadByte > 223 && leadByte < 240) {
    return 2; // 0xE0-0xEF: three-byte sequence
  }
  if (leadByte > 239 && leadByte < 245) {
    return 3; // 0xF0-0xF4: four-byte sequence
  }
  return 0;
}

/**
 * Attempt to detect the encoding of a data buffer.
 * Code copied and modified from the npm package 'isbinaryfile' (MIT licence)
 * https://github.com/gjtorikian/isBinaryFile/blob/master/src/index.ts
 *
 * The start of the data is first compared against a few well-known
 * signatures (byte-order marks, '%PDF-'). Failing that, up to `maxBytes`
 * bytes are scanned: a NULL byte, a control byte other than ESC and 7-14, or
 * a byte that is not part of a well-formed UTF-8 multi-byte sequence causes
 * the data to be reported as 'binary'.
 *
 * A multi-byte sequence that starts inside the scanned window is validated
 * using the bytes that follow it even when they lie beyond the window, so a
 * character straddling the window boundary does not cause a false 'binary'
 * result. A sequence cut short by the actual end of the data is still
 * reported as 'binary'.
 *
 * @returns one of the possible values: '' (empty file), 'utf-8', 'utf-16',
 * 'utf-32', 'gb-18030', 'pdf', and 'binary'.
 *
 * Note: pure ASCII data is identified as 'utf-8' (ASCII is indeed a subset
 * of UTF-8).
 *
 * @param fileBuffer File contents whose encoding should be detected
 * @param bytesRead Optional "file size" if smaller than the buffer size
 * (values larger than the buffer size are clamped to the buffer size)
 * @param maxBytes Optional maximum number of bytes to scan when no signature
 * is found at the start of the data
 */
export async function detectEncoding(
  fileBuffer: Buffer,
  bytesRead = fileBuffer.length,
  maxBytes = DETECT_MAX_BYTES,
): Promise<string> {
  // Never look past the data that is actually present in the buffer
  const availableBytes = Math.min(bytesRead, fileBuffer.length);

  // empty file
  if (availableBytes <= 0) {
    return '';
  }

  for (const { encoding, bytes } of ENCODING_SIGNATURES) {
    if (
      availableBytes >= bytes.length &&
      bytes.every((value, index) => fileBuffer[index] === value)
    ) {
      return encoding;
    }
  }

  if (availableBytes >= 5 && fileBuffer.toString('utf8', 0, 5) === '%PDF-') {
    /* PDF. This is binary. */
    return 'pdf';
  }

  const totalBytes = Math.min(availableBytes, maxBytes);
  for (let i = 0; i < totalBytes; i++) {
    const c = fileBuffer[i];
    if (c === 0) {
      // NULL byte
      return 'binary';
    }
    if (
      // ESC character used in ANSI escape sequences for text color (log files)
      c === 27 ||
      // BEL, BS, TAB, LF, VT, FF, CR, SO
      (c >= 7 && c <= 14) ||
      // printable ASCII (and DEL)
      (c >= 32 && c <= 127)
    ) {
      continue;
    }

    // UTF-8 detection: the byte must start a complete multi-byte sequence
    const continuationCount = utf8ContinuationCount(c);
    const lastIndex = i + continuationCount;
    if (continuationCount === 0 || lastIndex >= availableBytes) {
      return 'binary';
    }
    for (let j = i + 1; j <= lastIndex; j++) {
      const next = fileBuffer[j];
      // continuation bytes are in the range 0x80-0xBF
      if (next < 128 || next > 191) {
        return 'binary';
      }
    }
    // Resume scanning after the sequence that was just validated
    i = lastIndex;
  }
  return 'utf-8';
}

17
npm-shrinkwrap.json generated
View File

@ -821,15 +821,6 @@
"@types/node": "*"
}
},
"@types/mmmagic": {
"version": "0.4.16-alpha",
"resolved": "https://registry.npmjs.org/@types/mmmagic/-/mmmagic-0.4.16-alpha.tgz",
"integrity": "sha1-zM66vnBpBmPWRaMdTLzxzZ3+UIE=",
"dev": true,
"requires": {
"@types/node": "*"
}
},
"@types/mocha": {
"version": "5.2.7",
"resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-5.2.7.tgz",
@ -9057,14 +9048,6 @@
}
}
},
"mmmagic": {
"version": "0.5.3",
"resolved": "https://registry.npmjs.org/mmmagic/-/mmmagic-0.5.3.tgz",
"integrity": "sha512-xLqCu7GJYTzJczg0jafXFuh+iPzQL/ru0YYf4GiTTz8Cehru/wiXtUS8Pp8Xi77zNaiVndJ0OO1yAFci6iHyFg==",
"requires": {
"nan": "^2.13.2"
}
},
"mocha": {
"version": "6.2.2",
"resolved": "https://registry.npmjs.org/mocha/-/mocha-6.2.2.tgz",

View File

@ -112,7 +112,6 @@
"@types/lodash": "4.14.112",
"@types/mixpanel": "2.14.0",
"@types/mkdirp": "0.5.2",
"@types/mmmagic": "0.4.16-alpha",
"@types/mocha": "^5.2.7",
"@types/mz": "0.0.32",
"@types/net-keepalive": "^0.4.0",
@ -208,7 +207,6 @@
"minimatch": "^3.0.4",
"mixpanel": "^0.10.3",
"mkdirp": "^0.5.1",
"mmmagic": "^0.5.3",
"moment": "^2.24.0",
"moment-duration-format": "^2.3.2",
"mz": "^2.7.0",

View File

@ -1,14 +0,0 @@
diff --git a/node_modules/mmmagic/lib/index.js b/node_modules/mmmagic/lib/index.js
index bd18a4b..a19a149 100644
--- a/node_modules/mmmagic/lib/index.js
+++ b/node_modules/mmmagic/lib/index.js
@@ -1,5 +1,8 @@
var Magic = require('../build/Release/magic');
-var fbpath = require('path').join(__dirname, '..', 'magic', 'magic');
+var path = require('path');
+var fbpath = process.pkg
+ ? path.join(path.dirname(process.execPath), 'mmmagic', 'magic')
+ : path.join(__dirname, '..', 'magic', 'magic');
Magic.setFallback(fbpath);
module.exports = {

View File

@ -16,8 +16,13 @@
*/
import { expect } from 'chai';
import { fs } from 'mz';
import * as path from 'path';
import { convertEolInPlace } from '../../build/utils/eol-conversion';
import {
convertEolInPlace,
detectEncoding,
} from '../../build/utils/eol-conversion';
describe('convertEolInPlace() function', function() {
it('should return expected values', () => {
@ -53,3 +58,39 @@ describe('convertEolInPlace() function', function() {
}
});
});
// Checks detectEncoding() against real files found in the local
// 'node_modules' folder: compiled native add-ons are expected to be reported
// as 'binary' and the launcher scripts under 'node_modules/.bin' as 'utf-8'.
describe('detectEncoding() function', function() {
  it('should correctly detect the encoding of a few selected files', async () => {
    // Paths relative to the 'node_modules' folder
    const nativeAddons = [
      'ext2fs/build/Release/bindings.node',
      'drivelist/build/Release/drivelist.node',
      'resin-cli-visuals/node_modules/drivelist/build/Release/drivelist.node',
      '@balena.io/usb/build/Release/usb_bindings.node',
      'xxhash/build/Release/hash.node',
      'mountutils/build/Release/MountUtils.node',
    ];
    // Paths relative to the current working directory
    const launcherScripts = [
      'node_modules/.bin/etcher-image-write',
      'node_modules/.bin/mocha',
      'node_modules/.bin/rimraf',
      'node_modules/.bin/gulp',
      'node_modules/.bin/prettier',
      'node_modules/.bin/coffeelint',
      'node_modules/.bin/tsc',
      'node_modules/.bin/resin-lint',
      'node_modules/.bin/balena-preload',
      'node_modules/.bin/catch-uncommitted',
    ];

    // Read one file and compare its detected encoding with the expected one
    const assertEncoding = async (filePath: string, expected: string) => {
      const contents = await fs.readFile(filePath);
      const detected = await detectEncoding(contents);
      expect(detected).to.equal(expected);
    };

    // Files are checked one at a time, binaries first
    for (const relativePath of nativeAddons) {
      await assertEncoding(path.join('node_modules', relativePath), 'binary');
    }
    for (const scriptPath of launcherScripts) {
      await assertEncoding(scriptPath, 'utf-8');
    }
  });
});