balena-cli/lib/utils/eol-conversion.ts

139 lines
4.3 KiB
TypeScript
Raw Normal View History

/**
* @license
* Copyright 2019-2020 Balena Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Files larger than this many bytes (10 million) are neither analysed nor converted.
const LARGE_FILE_THRESHOLD = 10e6;
// Encodings eligible for EOL conversion. Kept deliberately short for now.
const CONVERTIBLE_ENCODINGS: string[] = ['ascii', 'utf-8'];
/**
 * Ask mmmagic for the MIME encoding of a data buffer.
 *
 * @param data Bytes to analyse.
 * @returns The encoding name reported by mmmagic, except that 'us-ascii' is
 * mapped to 'ascii' (the name node's Buffer API uses). Rejects with the
 * error passed to the mmmagic callback, if any.
 */
async function detectEncoding(data: Buffer): Promise<string> {
  const { Magic, MAGIC_MIME_ENCODING } = await import('mmmagic');
  // Detector configured for mime *encoding* analysis
  const detector = new Magic(MAGIC_MIME_ENCODING);
  // detect() is wrapped by hand: util.promisify(detector.detect) was found
  // to fail with 'Illegal Invocation'.
  return new Promise<string>((resolve, reject) => {
    detector.detect(data, (error, detected) => {
      if (error) {
        reject(error);
        return;
      }
      // mmmagic says 'us-ascii' where node Buffer expects 'ascii'
      resolve(detected === 'us-ascii' ? 'ascii' : detected);
    });
  });
}
/**
 * Convert EOL (CRLF -> LF) in place, i.e. by overwriting the input buffer.
 * Works byte-wise, so it is safe for UTF-8, ASCII and 8-bit encodings
 * (like 'latin-1', 'iso-8859-1', ...), but not for UTF-16 or UTF-32.
 * Only a CR immediately followed by a LF is collapsed; a lone CR is kept.
 *
 * @param buf Buffer to convert; its contents are modified.
 * @returns A buffer view over the same memory as `buf`, truncated to the
 * converted length.
 */
export function convertEolInPlace(buf: Buffer): Buffer {
  const CR = 0x0d;
  const LF = 0x0a;
  // `readIdx` walks the original bytes and `writeIdx` marks where the next
  // output byte goes. Both index the same buffer; writeIdx <= readIdx always.
  let writeIdx = 0;
  let prevWasCR = false;
  for (let readIdx = 0; readIdx < buf.length; readIdx++) {
    const byte = buf[readIdx];
    buf[writeIdx] = byte;
    if (prevWasCR && byte === LF) {
      // The CR was already emitted at writeIdx - 1: replace it with the LF
      // and do not advance, so the pair occupies a single output byte.
      buf[writeIdx - 1] = LF;
    } else {
      writeIdx++;
    }
    prevWasCR = byte === CR;
  }
  return buf.subarray(0, writeIdx);
}
/**
 * Drop-in replacement for promisified fs.readFile(<string>).
 * Attempts to convert EOLs from CRLF to LF for supported encodings,
 * or otherwise logs warnings.
 *
 * The file contents are returned untouched when the file exceeds
 * LARGE_FILE_THRESHOLD bytes (a warning is logged), when its detected
 * encoding is not listed in CONVERTIBLE_ENCODINGS, or when it contains
 * no CRLF sequence.
 *
 * @param filepath Path of the file to read.
 * @param convertEol When true, performs conversions, otherwise just warns.
 * @returns The file contents, with CRLF replaced by LF if a conversion
 * was performed.
 */
export async function readFileWithEolConversion(
  filepath: string,
  convertEol: boolean,
): Promise<Buffer> {
  const { fs } = await import('mz');
  const fileBuffer = await fs.readFile(filepath);
  const Logger = await import('./logger');
  const globalLogger = Logger.getLogger();

  // Skip processing of very large files. The size is taken from the bytes
  // that were actually read rather than from a separate fs.stat() call:
  // this saves a syscall and cannot disagree with the buffer contents if
  // the file changes (or disappears) after it has been read.
  if (fileBuffer.length > LARGE_FILE_THRESHOLD) {
    globalLogger.logWarn(`CRLF detection skipped for large file: ${filepath}`);
    return fileBuffer;
  }

  // Analyse encoding
  const encoding = await detectEncoding(fileBuffer);

  // Skip further processing of non-convertible encodings
  if (!CONVERTIBLE_ENCODINGS.includes(encoding)) {
    return fileBuffer;
  }

  // Skip further processing of files that don't contain CRLF
  if (!fileBuffer.includes('\r\n', 0, encoding)) {
    return fileBuffer;
  }

  if (convertEol) {
    // Convert CRLF->LF
    globalLogger.logInfo(
      `Converting line endings CRLF -> LF for file: ${filepath}`,
    );
    return convertEolInPlace(fileBuffer);
  } else {
    // Immediate warning
    globalLogger.logWarn(
      `CRLF (Windows) line endings detected in file: ${filepath}`,
    );
    // And summary warning later
    globalLogger.deferredLog(
      'Windows-format line endings were detected in some files. Consider using the `--convert-eol` option.',
      Logger.Level.WARN,
    );
    return fileBuffer;
  }
}