balena-cli/lib/utils/eol-conversion.ts

/**
 * @license
 * Copyright 2019-2020 Balena Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import mmmagic = require('mmmagic');
import fs = require('mz/fs');
import Logger = require('./logger');

// Module-level logger handle obtained from the project's Logger module.
// readFileWithEolConversion() uses it for info, warning and deferred messages.
const globalLogger = Logger.getLogger();

// Size limit in bytes (10 MB, decimal): files larger than this are returned
// as read, without encoding analysis or EOL conversion.
const LARGE_FILE_THRESHOLD = 10_000_000;

// Encodings eligible for CRLF detection/conversion. Deliberately kept to a
// conservative short list for now.
const CONVERTIBLE_ENCODINGS: string[] = ['ascii', 'utf-8'];

/**
 * Ask mmmagic for the MIME encoding of a data buffer.
 * The name 'us-ascii' reported by mmmagic is translated to 'ascii', which is
 * the name node's Buffer API uses; any other result is passed through as is.
 * @param data Bytes to analyse
 * @returns Promise resolving to the detected encoding name, or rejecting with
 * the error reported by mmmagic
 */
async function detectEncoding(data: Buffer): Promise<string> {
	// Detector configured to report the MIME encoding only
	const encodingDetector = new mmmagic.Magic(mmmagic.MAGIC_MIME_ENCODING);

	// The callback API is wrapped by hand: promisify(magic.detect) was observed
	// to fail with 'Illegal Invocation' (presumably because the method gets
	// detached from its instance - not confirmed).
	return new Promise((resolve, reject) => {
		encodingDetector.detect(data, (error, detected) => {
			if (error) {
				return reject(error);
			}
			if (detected === 'us-ascii') {
				// Use the node Buffer spelling of the encoding name
				return resolve('ascii');
			}
			return resolve(detected);
		});
	});
}

/**
 * Replace every CRLF pair with a single LF, rewriting the given buffer itself.
 * Works on a byte-by-byte basis, so it is safe for UTF-8, ASCII and 8-bit
 * encodings (like 'latin-1', 'iso-8859-1', ...), but not for UTF-16 or UTF-32.
 * A CR that is not immediately followed by a LF is kept unchanged.
 * @param buf Buffer to convert; its contents are modified
 * @returns A view over the same memory as the input (via Buffer.slice()),
 * shortened to the length of the converted data
 */
function convertEolInPlace(buf: Buffer): Buffer {
	const CARRIAGE_RETURN = 13;
	const LINE_FEED = 10;
	// readPos walks over the original bytes, writePos over the compacted output.
	// Both index the same buffer and writePos never overtakes readPos, so no
	// byte is overwritten before it has been read.
	let readPos = 0;
	let writePos = 0;
	let previousWasCR = false;
	while (readPos < buf.length) {
		const current = buf[readPos++];
		buf[writePos] = current;
		if (previousWasCR && current === LINE_FEED) {
			// This LF completes a CRLF pair: replace the CR written in the
			// previous step and do not advance, which drops one byte.
			buf[writePos - 1] = LINE_FEED;
		} else {
			writePos++;
		}
		previousWasCR = current === CARRIAGE_RETURN;
	}
	return buf.slice(0, writePos);
}

/**
 * Drop-in replacement for promisified fs.readFile(<string>)
 * Reads the whole file and, for supported encodings, either converts EOLs
 * from CRLF to LF or logs warnings about them.
 *
 * The contents are returned unmodified when the file is larger than
 * LARGE_FILE_THRESHOLD, when the detected encoding is not listed in
 * CONVERTIBLE_ENCODINGS, or when no CRLF sequence is present.
 *
 * The returned promise rejects if reading the file or detecting its encoding
 * fails.
 * @param filepath Path of the file to read
 * @param convertEol When true, performs conversions, otherwise just warns.
 * @returns The file contents, with CRLF replaced by LF if a conversion was
 * performed
 */
export async function readFileWithEolConversion(
	filepath: string,
	convertEol: boolean,
): Promise<Buffer> {
	const fileBuffer = await fs.readFile(filepath);

	// Skip processing of very large files.
	// The size is taken from the buffer that was just read instead of a
	// separate fs.stat() call: it is the size of the data actually being
	// processed, and it avoids an extra filesystem access that could fail or
	// disagree with the buffer if the file changed after the read.
	if (fileBuffer.length > LARGE_FILE_THRESHOLD) {
		globalLogger.logWarn(`CRLF detection skipped for large file: ${filepath}`);
		return fileBuffer;
	}

	// Analyse encoding
	const encoding = await detectEncoding(fileBuffer);

	// Skip further processing of non-convertible encodings
	if (!CONVERTIBLE_ENCODINGS.includes(encoding)) {
		return fileBuffer;
	}

	// Skip further processing of files that don't contain CRLF
	if (!fileBuffer.includes('\r\n', 0, encoding)) {
		return fileBuffer;
	}

	if (convertEol) {
		// Convert CRLF->LF
		globalLogger.logInfo(
			`Converting line endings CRLF -> LF for file: ${filepath}`,
		);

		// The conversion rewrites fileBuffer and returns a shorter view of it
		return convertEolInPlace(fileBuffer);
	} else {
		// Immediate warning
		globalLogger.logWarn(
			`CRLF (Windows) line endings detected in file: ${filepath}`,
		);
		// And summary warning later
		globalLogger.deferredLog(
			'Windows-format line endings were detected in some files. Consider using the `--convert-eol` option.',
			Logger.Level.WARN,
		);

		return fileBuffer;
	}
}