Merge pull request #2376 from balena-io/lucianbuzzo/fast-scan

Improve directory scan speed prior to tarballing
This commit is contained in:
bulldozer-balena[bot] 2021-11-26 15:35:56 +00:00 committed by GitHub
commit 498e21f0ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 106 additions and 32 deletions

View File

@ -199,14 +199,17 @@ export async function deployToDevice(opts: DeviceDeployOptions): Promise<void> {
await checkBuildSecretsRequirements(docker, opts.source); await checkBuildSecretsRequirements(docker, opts.source);
globalLogger.logDebug('Tarring all non-ignored files...'); globalLogger.logDebug('Tarring all non-ignored files...');
const tarStartTime = Date.now();
const tarStream = await tarDirectory(opts.source, { const tarStream = await tarDirectory(opts.source, {
composition: project.composition, composition: project.composition,
convertEol: opts.convertEol, convertEol: opts.convertEol,
multiDockerignore: opts.multiDockerignore, multiDockerignore: opts.multiDockerignore,
nogitignore: opts.nogitignore, // v13: delete this line nogitignore: opts.nogitignore, // v13: delete this line
}); });
globalLogger.logDebug(`Tarring complete in ${Date.now() - tarStartTime} ms`);
// Try to detect the device information // Try to detect the device information
globalLogger.logDebug('Fetching device information...');
const deviceInfo = await api.getDeviceInformation(); const deviceInfo = await api.getDeviceInformation();
let buildLogs: Dictionary<string> | undefined; let buildLogs: Dictionary<string> | undefined;

View File

@ -200,6 +200,48 @@ export interface FileStats {
stats: Stats; stats: Stats;
} }
/**
 * Create a list of files for the filesystem subtree rooted at
 * projectDir, excluding entries for directories themselves.
 *
 * The tree is walked one level at a time. Entries that are not directories
 * (regular files, sockets, dangling symlinks, ...) are all returned; no
 * stat-based filtering of special files is done here. Symbolic links that
 * resolve to a directory are followed, so the files beneath them are listed
 * under the link's path.
 *
 * @param projectDir Source directory (root of subtree to be listed)
 * @returns Paths of all non-directory entries, each prefixed with projectDir.
 * The order of the returned paths is not guaranteed.
 * @throws Rejects with the underlying filesystem error if a directory
 * cannot be read.
 */
async function listFiles(projectDir: string): Promise<string[]> {
	// Upper bound on the number of directories read concurrently. A level of
	// the tree can be arbitrarily wide, so it is processed in smaller batches
	// to avoid out of memory errors. This approach is significantly faster
	// than using Bluebird.map with a concurrency setting.
	const batchSize = 100;
	const files: string[] = [];

	let currentDirs: string[] = [projectDir];
	while (currentDirs.length > 0) {
		// Directories discovered at this level; they form the next level.
		const foundDirs: string[] = [];
		for (let start = 0; start < currentDirs.length; start += batchSize) {
			const batch = currentDirs.slice(start, start + batchSize);
			await Promise.all(
				batch.map(async (dir) => {
					const entries = await fs.readdir(dir, { withFileTypes: true });
					for (const entry of entries) {
						const fpath = path.join(dir, entry.name);
						let isDir = entry.isDirectory();
						// A directory entry describes the link itself, not its
						// target, so a symlink to a directory must be resolved
						// explicitly or its contents would be silently dropped.
						if (!isDir && entry.isSymbolicLink()) {
							try {
								isDir = (await fs.stat(fpath)).isDirectory();
							} catch {
								// Dangling or unresolvable link: keep it in the file
								// list so that the caller decides how to handle it.
								// NOTE(review): a link cycle is presumably cut off
								// here once the OS refuses to resolve the path
								// (e.g. ELOOP) - confirm on all supported platforms.
								isDir = false;
							}
						}
						if (isDir) {
							foundDirs.push(fpath);
						} else {
							files.push(fpath);
						}
					}
				}),
			);
		}
		currentDirs = foundDirs;
	}
	return files;
}
/** /**
* Return the contents of a .dockerignore file at projectDir, as a string. * Return the contents of a .dockerignore file at projectDir, as a string.
* Return an empty string if a .dockerignore file does not exist. * Return an empty string if a .dockerignore file does not exist.
@ -211,7 +253,7 @@ async function readDockerIgnoreFile(projectDir: string): Promise<string> {
let dockerIgnoreStr = ''; let dockerIgnoreStr = '';
try { try {
dockerIgnoreStr = await fs.readFile(dockerIgnorePath, 'utf8'); dockerIgnoreStr = await fs.readFile(dockerIgnorePath, 'utf8');
} catch (err) { } catch (err: any) {
if (err.code !== 'ENOENT') { if (err.code !== 'ENOENT') {
throw new ExpectedError( throw new ExpectedError(
`Error reading file "${dockerIgnorePath}": ${err.message}`, `Error reading file "${dockerIgnorePath}": ${err.message}`,
@ -269,7 +311,10 @@ export async function filterFilesWithDockerignore(
projectDir: string, projectDir: string,
multiDockerignore: boolean, multiDockerignore: boolean,
serviceDirsByService: ServiceDirs, serviceDirsByService: ServiceDirs,
): Promise<{ filteredFileList: FileStats[]; dockerignoreFiles: FileStats[] }> { ): Promise<{
filteredFileList: FileStats[];
dockerignoreFiles: FileStats[];
}> {
// path.resolve() also converts forward slashes to backslashes on Windows // path.resolve() also converts forward slashes to backslashes on Windows
projectDir = path.resolve(projectDir); projectDir = path.resolve(projectDir);
const root = '.' + path.sep; const root = '.' + path.sep;
@ -294,45 +339,65 @@ export async function filterFilesWithDockerignore(
const dockerignoreServiceDirs: string[] = multiDockerignore const dockerignoreServiceDirs: string[] = multiDockerignore
? Object.keys(ignoreByDir).filter((dir) => dir && dir !== root) ? Object.keys(ignoreByDir).filter((dir) => dir && dir !== root)
: []; : [];
const files = await listFiles(projectDir);
const dockerignoreFiles: FileStats[] = []; const dockerignoreFiles: FileStats[] = [];
const filteredFileList: FileStats[] = []; const filteredFileList: FileStats[] = [];
const klaw = await import('klaw');
await new Promise((resolve, reject) => { // Because `files` can be of arbitrary length, process them in smaller batches
// Looking at klaw's source code, `preserveSymlinks` appears to only // to avoid out of memory errors.
// afect the `stats` argument to the `data` event handler // This approach is significantly faster than using Bluebird.map with a
klaw(projectDir, { preserveSymlinks: false }) // concurrency setting
.on('error', reject) const chunks = _.chunk(files, 750);
.on('end', resolve) for (const chunk of chunks) {
.on('data', (item: { path: string; stats: Stats }) => { await Promise.all(
const { path: filePath, stats } = item; chunk.map(async (filePath) => {
// With `preserveSymlinks: false`, filePath cannot be a symlink.
// filePath may be a directory or a regular or special file
if (!stats.isFile()) {
return;
}
const relPath = path.relative(projectDir, filePath); const relPath = path.relative(projectDir, filePath);
const fileInfo = {
filePath, // .dockerignore files are always added to a list of known dockerignore files
relPath,
stats,
};
if (path.basename(relPath) === '.dockerignore') { if (path.basename(relPath) === '.dockerignore') {
dockerignoreFiles.push(fileInfo); const diStats = await fs.stat(filePath);
dockerignoreFiles.push({
filePath,
relPath,
stats: diStats,
});
} }
for (const dir of dockerignoreServiceDirs) {
if (relPath.startsWith(dir)) { // First check if the file is ignored by a .dockerignore file in a service directory
if (!ignoreByDir[dir].ignores(relPath.substring(dir.length))) { const matchingDir = dockerignoreServiceDirs.find((dir) => {
filteredFileList.push(fileInfo); return relPath.startsWith(dir);
} });
// If the file is ignored in a service directory, exit early; otherwise check if it is ignored by the root .dockerignore file.
// Crucially, if the file is in a known service directory, and isn't ignored, the root .dockerignore file should not be checked.
if (matchingDir) {
if (
ignoreByDir[matchingDir].ignores(
relPath.substring(matchingDir.length),
)
) {
return; return;
} }
} else if (ignoreByDir[root].ignores(relPath)) {
return;
} }
if (!ignoreByDir[root].ignores(relPath)) {
filteredFileList.push(fileInfo); // At this point we can do a final stat of the file, and check if it should be included
const stats = await fs.stat(filePath);
// filePath may be a special file that we should ignore, such as a socket
if (stats.isFile()) {
filteredFileList.push({
filePath,
relPath,
stats,
});
} }
}); }),
}); );
}
return { filteredFileList, dockerignoreFiles }; return { filteredFileList, dockerignoreFiles };
} }

View File

@ -317,12 +317,18 @@ async function getTarStream(build: RemoteBuild): Promise<Stream.Readable> {
Object.keys(build.opts.registrySecrets).length > 0 Object.keys(build.opts.registrySecrets).length > 0
? preFinalizeCallback ? preFinalizeCallback
: undefined; : undefined;
return await tarDirectory(path.resolve(build.source), { globalLogger.logDebug('Tarring all non-ignored files...');
const tarStartTime = Date.now();
const tarStream = await tarDirectory(path.resolve(build.source), {
preFinalizeCallback: preFinalizeCb, preFinalizeCallback: preFinalizeCb,
convertEol: build.opts.convertEol, convertEol: build.opts.convertEol,
multiDockerignore: build.opts.multiDockerignore, multiDockerignore: build.opts.multiDockerignore,
nogitignore: build.nogitignore, // v13: delete this line nogitignore: build.nogitignore, // v13: delete this line
}); });
globalLogger.logDebug(
`Tarring complete in ${Date.now() - tarStartTime} ms`,
);
return tarStream;
} finally { } finally {
tarSpinner.stop(); tarSpinner.stop();
} }