From 915f7e3763991700d4746e3581099d5793a58648 Mon Sep 17 00:00:00 2001 From: Paulo Castro Date: Thu, 24 Feb 2022 22:37:21 +0000 Subject: [PATCH] ssh: Allow ssh to service with IP address and production balenaOS image Also remove 'balena ssh' dependency on the device supervisor (that may be down because of device issues or a supervisor bug) when opening a ssh shell on a container (#1560). Resolves: #2458 Resolves: #1560 Change-type: minor --- lib/commands/ssh.ts | 124 +++------------------------- lib/utils/device/ssh.ts | 161 +++++++++++++++++++++---------------- lib/utils/ssh.ts | 25 +++--- tests/commands/ssh.spec.ts | 13 +-- 4 files changed, 122 insertions(+), 201 deletions(-) diff --git a/lib/commands/ssh.ts b/lib/commands/ssh.ts index 44bf9bf6..a500feb7 100644 --- a/lib/commands/ssh.ts +++ b/lib/commands/ssh.ts @@ -20,7 +20,6 @@ import Command from '../command'; import * as cf from '../utils/common-flags'; import { getBalenaSdk, stripIndent } from '../utils/lazy'; import { parseAsInteger, validateLocalHostnameOrIp } from '../utils/validation'; -import * as BalenaSdk from 'balena-sdk'; interface FlagsDef { port?: number; @@ -128,8 +127,8 @@ export default class SshCmd extends Command { if (validateLocalHostnameOrIp(params.fleetOrDevice)) { const { performLocalDeviceSSH } = await import('../utils/device/ssh'); return await performLocalDeviceSSH({ - address: params.fleetOrDevice, - port: options.port, + hostname: params.fleetOrDevice, + port: options.port || 'local', forceTTY: options.tty, verbose: options.verbose, service: params.service, @@ -152,12 +151,6 @@ export default class SshCmd extends Command { params.fleetOrDevice, ); - const device = await sdk.models.device.get(deviceUuid, { - $select: ['id', 'supervisor_version', 'is_online'], - }); - - const deviceId = device.id; - const supervisorVersion = device.supervisor_version; const { which } = await import('../utils/which'); const [whichProxytunnel, username, proxyUrl] = await Promise.all([ @@ -209,19 +202,15 @@ export default class SshCmd extends Command { // that we know exists and is accessible let containerId: string | undefined; if (params.service != null) { - containerId = await this.getContainerId( - sdk, + const { getContainerIdForService } = await import('../utils/device/ssh'); + containerId = await getContainerIdForService({ deviceUuid, - params.service, - { - port: options.port, - proxyCommand, - proxyUrl: proxyUrl || '', - username: username!, - }, - supervisorVersion, - deviceId, - ); + hostname: `ssh.${proxyUrl}`, + port: options.port || 'cloud', + proxyCommand, + service: params.service, + username: username!, + }); } let accessCommand: string; @@ -234,101 +223,10 @@ export default class SshCmd extends Command { await runRemoteCommand({ cmd: accessCommand, hostname: `ssh.${proxyUrl}`, - port: options.port, + port: options.port || 'cloud', proxyCommand, username, verbose: options.verbose, }); } - - async getContainerId( - sdk: BalenaSdk.BalenaSDK, - uuid: string, - serviceName: string, - sshOpts: { - port?: number; - proxyCommand?: string[]; - proxyUrl: string; - username: string; - }, - version?: string, - id?: number, - ): Promise { - const semver = await import('balena-semver'); - - if (version == null || id == null) { - const device = await sdk.models.device.get(uuid, { - $select: ['id', 'supervisor_version'], - }); - version = device.supervisor_version; - id = device.id; - } - - let containerId: string | undefined; - if (semver.gte(version, '8.6.0')) { - const apiUrl = await sdk.settings.get('apiUrl'); - // TODO: Move this into the SDKs device model - const request = await sdk.request.send({ - method: 'POST', - url: '/supervisor/v2/containerId', - baseUrl: apiUrl, - body: { - method: 'GET', - deviceId: id, - }, - }); - if (request.status !== 200) { - throw new Error( - `There was an error connecting to device ${uuid}, HTTP response code: ${request.status}.`, - ); - } - const body = request.body; - if (body.status !== 'success') { - throw new Error( - `There was an error communicating with device ${uuid}.\n\tError: ${body.message}`, - ); - } - containerId = body.services[serviceName]; - } else { - console.error(stripIndent` - Using slow legacy method to determine container IDs. To speed up - this process, update the device supervisor to v8.6.0 or later. - `); - // We need to execute a balena ps command on the device, - // and parse the output, looking for a specific - // container - const { escapeRegExp } = await import('lodash'); - const { deviceContainerEngineBinary } = await import( - '../utils/device/ssh' - ); - const { getRemoteCommandOutput } = await import('../utils/ssh'); - - const containers: string = ( - await getRemoteCommandOutput({ - cmd: `host ${uuid} "${deviceContainerEngineBinary}" ps --format "{{.ID}} {{.Names}}"`, - hostname: `ssh.${sshOpts.proxyUrl}`, - port: sshOpts.port, - proxyCommand: sshOpts.proxyCommand, - stderr: 'inherit', - username: sshOpts.username, - }) - ).stdout.toString(); - const lines = containers.split('\n'); - const regex = new RegExp(`\\/?${escapeRegExp(serviceName)}_\\d+_\\d+`); - for (const container of lines) { - const [cId, name] = container.split(' '); - if (regex.test(name)) { - containerId = cId; - break; - } - } - } - - if (containerId == null) { - throw new Error( - `Could not find a service ${serviceName} on device ${uuid}.`, - ); - } - return containerId; - } } diff --git a/lib/utils/device/ssh.ts b/lib/utils/device/ssh.ts index 55c5e514..0a7ee732 100644 --- a/lib/utils/device/ssh.ts +++ b/lib/utils/device/ssh.ts @@ -13,53 +13,88 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -import type { ContainerInfo } from 'dockerode'; import { ExpectedError } from '../../errors'; import { stripIndent } from '../lazy'; -export interface DeviceSSHOpts { - address: string; - port?: number; +import { + findBestUsernameForDevice, + getRemoteCommandOutput, + runRemoteCommand, + SshRemoteCommandOpts, +} from '../ssh'; + +export interface DeviceSSHOpts extends SshRemoteCommandOpts { forceTTY?: boolean; - verbose: boolean; service?: string; } -export const deviceContainerEngineBinary = `$(if [ -f /usr/bin/balena ]; then echo "balena"; else echo "docker"; fi)`; +const deviceContainerEngineBinary = `$(if [ -f /usr/bin/balena ]; then echo "balena"; else echo "docker"; fi)`; /** - * List the running containers on the device with dockerode, and return the - * container ID that matches the given service name. + * List the running containers on the device over ssh, and return the full + * container name that matches the given service name. + * + * Note: In the past, two other approaches were implemented for this function: + * + * - Obtaining container IDs through a supervisor API call: + * '/supervisor/v2/containerId' endpoint, via cloud. + * - Obtaining container IDs using 'dockerode' connected directly to + * balenaEngine on a device, TCP port 2375. + * + * The problem with using the supervisor API is that it means that 'balena ssh' + * becomes dependent on the supervisor being up an running, but sometimes ssh + * is needed to investigate devices issues where the supervisor has got into + * trouble (e.g. supervisor in restart loop). This is the subject of CLI issue + * https://github.com/balena-io/balena-cli/issues/1560 . + * + * The problem with using dockerode to connect directly to port 2375 (balenaEngine) + * is that it only works with development variants of balenaOS. Production variants + * block access to port 2375 for security reasons. 'balena ssh' should support + * production variants as well, especially after balenaOS v2.44.0 that introduced + * support for using the cloud account username for ssh authentication. + * + * Overall, the most reliable approach is to run 'balena-engine ps' over ssh. + * It is OK to depend on balenaEngine because ssh to a container is implemented + * through 'balena-engine exec' anyway, and of course it is OK to depend on ssh + * itself. */ -async function getContainerIdForService( - service: string, - deviceAddress: string, +export async function getContainerIdForService( + opts: SshRemoteCommandOpts & { service: string; deviceUuid?: string }, ): Promise { - const { escapeRegExp, reduce } = await import('lodash'); - const Docker = await import('dockerode'); - const docker = new Docker({ - host: deviceAddress, - port: 2375, - }); - const regex = new RegExp(`(^|\\/)${escapeRegExp(service)}_\\d+_\\d+`); - const nameRegex = /\/?([a-zA-Z0-9_-]+)_\d+_\d+/; - let allContainers: ContainerInfo[]; - try { - allContainers = await docker.listContainers(); - } catch (_e) { - throw new ExpectedError(stripIndent` - Could not access docker daemon on device ${deviceAddress}. - Please ensure the device is in local mode.`); + opts.cmd = `"${deviceContainerEngineBinary}" ps --format "{{.ID}} {{.Names}}"`; + if (opts.deviceUuid) { + // If a device UUID is given, perform ssh via cloud proxy 'host' command + opts.cmd = `host ${opts.deviceUuid} ${opts.cmd}`; } + const psLines: string[] = ( + await getRemoteCommandOutput({ ...opts, stderr: 'inherit' }) + ).stdout + .toString() + .split('\n') + .filter((l) => l); + + const { escapeRegExp } = await import('lodash'); + const regex = new RegExp(`(?:^|\\/)${escapeRegExp(opts.service)}_\\d+_\\d+`); + // Old balenaOS container name pattern: + // main_1234567_2345678 + // New balenaOS container name patterns: + // main_1234567_2345678_a000b111c222d333e444f555a666b777 + // main_1_1_localrelease + const nameRegex = /(?:^|\/)([a-zA-Z0-9_-]+)_\d+_\d+(?:_.+)?$/; + const serviceNames: string[] = []; - const containers: Array<{ id: string; name: string }> = []; - for (const container of allContainers) { - for (const name of container.Names) { + const containerNames: string[] = []; + let containerId: string | undefined; + + // sample psLine: 'b603c74e951e bar_4587562_2078151_3261c9d4c22f2c53a5267be459c89990' + for (const psLine of psLines) { + const [cId, name] = psLine.split(' '); + if (cId && name) { if (regex.test(name)) { - containers.push({ id: container.Id, name }); - break; + containerNames.push(name); + containerId = cId; } const match = name.match(nameRegex); if (match) { @@ -67,23 +102,21 @@ async function getContainerIdForService( } } } - if (containers.length > 1) { + + if (containerNames.length > 1) { + const [s, d] = [opts.service, opts.deviceUuid || opts.hostname]; throw new ExpectedError(stripIndent` - Found more than one container matching service name "${service}": - ${containers.map((container) => container.name).join(', ')} + Found more than one container matching service name "${s}" on device "${d}": + ${containerNames.join(', ')} Use different service names to avoid ambiguity. `); } - const containerId = containers.length ? containers[0].id : ''; if (!containerId) { + const [s, d] = [opts.service, opts.deviceUuid || opts.hostname]; throw new ExpectedError( - `Could not find a service on device with name ${service}. ${ + `Could not find a container matching service name "${s}" on device "${d}".${ serviceNames.length > 0 - ? `Available services:\n${reduce( - serviceNames, - (str, name) => `${str}\t${name}\n`, - '', - )}` + ? `\nAvailable services:\n\t${serviceNames.join('\n\t')}` : '' }`, ); @@ -94,13 +127,25 @@ async function getContainerIdForService( export async function performLocalDeviceSSH( opts: DeviceSSHOpts, ): Promise { + // Before we started using `findBestUsernameForDevice`, we tried the approach + // of attempting ssh with the 'root' username first and, if that failed, then + // attempting ssh with a regular user (balenaCloud username). The problem with + // that approach was that it would print the following message to the console: + // "root@192.168.1.36: Permission denied (publickey)" + // ... right before having success as a regular user, which looked broken or + // confusing from users' point of view. Capturing stderr to prevent that + // message from being printed is tricky because the messages printed to stderr + // may include the stderr output of remote commands that are of interest to + // the user. + const username = await findBestUsernameForDevice(opts.hostname, opts.port); let cmd = ''; if (opts.service) { - const containerId = await getContainerIdForService( - opts.service, - opts.address, - ); + const containerId = await getContainerIdForService({ + ...opts, + service: opts.service, + username, + }); const shellCmd = `/bin/sh -c "if [ -e /bin/bash ]; then exec /bin/bash; else exec /bin/sh; fi"`; // stdin (fd=0) is not a tty when data is piped in, for example @@ -112,29 +157,5 @@ export async function performLocalDeviceSSH( cmd = `${deviceContainerEngineBinary} exec -i ${ttyFlag} ${containerId} ${shellCmd}`; } - const { findBestUsernameForDevice, runRemoteCommand } = await import( - '../ssh' - ); - - // Before we started using `findBestUsernameForDevice`, we tried the approach - // of attempting ssh with the 'root' username first and, if that failed, then - // attempting ssh with a regular user (balenaCloud username). The problem with - // that approach was that it would print the following message to the console: - // "root@192.168.1.36: Permission denied (publickey)" - // ... right before having success as a regular user, which looked broken or - // confusing from users' point of view. Capturing stderr to prevent that - // message from being printed is tricky because the messages printed to stderr - // may include the stderr output of remote commands that are of interest to - // the user. Workarounds based on delays (timing) are tricky too because a - // ssh session length may vary from a fraction of a second (non interactive) - // to hours or days. - const username = await findBestUsernameForDevice(opts.address); - - await runRemoteCommand({ - cmd, - hostname: opts.address, - port: Number(opts.port) || 'local', - username, - verbose: opts.verbose, - }); + await runRemoteCommand({ ...opts, cmd, username }); } diff --git a/lib/utils/ssh.ts b/lib/utils/ssh.ts index 0f005050..837086c1 100644 --- a/lib/utils/ssh.ts +++ b/lib/utils/ssh.ts @@ -247,14 +247,15 @@ export async function getLocalDeviceCmdStdout( cmd: string, stdout: 'capture' | 'ignore' | 'inherit' | NodeJS.WritableStream = 'capture', ): Promise { + const port = 'local'; return ( await getRemoteCommandOutput({ cmd, hostname, - port: 'local', + port, stdout, stderr: 'inherit', - username: await findBestUsernameForDevice(hostname), + username: await findBestUsernameForDevice(hostname, port), }) ).stdout; } @@ -267,16 +268,14 @@ export async function getLocalDeviceCmdStdout( * added to the device's 'config.json' file. * @return True if succesful, false on any errors. */ -export const isRootUserGood = _.memoize( - async (hostname: string, port = 'local') => { - try { - await runRemoteCommand({ cmd: 'exit 0', hostname, port, ...stdioIgnore }); - } catch (e) { - return false; - } - return true; - }, -); +export const isRootUserGood = _.memoize(async (hostname: string, port) => { + try { + await runRemoteCommand({ cmd: 'exit 0', hostname, port, ...stdioIgnore }); + } catch (e) { + return false; + } + return true; +}); /** * Determine whether the given local device (hostname or IP address) should be @@ -291,7 +290,7 @@ export const isRootUserGood = _.memoize( * universally possible. */ export const findBestUsernameForDevice = _.memoize( - async (hostname: string, port = 'local'): Promise => { + async (hostname: string, port): Promise => { let username: string | undefined; if (await isRootUserGood(hostname, port)) { username = 'root'; diff --git a/tests/commands/ssh.spec.ts b/tests/commands/ssh.spec.ts index eb314ef2..56b66326 100644 --- a/tests/commands/ssh.spec.ts +++ b/tests/commands/ssh.spec.ts @@ -33,15 +33,17 @@ describe('balena ssh', function () { let mockedExitCode = 0; async function mockSpawn({ revert = false } = {}) { + const childProcessPath = 'child_process'; if (revert) { - mock.stopAll(); + mock.stop(childProcessPath); mock.reRequire('../../build/utils/ssh'); + mock.reRequire('../../build/utils/device/ssh'); return; } const { EventEmitter } = await import('stream'); - const childProcessMod = await import('child_process'); + const childProcessMod = await import(childProcessPath); const originalSpawn = childProcessMod.spawn; - mock('child_process', { + mock(childProcessPath, { ...childProcessMod, spawn: (program: string, ...args: any[]) => { if (program.includes('ssh')) { @@ -117,7 +119,7 @@ describe('balena ssh', function () { }, ); - it('should fail if device not online (mocked, device UUID)', async () => { + itSS('should fail if device not online (mocked, device UUID)', async () => { const deviceUUID = 'abc1234'; const expectedErrLines = ['Device with UUID abc1234 is offline']; api.expectGetWhoAmI({ optional: true, persist: true }); @@ -132,6 +134,7 @@ describe('balena ssh', function () { it('should produce the expected error message (real ssh, device IP address)', async function () { await mockSpawn({ revert: true }); + api.expectGetWhoAmI({ optional: true, persist: true }); const expectedErrLines = [ 'SSH: Process exited with non-zero status code "255"', ]; @@ -174,7 +177,7 @@ async function startMockSshServer(): Promise<[Server, number]> { console.error(`mock ssh server error:\n${err}`); }); - return new Promise<[Server, number]>((resolve, reject) => { + return await new Promise<[Server, number]>((resolve, reject) => { // TODO: remove 'as any' below. According to @types/node v12.20.42, the // callback type is `() => void`, but our code assumes `(err: Error) => void` const listener = (server.listen as any)(0, '127.0.0.1', (err: Error) => {