ssh: Allow ssh to service with IP address and production balenaOS image

Also remove 'balena ssh' dependency on the device supervisor (that may
be down because of device issues or a supervisor bug) when opening a
ssh shell on a container (#1560).

Resolves: #2458
Resolves: #1560
Change-type: minor
This commit is contained in:
Paulo Castro 2022-02-24 22:37:21 +00:00
parent cd17d79067
commit 915f7e3763
4 changed files with 122 additions and 201 deletions

View File

@ -20,7 +20,6 @@ import Command from '../command';
import * as cf from '../utils/common-flags'; import * as cf from '../utils/common-flags';
import { getBalenaSdk, stripIndent } from '../utils/lazy'; import { getBalenaSdk, stripIndent } from '../utils/lazy';
import { parseAsInteger, validateLocalHostnameOrIp } from '../utils/validation'; import { parseAsInteger, validateLocalHostnameOrIp } from '../utils/validation';
import * as BalenaSdk from 'balena-sdk';
interface FlagsDef { interface FlagsDef {
port?: number; port?: number;
@ -128,8 +127,8 @@ export default class SshCmd extends Command {
if (validateLocalHostnameOrIp(params.fleetOrDevice)) { if (validateLocalHostnameOrIp(params.fleetOrDevice)) {
const { performLocalDeviceSSH } = await import('../utils/device/ssh'); const { performLocalDeviceSSH } = await import('../utils/device/ssh');
return await performLocalDeviceSSH({ return await performLocalDeviceSSH({
address: params.fleetOrDevice, hostname: params.fleetOrDevice,
port: options.port, port: options.port || 'local',
forceTTY: options.tty, forceTTY: options.tty,
verbose: options.verbose, verbose: options.verbose,
service: params.service, service: params.service,
@ -152,12 +151,6 @@ export default class SshCmd extends Command {
params.fleetOrDevice, params.fleetOrDevice,
); );
const device = await sdk.models.device.get(deviceUuid, {
$select: ['id', 'supervisor_version', 'is_online'],
});
const deviceId = device.id;
const supervisorVersion = device.supervisor_version;
const { which } = await import('../utils/which'); const { which } = await import('../utils/which');
const [whichProxytunnel, username, proxyUrl] = await Promise.all([ const [whichProxytunnel, username, proxyUrl] = await Promise.all([
@ -209,19 +202,15 @@ export default class SshCmd extends Command {
// that we know exists and is accessible // that we know exists and is accessible
let containerId: string | undefined; let containerId: string | undefined;
if (params.service != null) { if (params.service != null) {
containerId = await this.getContainerId( const { getContainerIdForService } = await import('../utils/device/ssh');
sdk, containerId = await getContainerIdForService({
deviceUuid, deviceUuid,
params.service, hostname: `ssh.${proxyUrl}`,
{ port: options.port || 'cloud',
port: options.port, proxyCommand,
proxyCommand, service: params.service,
proxyUrl: proxyUrl || '', username: username!,
username: username!, });
},
supervisorVersion,
deviceId,
);
} }
let accessCommand: string; let accessCommand: string;
@ -234,101 +223,10 @@ export default class SshCmd extends Command {
await runRemoteCommand({ await runRemoteCommand({
cmd: accessCommand, cmd: accessCommand,
hostname: `ssh.${proxyUrl}`, hostname: `ssh.${proxyUrl}`,
port: options.port, port: options.port || 'cloud',
proxyCommand, proxyCommand,
username, username,
verbose: options.verbose, verbose: options.verbose,
}); });
} }
async getContainerId(
sdk: BalenaSdk.BalenaSDK,
uuid: string,
serviceName: string,
sshOpts: {
port?: number;
proxyCommand?: string[];
proxyUrl: string;
username: string;
},
version?: string,
id?: number,
): Promise<string> {
const semver = await import('balena-semver');
if (version == null || id == null) {
const device = await sdk.models.device.get(uuid, {
$select: ['id', 'supervisor_version'],
});
version = device.supervisor_version;
id = device.id;
}
let containerId: string | undefined;
if (semver.gte(version, '8.6.0')) {
const apiUrl = await sdk.settings.get('apiUrl');
// TODO: Move this into the SDKs device model
const request = await sdk.request.send({
method: 'POST',
url: '/supervisor/v2/containerId',
baseUrl: apiUrl,
body: {
method: 'GET',
deviceId: id,
},
});
if (request.status !== 200) {
throw new Error(
`There was an error connecting to device ${uuid}, HTTP response code: ${request.status}.`,
);
}
const body = request.body;
if (body.status !== 'success') {
throw new Error(
`There was an error communicating with device ${uuid}.\n\tError: ${body.message}`,
);
}
containerId = body.services[serviceName];
} else {
console.error(stripIndent`
Using slow legacy method to determine container IDs. To speed up
this process, update the device supervisor to v8.6.0 or later.
`);
// We need to execute a balena ps command on the device,
// and parse the output, looking for a specific
// container
const { escapeRegExp } = await import('lodash');
const { deviceContainerEngineBinary } = await import(
'../utils/device/ssh'
);
const { getRemoteCommandOutput } = await import('../utils/ssh');
const containers: string = (
await getRemoteCommandOutput({
cmd: `host ${uuid} "${deviceContainerEngineBinary}" ps --format "{{.ID}} {{.Names}}"`,
hostname: `ssh.${sshOpts.proxyUrl}`,
port: sshOpts.port,
proxyCommand: sshOpts.proxyCommand,
stderr: 'inherit',
username: sshOpts.username,
})
).stdout.toString();
const lines = containers.split('\n');
const regex = new RegExp(`\\/?${escapeRegExp(serviceName)}_\\d+_\\d+`);
for (const container of lines) {
const [cId, name] = container.split(' ');
if (regex.test(name)) {
containerId = cId;
break;
}
}
}
if (containerId == null) {
throw new Error(
`Could not find a service ${serviceName} on device ${uuid}.`,
);
}
return containerId;
}
} }

View File

@ -13,53 +13,88 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. limitations under the License.
*/ */
import type { ContainerInfo } from 'dockerode';
import { ExpectedError } from '../../errors'; import { ExpectedError } from '../../errors';
import { stripIndent } from '../lazy'; import { stripIndent } from '../lazy';
export interface DeviceSSHOpts { import {
address: string; findBestUsernameForDevice,
port?: number; getRemoteCommandOutput,
runRemoteCommand,
SshRemoteCommandOpts,
} from '../ssh';
export interface DeviceSSHOpts extends SshRemoteCommandOpts {
forceTTY?: boolean; forceTTY?: boolean;
verbose: boolean;
service?: string; service?: string;
} }
export const deviceContainerEngineBinary = `$(if [ -f /usr/bin/balena ]; then echo "balena"; else echo "docker"; fi)`; const deviceContainerEngineBinary = `$(if [ -f /usr/bin/balena ]; then echo "balena"; else echo "docker"; fi)`;
/** /**
* List the running containers on the device with dockerode, and return the * List the running containers on the device over ssh, and return the full
* container ID that matches the given service name. * container name that matches the given service name.
*
* Note: In the past, two other approaches were implemented for this function:
*
* - Obtaining container IDs through a supervisor API call:
* '/supervisor/v2/containerId' endpoint, via cloud.
* - Obtaining container IDs using 'dockerode' connected directly to
* balenaEngine on a device, TCP port 2375.
*
* The problem with using the supervisor API is that it means that 'balena ssh'
* becomes dependent on the supervisor being up an running, but sometimes ssh
* is needed to investigate devices issues where the supervisor has got into
* trouble (e.g. supervisor in restart loop). This is the subject of CLI issue
* https://github.com/balena-io/balena-cli/issues/1560 .
*
* The problem with using dockerode to connect directly to port 2375 (balenaEngine)
* is that it only works with development variants of balenaOS. Production variants
* block access to port 2375 for security reasons. 'balena ssh' should support
* production variants as well, especially after balenaOS v2.44.0 that introduced
* support for using the cloud account username for ssh authentication.
*
* Overall, the most reliable approach is to run 'balena-engine ps' over ssh.
* It is OK to depend on balenaEngine because ssh to a container is implemented
* through 'balena-engine exec' anyway, and of course it is OK to depend on ssh
* itself.
*/ */
async function getContainerIdForService( export async function getContainerIdForService(
service: string, opts: SshRemoteCommandOpts & { service: string; deviceUuid?: string },
deviceAddress: string,
): Promise<string> { ): Promise<string> {
const { escapeRegExp, reduce } = await import('lodash'); opts.cmd = `"${deviceContainerEngineBinary}" ps --format "{{.ID}} {{.Names}}"`;
const Docker = await import('dockerode'); if (opts.deviceUuid) {
const docker = new Docker({ // If a device UUID is given, perform ssh via cloud proxy 'host' command
host: deviceAddress, opts.cmd = `host ${opts.deviceUuid} ${opts.cmd}`;
port: 2375,
});
const regex = new RegExp(`(^|\\/)${escapeRegExp(service)}_\\d+_\\d+`);
const nameRegex = /\/?([a-zA-Z0-9_-]+)_\d+_\d+/;
let allContainers: ContainerInfo[];
try {
allContainers = await docker.listContainers();
} catch (_e) {
throw new ExpectedError(stripIndent`
Could not access docker daemon on device ${deviceAddress}.
Please ensure the device is in local mode.`);
} }
const psLines: string[] = (
await getRemoteCommandOutput({ ...opts, stderr: 'inherit' })
).stdout
.toString()
.split('\n')
.filter((l) => l);
const { escapeRegExp } = await import('lodash');
const regex = new RegExp(`(?:^|\\/)${escapeRegExp(opts.service)}_\\d+_\\d+`);
// Old balenaOS container name pattern:
// main_1234567_2345678
// New balenaOS container name patterns:
// main_1234567_2345678_a000b111c222d333e444f555a666b777
// main_1_1_localrelease
const nameRegex = /(?:^|\/)([a-zA-Z0-9_-]+)_\d+_\d+(?:_.+)?$/;
const serviceNames: string[] = []; const serviceNames: string[] = [];
const containers: Array<{ id: string; name: string }> = []; const containerNames: string[] = [];
for (const container of allContainers) { let containerId: string | undefined;
for (const name of container.Names) {
// sample psLine: 'b603c74e951e bar_4587562_2078151_3261c9d4c22f2c53a5267be459c89990'
for (const psLine of psLines) {
const [cId, name] = psLine.split(' ');
if (cId && name) {
if (regex.test(name)) { if (regex.test(name)) {
containers.push({ id: container.Id, name }); containerNames.push(name);
break; containerId = cId;
} }
const match = name.match(nameRegex); const match = name.match(nameRegex);
if (match) { if (match) {
@ -67,23 +102,21 @@ async function getContainerIdForService(
} }
} }
} }
if (containers.length > 1) {
if (containerNames.length > 1) {
const [s, d] = [opts.service, opts.deviceUuid || opts.hostname];
throw new ExpectedError(stripIndent` throw new ExpectedError(stripIndent`
Found more than one container matching service name "${service}": Found more than one container matching service name "${s}" on device "${d}":
${containers.map((container) => container.name).join(', ')} ${containerNames.join(', ')}
Use different service names to avoid ambiguity. Use different service names to avoid ambiguity.
`); `);
} }
const containerId = containers.length ? containers[0].id : '';
if (!containerId) { if (!containerId) {
const [s, d] = [opts.service, opts.deviceUuid || opts.hostname];
throw new ExpectedError( throw new ExpectedError(
`Could not find a service on device with name ${service}. ${ `Could not find a container matching service name "${s}" on device "${d}".${
serviceNames.length > 0 serviceNames.length > 0
? `Available services:\n${reduce( ? `\nAvailable services:\n\t${serviceNames.join('\n\t')}`
serviceNames,
(str, name) => `${str}\t${name}\n`,
'',
)}`
: '' : ''
}`, }`,
); );
@ -94,13 +127,25 @@ async function getContainerIdForService(
export async function performLocalDeviceSSH( export async function performLocalDeviceSSH(
opts: DeviceSSHOpts, opts: DeviceSSHOpts,
): Promise<void> { ): Promise<void> {
// Before we started using `findBestUsernameForDevice`, we tried the approach
// of attempting ssh with the 'root' username first and, if that failed, then
// attempting ssh with a regular user (balenaCloud username). The problem with
// that approach was that it would print the following message to the console:
// "root@192.168.1.36: Permission denied (publickey)"
// ... right before having success as a regular user, which looked broken or
// confusing from users' point of view. Capturing stderr to prevent that
// message from being printed is tricky because the messages printed to stderr
// may include the stderr output of remote commands that are of interest to
// the user.
const username = await findBestUsernameForDevice(opts.hostname, opts.port);
let cmd = ''; let cmd = '';
if (opts.service) { if (opts.service) {
const containerId = await getContainerIdForService( const containerId = await getContainerIdForService({
opts.service, ...opts,
opts.address, service: opts.service,
); username,
});
const shellCmd = `/bin/sh -c "if [ -e /bin/bash ]; then exec /bin/bash; else exec /bin/sh; fi"`; const shellCmd = `/bin/sh -c "if [ -e /bin/bash ]; then exec /bin/bash; else exec /bin/sh; fi"`;
// stdin (fd=0) is not a tty when data is piped in, for example // stdin (fd=0) is not a tty when data is piped in, for example
@ -112,29 +157,5 @@ export async function performLocalDeviceSSH(
cmd = `${deviceContainerEngineBinary} exec -i ${ttyFlag} ${containerId} ${shellCmd}`; cmd = `${deviceContainerEngineBinary} exec -i ${ttyFlag} ${containerId} ${shellCmd}`;
} }
const { findBestUsernameForDevice, runRemoteCommand } = await import( await runRemoteCommand({ ...opts, cmd, username });
'../ssh'
);
// Before we started using `findBestUsernameForDevice`, we tried the approach
// of attempting ssh with the 'root' username first and, if that failed, then
// attempting ssh with a regular user (balenaCloud username). The problem with
// that approach was that it would print the following message to the console:
// "root@192.168.1.36: Permission denied (publickey)"
// ... right before having success as a regular user, which looked broken or
// confusing from users' point of view. Capturing stderr to prevent that
// message from being printed is tricky because the messages printed to stderr
// may include the stderr output of remote commands that are of interest to
// the user. Workarounds based on delays (timing) are tricky too because a
// ssh session length may vary from a fraction of a second (non interactive)
// to hours or days.
const username = await findBestUsernameForDevice(opts.address);
await runRemoteCommand({
cmd,
hostname: opts.address,
port: Number(opts.port) || 'local',
username,
verbose: opts.verbose,
});
} }

View File

@ -247,14 +247,15 @@ export async function getLocalDeviceCmdStdout(
cmd: string, cmd: string,
stdout: 'capture' | 'ignore' | 'inherit' | NodeJS.WritableStream = 'capture', stdout: 'capture' | 'ignore' | 'inherit' | NodeJS.WritableStream = 'capture',
): Promise<Buffer> { ): Promise<Buffer> {
const port = 'local';
return ( return (
await getRemoteCommandOutput({ await getRemoteCommandOutput({
cmd, cmd,
hostname, hostname,
port: 'local', port,
stdout, stdout,
stderr: 'inherit', stderr: 'inherit',
username: await findBestUsernameForDevice(hostname), username: await findBestUsernameForDevice(hostname, port),
}) })
).stdout; ).stdout;
} }
@ -267,16 +268,14 @@ export async function getLocalDeviceCmdStdout(
* added to the device's 'config.json' file. * added to the device's 'config.json' file.
* @return True if succesful, false on any errors. * @return True if succesful, false on any errors.
*/ */
export const isRootUserGood = _.memoize( export const isRootUserGood = _.memoize(async (hostname: string, port) => {
async (hostname: string, port = 'local') => { try {
try { await runRemoteCommand({ cmd: 'exit 0', hostname, port, ...stdioIgnore });
await runRemoteCommand({ cmd: 'exit 0', hostname, port, ...stdioIgnore }); } catch (e) {
} catch (e) { return false;
return false; }
} return true;
return true; });
},
);
/** /**
* Determine whether the given local device (hostname or IP address) should be * Determine whether the given local device (hostname or IP address) should be
@ -291,7 +290,7 @@ export const isRootUserGood = _.memoize(
* universally possible. * universally possible.
*/ */
export const findBestUsernameForDevice = _.memoize( export const findBestUsernameForDevice = _.memoize(
async (hostname: string, port = 'local'): Promise<string> => { async (hostname: string, port): Promise<string> => {
let username: string | undefined; let username: string | undefined;
if (await isRootUserGood(hostname, port)) { if (await isRootUserGood(hostname, port)) {
username = 'root'; username = 'root';

View File

@ -33,15 +33,17 @@ describe('balena ssh', function () {
let mockedExitCode = 0; let mockedExitCode = 0;
async function mockSpawn({ revert = false } = {}) { async function mockSpawn({ revert = false } = {}) {
const childProcessPath = 'child_process';
if (revert) { if (revert) {
mock.stopAll(); mock.stop(childProcessPath);
mock.reRequire('../../build/utils/ssh'); mock.reRequire('../../build/utils/ssh');
mock.reRequire('../../build/utils/device/ssh');
return; return;
} }
const { EventEmitter } = await import('stream'); const { EventEmitter } = await import('stream');
const childProcessMod = await import('child_process'); const childProcessMod = await import(childProcessPath);
const originalSpawn = childProcessMod.spawn; const originalSpawn = childProcessMod.spawn;
mock('child_process', { mock(childProcessPath, {
...childProcessMod, ...childProcessMod,
spawn: (program: string, ...args: any[]) => { spawn: (program: string, ...args: any[]) => {
if (program.includes('ssh')) { if (program.includes('ssh')) {
@ -117,7 +119,7 @@ describe('balena ssh', function () {
}, },
); );
it('should fail if device not online (mocked, device UUID)', async () => { itSS('should fail if device not online (mocked, device UUID)', async () => {
const deviceUUID = 'abc1234'; const deviceUUID = 'abc1234';
const expectedErrLines = ['Device with UUID abc1234 is offline']; const expectedErrLines = ['Device with UUID abc1234 is offline'];
api.expectGetWhoAmI({ optional: true, persist: true }); api.expectGetWhoAmI({ optional: true, persist: true });
@ -132,6 +134,7 @@ describe('balena ssh', function () {
it('should produce the expected error message (real ssh, device IP address)', async function () { it('should produce the expected error message (real ssh, device IP address)', async function () {
await mockSpawn({ revert: true }); await mockSpawn({ revert: true });
api.expectGetWhoAmI({ optional: true, persist: true });
const expectedErrLines = [ const expectedErrLines = [
'SSH: Process exited with non-zero status code "255"', 'SSH: Process exited with non-zero status code "255"',
]; ];
@ -174,7 +177,7 @@ async function startMockSshServer(): Promise<[Server, number]> {
console.error(`mock ssh server error:\n${err}`); console.error(`mock ssh server error:\n${err}`);
}); });
return new Promise<[Server, number]>((resolve, reject) => { return await new Promise<[Server, number]>((resolve, reject) => {
// TODO: remove 'as any' below. According to @types/node v12.20.42, the // TODO: remove 'as any' below. According to @types/node v12.20.42, the
// callback type is `() => void`, but our code assumes `(err: Error) => void` // callback type is `() => void`, but our code assumes `(err: Error) => void`
const listener = (server.listen as any)(0, '127.0.0.1', (err: Error) => { const listener = (server.listen as any)(0, '127.0.0.1', (err: Error) => {