ssh: Allow ssh to service with IP address and production balenaOS image

Also remove 'balena ssh' dependency on the device supervisor (that may
be down because of device issues or a supervisor bug) when opening a
ssh shell on a container (#1560).

Resolves: #2458
Resolves: #1560
Change-type: minor
This commit is contained in:
Paulo Castro 2022-02-24 22:37:21 +00:00
parent cd17d79067
commit 915f7e3763
4 changed files with 122 additions and 201 deletions

View File

@ -20,7 +20,6 @@ import Command from '../command';
import * as cf from '../utils/common-flags';
import { getBalenaSdk, stripIndent } from '../utils/lazy';
import { parseAsInteger, validateLocalHostnameOrIp } from '../utils/validation';
import * as BalenaSdk from 'balena-sdk';
interface FlagsDef {
port?: number;
@ -128,8 +127,8 @@ export default class SshCmd extends Command {
if (validateLocalHostnameOrIp(params.fleetOrDevice)) {
const { performLocalDeviceSSH } = await import('../utils/device/ssh');
return await performLocalDeviceSSH({
address: params.fleetOrDevice,
port: options.port,
hostname: params.fleetOrDevice,
port: options.port || 'local',
forceTTY: options.tty,
verbose: options.verbose,
service: params.service,
@ -152,12 +151,6 @@ export default class SshCmd extends Command {
params.fleetOrDevice,
);
const device = await sdk.models.device.get(deviceUuid, {
$select: ['id', 'supervisor_version', 'is_online'],
});
const deviceId = device.id;
const supervisorVersion = device.supervisor_version;
const { which } = await import('../utils/which');
const [whichProxytunnel, username, proxyUrl] = await Promise.all([
@ -209,19 +202,15 @@ export default class SshCmd extends Command {
// that we know exists and is accessible
let containerId: string | undefined;
if (params.service != null) {
containerId = await this.getContainerId(
sdk,
const { getContainerIdForService } = await import('../utils/device/ssh');
containerId = await getContainerIdForService({
deviceUuid,
params.service,
{
port: options.port,
proxyCommand,
proxyUrl: proxyUrl || '',
username: username!,
},
supervisorVersion,
deviceId,
);
hostname: `ssh.${proxyUrl}`,
port: options.port || 'cloud',
proxyCommand,
service: params.service,
username: username!,
});
}
let accessCommand: string;
@ -234,101 +223,10 @@ export default class SshCmd extends Command {
await runRemoteCommand({
cmd: accessCommand,
hostname: `ssh.${proxyUrl}`,
port: options.port,
port: options.port || 'cloud',
proxyCommand,
username,
verbose: options.verbose,
});
}
async getContainerId(
sdk: BalenaSdk.BalenaSDK,
uuid: string,
serviceName: string,
sshOpts: {
port?: number;
proxyCommand?: string[];
proxyUrl: string;
username: string;
},
version?: string,
id?: number,
): Promise<string> {
const semver = await import('balena-semver');
if (version == null || id == null) {
const device = await sdk.models.device.get(uuid, {
$select: ['id', 'supervisor_version'],
});
version = device.supervisor_version;
id = device.id;
}
let containerId: string | undefined;
if (semver.gte(version, '8.6.0')) {
const apiUrl = await sdk.settings.get('apiUrl');
// TODO: Move this into the SDKs device model
const request = await sdk.request.send({
method: 'POST',
url: '/supervisor/v2/containerId',
baseUrl: apiUrl,
body: {
method: 'GET',
deviceId: id,
},
});
if (request.status !== 200) {
throw new Error(
`There was an error connecting to device ${uuid}, HTTP response code: ${request.status}.`,
);
}
const body = request.body;
if (body.status !== 'success') {
throw new Error(
`There was an error communicating with device ${uuid}.\n\tError: ${body.message}`,
);
}
containerId = body.services[serviceName];
} else {
console.error(stripIndent`
Using slow legacy method to determine container IDs. To speed up
this process, update the device supervisor to v8.6.0 or later.
`);
// We need to execute a balena ps command on the device,
// and parse the output, looking for a specific
// container
const { escapeRegExp } = await import('lodash');
const { deviceContainerEngineBinary } = await import(
'../utils/device/ssh'
);
const { getRemoteCommandOutput } = await import('../utils/ssh');
const containers: string = (
await getRemoteCommandOutput({
cmd: `host ${uuid} "${deviceContainerEngineBinary}" ps --format "{{.ID}} {{.Names}}"`,
hostname: `ssh.${sshOpts.proxyUrl}`,
port: sshOpts.port,
proxyCommand: sshOpts.proxyCommand,
stderr: 'inherit',
username: sshOpts.username,
})
).stdout.toString();
const lines = containers.split('\n');
const regex = new RegExp(`\\/?${escapeRegExp(serviceName)}_\\d+_\\d+`);
for (const container of lines) {
const [cId, name] = container.split(' ');
if (regex.test(name)) {
containerId = cId;
break;
}
}
}
if (containerId == null) {
throw new Error(
`Could not find a service ${serviceName} on device ${uuid}.`,
);
}
return containerId;
}
}

View File

@ -13,53 +13,88 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import type { ContainerInfo } from 'dockerode';
import { ExpectedError } from '../../errors';
import { stripIndent } from '../lazy';
export interface DeviceSSHOpts {
address: string;
port?: number;
import {
findBestUsernameForDevice,
getRemoteCommandOutput,
runRemoteCommand,
SshRemoteCommandOpts,
} from '../ssh';
export interface DeviceSSHOpts extends SshRemoteCommandOpts {
forceTTY?: boolean;
verbose: boolean;
service?: string;
}
export const deviceContainerEngineBinary = `$(if [ -f /usr/bin/balena ]; then echo "balena"; else echo "docker"; fi)`;
const deviceContainerEngineBinary = `$(if [ -f /usr/bin/balena ]; then echo "balena"; else echo "docker"; fi)`;
/**
* List the running containers on the device with dockerode, and return the
* container ID that matches the given service name.
* List the running containers on the device over ssh, and return the full
* container name that matches the given service name.
*
* Note: In the past, two other approaches were implemented for this function:
*
* - Obtaining container IDs through a supervisor API call:
* '/supervisor/v2/containerId' endpoint, via cloud.
* - Obtaining container IDs using 'dockerode' connected directly to
* balenaEngine on a device, TCP port 2375.
*
* The problem with using the supervisor API is that it means that 'balena ssh'
* becomes dependent on the supervisor being up an running, but sometimes ssh
* is needed to investigate devices issues where the supervisor has got into
* trouble (e.g. supervisor in restart loop). This is the subject of CLI issue
* https://github.com/balena-io/balena-cli/issues/1560 .
*
* The problem with using dockerode to connect directly to port 2375 (balenaEngine)
* is that it only works with development variants of balenaOS. Production variants
* block access to port 2375 for security reasons. 'balena ssh' should support
* production variants as well, especially after balenaOS v2.44.0 that introduced
* support for using the cloud account username for ssh authentication.
*
* Overall, the most reliable approach is to run 'balena-engine ps' over ssh.
* It is OK to depend on balenaEngine because ssh to a container is implemented
* through 'balena-engine exec' anyway, and of course it is OK to depend on ssh
* itself.
*/
async function getContainerIdForService(
service: string,
deviceAddress: string,
export async function getContainerIdForService(
opts: SshRemoteCommandOpts & { service: string; deviceUuid?: string },
): Promise<string> {
const { escapeRegExp, reduce } = await import('lodash');
const Docker = await import('dockerode');
const docker = new Docker({
host: deviceAddress,
port: 2375,
});
const regex = new RegExp(`(^|\\/)${escapeRegExp(service)}_\\d+_\\d+`);
const nameRegex = /\/?([a-zA-Z0-9_-]+)_\d+_\d+/;
let allContainers: ContainerInfo[];
try {
allContainers = await docker.listContainers();
} catch (_e) {
throw new ExpectedError(stripIndent`
Could not access docker daemon on device ${deviceAddress}.
Please ensure the device is in local mode.`);
opts.cmd = `"${deviceContainerEngineBinary}" ps --format "{{.ID}} {{.Names}}"`;
if (opts.deviceUuid) {
// If a device UUID is given, perform ssh via cloud proxy 'host' command
opts.cmd = `host ${opts.deviceUuid} ${opts.cmd}`;
}
const psLines: string[] = (
await getRemoteCommandOutput({ ...opts, stderr: 'inherit' })
).stdout
.toString()
.split('\n')
.filter((l) => l);
const { escapeRegExp } = await import('lodash');
const regex = new RegExp(`(?:^|\\/)${escapeRegExp(opts.service)}_\\d+_\\d+`);
// Old balenaOS container name pattern:
// main_1234567_2345678
// New balenaOS container name patterns:
// main_1234567_2345678_a000b111c222d333e444f555a666b777
// main_1_1_localrelease
const nameRegex = /(?:^|\/)([a-zA-Z0-9_-]+)_\d+_\d+(?:_.+)?$/;
const serviceNames: string[] = [];
const containers: Array<{ id: string; name: string }> = [];
for (const container of allContainers) {
for (const name of container.Names) {
const containerNames: string[] = [];
let containerId: string | undefined;
// sample psLine: 'b603c74e951e bar_4587562_2078151_3261c9d4c22f2c53a5267be459c89990'
for (const psLine of psLines) {
const [cId, name] = psLine.split(' ');
if (cId && name) {
if (regex.test(name)) {
containers.push({ id: container.Id, name });
break;
containerNames.push(name);
containerId = cId;
}
const match = name.match(nameRegex);
if (match) {
@ -67,23 +102,21 @@ async function getContainerIdForService(
}
}
}
if (containers.length > 1) {
if (containerNames.length > 1) {
const [s, d] = [opts.service, opts.deviceUuid || opts.hostname];
throw new ExpectedError(stripIndent`
Found more than one container matching service name "${service}":
${containers.map((container) => container.name).join(', ')}
Found more than one container matching service name "${s}" on device "${d}":
${containerNames.join(', ')}
Use different service names to avoid ambiguity.
`);
}
const containerId = containers.length ? containers[0].id : '';
if (!containerId) {
const [s, d] = [opts.service, opts.deviceUuid || opts.hostname];
throw new ExpectedError(
`Could not find a service on device with name ${service}. ${
`Could not find a container matching service name "${s}" on device "${d}".${
serviceNames.length > 0
? `Available services:\n${reduce(
serviceNames,
(str, name) => `${str}\t${name}\n`,
'',
)}`
? `\nAvailable services:\n\t${serviceNames.join('\n\t')}`
: ''
}`,
);
@ -94,13 +127,25 @@ async function getContainerIdForService(
export async function performLocalDeviceSSH(
opts: DeviceSSHOpts,
): Promise<void> {
// Before we started using `findBestUsernameForDevice`, we tried the approach
// of attempting ssh with the 'root' username first and, if that failed, then
// attempting ssh with a regular user (balenaCloud username). The problem with
// that approach was that it would print the following message to the console:
// "root@192.168.1.36: Permission denied (publickey)"
// ... right before having success as a regular user, which looked broken or
// confusing from users' point of view. Capturing stderr to prevent that
// message from being printed is tricky because the messages printed to stderr
// may include the stderr output of remote commands that are of interest to
// the user.
const username = await findBestUsernameForDevice(opts.hostname, opts.port);
let cmd = '';
if (opts.service) {
const containerId = await getContainerIdForService(
opts.service,
opts.address,
);
const containerId = await getContainerIdForService({
...opts,
service: opts.service,
username,
});
const shellCmd = `/bin/sh -c "if [ -e /bin/bash ]; then exec /bin/bash; else exec /bin/sh; fi"`;
// stdin (fd=0) is not a tty when data is piped in, for example
@ -112,29 +157,5 @@ export async function performLocalDeviceSSH(
cmd = `${deviceContainerEngineBinary} exec -i ${ttyFlag} ${containerId} ${shellCmd}`;
}
const { findBestUsernameForDevice, runRemoteCommand } = await import(
'../ssh'
);
// Before we started using `findBestUsernameForDevice`, we tried the approach
// of attempting ssh with the 'root' username first and, if that failed, then
// attempting ssh with a regular user (balenaCloud username). The problem with
// that approach was that it would print the following message to the console:
// "root@192.168.1.36: Permission denied (publickey)"
// ... right before having success as a regular user, which looked broken or
// confusing from users' point of view. Capturing stderr to prevent that
// message from being printed is tricky because the messages printed to stderr
// may include the stderr output of remote commands that are of interest to
// the user. Workarounds based on delays (timing) are tricky too because a
// ssh session length may vary from a fraction of a second (non interactive)
// to hours or days.
const username = await findBestUsernameForDevice(opts.address);
await runRemoteCommand({
cmd,
hostname: opts.address,
port: Number(opts.port) || 'local',
username,
verbose: opts.verbose,
});
await runRemoteCommand({ ...opts, cmd, username });
}

View File

@ -247,14 +247,15 @@ export async function getLocalDeviceCmdStdout(
cmd: string,
stdout: 'capture' | 'ignore' | 'inherit' | NodeJS.WritableStream = 'capture',
): Promise<Buffer> {
const port = 'local';
return (
await getRemoteCommandOutput({
cmd,
hostname,
port: 'local',
port,
stdout,
stderr: 'inherit',
username: await findBestUsernameForDevice(hostname),
username: await findBestUsernameForDevice(hostname, port),
})
).stdout;
}
@ -267,16 +268,14 @@ export async function getLocalDeviceCmdStdout(
* added to the device's 'config.json' file.
* @return True if succesful, false on any errors.
*/
export const isRootUserGood = _.memoize(
async (hostname: string, port = 'local') => {
try {
await runRemoteCommand({ cmd: 'exit 0', hostname, port, ...stdioIgnore });
} catch (e) {
return false;
}
return true;
},
);
export const isRootUserGood = _.memoize(async (hostname: string, port) => {
try {
await runRemoteCommand({ cmd: 'exit 0', hostname, port, ...stdioIgnore });
} catch (e) {
return false;
}
return true;
});
/**
* Determine whether the given local device (hostname or IP address) should be
@ -291,7 +290,7 @@ export const isRootUserGood = _.memoize(
* universally possible.
*/
export const findBestUsernameForDevice = _.memoize(
async (hostname: string, port = 'local'): Promise<string> => {
async (hostname: string, port): Promise<string> => {
let username: string | undefined;
if (await isRootUserGood(hostname, port)) {
username = 'root';

View File

@ -33,15 +33,17 @@ describe('balena ssh', function () {
let mockedExitCode = 0;
async function mockSpawn({ revert = false } = {}) {
const childProcessPath = 'child_process';
if (revert) {
mock.stopAll();
mock.stop(childProcessPath);
mock.reRequire('../../build/utils/ssh');
mock.reRequire('../../build/utils/device/ssh');
return;
}
const { EventEmitter } = await import('stream');
const childProcessMod = await import('child_process');
const childProcessMod = await import(childProcessPath);
const originalSpawn = childProcessMod.spawn;
mock('child_process', {
mock(childProcessPath, {
...childProcessMod,
spawn: (program: string, ...args: any[]) => {
if (program.includes('ssh')) {
@ -117,7 +119,7 @@ describe('balena ssh', function () {
},
);
it('should fail if device not online (mocked, device UUID)', async () => {
itSS('should fail if device not online (mocked, device UUID)', async () => {
const deviceUUID = 'abc1234';
const expectedErrLines = ['Device with UUID abc1234 is offline'];
api.expectGetWhoAmI({ optional: true, persist: true });
@ -132,6 +134,7 @@ describe('balena ssh', function () {
it('should produce the expected error message (real ssh, device IP address)', async function () {
await mockSpawn({ revert: true });
api.expectGetWhoAmI({ optional: true, persist: true });
const expectedErrLines = [
'SSH: Process exited with non-zero status code "255"',
];
@ -174,7 +177,7 @@ async function startMockSshServer(): Promise<[Server, number]> {
console.error(`mock ssh server error:\n${err}`);
});
return new Promise<[Server, number]>((resolve, reject) => {
return await new Promise<[Server, number]>((resolve, reject) => {
// TODO: remove 'as any' below. According to @types/node v12.20.42, the
// callback type is `() => void`, but our code assumes `(err: Error) => void`
const listener = (server.listen as any)(0, '127.0.0.1', (err: Error) => {