genode/repos/ports/src/noux/syscall.cc

800 lines
20 KiB
C++
Raw Normal View History

/*
* \brief Noux syscall dispatcher
* \author Norman Feske
* \date 2011-02-14
*/
/*
* Copyright (C) 2011-2017 Genode Labs GmbH
*
* This file is part of the Genode OS framework, which is distributed
* under the terms of the GNU Affero General Public License version 3.
*/
/* Noux includes */
#include <child.h>
#include <child_env.h>
#include <vfs_io_channel.h>
#include <pipe_io_channel.h>
namespace Noux {
/**
* This function is used to generate inode values from the given
* path using the FNV-1a algorithm.
*/
inline uint32_t hash_path(const char *path, size_t len)
{
const unsigned char * p = reinterpret_cast<const unsigned char*>(path);
uint32_t hash = 2166136261U;
for (size_t i = 0; i < len; i++) {
hash ^= p[i];
hash *= 16777619;
}
return hash;
}
};
bool Noux::Child::syscall(Noux::Session::Syscall sc)
{
if (_verbose.syscalls())
log("PID ", pid(), " -> SYSCALL ", Noux::Session::syscall_name(sc));
bool result = false;
try {
switch (sc) {
case SYSCALL_WRITE:
{
size_t const count_in = _sysio.write_in.count;
for (size_t offset = 0; offset != count_in; ) {
Shared_pointer<Io_channel> io = _lookup_channel(_sysio.write_in.fd);
if (!io->nonblocking())
_block_for_io_channel(io, false, true, false);
if (io->check_unblock(false, true, false)) {
/*
* 'io->write' is expected to update
* '_sysio.write_out.count' and 'offset'
*/
result = io->write(_sysio, offset);
if (result == false)
break;
} else {
if (result == false) {
/* nothing was written yet */
_sysio.error.write = Vfs::File_io_service::WRITE_ERR_INTERRUPT;
}
break;
}
}
break;
}
case SYSCALL_READ:
{
Shared_pointer<Io_channel> io = _lookup_channel(_sysio.read_in.fd);
if (!io->nonblocking())
_block_for_io_channel(io, true, false, false);
if (io->check_unblock(true, false, false))
result = io->read(_sysio);
else
_sysio.error.read = Vfs::File_io_service::READ_ERR_INTERRUPT;
break;
}
case SYSCALL_FTRUNCATE:
{
Shared_pointer<Io_channel> io = _lookup_channel(_sysio.ftruncate_in.fd);
_block_for_io_channel(io, false, true, false);
if (io->check_unblock(false, true, false))
result = io->ftruncate(_sysio);
else
_sysio.error.ftruncate = Vfs::File_io_service::FTRUNCATE_ERR_INTERRUPT;
break;
}
case SYSCALL_STAT:
case SYSCALL_LSTAT: /* XXX implement difference between 'lstat' and 'stat' */
{
/**
* We calculate the inode by hashing the path because there is
* no inode registry in noux.
*/
size_t path_len = strlen(_sysio.stat_in.path);
uint32_t path_hash = hash_path(_sysio.stat_in.path, path_len);
Vfs::Directory_service::Stat stat_out;
_sysio.error.stat = _root_dir.stat(_sysio.stat_in.path, stat_out);
result = (_sysio.error.stat == Vfs::Directory_service::STAT_OK);
/*
* Instead of using the uid/gid given by the actual file system
* we use the ones specificed in the config.
*/
if (result) {
stat_out.uid = _user_info.uid();
stat_out.gid = _user_info.gid();
stat_out.inode = path_hash;
}
_sysio.stat_out.st = stat_out;
break;
}
case SYSCALL_FSTAT:
{
Shared_pointer<Io_channel> io = _lookup_channel(_sysio.fstat_in.fd);
result = io->fstat(_sysio);
if (result) {
Sysio::Path path;
/**
* Only actual fd's are valid fstat targets.
*/
if (io->path(path, sizeof (path))) {
size_t path_len = strlen(path);
uint32_t path_hash = hash_path(path, path_len);
_sysio.stat_out.st.inode = path_hash;
}
}
break;
}
case SYSCALL_FCNTL:
if (_sysio.fcntl_in.cmd == Sysio::FCNTL_CMD_SET_FD_FLAGS) {
/* we assume that there is only the close-on-execve flag */
_lookup_channel(_sysio.fcntl_in.fd)->close_on_execve =
!!_sysio.fcntl_in.long_arg;
result = true;
break;
}
result = _lookup_channel(_sysio.fcntl_in.fd)->fcntl(_sysio);
break;
case SYSCALL_OPEN:
{
Vfs::Vfs_handle *vfs_handle = 0;
_sysio.error.open = _root_dir.open(_sysio.open_in.path,
_sysio.open_in.mode,
&vfs_handle, _heap);
if (!vfs_handle)
break;
char const *leaf_path = _root_dir.leaf_path(_sysio.open_in.path);
/*
* File descriptors of opened directories are handled by
* '_root_dir'. In this case, we use the absolute path as leaf
* path because path operations always refer to the global
* root.
*/
if (&vfs_handle->ds() == &_root_dir)
leaf_path = _sysio.open_in.path;
Shared_pointer<Io_channel>
channel(new (_heap) Vfs_io_channel(_sysio.open_in.path,
leaf_path, &_root_dir,
vfs_handle, _env.ep()),
_heap);
_sysio.open_out.fd = add_io_channel(channel);
result = true;
break;
}
case SYSCALL_CLOSE:
{
remove_io_channel(_sysio.close_in.fd);
result = true;
break;
}
case SYSCALL_IOCTL:
result = _lookup_channel(_sysio.ioctl_in.fd)->ioctl(_sysio);
break;
case SYSCALL_LSEEK:
result = _lookup_channel(_sysio.lseek_in.fd)->lseek(_sysio);
break;
case SYSCALL_DIRENT:
result = _lookup_channel(_sysio.dirent_in.fd)->dirent(_sysio);
break;
case SYSCALL_EXECVE:
{
/*
* We have to check the dataspace twice because the binary
* could be a script that uses an interpreter which maybe
* does not exist.
*/
Dataspace_capability binary_ds =
_root_dir.dataspace(_sysio.execve_in.filename);
if (!binary_ds.valid()) {
_sysio.error.execve = Sysio::EXECVE_NONEXISTENT;
break;
}
Child_env<sizeof(_sysio.execve_in.args)>
child_env(_env.rm(),
_sysio.execve_in.filename, binary_ds,
_sysio.execve_in.args, _sysio.execve_in.env);
_root_dir.release(_sysio.execve_in.filename, binary_ds);
binary_ds = _root_dir.dataspace(child_env.binary_name());
if (!binary_ds.valid()) {
_sysio.error.execve = Sysio::EXECVE_NONEXISTENT;
break;
}
_root_dir.release(child_env.binary_name(), binary_ds);
try {
_parent_execve.execve_child(*this,
child_env.binary_name(),
child_env.args(),
child_env.env());
/*
* 'return' instead of 'break' to skip possible signal delivery,
* which might cause the old child process to exit itself
*/
return true;
}
catch (Child::Binary_does_not_exist) {
_sysio.error.execve = Sysio::EXECVE_NONEXISTENT; }
catch (Child::Insufficient_memory) {
_sysio.error.execve = Sysio::EXECVE_NOMEM; }
break;
}
case SYSCALL_SELECT:
{
size_t in_fds_total = _sysio.select_in.fds.total_fds();
Sysio::Select_fds in_fds;
for (Genode::size_t i = 0; i < in_fds_total; i++)
in_fds.array[i] = _sysio.select_in.fds.array[i];
in_fds.num_rd = _sysio.select_in.fds.num_rd;
in_fds.num_wr = _sysio.select_in.fds.num_wr;
in_fds.num_ex = _sysio.select_in.fds.num_ex;
int _rd_array[in_fds_total];
int _wr_array[in_fds_total];
long timeout_sec = _sysio.select_in.timeout.sec;
long timeout_usec = _sysio.select_in.timeout.usec;
bool timeout_reached = false;
/* reset the blocker lock to the 'locked' state */
_blocker.unlock();
_blocker.lock();
/*
* Register ourself at all watched I/O channels
*
* We instantiate as many notifiers as we have file
* descriptors to observe. Each notifier is associated
* with the child's blocking semaphore. When any of the
* notifiers get woken up, the semaphore gets unblocked.
*
* XXX However, the blocker may get unblocked for other
* conditions such as the destruction of the child.
* ...to be done.
*/
Wake_up_notifier notifiers[in_fds_total];
for (Genode::size_t i = 0; i < in_fds_total; i++) {
int fd = in_fds.array[i];
if (!fd_in_use(fd)) continue;
Shared_pointer<Io_channel> io = io_channel_by_fd(fd);
notifiers[i].lock = &_blocker;
io->register_wake_up_notifier(&notifiers[i]);
}
/**
* Register ourself at the Io_receptor_registry
*
* Each entry in the registry will be unblocked if an external
* event has happend, e.g. network I/O.
*/
Io_receptor receptor(&_blocker);
io_receptor_registry()->register_receptor(&receptor);
/*
* Block for one action of the watched file descriptors
*/
for (;;) {
/*
* Check I/O channels of specified file descriptors for
* unblock condition. Return if one I/O channel satisfies
* the condition.
*/
size_t unblock_rd = 0;
size_t unblock_wr = 0;
size_t unblock_ex = 0;
/* process read fds */
for (Genode::size_t i = 0; i < in_fds_total; i++) {
int fd = in_fds.array[i];
if (!fd_in_use(fd)) continue;
Shared_pointer<Io_channel> io = io_channel_by_fd(fd);
if (in_fds.watch_for_rd(i))
if (io->check_unblock(true, false, false)) {
_rd_array[unblock_rd++] = fd;
}
if (in_fds.watch_for_wr(i))
if (io->check_unblock(false, true, false)) {
_wr_array[unblock_wr++] = fd;
}
if (in_fds.watch_for_ex(i))
if (io->check_unblock(false, false, true)) {
unblock_ex++;
}
}
if (unblock_rd || unblock_wr || unblock_ex) {
/**
* Merge the fd arrays in one output array
*/
for (size_t i = 0; i < unblock_rd; i++) {
_sysio.select_out.fds.array[i] = _rd_array[i];
}
_sysio.select_out.fds.num_rd = unblock_rd;
/* XXX could use a pointer to select_out.fds.array instead */
for (size_t j = unblock_rd, i = 0; i < unblock_wr; i++, j++) {
_sysio.select_out.fds.array[j] = _wr_array[i];
}
_sysio.select_out.fds.num_wr = unblock_wr;
/* exception fds are currently not considered */
_sysio.select_out.fds.num_ex = unblock_ex;
result = true;
break;
}
/*
* Return if timeout is zero or timeout exceeded
*/
if (_sysio.select_in.timeout.zero() || timeout_reached) {
/*
if (timeout_reached) log("timeout_reached");
else log("timeout.zero()");
*/
_sysio.select_out.fds.num_rd = 0;
_sysio.select_out.fds.num_wr = 0;
_sysio.select_out.fds.num_ex = 0;
result = true;
break;
}
/*
* Return if signals are pending
*/
if (!_pending_signals.empty()) {
_sysio.error.select = Sysio::SELECT_ERR_INTERRUPT;
break;
}
/*
* Block at barrier except when reaching the timeout
*/
if (!_sysio.select_in.timeout.infinite()) {
unsigned long to_msec = (timeout_sec * 1000) + (timeout_usec / 1000);
Timeout_state ts;
Timeout_alarm ta(ts, _blocker, _timeout_scheduler, to_msec);
/* block until timeout is reached or we were unblocked */
_blocker.lock();
if (ts.timed_out) {
timeout_reached = 1;
}
else {
/*
* We woke up before reaching the timeout,
* so we discard the alarm
*/
ta.discard();
}
}
else {
/* let's block infinitely */
_blocker.lock();
}
}
/*
* Unregister barrier at watched I/O channels
*/
for (Genode::size_t i = 0; i < in_fds_total; i++) {
int fd = in_fds.array[i];
if (!fd_in_use(fd)) continue;
Shared_pointer<Io_channel> io = io_channel_by_fd(fd);
io->unregister_wake_up_notifier(&notifiers[i]);
}
/*
* Unregister receptor
*/
io_receptor_registry()->unregister_receptor(&receptor);
break;
}
case SYSCALL_FORK:
{
Genode::addr_t ip = _sysio.fork_in.ip;
Genode::addr_t sp = _sysio.fork_in.sp;
Genode::addr_t parent_cap_addr = _sysio.fork_in.parent_cap_addr;
int const new_pid = _pid_allocator.alloc();
Child * child = nullptr;
try {
/*
* XXX To ease debugging, it would be useful to generate a
* unique name that includes the PID instead of just
* reusing the name of the parent.
*/
child = new (_heap) Child(_child_policy.name(),
_verbose,
_user_info,
this,
_kill_broadcaster,
_timeout_scheduler,
*this,
_pid_allocator,
new_pid,
_env,
_root_dir,
_args,
_sysio_env.env(),
_heap,
Capability quota accounting and trading This patch mirrors the accounting and trading scheme that Genode employs for physical memory to the accounting of capability allocations. Capability quotas must now be explicitly assigned to subsystems by specifying a 'caps=<amount>' attribute to init's start nodes. Analogously to RAM quotas, cap quotas can be traded between clients and servers as part of the session protocol. The capability budget of each component is maintained by the component's corresponding PD session at core. At the current stage, the accounting is applied to RPC capabilities, signal-context capabilities, and dataspace capabilities. Capabilities that are dynamically allocated via core's CPU and TRACE service are not yet covered. Also, the capabilities allocated by resource multiplexers outside of core (like nitpicker) must be accounted by the respective servers, which is not covered yet. If a component runs out of capabilities, core's PD service prints a warning to the log. To observe the consumption of capabilities per component in detail, the PD service is equipped with a diagnostic mode, which can be enabled via the 'diag' attribute in the target node of init's routing rules. E.g., the following route enables the diagnostic mode for the PD session of the "timer" component: <default-route> <service name="PD" unscoped_label="timer"> <parent diag="yes"/> </service> ... </default-route> For subsystems based on a sub-init instance, init can be configured to report the capability-quota information of its subsystems by adding the attribute 'child_caps="yes"' to init's '<report>' config node. Init's own capability quota can be reported by adding the attribute 'init_caps="yes"'. Fixes #2398
2017-05-08 19:35:43 +00:00
_ref_pd, _ref_pd_cap,
_ref_ram, _ref_ram_cap,
_parent_services,
true,
_destruct_queue);
} catch (Child::Insufficient_memory) {
_sysio.error.fork = Sysio::FORK_NOMEM;
break;
}
Family_member::insert(child);
_assign_io_channels_to(child);
/* copy our address space into the new child */
try {
_pd.replay(child->ram(), child->pd(), _env.rm(), _heap,
child->ds_registry(), _ep);
/* start executing the main thread of the new process */
child->start_forked_main_thread(ip, sp, parent_cap_addr);
/* activate child entrypoint, thereby starting the new process */
child->start();
_sysio.fork_out.pid = new_pid;
result = true;
}
catch (Region_map::Region_conflict) {
error("region conflict while replaying the address space"); }
break;
}
case SYSCALL_GETPID:
{
_sysio.getpid_out.pid = pid();
return true;
}
case SYSCALL_WAIT4:
{
Family_member *exited = _sysio.wait4_in.nohang ? poll4() : wait4();
if (exited) {
_sysio.wait4_out.pid = exited->pid();
_sysio.wait4_out.status = exited->exit_status();
Family_member::remove(exited);
static_cast<Child *>(exited)->submit_exit_signal();
} else {
if (_sysio.wait4_in.nohang) {
_sysio.wait4_out.pid = 0;
_sysio.wait4_out.status = 0;
} else {
_sysio.error.wait4 = Sysio::WAIT4_ERR_INTERRUPT;
break;
}
}
result = true;
break;
}
case SYSCALL_PIPE:
{
Shared_pointer<Pipe> pipe (new (_heap) Pipe, _heap);
Shared_pointer<Io_channel> pipe_sink (new (_heap) Pipe_sink_io_channel (pipe, _env.ep()), _heap);
Shared_pointer<Io_channel> pipe_source(new (_heap) Pipe_source_io_channel(pipe, _env.ep()), _heap);
_sysio.pipe_out.fd[0] = add_io_channel(pipe_source);
_sysio.pipe_out.fd[1] = add_io_channel(pipe_sink);
result = true;
break;
}
case SYSCALL_DUP2:
{
int fd = add_io_channel(io_channel_by_fd(_sysio.dup2_in.fd),
_sysio.dup2_in.to_fd);
_sysio.dup2_out.fd = fd;
result = true;
break;
}
case SYSCALL_UNLINK:
_sysio.error.unlink = _root_dir.unlink(_sysio.unlink_in.path);
result = (_sysio.error.unlink == Vfs::Directory_service::UNLINK_OK);
break;
case SYSCALL_READLINK:
{
Vfs::file_size out_count = 0;
_sysio.error.readlink =
_root_dir.readlink(_sysio.readlink_in.path,
_sysio.readlink_out.chunk,
min(_sysio.readlink_in.bufsiz,
sizeof(_sysio.readlink_out.chunk)),
out_count);
_sysio.readlink_out.count = out_count;
result = (_sysio.error.readlink == Vfs::Directory_service::READLINK_OK);
break;
}
case SYSCALL_RENAME:
_sysio.error.rename = _root_dir.rename(_sysio.rename_in.from_path,
_sysio.rename_in.to_path);
result = (_sysio.error.rename == Vfs::Directory_service::RENAME_OK);
break;
case SYSCALL_MKDIR:
_sysio.error.mkdir = _root_dir.mkdir(_sysio.mkdir_in.path, 0);
result = (_sysio.error.mkdir == Vfs::Directory_service::MKDIR_OK);
break;
case SYSCALL_SYMLINK:
_sysio.error.symlink = _root_dir.symlink(_sysio.symlink_in.oldpath,
_sysio.symlink_in.newpath);
result = (_sysio.error.symlink == Vfs::Directory_service::SYMLINK_OK);
break;
case SYSCALL_USERINFO:
{
if (_sysio.userinfo_in.request == Sysio::USERINFO_GET_UID
|| _sysio.userinfo_in.request == Sysio::USERINFO_GET_GID) {
_sysio.userinfo_out.uid = _user_info.uid();
_sysio.userinfo_out.gid = _user_info.gid();
result = true;
break;
}
/*
* Since NOUX supports exactly one user, return false if we
* got a unknown uid.
*/
if (_sysio.userinfo_in.uid != _user_info.uid())
break;
Genode::memcpy(_sysio.userinfo_out.name,
_user_info.name().string(),
sizeof(User_info::Name));
Genode::memcpy(_sysio.userinfo_out.shell,
_user_info.shell().string(),
sizeof(User_info::Shell));
Genode::memcpy(_sysio.userinfo_out.home,
_user_info.home().string(),
sizeof(User_info::Home));
_sysio.userinfo_out.uid = _user_info.uid();
_sysio.userinfo_out.gid = _user_info.gid();
result = true;
break;
}
case SYSCALL_GETTIMEOFDAY:
{
/**
* Since the timeout_scheduler thread is started after noux it
* basicly returns the eleapsed time since noux was started. We
* abuse this timer to provide a more useful implemenation of
* gettimeofday() to make certain programs (e.g. ping(1)) happy.
* Note: this is just a short-term solution because Genode currently
* lacks a proper time interface (there is a RTC driver however, but
* there is no interface for it).
*/
unsigned long time = _timeout_scheduler.curr_time();
_sysio.gettimeofday_out.sec = (time / 1000);
_sysio.gettimeofday_out.usec = (time % 1000) * 1000;
result = true;
break;
}
case SYSCALL_CLOCK_GETTIME:
{
/**
* It's the same procedure as in SYSCALL_GETTIMEOFDAY.
*/
unsigned long time = _timeout_scheduler.curr_time();
switch (_sysio.clock_gettime_in.clock_id) {
/* CLOCK_SECOND is used by time(3) in the libc. */
case Sysio::CLOCK_ID_SECOND:
{
_sysio.clock_gettime_out.sec = (time / 1000);
_sysio.clock_gettime_out.nsec = 0;
result = true;
break;
}
default:
{
_sysio.clock_gettime_out.sec = 0;
_sysio.clock_gettime_out.nsec = 0;
_sysio.error.clock = Sysio::CLOCK_ERR_INVALID;
break;
}
}
break;
}
case SYSCALL_UTIMES:
{
/**
* This systemcall is currently not implemented because we lack
* the needed mechanisms in most file-systems.
*
* But we return true anyway to keep certain programs, e.g. make
* happy.
*/
result = true;
break;
}
case SYSCALL_SYNC:
{
_root_dir.sync("/");
result = true;
break;
}
case SYSCALL_KILL:
{
if (_kill_broadcaster.kill(_sysio.kill_in.pid,
_sysio.kill_in.sig))
result = true;
else
_sysio.error.kill = Sysio::KILL_ERR_SRCH;
break;
}
case SYSCALL_GETDTABLESIZE:
{
_sysio.getdtablesize_out.n =
Noux::File_descriptor_registry::MAX_FILE_DESCRIPTORS;
result = true;
break;
}
case SYSCALL_SOCKET:
case SYSCALL_GETSOCKOPT:
case SYSCALL_SETSOCKOPT:
case SYSCALL_ACCEPT:
case SYSCALL_BIND:
case SYSCALL_LISTEN:
case SYSCALL_SEND:
case SYSCALL_SENDTO:
case SYSCALL_RECV:
case SYSCALL_RECVFROM:
case SYSCALL_GETPEERNAME:
case SYSCALL_SHUTDOWN:
case SYSCALL_CONNECT:
result = _syscall_net(sc);
break;
case SYSCALL_INVALID: break;
}
}
catch (Invalid_fd) {
_sysio.error.general = Vfs::Directory_service::ERR_FD_INVALID;
error("invalid file descriptor"); }
catch (...) { error("unexpected exception"); }
/* handle signals which might have occured */
while (!_pending_signals.empty() &&
(_sysio.pending_signals.avail_capacity() > 0)) {
_sysio.pending_signals.add(_pending_signals.get());
}
return result;
}