From 0bef0669497c7afa9d6e410f7ea2cde5f0a1fe2f Mon Sep 17 00:00:00 2001 From: Andrew Bettison Date: Fri, 7 Mar 2014 16:55:38 +1030 Subject: [PATCH] Store Rhizome external blob files in "blob" subdirectory This will make issue #50 a bit easier to tackle --- conf_schema.h | 2 +- os.c | 4 ++-- os.h | 8 +++++-- rhizome.h | 12 +++++------ rhizome_database.c | 50 +++++++++++++++++++++++++------------------ rhizome_store.c | 10 ++++----- tests/rhizomeops | 6 ++++-- tests/rhizomeprotocol | 18 ++++++++++------ 8 files changed, 63 insertions(+), 47 deletions(-) diff --git a/conf_schema.h b/conf_schema.h index 600bbb45..0d6d9244 100644 --- a/conf_schema.h +++ b/conf_schema.h @@ -427,7 +427,7 @@ ATOM(bool_t, enable, 1, boolean,, "If true, server opens ATOM(bool_t, fetch, 1, boolean,, "If false, no new bundles will be fetched from peers") ATOM(bool_t, clean_on_open, 0, boolean,, "If true, Rhizome database is cleaned at start of every command") ATOM(bool_t, clean_on_start, 1, boolean,, "If true, Rhizome database is cleaned at start of daemon") -STRING(256, datastore_path, "", absolute_path,, "Path of rhizome storage directory, absolute or relative to instance directory") +STRING(256, datastore_path, "", str_nonempty,, "Path of rhizome storage directory, absolute or relative to instance directory") ATOM(uint64_t, database_size, 1000000, uint64_scaled,, "Size of database in bytes") ATOM(uint32_t, max_blob_size, 128 * 1024, uint32_scaled,, "Store payloads larger than this in files not SQLite blobs") diff --git a/os.c b/os.c index ce7433bb..e5a5de3c 100644 --- a/os.c +++ b/os.c @@ -36,14 +36,14 @@ int mkdirs(const char *path, mode_t mode) return mkdirsn(path, strlen(path), mode); } -int emkdirs(const char *path, mode_t mode) +int _emkdirs(struct __sourceloc __whence, const char *path, mode_t mode) { if (mkdirs(path, mode) == -1) return WHYF_perror("mkdirs(%s,%o)", alloca_str_toprint(path), mode); return 0; } -int emkdirsn(const char *path, size_t len, mode_t mode) +int 
_emkdirsn(struct __sourceloc __whence, const char *path, size_t len, mode_t mode) { if (mkdirsn(path, len, mode) == -1) return WHYF_perror("mkdirsn(%s,%lu,%o)", alloca_toprint(-1, path, len), (unsigned long)len, mode); diff --git a/os.h b/os.h index 14f15f69..0ba8037f 100644 --- a/os.h +++ b/os.h @@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #include #include #include +#include "log.h" #ifndef __SERVAL_DNA__OS_INLINE # if __GNUC__ && !__GNUC_STDC_INLINE__ @@ -103,9 +104,12 @@ __SERVAL_DNA__OS_INLINE off64_t lseek64(int fd, off64_t offset, int whence) { /* The "e" variants log the error before returning -1. */ int mkdirs(const char *path, mode_t mode); -int emkdirs(const char *path, mode_t mode); int mkdirsn(const char *path, size_t len, mode_t mode); -int emkdirsn(const char *path, size_t len, mode_t mode); +int _emkdirs(struct __sourceloc, const char *path, mode_t mode); +int _emkdirsn(struct __sourceloc, const char *path, size_t len, mode_t mode); + +#define emkdirs(path, mode) _emkdirs(__WHENCE__, (path), (mode)) +#define emkdirsn(path, len, mode) _emkdirsn(__WHENCE__, (path), (len), (mode)) void srandomdev(); int urandombytes(unsigned char *buf, size_t len); diff --git a/rhizome.h b/rhizome.h index 41b8eb53..12f771c3 100644 --- a/rhizome.h +++ b/rhizome.h @@ -398,14 +398,14 @@ int rhizome_fetch_delay_ms(); int rhizome_set_datastore_path(const char *path); const char *rhizome_datastore_path(); -int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...); -int create_rhizome_datastore_dir(); +int form_rhizome_datastore_path(struct __sourceloc, char * buf, size_t bufsiz, const char *fmt, ...); /* Handy statement for forming the path of a rhizome store file in a char buffer whose declaration * is in scope (so that sizeof(buf) will work). Evaluates to true if the pathname fitted into * the provided buffer, false (0) otherwise (after logging an error). 
*/ -#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__)) -#define FORM_RHIZOME_IMPORT_PATH(buf,fmt,...) (form_rhizome_import_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__)) +#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path(__WHENCE__, (buf), sizeof(buf), (fmt), ##__VA_ARGS__)) + +#define RHIZOME_BLOB_SUBDIR "blob" extern sqlite3 *rhizome_db; serval_uuid_t rhizome_db_uuid; @@ -478,9 +478,9 @@ int rhizome_manifest_verify(rhizome_manifest *m); int rhizome_hash_file(rhizome_manifest *m, const char *path, rhizome_filehash_t *hash_out, uint64_t *size_out); -void _rhizome_manifest_free(struct __sourceloc __whence, rhizome_manifest *m); +void _rhizome_manifest_free(struct __sourceloc, rhizome_manifest *m); #define rhizome_manifest_free(m) _rhizome_manifest_free(__WHENCE__,m) -rhizome_manifest *_rhizome_new_manifest(struct __sourceloc __whence); +rhizome_manifest *_rhizome_new_manifest(struct __sourceloc); #define rhizome_new_manifest() _rhizome_new_manifest(__WHENCE__) int rhizome_store_manifest(rhizome_manifest *m); diff --git a/rhizome_database.c b/rhizome_database.c index 9eece4b7..37fbdb79 100644 --- a/rhizome_database.c +++ b/rhizome_database.c @@ -51,7 +51,7 @@ int rhizome_set_datastore_path(const char *path) return 0; } -int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...) +int form_rhizome_datastore_path(struct __sourceloc __whence, char * buf, size_t bufsiz, const char *fmt, ...) { va_list ap; strbuf b = strbuf_local(buf, bufsiz); @@ -70,9 +70,10 @@ int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...) 
return 1; } -int create_rhizome_datastore_dir() +static int create_rhizome_datastore_dir() { - if (config.debug.rhizome) DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path()); + if (config.debug.rhizome) + DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path()); return emkdirs(rhizome_datastore_path(), 0700); } @@ -238,23 +239,24 @@ int rhizome_opendb() IN(); - if (create_rhizome_datastore_dir() == -1){ - RETURN(WHY("No Directory")); - } + if (create_rhizome_datastore_dir() == -1) + RETURN(-1); char dbpath[1024]; - if (!sqlite3_temp_directory){ - if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "")){ - RETURN(WHY("Invalid path")); - } + if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, RHIZOME_BLOB_SUBDIR)) + RETURN(-1); + if (emkdirs(dbpath, 0700) == -1) + RETURN(-1); + if (!sqlite3_temp_directory) { + if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "sqlite3tmp")) + RETURN(-1); + if (emkdirs(dbpath, 0700) == -1) + RETURN(-1); sqlite3_temp_directory = sqlite3_mprintf("%s", dbpath); } - - if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db")){ - RETURN(WHY("Invalid path")); - } - sqlite3_config(SQLITE_CONFIG_LOG,sqlite_log,NULL); + if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db")) + RETURN(-1); if (sqlite3_open(dbpath,&rhizome_db)){ RETURN(WHYF("SQLite could not open database %s: %s", dbpath, sqlite3_errmsg(rhizome_db))); } @@ -1189,7 +1191,7 @@ static int rhizome_delete_external(const rhizome_filehash_t *hashp) { // attempt to remove any external blob char blob_path[1024]; - if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(*hashp))) + if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp))) return -1; if (unlink(blob_path) == -1) { if (errno != ENOENT) @@ -1242,7 +1244,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report) time_ms_t insert_horizon_no_manifest = now - (orphan_payload_persist_ms ? 
atoi(orphan_payload_persist_ms) : 1000); // 1 second ago time_ms_t insert_horizon_not_valid = now - (invalid_payload_persist_ms ? atoi(invalid_payload_persist_ms) : 300000); // 5 minutes ago - // cleanup external blobs for unreferenced files + // Remove external payload files for stale, incomplete payloads. unsigned candidates = 0; sqlite3_stmt *statement = sqlite_prepare_bind(&retry, "SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 0;", @@ -1258,6 +1260,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report) } sqlite3_finalize(statement); + // Remove external payload files for old, unreferenced payloads. statement = sqlite_prepare_bind(&retry, "SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 1 AND NOT EXISTS( SELECT 1 FROM MANIFESTS WHERE MANIFESTS.filehash = FILES.id);", INT64, insert_horizon_no_manifest, END); @@ -1271,10 +1274,14 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report) ++report->deleted_orphan_files; } sqlite3_finalize(statement); - + + // TODO Iterate through all files in RHIZOME_BLOB_SUBDIR and delete any which are no longer + // referenced or are stale. This could take a long time, so for scalability should be done + // in an incremental background task. See GitHub issue #50. + + // Remove payload records that are stale and incomplete or old and unreferenced. int ret; if (candidates) { - // clean out unreferenced files ret = sqlite_exec_void_retry_loglevel(LOG_LEVEL_WARN, &retry, "DELETE FROM FILES WHERE inserttime < ? AND datavalid = 0;", INT64, insert_horizon_not_valid, END); @@ -1286,10 +1293,11 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report) if (report && ret > 0) report->deleted_orphan_files += ret; } - + + // Remove payload blobs that are no longer referenced. 
if ((ret = rhizome_delete_orphan_fileblobs_retry(&retry)) > 0 && report) report->deleted_orphan_fileblobs += ret; - + if (config.debug.rhizome && report) DEBUGF("report deleted_stale_incoming_files=%u deleted_orphan_files=%u deleted_orphan_fileblobs=%u", report->deleted_stale_incoming_files, diff --git a/rhizome_store.c b/rhizome_store.c index 14146797..9c2c57f5 100644 --- a/rhizome_store.c +++ b/rhizome_store.c @@ -105,7 +105,7 @@ enum rhizome_payload_status rhizome_open_write(struct rhizome_write *write, cons } char blob_path[1024]; if (file_length == RHIZOME_SIZE_UNSET || file_length > config.rhizome.max_blob_size) { - if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id)) + if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id)) return RHIZOME_PAYLOAD_STATUS_ERROR; if (config.debug.externalblobs) DEBUGF("Attempting to put blob for id='%"PRIu64"' in %s", write->temp_id, blob_path); @@ -480,7 +480,7 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write) SHA512_End(&write->sha512_context, NULL); char blob_path[1024]; - if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id)) { + if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id)) { WHYF("Failed to generate external blob path"); status = RHIZOME_PAYLOAD_STATUS_ERROR; goto failure; @@ -556,10 +556,8 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write) if (external) { char dest_path[1024]; - if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, alloca_tohex_rhizome_filehash_t(write->id))){ - WHYF("Failed to generate file path"); + if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(write->id))) goto dbfailure; - } if (rename(blob_path, dest_path) == -1) { WHYF_perror("rename(%s, %s)", blob_path, dest_path); goto dbfailure; @@ -785,7 +783,7 @@ enum rhizome_payload_status rhizome_open_read(struct rhizome_read 
*read, const r } else { // No row in FILEBLOBS, look for an external blob file. char blob_path[1024]; - if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(read->id))) + if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id))) return RHIZOME_PAYLOAD_STATUS_ERROR; read->blob_fd = open(blob_path, O_RDONLY); if (read->blob_fd == -1) { diff --git a/tests/rhizomeops b/tests/rhizomeops index c8540832..6b53b572 100755 --- a/tests/rhizomeops +++ b/tests/rhizomeops @@ -357,7 +357,8 @@ setup_CorruptExternalBlob() { executeOk_servald rhizome add file $SIDB1 file1 file1.manifest extract_manifest_id manifestid file1.manifest extract_manifest_filehash filehash file1.manifest - echo "Replacement" >$SERVALINSTANCE_PATH/$filehash + assert cmp file1 "$SERVALINSTANCE_PATH/blob/$filehash" + echo "Replacement" >"$SERVALINSTANCE_PATH/blob/$filehash" } test_CorruptExternalBlob() { execute --exit-status=255 $servald rhizome extract file $manifestid file1a @@ -1081,7 +1082,8 @@ setup_ImportOwnBundle() { extract_manifest_filehash filehash fileB.manifest extract_manifest_BK BK fileB.manifest extract_manifest_date date fileB.manifest - rm -f $SERVALINSTANCE_PATH/rhizome.db + assert [ -e "$SERVALINSTANCE_PATH/rhizome.db" ] + rm -f "$SERVALINSTANCE_PATH/rhizome.db" executeOk_servald rhizome list assert_rhizome_list } diff --git a/tests/rhizomeprotocol b/tests/rhizomeprotocol index aff12e35..e3d677a3 100755 --- a/tests/rhizomeprotocol +++ b/tests/rhizomeprotocol @@ -452,17 +452,21 @@ setup_CorruptPayload() { set_instance +A executeOk_servald config set rhizome.max_blob_size 0 rhizome_add_file file1 1024 - create_file file2 1024 start_servald_instances +A +B wait_until bundle_received_by $BID:$VERSION +B -} -test_CorruptPayload() { - set_instance +A - cp file2 $SERVALINSTANCE_PATH/$FILEHASH + assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH" + create_file file2 1024 + cp file2 
"$SERVALINSTANCE_PATH/blob/$FILEHASH" execute --exit-status=255 $servald rhizome extract file $BID file1a + # TODO at the moment, the re-fetch is only triggered by restarting the + # daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch + # should be automatic and immediate without restarting the daemon. stop_servald_server start_servald_server +} +test_CorruptPayload() { wait_until grep -i "Stored file $FILEHASH" $LOGA + executeOk $servald rhizome extract file $BID file1a } doc_MissingPayload="A missing payload should be re-fetched" @@ -471,10 +475,10 @@ setup_MissingPayload() { set_instance +A executeOk_servald config set rhizome.max_blob_size 0 rhizome_add_file file1 1024 - assert [ -s "$SERVALINSTANCE_PATH/$FILEHASH" ] + assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH" start_servald_instances +A +B wait_until bundle_received_by $BID:$VERSION +B - rm $SERVALINSTANCE_PATH/$FILEHASH + rm -f "$SERVALINSTANCE_PATH/blob/$FILEHASH" execute --exit-status=1 --stderr $servald rhizome extract file $BID file1a # TODO at the moment, the re-fetch is only triggered by restarting the # daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch