Store Rhizome external blob files in "blob" subdirectory

This will make issue #50 a bit easier to tackle
This commit is contained in:
Andrew Bettison 2014-03-07 16:55:38 +10:30
parent fa6c563e5b
commit 0bef066949
8 changed files with 63 additions and 47 deletions

View File

@ -427,7 +427,7 @@ ATOM(bool_t, enable, 1, boolean,, "If true, server opens
ATOM(bool_t, fetch, 1, boolean,, "If false, no new bundles will be fetched from peers") ATOM(bool_t, fetch, 1, boolean,, "If false, no new bundles will be fetched from peers")
ATOM(bool_t, clean_on_open, 0, boolean,, "If true, Rhizome database is cleaned at start of every command") ATOM(bool_t, clean_on_open, 0, boolean,, "If true, Rhizome database is cleaned at start of every command")
ATOM(bool_t, clean_on_start, 1, boolean,, "If true, Rhizome database is cleaned at start of daemon") ATOM(bool_t, clean_on_start, 1, boolean,, "If true, Rhizome database is cleaned at start of daemon")
STRING(256, datastore_path, "", absolute_path,, "Path of rhizome storage directory, absolute or relative to instance directory") STRING(256, datastore_path, "", str_nonempty,, "Path of rhizome storage directory, absolute or relative to instance directory")
ATOM(uint64_t, database_size, 1000000, uint64_scaled,, "Size of database in bytes") ATOM(uint64_t, database_size, 1000000, uint64_scaled,, "Size of database in bytes")
ATOM(uint32_t, max_blob_size, 128 * 1024, uint32_scaled,, "Store payloads larger than this in files not SQLite blobs") ATOM(uint32_t, max_blob_size, 128 * 1024, uint32_scaled,, "Store payloads larger than this in files not SQLite blobs")

4
os.c
View File

@ -36,14 +36,14 @@ int mkdirs(const char *path, mode_t mode)
return mkdirsn(path, strlen(path), mode); return mkdirsn(path, strlen(path), mode);
} }
int emkdirs(const char *path, mode_t mode) int _emkdirs(struct __sourceloc __whence, const char *path, mode_t mode)
{ {
if (mkdirs(path, mode) == -1) if (mkdirs(path, mode) == -1)
return WHYF_perror("mkdirs(%s,%o)", alloca_str_toprint(path), mode); return WHYF_perror("mkdirs(%s,%o)", alloca_str_toprint(path), mode);
return 0; return 0;
} }
int emkdirsn(const char *path, size_t len, mode_t mode) int _emkdirsn(struct __sourceloc __whence, const char *path, size_t len, mode_t mode)
{ {
if (mkdirsn(path, len, mode) == -1) if (mkdirsn(path, len, mode) == -1)
return WHYF_perror("mkdirsn(%s,%lu,%o)", alloca_toprint(-1, path, len), (unsigned long)len, mode); return WHYF_perror("mkdirsn(%s,%lu,%o)", alloca_toprint(-1, path, len), (unsigned long)len, mode);

8
os.h
View File

@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
#include "log.h"
#ifndef __SERVAL_DNA__OS_INLINE #ifndef __SERVAL_DNA__OS_INLINE
# if __GNUC__ && !__GNUC_STDC_INLINE__ # if __GNUC__ && !__GNUC_STDC_INLINE__
@ -103,9 +104,12 @@ __SERVAL_DNA__OS_INLINE off64_t lseek64(int fd, off64_t offset, int whence) {
/* The "e" variants log the error before returning -1. /* The "e" variants log the error before returning -1.
*/ */
int mkdirs(const char *path, mode_t mode); int mkdirs(const char *path, mode_t mode);
int emkdirs(const char *path, mode_t mode);
int mkdirsn(const char *path, size_t len, mode_t mode); int mkdirsn(const char *path, size_t len, mode_t mode);
int emkdirsn(const char *path, size_t len, mode_t mode); int _emkdirs(struct __sourceloc, const char *path, mode_t mode);
int _emkdirsn(struct __sourceloc, const char *path, size_t len, mode_t mode);
#define emkdirs(path, mode) _emkdirs(__WHENCE__, (path), (mode))
#define emkdirsn(path, len, mode) _emkdirsn(__WHENCE__, (path), (len), (mode))
void srandomdev(); void srandomdev();
int urandombytes(unsigned char *buf, size_t len); int urandombytes(unsigned char *buf, size_t len);

View File

@ -398,14 +398,14 @@ int rhizome_fetch_delay_ms();
int rhizome_set_datastore_path(const char *path); int rhizome_set_datastore_path(const char *path);
const char *rhizome_datastore_path(); const char *rhizome_datastore_path();
int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...); int form_rhizome_datastore_path(struct __sourceloc, char * buf, size_t bufsiz, const char *fmt, ...);
int create_rhizome_datastore_dir();
/* Handy statement for forming the path of a rhizome store file in a char buffer whose declaration /* Handy statement for forming the path of a rhizome store file in a char buffer whose declaration
* is in scope (so that sizeof(buf) will work). Evaluates to true if the pathname fitted into * is in scope (so that sizeof(buf) will work). Evaluates to true if the pathname fitted into
* the provided buffer, false (0) otherwise (after logging an error). */ * the provided buffer, false (0) otherwise (after logging an error). */
#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__)) #define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path(__WHENCE__, (buf), sizeof(buf), (fmt), ##__VA_ARGS__))
#define FORM_RHIZOME_IMPORT_PATH(buf,fmt,...) (form_rhizome_import_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__))
#define RHIZOME_BLOB_SUBDIR "blob"
extern sqlite3 *rhizome_db; extern sqlite3 *rhizome_db;
serval_uuid_t rhizome_db_uuid; serval_uuid_t rhizome_db_uuid;
@ -478,9 +478,9 @@ int rhizome_manifest_verify(rhizome_manifest *m);
int rhizome_hash_file(rhizome_manifest *m, const char *path, rhizome_filehash_t *hash_out, uint64_t *size_out); int rhizome_hash_file(rhizome_manifest *m, const char *path, rhizome_filehash_t *hash_out, uint64_t *size_out);
void _rhizome_manifest_free(struct __sourceloc __whence, rhizome_manifest *m); void _rhizome_manifest_free(struct __sourceloc, rhizome_manifest *m);
#define rhizome_manifest_free(m) _rhizome_manifest_free(__WHENCE__,m) #define rhizome_manifest_free(m) _rhizome_manifest_free(__WHENCE__,m)
rhizome_manifest *_rhizome_new_manifest(struct __sourceloc __whence); rhizome_manifest *_rhizome_new_manifest(struct __sourceloc);
#define rhizome_new_manifest() _rhizome_new_manifest(__WHENCE__) #define rhizome_new_manifest() _rhizome_new_manifest(__WHENCE__)
int rhizome_store_manifest(rhizome_manifest *m); int rhizome_store_manifest(rhizome_manifest *m);

View File

@ -51,7 +51,7 @@ int rhizome_set_datastore_path(const char *path)
return 0; return 0;
} }
int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...) int form_rhizome_datastore_path(struct __sourceloc __whence, char * buf, size_t bufsiz, const char *fmt, ...)
{ {
va_list ap; va_list ap;
strbuf b = strbuf_local(buf, bufsiz); strbuf b = strbuf_local(buf, bufsiz);
@ -70,9 +70,10 @@ int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...)
return 1; return 1;
} }
int create_rhizome_datastore_dir() static int create_rhizome_datastore_dir()
{ {
if (config.debug.rhizome) DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path()); if (config.debug.rhizome)
DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path());
return emkdirs(rhizome_datastore_path(), 0700); return emkdirs(rhizome_datastore_path(), 0700);
} }
@ -238,23 +239,24 @@ int rhizome_opendb()
IN(); IN();
if (create_rhizome_datastore_dir() == -1){ if (create_rhizome_datastore_dir() == -1)
RETURN(WHY("No Directory")); RETURN(-1);
}
char dbpath[1024]; char dbpath[1024];
if (!sqlite3_temp_directory){ if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, RHIZOME_BLOB_SUBDIR))
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "")){ RETURN(-1);
RETURN(WHY("Invalid path")); if (emkdirs(dbpath, 0700) == -1)
} RETURN(-1);
if (!sqlite3_temp_directory) {
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "sqlite3tmp"))
RETURN(-1);
if (emkdirs(dbpath, 0700) == -1)
RETURN(-1);
sqlite3_temp_directory = sqlite3_mprintf("%s", dbpath); sqlite3_temp_directory = sqlite3_mprintf("%s", dbpath);
} }
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db")){
RETURN(WHY("Invalid path"));
}
sqlite3_config(SQLITE_CONFIG_LOG,sqlite_log,NULL); sqlite3_config(SQLITE_CONFIG_LOG,sqlite_log,NULL);
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db"))
RETURN(-1);
if (sqlite3_open(dbpath,&rhizome_db)){ if (sqlite3_open(dbpath,&rhizome_db)){
RETURN(WHYF("SQLite could not open database %s: %s", dbpath, sqlite3_errmsg(rhizome_db))); RETURN(WHYF("SQLite could not open database %s: %s", dbpath, sqlite3_errmsg(rhizome_db)));
} }
@ -1189,7 +1191,7 @@ static int rhizome_delete_external(const rhizome_filehash_t *hashp)
{ {
// attempt to remove any external blob // attempt to remove any external blob
char blob_path[1024]; char blob_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(*hashp))) if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp)))
return -1; return -1;
if (unlink(blob_path) == -1) { if (unlink(blob_path) == -1) {
if (errno != ENOENT) if (errno != ENOENT)
@ -1242,7 +1244,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
time_ms_t insert_horizon_no_manifest = now - (orphan_payload_persist_ms ? atoi(orphan_payload_persist_ms) : 1000); // 1 second ago time_ms_t insert_horizon_no_manifest = now - (orphan_payload_persist_ms ? atoi(orphan_payload_persist_ms) : 1000); // 1 second ago
time_ms_t insert_horizon_not_valid = now - (invalid_payload_persist_ms ? atoi(invalid_payload_persist_ms) : 300000); // 5 minutes ago time_ms_t insert_horizon_not_valid = now - (invalid_payload_persist_ms ? atoi(invalid_payload_persist_ms) : 300000); // 5 minutes ago
// cleanup external blobs for unreferenced files // Remove external payload files for stale, incomplete payloads.
unsigned candidates = 0; unsigned candidates = 0;
sqlite3_stmt *statement = sqlite_prepare_bind(&retry, sqlite3_stmt *statement = sqlite_prepare_bind(&retry,
"SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 0;", "SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 0;",
@ -1258,6 +1260,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
} }
sqlite3_finalize(statement); sqlite3_finalize(statement);
// Remove external payload files for old, unreferenced payloads.
statement = sqlite_prepare_bind(&retry, statement = sqlite_prepare_bind(&retry,
"SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 1 AND NOT EXISTS( SELECT 1 FROM MANIFESTS WHERE MANIFESTS.filehash = FILES.id);", "SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 1 AND NOT EXISTS( SELECT 1 FROM MANIFESTS WHERE MANIFESTS.filehash = FILES.id);",
INT64, insert_horizon_no_manifest, END); INT64, insert_horizon_no_manifest, END);
@ -1271,10 +1274,14 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
++report->deleted_orphan_files; ++report->deleted_orphan_files;
} }
sqlite3_finalize(statement); sqlite3_finalize(statement);
// TODO Iterate through all files in RHIZOME_BLOB_SUBDIR and delete any which are no longer
// referenced or are stale. This could take a long time, so for scalability should be done
// in an incremental background task. See GitHub issue #50.
// Remove payload records that are stale and incomplete or old and unreferenced.
int ret; int ret;
if (candidates) { if (candidates) {
// clean out unreferenced files
ret = sqlite_exec_void_retry_loglevel(LOG_LEVEL_WARN, &retry, ret = sqlite_exec_void_retry_loglevel(LOG_LEVEL_WARN, &retry,
"DELETE FROM FILES WHERE inserttime < ? AND datavalid = 0;", "DELETE FROM FILES WHERE inserttime < ? AND datavalid = 0;",
INT64, insert_horizon_not_valid, END); INT64, insert_horizon_not_valid, END);
@ -1286,10 +1293,11 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
if (report && ret > 0) if (report && ret > 0)
report->deleted_orphan_files += ret; report->deleted_orphan_files += ret;
} }
// Remove payload blobs that are no longer referenced.
if ((ret = rhizome_delete_orphan_fileblobs_retry(&retry)) > 0 && report) if ((ret = rhizome_delete_orphan_fileblobs_retry(&retry)) > 0 && report)
report->deleted_orphan_fileblobs += ret; report->deleted_orphan_fileblobs += ret;
if (config.debug.rhizome && report) if (config.debug.rhizome && report)
DEBUGF("report deleted_stale_incoming_files=%u deleted_orphan_files=%u deleted_orphan_fileblobs=%u", DEBUGF("report deleted_stale_incoming_files=%u deleted_orphan_files=%u deleted_orphan_fileblobs=%u",
report->deleted_stale_incoming_files, report->deleted_stale_incoming_files,

View File

@ -105,7 +105,7 @@ enum rhizome_payload_status rhizome_open_write(struct rhizome_write *write, cons
} }
char blob_path[1024]; char blob_path[1024];
if (file_length == RHIZOME_SIZE_UNSET || file_length > config.rhizome.max_blob_size) { if (file_length == RHIZOME_SIZE_UNSET || file_length > config.rhizome.max_blob_size) {
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id)) if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id))
return RHIZOME_PAYLOAD_STATUS_ERROR; return RHIZOME_PAYLOAD_STATUS_ERROR;
if (config.debug.externalblobs) if (config.debug.externalblobs)
DEBUGF("Attempting to put blob for id='%"PRIu64"' in %s", write->temp_id, blob_path); DEBUGF("Attempting to put blob for id='%"PRIu64"' in %s", write->temp_id, blob_path);
@ -480,7 +480,7 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
SHA512_End(&write->sha512_context, NULL); SHA512_End(&write->sha512_context, NULL);
char blob_path[1024]; char blob_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id)) { if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id)) {
WHYF("Failed to generate external blob path"); WHYF("Failed to generate external blob path");
status = RHIZOME_PAYLOAD_STATUS_ERROR; status = RHIZOME_PAYLOAD_STATUS_ERROR;
goto failure; goto failure;
@ -556,10 +556,8 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
if (external) { if (external) {
char dest_path[1024]; char dest_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, alloca_tohex_rhizome_filehash_t(write->id))){ if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(write->id)))
WHYF("Failed to generate file path");
goto dbfailure; goto dbfailure;
}
if (rename(blob_path, dest_path) == -1) { if (rename(blob_path, dest_path) == -1) {
WHYF_perror("rename(%s, %s)", blob_path, dest_path); WHYF_perror("rename(%s, %s)", blob_path, dest_path);
goto dbfailure; goto dbfailure;
@ -785,7 +783,7 @@ enum rhizome_payload_status rhizome_open_read(struct rhizome_read *read, const r
} else { } else {
// No row in FILEBLOBS, look for an external blob file. // No row in FILEBLOBS, look for an external blob file.
char blob_path[1024]; char blob_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(read->id))) if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id)))
return RHIZOME_PAYLOAD_STATUS_ERROR; return RHIZOME_PAYLOAD_STATUS_ERROR;
read->blob_fd = open(blob_path, O_RDONLY); read->blob_fd = open(blob_path, O_RDONLY);
if (read->blob_fd == -1) { if (read->blob_fd == -1) {

View File

@ -357,7 +357,8 @@ setup_CorruptExternalBlob() {
executeOk_servald rhizome add file $SIDB1 file1 file1.manifest executeOk_servald rhizome add file $SIDB1 file1 file1.manifest
extract_manifest_id manifestid file1.manifest extract_manifest_id manifestid file1.manifest
extract_manifest_filehash filehash file1.manifest extract_manifest_filehash filehash file1.manifest
echo "Replacement" >$SERVALINSTANCE_PATH/$filehash assert cmp file1 "$SERVALINSTANCE_PATH/blob/$filehash"
echo "Replacement" >"$SERVALINSTANCE_PATH/blob/$filehash"
} }
test_CorruptExternalBlob() { test_CorruptExternalBlob() {
execute --exit-status=255 $servald rhizome extract file $manifestid file1a execute --exit-status=255 $servald rhizome extract file $manifestid file1a
@ -1081,7 +1082,8 @@ setup_ImportOwnBundle() {
extract_manifest_filehash filehash fileB.manifest extract_manifest_filehash filehash fileB.manifest
extract_manifest_BK BK fileB.manifest extract_manifest_BK BK fileB.manifest
extract_manifest_date date fileB.manifest extract_manifest_date date fileB.manifest
rm -f $SERVALINSTANCE_PATH/rhizome.db assert [ -e "$SERVALINSTANCE_PATH/rhizome.db" ]
rm -f "$SERVALINSTANCE_PATH/rhizome.db"
executeOk_servald rhizome list executeOk_servald rhizome list
assert_rhizome_list assert_rhizome_list
} }

View File

@ -452,17 +452,21 @@ setup_CorruptPayload() {
set_instance +A set_instance +A
executeOk_servald config set rhizome.max_blob_size 0 executeOk_servald config set rhizome.max_blob_size 0
rhizome_add_file file1 1024 rhizome_add_file file1 1024
create_file file2 1024
start_servald_instances +A +B start_servald_instances +A +B
wait_until bundle_received_by $BID:$VERSION +B wait_until bundle_received_by $BID:$VERSION +B
} assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
test_CorruptPayload() { create_file file2 1024
set_instance +A cp file2 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
cp file2 $SERVALINSTANCE_PATH/$FILEHASH
execute --exit-status=255 $servald rhizome extract file $BID file1a execute --exit-status=255 $servald rhizome extract file $BID file1a
# TODO at the moment, the re-fetch is only triggered by restarting the
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
# should be automatic and immediate without restarting the daemon.
stop_servald_server stop_servald_server
start_servald_server start_servald_server
}
test_CorruptPayload() {
wait_until grep -i "Stored file $FILEHASH" $LOGA wait_until grep -i "Stored file $FILEHASH" $LOGA
executeOk $servald rhizome extract file $BID file1a
} }
doc_MissingPayload="A missing payload should be re-fetched" doc_MissingPayload="A missing payload should be re-fetched"
@ -471,10 +475,10 @@ setup_MissingPayload() {
set_instance +A set_instance +A
executeOk_servald config set rhizome.max_blob_size 0 executeOk_servald config set rhizome.max_blob_size 0
rhizome_add_file file1 1024 rhizome_add_file file1 1024
assert [ -s "$SERVALINSTANCE_PATH/$FILEHASH" ] assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
start_servald_instances +A +B start_servald_instances +A +B
wait_until bundle_received_by $BID:$VERSION +B wait_until bundle_received_by $BID:$VERSION +B
rm $SERVALINSTANCE_PATH/$FILEHASH rm -f "$SERVALINSTANCE_PATH/blob/$FILEHASH"
execute --exit-status=1 --stderr $servald rhizome extract file $BID file1a execute --exit-status=1 --stderr $servald rhizome extract file $BID file1a
# TODO at the moment, the re-fetch is only triggered by restarting the # TODO at the moment, the re-fetch is only triggered by restarting the
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch # daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch