Store Rhizome external blob files in "blob" subdirectory

This will make issue #50 a bit easier to tackle
This commit is contained in:
Andrew Bettison 2014-03-07 16:55:38 +10:30
parent fa6c563e5b
commit 0bef066949
8 changed files with 63 additions and 47 deletions

View File

@ -427,7 +427,7 @@ ATOM(bool_t, enable, 1, boolean,, "If true, server opens
ATOM(bool_t, fetch, 1, boolean,, "If false, no new bundles will be fetched from peers")
ATOM(bool_t, clean_on_open, 0, boolean,, "If true, Rhizome database is cleaned at start of every command")
ATOM(bool_t, clean_on_start, 1, boolean,, "If true, Rhizome database is cleaned at start of daemon")
STRING(256, datastore_path, "", absolute_path,, "Path of rhizome storage directory, absolute or relative to instance directory")
STRING(256, datastore_path, "", str_nonempty,, "Path of rhizome storage directory, absolute or relative to instance directory")
ATOM(uint64_t, database_size, 1000000, uint64_scaled,, "Size of database in bytes")
ATOM(uint32_t, max_blob_size, 128 * 1024, uint32_scaled,, "Store payloads larger than this in files not SQLite blobs")

4
os.c
View File

@ -36,14 +36,14 @@ int mkdirs(const char *path, mode_t mode)
return mkdirsn(path, strlen(path), mode);
}
/* Error-logging variant of mkdirs(): calls mkdirs(path, mode) and, if that
 * returns -1, reports the failure through WHYF_perror() (including the path and
 * the octal mode) and returns whatever WHYF_perror() evaluates to.  Returns 0
 * when mkdirs() succeeds.
 *
 * NOTE(review): the __whence parameter is not referenced explicitly in the
 * body; presumably the logging macro picks it up so the message is attributed
 * to the caller's source location -- confirm against log.h.
 */
int emkdirs(const char *path, mode_t mode)
int _emkdirs(struct __sourceloc __whence, const char *path, mode_t mode)
{
if (mkdirs(path, mode) == -1)
return WHYF_perror("mkdirs(%s,%o)", alloca_str_toprint(path), mode);
return 0;
}
int emkdirsn(const char *path, size_t len, mode_t mode)
int _emkdirsn(struct __sourceloc __whence, const char *path, size_t len, mode_t mode)
{
if (mkdirsn(path, len, mode) == -1)
return WHYF_perror("mkdirsn(%s,%lu,%o)", alloca_toprint(-1, path, len), (unsigned long)len, mode);

8
os.h
View File

@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include <stdint.h>
#include <unistd.h>
#include <sys/types.h>
#include "log.h"
#ifndef __SERVAL_DNA__OS_INLINE
# if __GNUC__ && !__GNUC_STDC_INLINE__
@ -103,9 +104,12 @@ __SERVAL_DNA__OS_INLINE off64_t lseek64(int fd, off64_t offset, int whence) {
/* The "e" variants log the error before returning -1.
*/
int mkdirs(const char *path, mode_t mode);
int emkdirs(const char *path, mode_t mode);
int mkdirsn(const char *path, size_t len, mode_t mode);
int emkdirsn(const char *path, size_t len, mode_t mode);
int _emkdirs(struct __sourceloc, const char *path, mode_t mode);
int _emkdirsn(struct __sourceloc, const char *path, size_t len, mode_t mode);
#define emkdirs(path, mode) _emkdirs(__WHENCE__, (path), (mode))
#define emkdirsn(path, len, mode) _emkdirsn(__WHENCE__, (path), (len), (mode))
void srandomdev();
int urandombytes(unsigned char *buf, size_t len);

View File

@ -398,14 +398,14 @@ int rhizome_fetch_delay_ms();
int rhizome_set_datastore_path(const char *path);
const char *rhizome_datastore_path();
int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...);
int create_rhizome_datastore_dir();
int form_rhizome_datastore_path(struct __sourceloc, char * buf, size_t bufsiz, const char *fmt, ...);
/* Handy statement for forming the path of a rhizome store file in a char buffer whose declaration
* is in scope (so that sizeof(buf) will work). Evaluates to true if the pathname fitted into
* the provided buffer, false (0) otherwise (after logging an error). */
#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__))
#define FORM_RHIZOME_IMPORT_PATH(buf,fmt,...) (form_rhizome_import_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__))
#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path(__WHENCE__, (buf), sizeof(buf), (fmt), ##__VA_ARGS__))
#define RHIZOME_BLOB_SUBDIR "blob"
extern sqlite3 *rhizome_db;
serval_uuid_t rhizome_db_uuid;
@ -478,9 +478,9 @@ int rhizome_manifest_verify(rhizome_manifest *m);
int rhizome_hash_file(rhizome_manifest *m, const char *path, rhizome_filehash_t *hash_out, uint64_t *size_out);
void _rhizome_manifest_free(struct __sourceloc __whence, rhizome_manifest *m);
void _rhizome_manifest_free(struct __sourceloc, rhizome_manifest *m);
#define rhizome_manifest_free(m) _rhizome_manifest_free(__WHENCE__,m)
rhizome_manifest *_rhizome_new_manifest(struct __sourceloc __whence);
rhizome_manifest *_rhizome_new_manifest(struct __sourceloc);
#define rhizome_new_manifest() _rhizome_new_manifest(__WHENCE__)
int rhizome_store_manifest(rhizome_manifest *m);

View File

@ -51,7 +51,7 @@ int rhizome_set_datastore_path(const char *path)
return 0;
}
int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...)
int form_rhizome_datastore_path(struct __sourceloc __whence, char * buf, size_t bufsiz, const char *fmt, ...)
{
va_list ap;
strbuf b = strbuf_local(buf, bufsiz);
@ -70,9 +70,10 @@ int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...)
return 1;
}
/* Create the Rhizome datastore directory.
 *
 * Passes rhizome_datastore_path() to emkdirs() with mode 0700 and returns
 * emkdirs()'s result unchanged; the caller in rhizome_opendb() treats -1 as
 * failure.  When config.debug.rhizome is set, the path is logged first.
 */
int create_rhizome_datastore_dir()
static int create_rhizome_datastore_dir()
{
if (config.debug.rhizome) DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path());
if (config.debug.rhizome)
DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path());
return emkdirs(rhizome_datastore_path(), 0700);
}
@ -238,23 +239,24 @@ int rhizome_opendb()
IN();
if (create_rhizome_datastore_dir() == -1){
RETURN(WHY("No Directory"));
}
if (create_rhizome_datastore_dir() == -1)
RETURN(-1);
char dbpath[1024];
if (!sqlite3_temp_directory){
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "")){
RETURN(WHY("Invalid path"));
}
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, RHIZOME_BLOB_SUBDIR))
RETURN(-1);
if (emkdirs(dbpath, 0700) == -1)
RETURN(-1);
if (!sqlite3_temp_directory) {
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "sqlite3tmp"))
RETURN(-1);
if (emkdirs(dbpath, 0700) == -1)
RETURN(-1);
sqlite3_temp_directory = sqlite3_mprintf("%s", dbpath);
}
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db")){
RETURN(WHY("Invalid path"));
}
sqlite3_config(SQLITE_CONFIG_LOG,sqlite_log,NULL);
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db"))
RETURN(-1);
if (sqlite3_open(dbpath,&rhizome_db)){
RETURN(WHYF("SQLite could not open database %s: %s", dbpath, sqlite3_errmsg(rhizome_db)));
}
@ -1189,7 +1191,7 @@ static int rhizome_delete_external(const rhizome_filehash_t *hashp)
{
// attempt to remove any external blob
char blob_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(*hashp)))
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp)))
return -1;
if (unlink(blob_path) == -1) {
if (errno != ENOENT)
@ -1242,7 +1244,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
time_ms_t insert_horizon_no_manifest = now - (orphan_payload_persist_ms ? atoi(orphan_payload_persist_ms) : 1000); // 1 second ago
time_ms_t insert_horizon_not_valid = now - (invalid_payload_persist_ms ? atoi(invalid_payload_persist_ms) : 300000); // 5 minutes ago
// cleanup external blobs for unreferenced files
// Remove external payload files for stale, incomplete payloads.
unsigned candidates = 0;
sqlite3_stmt *statement = sqlite_prepare_bind(&retry,
"SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 0;",
@ -1258,6 +1260,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
}
sqlite3_finalize(statement);
// Remove external payload files for old, unreferenced payloads.
statement = sqlite_prepare_bind(&retry,
"SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 1 AND NOT EXISTS( SELECT 1 FROM MANIFESTS WHERE MANIFESTS.filehash = FILES.id);",
INT64, insert_horizon_no_manifest, END);
@ -1271,10 +1274,14 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
++report->deleted_orphan_files;
}
sqlite3_finalize(statement);
// TODO Iterate through all files in RHIZOME_BLOB_SUBDIR and delete any which are no longer
// referenced or are stale. This could take a long time, so for scalability should be done
// in an incremental background task. See GitHub issue #50.
// Remove payload records that are stale and incomplete or old and unreferenced.
int ret;
if (candidates) {
// clean out unreferenced files
ret = sqlite_exec_void_retry_loglevel(LOG_LEVEL_WARN, &retry,
"DELETE FROM FILES WHERE inserttime < ? AND datavalid = 0;",
INT64, insert_horizon_not_valid, END);
@ -1286,10 +1293,11 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
if (report && ret > 0)
report->deleted_orphan_files += ret;
}
// Remove payload blobs that are no longer referenced.
if ((ret = rhizome_delete_orphan_fileblobs_retry(&retry)) > 0 && report)
report->deleted_orphan_fileblobs += ret;
if (config.debug.rhizome && report)
DEBUGF("report deleted_stale_incoming_files=%u deleted_orphan_files=%u deleted_orphan_fileblobs=%u",
report->deleted_stale_incoming_files,

View File

@ -105,7 +105,7 @@ enum rhizome_payload_status rhizome_open_write(struct rhizome_write *write, cons
}
char blob_path[1024];
if (file_length == RHIZOME_SIZE_UNSET || file_length > config.rhizome.max_blob_size) {
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id))
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id))
return RHIZOME_PAYLOAD_STATUS_ERROR;
if (config.debug.externalblobs)
DEBUGF("Attempting to put blob for id='%"PRIu64"' in %s", write->temp_id, blob_path);
@ -480,7 +480,7 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
SHA512_End(&write->sha512_context, NULL);
char blob_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id)) {
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id)) {
WHYF("Failed to generate external blob path");
status = RHIZOME_PAYLOAD_STATUS_ERROR;
goto failure;
@ -556,10 +556,8 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
if (external) {
char dest_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, alloca_tohex_rhizome_filehash_t(write->id))){
WHYF("Failed to generate file path");
if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(write->id)))
goto dbfailure;
}
if (rename(blob_path, dest_path) == -1) {
WHYF_perror("rename(%s, %s)", blob_path, dest_path);
goto dbfailure;
@ -785,7 +783,7 @@ enum rhizome_payload_status rhizome_open_read(struct rhizome_read *read, const r
} else {
// No row in FILEBLOBS, look for an external blob file.
char blob_path[1024];
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(read->id)))
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id)))
return RHIZOME_PAYLOAD_STATUS_ERROR;
read->blob_fd = open(blob_path, O_RDONLY);
if (read->blob_fd == -1) {

View File

@ -357,7 +357,8 @@ setup_CorruptExternalBlob() {
executeOk_servald rhizome add file $SIDB1 file1 file1.manifest
extract_manifest_id manifestid file1.manifest
extract_manifest_filehash filehash file1.manifest
echo "Replacement" >$SERVALINSTANCE_PATH/$filehash
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$filehash"
echo "Replacement" >"$SERVALINSTANCE_PATH/blob/$filehash"
}
test_CorruptExternalBlob() {
execute --exit-status=255 $servald rhizome extract file $manifestid file1a
@ -1081,7 +1082,8 @@ setup_ImportOwnBundle() {
extract_manifest_filehash filehash fileB.manifest
extract_manifest_BK BK fileB.manifest
extract_manifest_date date fileB.manifest
rm -f $SERVALINSTANCE_PATH/rhizome.db
assert [ -e "$SERVALINSTANCE_PATH/rhizome.db" ]
rm -f "$SERVALINSTANCE_PATH/rhizome.db"
executeOk_servald rhizome list
assert_rhizome_list
}

View File

@ -452,17 +452,21 @@ setup_CorruptPayload() {
set_instance +A
executeOk_servald config set rhizome.max_blob_size 0
rhizome_add_file file1 1024
create_file file2 1024
start_servald_instances +A +B
wait_until bundle_received_by $BID:$VERSION +B
}
test_CorruptPayload() {
set_instance +A
cp file2 $SERVALINSTANCE_PATH/$FILEHASH
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
create_file file2 1024
cp file2 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
execute --exit-status=255 $servald rhizome extract file $BID file1a
# TODO at the moment, the re-fetch is only triggered by restarting the
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
# should be automatic and immediate without restarting the daemon.
stop_servald_server
start_servald_server
}
test_CorruptPayload() {
wait_until grep -i "Stored file $FILEHASH" $LOGA
executeOk $servald rhizome extract file $BID file1a
}
doc_MissingPayload="A missing payload should be re-fetched"
@ -471,10 +475,10 @@ setup_MissingPayload() {
set_instance +A
executeOk_servald config set rhizome.max_blob_size 0
rhizome_add_file file1 1024
assert [ -s "$SERVALINSTANCE_PATH/$FILEHASH" ]
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
start_servald_instances +A +B
wait_until bundle_received_by $BID:$VERSION +B
rm $SERVALINSTANCE_PATH/$FILEHASH
rm -f "$SERVALINSTANCE_PATH/blob/$FILEHASH"
execute --exit-status=1 --stderr $servald rhizome extract file $BID file1a
# TODO at the moment, the re-fetch is only triggered by restarting the
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch