mirror of
https://github.com/servalproject/serval-dna.git
synced 2025-01-18 02:39:44 +00:00
Store Rhizome external blob files in "blob" subdirectory
This will make issue #50 a bit easier to tackle
This commit is contained in:
parent
fa6c563e5b
commit
0bef066949
@ -427,7 +427,7 @@ ATOM(bool_t, enable, 1, boolean,, "If true, server opens
|
||||
ATOM(bool_t, fetch, 1, boolean,, "If false, no new bundles will be fetched from peers")
|
||||
ATOM(bool_t, clean_on_open, 0, boolean,, "If true, Rhizome database is cleaned at start of every command")
|
||||
ATOM(bool_t, clean_on_start, 1, boolean,, "If true, Rhizome database is cleaned at start of daemon")
|
||||
STRING(256, datastore_path, "", absolute_path,, "Path of rhizome storage directory, absolute or relative to instance directory")
|
||||
STRING(256, datastore_path, "", str_nonempty,, "Path of rhizome storage directory, absolute or relative to instance directory")
|
||||
ATOM(uint64_t, database_size, 1000000, uint64_scaled,, "Size of database in bytes")
|
||||
ATOM(uint32_t, max_blob_size, 128 * 1024, uint32_scaled,, "Store payloads larger than this in files not SQLite blobs")
|
||||
|
||||
|
4
os.c
4
os.c
@ -36,14 +36,14 @@ int mkdirs(const char *path, mode_t mode)
|
||||
return mkdirsn(path, strlen(path), mode);
|
||||
}
|
||||
|
||||
int emkdirs(const char *path, mode_t mode)
|
||||
int _emkdirs(struct __sourceloc __whence, const char *path, mode_t mode)
|
||||
{
|
||||
if (mkdirs(path, mode) == -1)
|
||||
return WHYF_perror("mkdirs(%s,%o)", alloca_str_toprint(path), mode);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int emkdirsn(const char *path, size_t len, mode_t mode)
|
||||
int _emkdirsn(struct __sourceloc __whence, const char *path, size_t len, mode_t mode)
|
||||
{
|
||||
if (mkdirsn(path, len, mode) == -1)
|
||||
return WHYF_perror("mkdirsn(%s,%lu,%o)", alloca_toprint(-1, path, len), (unsigned long)len, mode);
|
||||
|
8
os.h
8
os.h
@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include "log.h"
|
||||
|
||||
#ifndef __SERVAL_DNA__OS_INLINE
|
||||
# if __GNUC__ && !__GNUC_STDC_INLINE__
|
||||
@ -103,9 +104,12 @@ __SERVAL_DNA__OS_INLINE off64_t lseek64(int fd, off64_t offset, int whence) {
|
||||
/* The "e" variants log the error before returning -1.
|
||||
*/
|
||||
int mkdirs(const char *path, mode_t mode);
|
||||
int emkdirs(const char *path, mode_t mode);
|
||||
int mkdirsn(const char *path, size_t len, mode_t mode);
|
||||
int emkdirsn(const char *path, size_t len, mode_t mode);
|
||||
int _emkdirs(struct __sourceloc, const char *path, mode_t mode);
|
||||
int _emkdirsn(struct __sourceloc, const char *path, size_t len, mode_t mode);
|
||||
|
||||
#define emkdirs(path, mode) _emkdirs(__WHENCE__, (path), (mode))
|
||||
#define emkdirsn(path, len, mode) _emkdirsn(__WHENCE__, (path), (len), (mode))
|
||||
|
||||
void srandomdev();
|
||||
int urandombytes(unsigned char *buf, size_t len);
|
||||
|
12
rhizome.h
12
rhizome.h
@ -398,14 +398,14 @@ int rhizome_fetch_delay_ms();
|
||||
int rhizome_set_datastore_path(const char *path);
|
||||
|
||||
const char *rhizome_datastore_path();
|
||||
int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...);
|
||||
int create_rhizome_datastore_dir();
|
||||
int form_rhizome_datastore_path(struct __sourceloc, char * buf, size_t bufsiz, const char *fmt, ...);
|
||||
|
||||
/* Handy statement for forming the path of a rhizome store file in a char buffer whose declaration
|
||||
* is in scope (so that sizeof(buf) will work). Evaluates to true if the pathname fitted into
|
||||
* the provided buffer, false (0) otherwise (after logging an error). */
|
||||
#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__))
|
||||
#define FORM_RHIZOME_IMPORT_PATH(buf,fmt,...) (form_rhizome_import_path((buf), sizeof(buf), (fmt), ##__VA_ARGS__))
|
||||
#define FORM_RHIZOME_DATASTORE_PATH(buf,fmt,...) (form_rhizome_datastore_path(__WHENCE__, (buf), sizeof(buf), (fmt), ##__VA_ARGS__))
|
||||
|
||||
#define RHIZOME_BLOB_SUBDIR "blob"
|
||||
|
||||
extern sqlite3 *rhizome_db;
|
||||
serval_uuid_t rhizome_db_uuid;
|
||||
@ -478,9 +478,9 @@ int rhizome_manifest_verify(rhizome_manifest *m);
|
||||
|
||||
int rhizome_hash_file(rhizome_manifest *m, const char *path, rhizome_filehash_t *hash_out, uint64_t *size_out);
|
||||
|
||||
void _rhizome_manifest_free(struct __sourceloc __whence, rhizome_manifest *m);
|
||||
void _rhizome_manifest_free(struct __sourceloc, rhizome_manifest *m);
|
||||
#define rhizome_manifest_free(m) _rhizome_manifest_free(__WHENCE__,m)
|
||||
rhizome_manifest *_rhizome_new_manifest(struct __sourceloc __whence);
|
||||
rhizome_manifest *_rhizome_new_manifest(struct __sourceloc);
|
||||
#define rhizome_new_manifest() _rhizome_new_manifest(__WHENCE__)
|
||||
|
||||
int rhizome_store_manifest(rhizome_manifest *m);
|
||||
|
@ -51,7 +51,7 @@ int rhizome_set_datastore_path(const char *path)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...)
|
||||
int form_rhizome_datastore_path(struct __sourceloc __whence, char * buf, size_t bufsiz, const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
strbuf b = strbuf_local(buf, bufsiz);
|
||||
@ -70,9 +70,10 @@ int form_rhizome_datastore_path(char * buf, size_t bufsiz, const char *fmt, ...)
|
||||
return 1;
|
||||
}
|
||||
|
||||
int create_rhizome_datastore_dir()
|
||||
static int create_rhizome_datastore_dir()
|
||||
{
|
||||
if (config.debug.rhizome) DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path());
|
||||
if (config.debug.rhizome)
|
||||
DEBUGF("mkdirs(%s, 0700)", rhizome_datastore_path());
|
||||
return emkdirs(rhizome_datastore_path(), 0700);
|
||||
}
|
||||
|
||||
@ -238,23 +239,24 @@ int rhizome_opendb()
|
||||
|
||||
IN();
|
||||
|
||||
if (create_rhizome_datastore_dir() == -1){
|
||||
RETURN(WHY("No Directory"));
|
||||
}
|
||||
if (create_rhizome_datastore_dir() == -1)
|
||||
RETURN(-1);
|
||||
char dbpath[1024];
|
||||
if (!sqlite3_temp_directory){
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "")){
|
||||
RETURN(WHY("Invalid path"));
|
||||
}
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, RHIZOME_BLOB_SUBDIR))
|
||||
RETURN(-1);
|
||||
if (emkdirs(dbpath, 0700) == -1)
|
||||
RETURN(-1);
|
||||
if (!sqlite3_temp_directory) {
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "sqlite3tmp"))
|
||||
RETURN(-1);
|
||||
if (emkdirs(dbpath, 0700) == -1)
|
||||
RETURN(-1);
|
||||
sqlite3_temp_directory = sqlite3_mprintf("%s", dbpath);
|
||||
}
|
||||
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db")){
|
||||
RETURN(WHY("Invalid path"));
|
||||
}
|
||||
|
||||
sqlite3_config(SQLITE_CONFIG_LOG,sqlite_log,NULL);
|
||||
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dbpath, "rhizome.db"))
|
||||
RETURN(-1);
|
||||
if (sqlite3_open(dbpath,&rhizome_db)){
|
||||
RETURN(WHYF("SQLite could not open database %s: %s", dbpath, sqlite3_errmsg(rhizome_db)));
|
||||
}
|
||||
@ -1189,7 +1191,7 @@ static int rhizome_delete_external(const rhizome_filehash_t *hashp)
|
||||
{
|
||||
// attempt to remove any external blob
|
||||
char blob_path[1024];
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(*hashp)))
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp)))
|
||||
return -1;
|
||||
if (unlink(blob_path) == -1) {
|
||||
if (errno != ENOENT)
|
||||
@ -1242,7 +1244,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
|
||||
time_ms_t insert_horizon_no_manifest = now - (orphan_payload_persist_ms ? atoi(orphan_payload_persist_ms) : 1000); // 1 second ago
|
||||
time_ms_t insert_horizon_not_valid = now - (invalid_payload_persist_ms ? atoi(invalid_payload_persist_ms) : 300000); // 5 minutes ago
|
||||
|
||||
// cleanup external blobs for unreferenced files
|
||||
// Remove external payload files for stale, incomplete payloads.
|
||||
unsigned candidates = 0;
|
||||
sqlite3_stmt *statement = sqlite_prepare_bind(&retry,
|
||||
"SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 0;",
|
||||
@ -1258,6 +1260,7 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
|
||||
}
|
||||
sqlite3_finalize(statement);
|
||||
|
||||
// Remove external payload files for old, unreferenced payloads.
|
||||
statement = sqlite_prepare_bind(&retry,
|
||||
"SELECT id FROM FILES WHERE inserttime < ? AND datavalid = 1 AND NOT EXISTS( SELECT 1 FROM MANIFESTS WHERE MANIFESTS.filehash = FILES.id);",
|
||||
INT64, insert_horizon_no_manifest, END);
|
||||
@ -1271,10 +1274,14 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
|
||||
++report->deleted_orphan_files;
|
||||
}
|
||||
sqlite3_finalize(statement);
|
||||
|
||||
|
||||
// TODO Iterate through all files in RHIZOME_BLOB_SUBDIR and delete any which are no longer
|
||||
// referenced or are stale. This could take a long time, so for scalability should be done
|
||||
// in an incremental background task. See GitHub issue #50.
|
||||
|
||||
// Remove payload records that are stale and incomplete or old and unreferenced.
|
||||
int ret;
|
||||
if (candidates) {
|
||||
// clean out unreferenced files
|
||||
ret = sqlite_exec_void_retry_loglevel(LOG_LEVEL_WARN, &retry,
|
||||
"DELETE FROM FILES WHERE inserttime < ? AND datavalid = 0;",
|
||||
INT64, insert_horizon_not_valid, END);
|
||||
@ -1286,10 +1293,11 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
|
||||
if (report && ret > 0)
|
||||
report->deleted_orphan_files += ret;
|
||||
}
|
||||
|
||||
|
||||
// Remove payload blobs that are no longer referenced.
|
||||
if ((ret = rhizome_delete_orphan_fileblobs_retry(&retry)) > 0 && report)
|
||||
report->deleted_orphan_fileblobs += ret;
|
||||
|
||||
|
||||
if (config.debug.rhizome && report)
|
||||
DEBUGF("report deleted_stale_incoming_files=%u deleted_orphan_files=%u deleted_orphan_fileblobs=%u",
|
||||
report->deleted_stale_incoming_files,
|
||||
|
@ -105,7 +105,7 @@ enum rhizome_payload_status rhizome_open_write(struct rhizome_write *write, cons
|
||||
}
|
||||
char blob_path[1024];
|
||||
if (file_length == RHIZOME_SIZE_UNSET || file_length > config.rhizome.max_blob_size) {
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id))
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id))
|
||||
return RHIZOME_PAYLOAD_STATUS_ERROR;
|
||||
if (config.debug.externalblobs)
|
||||
DEBUGF("Attempting to put blob for id='%"PRIu64"' in %s", write->temp_id, blob_path);
|
||||
@ -480,7 +480,7 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
|
||||
SHA512_End(&write->sha512_context, NULL);
|
||||
|
||||
char blob_path[1024];
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%"PRIu64, write->temp_id)) {
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id)) {
|
||||
WHYF("Failed to generate external blob path");
|
||||
status = RHIZOME_PAYLOAD_STATUS_ERROR;
|
||||
goto failure;
|
||||
@ -556,10 +556,8 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
|
||||
|
||||
if (external) {
|
||||
char dest_path[1024];
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, alloca_tohex_rhizome_filehash_t(write->id))){
|
||||
WHYF("Failed to generate file path");
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(dest_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(write->id)))
|
||||
goto dbfailure;
|
||||
}
|
||||
if (rename(blob_path, dest_path) == -1) {
|
||||
WHYF_perror("rename(%s, %s)", blob_path, dest_path);
|
||||
goto dbfailure;
|
||||
@ -785,7 +783,7 @@ enum rhizome_payload_status rhizome_open_read(struct rhizome_read *read, const r
|
||||
} else {
|
||||
// No row in FILEBLOBS, look for an external blob file.
|
||||
char blob_path[1024];
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, alloca_tohex_rhizome_filehash_t(read->id)))
|
||||
if (!FORM_RHIZOME_DATASTORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id)))
|
||||
return RHIZOME_PAYLOAD_STATUS_ERROR;
|
||||
read->blob_fd = open(blob_path, O_RDONLY);
|
||||
if (read->blob_fd == -1) {
|
||||
|
@ -357,7 +357,8 @@ setup_CorruptExternalBlob() {
|
||||
executeOk_servald rhizome add file $SIDB1 file1 file1.manifest
|
||||
extract_manifest_id manifestid file1.manifest
|
||||
extract_manifest_filehash filehash file1.manifest
|
||||
echo "Replacement" >$SERVALINSTANCE_PATH/$filehash
|
||||
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$filehash"
|
||||
echo "Replacement" >"$SERVALINSTANCE_PATH/blob/$filehash"
|
||||
}
|
||||
test_CorruptExternalBlob() {
|
||||
execute --exit-status=255 $servald rhizome extract file $manifestid file1a
|
||||
@ -1081,7 +1082,8 @@ setup_ImportOwnBundle() {
|
||||
extract_manifest_filehash filehash fileB.manifest
|
||||
extract_manifest_BK BK fileB.manifest
|
||||
extract_manifest_date date fileB.manifest
|
||||
rm -f $SERVALINSTANCE_PATH/rhizome.db
|
||||
assert [ -e "$SERVALINSTANCE_PATH/rhizome.db" ]
|
||||
rm -f "$SERVALINSTANCE_PATH/rhizome.db"
|
||||
executeOk_servald rhizome list
|
||||
assert_rhizome_list
|
||||
}
|
||||
|
@ -452,17 +452,21 @@ setup_CorruptPayload() {
|
||||
set_instance +A
|
||||
executeOk_servald config set rhizome.max_blob_size 0
|
||||
rhizome_add_file file1 1024
|
||||
create_file file2 1024
|
||||
start_servald_instances +A +B
|
||||
wait_until bundle_received_by $BID:$VERSION +B
|
||||
}
|
||||
test_CorruptPayload() {
|
||||
set_instance +A
|
||||
cp file2 $SERVALINSTANCE_PATH/$FILEHASH
|
||||
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
|
||||
create_file file2 1024
|
||||
cp file2 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
|
||||
execute --exit-status=255 $servald rhizome extract file $BID file1a
|
||||
# TODO at the moment, the re-fetch is only triggered by restarting the
|
||||
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
|
||||
# should be automatic and immediate without restarting the daemon.
|
||||
stop_servald_server
|
||||
start_servald_server
|
||||
}
|
||||
test_CorruptPayload() {
|
||||
wait_until grep -i "Stored file $FILEHASH" $LOGA
|
||||
executeOk $servald rhizome extract file $BID file1a
|
||||
}
|
||||
|
||||
doc_MissingPayload="A missing payload should be re-fetched"
|
||||
@ -471,10 +475,10 @@ setup_MissingPayload() {
|
||||
set_instance +A
|
||||
executeOk_servald config set rhizome.max_blob_size 0
|
||||
rhizome_add_file file1 1024
|
||||
assert [ -s "$SERVALINSTANCE_PATH/$FILEHASH" ]
|
||||
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
|
||||
start_servald_instances +A +B
|
||||
wait_until bundle_received_by $BID:$VERSION +B
|
||||
rm $SERVALINSTANCE_PATH/$FILEHASH
|
||||
rm -f "$SERVALINSTANCE_PATH/blob/$FILEHASH"
|
||||
execute --exit-status=1 --stderr $servald rhizome extract file $BID file1a
|
||||
# TODO at the moment, the re-fetch is only triggered by restarting the
|
||||
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
|
||||
|
Loading…
Reference in New Issue
Block a user