Keep rhizome's external blobs in subfolders, migrating each file on first use

This commit is contained in:
Jeremy Lakeman 2018-04-10 08:51:45 +09:30
parent a691a39870
commit 57767e444d
7 changed files with 101 additions and 48 deletions

View File

@ -632,7 +632,6 @@ int rhizome_advertise_manifest(struct subscriber *dest, rhizome_manifest *m);
int rhizome_delete_bundle(const rhizome_bid_t *bidp);
int rhizome_delete_manifest(const rhizome_bid_t *bidp);
int rhizome_delete_payload(const rhizome_bid_t *bidp);
int rhizome_delete_file_id(const char *id);
int rhizome_delete_file(const rhizome_filehash_t *hashp);
#define RHIZOME_DONTVERIFY 0

View File

@ -1304,7 +1304,9 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
"SELECT id FROM FILES WHERE datavalid = 0;", END);
while (sqlite_step_retry(&retry, statement) == SQLITE_ROW) {
const char *id = (const char *) sqlite3_column_text(statement, 0);
if (rhizome_delete_file_id(id)==0 && report)
rhizome_filehash_t filehash;
if (str_to_rhizome_filehash_t(&filehash, id) != -1
&& rhizome_delete_file(&filehash)==0 && report)
++report->deleted_stale_incoming_files;
}
sqlite3_finalize(statement);
@ -1315,7 +1317,9 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
INT64, insert_horizon_no_manifest, END);
while (sqlite_step_retry(&retry, statement) == SQLITE_ROW) {
const char *id = (const char *) sqlite3_column_text(statement, 0);
if (rhizome_delete_file_id(id)==0 && report)
rhizome_filehash_t filehash;
if (str_to_rhizome_filehash_t(&filehash, id) != -1
&& rhizome_delete_file(&filehash)==0 && report)
++report->deleted_orphan_files;
}
sqlite3_finalize(statement);

View File

@ -42,6 +42,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
uint64_t rhizome_copy_file_to_blob(int fd, uint64_t id, size_t size);
/* Format the store path of the external file belonging to a payload hash.
 *
 * The file is placed two directory levels below `subdir`: the first and second
 * bytes of the hash are each printed as a two-digit hex directory name, and the
 * remaining bytes of the hash, hex encoded by alloca_tohex(), form the file name.
 *
 * @param buff      destination buffer for the path
 * @param buff_size size of `buff` in bytes
 * @param subdir    store sub-directory that contains the tree
 * @param hash      payload hash identifying the file
 * @return whatever formf_rhizome_store_path() returns; callers in this file
 *         treat a zero result as failure (reported as "path too long")
 */
static int form_store_blob_path(char *buff, size_t buff_size, const char *subdir, const rhizome_filehash_t *hash)
{
  // Everything after the two directory bytes becomes the file name.
  const char *name_hex = alloca_tohex(&hash->binary[2], sizeof(hash->binary)-2);
  return formf_rhizome_store_path(
      buff, buff_size,
      "%s/%02X/%02X/%s",
      subdir,
      hash->binary[0],
      hash->binary[1],
      name_hex);
}
/* Convenience wrapper around form_store_blob_path() that supplies the buffer size.
 * BUFF must be a true char array (not a pointer), because its capacity is taken
 * with sizeof(BUFF).
 */
#define FORM_BLOB_PATH(BUFF,SUBDIR,HASH) form_store_blob_path((BUFF),sizeof(BUFF),(SUBDIR),(HASH))
enum rhizome_payload_status rhizome_exists(const rhizome_filehash_t *hashp)
{
sqlite_retry_state retry = SQLITE_RETRY_STATE_DEFAULT;
@ -56,12 +61,22 @@ enum rhizome_payload_status rhizome_exists(const rhizome_filehash_t *hashp)
return RHIZOME_PAYLOAD_STATUS_NEW;
char blob_path[1024];
if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp))){
if (FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, hashp)){
struct stat st;
if (stat(blob_path, &st) == 0)
return RHIZOME_PAYLOAD_STATUS_STORED;
}
char legacy_path[1024];
// Migrate from the flat folder layout to the sub-tree layout.
// The directory holding the legacy file necessarily exists once stat() succeeds,
// so it is the parent directories of the *new* blob_path that must be created
// before rename(); otherwise rename() fails with ENOENT and nothing is migrated.
// blob_path is formed again here so that it is never used after a failed
// FORM_BLOB_PATH().
if (   FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, hashp)
    && FORMF_RHIZOME_STORE_PATH(legacy_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp))){
  struct stat st;
  if (stat(legacy_path, &st) == 0
      && emkdirsn(blob_path, strrchr(blob_path,'/') - blob_path, 0700)!=-1
      && rename(legacy_path, blob_path) != -1)
    return RHIZOME_PAYLOAD_STATUS_STORED;
}
uint64_t blob_rowid = 0;
stepcode = sqlite_exec_uint64_retry(&retry, &blob_rowid,
"SELECT rowid "
@ -97,13 +112,13 @@ static uint64_t rhizome_create_fileblob(sqlite_retry_state *retry, uint64_t id,
return rowid;
}
static int rhizome_delete_external(const char *id)
static int rhizome_delete_external(const rhizome_filehash_t *id)
{
// attempt to remove any external blob & partial hash file
char blob_path[1024];
if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_HASH_SUBDIR, id))
if (FORM_BLOB_PATH(blob_path, RHIZOME_HASH_SUBDIR, id))
unlink(blob_path);
if (!FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, id))
if (!FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, id))
return -1;
if (unlink(blob_path) == -1) {
if (errno != ENOENT)
@ -114,14 +129,14 @@ static int rhizome_delete_external(const char *id)
return 0;
}
static int rhizome_delete_file_id_retry(sqlite_retry_state *retry, const char *id)
static int rhizome_delete_file_retry(sqlite_retry_state *retry, const rhizome_filehash_t *filehash)
{
int ret = 0;
rhizome_delete_external(id);
sqlite3_stmt *statement = sqlite_prepare_bind(retry, "DELETE FROM fileblobs WHERE id = ?", STATIC_TEXT, id, END);
rhizome_delete_external(filehash);
sqlite3_stmt *statement = sqlite_prepare_bind(retry, "DELETE FROM fileblobs WHERE id = ?", RHIZOME_FILEHASH_T, filehash, END);
if (!statement || sqlite_exec_retry(retry, statement) == -1)
ret = -1;
statement = sqlite_prepare_bind(retry, "DELETE FROM files WHERE id = ?", STATIC_TEXT, id, END);
statement = sqlite_prepare_bind(retry, "DELETE FROM files WHERE id = ?", RHIZOME_FILEHASH_T, filehash, END);
if (!statement || sqlite_exec_retry(retry, statement) == -1)
ret = -1;
return ret == -1 ? -1 : sqlite3_changes(rhizome_db) ? 0 : 1;
@ -133,8 +148,13 @@ static int rhizome_delete_payload_retry(sqlite_retry_state *retry, const rhizome
int rows = sqlite_exec_strbuf_retry(retry, fh, "SELECT filehash FROM manifests WHERE id = ?", RHIZOME_BID_T, bidp, END);
if (rows == -1)
return -1;
if (rows && rhizome_delete_file_id_retry(retry, strbuf_str(fh)) == -1)
return -1;
if (rows){
rhizome_filehash_t hash;
if (str_to_rhizome_filehash_t(&hash, strbuf_str(fh))==-1)
return -1;
if (rhizome_delete_file_retry(retry, &hash) == -1)
return -1;
}
return 0;
}
@ -153,12 +173,6 @@ int rhizome_delete_payload(const rhizome_bid_t *bidp)
return rhizome_delete_payload_retry(&retry, bidp);
}
int rhizome_delete_file_id(const char *id)
{
sqlite_retry_state retry = SQLITE_RETRY_STATE_DEFAULT;
return rhizome_delete_file_id_retry(&retry, id);
}
/* Remove a file from the database, given its file hash.
*
* Returns 0 if file is found and removed
@ -167,9 +181,10 @@ int rhizome_delete_file_id(const char *id)
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int rhizome_delete_file(const rhizome_filehash_t *hashp)
int rhizome_delete_file(const rhizome_filehash_t *filehash)
{
return rhizome_delete_file_id(alloca_tohex_rhizome_filehash_t(*hashp));
sqlite_retry_state retry = SQLITE_RETRY_STATE_DEFAULT;
return rhizome_delete_file_retry(&retry, filehash);
}
static uint64_t store_get_free_space()
@ -293,7 +308,9 @@ static enum rhizome_payload_status store_make_space(uint64_t bytes, struct rhizo
break;
// drop the existing content and recalculate used space
if (rhizome_delete_external(id)==0)
rhizome_filehash_t hash;
if (str_to_rhizome_filehash_t(&hash, id)!=-1
&& rhizome_delete_external(&hash)==0)
external_bytes -= length;
int rowcount=0;
@ -718,8 +735,10 @@ static int keep_hash(struct rhizome_write *write_state, struct crypto_hash_sha51
{
char dest_path[1024];
// capture the state of writing the file hash
if (!FORMF_RHIZOME_STORE_PATH(dest_path, "%s/%s", RHIZOME_HASH_SUBDIR, alloca_tohex_rhizome_filehash_t(write_state->id)))
if (!FORM_BLOB_PATH(dest_path, RHIZOME_HASH_SUBDIR, &write_state->id))
return WHYF("Path too long?");
if (emkdirsn(dest_path, strrchr(dest_path,'/') - dest_path, 0700)<0)
return -1;
int fd = open(dest_path, O_WRONLY | O_CREAT | O_TRUNC, 0664);
if (fd < 0)
return WHYF_perror("Failed to create %s", dest_path);
@ -861,7 +880,9 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
if (external) {
char dest_path[1024];
if (!FORMF_RHIZOME_STORE_PATH(dest_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(write->id)))
if (!FORM_BLOB_PATH(dest_path, RHIZOME_BLOB_SUBDIR, &write->id))
goto dbfailure;
if (emkdirsn(dest_path, strrchr(dest_path,'/') - dest_path, 0700)<0)
goto dbfailure;
if (rename(blob_path, dest_path) == -1) {
WHYF_perror("rename(%s, %s)", blob_path, dest_path);
@ -1093,9 +1114,22 @@ enum rhizome_payload_status rhizome_open_read(struct rhizome_read *read, const r
crypto_hash_sha512_init(&read->sha512_context);
char blob_path[1024];
if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id))){
if (FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, &read->id)){
int fd = open(blob_path, O_RDONLY);
DEBUGF(rhizome_store, "open(%s) = %d", alloca_str_toprint(blob_path), fd);
if (fd == -1 && errno == ENOENT){
  char legacy_path[1024];
  // Migrate from the flat folder layout to the sub-tree layout.
  // The legacy path is derived from read->id, the same hash that produced
  // blob_path, so both names always refer to the same payload.
  if (FORMF_RHIZOME_STORE_PATH(legacy_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id))){
    struct stat st;
    // Create the parent directories of the destination (blob_path), not of the
    // legacy file: the legacy directory already exists, whereas the new
    // sub-directories may not, in which case rename() would fail with ENOENT.
    if (stat(legacy_path, &st) == 0
        && emkdirsn(blob_path, strrchr(blob_path,'/') - blob_path, 0700)!=-1
        && rename(legacy_path, blob_path) != -1)
      fd = open(blob_path, O_RDONLY);
  }
}
if (fd == -1){
if (errno!=ENOENT)
WHYF_perror("open(%s)", alloca_str_toprint(blob_path));
@ -1670,9 +1704,8 @@ static int append_existing_journal_file(struct rhizome_write *write, const rhizo
// Try to append directly into the previous journal file, linking them together
DEBUGF(rhizome, "Attempting to append into journal blob");
// First, we need to read a previous partial hash state
char *filehash = alloca_tohex_rhizome_filehash_t(*hashp);
char existing_path[1024];
if (!FORMF_RHIZOME_STORE_PATH(existing_path, "%s/%s", RHIZOME_BLOB_SUBDIR, filehash))
if (!FORM_BLOB_PATH(existing_path, RHIZOME_BLOB_SUBDIR, hashp))
return WHYF("existing path too long?");
int payloadfd = open(existing_path, O_RDWR, 0664);
@ -1692,7 +1725,7 @@ static int append_existing_journal_file(struct rhizome_write *write, const rhizo
}
char hash_path[1024];
if (!FORMF_RHIZOME_STORE_PATH(hash_path, "%s/%s", RHIZOME_HASH_SUBDIR, filehash)){
if (!FORM_BLOB_PATH(hash_path, RHIZOME_HASH_SUBDIR, hashp)){
close(payloadfd);
return WHYF("hash path too long?");
}

View File

@ -232,6 +232,15 @@ strip_signatures() {
done
}
# Compute the path of the external blob file for a payload hash inside the
# current instance directory, using the two-level sub-directory layout:
#   $SERVALINSTANCE_PATH/blob/<hash chars 0-1>/<hash chars 2-3>/<remaining chars>
# Arguments:
#   $1  name of the variable that receives the path; nothing is assigned if empty
#   $2  the file hash as text
# NOTE(review): the result is assigned with eval, so a caller variable named
# _var, _hash or _filepath would be shadowed by the locals below -- avoid those names.
get_external_blob_path(){
local _var="$1"
local _hash="$2"
local _filepath="$SERVALINSTANCE_PATH/blob/${_hash:0:2}/${_hash:2:2}/${_hash:4}"
if [ -n "$_var" ]; then
# The escaped \$ defers expansion to eval, so the path is assigned verbatim
# rather than being re-parsed as shell syntax.
eval "$_var=\$_filepath"
fi
}
extract_stdout_manifestid() {
extract_stdout_keyvalue "$1" manifestid "$rexp_manifestid"
}
@ -617,7 +626,8 @@ rhizome_add_bundles() {
rhizome_delete_payload_blobs() {
local filehash
for filehash; do
assert --message="Rhizome external blob file exists, filehash=$filehash" [ -e "$SERVALINSTANCE_PATH/blob/$filehash" ]
rm -f "$SERVALINSTANCE_PATH/blob/$filehash"
get_external_blob_path blob_path "$filehash"
assert --message="Rhizome external blob file exists, filehash=$filehash" [ -e "$blob_path" ]
rm -f "$blob_path"
done
}

View File

@ -481,8 +481,9 @@ setup_CorruptExternalBlob() {
executeOk_servald rhizome add file "$SIDA" file1 file1.manifest
extract_manifest_id manifestid file1.manifest
extract_manifest_filehash filehash file1.manifest
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$filehash"
echo "Replacement" >"$SERVALINSTANCE_PATH/blob/$filehash"
get_external_blob_path blob_file "$filehash"
assert cmp file1 "$blob_file"
echo "Replacement" >"$blob_file"
}
test_CorruptExternalBlob() {
execute --exit-status=255 "$servald" rhizome extract file "$manifestid" file1a
@ -1111,10 +1112,10 @@ test_JournalAppendNoHash() {
assert_stdout_add_file file1
extract_stdout_manifestid BID
extract_stdout_filehash HASH
assert [ $(ls "$SERVALINSTANCE_PATH/hash" | wc -l) -eq 1 ]
assert [ $(find "$SERVALINSTANCE_PATH/hash" -type f| wc -l) -eq 1 ]
executeOk_servald rhizome journal append "$SIDA" "$BID" file2
tfw_cat --stdout --stderr
assert [ $(ls "$SERVALINSTANCE_PATH/hash" | wc -l) -eq 2 ]
assert [ $(find "$SERVALINSTANCE_PATH/hash" -type f| wc -l) -eq 2 ]
assertStderrGrep 'Reusing journal'
executeOk_servald rhizome extract file "$BID" filex
tfw_cat --stdout --stderr
@ -1136,11 +1137,13 @@ setup_JournalAppendSharedPayload() {
cat file1 file2 >file12
executeOk_servald rhizome add file '' file1
extract_stdout_filehash HASH1
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$HASH1"
get_external_blob_path blob_file1 "$HASH1"
assert cmp file1 "$blob_file1"
executeOk_servald rhizome add file '' file12
extract_stdout_filehash HASH12
assert cmp file12 "$SERVALINSTANCE_PATH/blob/$HASH12"
assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
get_external_blob_path blob_file12 "$HASH12"
assert cmp file12 "$blob_file12"
assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f| wc -l) -eq 2 ]
}
test_JournalAppendSharedPayload() {
executeOk_servald rhizome journal append "$SIDA" "" file1
@ -1148,14 +1151,14 @@ test_JournalAppendSharedPayload() {
assert_stdout_add_file file1
extract_stdout_filehash addedhash
assert [ "$addedhash" = "$HASH1" ]
assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f| wc -l) -eq 2 ]
extract_stdout_manifestid BID
executeOk_servald rhizome journal append "$SIDA" "$BID" file2
tfw_cat --stdout --stderr
assert_stdout_add_file file12 !name
extract_stdout_filehash addedhash
assert [ "$addedhash" = "$HASH12" ]
assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f| wc -l) -eq 2 ]
}
doc_JournalAddCreate="Cannot create a journal using file add"

View File

@ -480,10 +480,11 @@ setup_CorruptPayload() {
wait_until bundle_received_by "$BID:$VERSION" +B
set_instance +A
stop_servald_server
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
get_external_blob_path blob_file "$FILEHASH"
assert cmp file1 "$blob_file"
create_file file2 1024
assert --error-on-fail ! cmp file1 file2
cp file2 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
cp file2 "$blob_file"
execute --exit-status=255 "$servald" rhizome extract file "$BID" file1a
# TODO at the moment, the re-fetch is only triggered by restarting the
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
@ -503,12 +504,13 @@ setup_MissingPayload() {
set rhizome.max_blob_size 0 \
set debug.rhizome_store 1
rhizome_add_file file1 1024
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
get_external_blob_path blob_file "$FILEHASH"
assert cmp file1 "$blob_file"
start_servald_instances +A +B
wait_until bundle_received_by "$BID:$VERSION" +B
set_instance +A
stop_servald_server
rm -f "$SERVALINSTANCE_PATH/blob/$FILEHASH"
rm -f "$blob_file"
execute --exit-status=1 --stderr "$servald" rhizome extract file "$BID" file1a
# TODO at the moment, the re-fetch is only triggered by restarting the
# daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch

View File

@ -1097,11 +1097,13 @@ setup_RhizomeJournalAppendSharedPayload() {
cat file1 file2 >file12
executeOk_servald rhizome add file '' file1
extract_stdout_filehash HASH1
assert cmp file1 "$SERVALINSTANCE_PATH/blob/$HASH1"
get_external_blob_path blob_path1 "$HASH1"
assert cmp file1 "$blob_path1"
executeOk_servald rhizome add file '' file12
extract_stdout_filehash HASH12
assert cmp file12 "$SERVALINSTANCE_PATH/blob/$HASH12"
assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
get_external_blob_path blob_path12 "$HASH12"
assert cmp file12 "$blob_path12"
assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f | wc -l) -eq 2 ]
}
test_RhizomeJournalAppendSharedPayload() {
rest_request POST "/restful/rhizome/append" 201 \
@ -1115,7 +1117,7 @@ test_RhizomeJournalAppendSharedPayload() {
assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Code: 2$CR\$"
assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Message: .*payload already in store.*$CR\$"
assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Message: .*payload already in store.*$CR\$"
assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f | wc -l) -eq 2 ]
extract_http_header BID response.headers Serval-Rhizome-Bundle-Id "$rexp_manifestid"
rest_request POST "/restful/rhizome/append" 201 \
--form-part="bundle-id=$BID;type=rhizome/bid;format=hex" \
@ -1127,7 +1129,7 @@ test_RhizomeJournalAppendSharedPayload() {
assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Bundle-Status-Message: .*bundle new to store.*$CR\$"
assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Code: 2$CR\$"
assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Message: .*payload already in store.*$CR\$"
assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f | wc -l) -eq 2 ]
}
doc_RhizomeAppendNonJournalForbidden="REST API Rhizome cannot append to non-journal"