diff --git a/rhizome.h b/rhizome.h
index 6c71740e..cf629004 100644
--- a/rhizome.h
+++ b/rhizome.h
@@ -632,7 +632,6 @@ int rhizome_advertise_manifest(struct subscriber *dest, rhizome_manifest *m);
 int rhizome_delete_bundle(const rhizome_bid_t *bidp);
 int rhizome_delete_manifest(const rhizome_bid_t *bidp);
 int rhizome_delete_payload(const rhizome_bid_t *bidp);
-int rhizome_delete_file_id(const char *id);
 int rhizome_delete_file(const rhizome_filehash_t *hashp);
 
 #define RHIZOME_DONTVERIFY 0
diff --git a/rhizome_database.c b/rhizome_database.c
index 9af5c723..3d486045 100644
--- a/rhizome_database.c
+++ b/rhizome_database.c
@@ -1304,7 +1304,9 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
       "SELECT id FROM FILES WHERE datavalid = 0;", END);
   while (sqlite_step_retry(&retry, statement) == SQLITE_ROW) {
     const char *id = (const char *) sqlite3_column_text(statement, 0);
-    if (rhizome_delete_file_id(id)==0 && report)
+    rhizome_filehash_t filehash;
+    if (str_to_rhizome_filehash_t(&filehash, id) != -1
+      && rhizome_delete_file(&filehash)==0 && report)
       ++report->deleted_stale_incoming_files;
   }
   sqlite3_finalize(statement);
@@ -1315,7 +1317,9 @@ int rhizome_cleanup(struct rhizome_cleanup_report *report)
       INT64, insert_horizon_no_manifest, END);
   while (sqlite_step_retry(&retry, statement) == SQLITE_ROW) {
     const char *id = (const char *) sqlite3_column_text(statement, 0);
-    if (rhizome_delete_file_id(id)==0 && report)
+    rhizome_filehash_t filehash;
+    if (str_to_rhizome_filehash_t(&filehash, id) != -1
+      && rhizome_delete_file(&filehash)==0 && report)
       ++report->deleted_orphan_files;
   }
   sqlite3_finalize(statement);
diff --git a/rhizome_store.c b/rhizome_store.c
index b0bb14a5..63d1fada 100644
--- a/rhizome_store.c
+++ b/rhizome_store.c
@@ -42,6 +42,15 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 
 uint64_t rhizome_copy_file_to_blob(int fd, uint64_t id, size_t size);
 
+/* Form the store path of an external file: "<subdir>/XX/YY/<rest>", where XX
+ * and YY are the first two bytes of the hash in hex and <rest> is the hex of
+ * the remaining bytes.  Callers treat a zero result as "path too long".
+ */
+static int form_store_blob_path(char *buff, size_t buff_size, const char *subdir, const rhizome_filehash_t *hash){
+  return formf_rhizome_store_path(buff, buff_size, "%s/%02X/%02X/%s", subdir, hash->binary[0], hash->binary[1], alloca_tohex(&hash->binary[2], sizeof(hash->binary)-2));
+}
+#define FORM_BLOB_PATH(BUFF,SUBDIR,HASH) form_store_blob_path((BUFF),sizeof(BUFF),(SUBDIR),(HASH))
+
 enum rhizome_payload_status rhizome_exists(const rhizome_filehash_t *hashp)
 {
   sqlite_retry_state retry = SQLITE_RETRY_STATE_DEFAULT;
@@ -56,12 +65,22 @@ enum rhizome_payload_status rhizome_exists(const rhizome_filehash_t *hashp)
     return RHIZOME_PAYLOAD_STATUS_NEW;
 
   char blob_path[1024];
-  if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp))){
+  if (FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, hashp)){
     struct stat st;
     if (stat(blob_path, &st) == 0)
       return RHIZOME_PAYLOAD_STATUS_STORED;
   }
 
+  char legacy_path[1024];
+  // migrate from the flat folder to the sub-tree layout; the parent directories of the NEW path must exist before rename()
+  if (FORMF_RHIZOME_STORE_PATH(legacy_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp))){
+    struct stat st;
+    if (stat(legacy_path, &st) == 0
+      && emkdirsn(blob_path, strrchr(blob_path,'/') - blob_path, 0700)!=-1
+      && rename(legacy_path, blob_path) != -1)
+      return RHIZOME_PAYLOAD_STATUS_STORED;
+  }
+
   uint64_t blob_rowid = 0;
   stepcode = sqlite_exec_uint64_retry(&retry, &blob_rowid,
       "SELECT rowid "
@@ -97,13 +116,13 @@ static uint64_t rhizome_create_fileblob(sqlite_retry_state *retry, uint64_t id,
   return rowid;
 }
 
-static int rhizome_delete_external(const char *id)
+static int rhizome_delete_external(const rhizome_filehash_t *id)
 {
   // attempt to remove any external blob & partial hash file
   char blob_path[1024];
-  if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_HASH_SUBDIR, id))
+  if (FORM_BLOB_PATH(blob_path, RHIZOME_HASH_SUBDIR, id))
     unlink(blob_path);
-  if (!FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, id))
+  if (!FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, id))
     return -1;
   if (unlink(blob_path) == -1) {
     if (errno != ENOENT)
@@ -114,14 +133,14 @@ static int rhizome_delete_external(const char *id)
   return 0;
 }
 
-static int rhizome_delete_file_id_retry(sqlite_retry_state *retry, const char *id)
+static int rhizome_delete_file_retry(sqlite_retry_state *retry, const rhizome_filehash_t *filehash)
 {
   int ret = 0;
-  rhizome_delete_external(id);
-  sqlite3_stmt *statement = sqlite_prepare_bind(retry, "DELETE FROM fileblobs WHERE id = ?", STATIC_TEXT, id, END);
+  rhizome_delete_external(filehash);
+  sqlite3_stmt *statement = sqlite_prepare_bind(retry, "DELETE FROM fileblobs WHERE id = ?", RHIZOME_FILEHASH_T, filehash, END);
   if (!statement || sqlite_exec_retry(retry, statement) == -1)
     ret = -1;
-  statement = sqlite_prepare_bind(retry, "DELETE FROM files WHERE id = ?", STATIC_TEXT, id, END);
+  statement = sqlite_prepare_bind(retry, "DELETE FROM files WHERE id = ?", RHIZOME_FILEHASH_T, filehash, END);
   if (!statement || sqlite_exec_retry(retry, statement) == -1)
     ret = -1;
   return ret == -1 ? -1 : sqlite3_changes(rhizome_db) ? 0 : 1;
@@ -133,8 +152,13 @@ static int rhizome_delete_payload_retry(sqlite_retry_state *retry, const rhizome
   int rows = sqlite_exec_strbuf_retry(retry, fh, "SELECT filehash FROM manifests WHERE id = ?", RHIZOME_BID_T, bidp, END);
   if (rows == -1)
     return -1;
-  if (rows && rhizome_delete_file_id_retry(retry, strbuf_str(fh)) == -1)
-    return -1;
+  if (rows){
+    rhizome_filehash_t hash;
+    if (str_to_rhizome_filehash_t(&hash, strbuf_str(fh))==-1)
+      return -1;
+    if (rhizome_delete_file_retry(retry, &hash) == -1)
+      return -1;
+  }
   return 0;
 }
 
@@ -153,12 +177,6 @@ int rhizome_delete_payload(const rhizome_bid_t *bidp)
   return rhizome_delete_payload_retry(&retry, bidp);
 }
 
-int rhizome_delete_file_id(const char *id)
-{
-  sqlite_retry_state retry = SQLITE_RETRY_STATE_DEFAULT;
-  return rhizome_delete_file_id_retry(&retry, id);
-}
-
 /* Remove a file from the database, given its file hash.
  *
  * Returns 0 if file is found and removed
@@ -167,9 +185,10 @@ int rhizome_delete_file_id(const char *id)
  *
  * @author Andrew Bettison
  */
-int rhizome_delete_file(const rhizome_filehash_t *hashp)
+int rhizome_delete_file(const rhizome_filehash_t *filehash)
 {
-  return rhizome_delete_file_id(alloca_tohex_rhizome_filehash_t(*hashp));
+  sqlite_retry_state retry = SQLITE_RETRY_STATE_DEFAULT;
+  return rhizome_delete_file_retry(&retry, filehash);
 }
 
 static uint64_t store_get_free_space()
@@ -293,7 +312,9 @@ static enum rhizome_payload_status store_make_space(uint64_t bytes, struct rhizo
       break;
 
     // drop the existing content and recalculate used space
-    if (rhizome_delete_external(id)==0)
+    rhizome_filehash_t hash;
+    if (str_to_rhizome_filehash_t(&hash, id)!=-1
+      && rhizome_delete_external(&hash)==0)
       external_bytes -= length;
 
     int rowcount=0;
@@ -718,8 +739,10 @@ static int keep_hash(struct rhizome_write *write_state, struct crypto_hash_sha51
 {
   char dest_path[1024];
   // capture the state of writing the file hash
-  if (!FORMF_RHIZOME_STORE_PATH(dest_path, "%s/%s", RHIZOME_HASH_SUBDIR, alloca_tohex_rhizome_filehash_t(write_state->id)))
+  if (!FORM_BLOB_PATH(dest_path, RHIZOME_HASH_SUBDIR, &write_state->id))
     return WHYF("Path too long?");
+  if (emkdirsn(dest_path, strrchr(dest_path,'/') - dest_path, 0700)<0)
+    return -1;
   int fd = open(dest_path, O_WRONLY | O_CREAT | O_TRUNC, 0664);
   if (fd < 0)
     return WHYF_perror("Failed to create %s", dest_path);
@@ -861,7 +884,9 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
 
   if (external) {
     char dest_path[1024];
-    if (!FORMF_RHIZOME_STORE_PATH(dest_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(write->id)))
+    if (!FORM_BLOB_PATH(dest_path, RHIZOME_BLOB_SUBDIR, &write->id))
+      goto dbfailure;
+    if (emkdirsn(dest_path, strrchr(dest_path,'/') - dest_path, 0700)<0)
       goto dbfailure;
     if (rename(blob_path, dest_path) == -1) {
       WHYF_perror("rename(%s, %s)", blob_path, dest_path);
@@ -1093,9 +1118,22 @@ enum rhizome_payload_status rhizome_open_read(struct rhizome_read *read, const r
   crypto_hash_sha512_init(&read->sha512_context);
 
   char blob_path[1024];
-  if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(read->id))){
+  if (FORM_BLOB_PATH(blob_path, RHIZOME_BLOB_SUBDIR, &read->id)){
     int fd = open(blob_path, O_RDONLY);
     DEBUGF(rhizome_store, "open(%s) = %d", alloca_str_toprint(blob_path), fd);
+
+    if (fd == -1 && errno == ENOENT){
+      char legacy_path[1024];
+      // migrate from the flat folder to the sub-tree layout; the parent directories of the NEW path must exist before rename()
+      if (FORMF_RHIZOME_STORE_PATH(legacy_path, "%s/%s", RHIZOME_BLOB_SUBDIR, alloca_tohex_rhizome_filehash_t(*hashp))){
+        struct stat st;
+        if (stat(legacy_path, &st) == 0
+          && emkdirsn(blob_path, strrchr(blob_path,'/') - blob_path, 0700)!=-1
+          && rename(legacy_path, blob_path) != -1)
+          fd = open(blob_path, O_RDONLY);
+      }
+    }
+
     if (fd == -1){
       if (errno!=ENOENT)
         WHYF_perror("open(%s)", alloca_str_toprint(blob_path));
@@ -1670,9 +1708,8 @@ static int append_existing_journal_file(struct rhizome_write *write, const rhizo
   // Try to append directly into the previous journal file, linking them together
   DEBUGF(rhizome, "Attempting to append into journal blob");
   // First, we need to read a previous partial hash state
-  char *filehash = alloca_tohex_rhizome_filehash_t(*hashp);
   char existing_path[1024];
-  if (!FORMF_RHIZOME_STORE_PATH(existing_path, "%s/%s", RHIZOME_BLOB_SUBDIR, filehash))
+  if (!FORM_BLOB_PATH(existing_path, RHIZOME_BLOB_SUBDIR, hashp))
     return WHYF("existing path too long?");
 
   int payloadfd = open(existing_path, O_RDWR, 0664);
@@ -1692,7 +1729,7 @@ static int append_existing_journal_file(struct rhizome_write *write, const rhizo
   }
 
   char hash_path[1024];
-  if (!FORMF_RHIZOME_STORE_PATH(hash_path, "%s/%s", RHIZOME_HASH_SUBDIR, filehash)){
+  if (!FORM_BLOB_PATH(hash_path, RHIZOME_HASH_SUBDIR, hashp)){
     close(payloadfd);
     return WHYF("hash path too long?");
   }
diff --git a/testdefs_rhizome.sh b/testdefs_rhizome.sh
index 28ccd8fe..4c140e15 100644
--- a/testdefs_rhizome.sh
+++ b/testdefs_rhizome.sh
@@ -232,6 +232,17 @@ strip_signatures() {
    done
 }
 
+# Set the variable named by $1 to the external blob path for file hash $2:
+# $SERVALINSTANCE_PATH/blob/<hex 0-1>/<hex 2-3>/<remaining hex digits>
+get_external_blob_path(){
+   local _var="$1"
+   local _hash="$2"
+   local _filepath="$SERVALINSTANCE_PATH/blob/${_hash:0:2}/${_hash:2:2}/${_hash:4}"
+   if [ -n "$_var" ]; then
+      eval "$_var=\$_filepath"
+   fi
+}
+
 extract_stdout_manifestid() {
    extract_stdout_keyvalue "$1" manifestid "$rexp_manifestid"
 }
@@ -617,7 +628,8 @@ rhizome_add_bundles() {
 rhizome_delete_payload_blobs() {
    local filehash
    for filehash; do
-      assert --message="Rhizome external blob file exists, filehash=$filehash" [ -e "$SERVALINSTANCE_PATH/blob/$filehash" ]
-      rm -f "$SERVALINSTANCE_PATH/blob/$filehash"
+      get_external_blob_path blob_path "$filehash"
+      assert --message="Rhizome external blob file exists, filehash=$filehash" [ -e "$blob_path" ]
+      rm -f "$blob_path"
    done
 }
diff --git a/tests/rhizomeops b/tests/rhizomeops
index fbd079e1..993fc825 100755
--- a/tests/rhizomeops
+++ b/tests/rhizomeops
@@ -481,8 +481,9 @@ setup_CorruptExternalBlob() {
    executeOk_servald rhizome add file "$SIDA" file1 file1.manifest
    extract_manifest_id manifestid file1.manifest
    extract_manifest_filehash filehash file1.manifest
-   assert cmp file1 "$SERVALINSTANCE_PATH/blob/$filehash"
-   echo "Replacement" >"$SERVALINSTANCE_PATH/blob/$filehash"
+   get_external_blob_path blob_file "$filehash"
+   assert cmp file1 "$blob_file"
+   echo "Replacement" >"$blob_file"
 }
 test_CorruptExternalBlob() {
    execute --exit-status=255 "$servald" rhizome extract file "$manifestid" file1a
@@ -1111,10 +1112,10 @@ test_JournalAppendNoHash() {
    assert_stdout_add_file file1
    extract_stdout_manifestid BID
    extract_stdout_filehash HASH
-   assert [ $(ls "$SERVALINSTANCE_PATH/hash" | wc -l) -eq 1 ]
+   assert [ $(find "$SERVALINSTANCE_PATH/hash" -type f| wc -l) -eq 1 ]
    executeOk_servald rhizome journal append "$SIDA" "$BID" file2
    tfw_cat --stdout --stderr
-   assert [ $(ls "$SERVALINSTANCE_PATH/hash" | wc -l) -eq 2 ]
+   assert [ $(find "$SERVALINSTANCE_PATH/hash" -type f| wc -l) -eq 2 ]
    assertStderrGrep 'Reusing journal'
    executeOk_servald rhizome extract file "$BID" filex
    tfw_cat --stdout --stderr
@@ -1136,11 +1137,13 @@ setup_JournalAppendSharedPayload() {
    cat file1 file2 >file12
    executeOk_servald rhizome add file '' file1
    extract_stdout_filehash HASH1
-   assert cmp file1 "$SERVALINSTANCE_PATH/blob/$HASH1"
+   get_external_blob_path blob_file1 "$HASH1"
+   assert cmp file1 "$blob_file1"
    executeOk_servald rhizome add file '' file12
    extract_stdout_filehash HASH12
-   assert cmp file12 "$SERVALINSTANCE_PATH/blob/$HASH12"
-   assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
+   get_external_blob_path blob_file12 "$HASH12"
+   assert cmp file12 "$blob_file12"
+   assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f| wc -l) -eq 2 ]
 }
 test_JournalAppendSharedPayload() {
    executeOk_servald rhizome journal append "$SIDA" "" file1
@@ -1148,14 +1151,14 @@ test_JournalAppendSharedPayload() {
    assert_stdout_add_file file1
    extract_stdout_filehash addedhash
    assert [ "$addedhash" = "$HASH1" ]
-   assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
+   assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f| wc -l) -eq 2 ]
    extract_stdout_manifestid BID
    executeOk_servald rhizome journal append "$SIDA" "$BID" file2
    tfw_cat --stdout --stderr
    assert_stdout_add_file file12 !name
    extract_stdout_filehash addedhash
    assert [ "$addedhash" = "$HASH12" ]
-   assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
+   assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f| wc -l) -eq 2 ]
 }
 
 doc_JournalAddCreate="Cannot create a journal using file add"
diff --git a/tests/rhizomeprotocol b/tests/rhizomeprotocol
index 2866a962..7540ee6c 100755
--- a/tests/rhizomeprotocol
+++ b/tests/rhizomeprotocol
@@ -480,10 +480,11 @@ setup_CorruptPayload() {
    wait_until bundle_received_by "$BID:$VERSION" +B
    set_instance +A
    stop_servald_server
-   assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
+   get_external_blob_path blob_file "$FILEHASH"
+   assert cmp file1 "$blob_file"
    create_file file2 1024
    assert --error-on-fail ! cmp file1 file2
-   cp file2 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
+   cp file2 "$blob_file"
    execute --exit-status=255 "$servald" rhizome extract file "$BID" file1a
    # TODO at the moment, the re-fetch is only triggered by restarting the
    # daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
@@ -503,12 +504,13 @@ setup_MissingPayload() {
       set rhizome.max_blob_size 0 \
       set debug.rhizome_store 1
    rhizome_add_file file1 1024
-   assert cmp file1 "$SERVALINSTANCE_PATH/blob/$FILEHASH"
+   get_external_blob_path blob_file "$FILEHASH"
+   assert cmp file1 "$blob_file"
    start_servald_instances +A +B
    wait_until bundle_received_by "$BID:$VERSION" +B
    set_instance +A
    stop_servald_server
-   rm -f "$SERVALINSTANCE_PATH/blob/$FILEHASH"
+   rm -f "$blob_file"
    execute --exit-status=1 --stderr "$servald" rhizome extract file "$BID" file1a
    # TODO at the moment, the re-fetch is only triggered by restarting the
    # daemon. Eventually (when the Rhizome Rank is implemented), the re-fetch
diff --git a/tests/rhizomerestful b/tests/rhizomerestful
index 7a162a7d..ffa3e9f9 100755
--- a/tests/rhizomerestful
+++ b/tests/rhizomerestful
@@ -1097,11 +1097,13 @@ setup_RhizomeJournalAppendSharedPayload() {
    cat file1 file2 >file12
    executeOk_servald rhizome add file '' file1
    extract_stdout_filehash HASH1
-   assert cmp file1 "$SERVALINSTANCE_PATH/blob/$HASH1"
+   get_external_blob_path blob_path1 "$HASH1"
+   assert cmp file1 "$blob_path1"
    executeOk_servald rhizome add file '' file12
    extract_stdout_filehash HASH12
-   assert cmp file12 "$SERVALINSTANCE_PATH/blob/$HASH12"
-   assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
+   get_external_blob_path blob_path12 "$HASH12"
+   assert cmp file12 "$blob_path12"
+   assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f | wc -l) -eq 2 ]
 }
 test_RhizomeJournalAppendSharedPayload() {
    rest_request POST "/restful/rhizome/append" 201 \
@@ -1115,7 +1117,7 @@ test_RhizomeJournalAppendSharedPayload() {
    assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Code: 2$CR\$"
    assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Message: .*payload already in store.*$CR\$"
    assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Message: .*payload already in store.*$CR\$"
-   assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
+   assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f | wc -l) -eq 2 ]
    extract_http_header BID response.headers Serval-Rhizome-Bundle-Id "$rexp_manifestid"
    rest_request POST "/restful/rhizome/append" 201 \
          --form-part="bundle-id=$BID;type=rhizome/bid;format=hex" \
@@ -1127,7 +1129,7 @@ test_RhizomeJournalAppendSharedPayload() {
    assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Bundle-Status-Message: .*bundle new to store.*$CR\$"
    assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Code: 2$CR\$"
    assertGrep --matches=1 --ignore-case response.headers "^Serval-Rhizome-Result-Payload-Status-Message: .*payload already in store.*$CR\$"
-   assert [ $(ls "$SERVALINSTANCE_PATH/blob" | wc -l) -eq 2 ]
+   assert [ $(find "$SERVALINSTANCE_PATH/blob" -type f | wc -l) -eq 2 ]
 }
 
 doc_RhizomeAppendNonJournalForbidden="REST API Rhizome cannot append to non-journal"