Link multiple versions of a journal payload to the same file

This commit is contained in:
Jeremy Lakeman 2016-05-24 14:46:46 +09:30
parent 4a4e4c2dcf
commit 89ab832c21
4 changed files with 153 additions and 43 deletions

View File

@ -117,40 +117,40 @@ typedef struct rhizome_manifest
* appended. All fields below may not be valid until the manifest has been
* finalised.
*/
bool_t finalised;
bool_t finalised:1;
/* Whether the manifest contains a signature that corresponds to the manifest
* id (ie public key).
*/
bool_t selfSigned;
bool_t selfSigned:1;
/* Set if the ID field (cryptoSignPublic) contains a bundle ID.
*/
bool_t has_id;
bool_t has_id:1;
/* Set if the filehash field contains a file hash.
*/
bool_t has_filehash;
bool_t has_filehash:1;
/* Set if the tail field is valid, ie, the bundle is a journal.
*/
bool_t is_journal;
bool_t is_journal:1;
/* Set if the date field is valid, ie, the manifest contains a valid "date"
* field.
*/
bool_t has_date;
bool_t has_date:1;
/* Set if the bundle_key field is valid, ie, the manifest contains a valid
* "BK" field.
*/
bool_t has_bundle_key;
bool_t has_bundle_key:1;
/* Set if the sender and recipient fields are valid, ie, the manifest
* contains a valid "sender"/"recipient" field.
*/
bool_t has_sender;
bool_t has_recipient;
bool_t has_sender:1;
bool_t has_recipient:1;
/* Local authorship. Useful for dividing bundle lists between "sent" and
* "inbox" views.
@ -215,11 +215,6 @@ typedef struct rhizome_manifest
*/
sid_t author;
/* Unused. SHOULD BE DELETED.
*/
unsigned group_count;
char *groups[MAX_MANIFEST_VARS];
size_t manifest_body_bytes;
size_t manifest_all_bytes;
unsigned char manifestdata[MAX_MANIFEST_BYTES];
@ -341,6 +336,7 @@ int rhizome_enabled();
int rhizome_fetch_delay_ms();
#define RHIZOME_BLOB_SUBDIR "blob"
#define RHIZOME_HASH_SUBDIR "hash"
extern __thread sqlite3 *rhizome_db;
serval_uuid_t rhizome_db_uuid;
@ -724,9 +720,10 @@ struct rhizome_write
sqlite3_blob *sql_blob;
rhizome_filehash_t id;
uint8_t id_known;
uint8_t crypt;
uint8_t id_known:1;
uint8_t crypt:1;
uint8_t journal:1;
unsigned char key[RHIZOME_CRYPT_KEY_BYTES];
unsigned char nonce[crypto_box_NONCEBYTES];
};

View File

@ -197,6 +197,10 @@ int rhizome_opendb()
RETURN(-1);
if (emkdirs_info(dbpath, 0700) == -1)
RETURN(-1);
if (!FORMF_RHIZOME_STORE_PATH(dbpath, RHIZOME_HASH_SUBDIR))
RETURN(-1);
if (emkdirs_info(dbpath, 0700) == -1)
RETURN(-1);
if (!sqlite3_temp_directory) {
if (!FORMF_RHIZOME_STORE_PATH(dbpath, "sqlite3tmp"))
RETURN(-1);

View File

@ -85,8 +85,10 @@ static uint64_t rhizome_create_fileblob(sqlite_retry_state *retry, uint64_t id,
static int rhizome_delete_external(const char *id)
{
// attempt to remove any external blob
// attempt to remove any external blob & partial hash file
char blob_path[1024];
if (FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_HASH_SUBDIR, id))
unlink(blob_path);
if (!FORMF_RHIZOME_STORE_PATH(blob_path, "%s/%s", RHIZOME_BLOB_SUBDIR, id))
return -1;
if (unlink(blob_path) == -1) {
@ -673,6 +675,23 @@ void rhizome_fail_write(struct rhizome_write *write)
}
}
/* Save an intermediate SHA-512 state to a file named after the payload's file
 * hash (hex of write_state->id) inside the RHIZOME_HASH_SUBDIR directory of the
 * rhizome store, so that append_existing_journal_file() can later read it back
 * and continue hashing without re-reading the payload.
 *
 * @param write_state  the write whose id field names the hash-state file
 * @param hash_state   the SHA-512 state to persist; written as a raw struct
 * @return 1 on success; otherwise the value of the WHYF()/WHYF_perror()
 *         expression that logged the failure (presumably -1 -- confirm against
 *         the logging macros)
 *
 * A hash-state file that could not be written completely is removed again, so
 * that a truncated file is never left behind for a later reader to trip over.
 */
static int keep_hash(struct rhizome_write *write_state, struct crypto_hash_sha512_state *hash_state)
{
  char dest_path[1024];
  // capture the state of writing the file hash
  if (!FORMF_RHIZOME_STORE_PATH(dest_path, "%s/%s", RHIZOME_HASH_SUBDIR, alloca_tohex_rhizome_filehash_t(write_state->id)))
    return WHYF("Path too long?");
  int fd = open(dest_path, O_WRONLY | O_CREAT | O_TRUNC, 0664);
  if (fd < 0)
    return WHYF_perror("Failed to create %s", dest_path);
  ssize_t w = write(fd, hash_state, sizeof *hash_state);
  // close() can report a deferred write error, so its result matters too
  int close_failed = (close(fd) == -1);
  if (w != (ssize_t) sizeof *hash_state || close_failed) {
    // Log first, then remove the incomplete file; a partial state is useless
    // and would only make every later append attempt fail its size check.
    int ret = WHYF("Failed to write hash state");
    unlink(dest_path);
    return ret;
  }
  DEBUGF(rhizome, "Preserved partial hash to %s", dest_path);
  return 1;
}
enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
{
DEBUGF(rhizome_store, "blob_fd=%d file_offset=%"PRIu64"", write->blob_fd, write->file_offset);
@ -717,7 +736,11 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
DEBUGF(rhizome_store, "Ignoring empty write");
goto failure;
}
struct crypto_hash_sha512_state hash_state;
if (write->journal)
bcopy(&write->sha512_context, &hash_state, sizeof hash_state);
rhizome_filehash_t hash_out;
crypto_hash_sha512_final(&write->sha512_context, hash_out.binary);
@ -809,6 +832,8 @@ enum rhizome_payload_status rhizome_finish_write(struct rhizome_write *write)
goto dbfailure;
}
DEBUGF(rhizome_store, "Renamed %s to %s", blob_path, dest_path);
if (write->journal)
keep_hash(write, &hash_state);
}else{
if (sqlite_exec_void_retry(
&retry,
@ -1554,6 +1579,54 @@ enum rhizome_payload_status rhizome_journal_pipe(struct rhizome_write *write, co
return status;
}
/* Try to reuse the existing journal payload file instead of copying it.
 *
 * Reads the SHA-512 state previously saved for the manifest's current file
 * hash (under RHIZOME_HASH_SUBDIR), hard-links the existing blob file (under
 * RHIZOME_BLOB_SUBDIR) to this write's temporary blob path, opens the link
 * read/write and primes the write state so that new data is placed after the
 * m->filesize bytes already present and hashing resumes from the saved state.
 *
 * @param write  the write being prepared; on success its blob_fd,
 *               file_offset, written_offset and sha512_context are set
 * @param m      manifest of the existing journal (filehash and filesize are read)
 * @return 1 on success; -1 if there is no saved hash state; otherwise the
 *         value of the WHYF()/WHYF_perror() expression that logged the
 *         failure (presumably -1 -- the caller only tests for != -1)
 *
 * On any failure the write state is left untouched, and no link is left
 * behind at the temporary path, so the caller can fall back to copying.
 */
static int append_existing_journal_file(struct rhizome_write *write, rhizome_manifest *m){
  // Try to append directly into the previous journal file, linking them together
  DEBUGF(rhizome, "Attempting to append into journal blob");

  // First, we need to read a previous partial hash state
  char *filehash = alloca_tohex_rhizome_filehash_t(m->filehash);
  char existing_path[1024];
  if (!FORMF_RHIZOME_STORE_PATH(existing_path, "%s/%s", RHIZOME_BLOB_SUBDIR, filehash))
    return WHYF("existing path too long?");
  char hash_path[1024];
  if (!FORMF_RHIZOME_STORE_PATH(hash_path, "%s/%s", RHIZOME_HASH_SUBDIR, filehash))
    return WHYF("hash path too long?");

  int fd = open(hash_path, O_RDONLY);
  if (fd < 0){
    // A missing hash state is the normal "cannot reuse" case; stay quiet about it
    if (errno != ENOENT)
      WHYF_perror("Failed to open partial hash state %s", hash_path);
    return -1;
  }
  struct crypto_hash_sha512_state hash_state;
  ssize_t r = read(fd, &hash_state, sizeof hash_state);
  close(fd);
  if (r != (ssize_t) sizeof hash_state)
    return WHYF("Expected %u bytes", (unsigned)sizeof hash_state);

  char new_path[1024];
  if (!FORMF_RHIZOME_STORE_PATH(new_path, "%s/%"PRIu64, RHIZOME_BLOB_SUBDIR, write->temp_id))
    return WHYF("Temp path too long?");
  if (link(existing_path, new_path)==-1)
    return WHYF_perror("Failed to link journal payloads together");

  // No O_CREAT here, so no mode argument: the link must already exist
  fd = open(new_path, O_RDWR);
  if (fd<0) {
    // Log while errno still describes the open() failure, then remove the
    // link we just made so the temporary path no longer aliases the existing
    // payload when the caller falls back to copying.
    int ret = WHYF_perror("Failed to open new journal file");
    unlink(new_path);
    return ret;
  }

  // (write_data always seeks so we don't have to)
  write->written_offset = write->file_offset = m->filesize;
  write->blob_fd = fd;
  bcopy(&hash_state, &write->sha512_context, sizeof hash_state);

  // Used by tests
  DEBUGF(rhizome,"Reusing journal payload file, keeping %"PRIu64" existing bytes", m->filesize);
  return 1;
}
// open an existing journal bundle, advance the head pointer, duplicate the existing content and get ready to add more.
enum rhizome_payload_status rhizome_write_open_journal(struct rhizome_write *write, rhizome_manifest *m, uint64_t advance_by, uint64_t append_size)
{
@ -1568,33 +1641,43 @@ enum rhizome_payload_status rhizome_write_open_journal(struct rhizome_write *wri
}
if (advance_by > 0)
rhizome_manifest_set_tail(m, m->tail + advance_by);
enum rhizome_payload_status status = rhizome_open_write(write, NULL, new_filesize);
DEBUGF(rhizome, "rhizome_open_write() returned %d %s", status, rhizome_payload_status_message(status));
if (status == RHIZOME_PAYLOAD_STATUS_NEW && copy_length > 0) {
// we don't need to bother decrypting the existing journal payload
enum rhizome_payload_status rstatus = rhizome_journal_pipe(write, &m->filehash, advance_by, copy_length);
DEBUGF(rhizome, "rhizome_journal_pipe() returned %d %s", rstatus, rhizome_payload_status_message(rstatus));
int rstatus_valid = 0;
switch (rstatus) {
case RHIZOME_PAYLOAD_STATUS_EMPTY:
case RHIZOME_PAYLOAD_STATUS_NEW:
case RHIZOME_PAYLOAD_STATUS_STORED:
rstatus_valid = 1;
break;
case RHIZOME_PAYLOAD_STATUS_ERROR:
case RHIZOME_PAYLOAD_STATUS_TOO_BIG:
rstatus_valid = 1;
status = rstatus;
break;
case RHIZOME_PAYLOAD_STATUS_WRONG_SIZE:
case RHIZOME_PAYLOAD_STATUS_WRONG_HASH:
case RHIZOME_PAYLOAD_STATUS_CRYPTO_FAIL:
case RHIZOME_PAYLOAD_STATUS_EVICTED:
// rhizome_journal_pipe() should not return any of these codes
FATALF("rhizome_journal_pipe() returned %d %s", rstatus, rhizome_payload_status_message(rstatus));
if (status == RHIZOME_PAYLOAD_STATUS_NEW) {
write->journal=1;
if (copy_length > 0 && advance_by == 0){
if (append_existing_journal_file(write, m)!=-1)
copy_length = 0;
}
if (copy_length > 0){
// we don't need to bother decrypting the existing journal payload
enum rhizome_payload_status rstatus = rhizome_journal_pipe(write, &m->filehash, advance_by, copy_length);
DEBUGF(rhizome, "rhizome_journal_pipe() returned %d %s", rstatus, rhizome_payload_status_message(rstatus));
int rstatus_valid = 0;
switch (rstatus) {
case RHIZOME_PAYLOAD_STATUS_EMPTY:
case RHIZOME_PAYLOAD_STATUS_NEW:
case RHIZOME_PAYLOAD_STATUS_STORED:
rstatus_valid = 1;
break;
case RHIZOME_PAYLOAD_STATUS_ERROR:
case RHIZOME_PAYLOAD_STATUS_TOO_BIG:
rstatus_valid = 1;
status = rstatus;
break;
case RHIZOME_PAYLOAD_STATUS_WRONG_SIZE:
case RHIZOME_PAYLOAD_STATUS_WRONG_HASH:
case RHIZOME_PAYLOAD_STATUS_CRYPTO_FAIL:
case RHIZOME_PAYLOAD_STATUS_EVICTED:
// rhizome_journal_pipe() should not return any of these codes
FATALF("rhizome_journal_pipe() returned %d %s", rstatus, rhizome_payload_status_message(rstatus));
}
if (!rstatus_valid)
FATALF("rstatus = %d", rstatus);
}
if (!rstatus_valid)
FATALF("rstatus = %d", rstatus);
}
if (status == RHIZOME_PAYLOAD_STATUS_NEW) {
status = rhizome_write_derive_key(m, write);

View File

@ -1050,6 +1050,32 @@ test_JournalAppend() {
assert diff file filex
}
doc_JournalAppendNoHash="Appending to a journal should not rehash"
setup_JournalAppendNoHash() {
   # Bring up a servald instance with rhizome, then enable rhizome debug
   # logging (the test greps stderr for a debug message) and set
   # rhizome.max_blob_size to 0 -- presumably so payloads are kept as
   # external files rather than database blobs; confirm against the config docs.
   setup_servald
   setup_rhizome
   executeOk_servald config set rhizome.max_blob_size 0 set debug.rhizome on
   # Two journal fragments, plus their concatenation as the expected payload
   printf '%s\n' "Part One" >file1
   printf '%s\n' "Part Two" >file2
   cat file1 file2 >file
}
test_JournalAppendNoHash() {
# Create a new journal from file1 (empty bundle id argument) and capture its id
executeOk_servald rhizome journal append $SIDA "" file1
tfw_cat --stdout --stderr
assert_stdout_add_file file1
extract_stdout_manifestid BID
# The first append should have left exactly one entry in the instance's "hash" directory
assert [ $(ls "$SERVALINSTANCE_PATH/hash" | wc -l) -eq 1 ]
# Append file2 to the same journal
executeOk_servald rhizome journal append $SIDA $BID file2
tfw_cat --stdout --stderr
# A second entry is expected for the new payload version
assert [ $(ls "$SERVALINSTANCE_PATH/hash" | wc -l) -eq 2 ]
# The debug log must show that the existing payload file was reused
assertStderrGrep 'Reusing journal'
# The extracted payload must equal file1 followed by file2
executeOk_servald rhizome extract file $BID filex
tfw_cat --stdout --stderr
assert diff file filex
}
doc_JournalAppendSharedPayload="Journal append produces a shared payload"
setup_JournalAppendSharedPayload() {
setup_servald