Rhizome add does not add duplicate files

- If a bundle has the same payload hash and manifest.name as one already in
   the rhizome store, then the "dna rhizome add" command will not add it
 - Now passes fourth test case in tests/dna_rhizome
This commit is contained in:
Andrew Bettison 2012-04-12 18:30:52 +09:30
parent 575ef91c45
commit 7b53fd6782
5 changed files with 133 additions and 44 deletions

View File

@ -159,6 +159,7 @@ int parseCommandLine(int argc, char **args)
if (cli_call<0) return cli_usage();
/* Otherwise, make call */
setVerbosity(confValueGet("debug",""));
return command_line_options[cli_call].function(argc,args, &command_line_options[cli_call]);
}
@ -229,8 +230,6 @@ int app_server_start(int argc,char **argv,struct command_line_option *o)
" You should probably put something in the interfaces setting.\n");
}
setVerbosity(confValueGet("debug",""));
int pid=-1;
int running = servalNodeRunning(&pid);
if (running<0) return -1;
@ -639,17 +638,17 @@ int app_rhizome_add_file(int argc, char **argv, struct command_line_option *o)
* it, otherwise create a blank manifest. */
rhizome_manifest *m = NULL;
if (manifestpath[0]) {
m = rhizome_read_manifest_file(manifestpath, 0, RHIZOME_VERIFY);
m = rhizome_read_manifest_file(manifestpath, 0, 0); // no verify
} else {
m = rhizome_new_manifest();
}
/* Ensure the manifest has a "name" value. Use the file's basename if missing. */
/* Use the file's basename to fill in a missing "name". */
if (rhizome_manifest_get(m, "name", NULL, 0) == NULL) {
const char *name = strrchr(filepath, '/');
name = name ? name + 1 : filepath;
rhizome_manifest_set(m, "name", name);
}
/* Ensure the manifest has a "date" value. Use current time if missing. */
/* Use current time to fill in a missing "date". */
if (rhizome_manifest_get(m, "date", NULL, 0) == NULL) {
rhizome_manifest_set_ll(m, "date", overlay_gettime_ms());
}
@ -658,7 +657,7 @@ int app_rhizome_add_file(int argc, char **argv, struct command_line_option *o)
int ret = rhizome_add_manifest(m, filepath,
NULL, // no groups - XXX should allow them
255, // ttl - XXX should read from somewhere
0, // int verifyP
manifestpath[0] != 0, // int verifyP
1, // int checkFileP
1 // int signP
);

View File

@ -134,6 +134,13 @@ int rhizome_add_manifest(rhizome_manifest *m,
char hexhash[SHA512_DIGEST_STRING_LENGTH];
int verifyErrors = 0;
/* Ensure manifest meets basic sanity checks. */
const char *name = rhizome_manifest_get(m, "name", NULL, 0);
if (name == NULL || !name[0])
return WHY("Manifest missing 'name' field");
if (rhizome_manifest_get_ll(m, "date") == -1)
return WHY("Manifest missing 'date' field");
/* Keep payload file name handy for later */
m->dataFileName = strdup(filename);
@ -148,8 +155,10 @@ int rhizome_add_manifest(rhizome_manifest *m,
return WHY("Could not stat() payload file");
m->fileLength = stat.st_size;
long long mfilesize = rhizome_manifest_get_ll(m, "filesize");
if (mfilesize != -1 && mfilesize != m->fileLength)
if (mfilesize != -1 && mfilesize != m->fileLength) {
WHYF("Manifest.filesize (%lld) != actual file size (%lld)", mfilesize, m->fileLength);
++verifyErrors;
}
}
/* Compute hash of payload unless we know verification has already failed */
@ -163,33 +172,55 @@ int rhizome_add_manifest(rhizome_manifest *m,
/* Check that payload hash matches manifest */
if (checkFileP) {
const char *mhexhash = rhizome_manifest_get(m, "filehash", NULL, 0);
if (mhexhash && strcmp(hexhash, mhexhash))
if (mhexhash && strcmp(hexhash, mhexhash)) {
WHYF("Manifest.filehash (%s) != actual file hash (%s)", mhexhash, hexhash);
++verifyErrors;
}
}
/* If any signature errors were encountered on loading, or manifest is inconsistent with payload,
then bail out now. */
if (verifyP) {
if (m->errors)
WHYF("Manifest.errors (%d) is non-zero", m->errors);
if (verifyErrors || m->errors)
return WHY("Errors encountered verifying bundle manifest");
}
/* Fill in the manifest to avoid redundant work by rhizome_manifest_finalise() below */
/* Fill in the manifest so that duplicate detection can be performed, and to avoid redundant work
by rhizome_manifest_finalise() below. */
if (checkFileP) {
rhizome_manifest_set(m, "filehash", hexhash);
rhizome_manifest_set_ll(m, "first_byte", 0);
rhizome_manifest_set_ll(m, "last_byte", m->fileLength);
}
/* Check if a manifest is already stored for the same payload with the same details.
This catches the case of "dna rhizome add file <filename>" on the same file more than once.
(Debounce!) */
rhizome_manifest *dupm = NULL;
if (rhizome_find_duplicate(m, &dupm) == -1)
return WHY("Errors encountered searching for duplicate manifest");
if (dupm) {
if (debug & DEBUG_RHIZOME) fprintf(stderr, "Not adding manifest for payload name=\"%s\" hexhash=%s - duplicate found in rhizome store\n", name, hexhash);
#if 0
/* TODO Upgrade the version of the duplicate? */
long long version = rhizome_manifest_get_ll(m, "version");
long long dupversion = rhizome_manifest_get_ll(dupm, "version");
if (version > dupversion) {
rhizome_manifest_set_ll(dupm, "version", version);
...
}
#endif
rhizome_manifest_free(dupm);
return 0;
}
/* Supply manifest version number if missing, so we can do the version check below */
if (rhizome_manifest_get(m, "version", NULL, 0) == NULL) {
rhizome_manifest_set_ll(m, "version", overlay_gettime_ms());
}
/* Check if a manifest is already stored for the same file with the same details, except version
number. This catches the case of "dna rhizome add file <filename>" on the same file more than
once. (Debounce!) */
/* If the manifest already has an ID, look to see if we possess its private key */
if ((id = rhizome_manifest_get(m, "id", NULL, 0))) {
rhizome_hex_to_bytes(id, m->cryptoSignPublic, crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES*2);

View File

@ -166,8 +166,8 @@ int rhizome_drop_stored_file(char *id,int maximum_priority);
int rhizome_manifest_priority(char *id);
rhizome_manifest *rhizome_read_manifest_file(const char *filename, int bufferPAndSize, int flags);
int rhizome_hash_file(const char *filename,char *hash_out);
char *rhizome_manifest_get(rhizome_manifest *m,char *var,char *value_out,int maxlen);
long long rhizome_manifest_get_ll(rhizome_manifest *m,char *var);
char *rhizome_manifest_get(const rhizome_manifest *m, const char *var, char *out, int maxlen);
long long rhizome_manifest_get_ll(rhizome_manifest *m, const char *var);
int rhizome_manifest_set_ll(rhizome_manifest *m,char *var,long long value);
int rhizome_manifest_set(rhizome_manifest *m, const char *var, const char *value);
long long rhizome_file_size(char *filename);
@ -207,6 +207,7 @@ int chartonybl(int c);
int rhizome_manifest_extract_signature(rhizome_manifest *m,int *ofs);
long long sqlite_exec_int64(char *sqlformat,...);
int rhizome_update_file_priority(char *fileid);
int rhizome_find_duplicate(const rhizome_manifest *m, rhizome_manifest **found);
int rhizome_manifest_to_bar(rhizome_manifest *m,unsigned char *bar);
char nybltochar(int n);
int rhizome_queue_manifest_import(rhizome_manifest *m,struct sockaddr_in *peerip);

View File

@ -74,7 +74,7 @@ rhizome_manifest *rhizome_read_manifest_file(const char *filename, int bufferP,
if (sscanf(line,"%[^=]=%[^\n\r]",var,value)==2)
{
if (rhizome_manifest_get(m,var,NULL,0)!=NULL) {
WHY("Error in manifest file (duplicate variable -- keeping first value).");
if (debug&DEBUG_RHIZOME) fprintf(stderr, "Error in manifest file (duplicate variable \"%s\"-- keeping first value)\n", var);
m->errors++;
}
if (m->var_count<MAX_MANIFEST_VARS)
@ -119,7 +119,7 @@ rhizome_manifest *rhizome_read_manifest_file(const char *filename, int bufferP,
char *id=rhizome_manifest_get(m,"id",NULL,0);
if (!id) {
if (debug&DEBUG_RHIZOME) fprintf(stderr,"Manifest lacks id variable.");
if (debug&DEBUG_RHIZOME) fprintf(stderr,"Manifest lacks id variable.\n");
m->errors++; }
else {
unsigned char manifest_bytes[crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES];
@ -139,7 +139,7 @@ rhizome_manifest *rhizome_read_manifest_file(const char *filename, int bufferP,
}
if (debug&DEBUG_RHIZOME)
WHY("Group membership determination not implemented (see which signatories are groups? what about manifests signed by groups we don't yet know about?)");
fprintf(stderr, "Group membership determination not implemented (see which signatories are groups? what about manifests signed by groups we don't yet know about?)\n");
}
m->manifest_bytes=end_of_text;
@ -154,24 +154,22 @@ int rhizome_hash_file(const char *filename,char *hash_out)
and may be very resource constrained. Thus we need a streamable SHA-512
implementation.
*/
FILE *f=fopen(filename,"r");
if (!f) return WHY("Could not open file for reading to calculage SHA512 hash.");
unsigned char buffer[8192];
int r;
FILE *f = fopen(filename, "r");
if (!f)
return WHYF("Could not read %s to calculate SHA512 hash.", filename);
SHA512_CTX context;
SHA512_Init(&context);
while(!feof(f)) {
r=fread(buffer,1,8192,f);
if (r>0) SHA512_Update(&context,buffer,r);
while (!feof(f)) {
unsigned char buffer[8192];
int r = fread(buffer, 1, 8192, f);
if (r > 0)
SHA512_Update(&context, buffer, r);
}
SHA512_End(&context,(char *)hash_out);
SHA512_End(&context, (char *)hash_out);
return 0;
}
char *rhizome_manifest_get(rhizome_manifest *m,char *var,char *out,int maxlen)
char *rhizome_manifest_get(const rhizome_manifest *m, const char *var, char *out, int maxlen)
{
int i,j;
@ -190,15 +188,18 @@ char *rhizome_manifest_get(rhizome_manifest *m,char *var,char *out,int maxlen)
return NULL;
}
long long rhizome_manifest_get_ll(rhizome_manifest *m,char *var)
long long rhizome_manifest_get_ll(rhizome_manifest *m, const char *var)
{
if (!m)
return -1;
int i;
if (!m) return -1;
for(i=0;i<m->var_count;i++)
if (!strcmp(m->vars[i],var))
return strtoll(m->values[i],NULL,10);
for (i = 0;i != m->var_count; ++i)
if (!strcmp(m->vars[i], var)) {
char *vp = m->values[i];
char *ep = vp;
long long val = strtoll(vp, &ep, 10);
return (ep != vp && *ep == '\0') ? val : -1;
}
return -1;
}
@ -380,7 +381,7 @@ int rhizome_manifest_pack_variables(rhizome_manifest *m)
}
m->manifestdata[ofs++]=0x00;
m->manifest_bytes=ofs;
if (debug&DEBUG_RHIZOME) WHY("Repacked variables in manifest.");
if (debug&DEBUG_RHIZOME) fprintf(stderr, "Repacked variables in manifest.\n");
m->manifest_all_bytes=ofs;
/* Recalculate hash */

View File

@ -340,7 +340,7 @@ int rhizome_store_bundle(rhizome_manifest *m, const char *associated_filename)
}
/* Store manifest */
WHY("*** Writing into manifests table");
if (debug & DEBUG_RHIZOME) fprintf(stderr, "Writing into manifests table\n");
snprintf(sqlcmd,1024,
"INSERT INTO MANIFESTS(id,manifest,version,inserttime,bar) VALUES('%s',?,%lld,%lld,?);",
manifestid,m->version,overlay_gettime_ms());
@ -392,8 +392,9 @@ int rhizome_store_bundle(rhizome_manifest *m, const char *associated_filename)
WHY("*** Insert into manifests failed (4).");
return WHY("SQLite3 failed to insert row for manifest");
}
else
WHY("*** Insert into manifests apparently worked.");
else {
if (debug & DEBUG_RHIZOME) fprintf(stderr, "Insert into manifests apparently worked.\n");
}
/* Create relationship between file and manifest */
long long r=sqlite_exec_int64("INSERT INTO FILEMANIFESTS(manifestid,fileid) VALUES('%s','%s');",
@ -539,12 +540,12 @@ int rhizome_list_manifests(int limit, int offset)
break;
}
size_t filesize = sqlite3_column_int(statement, 1);
size_t manifestblobsize = sqlite3_column_bytes(statement, 4);
const char *manifestblob = (char *) sqlite3_column_blob(statement, 4);
size_t manifestblobsize = sqlite3_column_bytes(statement, 4); // must call after sqlite3_column_blob()
//printf("manifest blob = %s\n", manifestblob);
rhizome_manifest *m = rhizome_read_manifest_file(manifestblob, manifestblobsize, RHIZOME_VERIFY);
rhizome_manifest *m = rhizome_read_manifest_file(manifestblob, manifestblobsize, 0);
const char *name = rhizome_manifest_get(m, "name", NULL, 0);
printf("file id = %s\nfile length = %u\nfile datavalid = %u\nfile name = \"%s\"\n",
printf("file id = %s\nfile length = %u\nfile datavalid = %u\nfile name = \"%s\"\n\n",
sqlite3_column_text(statement, 0),
filesize,
sqlite3_column_int(statement, 2),
@ -750,3 +751,59 @@ int rhizome_update_file_priority(char *fileid)
}
return 0;
}
/* Search the database for a manifest having the same name and payload content.
 *
 * The rhizome store is queried for every manifest associated with the payload
 * whose hash is m->fileHexHash.  Each candidate is parsed and its "name" (and,
 * as a precaution, its "filehash") is compared with that of m.
 *
 * @param m      Manifest to look for.  Its payload must already be hashed
 *               (m->fileHashedP non-zero, m->fileHexHash filled in).
 * @param found  On a match, *found is set to the parsed stored manifest, which
 *               the caller must release with rhizome_manifest_free().  *found
 *               is left untouched when no match is found or an error occurs.
 *
 * @return 1 if a duplicate was found, 0 if there is none, or the value
 *         returned by WHY()/WHYF() on error (the caller visible in this change
 *         tests for -1).
 *
 * A manifest (either m or a stored one) that lacks a "name" or "filehash"
 * value never matches.  A stored manifest that cannot be parsed is reported
 * and skipped.
 */
int rhizome_find_duplicate(const rhizome_manifest *m, rhizome_manifest **found)
{
  /* The query is a constant, so there is no need to build it at run time. */
  static const char sqlcmd[] =
    "SELECT manifests.id, manifests.manifest FROM filemanifests, manifests WHERE filemanifests.fileid = ? AND filemanifests.manifestid = manifests.id";

  if (!m->fileHashedP)
    return WHY("Manifest payload is not hashed");

  int ret = 0;
  /* sqlite3_finalize(NULL) is a harmless no-op, so a single finalize at the
     end covers every path, including a failed prepare. */
  sqlite3_stmt *statement = NULL;

  if (sqlite3_prepare_v2(rhizome_db, sqlcmd, sizeof sqlcmd, &statement, NULL) != SQLITE_OK) {
    ret = WHY(sqlite3_errmsg(rhizome_db));
  } else {
    if (debug & DEBUG_RHIZOME) fprintf(stderr, "fileHaxHash = \"%s\"\n", m->fileHexHash);
    if (sqlite3_bind_text(statement, 1, m->fileHexHash, -1, SQLITE_STATIC) != SQLITE_OK) {
      ret = WHY(sqlite3_errmsg(rhizome_db));
    } else {
      const char *name = rhizome_manifest_get(m, "name", NULL, 0);
      int rows = 0;
      int stepcode;
      while ((stepcode = sqlite3_step(statement)) == SQLITE_ROW) {
        ++rows;
        if (debug & DEBUG_RHIZOME) fprintf(stderr, "Row %d\n", rows);
        if (!(   sqlite3_column_count(statement) == 2
              && sqlite3_column_type(statement, 0) == SQLITE_TEXT
              && sqlite3_column_type(statement, 1) == SQLITE_BLOB
        )) {
          ret = WHY("Incorrect statement columns");
          break;
        }
        const char *manifestid = (char *) sqlite3_column_text(statement, 0);
        size_t manifestidsize = sqlite3_column_bytes(statement, 0); // must call after sqlite3_column_text()
        if (manifestidsize != crypto_sign_edwards25519sha512batch_PUBLICKEYBYTES * 2) {
          ret = WHYF("Malformed manifest.id from query: %s", manifestid);
          break;
        }
        const char *manifestblob = (char *) sqlite3_column_blob(statement, 1);
        size_t manifestblobsize = sqlite3_column_bytes(statement, 1); // must call after sqlite3_column_blob()
        rhizome_manifest *mq = rhizome_read_manifest_file(manifestblob, manifestblobsize, 0);
        if (mq == NULL) {
          /* An unreadable stored manifest cannot be a duplicate; report it and keep looking. */
          WHYF("Could not parse stored manifest id=%s", manifestid);
          continue;
        }
        const char *nameq = rhizome_manifest_get(mq, "name", NULL, 0);
        const char *filehashq = rhizome_manifest_get(mq, "filehash", NULL, 0);
        if (debug & DEBUG_RHIZOME)
          fprintf(stderr, "Consider manifest.id=%s manifest.name=\"%s\"\n", manifestid, nameq ? nameq : "(null)");
        /* The query already selected on the payload hash, so there is no strict need to compare
           "filehash" here, but we do so as a precaution.  Missing values never match (and must
           not be handed to strcmp()). */
        if (   name != NULL
            && nameq != NULL
            && filehashq != NULL
            && strcmp(nameq, name) == 0
            && strncmp(filehashq, m->fileHexHash, SHA512_DIGEST_STRING_LENGTH) == 0
        ) {
          *found = mq; // ownership passes to the caller
          ret = 1;
          break;
        }
        rhizome_manifest_free(mq);
      }
      /* Every break above sets ret non-zero, so reaching here with ret == 0 means the loop ended
         because sqlite3_step() returned something other than SQLITE_ROW.  Anything but
         SQLITE_DONE is a database error, which must not be mistaken for "no duplicate". */
      if (ret == 0 && stepcode != SQLITE_DONE)
        ret = WHY(sqlite3_errmsg(rhizome_db));
    }
  }
  sqlite3_finalize(statement);
  return ret;
}