2009-02-03 04:09:02 +00:00
|
|
|
|
|
|
|
import os.path
|
|
|
|
import time
|
|
|
|
import urllib
|
|
|
|
import simplejson
|
2009-02-24 18:14:25 +00:00
|
|
|
import datetime
|
2010-02-11 02:43:18 +00:00
|
|
|
from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
|
|
|
|
UnknownAliasError
|
2010-06-07 01:02:15 +00:00
|
|
|
from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
|
2009-02-03 04:09:02 +00:00
|
|
|
from allmydata.util import time_format
|
2009-02-06 02:56:40 +00:00
|
|
|
from allmydata.scripts import backupdb
|
2010-10-29 09:42:23 +00:00
|
|
|
from allmydata.util.encodingutil import listdir_unicode, quote_output, \
|
|
|
|
to_str, FilenameEncodingError, unicode_to_url
|
2010-05-20 00:43:56 +00:00
|
|
|
from allmydata.util.assertutil import precondition
|
2010-07-22 00:14:18 +00:00
|
|
|
from allmydata.util.fileutil import abspath_expanduser_unicode
|
2010-05-20 00:43:56 +00:00
|
|
|
|
2009-02-03 04:09:02 +00:00
|
|
|
|
|
|
|
def get_local_metadata(path):
    """Return a dict of stat(2)-derived metadata for the local file *path*.

    The result always contains "ctime" and "mtime" (taken from st_ctime and
    st_mtime). It additionally contains whichever of the st_mode, st_ino,
    st_dev, st_uid, st_gid, st_rsize, st_creator and st_type fields the
    platform's stat result exposes.

    Raises whatever os.stat() raises (e.g. OSError) if *path* cannot be
    examined.
    """
    metadata = {}

    # posix stat(2) metadata, depends on the platform

    # os.stat_float_times() is a process-wide switch that no longer exists
    # on newer interpreters (where timestamps are always floats), so only
    # call it when it is available.
    if hasattr(os, "stat_float_times"):
        os.stat_float_times(True)
    s = os.stat(path)
    metadata["ctime"] = s.st_ctime
    metadata["mtime"] = s.st_mtime

    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    # Not every platform provides every field; copy only the ones present.
    for field in misc_fields + macos_misc_fields:
        if hasattr(s, field):
            metadata[field] = getattr(s, field)

    # TODO: extended attributes, like on OS-X's HFS+
    return metadata
|
|
|
|
|
|
|
|
def mkdir(contents, options):
    """Create an immutable directory holding *contents*; return its cap.

    *contents* maps each child name to a sequence whose first three items
    are (node type, read-only cap, metadata). The children are serialized
    as JSON and POSTed to options['node-url'] + "uri?t=mkdir-immutable".

    Raises HTTPError if the response status is outside the 2xx range.
    """
    kids = {}
    for childname in contents:
        entry = contents[childname]
        kids[childname] = (entry[0],
                           {"ro_uri": entry[1],
                            "metadata": entry[2],
                            })
    body = simplejson.dumps(kids).encode("utf-8")
    url = options['node-url'] + "uri?t=mkdir-immutable"
    resp = do_http("POST", url, body)
    if not (200 <= resp.status < 300):
        raise HTTPError("Error during mkdir", resp)

    # The response body is the cap of the newly created directory.
    return to_str(resp.read().strip())
|
|
|
|
|
|
|
|
def put_child(dirurl, childname, childcap):
    """Attach *childcap* under the name *childname* in the directory at *dirurl*.

    *dirurl* must end with "/". The child name is URL-quoted and the cap is
    sent as the body of a PUT to "<dirurl><childname>?t=uri".

    Raises HTTPError unless the response status is 200 or 201.
    """
    assert dirurl[-1] == "/"
    quoted_name = urllib.quote(unicode_to_url(childname))
    target = dirurl + quoted_name + "?t=uri"
    resp = do_http("PUT", target, childcap)
    if resp.status in (200, 201):
        return
    raise HTTPError("Error during put_child", resp)
|
2009-02-03 04:09:02 +00:00
|
|
|
|
2009-02-22 18:02:52 +00:00
|
|
|
class BackupProcessingError(Exception):
    """Exception type reserved for errors while processing a backup.

    It adds nothing to Exception beyond its distinct type.
    """
|
|
|
|
|
2009-02-06 05:07:01 +00:00
|
|
|
class BackerUpper:
|
|
|
|
def __init__(self, options):
|
|
|
|
self.options = options
|
|
|
|
self.files_uploaded = 0
|
|
|
|
self.files_reused = 0
|
|
|
|
self.files_checked = 0
|
2010-01-20 09:42:49 +00:00
|
|
|
self.files_skipped = 0
|
2009-02-06 05:07:01 +00:00
|
|
|
self.directories_created = 0
|
|
|
|
self.directories_reused = 0
|
|
|
|
self.directories_checked = 0
|
2010-01-20 09:42:49 +00:00
|
|
|
self.directories_skipped = 0
|
2009-02-06 05:07:01 +00:00
|
|
|
|
|
|
|
def run(self):
|
|
|
|
options = self.options
|
|
|
|
nodeurl = options['node-url']
|
|
|
|
self.verbosity = 1
|
|
|
|
if options['quiet']:
|
|
|
|
self.verbosity = 0
|
|
|
|
if options['verbose']:
|
|
|
|
self.verbosity = 2
|
|
|
|
stdout = options.stdout
|
|
|
|
stderr = options.stderr
|
|
|
|
|
2009-02-24 18:14:25 +00:00
|
|
|
start_timestamp = datetime.datetime.now()
|
2009-02-06 05:07:01 +00:00
|
|
|
self.backupdb = None
|
2009-06-04 17:31:31 +00:00
|
|
|
bdbfile = os.path.join(options["node-directory"],
|
|
|
|
"private", "backupdb.sqlite")
|
2010-07-22 00:14:18 +00:00
|
|
|
bdbfile = abspath_expanduser_unicode(bdbfile)
|
2009-06-04 17:31:31 +00:00
|
|
|
self.backupdb = backupdb.get_backupdb(bdbfile, stderr)
|
|
|
|
if not self.backupdb:
|
|
|
|
print >>stderr, "ERROR: Unable to load backup db."
|
|
|
|
return 1
|
2009-02-06 05:07:01 +00:00
|
|
|
|
2010-02-11 02:43:18 +00:00
|
|
|
try:
|
|
|
|
rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
|
|
|
|
except UnknownAliasError, e:
|
2010-06-07 01:02:15 +00:00
|
|
|
e.display(stderr)
|
2010-02-11 02:43:18 +00:00
|
|
|
return 1
|
2009-02-06 05:07:01 +00:00
|
|
|
to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
|
|
|
|
if path:
|
|
|
|
to_url += escape_path(path)
|
|
|
|
if not to_url.endswith("/"):
|
|
|
|
to_url += "/"
|
|
|
|
|
|
|
|
archives_url = to_url + "Archives/"
|
|
|
|
|
|
|
|
# first step: make sure the target directory exists, as well as the
|
|
|
|
# Archives/ subdirectory.
|
|
|
|
resp = do_http("GET", archives_url + "?t=json")
|
|
|
|
if resp.status == 404:
|
|
|
|
resp = do_http("POST", archives_url + "?t=mkdir")
|
|
|
|
if resp.status != 200:
|
2010-06-07 01:02:15 +00:00
|
|
|
print >>stderr, format_http_error("Unable to create target directory", resp)
|
2009-02-06 05:07:01 +00:00
|
|
|
return 1
|
|
|
|
|
2009-11-26 23:42:57 +00:00
|
|
|
# second step: process the tree
|
|
|
|
new_backup_dircap = self.process(options.from_dir)
|
|
|
|
|
|
|
|
# third: attach the new backup to the list
|
2009-02-06 05:07:01 +00:00
|
|
|
now = time_format.iso_utc(int(time.time()), sep="_") + "Z"
|
|
|
|
|
2009-11-26 23:42:57 +00:00
|
|
|
put_child(archives_url, now, new_backup_dircap)
|
|
|
|
put_child(to_url, "Latest", new_backup_dircap)
|
2009-02-24 18:14:25 +00:00
|
|
|
end_timestamp = datetime.datetime.now()
|
|
|
|
# calc elapsed time, omitting microseconds
|
|
|
|
elapsed_time = str(end_timestamp - start_timestamp).split('.')[0]
|
2009-02-06 05:07:01 +00:00
|
|
|
|
|
|
|
if self.verbosity >= 1:
|
|
|
|
print >>stdout, (" %d files uploaded (%d reused), "
|
2010-01-20 09:42:49 +00:00
|
|
|
"%d files skipped, "
|
|
|
|
"%d directories created (%d reused), "
|
|
|
|
"%d directories skipped"
|
2009-02-06 05:07:01 +00:00
|
|
|
% (self.files_uploaded,
|
|
|
|
self.files_reused,
|
2010-01-20 09:42:49 +00:00
|
|
|
self.files_skipped,
|
2009-02-06 05:07:01 +00:00
|
|
|
self.directories_created,
|
2010-01-20 09:42:49 +00:00
|
|
|
self.directories_reused,
|
|
|
|
self.directories_skipped))
|
2009-02-06 05:07:01 +00:00
|
|
|
if self.verbosity >= 2:
|
2009-11-26 23:42:57 +00:00
|
|
|
print >>stdout, (" %d files checked, %d directories checked"
|
2009-02-06 05:07:01 +00:00
|
|
|
% (self.files_checked,
|
2009-11-26 23:42:57 +00:00
|
|
|
self.directories_checked))
|
2009-02-24 18:14:25 +00:00
|
|
|
print >>stdout, " backup done, elapsed time: %s" % elapsed_time
|
2010-01-20 09:42:49 +00:00
|
|
|
|
|
|
|
# The command exits with code 2 if files or directories were skipped
|
|
|
|
if self.files_skipped or self.directories_skipped:
|
|
|
|
return 2
|
|
|
|
|
2009-02-06 05:07:01 +00:00
|
|
|
# done!
|
|
|
|
return 0
|
|
|
|
|
2009-02-03 04:09:02 +00:00
|
|
|
def verboseprint(self, msg):
|
2010-06-07 01:02:15 +00:00
|
|
|
precondition(isinstance(msg, str), msg)
|
2009-02-06 05:07:01 +00:00
|
|
|
if self.verbosity >= 2:
|
2009-02-03 04:09:02 +00:00
|
|
|
print >>self.options.stdout, msg
|
|
|
|
|
2010-01-20 09:42:49 +00:00
|
|
|
def warn(self, msg):
|
2010-06-07 01:02:15 +00:00
|
|
|
precondition(isinstance(msg, str), msg)
|
2010-01-20 09:42:49 +00:00
|
|
|
print >>self.options.stderr, msg
|
|
|
|
|
2009-11-26 23:42:57 +00:00
|
|
|
def process(self, localpath):
|
2010-05-20 00:43:56 +00:00
|
|
|
precondition(isinstance(localpath, unicode), localpath)
|
2009-02-03 04:09:02 +00:00
|
|
|
# returns newdircap
|
|
|
|
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint("processing %s" % quote_output(localpath))
|
2009-11-26 23:42:57 +00:00
|
|
|
create_contents = {} # childname -> (type, rocap, metadata)
|
|
|
|
compare_contents = {} # childname -> rocap
|
2010-01-20 09:42:49 +00:00
|
|
|
|
|
|
|
try:
|
2010-07-18 01:34:35 +00:00
|
|
|
children = listdir_unicode(localpath)
|
2010-01-20 09:42:49 +00:00
|
|
|
except EnvironmentError:
|
|
|
|
self.directories_skipped += 1
|
2010-06-07 01:02:15 +00:00
|
|
|
self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))
|
2010-01-20 09:42:49 +00:00
|
|
|
children = []
|
2010-07-18 01:34:35 +00:00
|
|
|
except FilenameEncodingError:
|
2010-07-12 00:30:15 +00:00
|
|
|
self.directories_skipped += 1
|
2010-07-18 01:34:35 +00:00
|
|
|
self.warn("WARNING: could not list directory %s due to a filename encoding error" % quote_output(localpath))
|
2010-07-12 00:30:15 +00:00
|
|
|
children = []
|
2010-01-20 09:42:49 +00:00
|
|
|
|
|
|
|
for child in self.options.filter_listdir(children):
|
2010-06-17 03:39:01 +00:00
|
|
|
assert isinstance(child, unicode), child
|
2009-02-03 04:09:02 +00:00
|
|
|
childpath = os.path.join(localpath, child)
|
2010-01-27 22:35:17 +00:00
|
|
|
# note: symlinks to directories are both islink() and isdir()
|
|
|
|
if os.path.isdir(childpath) and not os.path.islink(childpath):
|
2009-02-03 04:09:02 +00:00
|
|
|
metadata = get_local_metadata(childpath)
|
2009-02-06 05:07:01 +00:00
|
|
|
# recurse on the child directory
|
2009-11-26 23:42:57 +00:00
|
|
|
childcap = self.process(childpath)
|
|
|
|
assert isinstance(childcap, str)
|
|
|
|
create_contents[child] = ("dirnode", childcap, metadata)
|
|
|
|
compare_contents[child] = childcap
|
2010-01-27 22:35:17 +00:00
|
|
|
elif os.path.isfile(childpath) and not os.path.islink(childpath):
|
2010-01-20 09:42:49 +00:00
|
|
|
try:
|
|
|
|
childcap, metadata = self.upload(childpath)
|
|
|
|
assert isinstance(childcap, str)
|
|
|
|
create_contents[child] = ("filenode", childcap, metadata)
|
|
|
|
compare_contents[child] = childcap
|
|
|
|
except EnvironmentError:
|
|
|
|
self.files_skipped += 1
|
2010-06-07 01:02:15 +00:00
|
|
|
self.warn("WARNING: permission denied on file %s" % quote_output(childpath))
|
2009-02-03 04:09:02 +00:00
|
|
|
else:
|
2010-01-20 09:42:49 +00:00
|
|
|
self.files_skipped += 1
|
2010-01-27 22:35:17 +00:00
|
|
|
if os.path.islink(childpath):
|
2010-06-07 01:02:15 +00:00
|
|
|
self.warn("WARNING: cannot backup symlink %s" % quote_output(childpath))
|
2010-01-27 22:35:17 +00:00
|
|
|
else:
|
2010-06-07 01:02:15 +00:00
|
|
|
self.warn("WARNING: cannot backup special file %s" % quote_output(childpath))
|
2009-11-26 23:42:57 +00:00
|
|
|
|
|
|
|
must_create, r = self.check_backupdb_directory(compare_contents)
|
|
|
|
if must_create:
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint(" creating directory for %s" % quote_output(localpath))
|
2009-11-26 23:42:57 +00:00
|
|
|
newdircap = mkdir(create_contents, self.options)
|
|
|
|
assert isinstance(newdircap, str)
|
|
|
|
if r:
|
|
|
|
r.did_create(newdircap)
|
2009-02-06 05:07:01 +00:00
|
|
|
self.directories_created += 1
|
2009-11-26 23:42:57 +00:00
|
|
|
return newdircap
|
|
|
|
else:
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint(" re-using old directory for %s" % quote_output(localpath))
|
2009-11-26 23:42:57 +00:00
|
|
|
self.directories_reused += 1
|
|
|
|
return r.was_created()
|
2009-02-03 04:09:02 +00:00
|
|
|
|
2009-11-26 23:42:57 +00:00
|
|
|
def check_backupdb_file(self, childpath):
|
2009-02-06 05:07:01 +00:00
|
|
|
if not self.backupdb:
|
2009-02-06 02:56:40 +00:00
|
|
|
return True, None
|
|
|
|
use_timestamps = not self.options["ignore-timestamps"]
|
2009-02-06 05:07:01 +00:00
|
|
|
r = self.backupdb.check_file(childpath, use_timestamps)
|
2009-02-06 02:56:40 +00:00
|
|
|
|
|
|
|
if not r.was_uploaded():
|
|
|
|
return True, r
|
|
|
|
|
|
|
|
if not r.should_check():
|
|
|
|
# the file was uploaded or checked recently, so we can just use
|
|
|
|
# it
|
|
|
|
return False, r
|
|
|
|
|
|
|
|
# we must check the file before using the results
|
|
|
|
filecap = r.was_uploaded()
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint("checking %s" % quote_output(filecap))
|
2009-02-06 02:56:40 +00:00
|
|
|
nodeurl = self.options['node-url']
|
|
|
|
checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap)
|
2009-02-06 05:07:01 +00:00
|
|
|
self.files_checked += 1
|
2009-02-06 02:56:40 +00:00
|
|
|
resp = do_http("POST", checkurl)
|
|
|
|
if resp.status != 200:
|
|
|
|
# can't check, so we must assume it's bad
|
|
|
|
return True, r
|
|
|
|
|
|
|
|
cr = simplejson.loads(resp.read())
|
|
|
|
healthy = cr["results"]["healthy"]
|
|
|
|
if not healthy:
|
|
|
|
# must upload
|
|
|
|
return True, r
|
|
|
|
# file is healthy, no need to upload
|
|
|
|
r.did_check_healthy(cr)
|
|
|
|
return False, r
|
|
|
|
|
2009-11-26 23:42:57 +00:00
|
|
|
def check_backupdb_directory(self, compare_contents):
|
|
|
|
if not self.backupdb:
|
|
|
|
return True, None
|
|
|
|
r = self.backupdb.check_directory(compare_contents)
|
|
|
|
|
|
|
|
if not r.was_created():
|
|
|
|
return True, r
|
|
|
|
|
|
|
|
if not r.should_check():
|
|
|
|
# the file was uploaded or checked recently, so we can just use
|
|
|
|
# it
|
|
|
|
return False, r
|
|
|
|
|
|
|
|
# we must check the directory before re-using it
|
|
|
|
dircap = r.was_created()
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint("checking %s" % quote_output(dircap))
|
2009-11-26 23:42:57 +00:00
|
|
|
nodeurl = self.options['node-url']
|
|
|
|
checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(dircap)
|
|
|
|
self.directories_checked += 1
|
|
|
|
resp = do_http("POST", checkurl)
|
2009-02-06 05:07:01 +00:00
|
|
|
if resp.status != 200:
|
2009-11-26 23:42:57 +00:00
|
|
|
# can't check, so we must assume it's bad
|
|
|
|
return True, r
|
|
|
|
|
|
|
|
cr = simplejson.loads(resp.read())
|
|
|
|
healthy = cr["results"]["healthy"]
|
|
|
|
if not healthy:
|
|
|
|
# must create
|
|
|
|
return True, r
|
|
|
|
# directory is healthy, no need to upload
|
|
|
|
r.did_check_healthy(cr)
|
|
|
|
return False, r
|
2009-02-06 05:07:01 +00:00
|
|
|
|
2010-01-20 09:42:49 +00:00
|
|
|
# This function will raise an IOError exception when called on an unreadable file
|
2009-02-03 04:09:02 +00:00
|
|
|
def upload(self, childpath):
|
2010-05-20 00:43:56 +00:00
|
|
|
precondition(isinstance(childpath, unicode), childpath)
|
|
|
|
|
2010-06-07 01:02:15 +00:00
|
|
|
#self.verboseprint("uploading %s.." % quote_output(childpath))
|
2009-02-06 02:56:40 +00:00
|
|
|
metadata = get_local_metadata(childpath)
|
|
|
|
|
2009-02-03 04:09:02 +00:00
|
|
|
# we can use the backupdb here
|
2009-11-26 23:42:57 +00:00
|
|
|
must_upload, bdb_results = self.check_backupdb_file(childpath)
|
2009-02-03 04:09:02 +00:00
|
|
|
|
2009-02-06 02:56:40 +00:00
|
|
|
if must_upload:
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint("uploading %s.." % quote_output(childpath))
|
2010-07-18 01:34:35 +00:00
|
|
|
infileobj = open(childpath, "rb")
|
2009-02-06 02:56:40 +00:00
|
|
|
url = self.options['node-url'] + "uri"
|
|
|
|
resp = do_http("PUT", url, infileobj)
|
|
|
|
if resp.status not in (200, 201):
|
2010-06-07 01:02:15 +00:00
|
|
|
raise HTTPError("Error during file PUT", resp)
|
|
|
|
|
2009-02-06 02:56:40 +00:00
|
|
|
filecap = resp.read().strip()
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint(" %s -> %s" % (quote_output(childpath, quotemarks=False),
|
|
|
|
quote_output(filecap, quotemarks=False)))
|
|
|
|
#self.verboseprint(" metadata: %s" % (quote_output(metadata, quotemarks=False),))
|
2009-02-06 02:56:40 +00:00
|
|
|
|
|
|
|
if bdb_results:
|
|
|
|
bdb_results.did_upload(filecap)
|
|
|
|
|
2009-02-06 05:07:01 +00:00
|
|
|
self.files_uploaded += 1
|
2009-02-06 02:56:40 +00:00
|
|
|
return filecap, metadata
|
|
|
|
|
|
|
|
else:
|
2010-06-07 01:02:15 +00:00
|
|
|
self.verboseprint("skipping %s.." % quote_output(childpath))
|
2009-02-06 05:07:01 +00:00
|
|
|
self.files_reused += 1
|
2009-02-06 02:56:40 +00:00
|
|
|
return bdb_results.was_uploaded(), metadata
|
2009-02-03 04:09:02 +00:00
|
|
|
|
2009-02-06 05:07:01 +00:00
|
|
|
def backup(options):
    """Run a backup with *options* and return BackerUpper.run()'s exit code."""
    return BackerUpper(options).run()
|