Fix handling of correctly encoded unicode filenames (#534)

Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
backup', have been improved to correctly handle filenames containing non-ASCII
characters.
  
In the case where Tahoe encounters a filename which cannot be decoded using the
system encoding, an error will be returned and the operation will fail.  Under
Linux, this typically happens when the filesystem contains filenames encoded
with a different encoding (for instance latin1) from that of the system locale
(for instance UTF-8).  In such a case, you'll need to fix your system with
tools such as 'convmv' before using the Tahoe CLI.
  
All CLI commands have been improved to support non-ASCII parameters such as
filenames and aliases on all supported Operating Systems except Windows as of
now.
This commit is contained in:
Francois Deppierraz 2010-05-19 17:43:56 -07:00
parent d0ed14e1bb
commit 496802420e
11 changed files with 277 additions and 65 deletions

21
NEWS
View File

@ -1,5 +1,26 @@
User visible changes in Tahoe-LAFS. -*- outline -*-
* Release 1.7.0
** Bugfixes
*** Unicode filenames handling
Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe
backup', have been improved to correctly handle filenames containing non-ASCII
characters.
In the case where Tahoe encounters a filename which cannot be decoded using the
system encoding, an error will be returned and the operation will fail. Under
Linux, this typically happens when the filesystem contains filenames encoded
with a different encoding (for instance latin1) from that of the system locale
(for instance UTF-8). In such a case, you'll need to fix your system with
tools such as 'convmv' before using the Tahoe CLI.
All CLI commands have been improved to support non-ASCII parameters such as
filenames and aliases on all supported Operating Systems except Windows as of
now.
* Release 1.6.1 (2010-02-27)
** Bugfixes

View File

@ -136,13 +136,13 @@ starting directory provides a different, possibly overlapping
perspective on the graph of files and directories.
Each tahoe node remembers a list of starting points, named "aliases",
in a file named ~/.tahoe/private/aliases . These aliases are short
strings that stand in for a directory read- or write- cap. If you use
the command line "ls" without any "[STARTING_DIR]:" argument, then it
will use the default alias, which is "tahoe", therefore "tahoe ls" has
the same effect as "tahoe ls tahoe:". The same goes for the other
commands which can reasonably use a default alias: get, put, mkdir,
mv, and rm.
in a file named ~/.tahoe/private/aliases . These aliases are short UTF-8
encoded strings that stand in for a directory read- or write- cap. If
you use the command line "ls" without any "[STARTING_DIR]:" argument,
then it will use the default alias, which is "tahoe", therefore "tahoe
ls" has the same effect as "tahoe ls tahoe:". The same goes for the
other commands which can reasonably use a default alias: get, put,
mkdir, mv, and rm.
For backwards compatibility with Tahoe-1.0, if the "tahoe:" alias is not
found in ~/.tahoe/private/aliases, the CLI will use the contents of

View File

@ -1,6 +1,7 @@
import os.path, re, sys, fnmatch
from twisted.python import usage
from allmydata.scripts.common import BaseOptions, get_aliases
from allmydata.util.stringutils import argv_to_unicode
NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")
@ -49,12 +50,12 @@ class VDriveOptions(BaseOptions, usage.Options):
class MakeDirectoryOptions(VDriveOptions):
def parseArgs(self, where=""):
self.where = where
self.where = argv_to_unicode(where)
longdesc = """Create a new directory, either unlinked or as a subdirectory."""
class AddAliasOptions(VDriveOptions):
def parseArgs(self, alias, cap):
self.alias = alias
self.alias = argv_to_unicode(alias)
self.cap = cap
def getSynopsis(self):
@ -64,7 +65,7 @@ class AddAliasOptions(VDriveOptions):
class CreateAliasOptions(VDriveOptions):
def parseArgs(self, alias):
self.alias = alias
self.alias = argv_to_unicode(alias)
def getSynopsis(self):
return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),)
@ -83,7 +84,7 @@ class ListOptions(VDriveOptions):
("json", None, "Show the raw JSON output"),
]
def parseArgs(self, where=""):
self.where = where
self.where = argv_to_unicode(where)
longdesc = """
List the contents of some portion of the grid.
@ -118,8 +119,13 @@ class GetOptions(VDriveOptions):
# tahoe get FOO bar # write to local file
# tahoe get tahoe:FOO bar # same
self.from_file = arg1
self.to_file = arg2
self.from_file = argv_to_unicode(arg1)
if arg2:
self.to_file = argv_to_unicode(arg2)
else:
self.to_file = None
if self.to_file == "-":
self.to_file = None
@ -151,15 +157,15 @@ class PutOptions(VDriveOptions):
# see Examples below
if arg1 is not None and arg2 is not None:
self.from_file = arg1
self.to_file = arg2
self.from_file = argv_to_unicode(arg1)
self.to_file = argv_to_unicode(arg2)
elif arg1 is not None and arg2 is None:
self.from_file = arg1 # might be "-"
self.from_file = argv_to_unicode(arg1) # might be "-"
self.to_file = None
else:
self.from_file = None
self.to_file = None
if self.from_file == "-":
if self.from_file == u"-":
self.from_file = None
def getSynopsis(self):
@ -197,8 +203,8 @@ class CpOptions(VDriveOptions):
def parseArgs(self, *args):
if len(args) < 2:
raise usage.UsageError("cp requires at least two arguments")
self.sources = args[:-1]
self.destination = args[-1]
self.sources = map(argv_to_unicode, args[:-1])
self.destination = argv_to_unicode(args[-1])
def getSynopsis(self):
return "Usage: tahoe [options] cp FROM.. TO"
longdesc = """
@ -228,15 +234,15 @@ class CpOptions(VDriveOptions):
class RmOptions(VDriveOptions):
def parseArgs(self, where):
self.where = where
self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),)
class MvOptions(VDriveOptions):
def parseArgs(self, frompath, topath):
self.from_file = frompath
self.to_file = topath
self.from_file = argv_to_unicode(frompath)
self.to_file = argv_to_unicode(topath)
def getSynopsis(self):
return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),)
@ -254,8 +260,8 @@ class MvOptions(VDriveOptions):
class LnOptions(VDriveOptions):
def parseArgs(self, frompath, topath):
self.from_file = frompath
self.to_file = topath
self.from_file = argv_to_unicode(frompath)
self.to_file = argv_to_unicode(topath)
def getSynopsis(self):
return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),)
@ -279,8 +285,8 @@ class BackupOptions(VDriveOptions):
self['exclude'] = set()
def parseArgs(self, localdir, topath):
self.from_dir = localdir
self.to_dir = topath
self.from_dir = argv_to_unicode(localdir)
self.to_dir = argv_to_unicode(topath)
def getSynopsis(Self):
return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0])
@ -337,7 +343,7 @@ class WebopenOptions(VDriveOptions):
("info", "i", "Open the t=info page for the file"),
]
def parseArgs(self, where=''):
self.where = where
self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@ -354,7 +360,7 @@ class ManifestOptions(VDriveOptions):
("raw", "r", "Display raw JSON data instead of parsed"),
]
def parseArgs(self, where=''):
self.where = where
self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@ -367,7 +373,7 @@ class StatsOptions(VDriveOptions):
("raw", "r", "Display raw JSON data instead of parsed"),
]
def parseArgs(self, where=''):
self.where = where
self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@ -383,7 +389,7 @@ class CheckOptions(VDriveOptions):
("add-lease", None, "Add/renew lease on all shares"),
]
def parseArgs(self, where=''):
self.where = where
self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)
@ -402,7 +408,7 @@ class DeepCheckOptions(VDriveOptions):
("verbose", "v", "Be noisy about what is happening."),
]
def parseArgs(self, where=''):
self.where = where
self.where = argv_to_unicode(where)
def getSynopsis(self):
return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),)

View File

@ -1,7 +1,9 @@
import os, sys, urllib
import codecs
from twisted.python import usage
from allmydata.util.stringutils import unicode_to_url
from allmydata.util.assertutil import precondition
class BaseOptions:
# unit tests can override these to point at StringIO instances
@ -100,14 +102,14 @@ def get_aliases(nodedir):
except EnvironmentError:
pass
try:
f = open(aliasfile, "r")
f = codecs.open(aliasfile, "r", "utf-8")
for line in f.readlines():
line = line.strip()
if line.startswith("#") or not line:
continue
name, cap = line.split(":", 1)
# normalize it: remove http: prefix, urldecode
cap = cap.strip()
cap = cap.strip().encode('utf-8')
aliases[name] = uri.from_string_dirnode(cap).to_string()
except EnvironmentError:
pass
@ -138,7 +140,7 @@ def get_alias(aliases, path, default):
# and default is not found in aliases, an UnknownAliasError is
# raised.
path = path.strip()
if uri.has_uri_prefix(path):
if uri.has_uri_prefix(path.encode('utf-8')):
# We used to require "URI:blah:./foo" in order to get a subpath,
# stripping out the ":./" sequence. We still allow that for compatibility,
# but now also allow just "URI:blah/foo".
@ -180,4 +182,4 @@ def get_alias(aliases, path, default):
def escape_path(path):
segments = path.split("/")
return "/".join([urllib.quote(s) for s in segments])
return "/".join([urllib.quote(unicode_to_url(s)) for s in segments])

View File

@ -1,16 +1,20 @@
import os.path
import codecs
import sys
from allmydata import uri
from allmydata.scripts.common_http import do_http, check_http_error
from allmydata.scripts.common import get_aliases
from allmydata.util.fileutil import move_into_place
from allmydata.util.stringutils import unicode_to_stdout
def add_line_to_aliasfile(aliasfile, alias, cap):
# we use os.path.exists, rather than catching EnvironmentError, to avoid
# clobbering the valuable alias file in case of spurious or transient
# filesystem errors.
if os.path.exists(aliasfile):
f = open(aliasfile, "r")
f = codecs.open(aliasfile, "r", "utf-8")
aliases = f.read()
f.close()
if not aliases.endswith("\n"):
@ -18,7 +22,7 @@ def add_line_to_aliasfile(aliasfile, alias, cap):
else:
aliases = ""
aliases += "%s: %s\n" % (alias, cap)
f = open(aliasfile+".tmp", "w")
f = codecs.open(aliasfile+".tmp", "w", "utf-8")
f.write(aliases)
f.close()
move_into_place(aliasfile+".tmp", aliasfile)
@ -41,7 +45,7 @@ def add_alias(options):
add_line_to_aliasfile(aliasfile, alias, cap)
print >>stdout, "Alias '%s' added" % (alias,)
print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),)
return 0
def create_alias(options):
@ -74,7 +78,7 @@ def create_alias(options):
add_line_to_aliasfile(aliasfile, alias, new_uri)
print >>stdout, "Alias '%s' created" % (alias,)
print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
return 0
def list_aliases(options):

View File

@ -9,6 +9,11 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
from allmydata.scripts.common_http import do_http
from allmydata.util import time_format
from allmydata.scripts import backupdb
import sys
from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode
from allmydata.util.assertutil import precondition
from twisted.python import usage
class HTTPError(Exception):
pass
@ -154,12 +159,16 @@ class BackerUpper:
def verboseprint(self, msg):
if self.verbosity >= 2:
if isinstance(msg, unicode):
msg = unicode_to_stdout(msg)
print >>self.options.stdout, msg
def warn(self, msg):
print >>self.options.stderr, msg
def process(self, localpath):
precondition(isinstance(localpath, unicode), localpath)
# returns newdircap
self.verboseprint("processing %s" % localpath)
@ -167,7 +176,7 @@ class BackerUpper:
compare_contents = {} # childname -> rocap
try:
children = os.listdir(localpath)
children = listdir_unicode(localpath)
except EnvironmentError:
self.directories_skipped += 1
self.warn("WARNING: permission denied on directory %s" % localpath)
@ -283,6 +292,8 @@ class BackerUpper:
# This function will raise an IOError exception when called on an unreadable file
def upload(self, childpath):
precondition(isinstance(childpath, unicode), childpath)
#self.verboseprint("uploading %s.." % childpath)
metadata = get_local_metadata(childpath)
@ -291,7 +302,7 @@ class BackerUpper:
if must_upload:
self.verboseprint("uploading %s.." % childpath)
infileobj = open(os.path.expanduser(childpath), "rb")
infileobj = open_unicode(os.path.expanduser(childpath), "rb")
url = self.options['node-url'] + "uri"
resp = do_http("PUT", url, infileobj)
if resp.status not in (200, 201):

View File

@ -2,12 +2,17 @@
import os.path
import urllib
import simplejson
import sys
from cStringIO import StringIO
from twisted.python.failure import Failure
from allmydata.scripts.common import get_alias, escape_path, \
DefaultAliasMarker, UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata import uri
from twisted.python import usage
from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
from allmydata.util.assertutil import precondition
def ascii_or_none(s):
if s is None:
@ -70,6 +75,7 @@ def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
class LocalFileSource:
def __init__(self, pathname):
precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def need_to_copy_bytes(self):
@ -80,6 +86,7 @@ class LocalFileSource:
class LocalFileTarget:
def __init__(self, pathname):
precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def put_file(self, inf):
outf = open(self.pathname, "wb")
@ -92,6 +99,7 @@ class LocalFileTarget:
class LocalMissingTarget:
def __init__(self, pathname):
precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def put_file(self, inf):
@ -105,6 +113,8 @@ class LocalMissingTarget:
class LocalDirectorySource:
def __init__(self, progressfunc, pathname):
precondition(isinstance(pathname, unicode), pathname)
self.progressfunc = progressfunc
self.pathname = pathname
self.children = None
@ -113,7 +123,7 @@ class LocalDirectorySource:
if self.children is not None:
return
self.children = {}
children = os.listdir(self.pathname)
children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
pn = os.path.join(self.pathname, n)
@ -130,6 +140,8 @@ class LocalDirectorySource:
class LocalDirectoryTarget:
def __init__(self, progressfunc, pathname):
precondition(isinstance(pathname, unicode), pathname)
self.progressfunc = progressfunc
self.pathname = pathname
self.children = None
@ -138,7 +150,7 @@ class LocalDirectoryTarget:
if self.children is not None:
return
self.children = {}
children = os.listdir(self.pathname)
children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
pn = os.path.join(self.pathname, n)
@ -161,8 +173,9 @@ class LocalDirectoryTarget:
return LocalDirectoryTarget(self.progressfunc, pathname)
def put_file(self, name, inf):
precondition(isinstance(name, unicode), name)
pathname = os.path.join(self.pathname, name)
outf = open(pathname, "wb")
outf = open_unicode(pathname, "wb")
while True:
data = inf.read(32768)
if not data:
@ -355,7 +368,7 @@ class TahoeDirectoryTarget:
if self.writecap:
url = self.nodeurl + "/".join(["uri",
urllib.quote(self.writecap),
urllib.quote(name.encode('utf-8'))])
urllib.quote(unicode_to_url(name))])
self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
writecap, readcap, url)
elif data[0] == "dirnode":

View File

@ -4,6 +4,7 @@ import simplejson
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.util.stringutils import unicode_to_stdout
def list(options):
nodeurl = options['node-url']
@ -130,7 +131,7 @@ def list(options):
line.append(ctime_s)
if not options["classify"]:
classify = ""
line.append(name + classify)
line.append(unicode_to_stdout(name) + classify)
if options["uri"]:
line.append(uri)
if options["readonly-uri"]:

View File

@ -85,7 +85,7 @@ class ManifestStreamer(LineOnlyReceiver):
try:
print >>stdout, d["cap"], "/".join(d["path"])
except UnicodeEncodeError:
print >>stdout, d["cap"], "/".join([p.encode("utf-8")
print >>stdout, d["cap"], "/".join([unicode_to_stdout(p)
for p in d["path"]])
def manifest(options):

View File

@ -2,6 +2,7 @@
import urllib
from allmydata.scripts.common_http import do_http, check_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
from allmydata.util.stringutils import unicode_to_url
def mkdir(options):
nodeurl = options['node-url']
@ -35,7 +36,7 @@ def mkdir(options):
path = path[:-1]
# path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
urllib.quote(path))
urllib.quote(unicode_to_url(path)))
resp = do_http("POST", url)
check_http_error(resp, stderr)
new_uri = resp.read().strip()

View File

@ -6,6 +6,7 @@ from cStringIO import StringIO
import urllib
import re
import simplejson
import sys
from allmydata.util import fileutil, hashutil, base32
from allmydata import uri
@ -26,6 +27,9 @@ from allmydata.test.no_network import GridTestMixin
from twisted.internet import threads # CLI tests use deferToThread
from twisted.python import usage
from allmydata.util.stringutils import listdir_unicode, open_unicode, \
unicode_platform, FilenameEncodingError
timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
@ -284,7 +288,7 @@ class CLI(unittest.TestCase):
"work": "WA",
"c": "CA"}
def ga1(path):
return get_alias(aliases, path, "tahoe")
return get_alias(aliases, path, u"tahoe")
uses_lettercolon = common.platform_uses_lettercolon_drivename()
self.failUnlessEqual(ga1("bare"), ("TA", "bare"))
self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file"))
@ -379,7 +383,7 @@ class CLI(unittest.TestCase):
# default set to something that isn't in the aliases argument should
# raise an UnknownAliasError.
def ga4(path):
return get_alias(aliases, path, "badddefault:")
return get_alias(aliases, path, u"badddefault:")
self.failUnlessRaises(common.UnknownAliasError, ga4, "afile")
self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/")
@ -387,12 +391,44 @@ class CLI(unittest.TestCase):
old = common.pretend_platform_uses_lettercolon
try:
common.pretend_platform_uses_lettercolon = True
retval = get_alias(aliases, path, "baddefault:")
retval = get_alias(aliases, path, u"baddefault:")
finally:
common.pretend_platform_uses_lettercolon = old
return retval
self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows")
def test_listdir_unicode_good(self):
    """Check that each name listdir_unicode() returns is one we created."""
    basedir = u"cli/common/listdir_unicode_good"
    fileutil.make_dirs(basedir)

    expected_names = (u'Lôzane', u'Bern', u'Genève')

    # Create one empty file per expected name.
    for name in expected_names:
        target = os.path.join(basedir, name)
        open(target, "w").close()

    for listed in listdir_unicode(basedir):
        self.failUnlessEqual(listed in expected_names, True)
def test_listdir_unicode_bad(self):
    """Expect FilenameEncodingError from listdir_unicode() when the names on
    disk were written with an encoding other than the filesystem encoding.

    Skipped when unicode_platform() reports true.
    """
    if unicode_platform():
        raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.")

    basedir = u"cli/common/listdir_unicode_bad"
    fileutil.make_dirs(basedir)

    # Deliberately choose an encoding that differs from the filesystem's.
    wrong_encoding = 'UTF-8'
    if sys.getfilesystemencoding() == 'UTF-8':
        wrong_encoding = 'latin1'

    for name in (u'Lôzane', u'Bern', u'Genève'):
        mis_encoded_path = os.path.join(basedir, name).encode(wrong_encoding)
        open(mis_encoded_path, "w").close()

    self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir)
class Help(unittest.TestCase):
@ -592,8 +628,73 @@ class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase):
self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:"))
d.addCallback(_check_not_corrupted)
return d
def test_create_unicode(self):
    """Create a non-ASCII alias, then ls, put and get through it.

    The test is skipped when the filesystem encoding is neither 'UTF-8'
    nor 'mbcs', or when sys.stdout.encoding is not 'UTF-8'.

    Returns the Deferred chaining all the CLI invocations and checks.
    """
    if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
        raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")

    # Use != rather than "not in ('UTF-8')": the parenthesised string is
    # not a tuple, so the old guard was a substring match and raised
    # TypeError when sys.stdout.encoding is None.
    if sys.stdout.encoding != 'UTF-8':
        raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")

    self.basedir = "cli/CreateAlias/create_unicode"
    self.set_up_grid()

    d = self.do_cli("create-alias", "études")
    def _check_create_unicode(res):
        rc, stdout, stderr = res
        self.failUnlessEqual(rc, 0)
        self.failIf(stderr)

        # The ASCII-only fallback ('?tudes') cannot be reached past the
        # UTF-8 guard above, so only the exact name is checked.
        self.failUnless("Alias 'études' created" in stdout)

        aliases = get_aliases(self.get_clientdir())
        self.failUnless(aliases[u"études"].startswith("URI:DIR2:"))
    d.addCallback(_check_create_unicode)

    # A freshly created alias lists as empty.
    d.addCallback(lambda res: self.do_cli("ls", "études:"))
    def _check_ls1(res):
        rc, stdout, stderr = res
        self.failUnlessEqual(rc, 0)
        self.failIf(stderr)
        self.failUnlessEqual(stdout, "")
    d.addCallback(_check_ls1)

    d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt",
                                          stdin="Blah blah blah"))

    d.addCallback(lambda res: self.do_cli("ls", "études:"))
    def _check_ls2(res):
        rc, stdout, stderr = res
        self.failUnlessEqual(rc, 0)
        self.failIf(stderr)
        self.failUnlessEqual(stdout, "uploaded.txt\n")
    d.addCallback(_check_ls2)

    d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt"))
    def _check_get_ascii_name(res):
        rc, stdout, stderr = res
        self.failUnlessEqual(rc, 0)
        self.failIf(stderr)
        self.failUnlessEqual(stdout, "Blah blah blah")
    d.addCallback(_check_get_ascii_name)

    # Ensure that a Unicode filename in a Unicode alias works as expected
    d.addCallback(lambda res: self.do_cli("put", "-", "études:lumière.txt",
                                          stdin="Let the sunshine In!"))

    # Fetch via the alias's cap rather than via the alias name.
    d.addCallback(lambda res: self.do_cli("get",
                                          get_aliases(self.get_clientdir())[u"études"] + "/lumière.txt"))
    def _check_get_unicode_name(res):
        rc, stdout, stderr = res
        self.failUnlessEqual(rc, 0)
        self.failIf(stderr)
        self.failUnlessEqual(stdout, "Let the sunshine In!")
    d.addCallback(_check_get_unicode_name)

    return d
class Ln(GridTestMixin, CLITestMixin, unittest.TestCase):
def _create_test_file(self):
@ -865,6 +966,40 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
return d
def test_immutable_from_file_unicode(self):
    """Upload a small local file under a non-ASCII remote name, then get it
    back and compare the contents.

    The test is skipped when sys.stdout.encoding is not 'UTF-8'.

    Returns the Deferred chaining the CLI invocations and checks.
    """
    # Use != rather than "not in ('UTF-8')": the parenthesised string is
    # not a tuple, so the old guard was a substring match and raised
    # TypeError when sys.stdout.encoding is None.
    if sys.stdout.encoding != 'UTF-8':
        raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")

    # tahoe put file.txt "à trier.txt"
    self.basedir = os.path.dirname(self.mktemp())
    self.set_up_grid()

    rel_fn = os.path.join(self.basedir, "DATAFILE")
    # we make the file small enough to fit in a LIT file, for speed
    DATA = "short file"
    f = open(rel_fn, "w")
    try:
        f.write(DATA)
    finally:
        # close the handle even if the write fails
        f.close()

    d = self.do_cli("create-alias", "tahoe")

    d.addCallback(lambda res:
                  self.do_cli("put", rel_fn, "à trier.txt"))
    def _uploaded(res):
        rc, stdout, stderr = res
        readcap = stdout.strip()
        self.failUnless(readcap.startswith("URI:LIT:"))
        self.failUnless("201 Created" in stderr, stderr)
        self.readcap = readcap
    d.addCallback(_uploaded)

    d.addCallback(lambda res:
                  self.do_cli("get", "tahoe:à trier.txt"))
    def _check_contents(res):
        rc, stdout, stderr = res
        self.failUnlessEqual(stdout, DATA)
    d.addCallback(_check_contents)

    return d
class List(GridTestMixin, CLITestMixin, unittest.TestCase):
def test_list(self):
self.basedir = "cli/List/list"
@ -1146,32 +1281,39 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase):
o.parseOptions, ["onearg"])
def test_unicode_filename(self):
if sys.getfilesystemencoding() not in ('UTF-8', 'mbcs'):
raise unittest.SkipTest("Arbitrary filenames are not supported by this platform")
if sys.stdout.encoding not in ('UTF-8'):
raise unittest.SkipTest("Arbitrary command-line arguments (argv) are not supported by this platform")
self.basedir = "cli/Cp/unicode_filename"
self.set_up_grid()
d = self.do_cli("create-alias", "tahoe")
fn1 = os.path.join(self.basedir, "Ärtonwall")
# Use unicode strings when calling os functions
fn1 = os.path.join(self.basedir, u"Ärtonwall")
DATA1 = "unicode file content"
fileutil.write(fn1, DATA1)
fn2 = os.path.join(self.basedir, "Metallica")
DATA2 = "non-unicode file content"
fileutil.write(fn2, DATA2)
# Bug #534
# Assure that uploading a file whose name contains unicode character
# doesn't prevent further uploads in the same directory
d = self.do_cli("create-alias", "tahoe")
d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:"))
d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:"))
d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:"))
d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall"))
d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1))
fn2 = os.path.join(self.basedir, u"Metallica")
DATA2 = "non-unicode file content"
fileutil.write(fn2, DATA2)
d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:"))
d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica"))
d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2))
d.addCallback(lambda res: self.do_cli("ls", "tahoe:"))
d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, "Metallica\nÄrtonwall\n"))
return d
test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms. See issue ticket #534."
def test_dangling_symlink_vs_recursion(self):
if not hasattr(os, 'symlink'):
@ -1278,6 +1420,17 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase):
return d
class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase):
    """CLI tests for 'tahoe mkdir'."""

    def test_unicode_mkdir(self):
        """Run 'tahoe mkdir' with a non-ASCII directory name under a new alias.

        NOTE(review): the result of the mkdir invocation is not inspected,
        so this only checks that the Deferred chain does not fail.
        """
        self.basedir = os.path.dirname(self.mktemp())
        self.set_up_grid()

        def _make_directory(ignored):
            return self.do_cli("mkdir", "tahoe:Motörhead")

        d = self.do_cli("create-alias", "tahoe")
        d.addCallback(_make_directory)
        return d
class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
def writeto(self, path, data):