Rename stringutils to encodingutil, and drop listdir_unicode and open_unicode (since the Python stdlib functions work fine with Unicode paths). Also move some utility functions to fileutil.
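To illustrate the call-site change described above, here is a minimal sketch (the backup_tree helper and the paths in it are hypothetical, not part of this commit): the stdlib os.listdir and open accept a unicode path directly, and the expanduser-aware open helper now lives in fileutil.

    import os
    from allmydata.util import fileutil

    def backup_tree(localdir):
        # The stdlib call accepts a unicode path directly, so the old
        # listdir_unicode() wrapper is no longer needed.
        assert isinstance(localdir, unicode), localdir
        for name in os.listdir(localdir):
            childpath = os.path.join(localdir, name)
            if os.path.isfile(childpath):
                # open_expanduser (moved to fileutil) replaces open_unicode.
                f = fileutil.open_expanduser(childpath, "rb")
                try:
                    f.read()
                finally:
                    f.close()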

david-sarah 2010-07-11 17:30:15 -07:00
parent fa0fd66e17
commit 11077ea74d
21 changed files with 67 additions and 263 deletions

View File

@@ -16,7 +16,7 @@ from allmydata.check_results import DeepCheckResults, \
      DeepCheckAndRepairResults
 from allmydata.monitor import Monitor
 from allmydata.util import hashutil, mathutil, base32, log
-from allmydata.util.stringutils import quote_output
+from allmydata.util.encodingutil import quote_output
 from allmydata.util.assertutil import precondition
 from allmydata.util.netstring import netstring, split_netstring
 from allmydata.util.consumer import download_to_data

View File

@@ -1,7 +1,7 @@
 import os.path, re, sys, fnmatch
 from twisted.python import usage
 from allmydata.scripts.common import BaseOptions, get_aliases
-from allmydata.util.stringutils import argv_to_unicode
+from allmydata.util.encodingutil import argv_to_unicode
 NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")

View File

@@ -2,7 +2,7 @@
 import os, sys, urllib
 import codecs
 from twisted.python import usage
-from allmydata.util.stringutils import unicode_to_url, quote_output
+from allmydata.util.encodingutil import unicode_to_url, quote_output
 from allmydata.util.assertutil import precondition
 class BaseOptions:

View File

@@ -3,7 +3,7 @@ from cStringIO import StringIO
 import urlparse, httplib
 import allmydata # for __full_version__
-from allmydata.util.stringutils import quote_output
+from allmydata.util.encodingutil import quote_output
 from allmydata.scripts.common import TahoeError

View File

@@ -4,7 +4,7 @@ from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
 from allmydata.util import base32
-from allmydata.util.stringutils import quote_output, is_printable_ascii
+from allmydata.util.encodingutil import quote_output, is_printable_ascii
 import urllib
 import simplejson

View File

@@ -5,7 +5,7 @@ from allmydata import uri
 from allmydata.scripts.common_http import do_http, check_http_error
 from allmydata.scripts.common import get_aliases
 from allmydata.util.fileutil import move_into_place
-from allmydata.util.stringutils import unicode_to_output, quote_output
+from allmydata.util.encodingutil import unicode_to_output, quote_output
 def add_line_to_aliasfile(aliasfile, alias, cap):

View File

@@ -9,7 +9,8 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
 from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
 from allmydata.util import time_format
 from allmydata.scripts import backupdb
-from allmydata.util.stringutils import listdir_unicode, open_unicode, quote_output, to_str
+from allmydata.util.encodingutil import quote_output, to_str
+from allmydata.util.fileutil import open_expanduser
 from allmydata.util.assertutil import precondition
@@ -166,11 +167,15 @@ class BackerUpper:
         compare_contents = {} # childname -> rocap
         try:
-            children = listdir_unicode(localpath)
+            children = os.listdir(localpath)
         except EnvironmentError:
             self.directories_skipped += 1
             self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))
             children = []
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            self.directories_skipped += 1
+            self.warn("WARNING: could not list directory %s due to an encoding error" % quote_output(localpath))
+            children = []
         for child in self.options.filter_listdir(children):
             assert isinstance(child, unicode), child
@@ -292,7 +297,7 @@ class BackerUpper:
         if must_upload:
             self.verboseprint("uploading %s.." % quote_output(childpath))
-            infileobj = open_unicode(childpath, "rb")
+            infileobj = open_expanduser(childpath, "rb")
             url = self.options['node-url'] + "uri"
             resp = do_http("PUT", url, infileobj)
             if resp.status not in (200, 201):

View File

@@ -5,7 +5,7 @@ from twisted.protocols.basic import LineOnlyReceiver
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import quote_output, quote_path
+from allmydata.util.encodingutil import quote_output, quote_path
 class Checker:
     pass

View File

@@ -8,25 +8,12 @@ from allmydata.scripts.common import get_alias, escape_path, \
      DefaultAliasMarker, TahoeError
 from allmydata.scripts.common_http import do_http, HTTPError
 from allmydata import uri
-from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode, \
-    abspath_expanduser_unicode, quote_output, to_str
+from allmydata.util.encodingutil import unicode_to_url, quote_output, to_str
+from allmydata.util import fileutil
+from allmydata.util.fileutil import open_expanduser, abspath_expanduser
 from allmydata.util.assertutil import precondition
-def _put_local_file(pathname, inf):
-    # TODO: create temporary file and move into place?
-    # TODO: move this to fileutil.
-    outf = open_unicode(pathname, "wb")
-    try:
-        while True:
-            data = inf.read(32768)
-            if not data:
-                break
-            outf.write(data)
-    finally:
-        outf.close()
 class MissingSourceError(TahoeError):
     def __init__(self, name):
         TahoeError.__init__(self, "No such file or directory %s" % quote_output(name))
@@ -81,7 +68,7 @@ class LocalFileSource:
         return True
     def open(self, caps_only):
-        return open_unicode(self.pathname, "rb")
+        return open_expanduser(self.pathname, "rb")
 class LocalFileTarget:
@@ -90,7 +77,7 @@ class LocalFileTarget:
         self.pathname = pathname
     def put_file(self, inf):
-        _put_local_file(self.pathname, inf)
+        fileutil.put_file(self.pathname, inf)
 class LocalMissingTarget:
@@ -99,7 +86,7 @@ class LocalMissingTarget:
         self.pathname = pathname
     def put_file(self, inf):
-        _put_local_file(self.pathname, inf)
+        fileutil.put_file(self.pathname, inf)
 class LocalDirectorySource:
@@ -114,7 +101,7 @@ class LocalDirectorySource:
         if self.children is not None:
             return
         self.children = {}
-        children = listdir_unicode(self.pathname)
+        children = os.listdir(self.pathname)
         for i,n in enumerate(children):
             self.progressfunc("examining %d of %d" % (i, len(children)))
             pn = os.path.join(self.pathname, n)
@@ -142,7 +129,7 @@ class LocalDirectoryTarget:
         if self.children is not None:
             return
         self.children = {}
-        children = listdir_unicode(self.pathname)
+        children = os.listdir(self.pathname)
         for i,n in enumerate(children):
             self.progressfunc("examining %d of %d" % (i, len(children)))
             n = unicode(n)
@@ -168,7 +155,7 @@ class LocalDirectoryTarget:
     def put_file(self, name, inf):
         precondition(isinstance(name, unicode), name)
         pathname = os.path.join(self.pathname, name)
-        _put_local_file(pathname, inf)
+        fileutil.put_file(pathname, inf)
     def set_children(self):
         pass
@@ -525,7 +512,7 @@ class Copier:
         rootcap, path = get_alias(self.aliases, destination_spec, None)
         if rootcap == DefaultAliasMarker:
             # no alias, so this is a local file
-            pathname = abspath_expanduser_unicode(path.decode('utf-8'))
+            pathname = abspath_expanduser(path.decode('utf-8'))
             if not os.path.exists(pathname):
                 t = LocalMissingTarget(pathname)
             elif os.path.isdir(pathname):
@@ -565,7 +552,7 @@ class Copier:
         rootcap, path = get_alias(self.aliases, source_spec, None)
         if rootcap == DefaultAliasMarker:
             # no alias, so this is a local file
-            pathname = abspath_expanduser_unicode(path.decode('utf-8'))
+            pathname = abspath_expanduser(path.decode('utf-8'))
             name = os.path.basename(pathname)
             if not os.path.exists(pathname):
                 raise MissingSourceError(source_spec)

View File

@@ -3,7 +3,7 @@ import urllib
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import open_unicode
+from allmydata.util.fileutil import open_expanduser
 def get(options):
     nodeurl = options['node-url']
@@ -27,7 +27,7 @@ def get(options):
     resp = do_http("GET", url)
     if resp.status in (200, 201,):
         if to_file:
-            outf = open_unicode(to_file, "wb")
+            outf = open_expanduser(to_file, "wb")
         else:
             outf = stdout
         while True:

View File

@@ -4,7 +4,7 @@ import simplejson
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import unicode_to_output, quote_output, is_printable_ascii, to_str
+from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_str
 def list(options):
     nodeurl = options['node-url']

View File

@@ -6,7 +6,7 @@ from allmydata.scripts.slow_operation import SlowOperationRunner
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import quote_output, quote_path
+from allmydata.util.encodingutil import quote_output, quote_path
 class FakeTransport:
     disconnecting = False

View File

@@ -2,7 +2,7 @@
 import urllib
 from allmydata.scripts.common_http import do_http, check_http_error
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
-from allmydata.util.stringutils import quote_output
+from allmydata.util.encodingutil import quote_output
 def mkdir(options):
     nodeurl = options['node-url']

View File

@@ -5,7 +5,7 @@ import simplejson
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import to_str
+from allmydata.util.encodingutil import to_str
 # this script is used for both 'mv' and 'ln'

View File

@@ -4,7 +4,8 @@ import urllib
 from allmydata.scripts.common_http import do_http, format_http_success, format_http_error
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
      UnknownAliasError
-from allmydata.util.stringutils import quote_output, open_unicode
+from allmydata.util.encodingutil import quote_output
+from allmydata.util.fileutil import open_expanduser
 def put(options):
     """
@@ -64,7 +65,7 @@ def put(options):
     if mutable:
         url += "?mutable=true"
     if from_file:
-        infileobj = open_unicode(from_file, "rb")
+        infileobj = open_expanduser(from_file, "rb")
     else:
         # do_http() can't use stdin directly: for one thing, we need a
         # Content-Length field. So we currently must copy it.

View File

@@ -4,7 +4,7 @@ from StringIO import StringIO
 from twisted.trial import unittest
 from allmydata.util import fileutil
-from allmydata.util.stringutils import listdir_unicode, get_filesystem_encoding, unicode_platform
+from allmydata.util.encodingutil import get_filesystem_encoding, unicode_platform
 from allmydata.util.assertutil import precondition
 from allmydata.scripts import backupdb
@@ -249,7 +249,7 @@ class BackupDB(unittest.TestCase):
         self.failUnless(bdb)
         self.writeto(u"f\u00f6\u00f6.txt", "foo.txt")
-        files = [fn for fn in listdir_unicode(unicode(basedir)) if fn.endswith(".txt")]
+        files = [fn for fn in os.listdir(unicode(basedir)) if fn.endswith(".txt")]
         self.failUnlessEqual(len(files), 1)
         foo_fn = os.path.join(basedir, files[0])
         #print foo_fn, type(foo_fn)

View File

@@ -31,8 +31,8 @@ from twisted.internet import threads # CLI tests use deferToThread
 from twisted.python import usage
 from allmydata.util.assertutil import precondition
-from allmydata.util.stringutils import listdir_unicode, open_unicode, unicode_platform, \
-    quote_output, get_output_encoding, get_argv_encoding, get_filesystem_encoding, \
+from allmydata.util.encodingutil import unicode_platform, quote_output, \
+    get_output_encoding, get_argv_encoding, get_filesystem_encoding, \
     unicode_to_output, to_str
 timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
@@ -439,9 +439,9 @@ class CLI(CLITestMixin, unittest.TestCase):
         fileutil.make_dirs(basedir)
         for name in filenames:
-            open_unicode(os.path.join(unicode(basedir), name), "wb").close()
+            open(os.path.join(unicode(basedir), name), "wb").close()
-        for file in listdir_unicode(unicode(basedir)):
+        for file in os.listdir(unicode(basedir)):
             self.failUnlessIn(normalize(file), filenames)
@@ -974,11 +974,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
         rel_fn = os.path.join(unicode(self.basedir), u"à trier.txt")
         # we make the file small enough to fit in a LIT file, for speed
         DATA = "short file"
-        f = open_unicode(rel_fn, "wb")
-        try:
-            f.write(DATA)
-        finally:
-            f.close()
+        fileutil.write(rel_fn, DATA)
         d = self.do_cli("create-alias", "tahoe")
@@ -1349,11 +1345,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase):
         self.set_up_grid()
         DATA1 = "unicode file content"
-        f = open_unicode(fn1, "wb")
-        try:
-            f.write(DATA1)
-        finally:
-            f.close()
+        fileutil.write(fn1, DATA1)
         fn2 = os.path.join(self.basedir, "Metallica")
         DATA2 = "non-unicode file content"

View File

@@ -13,9 +13,7 @@ TEST_FILENAMES = (
 # systems.
 if __name__ == "__main__":
-    import sys, os
-    import tempfile
-    import shutil
+    import sys
     import platform
     if len(sys.argv) != 2:
@@ -31,24 +29,8 @@ if __name__ == "__main__":
     print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
     print " output_encoding = '%s'" % sys.stdout.encoding
     print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
-    try:
-        tmpdir = tempfile.mkdtemp()
-        for fname in TEST_FILENAMES:
-            open(os.path.join(tmpdir, fname), 'w').close()
-        # Use Unicode API under Windows or MacOS X
-        if sys.platform in ('win32', 'darwin'):
-            dirlist = os.listdir(unicode(tmpdir))
-        else:
-            dirlist = os.listdir(tmpdir)
-        print " dirlist = %s" % repr(dirlist)
-    except:
-        print " # Oops, I cannot write filenames containing non-ascii characters"
     print
-    shutil.rmtree(tmpdir)
     sys.exit(0)
 from twisted.trial import unittest
@@ -56,10 +38,8 @@ from mock import patch
 import sys
 from allmydata.test.common_util import ReallyEqualMixin
-from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
-    unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \
-    FilenameEncodingError, get_output_encoding, _reload
+from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
+    unicode_to_output, unicode_platform, get_output_encoding, _reload
-from allmydata.dirnode import normalize
 from twisted.python import usage
@@ -102,61 +82,6 @@ class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
         _reload()
         self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
-    @patch('os.listdir')
-    def test_no_unicode_normalization(self, mock):
-        # Pretend to run on a Unicode platform.
-        # We normalized to NFC in 1.7beta, but we now don't.
-        orig_platform = sys.platform
-        try:
-            sys.platform = 'darwin'
-            mock.return_value = [Artonwall_nfd]
-            _reload()
-            self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
-        finally:
-            sys.platform = orig_platform
-# The following tests applies only to platforms which don't store filenames as
-# Unicode entities on the filesystem.
-class StringUtilsNonUnicodePlatform(unittest.TestCase):
-    def setUp(self):
-        # Mock sys.platform because unicode_platform() uses it
-        self.original_platform = sys.platform
-        sys.platform = 'linux'
-    def tearDown(self):
-        sys.platform = self.original_platform
-        _reload()
-    @patch('sys.getfilesystemencoding')
-    @patch('os.listdir')
-    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
-        # What happens if latin1-encoded filenames are encountered on an UTF-8
-        # filesystem?
-        mock_listdir.return_value = [
-            lumiere_nfc.encode('utf-8'),
-            lumiere_nfc.encode('latin1')]
-        mock_getfilesystemencoding.return_value = 'utf-8'
-        _reload()
-        self.failUnlessRaises(FilenameEncodingError,
-                              listdir_unicode,
-                              u'/dummy')
-        # We're trying to list a directory whose name cannot be represented in
-        # the filesystem encoding. This should fail.
-        mock_getfilesystemencoding.return_value = 'ascii'
-        _reload()
-        self.failUnlessRaises(FilenameEncodingError,
-                              listdir_unicode,
-                              u'/' + lumiere_nfc)
-    @patch('sys.getfilesystemencoding')
-    def test_open_unicode(self, mock):
-        mock.return_value = 'ascii'
-        _reload()
-        self.failUnlessRaises(FilenameEncodingError,
-                              open_unicode,
-                              lumiere_nfc, 'rb')
 class StringUtils(ReallyEqualMixin):
     def setUp(self):
@@ -202,56 +127,6 @@ class StringUtils(ReallyEqualMixin):
         _reload()
         self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
-    @patch('sys.getfilesystemencoding')
-    @patch('os.listdir')
-    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
-        if 'dirlist' not in dir(self):
-            return
-        try:
-            u"test".encode(self.filesystem_encoding)
-        except (LookupError, AttributeError):
-            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
-                                    "that we are testing for the benefit of a different platform."
-                                    % (self.filesystem_encoding,))
-        mock_listdir.return_value = self.dirlist
-        mock_getfilesystemencoding.return_value = self.filesystem_encoding
-        _reload()
-        filenames = listdir_unicode(u'/dummy')
-        self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
-                             set(TEST_FILENAMES))
-    @patch('sys.getfilesystemencoding')
-    @patch('__builtin__.open')
-    def test_open_unicode(self, mock_open, mock_getfilesystemencoding):
-        mock_getfilesystemencoding.return_value = self.filesystem_encoding
-        fn = u'/dummy_directory/' + lumiere_nfc + '.txt'
-        try:
-            u"test".encode(self.filesystem_encoding)
-        except (LookupError, AttributeError):
-            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
-                                    "that we are testing for the benefit of a different platform."
-                                    % (self.filesystem_encoding,))
-        _reload()
-        try:
-            open_unicode(fn, 'rb')
-        except FilenameEncodingError:
-            return
-        # Pass Unicode string to open() on Unicode platforms
-        if unicode_platform():
-            mock_open.assert_called_with(fn, 'rb')
-        # Pass correctly encoded bytestrings to open() on non-Unicode platforms
-        else:
-            fn_bytestring = fn.encode(self.filesystem_encoding)
-            mock_open.assert_called_with(fn_bytestring, 'rb')
 class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
     uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
@@ -261,7 +136,6 @@ class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
     filesystem_encoding = 'UTF-8'
     output_encoding = 'UTF-8'
     argv_encoding = 'UTF-8'
-    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
 class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
     uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
@@ -271,7 +145,6 @@ class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
     filesystem_encoding = 'ISO-8859-1'
     output_encoding = 'ISO-8859-1'
     argv_encoding = 'ISO-8859-1'
-    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
 class WindowsXP(StringUtils, unittest.TestCase):
     uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Stepping 2, AuthenticAMD'
@@ -280,7 +153,6 @@ class WindowsXP(StringUtils, unittest.TestCase):
     filesystem_encoding = 'mbcs'
     output_encoding = 'cp850'
     argv_encoding = 'ascii'
-    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
 class WindowsXP_UTF8(StringUtils, unittest.TestCase):
     uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Stepping 2, AuthenticAMD'
@@ -289,7 +161,6 @@ class WindowsXP_UTF8(StringUtils, unittest.TestCase):
     filesystem_encoding = 'mbcs'
     output_encoding = 'cp65001'
     argv_encoding = 'ascii'
-    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
 class WindowsVista(StringUtils, unittest.TestCase):
     uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
@@ -298,7 +169,6 @@ class WindowsVista(StringUtils, unittest.TestCase):
     filesystem_encoding = 'mbcs'
     output_encoding = 'cp850'
     argv_encoding = 'ascii'
-    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
 class MacOSXLeopard(StringUtils, unittest.TestCase):
     uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
@@ -308,7 +178,6 @@ class MacOSXLeopard(StringUtils, unittest.TestCase):
     filesystem_encoding = 'utf-8'
     output_encoding = 'UTF-8'
     argv_encoding = 'UTF-8'
-    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
 class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
     uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
@@ -316,7 +185,6 @@ class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
     filesystem_encoding = 'utf-8'
     output_encoding = 'US-ASCII'
     argv_encoding = 'US-ASCII'
-    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
 class OpenBSD(StringUtils, unittest.TestCase):
     uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'

View File

@@ -4,7 +4,6 @@ unicode and back.
 """
 import sys
-import os
 import re
 from allmydata.util.assertutil import precondition
 from twisted.python import usage
@@ -174,71 +173,3 @@ def unicode_platform():
     Does the current platform handle Unicode filenames natively?
     """
     return is_unicode_platform
-class FilenameEncodingError(Exception):
-    """
-    Filename cannot be encoded using the current encoding of your filesystem
-    (%s). Please configure your locale correctly or rename this file.
-    """
-    pass
-def listdir_unicode_fallback(path):
-    """
-    This function emulates a fallback Unicode API similar to one available
-    under Windows or MacOS X.
-    If badly encoded filenames are encountered, an exception is raised.
-    """
-    precondition(isinstance(path, unicode), path)
-    try:
-        byte_path = path.encode(filesystem_encoding)
-    except (UnicodeEncodeError, UnicodeDecodeError):
-        raise FilenameEncodingError(path)
-    try:
-        return [unicode(fn, filesystem_encoding) for fn in os.listdir(byte_path)]
-    except UnicodeDecodeError:
-        raise FilenameEncodingError(fn)
-def listdir_unicode(path):
-    """
-    Wrapper around listdir() which provides safe access to the convenient
-    Unicode API even under platforms that don't provide one natively.
-    """
-    precondition(isinstance(path, unicode), path)
-    # On Windows and MacOS X, the Unicode API is used
-    # On other platforms (ie. Unix systems), the byte-level API is used
-    if is_unicode_platform:
-        return os.listdir(path)
-    else:
-        return listdir_unicode_fallback(path)
-def open_unicode(path, mode):
-    """
-    Wrapper around open() which provides safe access to the convenient Unicode
-    API even under Unix.
-    """
-    precondition(isinstance(path, unicode), path)
-    if is_unicode_platform:
-        return open(os.path.expanduser(path), mode)
-    else:
-        try:
-            return open(os.path.expanduser(path.encode(filesystem_encoding)), mode)
-        except UnicodeEncodeError:
-            raise FilenameEncodingError(path)
-def abspath_expanduser_unicode(path):
-    precondition(isinstance(path, unicode), path)
-    if is_unicode_platform:
-        return os.path.abspath(os.path.expanduser(path))
-    else:
-        try:
-            pathstr = path.encode(filesystem_encoding)
-            return os.path.abspath(os.path.expanduser(pathstr)).decode(filesystem_encoding)
-        except (UnicodeEncodeError, UnicodeDecodeError):
-            raise FilenameEncodingError(path)

View File

@@ -208,3 +208,23 @@ def read(path):
         return rf.read()
     finally:
         rf.close()
+def put_file(pathname, inf):
+    # TODO: create temporary file and move into place?
+    outf = open_expanduser(pathname, "wb")
+    try:
+        while True:
+            data = inf.read(32768)
+            if not data:
+                break
+            outf.write(data)
+    finally:
+        outf.close()
+def open_expanduser(path, mode):
+    assert isinstance(path, unicode), path
+    return open(os.path.expanduser(path), mode)
+def abspath_expanduser(path):
+    assert isinstance(path, unicode), path
+    return os.path.abspath(os.path.expanduser(path))
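As a usage sketch of the helpers added above (the source and destination paths here are hypothetical), put_file expands "~" in a unicode destination path and copies an open stream into it in 32 KiB chunks:

    from allmydata.util import fileutil

    inf = open("local-input.bin", "rb")   # hypothetical source stream
    try:
        # The destination must be a unicode path; "~" is expanded by
        # open_expanduser inside put_file.
        fileutil.put_file(u"~/backups/output.bin", inf)
    finally:
        inf.close()

    # abspath_expanduser resolves the same kind of user-relative unicode path.
    print fileutil.abspath_expanduser(u"~/backups")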

View File

@@ -12,7 +12,7 @@ from allmydata.interfaces import ExistingChildError, NoSuchChildError, \
      MustBeReadonlyError, MustNotBeUnknownRWError
 from allmydata.mutable.common import UnrecoverableFileError
 from allmydata.util import abbreviate
-from allmydata.util.stringutils import to_str
+from allmydata.util.encodingutil import to_str
 class IOpHandleTable(Interface):
     pass