Correct stringutils->encodingutil patch to be the newer version, rather than the old version that was committed in error.

This commit is contained in:
david-sarah 2010-07-17 18:34:35 -07:00
parent 0d79a4a7d1
commit a8161c915a
9 changed files with 217 additions and 46 deletions

View File

@ -9,8 +9,7 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
from allmydata.util import time_format
from allmydata.scripts import backupdb
from allmydata.util.encodingutil import quote_output, to_str
from allmydata.util.fileutil import open_expanduser
from allmydata.util.encodingutil import listdir_unicode, quote_output, to_str, FilenameEncodingError
from allmydata.util.assertutil import precondition
@ -167,14 +166,14 @@ class BackerUpper:
compare_contents = {} # childname -> rocap
try:
children = os.listdir(localpath)
children = listdir_unicode(localpath)
except EnvironmentError:
self.directories_skipped += 1
self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))
children = []
except (UnicodeEncodeError, UnicodeDecodeError):
except FilenameEncodingError:
self.directories_skipped += 1
self.warn("WARNING: could not list directory %s due to an encoding error" % quote_output(localpath))
self.warn("WARNING: could not list directory %s due to a filename encoding error" % quote_output(localpath))
children = []
for child in self.options.filter_listdir(children):
@ -297,7 +296,7 @@ class BackerUpper:
if must_upload:
self.verboseprint("uploading %s.." % quote_output(childpath))
infileobj = open_expanduser(childpath, "rb")
infileobj = open(childpath, "rb")
url = self.options['node-url'] + "uri"
resp = do_http("PUT", url, infileobj)
if resp.status not in (200, 201):

View File

@ -8,9 +8,8 @@ from allmydata.scripts.common import get_alias, escape_path, \
DefaultAliasMarker, TahoeError
from allmydata.scripts.common_http import do_http, HTTPError
from allmydata import uri
from allmydata.util.encodingutil import unicode_to_url, quote_output, to_str
from allmydata.util import fileutil
from allmydata.util.fileutil import open_expanduser, abspath_expanduser
from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, to_str
from allmydata.util.assertutil import precondition
@ -68,7 +67,7 @@ class LocalFileSource:
return True
def open(self, caps_only):
return open_expanduser(self.pathname, "rb")
return open(os.path.expanduser(self.pathname), "rb")
class LocalFileTarget:
@ -101,7 +100,7 @@ class LocalDirectorySource:
if self.children is not None:
return
self.children = {}
children = os.listdir(self.pathname)
children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
pn = os.path.join(self.pathname, n)
@ -129,7 +128,7 @@ class LocalDirectoryTarget:
if self.children is not None:
return
self.children = {}
children = os.listdir(self.pathname)
children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
n = unicode(n)
@ -512,7 +511,7 @@ class Copier:
rootcap, path = get_alias(self.aliases, destination_spec, None)
if rootcap == DefaultAliasMarker:
# no alias, so this is a local file
pathname = abspath_expanduser(path.decode('utf-8'))
pathname = os.path.abspath(os.path.expanduser(path.decode('utf-8')))
if not os.path.exists(pathname):
t = LocalMissingTarget(pathname)
elif os.path.isdir(pathname):
@ -552,7 +551,7 @@ class Copier:
rootcap, path = get_alias(self.aliases, source_spec, None)
if rootcap == DefaultAliasMarker:
# no alias, so this is a local file
pathname = abspath_expanduser(path.decode('utf-8'))
pathname = os.path.abspath(os.path.expanduser(path.decode('utf-8')))
name = os.path.basename(pathname)
if not os.path.exists(pathname):
raise MissingSourceError(source_spec)

View File

@ -1,9 +1,8 @@
import urllib
import os, urllib
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util.fileutil import open_expanduser
def get(options):
nodeurl = options['node-url']
@ -27,7 +26,7 @@ def get(options):
resp = do_http("GET", url)
if resp.status in (200, 201,):
if to_file:
outf = open_expanduser(to_file, "wb")
outf = open(os.path.expanduser(to_file), "wb")
else:
outf = stdout
while True:

View File

@ -1,11 +1,11 @@
import os
from cStringIO import StringIO
import urllib
from allmydata.scripts.common_http import do_http, format_http_success, format_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.util.encodingutil import quote_output
from allmydata.util.fileutil import open_expanduser
def put(options):
"""
@ -65,7 +65,7 @@ def put(options):
if mutable:
url += "?mutable=true"
if from_file:
infileobj = open_expanduser(from_file, "rb")
infileobj = open(os.path.expanduser(from_file), "rb")
else:
# do_http() can't use stdin directly: for one thing, we need a
# Content-Length field. So we currently must copy it.

View File

@ -4,7 +4,7 @@ from StringIO import StringIO
from twisted.trial import unittest
from allmydata.util import fileutil
from allmydata.util.encodingutil import get_filesystem_encoding, unicode_platform
from allmydata.util.encodingutil import listdir_unicode, get_filesystem_encoding, unicode_platform
from allmydata.util.assertutil import precondition
from allmydata.scripts import backupdb
@ -249,7 +249,7 @@ class BackupDB(unittest.TestCase):
self.failUnless(bdb)
self.writeto(u"f\u00f6\u00f6.txt", "foo.txt")
files = [fn for fn in os.listdir(unicode(basedir)) if fn.endswith(".txt")]
files = [fn for fn in listdir_unicode(unicode(basedir)) if fn.endswith(".txt")]
self.failUnlessEqual(len(files), 1)
foo_fn = os.path.join(basedir, files[0])
#print foo_fn, type(foo_fn)

View File

@ -31,8 +31,8 @@ from twisted.internet import threads # CLI tests use deferToThread
from twisted.python import usage
from allmydata.util.assertutil import precondition
from allmydata.util.encodingutil import unicode_platform, quote_output, \
get_output_encoding, get_argv_encoding, get_filesystem_encoding, \
from allmydata.util.encodingutil import listdir_unicode, unicode_platform, \
quote_output, get_output_encoding, get_argv_encoding, get_filesystem_encoding, \
unicode_to_output, to_str
timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
@ -441,7 +441,7 @@ class CLI(CLITestMixin, unittest.TestCase):
for name in filenames:
open(os.path.join(unicode(basedir), name), "wb").close()
for file in os.listdir(unicode(basedir)):
for file in listdir_unicode(unicode(basedir)):
self.failUnlessIn(normalize(file), filenames)

View File

@ -13,7 +13,9 @@ TEST_FILENAMES = (
# systems.
if __name__ == "__main__":
import sys
import sys, os
import tempfile
import shutil
import platform
if len(sys.argv) != 2:
@ -21,7 +23,7 @@ if __name__ == "__main__":
sys.exit(1)
print
print "class MyWeirdOS(StringUtils, unittest.TestCase):"
print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
print " uname = '%s'" % ' '.join(platform.uname())
if sys.platform != "win32":
print " argv = %s" % repr(sys.argv[1])
@ -29,21 +31,39 @@ if __name__ == "__main__":
print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
print " output_encoding = '%s'" % sys.stdout.encoding
print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
try:
tmpdir = tempfile.mkdtemp()
for fname in TEST_FILENAMES:
open(os.path.join(tmpdir, fname), 'w').close()
# Use Unicode API under Windows or MacOS X
if sys.platform in ('win32', 'darwin'):
dirlist = os.listdir(unicode(tmpdir))
else:
dirlist = os.listdir(tmpdir)
print " dirlist = %s" % repr(dirlist)
except:
print " # Oops, I cannot write filenames containing non-ascii characters"
print
shutil.rmtree(tmpdir)
sys.exit(0)
from twisted.trial import unittest
from mock import patch
import sys, locale
import os, sys, locale
from allmydata.test.common_util import ReallyEqualMixin
from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
unicode_to_output, unicode_platform, get_output_encoding, _reload
unicode_to_output, unicode_platform, listdir_unicode, FilenameEncodingError, \
get_output_encoding, get_filesystem_encoding, _reload
from allmydata.dirnode import normalize
from twisted.python import usage
class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
def tearDown(self):
_reload()
@ -103,8 +123,55 @@ class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
_reload()
self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
# os.listdir is replaced by a mock, so no real directory is read; the mock
# object is passed in as the `mock` argument.
@patch('os.listdir')
def test_no_unicode_normalization(self, mock):
    # Pretend to run on a Unicode platform.
    # We normalized to NFC in 1.7beta, but we now don't.
    orig_platform = sys.platform
    try:
        sys.platform = 'darwin'
        # The faked listing contains a single name, Artonwall_nfd (defined
        # elsewhere in this module; presumably an NFD-form unicode string --
        # confirm).
        mock.return_value = [Artonwall_nfd]
        # NOTE(review): _reload() presumably makes encodingutil pick up the
        # faked sys.platform -- confirm against its definition.
        _reload()

        # The name must come back exactly as os.listdir returned it.
        self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
    finally:
        # Always restore the real platform, even if the assertion fails.
        sys.platform = orig_platform
class StringUtils(ReallyEqualMixin):
# The following tests apply only to platforms that don't store filenames as
# Unicode entities on the filesystem.
class EncodingUtilNonUnicodePlatform(unittest.TestCase):
    # Checks that listdir_unicode() raises FilenameEncodingError when
    # sys.platform is forced to 'linux' and either a listed name or the
    # directory path itself does not fit the (mocked) filesystem encoding.

    def setUp(self):
        # Mock sys.platform because unicode_platform() uses it
        self.original_platform = sys.platform
        sys.platform = 'linux'

    def tearDown(self):
        # Restore the real platform first, then call _reload()
        # (presumably so encodingutil recomputes its platform-derived
        # state -- confirm against _reload's definition).
        sys.platform = self.original_platform
        _reload()

    # Stacked @patch decorators are applied bottom-up: the os.listdir mock is
    # the first mock argument, the sys.getfilesystemencoding mock the second.
    @patch('sys.getfilesystemencoding')
    @patch('os.listdir')
    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
        # What happens if latin1-encoded filenames are encountered on a UTF-8
        # filesystem?
        mock_listdir.return_value = [
            lumiere_nfc.encode('utf-8'),
            lumiere_nfc.encode('latin1')]

        mock_getfilesystemencoding.return_value = 'utf-8'
        # _reload() is called after each change to the mocked encoding,
        # presumably so that the new value takes effect -- confirm.
        _reload()
        self.failUnlessRaises(FilenameEncodingError,
                              listdir_unicode,
                              u'/dummy')

        # We're trying to list a directory whose name cannot be represented in
        # the filesystem encoding. This should fail.
        mock_getfilesystemencoding.return_value = 'ascii'
        _reload()
        self.failUnlessRaises(FilenameEncodingError,
                              listdir_unicode,
                              u'/' + lumiere_nfc)
class EncodingUtil(ReallyEqualMixin):
def setUp(self):
# Mock sys.platform because unicode_platform() uses it
self.original_platform = sys.platform
@ -148,8 +215,74 @@ class StringUtils(ReallyEqualMixin):
_reload()
self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
@patch('sys.getfilesystemencoding')
@patch('os.listdir')
def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
if 'dirlist' not in dir(self):
return
class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
try:
u"test".encode(self.filesystem_encoding)
except (LookupError, AttributeError):
raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
"that we are testing for the benefit of a different platform."
% (self.filesystem_encoding,))
mock_listdir.return_value = self.dirlist
mock_getfilesystemencoding.return_value = self.filesystem_encoding
_reload()
filenames = listdir_unicode(u'/dummy')
self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
set(TEST_FILENAMES))
class StdlibUnicode(unittest.TestCase):
"""This mainly tests that some of the stdlib functions support Unicode paths, but also that
listdir_unicode works for valid filenames."""
def skip_if_cannot_represent_filename(self, u):
enc = get_filesystem_encoding()
if not unicode_platform():
try:
u.encode(enc)
except UnicodeEncodeError:
raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
def test_mkdir_open_exists_abspath_listdir_expanduser(self):
self.skip_if_cannot_represent_filename(lumiere_nfc)
try:
os.mkdir(lumiere_nfc)
except EnvironmentError, e:
raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
"does not support Unicode, even though the platform does." % (e,))
fn = lumiere_nfc + '/' + lumiere_nfc + '.txt'
open(fn, 'wb').close()
self.failUnless(os.path.exists(fn))
self.failUnless(os.path.exists(os.path.abspath(fn)))
filenames = listdir_unicode(lumiere_nfc)
# We only require that the listing includes a filename that is canonically equivalent
# to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
expanded = os.path.expanduser("~/" + lumiere_nfc)
self.failIfIn("~", expanded)
self.failUnless(expanded.endswith(lumiere_nfc), expanded)
@patch('sys.getfilesystemencoding')
def test_open_unrepresentable(self, mock):
if unicode_platform():
raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
mock.return_value = 'ascii'
self.failUnlessRaises(UnicodeEncodeError, open, lumiere_nfc, 'rb')
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
@ -157,8 +290,9 @@ class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
filesystem_encoding = 'UTF-8'
output_encoding = 'UTF-8'
argv_encoding = 'UTF-8'
dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
output = 'lumi\xe8re'
argv = 'lumi\xe8re'
@ -166,32 +300,36 @@ class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
filesystem_encoding = 'ISO-8859-1'
output_encoding = 'ISO-8859-1'
argv_encoding = 'ISO-8859-1'
dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
class WindowsXP(StringUtils, unittest.TestCase):
class WindowsXP(EncodingUtil, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
output = 'lumi\x8are'
platform = 'win32'
filesystem_encoding = 'mbcs'
output_encoding = 'cp850'
argv_encoding = 'ascii'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
class WindowsXP_UTF8(StringUtils, unittest.TestCase):
class WindowsXP_UTF8(EncodingUtil, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
output = 'lumi\xc3\xa8re'
platform = 'win32'
filesystem_encoding = 'mbcs'
output_encoding = 'cp65001'
argv_encoding = 'ascii'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
class WindowsVista(StringUtils, unittest.TestCase):
class WindowsVista(EncodingUtil, unittest.TestCase):
uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
output = 'lumi\x8are'
platform = 'win32'
filesystem_encoding = 'mbcs'
output_encoding = 'cp850'
argv_encoding = 'ascii'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
class MacOSXLeopard(StringUtils, unittest.TestCase):
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
@ -199,15 +337,17 @@ class MacOSXLeopard(StringUtils, unittest.TestCase):
filesystem_encoding = 'utf-8'
output_encoding = 'UTF-8'
argv_encoding = 'UTF-8'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
platform = 'darwin'
filesystem_encoding = 'utf-8'
output_encoding = 'US-ASCII'
argv_encoding = 'US-ASCII'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(StringUtils, unittest.TestCase):
class OpenBSD(EncodingUtil, unittest.TestCase):
uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
platform = 'openbsd4'
filesystem_encoding = '646'

View File

@ -4,6 +4,7 @@ unicode and back.
"""
import sys
import os
import re
from allmydata.util.assertutil import precondition
from twisted.python import usage
@ -173,3 +174,44 @@ def unicode_platform():
Does the current platform handle Unicode filenames natively?
"""
return is_unicode_platform
class FilenameEncodingError(Exception):
    # Raised by listdir_unicode_fallback() with the offending path or filename
    # as its only argument. The "%s" in the docstring below is not filled in
    # by any code visible here.
    """
    Filename cannot be encoded using the current encoding of your filesystem
    (%s). Please configure your locale correctly or rename this file.
    """
def listdir_unicode_fallback(path):
    """
    This function emulates a fallback Unicode API similar to one available
    under Windows or MacOS X.

    `path` must be a unicode object. It is encoded with the module-level
    `filesystem_encoding` before being handed to os.listdir(), and every
    name returned is decoded with that same encoding.

    Returns a list of unicode filenames.

    If badly encoded filenames are encountered, an exception is raised:
    FilenameEncodingError, carrying `path` when the path itself cannot be
    encoded, or the offending byte-string name when an entry cannot be
    decoded.
    """
    precondition(isinstance(path, unicode), path)

    try:
        byte_path = path.encode(filesystem_encoding)
    except (UnicodeEncodeError, UnicodeDecodeError):
        raise FilenameEncodingError(path)

    # Decode one name at a time with an explicit loop variable. The previous
    # list comprehension relied on its loop variable leaking into the
    # enclosing scope to report the failing name, which only works under
    # Python 2 scoping rules.
    filenames = []
    for fn in os.listdir(byte_path):
        try:
            filenames.append(unicode(fn, filesystem_encoding))
        except UnicodeDecodeError:
            raise FilenameEncodingError(fn)
    return filenames
def listdir_unicode(path):
    """
    Wrapper around listdir() which provides safe access to the convenient
    Unicode API even under platforms that don't provide one natively.

    `path` must be a unicode object (enforced by a precondition). The
    result is whatever os.listdir(path) returns on Unicode platforms, and
    the result of listdir_unicode_fallback(path) everywhere else.
    """
    precondition(isinstance(path, unicode), path)

    if not is_unicode_platform:
        # On other platforms (i.e. Unix systems), the byte-level API is used
        return listdir_unicode_fallback(path)

    # On Windows and MacOS X, the Unicode API is used
    return os.listdir(path)

View File

@ -211,7 +211,7 @@ def read(path):
def put_file(pathname, inf):
# TODO: create temporary file and move into place?
outf = open_expanduser(pathname, "wb")
outf = open(os.path.expanduser(pathname), "wb")
try:
while True:
data = inf.read(32768)
@ -220,11 +220,3 @@ def put_file(pathname, inf):
outf.write(data)
finally:
outf.close()
def open_expanduser(path, mode):
assert isinstance(path, unicode), path
return open(os.path.expanduser(path), mode)
def abspath_expanduser(path):
assert isinstance(path, unicode), path
return os.path.abspath(os.path.expanduser(path))