mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-01-15 09:19:49 +00:00
93d4a8373f
s31e = skip_if_cannot_represent_filename
636 lines
25 KiB
Python
636 lines
25 KiB
Python
from __future__ import print_function
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import unicode_literals
|
|
|
|
from future.utils import PY2, PY3
|
|
if PY2:
|
|
# We don't import str because omg way too ambiguous in this context.
|
|
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401
|
|
|
|
from past.builtins import unicode
|
|
|
|
# "lumiere" with an e-grave, spelled with the single precomposed code point
# U+00E8 (i.e. NFC form).
lumiere_nfc = u"lumi\u00E8re"

# One filename in both canonical normalizations: NFC uses the precomposed
# U+00C4, NFD uses a plain "A" followed by the combining diaeresis U+0308.
Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3"

# Names created on disk by the generator script below, and the set the
# per-platform listdir tests expect back after normalization.
TEST_FILENAMES = (Artonwall_nfc, u'test_file', u'Blah blah.txt')
|
|
|
|
# The following main helps to generate a test class for other operating
|
|
# systems.
|
|
|
|
if __name__ == "__main__":
    # Generator script: prints the source of an EncodingUtil test class
    # describing how the current OS / locale handles the non-ASCII argument
    # and the TEST_FILENAMES, so it can be pasted into this module.
    import os
    import platform
    import shutil
    import sys
    import tempfile

    if len(sys.argv) != 2:
        print("Usage: %s lumi<e-grave>re" % sys.argv[0])
        sys.exit(1)

    if sys.platform == "win32":
        try:
            from allmydata.windows.fixups import initialize
        except ImportError:
            print("set PYTHONPATH to the src directory")
            sys.exit(1)
        initialize()

    print()
    print("class MyWeirdOS(EncodingUtil, unittest.TestCase):")
    print(" uname = '%s'" % ' '.join(platform.uname()))
    print(" argv = %s" % repr(sys.argv[1]))
    print(" platform = '%s'" % sys.platform)
    print(" filesystem_encoding = '%s'" % sys.getfilesystemencoding())
    print(" io_encoding = '%s'" % sys.stdout.encoding)

    # Create the directory outside the best-effort block so that `tmpdir` is
    # always bound when the cleanup below runs.
    tmpdir = tempfile.mkdtemp()
    try:
        try:
            for fname in TEST_FILENAMES:
                open(os.path.join(tmpdir, fname), 'w').close()

            # On Python 2, listing directories returns unicode under Windows or
            # MacOS X if the input is unicode. On Python 3, it always returns
            # Unicode.
            if PY2 and sys.platform in ('win32', 'darwin'):
                dirlist = os.listdir(unicode(tmpdir))
            else:
                dirlist = os.listdir(tmpdir)

            print(" dirlist = %s" % repr(dirlist))
        except Exception:
            # Best effort: platforms that cannot create these names still get
            # a usable class, just without a `dirlist` attribute.
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            print(" # Oops, I cannot write filenames containing non-ascii characters")
        print()
    finally:
        # Always remove the scratch directory, even on an unexpected error.
        shutil.rmtree(tmpdir)
    sys.exit(0)
|
|
|
|
|
|
import os, sys, locale
|
|
from unittest import skipIf
|
|
|
|
from twisted.trial import unittest
|
|
|
|
from twisted.python.filepath import FilePath
|
|
|
|
from allmydata.test.common_util import (
|
|
ReallyEqualMixin, skip_if_cannot_represent_filename,
|
|
)
|
|
from allmydata.util import encodingutil, fileutil
|
|
from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
|
|
unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
|
|
quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \
|
|
get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \
|
|
to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \
|
|
unicode_to_argv
|
|
from twisted.python import usage
|
|
|
|
|
|
class MockStdout(object):
    """Bare stand-in for ``sys.stdout``; tests assign ``encoding`` on instances."""
|
|
|
|
class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
    """Encoding detection and error-path tests for encodingutil."""

    def setUp(self):
        # The tests below call _reload() (or override io_encoding) while
        # sys.stdout / sys.platform / locale are patched. Registering
        # _reload as the *first* cleanup makes it run last (cleanups are
        # LIFO), i.e. after every self.patch() has been undone, so the
        # mocked state does not leak into later tests. The other test
        # classes in this module reset state with _reload() as well.
        self.addCleanup(_reload)

    def test_get_io_encoding(self):
        """get_io_encoding() follows sys.stdout.encoding after _reload()."""
        mock_stdout = MockStdout()
        self.patch(sys, 'stdout', mock_stdout)

        mock_stdout.encoding = 'UTF-8'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

        mock_stdout.encoding = 'cp65001'
        _reload()
        self.assertEqual(get_io_encoding(), 'utf-8')

        mock_stdout.encoding = 'koi8-r'
        expected = 'utf-8' if sys.platform == "win32" else 'koi8-r'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        mock_stdout.encoding = 'nonexistent_encoding'
        if sys.platform == "win32":
            _reload()
            self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
        else:
            self.failUnlessRaises(AssertionError, _reload)

    def test_get_io_encoding_not_from_stdout(self):
        """Without a usable stdout encoding, the locale's preferred one is used."""
        preferredencoding = 'koi8-r'

        def call_locale_getpreferredencoding():
            # Reads the enclosing local at call time, so the rebinding to
            # None further down is observed by later _reload() calls.
            return preferredencoding
        self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding)

        # At this point the mock has no `encoding` attribute at all.
        mock_stdout = MockStdout()
        self.patch(sys, 'stdout', mock_stdout)

        expected = 'utf-8' if sys.platform == "win32" else 'koi8-r'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        mock_stdout.encoding = None
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        preferredencoding = None
        _reload()
        self.assertEqual(get_io_encoding(), 'utf-8')

    def test_argv_to_unicode(self):
        """latin1 bytes are rejected with UsageError when io_encoding is utf-8."""
        # Patched rather than assigned, so the override is undone afterwards.
        self.patch(encodingutil, 'io_encoding', 'utf-8')
        self.failUnlessRaises(usage.UsageError,
                              argv_to_unicode,
                              lumiere_nfc.encode('latin1'))

    @skipIf(PY3, "Python 2 only.")
    def test_unicode_to_output(self):
        """An e-grave cannot be encoded when io_encoding is koi8-r."""
        self.patch(encodingutil, 'io_encoding', 'koi8-r')
        self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)

    def test_no_unicode_normalization(self):
        # Pretend to run on a Unicode platform.
        # listdir_unicode normalized to NFC in 1.7beta, but now doesn't.

        def call_os_listdir(path):
            return [Artonwall_nfd]
        self.patch(os, 'listdir', call_os_listdir)
        self.patch(sys, 'platform', 'darwin')

        _reload()
        self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
|
|
|
|
|
|
# The following tests apply only to platforms that don't store filenames as
|
|
# Unicode entities on the filesystem.
|
|
class EncodingUtilNonUnicodePlatform(unittest.TestCase):
    """listdir_unicode() failure modes with sys.platform forced to 'linux'."""

    @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.")
    def setUp(self):
        # unicode_platform() looks at sys.platform, so swap in a fake value
        # and remember the real one for tearDown.
        self.original_platform, sys.platform = sys.platform, 'linux'

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def test_listdir_unicode(self):
        # Scenario 1: a UTF-8 filesystem that contains a latin1-encoded name
        # next to a correctly encoded one.
        def fake_listdir(path):
            return [lumiere_nfc.encode(codec) for codec in ('utf-8', 'latin1')]
        self.patch(os, 'listdir', fake_listdir)

        # The lambda reads this local lazily, so rebinding it below changes
        # what the patched sys.getfilesystemencoding() reports.
        reported_fs_encoding = 'utf-8'
        self.patch(sys, 'getfilesystemencoding', lambda: reported_fs_encoding)

        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy')

        # Scenario 2: the directory name itself cannot be represented in the
        # filesystem encoding. This should fail.
        reported_fs_encoding = 'ascii'
        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/' + lumiere_nfc)
|
|
|
|
|
|
class EncodingUtil(ReallyEqualMixin):
    """Mixin of encodingutil tests driven by class attributes.

    setUp() reads ``self.platform``; individual tests additionally read
    ``io_encoding``, ``filesystem_encoding``, ``argv`` and ``dirlist``.
    Tests guarded on ``argv`` or ``dirlist`` return early (and therefore
    pass) when the attribute is not defined.
    """

    def setUp(self):
        self.original_platform = sys.platform
        sys.platform = self.platform

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def test_argv_to_unicode(self):
        if not hasattr(self, 'argv'):
            return

        mock_stdout = MockStdout()
        mock_stdout.encoding = self.io_encoding
        self.patch(sys, 'stdout', mock_stdout)

        argu = lumiere_nfc
        argv = self.argv
        _reload()
        self.failUnlessReallyEqual(argv_to_unicode(argv), argu)

    def test_unicode_to_url(self):
        # This used to be failUnless(value, expected), which treats the
        # second argument as the failure *message* and only checks that the
        # value is truthy. Compare the result against the expectation.
        self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re")

    @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.")
    def test_unicode_to_output_py2(self):
        if not hasattr(self, 'argv'):
            return

        mock_stdout = MockStdout()
        mock_stdout.encoding = self.io_encoding
        self.patch(sys, 'stdout', mock_stdout)

        _reload()
        self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)

    @skipIf(PY2, "Python 3 only.")
    def test_unicode_to_output_py3(self):
        self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc)

    @skipIf(PY3, "Python 2 only.")
    def test_unicode_to_argv_py2(self):
        """unicode_to_argv() converts to bytes on Python 2."""
        self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding))

    @skipIf(PY2, "Python 3 only.")
    def test_unicode_to_argv_py3(self):
        """unicode_to_argv() is noop on Python 3."""
        self.assertEqual(unicode_to_argv("abc"), "abc")

    # The skip reason used to read "Python 3 only.", the opposite of the
    # condition: this test is skipped *on* Python 3.
    @skipIf(PY3, "Python 2 only.")
    def test_unicode_platform_py2(self):
        # Expected unicode_platform() result for each simulated sys.platform.
        matrix = {
            'linux2': False,
            'linux3': False,
            'openbsd4': False,
            'win32': True,
            'darwin': True,
        }

        _reload()
        self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])

    @skipIf(PY2, "Python 3 isn't Python 2.")
    def test_unicode_platform_py3(self):
        _reload()
        self.failUnlessReallyEqual(unicode_platform(), True)

    def test_listdir_unicode(self):
        if not hasattr(self, 'dirlist'):
            return

        # The recorded encoding may not exist as a codec on the machine
        # running the tests (e.g. 'mbcs' away from Windows).
        try:
            u"test".encode(self.filesystem_encoding)
        except (LookupError, AttributeError):
            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
                                    "that we are testing for the benefit of a different platform."
                                    % (self.filesystem_encoding,))

        def call_os_listdir(path):
            if PY2:
                return self.dirlist
            else:
                # Python 3 always lists unicode filenames:
                return [d.decode(self.filesystem_encoding) if isinstance(d, bytes)
                        else d
                        for d in self.dirlist]

        self.patch(os, 'listdir', call_os_listdir)

        def call_sys_getfilesystemencoding():
            return self.filesystem_encoding
        self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)

        _reload()
        filenames = listdir_unicode(u'/dummy')

        # Compare normalized names as sets: recorded listings differ in
        # order and in normalization form between platforms.
        self.failUnlessEqual({encodingutil.normalize(fname) for fname in filenames},
                             set(TEST_FILENAMES))
|
|
|
|
|
|
class StdlibUnicode(unittest.TestCase):
    """Checks that the stdlib calls we rely on accept Unicode paths, and that
    listdir_unicode copes with valid filenames."""

    def test_mkdir_open_exists_abspath_listdir_expanduser(self):
        skip_if_cannot_represent_filename(lumiere_nfc)

        try:
            os.mkdir(lumiere_nfc)
        except EnvironmentError as e:
            raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
                                    "does not support Unicode, even though the platform does." % (e,))

        # Create <lumiere>/<lumiere>.txt and check it is visible both by its
        # relative name and joined onto the current directory.
        fn = u'/'.join([lumiere_nfc, lumiere_nfc + u'.txt'])
        with open(fn, 'wb'):
            pass
        self.failUnless(os.path.exists(fn))

        # Python 2 needs os.getcwdu() to obtain a unicode working directory.
        getcwdu = os.getcwdu if PY2 else os.getcwd
        self.failUnless(os.path.exists(os.path.join(getcwdu(), fn)))

        # We only require that the listing includes a filename that is canonically equivalent
        # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
        listed = listdir_unicode(lumiere_nfc)
        normalized = {encodingutil.normalize(entry) for entry in listed}
        self.failUnlessIn(lumiere_nfc + u".txt", normalized)

        expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
        self.failIfIn(u"~", expanded)
        self.failUnless(expanded.endswith(lumiere_nfc), expanded)

    def test_open_unrepresentable(self):
        if unicode_platform():
            raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")

        enc = get_filesystem_encoding()
        fn = u'\u2621.txt'
        try:
            fn.encode(enc)
        except UnicodeEncodeError:
            # The name really is unrepresentable, so open() must refuse it.
            self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
        else:
            raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
|
|
|
|
|
|
class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
    """Expectation tables for quote_output() under ascii, latin1 and utf-8."""

    def tearDown(self):
        # test_quote_output_default patches encodingutil.io_encoding; reload
        # the module state once the test is over.
        _reload()

    def _check(self, inp, out, enc, optional_quotes, quote_newlines):
        """Assert that quote_output(inp) yields ``out``.

        ``enc`` is passed through as the ``encoding`` argument (None is used
        by test_quote_output_default). When ``optional_quotes`` is true, the
        ``quotemarks=False`` result is expected to be ``out`` without its
        first and last character. Unless ``out`` is a b-prefixed literal, the
        same expectations are re-checked with ``inp`` converted to the other
        string type (bytes <-> unicode via UTF-8).
        """
        if PY3 and isinstance(out, bytes):
            out = out.decode(enc or encodingutil.io_encoding)
        out2 = out
        if optional_quotes:
            # Strip the surrounding quote characters.
            out2 = out2[1:-1]
        self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out)
        self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
        if out[0:2] == 'b"':
            # Expected output is a b"..." literal: no cross-type re-check.
            pass
        elif isinstance(inp, bytes):
            try:
                unicode_inp = inp.decode("utf-8")
            except UnicodeDecodeError:
                # Some things decode on Python 2, but not Python 3...
                return
            self.failUnlessReallyEqual(quote_output(unicode_inp, encoding=enc, quote_newlines=quote_newlines), out)
            self.failUnlessReallyEqual(quote_output(unicode_inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
        else:
            try:
                bytes_inp = inp.encode('utf-8')
            except UnicodeEncodeError:
                # Some things encode on Python 2, but not Python 3, e.g.
                # surrogates like u"\uDC00\uD800"...
                return
            self.failUnlessReallyEqual(quote_output(bytes_inp, encoding=enc, quote_newlines=quote_newlines), out)
            self.failUnlessReallyEqual(quote_output(bytes_inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)

    def _test_quote_output_all(self, enc):
        """Run the cases whose expected output is the same for every ``enc``."""
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            if PY3:
                # Result is always Unicode on Python 3
                out = out.decode("ascii")
            self._check(inp, out, enc, optional_quotes, quote_newlines)

        # optional single quotes
        check(b"foo", b"'foo'", True)
        check(b"\\", b"'\\'", True)
        check(b"$\"`", b"'$\"`'", True)
        check(b"\n", b"'\n'", True, quote_newlines=False)

        # mandatory single quotes
        check(b"\"", b"'\"'")

        # double quotes
        check(b"'", b"\"'\"")
        check(b"\n", b"\"\\x0a\"", quote_newlines=True)
        check(b"\x00", b"\"\\x00\"")

        # invalid Unicode and astral planes
        check(u"\uFDD0\uFDEF", b"\"\\ufdd0\\ufdef\"")
        check(u"\uDC00\uD800", b"\"\\udc00\\ud800\"")
        check(u"\uDC00\uD800\uDC00", b"\"\\udc00\\U00010000\"")
        check(u"\uD800\uDC00", b"\"\\U00010000\"")
        check(u"\uD800\uDC01", b"\"\\U00010001\"")
        check(u"\uD801\uDC00", b"\"\\U00010400\"")
        check(u"\uDBFF\uDFFF", b"\"\\U0010ffff\"")
        check(u"'\uDBFF\uDFFF", b"\"'\\U0010ffff\"")
        check(u"\"\uDBFF\uDFFF", b"\"\\\"\\U0010ffff\"")

        # invalid UTF-8
        check(b"\xFF", b"b\"\\xff\"")
        check(b"\x00\"$\\`\x80\xFF", b"b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")

    # `enc` defaults to 'ascii' for the test runner; test_quote_output_default
    # calls this method directly with None.
    def test_quote_output_ascii(self, enc='ascii'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            self._check(inp, out, enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u00D7", b"\"\\xd7\"")
        check(u"'\u00D7", b"\"'\\xd7\"")
        check(u"\"\u00D7", b"\"\\\"\\xd7\"")
        check(u"\u2621", b"\"\\u2621\"")
        check(u"'\u2621", b"\"'\\u2621\"")
        check(u"\"\u2621", b"\"\\\"\\u2621\"")
        check(u"\n", b"'\n'", True, quote_newlines=False)
        check(u"\n", b"\"\\x0a\"", quote_newlines=True)

    # Same calling convention as test_quote_output_ascii.
    def test_quote_output_latin1(self, enc='latin1'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            # Expectations are written as unicode and handed over as latin1
            # bytes; _check decodes them again on Python 3.
            self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u00D7", u"'\u00D7'", True)
        check(u"'\u00D7", u"\"'\u00D7\"")
        check(u"\"\u00D7", u"'\"\u00D7'")
        check(u"\u00D7\"", u"'\u00D7\"'", True)
        check(u"\u2621", u"\"\\u2621\"")
        check(u"'\u2621", u"\"'\\u2621\"")
        check(u"\"\u2621", u"\"\\\"\\u2621\"")
        check(u"\n", u"'\n'", True, quote_newlines=False)
        check(u"\n", u"\"\\x0a\"", quote_newlines=True)

    # Same calling convention as test_quote_output_ascii.
    def test_quote_output_utf8(self, enc='utf-8'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            if PY2:
                # Only Python 2 needs the expectation as UTF-8 bytes;
                # on Python 3 output is always Unicode:
                out = out.encode('utf-8')
            self._check(inp, out, enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u2621", u"'\u2621'", True)
        check(u"'\u2621", u"\"'\u2621\"")
        check(u"\"\u2621", u"'\"\u2621'")
        check(u"\u2621\"", u"'\u2621\"'", True)
        check(u"\n", u"'\n'", True, quote_newlines=False)
        check(u"\n", u"\"\\x0a\"", quote_newlines=True)

    def test_quote_output_default(self):
        # With encoding=None, each suite is re-run with io_encoding patched
        # to the matching codec.
        self.patch(encodingutil, 'io_encoding', 'ascii')
        self.test_quote_output_ascii(None)

        self.patch(encodingutil, 'io_encoding', 'latin1')
        self.test_quote_output_latin1(None)

        self.patch(encodingutil, 'io_encoding', 'utf-8')
        self.test_quote_output_utf8(None)
|
|
|
|
|
|
def win32_other(win32, other):
    """Return ``win32`` when sys.platform is "win32", otherwise ``other``."""
    if sys.platform == "win32":
        return win32
    return other
|
|
|
|
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
    """Tests for quote_path, quote_local_unicode_path and quote_filepath."""

    def assertPathsEqual(self, actual, expected):
        # Expectations are spelled as ASCII bytes; on Python 3, results
        # should be unicode, so decode before the strict comparison.
        want = expected.decode("ascii") if PY3 else expected
        self.failUnlessReallyEqual(actual, want)

    def test_quote_path(self):
        # Plain segments: single quotes, dropped when quotemarks=False.
        self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'")
        self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'")
        self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar")

        # A newline in a segment: double quotes are expected in all three
        # variants, including quotemarks=False.
        escaped = b'"foo/\\x0abar"'
        self.assertPathsEqual(quote_path([u'foo', u'\nbar']), escaped)
        self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), escaped)
        self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), escaped)

        # Long-path form of a drive path; the prefix is only expected to be
        # removed on win32.
        long_drive = u"\\\\?\\C:\\foo"
        drive_quoted = win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")
        drive_bare = win32_other(b"C:\\foo", b"\\\\?\\C:\\foo")
        self.assertPathsEqual(quote_local_unicode_path(long_drive), drive_quoted)
        self.assertPathsEqual(quote_local_unicode_path(long_drive, quotemarks=True), drive_quoted)
        self.assertPathsEqual(quote_local_unicode_path(long_drive, quotemarks=False), drive_bare)

        # Long-path form of a UNC path.
        long_unc = u"\\\\?\\UNC\\foo\\bar"
        unc_quoted = win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")
        unc_bare = win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar")
        self.assertPathsEqual(quote_local_unicode_path(long_unc), unc_quoted)
        self.assertPathsEqual(quote_local_unicode_path(long_unc, quotemarks=True), unc_quoted)
        self.assertPathsEqual(quote_local_unicode_path(long_unc, quotemarks=False), unc_bare)

    def test_quote_filepath(self):
        foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
        quoted = win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")
        bare = win32_other(b"C:\\foo\\bar", b"/foo/bar")
        self.assertPathsEqual(quote_filepath(foo_bar_fp), quoted)
        self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True), quoted)
        self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False), bare)

        # The long-path FilePath checks only run on Windows.
        if sys.platform != "win32":
            return
        foo_longfp = FilePath(u'\\\\?\\C:\\foo')
        self.assertPathsEqual(quote_filepath(foo_longfp), b"'C:\\foo'")
        self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True), b"'C:\\foo'")
        self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False), b"C:\\foo")
|
|
|
|
|
|
class FilePaths(ReallyEqualMixin, unittest.TestCase):
    """Tests for the FilePath conversion helpers in encodingutil."""

    def test_to_filepath(self):
        foo_u = win32_other(u'C:\\foo', u'/foo')

        # The same FilePath is expected with and without a trailing separator.
        candidates = (to_filepath(foo_u), to_filepath(foo_u + os.path.sep))
        for candidate in candidates:
            self.failUnlessReallyEqual(candidate, FilePath(foo_u))
            if encodingutil.use_unicode_filepath:
                self.failUnlessReallyEqual(candidate.path, foo_u)

        if sys.platform != "win32":
            return
        # Windows only: long-path form with a trailing backslash.
        long_u = u'\\\\?\\C:\\foo'
        longfp = to_filepath(long_u + u'\\')
        self.failUnlessReallyEqual(longfp, FilePath(long_u))
        self.failUnlessReallyEqual(longfp.path, long_u)

    def test_extend_filepath(self):
        # Exercise both a bytes-based and a unicode-based starting FilePath.
        bases = (
            FilePath(win32_other(b'C:\\foo', b'/foo')),
            FilePath(win32_other(u'C:\\foo', u'/foo')),
        )
        foo_bar_baz_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')

        for base in bases:
            extended = extend_filepath(base, [u'bar', u'baz'])
            self.failUnlessReallyEqual(extended, FilePath(foo_bar_baz_u))
            if encodingutil.use_unicode_filepath:
                self.failUnlessReallyEqual(extended.path, foo_bar_baz_u)

    def test_unicode_from_filepath(self):
        bases = (
            FilePath(win32_other(b'C:\\foo', b'/foo')),
            FilePath(win32_other(u'C:\\foo', u'/foo')),
        )
        foo_u = win32_other(u'C:\\foo', u'/foo')

        for base in bases:
            self.failUnlessReallyEqual(unicode_from_filepath(base), foo_u)

    def test_unicode_segments_from(self):
        ancestors = (
            FilePath(win32_other(b'C:\\foo', b'/foo')),
            FilePath(win32_other(u'C:\\foo', u'/foo')),
        )
        descendants = (
            FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz')),
            FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')),
        )

        # Every bytes/unicode pairing must give the same unicode segments.
        for ancestor in ancestors:
            for descendant in descendants:
                self.failUnlessReallyEqual(unicode_segments_from(descendant, ancestor),
                                           [u'bar', u'baz'])
|
|
|
|
|
|
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
    # Fixture values for the EncodingUtil tests: Linux, UTF-8 everywhere.
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    # lumiere_nfc encoded as UTF-8.
    argv = b'lumi\xc3\xa8re'
    platform = 'linux2'
    filesystem_encoding = 'UTF-8'
    io_encoding = 'UTF-8'
    # TEST_FILENAMES as UTF-8 bytes.
    dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt']
|
|
|
|
class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
    # Fixture values for the EncodingUtil tests: Linux, ISO-8859-1 everywhere.
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    # lumiere_nfc encoded as latin1 (single byte 0xE8 for the e-grave).
    argv = b'lumi\xe8re'
    platform = 'linux2'
    filesystem_encoding = 'ISO-8859-1'
    io_encoding = 'ISO-8859-1'
    # TEST_FILENAMES as latin1 bytes.
    dirlist = [b'test_file', b'Blah blah.txt', b'\xc4rtonwall.mp3']
|
|
|
|
class Windows(EncodingUtil, unittest.TestCase):
    # Fixture values for the EncodingUtil tests: win32 with 'mbcs' filenames.
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
    # lumiere_nfc encoded as UTF-8.
    argv = b'lumi\xc3\xa8re'
    platform = 'win32'
    # EncodingUtil.test_listdir_unicode skips itself when this codec cannot
    # be looked up on the machine running the tests.
    filesystem_encoding = 'mbcs'
    io_encoding = 'utf-8'
    # Directory listing is already unicode (NFC form) here.
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
|
|
|
|
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
    # Fixture values for the EncodingUtil tests: darwin, UTF-8 everywhere.
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    # NOTE(review): this attribute is named `output`, not `argv`, so the
    # EncodingUtil tests guarded on 'argv' return early for this class --
    # confirm that is intended.
    output = b'lumi\xc3\xa8re'
    platform = 'darwin'
    filesystem_encoding = 'utf-8'
    io_encoding = 'UTF-8'
    # The first entry is in NFD form (A + combining diaeresis U+0308).
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
|
|
|
|
class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
    # Fixture values for the EncodingUtil tests: darwin with a UTF-8
    # filesystem encoding but a US-ASCII I/O encoding. No `argv` is defined,
    # so the tests guarded on 'argv' return early.
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    platform = 'darwin'
    filesystem_encoding = 'utf-8'
    io_encoding = 'US-ASCII'
    # The first entry is in NFD form (A + combining diaeresis U+0308).
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
|
|
|
|
class OpenBSD(EncodingUtil, unittest.TestCase):
    # Fixture values for the EncodingUtil tests: OpenBSD with the '646'
    # encoding for both filesystem and I/O. Neither `argv` nor `dirlist` is
    # defined, so the tests guarded on those attributes return early.
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
    platform = 'openbsd4'
    filesystem_encoding = '646'
    io_encoding = '646'
    # The line below is the message the generator script at the top of this
    # module prints when it cannot produce a `dirlist`:
    # Oops, I cannot write filenames containing non-ascii characters
|
|
|
|
|
|
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
    """Tests for the to_bytes() and from_utf8_or_none() helpers."""

    def test_to_bytes(self):
        # (input, expected) pairs, checked in order.
        cases = (
            (b"foo", b"foo"),
            (b"lumi\xc3\xa8re", b"lumi\xc3\xa8re"),
            # passes through invalid UTF-8 -- is this what we want?
            (b"\xFF", b"\xFF"),
            (u"lumi\u00E8re", b"lumi\xc3\xa8re"),
            (None, None),
        )
        for given, wanted in cases:
            self.failUnlessReallyEqual(to_bytes(given), wanted)

    def test_from_utf8_or_none(self):
        # Unicode input is rejected outright.
        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")

        # Valid UTF-8 decodes; None is passed through unchanged.
        decoded = from_utf8_or_none(b"lumi\xc3\xa8re")
        self.failUnlessReallyEqual(decoded, u"lumi\u00E8re")
        self.failUnlessReallyEqual(from_utf8_or_none(None), None)

        # Invalid UTF-8 raises instead of being passed through.
        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, b"\xFF")
|