tahoe-lafs/src/allmydata/test/test_encodingutil.py
Chad Whitacre 93d4a8373f Move ReallyEqualMixin and s31e back to common_util
s31e = skip_if_cannot_represent_filename
2020-09-30 23:31:13 -04:00

636 lines
25 KiB
Python

from __future__ import print_function
from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals
from future.utils import PY2, PY3
if PY2:
# We don't import str because omg way too ambiguous in this context.
from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401
from past.builtins import unicode
lumiere_nfc = u"lumi\u00E8re"
Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3"
TEST_FILENAMES = (
Artonwall_nfc,
u'test_file',
u'Blah blah.txt',
)
# The following main helps to generate a test class for other operating
# systems.
if __name__ == "__main__":
import sys, os
import tempfile
import shutil
import platform
if len(sys.argv) != 2:
print("Usage: %s lumi<e-grave>re" % sys.argv[0])
sys.exit(1)
if sys.platform == "win32":
try:
from allmydata.windows.fixups import initialize
except ImportError:
print("set PYTHONPATH to the src directory")
sys.exit(1)
initialize()
print()
print("class MyWeirdOS(EncodingUtil, unittest.TestCase):")
print(" uname = '%s'" % ' '.join(platform.uname()))
print(" argv = %s" % repr(sys.argv[1]))
print(" platform = '%s'" % sys.platform)
print(" filesystem_encoding = '%s'" % sys.getfilesystemencoding())
print(" io_encoding = '%s'" % sys.stdout.encoding)
try:
tmpdir = tempfile.mkdtemp()
for fname in TEST_FILENAMES:
open(os.path.join(tmpdir, fname), 'w').close()
# On Python 2, listing directories returns unicode under Windows or
# MacOS X if the input is unicode. On Python 3, it always returns
# Unicode.
if PY2 and sys.platform in ('win32', 'darwin'):
dirlist = os.listdir(unicode(tmpdir))
else:
dirlist = os.listdir(tmpdir)
print(" dirlist = %s" % repr(dirlist))
except:
print(" # Oops, I cannot write filenames containing non-ascii characters")
print()
shutil.rmtree(tmpdir)
sys.exit(0)
import os, sys, locale
from unittest import skipIf
from twisted.trial import unittest
from twisted.python.filepath import FilePath
from allmydata.test.common_util import (
ReallyEqualMixin, skip_if_cannot_represent_filename,
)
from allmydata.util import encodingutil, fileutil
from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \
get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \
to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \
unicode_to_argv
from twisted.python import usage
class MockStdout(object):
pass
class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
def test_get_io_encoding(self):
mock_stdout = MockStdout()
self.patch(sys, 'stdout', mock_stdout)
mock_stdout.encoding = 'UTF-8'
_reload()
self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
mock_stdout.encoding = 'cp65001'
_reload()
self.assertEqual(get_io_encoding(), 'utf-8')
mock_stdout.encoding = 'koi8-r'
expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
_reload()
self.failUnlessReallyEqual(get_io_encoding(), expected)
mock_stdout.encoding = 'nonexistent_encoding'
if sys.platform == "win32":
_reload()
self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
else:
self.failUnlessRaises(AssertionError, _reload)
def test_get_io_encoding_not_from_stdout(self):
preferredencoding = 'koi8-r'
def call_locale_getpreferredencoding():
return preferredencoding
self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding)
mock_stdout = MockStdout()
self.patch(sys, 'stdout', mock_stdout)
expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
_reload()
self.failUnlessReallyEqual(get_io_encoding(), expected)
mock_stdout.encoding = None
_reload()
self.failUnlessReallyEqual(get_io_encoding(), expected)
preferredencoding = None
_reload()
self.assertEqual(get_io_encoding(), 'utf-8')
def test_argv_to_unicode(self):
encodingutil.io_encoding = 'utf-8'
self.failUnlessRaises(usage.UsageError,
argv_to_unicode,
lumiere_nfc.encode('latin1'))
@skipIf(PY3, "Python 2 only.")
def test_unicode_to_output(self):
encodingutil.io_encoding = 'koi8-r'
self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
def test_no_unicode_normalization(self):
# Pretend to run on a Unicode platform.
# listdir_unicode normalized to NFC in 1.7beta, but now doesn't.
def call_os_listdir(path):
return [Artonwall_nfd]
self.patch(os, 'listdir', call_os_listdir)
self.patch(sys, 'platform', 'darwin')
_reload()
self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
# The following tests apply only to platforms that don't store filenames as
# Unicode entities on the filesystem.
class EncodingUtilNonUnicodePlatform(unittest.TestCase):
@skipIf(PY3, "Python 3 is always Unicode, regardless of OS.")
def setUp(self):
# Mock sys.platform because unicode_platform() uses it
self.original_platform = sys.platform
sys.platform = 'linux'
def tearDown(self):
sys.platform = self.original_platform
_reload()
def test_listdir_unicode(self):
# What happens if latin1-encoded filenames are encountered on an UTF-8
# filesystem?
def call_os_listdir(path):
return [
lumiere_nfc.encode('utf-8'),
lumiere_nfc.encode('latin1')
]
self.patch(os, 'listdir', call_os_listdir)
sys_filesystemencoding = 'utf-8'
def call_sys_getfilesystemencoding():
return sys_filesystemencoding
self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)
_reload()
self.failUnlessRaises(FilenameEncodingError,
listdir_unicode,
u'/dummy')
# We're trying to list a directory whose name cannot be represented in
# the filesystem encoding. This should fail.
sys_filesystemencoding = 'ascii'
_reload()
self.failUnlessRaises(FilenameEncodingError,
listdir_unicode,
u'/' + lumiere_nfc)
class EncodingUtil(ReallyEqualMixin):
def setUp(self):
self.original_platform = sys.platform
sys.platform = self.platform
def tearDown(self):
sys.platform = self.original_platform
_reload()
def test_argv_to_unicode(self):
if 'argv' not in dir(self):
return
mock_stdout = MockStdout()
mock_stdout.encoding = self.io_encoding
self.patch(sys, 'stdout', mock_stdout)
argu = lumiere_nfc
argv = self.argv
_reload()
self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re")
@skipIf(PY3, "Python 3 is always Unicode, regardless of OS.")
def test_unicode_to_output_py2(self):
if 'argv' not in dir(self):
return
mock_stdout = MockStdout()
mock_stdout.encoding = self.io_encoding
self.patch(sys, 'stdout', mock_stdout)
_reload()
self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)
@skipIf(PY2, "Python 3 only.")
def test_unicode_to_output_py3(self):
self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc)
@skipIf(PY3, "Python 2 only.")
def test_unicode_to_argv_py2(self):
"""unicode_to_argv() converts to bytes on Python 2."""
self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding))
@skipIf(PY2, "Python 3 only.")
def test_unicode_to_argv_py3(self):
"""unicode_to_argv() is noop on Python 3."""
self.assertEqual(unicode_to_argv("abc"), "abc")
@skipIf(PY3, "Python 3 only.")
def test_unicode_platform_py2(self):
matrix = {
'linux2': False,
'linux3': False,
'openbsd4': False,
'win32': True,
'darwin': True,
}
_reload()
self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
@skipIf(PY2, "Python 3 isn't Python 2.")
def test_unicode_platform_py3(self):
_reload()
self.failUnlessReallyEqual(unicode_platform(), True)
def test_listdir_unicode(self):
if 'dirlist' not in dir(self):
return
try:
u"test".encode(self.filesystem_encoding)
except (LookupError, AttributeError):
raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
"that we are testing for the benefit of a different platform."
% (self.filesystem_encoding,))
def call_os_listdir(path):
if PY2:
return self.dirlist
else:
# Python 3 always lists unicode filenames:
return [d.decode(self.filesystem_encoding) if isinstance(d, bytes)
else d
for d in self.dirlist]
self.patch(os, 'listdir', call_os_listdir)
def call_sys_getfilesystemencoding():
return self.filesystem_encoding
self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)
_reload()
filenames = listdir_unicode(u'/dummy')
self.failUnlessEqual(set([encodingutil.normalize(fname) for fname in filenames]),
set(TEST_FILENAMES))
class StdlibUnicode(unittest.TestCase):
"""This mainly tests that some of the stdlib functions support Unicode paths, but also that
listdir_unicode works for valid filenames."""
def test_mkdir_open_exists_abspath_listdir_expanduser(self):
skip_if_cannot_represent_filename(lumiere_nfc)
try:
os.mkdir(lumiere_nfc)
except EnvironmentError as e:
raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
"does not support Unicode, even though the platform does." % (e,))
fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
open(fn, 'wb').close()
self.failUnless(os.path.exists(fn))
if PY2:
getcwdu = os.getcwdu
else:
getcwdu = os.getcwd
self.failUnless(os.path.exists(os.path.join(getcwdu(), fn)))
filenames = listdir_unicode(lumiere_nfc)
# We only require that the listing includes a filename that is canonically equivalent
# to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
self.failUnlessIn(lumiere_nfc + u".txt", set([encodingutil.normalize(fname) for fname in filenames]))
expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
self.failIfIn(u"~", expanded)
self.failUnless(expanded.endswith(lumiere_nfc), expanded)
def test_open_unrepresentable(self):
if unicode_platform():
raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
enc = get_filesystem_encoding()
fn = u'\u2621.txt'
try:
fn.encode(enc)
raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
except UnicodeEncodeError:
self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
def tearDown(self):
_reload()
def _check(self, inp, out, enc, optional_quotes, quote_newlines):
if PY3 and isinstance(out, bytes):
out = out.decode(enc or encodingutil.io_encoding)
out2 = out
if optional_quotes:
out2 = out2[1:-1]
self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out)
self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
if out[0:2] == 'b"':
pass
elif isinstance(inp, bytes):
try:
unicode_inp = inp.decode("utf-8")
except UnicodeDecodeError:
# Some things decode on Python 2, but not Python 3...
return
self.failUnlessReallyEqual(quote_output(unicode_inp, encoding=enc, quote_newlines=quote_newlines), out)
self.failUnlessReallyEqual(quote_output(unicode_inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
else:
try:
bytes_inp = inp.encode('utf-8')
except UnicodeEncodeError:
# Some things encode on Python 2, but not Python 3, e.g.
# surrogates like u"\uDC00\uD800"...
return
self.failUnlessReallyEqual(quote_output(bytes_inp, encoding=enc, quote_newlines=quote_newlines), out)
self.failUnlessReallyEqual(quote_output(bytes_inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
def _test_quote_output_all(self, enc):
def check(inp, out, optional_quotes=False, quote_newlines=None):
if PY3:
# Result is always Unicode on Python 3
out = out.decode("ascii")
self._check(inp, out, enc, optional_quotes, quote_newlines)
# optional single quotes
check(b"foo", b"'foo'", True)
check(b"\\", b"'\\'", True)
check(b"$\"`", b"'$\"`'", True)
check(b"\n", b"'\n'", True, quote_newlines=False)
# mandatory single quotes
check(b"\"", b"'\"'")
# double quotes
check(b"'", b"\"'\"")
check(b"\n", b"\"\\x0a\"", quote_newlines=True)
check(b"\x00", b"\"\\x00\"")
# invalid Unicode and astral planes
check(u"\uFDD0\uFDEF", b"\"\\ufdd0\\ufdef\"")
check(u"\uDC00\uD800", b"\"\\udc00\\ud800\"")
check(u"\uDC00\uD800\uDC00", b"\"\\udc00\\U00010000\"")
check(u"\uD800\uDC00", b"\"\\U00010000\"")
check(u"\uD800\uDC01", b"\"\\U00010001\"")
check(u"\uD801\uDC00", b"\"\\U00010400\"")
check(u"\uDBFF\uDFFF", b"\"\\U0010ffff\"")
check(u"'\uDBFF\uDFFF", b"\"'\\U0010ffff\"")
check(u"\"\uDBFF\uDFFF", b"\"\\\"\\U0010ffff\"")
# invalid UTF-8
check(b"\xFF", b"b\"\\xff\"")
check(b"\x00\"$\\`\x80\xFF", b"b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
def test_quote_output_ascii(self, enc='ascii'):
def check(inp, out, optional_quotes=False, quote_newlines=None):
self._check(inp, out, enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u00D7", b"\"\\xd7\"")
check(u"'\u00D7", b"\"'\\xd7\"")
check(u"\"\u00D7", b"\"\\\"\\xd7\"")
check(u"\u2621", b"\"\\u2621\"")
check(u"'\u2621", b"\"'\\u2621\"")
check(u"\"\u2621", b"\"\\\"\\u2621\"")
check(u"\n", b"'\n'", True, quote_newlines=False)
check(u"\n", b"\"\\x0a\"", quote_newlines=True)
def test_quote_output_latin1(self, enc='latin1'):
def check(inp, out, optional_quotes=False, quote_newlines=None):
self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u00D7", u"'\u00D7'", True)
check(u"'\u00D7", u"\"'\u00D7\"")
check(u"\"\u00D7", u"'\"\u00D7'")
check(u"\u00D7\"", u"'\u00D7\"'", True)
check(u"\u2621", u"\"\\u2621\"")
check(u"'\u2621", u"\"'\\u2621\"")
check(u"\"\u2621", u"\"\\\"\\u2621\"")
check(u"\n", u"'\n'", True, quote_newlines=False)
check(u"\n", u"\"\\x0a\"", quote_newlines=True)
def test_quote_output_utf8(self, enc='utf-8'):
def check(inp, out, optional_quotes=False, quote_newlines=None):
if PY2:
# On Python 3 output is always Unicode:
out = out.encode('utf-8')
self._check(inp, out, enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u2621", u"'\u2621'", True)
check(u"'\u2621", u"\"'\u2621\"")
check(u"\"\u2621", u"'\"\u2621'")
check(u"\u2621\"", u"'\u2621\"'", True)
check(u"\n", u"'\n'", True, quote_newlines=False)
check(u"\n", u"\"\\x0a\"", quote_newlines=True)
def test_quote_output_default(self):
self.patch(encodingutil, 'io_encoding', 'ascii')
self.test_quote_output_ascii(None)
self.patch(encodingutil, 'io_encoding', 'latin1')
self.test_quote_output_latin1(None)
self.patch(encodingutil, 'io_encoding', 'utf-8')
self.test_quote_output_utf8(None)
def win32_other(win32, other):
return win32 if sys.platform == "win32" else other
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
def assertPathsEqual(self, actual, expected):
if PY3:
# On Python 3, results should be unicode:
expected = expected.decode("ascii")
self.failUnlessReallyEqual(actual, expected)
def test_quote_path(self):
self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'")
self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'")
self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar")
self.assertPathsEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"')
self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"')
self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"')
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'"))
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'"))
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False),
win32_other(b"C:\\foo", b"\\\\?\\C:\\foo"))
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"),
win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"))
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True),
win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"))
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar"))
def test_quote_filepath(self):
foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
self.assertPathsEqual(quote_filepath(foo_bar_fp),
win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'"))
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True),
win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'"))
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False),
win32_other(b"C:\\foo\\bar", b"/foo/bar"))
if sys.platform == "win32":
foo_longfp = FilePath(u'\\\\?\\C:\\foo')
self.assertPathsEqual(quote_filepath(foo_longfp),
b"'C:\\foo'")
self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True),
b"'C:\\foo'")
self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False),
b"C:\\foo")
class FilePaths(ReallyEqualMixin, unittest.TestCase):
def test_to_filepath(self):
foo_u = win32_other(u'C:\\foo', u'/foo')
nosep_fp = to_filepath(foo_u)
sep_fp = to_filepath(foo_u + os.path.sep)
for fp in (nosep_fp, sep_fp):
self.failUnlessReallyEqual(fp, FilePath(foo_u))
if encodingutil.use_unicode_filepath:
self.failUnlessReallyEqual(fp.path, foo_u)
if sys.platform == "win32":
long_u = u'\\\\?\\C:\\foo'
longfp = to_filepath(long_u + u'\\')
self.failUnlessReallyEqual(longfp, FilePath(long_u))
self.failUnlessReallyEqual(longfp.path, long_u)
def test_extend_filepath(self):
foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
foo_bar_baz_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')
for foo_fp in (foo_bfp, foo_ufp):
fp = extend_filepath(foo_fp, [u'bar', u'baz'])
self.failUnlessReallyEqual(fp, FilePath(foo_bar_baz_u))
if encodingutil.use_unicode_filepath:
self.failUnlessReallyEqual(fp.path, foo_bar_baz_u)
def test_unicode_from_filepath(self):
foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
foo_u = win32_other(u'C:\\foo', u'/foo')
for foo_fp in (foo_bfp, foo_ufp):
self.failUnlessReallyEqual(unicode_from_filepath(foo_fp), foo_u)
def test_unicode_segments_from(self):
foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
foo_bar_baz_bfp = FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz'))
foo_bar_baz_ufp = FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz'))
for foo_fp in (foo_bfp, foo_ufp):
for foo_bar_baz_fp in (foo_bar_baz_bfp, foo_bar_baz_ufp):
self.failUnlessReallyEqual(unicode_segments_from(foo_bar_baz_fp, foo_fp),
[u'bar', u'baz'])
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
argv = b'lumi\xc3\xa8re'
platform = 'linux2'
filesystem_encoding = 'UTF-8'
io_encoding = 'UTF-8'
dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt']
class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
argv = b'lumi\xe8re'
platform = 'linux2'
filesystem_encoding = 'ISO-8859-1'
io_encoding = 'ISO-8859-1'
dirlist = [b'test_file', b'Blah blah.txt', b'\xc4rtonwall.mp3']
class Windows(EncodingUtil, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
argv = b'lumi\xc3\xa8re'
platform = 'win32'
filesystem_encoding = 'mbcs'
io_encoding = 'utf-8'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
output = b'lumi\xc3\xa8re'
platform = 'darwin'
filesystem_encoding = 'utf-8'
io_encoding = 'UTF-8'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
platform = 'darwin'
filesystem_encoding = 'utf-8'
io_encoding = 'US-ASCII'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(EncodingUtil, unittest.TestCase):
uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
platform = 'openbsd4'
filesystem_encoding = '646'
io_encoding = '646'
# Oops, I cannot write filenames containing non-ascii characters
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
def test_to_bytes(self):
self.failUnlessReallyEqual(to_bytes(b"foo"), b"foo")
self.failUnlessReallyEqual(to_bytes(b"lumi\xc3\xa8re"), b"lumi\xc3\xa8re")
self.failUnlessReallyEqual(to_bytes(b"\xFF"), b"\xFF") # passes through invalid UTF-8 -- is this what we want?
self.failUnlessReallyEqual(to_bytes(u"lumi\u00E8re"), b"lumi\xc3\xa8re")
self.failUnlessReallyEqual(to_bytes(None), None)
def test_from_utf8_or_none(self):
self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")
self.failUnlessReallyEqual(from_utf8_or_none(b"lumi\xc3\xa8re"), u"lumi\u00E8re")
self.failUnlessReallyEqual(from_utf8_or_none(None), None)
self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, b"\xFF")