from __future__ import print_function from __future__ import absolute_import from __future__ import division from __future__ import unicode_literals from future.utils import PY2, PY3 if PY2: # We don't import str because omg way too ambiguous in this context. from builtins import filter, map, zip, ascii, chr, hex, input, next, oct, open, pow, round, super, bytes, dict, list, object, range, max, min # noqa: F401 from past.builtins import unicode lumiere_nfc = u"lumi\u00E8re" Artonwall_nfc = u"\u00C4rtonwall.mp3" Artonwall_nfd = u"A\u0308rtonwall.mp3" TEST_FILENAMES = ( Artonwall_nfc, u'test_file', u'Blah blah.txt', ) # The following main helps to generate a test class for other operating # systems. if __name__ == "__main__": import sys, os import tempfile import shutil import platform if len(sys.argv) != 2: print("Usage: %s lumire" % sys.argv[0]) sys.exit(1) if sys.platform == "win32": try: from allmydata.windows.fixups import initialize except ImportError: print("set PYTHONPATH to the src directory") sys.exit(1) initialize() print() print("class MyWeirdOS(EncodingUtil, unittest.TestCase):") print(" uname = '%s'" % ' '.join(platform.uname())) print(" argv = %s" % repr(sys.argv[1])) print(" platform = '%s'" % sys.platform) print(" filesystem_encoding = '%s'" % sys.getfilesystemencoding()) print(" io_encoding = '%s'" % sys.stdout.encoding) try: tmpdir = tempfile.mkdtemp() for fname in TEST_FILENAMES: open(os.path.join(tmpdir, fname), 'w').close() # On Python 2, listing directories returns unicode under Windows or # MacOS X if the input is unicode. On Python 3, it always returns # Unicode. if PY2 and sys.platform in ('win32', 'darwin'): dirlist = os.listdir(unicode(tmpdir)) else: dirlist = os.listdir(tmpdir) print(" dirlist = %s" % repr(dirlist)) except: print(" # Oops, I cannot write filenames containing non-ascii characters") print() shutil.rmtree(tmpdir) sys.exit(0) import os, sys, locale from unittest import skipIf from twisted.trial import unittest from twisted.python.filepath import FilePath from allmydata.test.common_util import ( ReallyEqualMixin, skip_if_cannot_represent_filename, ) from allmydata.util import encodingutil, fileutil from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \ quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \ get_io_encoding, get_filesystem_encoding, to_bytes, from_utf8_or_none, _reload, \ to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from, \ unicode_to_argv from twisted.python import usage class MockStdout(object): pass class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): def test_get_io_encoding(self): mock_stdout = MockStdout() self.patch(sys, 'stdout', mock_stdout) mock_stdout.encoding = 'UTF-8' _reload() self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') mock_stdout.encoding = 'cp65001' _reload() self.assertEqual(get_io_encoding(), 'utf-8') mock_stdout.encoding = 'koi8-r' expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' _reload() self.failUnlessReallyEqual(get_io_encoding(), expected) mock_stdout.encoding = 'nonexistent_encoding' if sys.platform == "win32": _reload() self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') else: self.failUnlessRaises(AssertionError, _reload) def test_get_io_encoding_not_from_stdout(self): preferredencoding = 'koi8-r' def call_locale_getpreferredencoding(): return preferredencoding self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding) mock_stdout = MockStdout() self.patch(sys, 'stdout', mock_stdout) expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' _reload() self.failUnlessReallyEqual(get_io_encoding(), expected) mock_stdout.encoding = None _reload() self.failUnlessReallyEqual(get_io_encoding(), expected) preferredencoding = None _reload() self.assertEqual(get_io_encoding(), 'utf-8') def test_argv_to_unicode(self): encodingutil.io_encoding = 'utf-8' self.failUnlessRaises(usage.UsageError, argv_to_unicode, lumiere_nfc.encode('latin1')) @skipIf(PY3, "Python 2 only.") def test_unicode_to_output(self): encodingutil.io_encoding = 'koi8-r' self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) def test_no_unicode_normalization(self): # Pretend to run on a Unicode platform. # listdir_unicode normalized to NFC in 1.7beta, but now doesn't. def call_os_listdir(path): return [Artonwall_nfd] self.patch(os, 'listdir', call_os_listdir) self.patch(sys, 'platform', 'darwin') _reload() self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) # The following tests apply only to platforms that don't store filenames as # Unicode entities on the filesystem. class EncodingUtilNonUnicodePlatform(unittest.TestCase): @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def setUp(self): # Mock sys.platform because unicode_platform() uses it self.original_platform = sys.platform sys.platform = 'linux' def tearDown(self): sys.platform = self.original_platform _reload() def test_listdir_unicode(self): # What happens if latin1-encoded filenames are encountered on an UTF-8 # filesystem? def call_os_listdir(path): return [ lumiere_nfc.encode('utf-8'), lumiere_nfc.encode('latin1') ] self.patch(os, 'listdir', call_os_listdir) sys_filesystemencoding = 'utf-8' def call_sys_getfilesystemencoding(): return sys_filesystemencoding self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding) _reload() self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy') # We're trying to list a directory whose name cannot be represented in # the filesystem encoding. This should fail. sys_filesystemencoding = 'ascii' _reload() self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/' + lumiere_nfc) class EncodingUtil(ReallyEqualMixin): def setUp(self): self.original_platform = sys.platform sys.platform = self.platform def tearDown(self): sys.platform = self.original_platform _reload() def test_argv_to_unicode(self): if 'argv' not in dir(self): return mock_stdout = MockStdout() mock_stdout.encoding = self.io_encoding self.patch(sys, 'stdout', mock_stdout) argu = lumiere_nfc argv = self.argv _reload() self.failUnlessReallyEqual(argv_to_unicode(argv), argu) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), b"lumi\xc3\xa8re") @skipIf(PY3, "Python 3 is always Unicode, regardless of OS.") def test_unicode_to_output_py2(self): if 'argv' not in dir(self): return mock_stdout = MockStdout() mock_stdout.encoding = self.io_encoding self.patch(sys, 'stdout', mock_stdout) _reload() self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv) @skipIf(PY2, "Python 3 only.") def test_unicode_to_output_py3(self): self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), lumiere_nfc) @skipIf(PY3, "Python 2 only.") def test_unicode_to_argv_py2(self): """unicode_to_argv() converts to bytes on Python 2.""" self.assertEqual(unicode_to_argv("abc"), u"abc".encode(self.io_encoding)) @skipIf(PY2, "Python 3 only.") def test_unicode_to_argv_py3(self): """unicode_to_argv() is noop on Python 3.""" self.assertEqual(unicode_to_argv("abc"), "abc") @skipIf(PY3, "Python 3 only.") def test_unicode_platform_py2(self): matrix = { 'linux2': False, 'linux3': False, 'openbsd4': False, 'win32': True, 'darwin': True, } _reload() self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform]) @skipIf(PY2, "Python 3 isn't Python 2.") def test_unicode_platform_py3(self): _reload() self.failUnlessReallyEqual(unicode_platform(), True) def test_listdir_unicode(self): if 'dirlist' not in dir(self): return try: u"test".encode(self.filesystem_encoding) except (LookupError, AttributeError): raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding " "that we are testing for the benefit of a different platform." % (self.filesystem_encoding,)) def call_os_listdir(path): if PY2: return self.dirlist else: # Python 3 always lists unicode filenames: return [d.decode(self.filesystem_encoding) if isinstance(d, bytes) else d for d in self.dirlist] self.patch(os, 'listdir', call_os_listdir) def call_sys_getfilesystemencoding(): return self.filesystem_encoding self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding) _reload() filenames = listdir_unicode(u'/dummy') self.failUnlessEqual(set([encodingutil.normalize(fname) for fname in filenames]), set(TEST_FILENAMES)) class StdlibUnicode(unittest.TestCase): """This mainly tests that some of the stdlib functions support Unicode paths, but also that listdir_unicode works for valid filenames.""" def test_mkdir_open_exists_abspath_listdir_expanduser(self): skip_if_cannot_represent_filename(lumiere_nfc) try: os.mkdir(lumiere_nfc) except EnvironmentError as e: raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run " "does not support Unicode, even though the platform does." % (e,)) fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt' open(fn, 'wb').close() self.failUnless(os.path.exists(fn)) if PY2: getcwdu = os.getcwdu else: getcwdu = os.getcwd self.failUnless(os.path.exists(os.path.join(getcwdu(), fn))) filenames = listdir_unicode(lumiere_nfc) # We only require that the listing includes a filename that is canonically equivalent # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent). self.failUnlessIn(lumiere_nfc + u".txt", set([encodingutil.normalize(fname) for fname in filenames])) expanded = fileutil.expanduser(u"~/" + lumiere_nfc) self.failIfIn(u"~", expanded) self.failUnless(expanded.endswith(lumiere_nfc), expanded) def test_open_unrepresentable(self): if unicode_platform(): raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.") enc = get_filesystem_encoding() fn = u'\u2621.txt' try: fn.encode(enc) raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.") except UnicodeEncodeError: self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb') class QuoteOutput(ReallyEqualMixin, unittest.TestCase): def tearDown(self): _reload() def _check(self, inp, out, enc, optional_quotes, quote_newlines): if PY3 and isinstance(out, bytes): out = out.decode(enc or encodingutil.io_encoding) out2 = out if optional_quotes: out2 = out2[1:-1] self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) if out[0:2] == 'b"': pass elif isinstance(inp, bytes): try: unicode_inp = inp.decode("utf-8") except UnicodeDecodeError: # Some things decode on Python 2, but not Python 3... return self.failUnlessReallyEqual(quote_output(unicode_inp, encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(unicode_inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) else: try: bytes_inp = inp.encode('utf-8') except UnicodeEncodeError: # Some things encode on Python 2, but not Python 3, e.g. # surrogates like u"\uDC00\uD800"... return self.failUnlessReallyEqual(quote_output(bytes_inp, encoding=enc, quote_newlines=quote_newlines), out) self.failUnlessReallyEqual(quote_output(bytes_inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2) def _test_quote_output_all(self, enc): def check(inp, out, optional_quotes=False, quote_newlines=None): if PY3: # Result is always Unicode on Python 3 out = out.decode("ascii") self._check(inp, out, enc, optional_quotes, quote_newlines) # optional single quotes check(b"foo", b"'foo'", True) check(b"\\", b"'\\'", True) check(b"$\"`", b"'$\"`'", True) check(b"\n", b"'\n'", True, quote_newlines=False) # mandatory single quotes check(b"\"", b"'\"'") # double quotes check(b"'", b"\"'\"") check(b"\n", b"\"\\x0a\"", quote_newlines=True) check(b"\x00", b"\"\\x00\"") # invalid Unicode and astral planes check(u"\uFDD0\uFDEF", b"\"\\ufdd0\\ufdef\"") check(u"\uDC00\uD800", b"\"\\udc00\\ud800\"") check(u"\uDC00\uD800\uDC00", b"\"\\udc00\\U00010000\"") check(u"\uD800\uDC00", b"\"\\U00010000\"") check(u"\uD800\uDC01", b"\"\\U00010001\"") check(u"\uD801\uDC00", b"\"\\U00010400\"") check(u"\uDBFF\uDFFF", b"\"\\U0010ffff\"") check(u"'\uDBFF\uDFFF", b"\"'\\U0010ffff\"") check(u"\"\uDBFF\uDFFF", b"\"\\\"\\U0010ffff\"") # invalid UTF-8 check(b"\xFF", b"b\"\\xff\"") check(b"\x00\"$\\`\x80\xFF", b"b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"") def test_quote_output_ascii(self, enc='ascii'): def check(inp, out, optional_quotes=False, quote_newlines=None): self._check(inp, out, enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u00D7", b"\"\\xd7\"") check(u"'\u00D7", b"\"'\\xd7\"") check(u"\"\u00D7", b"\"\\\"\\xd7\"") check(u"\u2621", b"\"\\u2621\"") check(u"'\u2621", b"\"'\\u2621\"") check(u"\"\u2621", b"\"\\\"\\u2621\"") check(u"\n", b"'\n'", True, quote_newlines=False) check(u"\n", b"\"\\x0a\"", quote_newlines=True) def test_quote_output_latin1(self, enc='latin1'): def check(inp, out, optional_quotes=False, quote_newlines=None): self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u00D7", u"'\u00D7'", True) check(u"'\u00D7", u"\"'\u00D7\"") check(u"\"\u00D7", u"'\"\u00D7'") check(u"\u00D7\"", u"'\u00D7\"'", True) check(u"\u2621", u"\"\\u2621\"") check(u"'\u2621", u"\"'\\u2621\"") check(u"\"\u2621", u"\"\\\"\\u2621\"") check(u"\n", u"'\n'", True, quote_newlines=False) check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_utf8(self, enc='utf-8'): def check(inp, out, optional_quotes=False, quote_newlines=None): if PY2: # On Python 3 output is always Unicode: out = out.encode('utf-8') self._check(inp, out, enc, optional_quotes, quote_newlines) self._test_quote_output_all(enc) check(u"\u2621", u"'\u2621'", True) check(u"'\u2621", u"\"'\u2621\"") check(u"\"\u2621", u"'\"\u2621'") check(u"\u2621\"", u"'\u2621\"'", True) check(u"\n", u"'\n'", True, quote_newlines=False) check(u"\n", u"\"\\x0a\"", quote_newlines=True) def test_quote_output_default(self): self.patch(encodingutil, 'io_encoding', 'ascii') self.test_quote_output_ascii(None) self.patch(encodingutil, 'io_encoding', 'latin1') self.test_quote_output_latin1(None) self.patch(encodingutil, 'io_encoding', 'utf-8') self.test_quote_output_utf8(None) def win32_other(win32, other): return win32 if sys.platform == "win32" else other class QuotePaths(ReallyEqualMixin, unittest.TestCase): def assertPathsEqual(self, actual, expected): if PY3: # On Python 3, results should be unicode: expected = expected.decode("ascii") self.failUnlessReallyEqual(actual, expected) def test_quote_path(self): self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'") self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'") self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar") self.assertPathsEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"') self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"') self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"') self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"), win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")) self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True), win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'")) self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False), win32_other(b"C:\\foo", b"\\\\?\\C:\\foo")) self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"), win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")) self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True), win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'")) self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False), win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar")) def test_quote_filepath(self): foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar')) self.assertPathsEqual(quote_filepath(foo_bar_fp), win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")) self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True), win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'")) self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False), win32_other(b"C:\\foo\\bar", b"/foo/bar")) if sys.platform == "win32": foo_longfp = FilePath(u'\\\\?\\C:\\foo') self.assertPathsEqual(quote_filepath(foo_longfp), b"'C:\\foo'") self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True), b"'C:\\foo'") self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False), b"C:\\foo") class FilePaths(ReallyEqualMixin, unittest.TestCase): def test_to_filepath(self): foo_u = win32_other(u'C:\\foo', u'/foo') nosep_fp = to_filepath(foo_u) sep_fp = to_filepath(foo_u + os.path.sep) for fp in (nosep_fp, sep_fp): self.failUnlessReallyEqual(fp, FilePath(foo_u)) if encodingutil.use_unicode_filepath: self.failUnlessReallyEqual(fp.path, foo_u) if sys.platform == "win32": long_u = u'\\\\?\\C:\\foo' longfp = to_filepath(long_u + u'\\') self.failUnlessReallyEqual(longfp, FilePath(long_u)) self.failUnlessReallyEqual(longfp.path, long_u) def test_extend_filepath(self): foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo')) foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo')) foo_bar_baz_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz') for foo_fp in (foo_bfp, foo_ufp): fp = extend_filepath(foo_fp, [u'bar', u'baz']) self.failUnlessReallyEqual(fp, FilePath(foo_bar_baz_u)) if encodingutil.use_unicode_filepath: self.failUnlessReallyEqual(fp.path, foo_bar_baz_u) def test_unicode_from_filepath(self): foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo')) foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo')) foo_u = win32_other(u'C:\\foo', u'/foo') for foo_fp in (foo_bfp, foo_ufp): self.failUnlessReallyEqual(unicode_from_filepath(foo_fp), foo_u) def test_unicode_segments_from(self): foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo')) foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo')) foo_bar_baz_bfp = FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz')) foo_bar_baz_ufp = FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')) for foo_fp in (foo_bfp, foo_ufp): for foo_bar_baz_fp in (foo_bar_baz_bfp, foo_bar_baz_ufp): self.failUnlessReallyEqual(unicode_segments_from(foo_bar_baz_fp, foo_fp), [u'bar', u'baz']) class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' argv = b'lumi\xc3\xa8re' platform = 'linux2' filesystem_encoding = 'UTF-8' io_encoding = 'UTF-8' dirlist = [b'test_file', b'\xc3\x84rtonwall.mp3', b'Blah blah.txt'] class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' argv = b'lumi\xe8re' platform = 'linux2' filesystem_encoding = 'ISO-8859-1' io_encoding = 'ISO-8859-1' dirlist = [b'test_file', b'Blah blah.txt', b'\xc4rtonwall.mp3'] class Windows(EncodingUtil, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' argv = b'lumi\xc3\xa8re' platform = 'win32' filesystem_encoding = 'mbcs' io_encoding = 'utf-8' dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class MacOSXLeopard(EncodingUtil, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' output = b'lumi\xc3\xa8re' platform = 'darwin' filesystem_encoding = 'utf-8' io_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' platform = 'darwin' filesystem_encoding = 'utf-8' io_encoding = 'US-ASCII' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class OpenBSD(EncodingUtil, unittest.TestCase): uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' platform = 'openbsd4' filesystem_encoding = '646' io_encoding = '646' # Oops, I cannot write filenames containing non-ascii characters class TestToFromStr(ReallyEqualMixin, unittest.TestCase): def test_to_bytes(self): self.failUnlessReallyEqual(to_bytes(b"foo"), b"foo") self.failUnlessReallyEqual(to_bytes(b"lumi\xc3\xa8re"), b"lumi\xc3\xa8re") self.failUnlessReallyEqual(to_bytes(b"\xFF"), b"\xFF") # passes through invalid UTF-8 -- is this what we want? self.failUnlessReallyEqual(to_bytes(u"lumi\u00E8re"), b"lumi\xc3\xa8re") self.failUnlessReallyEqual(to_bytes(None), None) def test_from_utf8_or_none(self): self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo") self.failUnlessReallyEqual(from_utf8_or_none(b"lumi\xc3\xa8re"), u"lumi\u00E8re") self.failUnlessReallyEqual(from_utf8_or_none(None), None) self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, b"\xFF")