lumiere_nfc = u"lumi\u00E8re" Artonwall_nfc = u"\u00C4rtonwall.mp3" Artonwall_nfd = u"A\u0308rtonwall.mp3" TEST_FILENAMES = ( Artonwall_nfc, u'test_file', u'Blah blah.txt', ) # The following main helps to generate a test class for other operating # systems. if __name__ == "__main__": import sys, os import tempfile import shutil import platform if len(sys.argv) != 2: print "Usage: %s lumire" % sys.argv[0] sys.exit(1) print print "class MyWeirdOS(StringUtils, unittest.TestCase):" print " uname = '%s'" % ' '.join(platform.uname()) if sys.platform != "win32": print " argv = %s" % repr(sys.argv[1]) print " platform = '%s'" % sys.platform print " filesystem_encoding = '%s'" % sys.getfilesystemencoding() print " output_encoding = '%s'" % sys.stdout.encoding print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding) try: tmpdir = tempfile.mkdtemp() for fname in TEST_FILENAMES: open(os.path.join(tmpdir, fname), 'w').close() # Use Unicode API under Windows or MacOS X if sys.platform in ('win32', 'darwin'): dirlist = os.listdir(unicode(tmpdir)) else: dirlist = os.listdir(tmpdir) print " dirlist = %s" % repr(dirlist) except: print " # Oops, I cannot write filenames containing non-ascii characters" print shutil.rmtree(tmpdir) sys.exit(0) from twisted.trial import unittest from mock import patch import sys from allmydata.test.common_util import ReallyEqualMixin from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \ unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \ FilenameEncodingError, get_output_encoding, _reload from allmydata.dirnode import normalize from twisted.python import usage class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase): def tearDown(self): _reload() @patch('sys.stdout') def test_get_output_encoding(self, mock_stdout): mock_stdout.encoding = 'UTF-8' _reload() self.failUnlessReallyEqual(get_output_encoding(), 'utf-8') mock_stdout.encoding = 'cp65001' _reload() self.failUnlessReallyEqual(get_output_encoding(), 'utf-8') mock_stdout.encoding = 'koi8-r' _reload() self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r') mock_stdout.encoding = 'nonexistent_encoding' self.failUnlessRaises(AssertionError, _reload) # TODO: mock_stdout.encoding = None @patch('sys.stdout') def test_argv_to_unicode(self, mock): mock.encoding = 'utf-8' _reload() self.failUnlessRaises(usage.UsageError, argv_to_unicode, lumiere_nfc.encode('latin1')) @patch('sys.stdout') def test_unicode_to_output(self, mock): # Encoding koi8-r cannot represent e-grave mock.encoding = 'koi8-r' _reload() self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) @patch('os.listdir') def test_no_unicode_normalization(self, mock): # Pretend to run on a Unicode platform. # We normalized to NFC in 1.7beta, but we now don't. orig_platform = sys.platform try: sys.platform = 'darwin' mock.return_value = [Artonwall_nfd] _reload() self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) finally: sys.platform = orig_platform # The following tests applies only to platforms which don't store filenames as # Unicode entities on the filesystem. class StringUtilsNonUnicodePlatform(unittest.TestCase): def setUp(self): # Mock sys.platform because unicode_platform() uses it self.original_platform = sys.platform sys.platform = 'linux' def tearDown(self): sys.platform = self.original_platform _reload() @patch('sys.getfilesystemencoding') @patch('os.listdir') def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding): # What happens if latin1-encoded filenames are encountered on an UTF-8 # filesystem? mock_listdir.return_value = [ lumiere_nfc.encode('utf-8'), lumiere_nfc.encode('latin1')] mock_getfilesystemencoding.return_value = 'utf-8' _reload() self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy') # We're trying to list a directory whose name cannot be represented in # the filesystem encoding. This should fail. mock_getfilesystemencoding.return_value = 'ascii' _reload() self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/' + lumiere_nfc) @patch('sys.getfilesystemencoding') def test_open_unicode(self, mock): mock.return_value = 'ascii' _reload() self.failUnlessRaises(FilenameEncodingError, open_unicode, lumiere_nfc, 'rb') class StringUtils(ReallyEqualMixin): def setUp(self): # Mock sys.platform because unicode_platform() uses it self.original_platform = sys.platform sys.platform = self.platform def tearDown(self): sys.platform = self.original_platform _reload() @patch('sys.stdout') def test_argv_to_unicode(self, mock): if 'argv' not in dir(self): return mock.encoding = self.output_encoding argu = lumiere_nfc argv = self.argv _reload() self.failUnlessReallyEqual(argv_to_unicode(argv), argu) def test_unicode_to_url(self): self.failUnless(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re") @patch('sys.stdout') def test_unicode_to_output(self, mock): if 'output' not in dir(self): return mock.encoding = self.output_encoding _reload() self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.output) def test_unicode_platform(self): matrix = { 'linux2': False, 'openbsd4': False, 'win32': True, 'darwin': True, } _reload() self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform]) @patch('sys.getfilesystemencoding') @patch('os.listdir') def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding): if 'dirlist' not in dir(self): return try: u"test".encode(self.filesystem_encoding) except (LookupError, AttributeError): raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding " "that we are testing for the benefit of a different platform." % (self.filesystem_encoding,)) mock_listdir.return_value = self.dirlist mock_getfilesystemencoding.return_value = self.filesystem_encoding _reload() filenames = listdir_unicode(u'/dummy') self.failUnlessEqual(set([normalize(fname) for fname in filenames]), set(TEST_FILENAMES)) @patch('sys.getfilesystemencoding') @patch('__builtin__.open') def test_open_unicode(self, mock_open, mock_getfilesystemencoding): mock_getfilesystemencoding.return_value = self.filesystem_encoding fn = u'/dummy_directory/" + lumiere_nfc + ".txt' try: u"test".encode(self.filesystem_encoding) except (LookupError, AttributeError): raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding " "that we are testing for the benefit of a different platform." % (self.filesystem_encoding,)) _reload() try: open_unicode(fn, 'rb') except FilenameEncodingError: return # Pass Unicode string to open() on Unicode platforms if unicode_platform(): mock_open.assert_called_with(fn, 'rb') # Pass correctly encoded bytestrings to open() on non-Unicode platforms else: fn_bytestring = fn.encode(self.filesystem_encoding) mock_open.assert_called_with(fn_bytestring, 'rb') class UbuntuKarmicUTF8(StringUtils, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' output = 'lumi\xc3\xa8re' argv = 'lumi\xc3\xa8re' platform = 'linux2' filesystem_encoding = 'UTF-8' output_encoding = 'UTF-8' argv_encoding = 'UTF-8' dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt'] class UbuntuKarmicLatin1(StringUtils, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' output = 'lumi\xe8re' argv = 'lumi\xe8re' platform = 'linux2' filesystem_encoding = 'ISO-8859-1' output_encoding = 'ISO-8859-1' argv_encoding = 'ISO-8859-1' dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3'] class WindowsXP(StringUtils, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' output = 'lumi\x8are' platform = 'win32' filesystem_encoding = 'mbcs' output_encoding = 'cp850' argv_encoding = 'ascii' dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class WindowsXP_UTF8(StringUtils, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' output = 'lumi\xc3\xa8re' platform = 'win32' filesystem_encoding = 'mbcs' output_encoding = 'cp65001' argv_encoding = 'ascii' dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class WindowsVista(StringUtils, unittest.TestCase): uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel' output = 'lumi\x8are' platform = 'win32' filesystem_encoding = 'mbcs' output_encoding = 'cp850' argv_encoding = 'ascii' dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class MacOSXLeopard(StringUtils, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' output = 'lumi\xc3\xa8re' argv = 'lumi\xc3\xa8re' platform = 'darwin' filesystem_encoding = 'utf-8' output_encoding = 'UTF-8' argv_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class MacOSXLeopard7bit(StringUtils, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' platform = 'darwin' filesystem_encoding = 'utf-8' output_encoding = 'US-ASCII' argv_encoding = 'US-ASCII' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class OpenBSD(StringUtils, unittest.TestCase): uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' platform = 'openbsd4' filesystem_encoding = '646' output_encoding = '646' argv_encoding = '646' # Oops, I cannot write filenames containing non-ascii characters