2010-05-20 00:41:05 +00:00
|
|
|
# coding=utf-8
|
|
|
|
|
|
|
|
# File names used both by the generator script (to create real files) and by
# the listdir tests (to check that every name is found in the listing).
TEST_FILENAMES = (
    u'Ärtonwall.mp3',   # contains a non-ASCII character
    u'test_file',       # plain ASCII, no extension
    u'Blah blah.txt',   # ASCII with an embedded space
)
|
|
|
|
|
|
|
|
# The following main helps to generate a test class for other operating
|
|
|
|
# systems.
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
import sys, os
|
|
|
|
import tempfile
|
|
|
|
import shutil
|
|
|
|
import platform
|
2010-06-07 01:02:15 +00:00
|
|
|
|
2010-05-20 00:41:05 +00:00
|
|
|
if len(sys.argv) != 2:
|
|
|
|
print "Usage: %s lumière" % sys.argv[0]
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
print
|
|
|
|
print "class MyWeirdOS(StringUtils, unittest.TestCase):"
|
|
|
|
print " uname = '%s'" % ' '.join(platform.uname())
|
2010-06-07 01:02:15 +00:00
|
|
|
if sys.platform != "win32":
|
|
|
|
print " argv = %s" % repr(sys.argv[1])
|
2010-05-20 00:41:05 +00:00
|
|
|
print " platform = '%s'" % sys.platform
|
2010-06-07 01:02:15 +00:00
|
|
|
print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
|
|
|
|
print " output_encoding = '%s'" % sys.stdout.encoding
|
2010-06-09 00:08:03 +00:00
|
|
|
print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
|
2010-05-20 00:41:05 +00:00
|
|
|
|
|
|
|
try:
|
|
|
|
tmpdir = tempfile.mkdtemp()
|
|
|
|
for fname in TEST_FILENAMES:
|
|
|
|
open(os.path.join(tmpdir, fname), 'w').close()
|
|
|
|
|
|
|
|
# Use Unicode API under Windows or MacOS X
|
|
|
|
if sys.platform in ('win32', 'darwin'):
|
|
|
|
dirlist = os.listdir(unicode(tmpdir))
|
|
|
|
else:
|
|
|
|
dirlist = os.listdir(tmpdir)
|
|
|
|
|
|
|
|
print " dirlist = %s" % repr(dirlist)
|
|
|
|
except:
|
|
|
|
print " # Oops, I cannot write filenames containing non-ascii characters"
|
|
|
|
print
|
|
|
|
|
|
|
|
shutil.rmtree(tmpdir)
|
|
|
|
sys.exit(0)
|
|
|
|
|
|
|
|
from twisted.trial import unittest
|
|
|
|
from mock import patch
|
2010-06-07 01:02:15 +00:00
|
|
|
import sys
|
2010-05-20 00:41:05 +00:00
|
|
|
|
2010-06-07 01:02:15 +00:00
|
|
|
from allmydata.test.common_util import ReallyEqualMixin
|
2010-05-20 00:41:05 +00:00
|
|
|
from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
|
2010-06-07 01:02:15 +00:00
|
|
|
unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \
|
|
|
|
FilenameEncodingError, get_output_encoding, _reload
|
|
|
|
|
2010-05-20 00:41:05 +00:00
|
|
|
from twisted.python import usage
|
|
|
|
|
2010-06-07 01:02:15 +00:00
|
|
|
class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
    # Error-path and special-case tests for the stringutils helpers.  Every
    # test reconfigures the module through _reload() after faking the
    # environment, and tearDown() reloads it once more afterwards.

    def tearDown(self):
        _reload()

    @patch('sys.stdout')
    def test_get_output_encoding(self, fake_stdout):
        # (encoding advertised by stdout, value expected from
        # get_output_encoding())
        expectations = (
            ('UTF-8', 'utf-8'),
            ('cp65001', 'utf-8'),
            ('koi8-r', 'koi8-r'),
        )
        for advertised, expected in expectations:
            fake_stdout.encoding = advertised
            _reload()
            self.failUnlessReallyEqual(get_output_encoding(), expected)

        # An unknown encoding name must make the reload itself fail.
        fake_stdout.encoding = 'nonexistent_encoding'
        self.failUnlessRaises(AssertionError, _reload)

        # TODO: mock_stdout.encoding = None

    @patch('sys.stdout')
    def test_argv_to_unicode(self, fake_stdout):
        fake_stdout.encoding = 'utf-8'
        _reload()

        # Latin-1 bytes are handed in while the output encoding is UTF-8.
        latin1_argument = u'lumière'.encode('latin1')
        self.failUnlessRaises(usage.UsageError, argv_to_unicode, latin1_argument)

    @patch('sys.stdout')
    def test_unicode_to_output(self, fake_stdout):
        # Encoding koi8-r cannot represent 'è'
        fake_stdout.encoding = 'koi8-r'
        _reload()
        self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, u'lumière')

    @patch('os.listdir')
    def test_unicode_normalization(self, fake_listdir):
        # Pretend to run on a Unicode platform
        saved_platform = sys.platform
        try:
            sys.platform = 'darwin'
            # The listing holds 'A' followed by a combining diaeresis; the
            # result is expected to hold the single precomposed character.
            fake_listdir.return_value = [u'A\u0308rtonwall.mp3']
            _reload()
            listing = listdir_unicode(u'/dummy')
            self.failUnlessReallyEqual(listing, [u'\xc4rtonwall.mp3'])
        finally:
            sys.platform = saved_platform
|
2010-05-20 00:41:05 +00:00
|
|
|
|
|
|
|
# The following tests apply only to platforms which don't store filenames as
# Unicode entities on the filesystem.
class StringUtilsNonUnicodePlatform(unittest.TestCase):
    def setUp(self):
        # Mock sys.platform because unicode_platform() uses it
        self.original_platform = sys.platform
        sys.platform = 'linux'

    def tearDown(self):
        # Restore the real platform first, then reload stringutils.
        sys.platform = self.original_platform
        _reload()

    @patch('sys.getfilesystemencoding')
    @patch('os.listdir')
    def test_listdir_unicode(self, fake_listdir, fake_fs_encoding):
        # What happens if latin1-encoded filenames are encountered on an UTF-8
        # filesystem?
        name = u'lumière'
        fake_listdir.return_value = [name.encode('utf-8'), name.encode('latin1')]
        fake_fs_encoding.return_value = 'utf-8'
        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy')

        # We're trying to list a directory whose name cannot be represented in
        # the filesystem encoding.  This should fail.
        fake_fs_encoding.return_value = 'ascii'
        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/lumière')

    @patch('sys.getfilesystemencoding')
    def test_open_unicode(self, fake_fs_encoding):
        # A non-ASCII name is opened while the filesystem encoding is 'ascii'.
        fake_fs_encoding.return_value = 'ascii'
        _reload()
        self.failUnlessRaises(FilenameEncodingError, open_unicode, u'lumière', 'rb')
|
2010-05-20 00:41:05 +00:00
|
|
|
|
2010-06-07 01:02:15 +00:00
|
|
|
class StringUtils(ReallyEqualMixin):
    # Platform-parameterised test body.  Concrete subclasses mix this class
    # with unittest.TestCase and provide recorded values as class attributes:
    #
    #   platform             value forced into sys.platform for each test
    #   filesystem_encoding  returned by the mocked sys.getfilesystemencoding
    #   output_encoding      assigned to the mocked sys.stdout.encoding
    #   argv     (optional)  byte form of u'lumière' for test_argv_to_unicode
    #   output   (optional)  expected result of unicode_to_output(u'lumière')
    #   dirlist  (optional)  value returned by the mocked os.listdir
    #
    # A test whose optional attribute is missing returns without checking
    # anything.

    def setUp(self):
        # Mock sys.platform because unicode_platform() uses it
        self.original_platform = sys.platform
        sys.platform = self.platform

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def _skip_unless_filesystem_encoding_supported(self):
        # Skip the running test when this interpreter cannot encode with the
        # filesystem encoding recorded for the platform under test.
        try:
            u"test".encode(self.filesystem_encoding)
        except (LookupError, AttributeError):
            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
                                    "that we are testing for the benefit of a different platform."
                                    % (self.filesystem_encoding,))

    @patch('sys.stdout')
    def test_argv_to_unicode(self, mock):
        if not hasattr(self, 'argv'):
            return

        mock.encoding = self.output_encoding
        argu = u'lumière'
        argv = self.argv
        _reload()
        self.failUnlessReallyEqual(argv_to_unicode(argv), argu)

    def test_unicode_to_url(self):
        # Compare the result with the expected UTF-8 bytes.  The previous
        # failUnless(value, expected) call used the expected value as the
        # failure message and therefore accepted any truthy result.
        self.failUnlessReallyEqual(unicode_to_url(u'lumière'), "lumi\xc3\xa8re")

    @patch('sys.stdout')
    def test_unicode_to_output(self, mock):
        if not hasattr(self, 'output'):
            return

        mock.encoding = self.output_encoding
        _reload()
        self.failUnlessReallyEqual(unicode_to_output(u'lumière'), self.output)

    def test_unicode_platform(self):
        # Expected unicode_platform() result for each sys.platform value used
        # by the subclasses.
        matrix = {
            'linux2': False,
            'openbsd4': False,
            'win32': True,
            'darwin': True,
        }

        _reload()
        self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])

    @patch('sys.getfilesystemencoding')
    @patch('os.listdir')
    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
        if not hasattr(self, 'dirlist'):
            return

        self._skip_unless_filesystem_encoding_supported()

        mock_listdir.return_value = self.dirlist
        mock_getfilesystemencoding.return_value = self.filesystem_encoding

        _reload()
        filenames = listdir_unicode(u'/dummy')

        # Every name that comes back must be a unicode object, not only the
        # constants we compare against.
        for returned in filenames:
            self.failUnless(isinstance(returned, unicode), repr(returned))

        for fname in TEST_FILENAMES:
            self.failUnless(isinstance(fname, unicode))
            self.failUnlessIn(fname, filenames)

    @patch('sys.getfilesystemencoding')
    @patch('__builtin__.open')
    def test_open_unicode(self, mock_open, mock_getfilesystemencoding):
        mock_getfilesystemencoding.return_value = self.filesystem_encoding
        fn = u'/dummy_directory/lumière.txt'

        self._skip_unless_filesystem_encoding_supported()

        _reload()
        try:
            open_unicode(fn, 'rb')
        except FilenameEncodingError:
            # The name cannot be encoded for this platform; nothing further
            # to check.
            return

        # Pass Unicode string to open() on Unicode platforms
        if unicode_platform():
            mock_open.assert_called_with(fn, 'rb')

        # Pass correctly encoded bytestrings to open() on non-Unicode platforms
        else:
            fn_bytestring = fn.encode(self.filesystem_encoding)
            mock_open.assert_called_with(fn_bytestring, 'rb')
|
|
|
|
|
2010-05-20 00:41:05 +00:00
|
|
|
|
|
|
|
class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
    # Recorded values for a Linux (Ubuntu) host where all three encodings
    # are UTF-8.
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'

    filesystem_encoding = 'UTF-8'
    output_encoding = 'UTF-8'
    argv_encoding = 'UTF-8'

    # UTF-8 byte strings.
    argv = 'lumi\xc3\xa8re'
    output = 'lumi\xc3\xa8re'
    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
|
|
|
|
|
|
|
|
class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
    # Recorded values for a Linux (Ubuntu) host where all three encodings
    # are ISO-8859-1.
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'

    filesystem_encoding = 'ISO-8859-1'
    output_encoding = 'ISO-8859-1'
    argv_encoding = 'ISO-8859-1'

    # Single-byte (Latin-1) byte strings.
    argv = 'lumi\xe8re'
    output = 'lumi\xe8re'
    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
|
|
|
|
|
|
|
|
class WindowsXP(StringUtils, unittest.TestCase):
    # Recorded values for a win32 host with a cp850 console.  No 'argv'
    # attribute is defined for this platform.
    platform = 'win32'
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'

    filesystem_encoding = 'mbcs'
    output_encoding = 'cp850'
    argv_encoding = 'ascii'

    output = 'lumi\x8are'
    # The directory listing is made of unicode objects.
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
|
|
|
|
|
|
|
|
class WindowsXP_UTF8(StringUtils, unittest.TestCase):
    # Recorded values for a win32 host whose console encoding is cp65001.
    # No 'argv' attribute is defined for this platform.
    platform = 'win32'
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'

    filesystem_encoding = 'mbcs'
    output_encoding = 'cp65001'
    argv_encoding = 'ascii'

    output = 'lumi\xc3\xa8re'
    # The directory listing is made of unicode objects.
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
|
|
|
|
|
|
|
|
class WindowsVista(StringUtils, unittest.TestCase):
    # Recorded values for a win32 (Vista) host with a cp850 console.  No
    # 'argv' attribute is defined for this platform.
    platform = 'win32'
    uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'

    filesystem_encoding = 'mbcs'
    output_encoding = 'cp850'
    argv_encoding = 'ascii'

    output = 'lumi\x8are'
    # The directory listing is made of unicode objects.
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
|
|
|
|
|
|
|
|
class MacOSXLeopard(StringUtils, unittest.TestCase):
    # Recorded values for a darwin host with UTF-8 output and argv encodings.
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'

    filesystem_encoding = 'utf-8'
    output_encoding = 'UTF-8'
    argv_encoding = 'UTF-8'

    argv = 'lumi\xc3\xa8re'
    output = 'lumi\xc3\xa8re'
    # The first name is spelled with 'A' followed by a combining diaeresis
    # (U+0308) rather than the precomposed character.
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
|
|
|
|
|
|
|
|
class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
    # Recorded values for a darwin host whose output and argv encodings are
    # US-ASCII.  Neither 'argv' nor 'output' is defined for this platform.
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'

    filesystem_encoding = 'utf-8'
    output_encoding = 'US-ASCII'
    argv_encoding = 'US-ASCII'

    # The first name is spelled with 'A' followed by a combining diaeresis
    # (U+0308) rather than the precomposed character.
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
|
2010-05-21 14:00:53 +00:00
|
|
|
|
|
|
|
class OpenBSD(StringUtils, unittest.TestCase):
    # Recorded values for an OpenBSD host where every encoding is '646'.
    # Only the mandatory attributes are defined; there is no 'argv', 'output'
    # or 'dirlist'.
    platform = 'openbsd4'
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'

    filesystem_encoding = '646'
    output_encoding = '646'
    argv_encoding = '646'
    # Oops, I cannot write filenames containing non-ascii characters
|