mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2025-01-19 03:06:33 +00:00
Make quote_* and friends return unicode.
This commit is contained in:
parent
f95f9c481e
commit
97c3be0509
@ -355,6 +355,8 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
|
||||
_reload()
|
||||
|
||||
def _check(self, inp, out, enc, optional_quotes, quote_newlines):
|
||||
if PY3 and isinstance(out, bytes):
|
||||
out = out.decode(enc or encodingutil.io_encoding)
|
||||
out2 = out
|
||||
if optional_quotes:
|
||||
out2 = out2[1:-1]
|
||||
@ -382,6 +384,9 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
|
||||
|
||||
def _test_quote_output_all(self, enc):
|
||||
def check(inp, out, optional_quotes=False, quote_newlines=None):
|
||||
if PY3:
|
||||
# Result is always Unicode on Python 3
|
||||
out = out.decode("ascii")
|
||||
self._check(inp, out, enc, optional_quotes, quote_newlines)
|
||||
|
||||
# optional single quotes
|
||||
@ -444,7 +449,10 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
|
||||
|
||||
def test_quote_output_utf8(self, enc='utf-8'):
|
||||
def check(inp, out, optional_quotes=False, quote_newlines=None):
|
||||
self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines)
|
||||
if PY2:
|
||||
# On Python 3 output is always Unicode:
|
||||
out = out.encode('utf-8')
|
||||
self._check(inp, out, enc, optional_quotes, quote_newlines)
|
||||
|
||||
self._test_quote_output_all(enc)
|
||||
check(u"\u2621", u"'\u2621'", True)
|
||||
@ -469,43 +477,50 @@ def win32_other(win32, other):
|
||||
return win32 if sys.platform == "win32" else other
|
||||
|
||||
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
|
||||
def test_quote_path(self):
|
||||
self.failUnlessReallyEqual(quote_path([u'foo', u'bar']), b"'foo/bar'")
|
||||
self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'")
|
||||
self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar")
|
||||
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"')
|
||||
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"')
|
||||
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"')
|
||||
|
||||
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
|
||||
def assertPathsEqual(self, actual, expected):
|
||||
if PY3:
|
||||
# On Python 3, results should be unicode:
|
||||
expected = expected.decode("ascii")
|
||||
self.failUnlessReallyEqual(actual, expected)
|
||||
|
||||
def test_quote_path(self):
|
||||
self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'")
|
||||
self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'")
|
||||
self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar")
|
||||
self.assertPathsEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"')
|
||||
self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"')
|
||||
self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"')
|
||||
|
||||
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
|
||||
win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'"))
|
||||
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
|
||||
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
|
||||
win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'"))
|
||||
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False),
|
||||
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False),
|
||||
win32_other(b"C:\\foo", b"\\\\?\\C:\\foo"))
|
||||
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"),
|
||||
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"),
|
||||
win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"))
|
||||
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True),
|
||||
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True),
|
||||
win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"))
|
||||
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
|
||||
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
|
||||
win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar"))
|
||||
|
||||
def test_quote_filepath(self):
|
||||
foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
|
||||
self.failUnlessReallyEqual(quote_filepath(foo_bar_fp),
|
||||
self.assertPathsEqual(quote_filepath(foo_bar_fp),
|
||||
win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'"))
|
||||
self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=True),
|
||||
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True),
|
||||
win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'"))
|
||||
self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=False),
|
||||
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False),
|
||||
win32_other(b"C:\\foo\\bar", b"/foo/bar"))
|
||||
|
||||
if sys.platform == "win32":
|
||||
foo_longfp = FilePath(u'\\\\?\\C:\\foo')
|
||||
self.failUnlessReallyEqual(quote_filepath(foo_longfp),
|
||||
self.assertPathsEqual(quote_filepath(foo_longfp),
|
||||
b"'C:\\foo'")
|
||||
self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=True),
|
||||
self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True),
|
||||
b"'C:\\foo'")
|
||||
self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=False),
|
||||
self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False),
|
||||
b"C:\\foo")
|
||||
|
||||
|
||||
|
@ -262,30 +262,49 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None):
|
||||
Python-compatible backslash escaping is used.
|
||||
|
||||
If not explicitly given, quote_newlines is True when quotemarks is True.
|
||||
|
||||
On Python 3, returns Unicode strings.
|
||||
"""
|
||||
precondition(isinstance(s, (bytes, unicode)), s)
|
||||
encoding = encoding or io_encoding
|
||||
|
||||
if quote_newlines is None:
|
||||
quote_newlines = quotemarks
|
||||
|
||||
if isinstance(s, bytes):
|
||||
try:
|
||||
s = s.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),)
|
||||
def _encode(s):
|
||||
if isinstance(s, bytes):
|
||||
try:
|
||||
s = s.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),)
|
||||
|
||||
must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
|
||||
if must_double_quote.search(s) is None:
|
||||
try:
|
||||
out = s.encode(encoding or io_encoding)
|
||||
if quotemarks or out.startswith(b'"'):
|
||||
return b"'%s'" % (out,)
|
||||
else:
|
||||
return out
|
||||
except (UnicodeDecodeError, UnicodeEncodeError):
|
||||
pass
|
||||
must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
|
||||
if must_double_quote.search(s) is None:
|
||||
try:
|
||||
out = s.encode(encoding)
|
||||
if quotemarks or out.startswith(b'"'):
|
||||
return b"'%s'" % (out,)
|
||||
else:
|
||||
return out
|
||||
except (UnicodeDecodeError, UnicodeEncodeError):
|
||||
pass
|
||||
|
||||
escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
|
||||
return b'"%s"' % (escaped.encode(encoding, 'backslashreplace'),)
|
||||
|
||||
result = _encode(s)
|
||||
if PY3:
|
||||
# On Python half of what this function does is unnecessary, since
|
||||
# output is always Unicode. To ensure no encode errors, one can do:
|
||||
#
|
||||
# sys.stdout.reconfigure(encoding=sys.stdout.encoding, errors="backslashreplace")
|
||||
#
|
||||
# Although the problem is that doesn't work in Python 3.6, only 3.7 or
|
||||
# later... For now not thinking about it, just returning unicode since
|
||||
# that is the right thing to do on Python 3.
|
||||
result = result.decode(encoding)
|
||||
return result
|
||||
|
||||
escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
|
||||
return b'"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),)
|
||||
|
||||
def quote_path(path, quotemarks=True):
|
||||
return quote_output(b"/".join(map(to_bytes, path)), quotemarks=quotemarks, quote_newlines=True)
|
||||
|
Loading…
Reference in New Issue
Block a user