Make quote_* and friends return unicode.

This commit is contained in:
Itamar Turner-Trauring 2020-08-17 13:29:49 -04:00
parent f95f9c481e
commit 97c3be0509
2 changed files with 71 additions and 37 deletions

View File

@ -355,6 +355,8 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
_reload()
def _check(self, inp, out, enc, optional_quotes, quote_newlines):
if PY3 and isinstance(out, bytes):
out = out.decode(enc or encodingutil.io_encoding)
out2 = out
if optional_quotes:
out2 = out2[1:-1]
@ -382,6 +384,9 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
def _test_quote_output_all(self, enc):
def check(inp, out, optional_quotes=False, quote_newlines=None):
if PY3:
# Result is always Unicode on Python 3
out = out.decode("ascii")
self._check(inp, out, enc, optional_quotes, quote_newlines)
# optional single quotes
@ -444,7 +449,10 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
def test_quote_output_utf8(self, enc='utf-8'):
def check(inp, out, optional_quotes=False, quote_newlines=None):
self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines)
if PY2:
# On Python 3 output is always Unicode:
out = out.encode('utf-8')
self._check(inp, out, enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u2621", u"'\u2621'", True)
@ -469,43 +477,50 @@ def win32_other(win32, other):
return win32 if sys.platform == "win32" else other
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
def test_quote_path(self):
self.failUnlessReallyEqual(quote_path([u'foo', u'bar']), b"'foo/bar'")
self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'")
self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar")
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"')
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"')
self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"')
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
def assertPathsEqual(self, actual, expected):
    """
    Assert that a quoted path ``actual`` really equals ``expected``.

    ``expected`` is written as an ASCII byte string; when ``PY3`` is true it
    is decoded to text first, because the quoting helpers are expected to
    return unicode there.
    """
    wanted = expected.decode("ascii") if PY3 else expected
    self.failUnlessReallyEqual(actual, wanted)
def test_quote_path(self):
self.assertPathsEqual(quote_path([u'foo', u'bar']), b"'foo/bar'")
self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=True), b"'foo/bar'")
self.assertPathsEqual(quote_path([u'foo', u'bar'], quotemarks=False), b"foo/bar")
self.assertPathsEqual(quote_path([u'foo', u'\nbar']), b'"foo/\\x0abar"')
self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), b'"foo/\\x0abar"')
self.assertPathsEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), b'"foo/\\x0abar"')
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'"))
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
win32_other(b"'C:\\foo'", b"'\\\\?\\C:\\foo'"))
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False),
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False),
win32_other(b"C:\\foo", b"\\\\?\\C:\\foo"))
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"),
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"),
win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"))
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True),
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True),
win32_other(b"'\\\\foo\\bar'", b"'\\\\?\\UNC\\foo\\bar'"))
self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
self.assertPathsEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
win32_other(b"\\\\foo\\bar", b"\\\\?\\UNC\\foo\\bar"))
def test_quote_filepath(self):
foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
self.failUnlessReallyEqual(quote_filepath(foo_bar_fp),
self.assertPathsEqual(quote_filepath(foo_bar_fp),
win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'"))
self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=True),
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=True),
win32_other(b"'C:\\foo\\bar'", b"'/foo/bar'"))
self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, quotemarks=False),
self.assertPathsEqual(quote_filepath(foo_bar_fp, quotemarks=False),
win32_other(b"C:\\foo\\bar", b"/foo/bar"))
if sys.platform == "win32":
foo_longfp = FilePath(u'\\\\?\\C:\\foo')
self.failUnlessReallyEqual(quote_filepath(foo_longfp),
self.assertPathsEqual(quote_filepath(foo_longfp),
b"'C:\\foo'")
self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=True),
self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=True),
b"'C:\\foo'")
self.failUnlessReallyEqual(quote_filepath(foo_longfp, quotemarks=False),
self.assertPathsEqual(quote_filepath(foo_longfp, quotemarks=False),
b"C:\\foo")

View File

@ -262,30 +262,49 @@ def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None):
Python-compatible backslash escaping is used.
If not explicitly given, quote_newlines is True when quotemarks is True.
On Python 3, returns Unicode strings.
"""
precondition(isinstance(s, (bytes, unicode)), s)
encoding = encoding or io_encoding
if quote_newlines is None:
quote_newlines = quotemarks
if isinstance(s, bytes):
try:
s = s.decode('utf-8')
except UnicodeDecodeError:
return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),)
def _encode(s):
if isinstance(s, bytes):
try:
s = s.decode('utf-8')
except UnicodeDecodeError:
return b'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _bytes_escape(m, quote_newlines), s),)
must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
if must_double_quote.search(s) is None:
try:
out = s.encode(encoding or io_encoding)
if quotemarks or out.startswith(b'"'):
return b"'%s'" % (out,)
else:
return out
except (UnicodeDecodeError, UnicodeEncodeError):
pass
must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
if must_double_quote.search(s) is None:
try:
out = s.encode(encoding)
if quotemarks or out.startswith(b'"'):
return b"'%s'" % (out,)
else:
return out
except (UnicodeDecodeError, UnicodeEncodeError):
pass
escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
return b'"%s"' % (escaped.encode(encoding, 'backslashreplace'),)
result = _encode(s)
if PY3:
# On Python 3, half of what this function does is unnecessary, since
# output is always Unicode. To ensure no encode errors, one can do:
#
# sys.stdout.reconfigure(encoding=sys.stdout.encoding, errors="backslashreplace")
#
# The problem is that this doesn't work on Python 3.6, only on 3.7 or
# later... For now not thinking about it, just returning unicode since
# that is the right thing to do on Python 3.
result = result.decode(encoding)
return result
escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
return b'"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),)
def quote_path(path, quotemarks=True):
    """
    Quote a path given as an iterable of segments.

    Each segment is converted with ``to_bytes``, the segments are joined
    with ``/``, and the result is passed to ``quote_output`` with newline
    quoting always enabled.

    :param path: iterable of path segments.
    :param quotemarks: forwarded unchanged to ``quote_output``.
    :return: whatever ``quote_output`` returns for the joined path.
    """
    joined = b"/".join(to_bytes(segment) for segment in path)
    return quote_output(joined, quotemarks=quotemarks, quote_newlines=True)