Add from_utf8_or_none and tests.

Signed-off-by: Daira Hopwood <daira@jacaranda.org>
This commit is contained in:
Daira Hopwood 2015-03-03 20:04:57 +00:00
parent 3f9c73bd68
commit 21226cbb82
2 changed files with 25 additions and 5 deletions

View File

@ -65,7 +65,7 @@ from allmydata.util import encodingutil, fileutil
from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \
get_filesystem_encoding, _reload
get_filesystem_encoding, to_str, from_utf8_or_none, _reload
from allmydata.dirnode import normalize
from twisted.python import usage
@ -467,3 +467,18 @@ class OpenBSD(EncodingUtil, unittest.TestCase):
filesystem_encoding = '646'
io_encoding = '646'
# Oops, I cannot write filenames containing non-ascii characters
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
    """Exercise to_str and from_utf8_or_none on byte strings, unicode and None."""

    def test_to_str(self):
        """to_str leaves byte strings alone, UTF-8-encodes unicode, keeps None."""
        cases = [
            ("foo", "foo"),
            ("lumi\xc3\xa8re", "lumi\xc3\xa8re"),
            ("\xFF", "\xFF"),  # passes through invalid UTF-8 -- is this what we want?
            (u"lumi\u00E8re", "lumi\xc3\xa8re"),
            (None, None),
        ]
        for given, expected in cases:
            self.failUnlessReallyEqual(to_str(given), expected)

    def test_from_utf8_or_none(self):
        """from_utf8_or_none decodes UTF-8 byte strings and keeps None."""
        decode = from_utf8_or_none

        # Unicode input is rejected rather than silently passed through.
        self.failUnlessRaises(AssertionError, decode, u"foo")

        decoded = decode("lumi\xc3\xa8re")
        self.failUnlessReallyEqual(decoded, u"lumi\u00E8re")

        nothing = decode(None)
        self.failUnlessReallyEqual(nothing, None)

        # Bytes that are not valid UTF-8 must surface as a decode error.
        self.failUnlessRaises(UnicodeDecodeError, decode, "\xFF")

View File

@ -3,12 +3,11 @@ Functions used to convert inputs from whatever encoding used in the system to
unicode and back.
"""
import sys
import os
import re
import sys, os, re, locale
from types import NoneType
from allmydata.util.assertutil import precondition
from twisted.python import usage
import locale
from allmydata.util import log
from allmydata.util.fileutil import abspath_expanduser_unicode
@ -127,6 +126,12 @@ def to_str(s):
return s
return s.encode('utf-8')
def from_utf8_or_none(s):
    """
    Decode a UTF-8 byte string to unicode, letting None pass through.

    s must be either None or a str (a byte string); anything else fails
    the precondition check. Returns None when s is None, otherwise the
    result of decoding s as UTF-8. Raises UnicodeDecodeError if s is not
    valid UTF-8.
    """
    precondition(isinstance(s, (NoneType, str)), s)
    return None if s is None else s.decode('utf-8')
# Matches strings made up solely of newline, carriage return and the
# printable ASCII range 0x20-0x7E (the empty string also matches).
PRINTABLE_ASCII = re.compile(r'^[\n\r\x20-\x7E]*$', re.DOTALL)
# Same as PRINTABLE_ASCII, but additionally admits the high range 0x80-0xFF.
PRINTABLE_8BIT = re.compile(r'^[\n\r\x20-\x7E\x80-\xFF]*$', re.DOTALL)