Unicode fixes.

david-sarah 2010-06-06 18:02:15 -07:00
parent 1fc6be28f4
commit 80252debcd
26 changed files with 1347 additions and 967 deletions

View File

@ -22,6 +22,7 @@ class VDriveOptions(BaseOptions, usage.Options):
]
def postOptions(self):
# FIXME: allow Unicode node-dir
# compute a node-url from the existing options, put in self['node-url']
if self['node-directory']:
if sys.platform == 'win32' and self['node-directory'] == '~/.tahoe':

View File

@ -2,7 +2,7 @@
import os, sys, urllib
import codecs
from twisted.python import usage
from allmydata.util.stringutils import unicode_to_url
from allmydata.util.stringutils import unicode_to_url, quote_output
from allmydata.util.assertutil import precondition
class BaseOptions:
@ -19,12 +19,12 @@ class BaseOptions:
def opt_version(self):
import allmydata
print allmydata.get_package_versions_string()
print >>self.stdout, allmydata.get_package_versions_string()
sys.exit(0)
def opt_version_and_path(self):
import allmydata
print allmydata.get_package_versions_string(show_paths=True)
print >>self.stdout, allmydata.get_package_versions_string(show_paths=True)
sys.exit(0)
@ -84,7 +84,7 @@ class NoDefaultBasedirMixin(BasedirMixin):
if not self.basedirs:
raise usage.UsageError("--basedir must be provided")
DEFAULT_ALIAS = "tahoe"
DEFAULT_ALIAS = u"tahoe"
def get_aliases(nodedir):
@ -96,7 +96,7 @@ def get_aliases(nodedir):
f = open(rootfile, "r")
rootcap = f.read().strip()
if rootcap:
aliases["tahoe"] = uri.from_string_dirnode(rootcap).to_string()
aliases[u"tahoe"] = uri.from_string_dirnode(rootcap).to_string()
except EnvironmentError:
pass
try:
@ -105,7 +105,7 @@ def get_aliases(nodedir):
line = line.strip()
if line.startswith("#") or not line:
continue
name, cap = line.split(":", 1)
name, cap = line.split(u":", 1)
# normalize it: remove http: prefix, urldecode
cap = cap.strip().encode('utf-8')
aliases[name] = uri.from_string_dirnode(cap).to_string()
@ -124,21 +124,37 @@ def platform_uses_lettercolon_drivename():
return True
return False
class UnknownAliasError(Exception):
pass
def get_alias(aliases, path, default):
class TahoeError(Exception):
def __init__(self, msg):
Exception.__init__(self, msg)
self.msg = msg
def display(self, err):
print >>err, self.msg
class UnknownAliasError(TahoeError):
def __init__(self, msg):
TahoeError.__init__(self, "error: " + msg)
def get_alias(aliases, path_unicode, default):
"""
Transform u"work:path/filename" into (aliases[u"work"], u"path/filename".encode('utf-8')).
If default=None, then an empty alias is indicated by returning
DefaultAliasMarker. We special-case strings with a recognized cap URI
prefix, to make it easy to access specific files/directories by their
caps.
If the transformed alias is either not found in aliases, or is blank
and default is not found in aliases, an UnknownAliasError is
raised.
"""
precondition(isinstance(path_unicode, unicode), path_unicode)
from allmydata import uri
# transform "work:path/filename" into (aliases["work"], "path/filename").
# If default=None, then an empty alias is indicated by returning
# DefaultAliasMarker. We special-case strings with a recognized cap URI
# prefix, to make it easy to access specific files/directories by their
# caps.
# If the transformed alias is either not found in aliases, or is blank
# and default is not found in aliases, an UnknownAliasError is
# raised.
path = path.strip()
if uri.has_uri_prefix(path.encode('utf-8')):
path = path_unicode.encode('utf-8').strip(" ")
if uri.has_uri_prefix(path):
# We used to require "URI:blah:./foo" in order to get a subpath,
# stripping out the ":./" sequence. We still allow that for compatibility,
# but now also allow just "URI:blah/foo".
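As a rough illustration of the transformation the docstring above describes (the alias table and filename here are hypothetical, not from this commit):

    aliases = {u"work": "URI:DIR2:exampleexampleexample"}   # hypothetical table
    dircap, path = get_alias(aliases, u"work:docs/r\u00e9sum\u00e9.txt", None)
    # dircap == aliases[u"work"]
    # path   == "docs/r\xc3\xa9sum\xc3\xa9.txt"   (UTF-8-encoded bytes)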
@ -159,12 +175,14 @@ def get_alias(aliases, path, default):
"'tahoe' alias doesn't exist. To create "
"it, use 'tahoe create-alias tahoe'.")
return aliases[default], path
if colon == 1 and default == None and platform_uses_lettercolon_drivename():
if colon == 1 and default is None and platform_uses_lettercolon_drivename():
# treat C:\why\must\windows\be\so\weird as a local path, not a tahoe
# file in the "C:" alias
return DefaultAliasMarker, path
alias = path[:colon]
if "/" in alias:
# decoding must succeed because path is valid UTF-8 and colon & space are ASCII
alias = path[:colon].decode('utf-8')
if u"/" in alias:
# no alias, but there's a colon in a dirname/filename, like
# "foo/bar:7"
if default == None:
@ -175,7 +193,8 @@ def get_alias(aliases, path, default):
"it, use 'tahoe create-alias tahoe'.")
return aliases[default], path
if alias not in aliases:
raise UnknownAliasError("Unknown alias '%s', please create it with 'tahoe add-alias' or 'tahoe create-alias'." % alias)
raise UnknownAliasError("Unknown alias %s, please create it with 'tahoe add-alias' or 'tahoe create-alias'." %
quote_output(alias))
return aliases[alias], path[colon+1:]
def escape_path(path):

View File

@ -3,6 +3,10 @@ from cStringIO import StringIO
import urlparse, httplib
import allmydata # for __full_version__
from allmydata.util.stringutils import quote_output
from allmydata.scripts.common import TahoeError
# copied from twisted/web/client.py
def parse_url(url, defaultPort=None):
url = url.strip()
@ -63,7 +67,20 @@ def do_http(method, url, body=""):
return c.getresponse()
def format_http_success(resp):
return "%s %s" % (resp.status, quote_output(resp.reason, quotemarks=False))
def format_http_error(msg, resp):
return "%s: %s %s\n%s" % (msg, resp.status, quote_output(resp.reason, quotemarks=False),
quote_output(resp.read(), quotemarks=False))
def check_http_error(resp, stderr):
if resp.status < 200 or resp.status >= 300:
print >>stderr, "error %d during HTTP request" % resp.status
print >>stderr, format_http_error("Error during HTTP request", resp)
return 1
class HTTPError(TahoeError):
def __init__(self, msg, resp):
TahoeError.__init__(format_http_error(msg, resp))
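The usage pattern these classes are meant for, as the CLI commands later in this commit adopt it, is roughly (a sketch, not part of the diff):

    try:
        resp = do_http("GET", url)
        if resp.status != 200:
            raise HTTPError("Error during GET", resp)
    except TahoeError, e:
        e.display(stderr)    # writes e.msg to the given stream
        # the caller then returns a nonzero exit code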

View File

@ -4,12 +4,61 @@ import sqlite3 as sqlite
import urllib
import simplejson
from allmydata.scripts.common_http import do_http
from allmydata.scripts.tahoe_backup import parse_old_timestamp, readonly, \
raiseHTTPError, HTTPError
from allmydata.util import hashutil, base32
from allmydata.scripts.common_http import do_http, HTTPError
from allmydata.util import hashutil, base32, time_format
from allmydata.util.stringutils import to_str, quote_output, quote_path
from allmydata.util.netstring import netstring
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
from allmydata import uri
def readonly(writedircap):
return uri.from_string_dirnode(writedircap).get_readonly().to_string()
def parse_old_timestamp(s, options):
try:
if not s.endswith("Z"):
raise ValueError
# This returns seconds-since-epoch for an ISO-8601-ish-formatted UTC
# time string. This might raise ValueError if the string is not in the
# right format.
when = time_format.iso_utc_time_to_seconds(s[:-1])
return when
except ValueError:
pass
try:
# "2008-11-16 10.34 PM" (localtime)
if s[-3:] in (" AM", " PM"):
# this might raise ValueError
when = time.strptime(s[:-3], "%Y-%m-%d %I.%M")
if s[-3:] == "PM":
when += 12*60*60
return when
except ValueError:
pass
try:
# "2008-11-16 10.34.56 PM" (localtime)
if s[-3:] in (" AM", " PM"):
# this might raise ValueError
when = time.strptime(s[:-3], "%Y-%m-%d %I.%M.%S")
if s[-3:] == "PM":
when += 12*60*60
return when
except ValueError:
pass
try:
# "2008-12-31 18.21.43"
when = time.strptime(s, "%Y-%m-%d %H.%M.%S")
return when
except ValueError:
pass
print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
return None
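For example, the final fallback branch accepts plain localtime stamps (illustrative call; `options` only needs a `stderr` attribute):

    when = parse_old_timestamp("2008-12-31 18.21.43", options)
    # -> a time.struct_time from the last strptime branch; a string that
    #    matches none of the formats is reported to options.stderr and
    #    yields None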
TAG = "consolidator_dirhash_v1"
@ -23,6 +72,7 @@ class Consolidator:
self.rootcap, path = get_alias(options.aliases, options.where,
DEFAULT_ALIAS)
assert path == ""
# TODO: allow dbfile and backupfile to be Unicode
self.dbfile = options["dbfile"]
assert self.dbfile, "--dbfile is required"
self.backupfile = options["backupfile"]
@ -47,7 +97,7 @@ class Consolidator:
url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap)
resp = do_http("GET", url)
if resp.status != 200:
raiseHTTPError("Error during directory GET", resp)
raise HTTPError("Error during directory GET", resp)
jd = simplejson.load(resp)
ntype, ndata = jd
if ntype != "dirnode":
@ -72,18 +122,25 @@ class Consolidator:
for (childname, (childtype, childdata)) in kids.items():
if childtype != "dirnode":
continue
potential_systems[childname] = str(childdata["rw_uri"])
if "rw_uri" not in childdata:
self.msg("%s: not writeable" % quote_output(childname))
continue
potential_systems[childname] = to_str(childdata["rw_uri"])
backup_data = {"Backups": data, "systems": {}, "archives": {}}
systems = {}
for name, sdircap in potential_systems.items():
sdata = self.read_directory_json(sdircap)
kids = sdata["children"]
if not u"Archives" in kids and not u"Latest Backup" in kids:
self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % name)
self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % quote_output(name))
continue
self.msg("%s is a system" % name)
archives_capdata = kids[u"Archives"][1]
if "rw_uri" not in archives_capdata:
self.msg("%s: /Archives is not writeable" % quote_output(name))
continue
self.msg("%s is a system" % quote_output(name))
backup_data["systems"][name] = sdata
archives_dircap = kids[u"Archives"][1]["rw_uri"]
archives_dircap = to_str(archives_capdata["rw_uri"])
archives_data = self.read_directory_json(archives_dircap)
backup_data["archives"][name] = archives_data
systems[name] = archives_dircap
@ -136,7 +193,7 @@ class Consolidator:
# [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
# [None, None, $NAME, readcap] : processed and replaced
self.msg("consolidating system %s" % system_name)
self.msg("consolidating system %s" % quote_output(system_name))
self.directories_reused = 0
self.directories_used_as_is = 0
self.directories_created = 0
@ -149,11 +206,11 @@ class Consolidator:
children = sorted(data["children"].items())
for i, (childname, (childtype, childdata)) in enumerate(children):
if childtype != "dirnode":
self.msg("non-dirnode %s in Archives/" % childname)
self.msg("non-dirnode %s in Archives/" % quote_output(childname))
continue
timename = childname
if childname.endswith("-readonly"):
timename = childname[:-len("-readonly")]
timename = to_str(childname)
if timename.endswith("-readonly"):
timename = timename[:-len("-readonly")]
timestamp = parse_old_timestamp(timename, self.options)
assert timestamp is not None, timename
snapshots.setdefault(timestamp, [None, None, None, None])
@ -161,15 +218,15 @@ class Consolidator:
# need to re-scan it
is_readonly = not childdata.has_key("rw_uri")
if is_readonly:
readcap = str(childdata["ro_uri"])
readcap = to_str(childdata["ro_uri"])
if self.must_rescan_readonly_snapshots:
self.msg(" scanning old %s (%d/%d)" %
(childname, i+1, len(children)))
self.scan_old_directory(str(childdata["ro_uri"]))
(quote_output(childname), i+1, len(children)))
self.scan_old_directory(to_str(childdata["ro_uri"]))
snapshots[timestamp][2] = childname
snapshots[timestamp][3] = readcap
else:
writecap = str(childdata["rw_uri"])
writecap = to_str(childdata["rw_uri"])
snapshots[timestamp][0] = childname
snapshots[timestamp][1] = writecap
snapshots = [ [timestamp] + values
@ -197,7 +254,7 @@ class Consolidator:
assert roname
assert not rwname
first_snapshot = False
self.msg(" %s already readonly" % roname)
self.msg(" %s already readonly" % quote_output(roname))
continue
if readcap and writecap:
# we've processed it, creating a -readonly version, but we
@ -205,9 +262,9 @@ class Consolidator:
assert roname
assert rwname
first_snapshot = False
self.msg(" %s processed but not yet replaced" % roname)
self.msg(" %s processed but not yet replaced" % quote_output(roname))
if self.options["really"]:
self.msg(" replacing %s with %s" % (rwname, roname))
self.msg(" replacing %s with %s" % (quote_output(rwname), quote_output(roname)))
self.put_child(archives_dircap, rwname, readcap)
self.delete_child(archives_dircap, roname)
continue
@ -221,29 +278,29 @@ class Consolidator:
first_snapshot = False
readcap = readonly(writecap)
self.directories_used_as_is += 1
self.msg(" %s: oldest snapshot, using as-is" % rwname)
self.msg(" %s: oldest snapshot, using as-is" % quote_output(rwname))
self.scan_old_directory(readcap)
else:
# for the others, we must scan their contents and build up a new
# readonly directory (which shares common subdirs with previous
# backups)
self.msg(" %s: processing (%d/%d)" % (rwname, i+1, len(snapshots)))
self.msg(" %s: processing (%d/%d)" % (quote_output(rwname), i+1, len(snapshots)))
started = time.time()
readcap = self.process_directory(readonly(writecap), (rwname,))
elapsed = time.time() - started
eta = "%ds" % (elapsed * (len(snapshots) - i-1))
if self.options["really"]:
self.msg(" replaced %s" % rwname)
self.msg(" replaced %s" % quote_output(rwname))
self.put_child(archives_dircap, rwname, readcap)
else:
self.msg(" created %s" % roname)
self.msg(" created %s" % quote_output(roname))
self.put_child(archives_dircap, roname, readcap)
snapshot_created = self.directories_created - start_created
snapshot_used_as_is = self.directories_used_as_is - start_used_as_is
snapshot_reused = self.directories_reused - start_reused
self.msg(" %s: done: %d dirs created, %d used as-is, %d reused, eta %s"
% (rwname,
% (quote_output(rwname),
snapshot_created, snapshot_used_as_is, snapshot_reused,
eta))
# done!
@ -259,7 +316,7 @@ class Consolidator:
# for my contents. In all cases I return a directory readcap that
# points to my contents.
assert isinstance(readcap, str)
readcap = to_str(readcap)
self.directories_seen.add(readcap)
# build up contents to pass to mkdir() (which uses t=set_children)
@ -271,13 +328,13 @@ class Consolidator:
for (childname, (childtype, childdata)) in sorted(data["children"].items()):
if childtype == "dirnode":
childpath = path + (childname,)
old_childcap = str(childdata["ro_uri"])
old_childcap = to_str(childdata["ro_uri"])
childcap = self.process_directory(old_childcap, childpath)
if childcap != old_childcap:
children_modified = True
contents[childname] = ("dirnode", childcap, None)
else:
childcap = str(childdata["ro_uri"])
childcap = to_str(childdata["ro_uri"])
contents[childname] = (childtype, childcap, None)
hashkids.append( (childname, childcap) )
@ -285,7 +342,7 @@ class Consolidator:
old_dircap = self.get_old_dirhash(dirhash)
if old_dircap:
if self.options["verbose"]:
self.msg(" %r: reused" % "/".join(path))
self.msg(" %s: reused" % quote_path(path))
assert isinstance(old_dircap, str)
self.directories_reused += 1
self.directories_used.add(old_dircap)
@ -293,7 +350,7 @@ class Consolidator:
if not children_modified:
# we're allowed to use this directory as-is
if self.options["verbose"]:
self.msg(" %r: used as-is" % "/".join(path))
self.msg(" %s: used as-is" % quote_path(path))
new_dircap = readonly(readcap)
assert isinstance(new_dircap, str)
self.store_dirhash(dirhash, new_dircap)
@ -302,7 +359,7 @@ class Consolidator:
return new_dircap
# otherwise, we need to create a new directory
if self.options["verbose"]:
self.msg(" %r: created" % "/".join(path))
self.msg(" %s: created" % quote_path(path))
new_dircap = readonly(self.mkdir(contents))
assert isinstance(new_dircap, str)
self.store_dirhash(dirhash, new_dircap)
@ -315,21 +372,21 @@ class Consolidator:
urllib.quote(childname))
resp = do_http("PUT", url, childcap)
if resp.status not in (200, 201):
raiseHTTPError("error during put_child", resp)
raise HTTPError("Error during put_child", resp)
def delete_child(self, dircap, childname):
url = self.nodeurl + "uri/%s/%s" % (urllib.quote(dircap),
urllib.quote(childname))
resp = do_http("DELETE", url)
if resp.status not in (200, 201):
raiseHTTPError("error during delete_child", resp)
raise HTTPError("Error during delete_child", resp)
def mkdir(self, contents):
url = self.nodeurl + "uri?t=mkdir"
resp = do_http("POST", url)
if resp.status < 200 or resp.status >= 300:
raiseHTTPError("error during mkdir", resp)
dircap = str(resp.read().strip())
raise HTTPError("Error during mkdir", resp)
dircap = to_str(resp.read().strip())
url = self.nodeurl + "uri/%s?t=set_children" % urllib.quote(dircap)
body = dict([ (childname, (contents[childname][0],
{"ro_uri": contents[childname][1],
@ -339,7 +396,7 @@ class Consolidator:
])
resp = do_http("POST", url, simplejson.dumps(body))
if resp.status != 200:
raiseHTTPError("error during set_children", resp)
raise HTTPError("Error during set_children", resp)
return dircap
def scan_old_directory(self, dircap, ancestors=()):
@ -358,7 +415,7 @@ class Consolidator:
data = self.read_directory_json(dircap)
kids = []
for (childname, (childtype, childdata)) in data["children"].items():
childcap = str(childdata["ro_uri"])
childcap = to_str(childdata["ro_uri"])
if childtype == "dirnode":
self.scan_old_directory(childcap, ancestors)
kids.append( (childname, childcap) )
@ -368,7 +425,7 @@ class Consolidator:
def hash_directory_contents(self, kids):
kids.sort()
s = "".join([netstring(childname.encode("utf-8"))+netstring(childcap)
s = "".join([netstring(to_str(childname))+netstring(childcap)
for (childname, childcap) in kids])
return hashutil.tagged_hash(TAG, s)
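Netstrings make the concatenation unambiguous, so two directories hash equal exactly when they contain the same (name, cap) pairs. A toy illustration (values invented):

    from allmydata.util.netstring import netstring
    netstring("foo")          # -> "3:foo,"
    # after kids.sort(), each (childname, childcap) pair contributes
    # netstring(UTF-8 name) + netstring(cap); the concatenation is then
    # hashed under TAG, so equal child sets give equal hashes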

View File

@ -5,6 +5,7 @@ import struct, time, os
from twisted.python import usage, failure
from twisted.internet import defer
from allmydata.scripts.cli import VDriveOptions
from allmydata.util.stringutils import argv_to_unicode
class DumpOptions(usage.Options):
def getSynopsis(self):
@ -768,7 +769,7 @@ class ConsolidateOptions(VDriveOptions):
("verbose", "v", "Emit a line for every directory examined"),
]
def parseArgs(self, where):
self.where = where
self.where = argv_to_unicode(where)
def consolidate(options):
from allmydata.scripts.consolidate import main

View File

@ -2,8 +2,9 @@
import os, time
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util import base32
from allmydata.util.stringutils import quote_output, is_printable_ascii
import urllib
import simplejson
@ -21,7 +22,7 @@ class SlowOperationRunner:
try:
rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if path == '/':
path = ''
@ -32,7 +33,7 @@ class SlowOperationRunner:
url = self.make_url(url, ophandle)
resp = do_http("POST", url)
if resp.status not in (200, 302):
print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
print >>stderr, format_http_error("ERROR", resp)
return 1
# now we poll for results. We nominally poll at t=1, 5, 10, 30, 60,
# 90, k*120 seconds, but if the poll takes non-zero time, that will
@ -65,14 +66,17 @@ class SlowOperationRunner:
stderr = self.options.stderr
resp = do_http("GET", url)
if resp.status != 200:
print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
print >>stderr, format_http_error("ERROR", resp)
return True
jdata = resp.read()
data = simplejson.loads(jdata)
if not data["finished"]:
return False
if self.options.get("raw"):
print >>stdout, jdata
if is_printable_ascii(jdata):
print >>stdout, jdata
else:
print >>stderr, "The JSON response contained unprintable characters:\n%s" % quote_output(jdata)
return True
self.write_results(data)
return True

View File

@ -5,7 +5,7 @@ from allmydata import uri
from allmydata.scripts.common_http import do_http, check_http_error
from allmydata.scripts.common import get_aliases
from allmydata.util.fileutil import move_into_place
from allmydata.util.stringutils import unicode_to_stdout
from allmydata.util.stringutils import unicode_to_output, quote_output
def add_line_to_aliasfile(aliasfile, alias, cap):
@ -37,14 +37,14 @@ def add_alias(options):
old_aliases = get_aliases(nodedir)
if alias in old_aliases:
print >>stderr, "Alias '%s' already exists!" % alias
print >>stderr, "Alias %s already exists!" % quote_output(alias)
return 1
aliasfile = os.path.join(nodedir, "private", "aliases")
cap = uri.from_string_dirnode(cap).to_string()
add_line_to_aliasfile(aliasfile, alias, cap)
print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),)
print >>stdout, "Alias %s added" % quote_output(alias)
return 0
def create_alias(options):
@ -58,7 +58,7 @@ def create_alias(options):
old_aliases = get_aliases(nodedir)
if alias in old_aliases:
print >>stderr, "Alias '%s' already exists!" % alias
print >>stderr, "Alias %s already exists!" % quote_output(alias)
return 1
aliasfile = os.path.join(nodedir, "private", "aliases")
@ -77,16 +77,26 @@ def create_alias(options):
add_line_to_aliasfile(aliasfile, alias, new_uri)
print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),)
print >>stdout, "Alias %s created" % (quote_output(alias),)
return 0
def list_aliases(options):
nodedir = options['node-directory']
stdout = options.stdout
stderr = options.stderr
aliases = get_aliases(nodedir)
alias_names = sorted(aliases.keys())
max_width = max([len(name) for name in alias_names] + [0])
max_width = max([len(quote_output(name)) for name in alias_names] + [0])
fmt = "%" + str(max_width) + "s: %s"
rc = 0
for name in alias_names:
print >>stdout, fmt % (name, aliases[name])
try:
print >>stdout, fmt % (unicode_to_output(name), unicode_to_output(aliases[name].decode('utf-8')))
except (UnicodeEncodeError, UnicodeDecodeError):
print >>stderr, fmt % (quote_output(name), quote_output(aliases[name]))
rc = 1
if rc == 1:
print >>stderr, "\nThis listing included aliases or caps that could not be converted to the terminal" \
"\noutput encoding. These are shown using backslash escapes and in quotes."
return rc
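The pattern here, repeated across this commit, is: try the terminal's own output encoding first, and fall back to quoted, backslash-escaped output (plus a nonzero exit code) only when that fails. In sketch form:

    try:
        out = unicode_to_output(name)    # may raise UnicodeEncodeError
    except (UnicodeEncodeError, UnicodeDecodeError):
        out = quote_output(name)         # backslash-escaped and quoted
        rc = 1                           # flag that escaping was needed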

View File

@ -6,20 +6,13 @@ import simplejson
import datetime
from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
from allmydata.util import time_format
from allmydata.scripts import backupdb
from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode
from allmydata.util.stringutils import listdir_unicode, open_unicode, quote_output, to_str
from allmydata.util.assertutil import precondition
class HTTPError(Exception):
pass
def raiseHTTPError(msg, resp):
msg = msg + ": %s %s %s" % (resp.status, resp.reason, resp.read())
raise HTTPError(msg)
def get_local_metadata(path):
metadata = {}
@ -49,8 +42,9 @@ def mkdir(contents, options):
url = options['node-url'] + "uri?t=mkdir-immutable"
resp = do_http("POST", url, body)
if resp.status < 200 or resp.status >= 300:
raiseHTTPError("error during mkdir", resp)
dircap = str(resp.read().strip())
raise HTTPError("Error during mkdir", resp)
dircap = to_str(resp.read().strip())
return dircap
def put_child(dirurl, childname, childcap):
@ -58,7 +52,7 @@ def put_child(dirurl, childname, childcap):
url = dirurl + urllib.quote(childname) + "?t=uri"
resp = do_http("PUT", url, childcap)
if resp.status not in (200, 201):
raiseHTTPError("error during put_child", resp)
raise HTTPError("Error during put_child", resp)
class BackupProcessingError(Exception):
pass
@ -99,7 +93,7 @@ class BackerUpper:
try:
rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
if path:
@ -115,8 +109,7 @@ class BackerUpper:
if resp.status == 404:
resp = do_http("POST", archives_url + "?t=mkdir")
if resp.status != 200:
print >>stderr, "Unable to create target directory: %s %s %s" % \
(resp.status, resp.reason, resp.read())
print >>stderr, format_http_error("Unable to create target directory", resp)
return 1
# second step: process the tree
@ -156,20 +149,19 @@ class BackerUpper:
return 0
def verboseprint(self, msg):
precondition(isinstance(msg, str), msg)
if self.verbosity >= 2:
if isinstance(msg, unicode):
msg = unicode_to_stdout(msg)
print >>self.options.stdout, msg
def warn(self, msg):
precondition(isinstance(msg, str), msg)
print >>self.options.stderr, msg
def process(self, localpath):
precondition(isinstance(localpath, unicode), localpath)
# returns newdircap
self.verboseprint("processing %s" % localpath)
self.verboseprint("processing %s" % quote_output(localpath))
create_contents = {} # childname -> (type, rocap, metadata)
compare_contents = {} # childname -> rocap
@ -177,7 +169,7 @@ class BackerUpper:
children = listdir_unicode(localpath)
except EnvironmentError:
self.directories_skipped += 1
self.warn("WARNING: permission denied on directory %s" % localpath)
self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))
children = []
for child in self.options.filter_listdir(children):
@ -199,17 +191,17 @@ class BackerUpper:
compare_contents[child] = childcap
except EnvironmentError:
self.files_skipped += 1
self.warn("WARNING: permission denied on file %s" % childpath)
self.warn("WARNING: permission denied on file %s" % quote_output(childpath))
else:
self.files_skipped += 1
if os.path.islink(childpath):
self.warn("WARNING: cannot backup symlink %s" % childpath)
self.warn("WARNING: cannot backup symlink %s" % quote_output(childpath))
else:
self.warn("WARNING: cannot backup special file %s" % childpath)
self.warn("WARNING: cannot backup special file %s" % quote_output(childpath))
must_create, r = self.check_backupdb_directory(compare_contents)
if must_create:
self.verboseprint(" creating directory for %s" % localpath)
self.verboseprint(" creating directory for %s" % quote_output(localpath))
newdircap = mkdir(create_contents, self.options)
assert isinstance(newdircap, str)
if r:
@ -217,7 +209,7 @@ class BackerUpper:
self.directories_created += 1
return newdircap
else:
self.verboseprint(" re-using old directory for %s" % localpath)
self.verboseprint(" re-using old directory for %s" % quote_output(localpath))
self.directories_reused += 1
return r.was_created()
@ -237,7 +229,7 @@ class BackerUpper:
# we must check the file before using the results
filecap = r.was_uploaded()
self.verboseprint("checking %s" % filecap)
self.verboseprint("checking %s" % quote_output(filecap))
nodeurl = self.options['node-url']
checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap)
self.files_checked += 1
@ -270,7 +262,7 @@ class BackerUpper:
# we must check the directory before re-using it
dircap = r.was_created()
self.verboseprint("checking %s" % dircap)
self.verboseprint("checking %s" % quote_output(dircap))
nodeurl = self.options['node-url']
checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(dircap)
self.directories_checked += 1
@ -292,22 +284,24 @@ class BackerUpper:
def upload(self, childpath):
precondition(isinstance(childpath, unicode), childpath)
#self.verboseprint("uploading %s.." % childpath)
#self.verboseprint("uploading %s.." % quote_output(childpath))
metadata = get_local_metadata(childpath)
# we can use the backupdb here
must_upload, bdb_results = self.check_backupdb_file(childpath)
if must_upload:
self.verboseprint("uploading %s.." % childpath)
infileobj = open_unicode(os.path.expanduser(childpath), "rb")
self.verboseprint("uploading %s.." % quote_output(childpath))
infileobj = open_unicode(childpath, "rb")
url = self.options['node-url'] + "uri"
resp = do_http("PUT", url, infileobj)
if resp.status not in (200, 201):
raiseHTTPError("Error during file PUT", resp)
raise HTTPError("Error during file PUT", resp)
filecap = resp.read().strip()
self.verboseprint(" %s -> %s" % (childpath, filecap))
#self.verboseprint(" metadata: %s" % (metadata,))
self.verboseprint(" %s -> %s" % (quote_output(childpath, quotemarks=False),
quote_output(filecap, quotemarks=False)))
#self.verboseprint(" metadata: %s" % (quote_output(metadata, quotemarks=False),))
if bdb_results:
bdb_results.did_upload(filecap)
@ -316,7 +310,7 @@ class BackerUpper:
return filecap, metadata
else:
self.verboseprint("skipping %s.." % childpath)
self.verboseprint("skipping %s.." % quote_output(childpath))
self.files_reused += 1
return bdb_results.was_uploaded(), metadata

View File

@ -4,7 +4,8 @@ import simplejson
from twisted.protocols.basic import LineOnlyReceiver
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util.stringutils import quote_output, quote_path
class Checker:
pass
@ -19,7 +20,7 @@ def check(options):
try:
rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if path == '/':
path = ''
@ -37,7 +38,7 @@ def check(options):
resp = do_http("POST", url)
if resp.status != 200:
print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
print >>stderr, format_http_error("ERROR", resp)
return 1
jdata = resp.read()
if options.get("raw"):
@ -108,12 +109,12 @@ class DeepCheckOutput(LineOnlyReceiver):
def lineReceived(self, line):
if self.in_error:
print >>self.stderr, line
print >>self.stderr, quote_output(line, quotemarks=False)
return
if line.startswith("ERROR:"):
self.in_error = True
self.streamer.rc = 1
print >>self.stderr, line
print >>self.stderr, quote_output(line, quotemarks=False)
return
d = simplejson.loads(line)
@ -135,12 +136,8 @@ class DeepCheckOutput(LineOnlyReceiver):
if not path:
path = ["<root>"]
summary = cr.get("summary", "Healthy (LIT)")
try:
print >>stdout, "%s: %s" % ("/".join(path), summary)
except UnicodeEncodeError:
print >>stdout, "%s: %s" % ("/".join([p.encode("utf-8")
for p in path]),
summary)
print >>stdout, "%s: %s" % (quote_path(path), summary)
# always print out corrupt shares
for shareloc in cr["results"].get("list-corrupt-shares", []):
(serverid, storage_index, sharenum) = shareloc
@ -174,12 +171,12 @@ class DeepCheckAndRepairOutput(LineOnlyReceiver):
def lineReceived(self, line):
if self.in_error:
print >>self.stderr, line
print >>self.stderr, quote_output(line, quotemarks=False)
return
if line.startswith("ERROR:"):
self.in_error = True
self.streamer.rc = 1
print >>self.stderr, line
print >>self.stderr, quote_output(line, quotemarks=False)
return
d = simplejson.loads(line)
@ -221,12 +218,8 @@ class DeepCheckAndRepairOutput(LineOnlyReceiver):
summary = "healthy"
else:
summary = "not healthy"
try:
print >>stdout, "%s: %s" % ("/".join(path), summary)
except UnicodeEncodeError:
print >>stdout, "%s: %s" % ("/".join([p.encode("utf-8")
for p in path]),
summary)
print >>stdout, "%s: %s" % (quote_path(path), summary)
# always print out corrupt shares
prr = crr.get("pre-repair-results", {})
for shareloc in prr.get("results", {}).get("list-corrupt-shares", []):
@ -272,7 +265,7 @@ class DeepCheckStreamer(LineOnlyReceiver):
try:
rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if path == '/':
path = ''
@ -292,7 +285,7 @@ class DeepCheckStreamer(LineOnlyReceiver):
url += "&add-lease=true"
resp = do_http("POST", url)
if resp.status not in (200, 302):
print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
print >>stderr, format_http_error("ERROR", resp)
return 1
# use Twisted to split this into lines

View File

@ -5,37 +5,38 @@ import simplejson
from cStringIO import StringIO
from twisted.python.failure import Failure
from allmydata.scripts.common import get_alias, escape_path, \
DefaultAliasMarker, UnknownAliasError
from allmydata.scripts.common_http import do_http
DefaultAliasMarker, TahoeError
from allmydata.scripts.common_http import do_http, HTTPError
from allmydata import uri
from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode, \
abspath_expanduser_unicode, quote_output, to_str
from allmydata.util.assertutil import precondition
def ascii_or_none(s):
if s is None:
return s
return str(s)
def _put_local_file(pathname, inf):
# TODO: create temporary file and move into place?
# TODO: move this to fileutil.
outf = open_unicode(pathname, "wb")
try:
while True:
data = inf.read(32768)
if not data:
break
outf.write(data)
finally:
outf.close()
class TahoeError(Exception):
def __init__(self, msg, resp):
self.msg = msg
self.status = resp.status
self.reason = resp.reason
self.body = resp.read()
def display(self, err):
print >>err, "%s: %s %s" % (self.msg, self.status, self.reason)
print >>err, self.body
class MissingSourceError(TahoeError):
def __init__(self, name):
TahoeError.__init__("No such file or directory %s" % quote_output(name))
class MissingSourceError(Exception):
pass
def GET_to_file(url):
resp = do_http("GET", url)
if resp.status == 200:
return resp
raise TahoeError("Error during GET", resp)
raise HTTPError("Error during GET", resp)
def GET_to_string(url):
f = GET_to_file(url)
@ -45,20 +46,20 @@ def PUT(url, data):
resp = do_http("PUT", url, data)
if resp.status in (200, 201):
return resp.read()
raise TahoeError("Error during PUT", resp)
raise HTTPError("Error during PUT", resp)
def POST(url, data):
resp = do_http("POST", url, data)
if resp.status in (200, 201):
return resp.read()
raise TahoeError("Error during POST", resp)
raise HTTPError("Error during POST", resp)
def mkdir(targeturl):
url = targeturl + "?t=mkdir"
resp = do_http("POST", url)
if resp.status in (200, 201):
return resp.read().strip()
raise TahoeError("Error during mkdir", resp)
raise HTTPError("Error during mkdir", resp)
def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
url = nodeurl + "/".join(["uri",
@ -68,7 +69,7 @@ def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
resp = do_http("POST", url)
if resp.status in (200, 201):
return resp.read().strip()
raise TahoeError("Error during mkdir", resp)
raise HTTPError("Error during mkdir", resp)
class LocalFileSource:
@ -80,20 +81,17 @@ class LocalFileSource:
return True
def open(self, caps_only):
return open(self.pathname, "rb")
return open_unicode(self.pathname, "rb")
class LocalFileTarget:
def __init__(self, pathname):
precondition(isinstance(pathname, unicode), pathname)
self.pathname = pathname
def put_file(self, inf):
outf = open(self.pathname, "wb")
while True:
data = inf.read(32768)
if not data:
break
outf.write(data)
outf.close()
_put_local_file(self.pathname, inf)
class LocalMissingTarget:
def __init__(self, pathname):
@ -101,13 +99,8 @@ class LocalMissingTarget:
self.pathname = pathname
def put_file(self, inf):
outf = open(self.pathname, "wb")
while True:
data = inf.read(32768)
if not data:
break
outf.write(data)
outf.close()
_put_local_file(self.pathname, inf)
class LocalDirectorySource:
def __init__(self, progressfunc, pathname):
@ -134,6 +127,7 @@ class LocalDirectorySource:
self.children[n] = LocalFileSource(pn)
else:
# Could be dangling symlink; probably not copy-able.
# TODO: output a warning
pass
class LocalDirectoryTarget:
@ -151,6 +145,7 @@ class LocalDirectoryTarget:
children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
n = unicode(n)
pn = os.path.join(self.pathname, n)
if os.path.isdir(pn):
child = LocalDirectoryTarget(self.progressfunc, pn)
@ -173,13 +168,7 @@ class LocalDirectoryTarget:
def put_file(self, name, inf):
precondition(isinstance(name, unicode), name)
pathname = os.path.join(self.pathname, name)
outf = open_unicode(pathname, "wb")
while True:
data = inf.read(32768)
if not data:
break
outf.write(data)
outf.close()
_put_local_file(pathname, inf)
def set_children(self):
pass
@ -238,7 +227,7 @@ class TahoeDirectorySource:
url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
resp = do_http("GET", url + "?t=json")
if resp.status != 200:
raise TahoeError("Error examining source directory", resp)
raise HTTPError("Error examining source directory", resp)
parsed = simplejson.loads(resp.read())
nodetype, d = parsed
assert nodetype == "dirnode"
@ -250,8 +239,8 @@ class TahoeDirectorySource:
def init_from_parsed(self, parsed):
nodetype, d = parsed
self.writecap = ascii_or_none(d.get("rw_uri"))
self.readcap = ascii_or_none(d.get("ro_uri"))
self.writecap = to_str(d.get("rw_uri"))
self.readcap = to_str(d.get("ro_uri"))
self.mutable = d.get("mutable", False) # older nodes don't provide it
self.children_d = dict( [(unicode(name),value)
for (name,value)
@ -266,13 +255,13 @@ class TahoeDirectorySource:
self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
if data[0] == "filenode":
mutable = data[1].get("mutable", False)
writecap = ascii_or_none(data[1].get("rw_uri"))
readcap = ascii_or_none(data[1].get("ro_uri"))
writecap = to_str(data[1].get("rw_uri"))
readcap = to_str(data[1].get("ro_uri"))
self.children[name] = TahoeFileSource(self.nodeurl, mutable,
writecap, readcap)
elif data[0] == "dirnode":
writecap = ascii_or_none(data[1].get("rw_uri"))
readcap = ascii_or_none(data[1].get("ro_uri"))
writecap = to_str(data[1].get("rw_uri"))
readcap = to_str(data[1].get("ro_uri"))
if writecap and writecap in self.cache:
child = self.cache[writecap]
elif readcap and readcap in self.cache:
@ -320,8 +309,8 @@ class TahoeDirectoryTarget:
def init_from_parsed(self, parsed):
nodetype, d = parsed
self.writecap = ascii_or_none(d.get("rw_uri"))
self.readcap = ascii_or_none(d.get("ro_uri"))
self.writecap = to_str(d.get("rw_uri"))
self.readcap = to_str(d.get("ro_uri"))
self.mutable = d.get("mutable", False) # older nodes don't provide it
self.children_d = dict( [(unicode(name),value)
for (name,value)
@ -335,7 +324,7 @@ class TahoeDirectoryTarget:
url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
resp = do_http("GET", url + "?t=json")
if resp.status != 200:
raise TahoeError("Error examining target directory", resp)
raise HTTPError("Error examining target directory", resp)
parsed = simplejson.loads(resp.read())
nodetype, d = parsed
assert nodetype == "dirnode"
@ -360,8 +349,8 @@ class TahoeDirectoryTarget:
self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
if data[0] == "filenode":
mutable = data[1].get("mutable", False)
writecap = ascii_or_none(data[1].get("rw_uri"))
readcap = ascii_or_none(data[1].get("ro_uri"))
writecap = to_str(data[1].get("rw_uri"))
readcap = to_str(data[1].get("ro_uri"))
url = None
if self.writecap:
url = self.nodeurl + "/".join(["uri",
@ -370,8 +359,8 @@ class TahoeDirectoryTarget:
self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
writecap, readcap, url)
elif data[0] == "dirnode":
writecap = ascii_or_none(data[1].get("rw_uri"))
readcap = ascii_or_none(data[1].get("ro_uri"))
writecap = to_str(data[1].get("rw_uri"))
readcap = to_str(data[1].get("ro_uri"))
if writecap and writecap in self.cache:
child = self.cache[writecap]
elif readcap and readcap in self.cache:
@ -466,8 +455,9 @@ class Copier:
status = self.try_copy()
return status
except TahoeError, te:
Failure().printTraceback(self.stderr)
print >>self.stderr
if verbosity >= 2:
Failure().printTraceback(self.stderr)
print >>self.stderr
te.display(self.stderr)
return 1
@ -476,23 +466,12 @@ class Copier:
destination_spec = self.options.destination
recursive = self.options["recursive"]
try:
target = self.get_target_info(destination_spec)
except UnknownAliasError, e:
self.to_stderr("error: %s" % e.args[0])
return 1
target = self.get_target_info(destination_spec)
try:
sources = [] # list of (name, source object)
for ss in source_specs:
name, source = self.get_source_info(ss)
sources.append( (name, source) )
except MissingSourceError, e:
self.to_stderr("No such file or directory %s" % e.args[0])
return 1
except UnknownAliasError, e:
self.to_stderr("error: %s" % e.args[0])
return 1
sources = [] # list of (name, source object)
for ss in source_specs:
name, source = self.get_source_info(ss)
sources.append( (name, source) )
have_source_dirs = bool([s for (name,s) in sources
if isinstance(s, (LocalDirectorySource,
@ -506,7 +485,7 @@ class Copier:
# cp STUFF foo.txt, where foo.txt already exists. This limits the
# possibilities considerably.
if len(sources) > 1:
self.to_stderr("target '%s' is not a directory" % destination_spec)
self.to_stderr("target %s is not a directory" % quote_output(destination_spec))
return 1
if have_source_dirs:
self.to_stderr("cannot copy directory into a file")
@ -546,7 +525,7 @@ class Copier:
rootcap, path = get_alias(self.aliases, destination_spec, None)
if rootcap == DefaultAliasMarker:
# no alias, so this is a local file
pathname = os.path.abspath(os.path.expanduser(path))
pathname = abspath_expanduser_unicode(path.decode('utf-8'))
if not os.path.exists(pathname):
t = LocalMissingTarget(pathname)
elif os.path.isdir(pathname):
@ -572,21 +551,21 @@ class Copier:
self.progress)
t.init_from_parsed(parsed)
else:
writecap = ascii_or_none(d.get("rw_uri"))
readcap = ascii_or_none(d.get("ro_uri"))
writecap = to_str(d.get("rw_uri"))
readcap = to_str(d.get("ro_uri"))
mutable = d.get("mutable", False)
t = TahoeFileTarget(self.nodeurl, mutable,
writecap, readcap, url)
else:
raise TahoeError("Error examining target '%s'"
% destination_spec, resp)
raise HTTPError("Error examining target %s"
% quote_output(destination_spec), resp)
return t
def get_source_info(self, source_spec):
rootcap, path = get_alias(self.aliases, source_spec, None)
if rootcap == DefaultAliasMarker:
# no alias, so this is a local file
pathname = os.path.abspath(os.path.expanduser(path))
pathname = abspath_expanduser_unicode(path.decode('utf-8'))
name = os.path.basename(pathname)
if not os.path.exists(pathname):
raise MissingSourceError(source_spec)
@ -610,8 +589,8 @@ class Copier:
if resp.status == 404:
raise MissingSourceError(source_spec)
elif resp.status != 200:
raise TahoeError("Error examining source '%s'" % source_spec,
resp)
raise HTTPError("Error examining source %s" % quote_output(source_spec),
resp)
parsed = simplejson.loads(resp.read())
nodetype, d = parsed
if nodetype == "dirnode":
@ -619,8 +598,8 @@ class Copier:
self.progress)
t.init_from_parsed(parsed)
else:
writecap = ascii_or_none(d.get("rw_uri"))
readcap = ascii_or_none(d.get("ro_uri"))
writecap = to_str(d.get("rw_uri"))
readcap = to_str(d.get("ro_uri"))
mutable = d.get("mutable", False) # older nodes don't provide it
if source_spec.rfind('/') != -1:
name = source_spec[source_spec.rfind('/')+1:]
@ -630,7 +609,7 @@ class Copier:
def dump_graph(self, s, indent=" "):
for name, child in s.children.items():
print indent + name + ":" + str(child)
print "%s%s: %r" % (indent, quote_output(name), child)
if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
self.dump_graph(child, indent+" ")

View File

@ -2,7 +2,8 @@
import urllib
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util.stringutils import open_unicode
def get(options):
nodeurl = options['node-url']
@ -17,7 +18,7 @@ def get(options):
try:
rootcap, path = get_alias(aliases, from_file, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
url = nodeurl + "uri/%s" % urllib.quote(rootcap)
if path:
@ -26,7 +27,7 @@ def get(options):
resp = do_http("GET", url)
if resp.status in (200, 201,):
if to_file:
outf = open(to_file, "wb")
outf = open_unicode(to_file, "wb")
else:
outf = stdout
while True:
@ -38,8 +39,7 @@ def get(options):
outf.close()
rc = 0
else:
print >>stderr, "Error, got %s %s" % (resp.status, resp.reason)
print >>stderr, resp.read()
print >>stderr, format_http_error("Error during GET", resp)
rc = 1
return rc

View File

@ -3,8 +3,8 @@ import urllib, time
import simplejson
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.util.stringutils import unicode_to_stdout
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util.stringutils import unicode_to_output, quote_output, is_printable_ascii, to_str
def list(options):
nodeurl = options['node-url']
@ -20,7 +20,7 @@ def list(options):
try:
rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
url = nodeurl + "uri/%s" % urllib.quote(rootcap)
if path:
@ -33,9 +33,7 @@ def list(options):
print >>stderr, "No such file or directory"
return 2
if resp.status != 200:
print >>stderr, "Error during GET: %s %s %s" % (resp.status,
resp.reason,
resp.read())
print >>stderr, format_http_error("Error during GET", resp)
if resp.status == 0:
return 3
else:
@ -44,20 +42,28 @@ def list(options):
data = resp.read()
if options['json']:
print >>stdout, data
return
# The webapi server should always output printable ASCII.
if is_printable_ascii(data):
print >>stdout, data
return 0
else:
print >>stderr, "The JSON response contained unprintable characters:\n%s" % quote_output(data)
return 1
try:
parsed = simplejson.loads(data)
except Exception, le:
le.args = tuple(le.args + (data,))
raise
except Exception, e:
print >>stderr, "error: %s" % quote_output(e.args[0], quotemarks=False)
print >>stderr, "Could not parse JSON response:\n%s" % quote_output(data)
return 1
nodetype, d = parsed
children = {}
if nodetype == "dirnode":
children = d['children']
else:
childname = path.split("/")[-1]
# paths returned from get_alias are always valid UTF-8
childname = path.split("/")[-1].decode('utf-8')
children = {childname: (nodetype, d)}
if "metadata" not in d:
d["metadata"] = {}
@ -71,8 +77,8 @@ def list(options):
has_unknowns = False
for name in childnames:
name = unicode(name)
child = children[name]
name = unicode(name)
childtype = child[0]
# See webapi.txt for a discussion of the meanings of unix local
@ -85,8 +91,8 @@ def list(options):
mtime = child[1].get("metadata", {}).get('tahoe', {}).get("linkmotime")
if not mtime:
mtime = child[1]["metadata"].get("mtime")
rw_uri = child[1].get("rw_uri")
ro_uri = child[1].get("ro_uri")
rw_uri = to_str(child[1].get("rw_uri"))
ro_uri = to_str(child[1].get("ro_uri"))
if ctime:
# match for formatting that GNU 'ls' does
if (now - ctime) > 6*30*24*60*60:
@ -131,17 +137,24 @@ def list(options):
line.append(ctime_s)
if not options["classify"]:
classify = ""
line.append(unicode_to_stdout(name) + classify)
encoding_error = False
try:
line.append(unicode_to_output(name) + classify)
except UnicodeEncodeError:
encoding_error = True
line.append(quote_output(name) + classify)
if options["uri"]:
line.append(uri)
if options["readonly-uri"]:
line.append(ro_uri or "-")
line.append(quote_output(ro_uri or "-", quotemarks=False))
rows.append(line)
rows.append((encoding_error, line))
max_widths = []
left_justifys = []
for row in rows:
for (encoding_error, row) in rows:
for i,cell in enumerate(row):
while len(max_widths) <= i:
max_widths.append(0)
@ -161,14 +174,20 @@ def list(options):
piece += "s"
fmt_pieces.append(piece)
fmt = " ".join(fmt_pieces)
for row in rows:
print >>stdout, (fmt % tuple(row)).rstrip()
rc = 0
for (encoding_error, row) in rows:
if encoding_error:
print >>stderr, (fmt % tuple(row)).rstrip()
rc = 1
else:
print >>stdout, (fmt % tuple(row)).rstrip()
if rc == 1:
print >>stderr, "\nThis listing included files whose names could not be converted to the terminal" \
"\noutput encoding. Their names are shown using backslash escapes and in quotes."
if has_unknowns:
print >>stderr, "\nThis listing included unknown objects. Using a webapi server that supports" \
"\na later version of Tahoe may help."
return 0
# error cases that need improvement:
# list-one-file: tahoe ls my:docs/Makefile
return rc

View File

@ -5,8 +5,8 @@ from allmydata.util.abbreviate import abbreviate_space_both
from allmydata.scripts.slow_operation import SlowOperationRunner
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.util.stringutils import unicode_to_stdout
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util.stringutils import quote_output, quote_path
class FakeTransport:
disconnecting = False
@ -30,7 +30,7 @@ class ManifestStreamer(LineOnlyReceiver):
try:
rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if path == '/':
path = ''
@ -41,7 +41,7 @@ class ManifestStreamer(LineOnlyReceiver):
url += "?t=stream-manifest"
resp = do_http("POST", url)
if resp.status not in (200, 302):
print >>stderr, "ERROR", resp.status, resp.reason, resp.read()
print >>stderr, format_http_error("ERROR", resp)
return 1
#print "RESP", dir(resp)
# use Twisted to split this into lines
@ -60,31 +60,35 @@ class ManifestStreamer(LineOnlyReceiver):
stdout = self.options.stdout
stderr = self.options.stderr
if self.in_error:
print >>stderr, line
print >>stderr, quote_output(line, quotemarks=False)
return
if line.startswith("ERROR:"):
self.in_error = True
self.rc = 1
print >>stderr, line
print >>stderr, quote_output(line, quotemarks=False)
return
d = simplejson.loads(line.decode('utf-8'))
if d["type"] in ("file", "directory"):
if self.options["storage-index"]:
si = d["storage-index"]
if si:
print >>stdout, si
elif self.options["verify-cap"]:
vc = d["verifycap"]
if vc:
print >>stdout, vc
elif self.options["repair-cap"]:
vc = d["repaircap"]
if vc:
print >>stdout, vc
else:
print >>stdout, d["cap"], "/".join([unicode_to_stdout(p)
for p in d["path"]])
try:
d = simplejson.loads(line.decode('utf-8'))
except Exception, e:
print >>stderr, "ERROR could not decode/parse %s\nERROR %r" % (quote_output(line), e)
else:
if d["type"] in ("file", "directory"):
if self.options["storage-index"]:
si = d.get("storage-index", None)
if si:
print >>stdout, quote_output(si, quotemarks=False)
elif self.options["verify-cap"]:
vc = d.get("verifycap", None)
if vc:
print >>stdout, quote_output(vc, quotemarks=False)
elif self.options["repair-cap"]:
vc = d.get("repaircap", None)
if vc:
print >>stdout, quote_output(vc, quotemarks=False)
else:
print >>stdout, "%s %s" % (quote_output(d["cap"], quotemarks=False),
quote_path(d["path"], quotemarks=False))
def manifest(options):
return ManifestStreamer().run(options)
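Each line handed to lineReceived is a JSON object; the keys read above suggest records of roughly this shape (field values invented for illustration):

    # after d = simplejson.loads(line), a record looks roughly like:
    d = {"type": "file",
         "cap": "URI:CHK:...",
         "verifycap": "URI:CHK-Verifier:...",
         "repaircap": "URI:CHK:...",
         "storage-index": "...",
         "path": [u"subdir", u"r\u00e9sum\u00e9.txt"]}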

View File

@ -2,7 +2,7 @@
import urllib
from allmydata.scripts.common_http import do_http, check_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
from allmydata.util.stringutils import unicode_to_url
from allmydata.util.stringutils import quote_output
def mkdir(options):
nodeurl = options['node-url']
@ -16,7 +16,7 @@ def mkdir(options):
try:
rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if not where or not path:
@ -28,17 +28,17 @@ def mkdir(options):
return rc
new_uri = resp.read().strip()
# emit its write-cap
print >>stdout, new_uri
print >>stdout, quote_output(new_uri, quotemarks=False)
return 0
# create a new directory at the given location
if path.endswith("/"):
path = path[:-1]
# path (in argv) must be "/".join([s.encode("utf-8") for s in segments])
# path must be "/".join([s.encode("utf-8") for s in segments])
url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap),
urllib.quote(unicode_to_url(path)))
urllib.quote(path))
resp = do_http("POST", url)
check_http_error(resp, stderr)
new_uri = resp.read().strip()
print >>stdout, new_uri
print >>stdout, quote_output(new_uri, quotemarks=False)
return 0
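Since `path` is already UTF-8-encoded bytes here, plain urllib.quote suffices and unicode_to_url is no longer needed. For instance (Python 2, illustrative):

    import urllib
    urllib.quote("docs/caf\xc3\xa9")    # -> 'docs/caf%C3%A9' ('/' is kept)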

View File

@ -4,7 +4,8 @@ import urllib
import simplejson
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, format_http_error
from allmydata.util.stringutils import to_str
# this script is used for both 'mv' and 'ln'
@ -21,7 +22,7 @@ def mv(options, mode="move"):
try:
rootcap, from_path = get_alias(aliases, from_file, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
from_url = nodeurl + "uri/%s" % urllib.quote(rootcap)
if from_path:
@ -29,16 +30,13 @@ def mv(options, mode="move"):
# figure out the source cap
data = urllib.urlopen(from_url + "?t=json").read()
nodetype, attrs = simplejson.loads(data)
cap = attrs.get("rw_uri") or attrs["ro_uri"]
# simplejson sometimes returns unicode, but we know that it's really just
# an ASCII file-cap.
cap = str(cap)
cap = to_str(attrs.get("rw_uri") or attrs["ro_uri"])
# now get the target
try:
rootcap, path = get_alias(aliases, to_file, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
to_url = nodeurl + "uri/%s" % urllib.quote(rootcap)
if path:
@ -56,18 +54,17 @@ def mv(options, mode="move"):
if status == 409:
print >>stderr, "Error: You can't overwrite a directory with a file"
else:
print >>stderr, "error, got %s %s" % (resp.status, resp.reason)
print >>stderr, resp.read()
print >>stderr, format_http_error("Error", resp)
if mode == "move":
print >>stderr, "NOT removing the original"
return
return 1
if mode == "move":
# now remove the original
resp = do_http("DELETE", from_url)
if not re.search(r'^2\d\d$', str(status)):
print >>stderr, "error, got %s %s" % (resp.status, resp.reason)
print >>stderr, resp.read()
print >>stderr, format_http_error("Error deleting original after move", resp)
return 2
print >>stdout, "OK"
return
return 0

View File

@ -1,10 +1,10 @@
from cStringIO import StringIO
import os.path
import urllib
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, format_http_success, format_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
from allmydata.util.stringutils import quote_output, open_unicode
def put(options):
"""
@ -48,12 +48,12 @@ def put(options):
try:
rootcap, path = get_alias(aliases, to_file, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if path.startswith("/"):
suggestion = to_file.replace("/", "", 1)
print >>stderr, "ERROR: The remote filename must not start with a slash"
print >>stderr, "Please try again, perhaps with:", suggestion
suggestion = to_file.replace(u"/", u"", 1)
print >>stderr, "Error: The remote filename must not start with a slash"
print >>stderr, "Please try again, perhaps with %s" % quote_output(suggestion)
return 1
url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
if path:
@ -64,7 +64,7 @@ def put(options):
if mutable:
url += "?mutable=true"
if from_file:
infileobj = open(os.path.expanduser(from_file), "rb")
infileobj = open_unicode(from_file, "rb")
else:
# do_http() can't use stdin directly: for one thing, we need a
# Content-Length field. So we currently must copy it.
@ -76,10 +76,9 @@ def put(options):
resp = do_http("PUT", url, infileobj)
if resp.status in (200, 201,):
print >>stderr, "%s %s" % (resp.status, resp.reason)
print >>stdout, resp.read()
print >>stderr, format_http_success(resp)
print >>stdout, quote_output(resp.read(), quotemarks=False)
return 0
print >>stderr, "error, got %s %s" % (resp.status, resp.reason)
print >>stderr, resp.read()
print >>stderr, format_http_error("Error", resp)
return 1

View File

@ -1,6 +1,6 @@
import urllib
from allmydata.scripts.common_http import do_http
from allmydata.scripts.common_http import do_http, format_http_success, format_http_error
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
UnknownAliasError
@ -19,7 +19,7 @@ def rm(options):
try:
rootcap, path = get_alias(aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
assert path
url = nodeurl + "uri/%s" % urllib.quote(rootcap)
@ -28,9 +28,8 @@ def rm(options):
resp = do_http("DELETE", url)
if resp.status in (200,):
print >>stdout, "%s %s" % (resp.status, resp.reason)
print >>stdout, format_http_success(resp)
return 0
print >>stderr, "error, got %s %s" % (resp.status, resp.reason)
print >>stderr, resp.read()
print >>stderr, format_http_error("ERROR", resp)
return 1

View File

@ -13,7 +13,7 @@ def webopen(options, opener=None):
try:
rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
except UnknownAliasError, e:
print >>stderr, "error: %s" % e.args[0]
e.display(stderr)
return 1
if path == '/':
path = ''

View File

@ -25,6 +25,13 @@ def flip_one_bit(s, offset=0, size=None):
assert result != s, "Internal error -- flip_one_bit() produced the same string as its input: %s == %s" % (result, s)
return result
class ReallyEqualMixin:
def failUnlessReallyEqual(self, a, b, msg=None):
self.failUnlessEqual(a, b, msg=msg)
self.failUnlessEqual(type(a), type(b), msg=msg)
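Comparing types as well as values matters for these Unicode fixes: a unicode and a str that compare equal should still count as different results. For example:

    # u"tahoe" == "tahoe" in Python 2, so failUnlessEqual would pass,
    # but failUnlessReallyEqual fails because unicode is not str:
    self.failUnlessReallyEqual(u"tahoe", "tahoe", "type mismatch")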
class SignalMixin:
# This class is necessary for any code which wants to use Processes
# outside the usual reactor.run() environment. It is copied from
@ -131,7 +138,7 @@ class TestMixin(SignalMixin):
if p.active():
p.cancel()
else:
print "WEIRNESS! pending timed call not active+!"
print "WEIRDNESS! pending timed call not active!"
if required_to_quiesce and active:
self.fail("Reactor was still active when it was required to be quiescent.")
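The ReallyEqualMixin added above also compares types, which matters for these Unicode fixes: under Python 2, equal str and unicode values compare equal. An illustration (editor's sketch, inside a unittest.TestCase method):

    # failUnlessEqual passes here, because 'tahoe' == u'tahoe' in Python 2:
    self.failUnlessEqual('tahoe', u'tahoe')
    # failUnlessReallyEqual fails, because the extra type check catches
    # the str/unicode confusion:
    self.failUnlessReallyEqual('tahoe', u'tahoe')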

File diff suppressed because it is too large

View File

@ -4,12 +4,23 @@ from cStringIO import StringIO
import pickle
from twisted.trial import unittest
from allmydata.test.no_network import GridTestMixin
from allmydata.test.common_util import ReallyEqualMixin
from allmydata.util import fileutil
from allmydata.scripts import runner, debug
from allmydata.scripts.common import get_aliases
from twisted.internet import defer, threads # CLI tests use deferToThread
from allmydata.interfaces import IDirectoryNode
have_sqlite3 = False
try:
import sqlite3
sqlite3 # hush pyflakes
have_sqlite3 = True
except ImportError:
pass
else:
from allmydata.scripts import consolidate
class CLITestMixin:
def do_cli(self, verb, *args, **kwargs):
@ -30,7 +41,7 @@ class CLITestMixin:
d.addCallback(_done)
return d
class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
class Consolidate(GridTestMixin, CLITestMixin, ReallyEqualMixin, unittest.TestCase):
def writeto(self, path, data):
d = os.path.dirname(os.path.join(self.basedir, "home", path))
@ -46,8 +57,8 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
def do_cli_good(self, verb, *args, **kwargs):
d = self.do_cli(verb, *args, **kwargs)
def _check((rc,out,err)):
self.failUnlessEqual(err, "", verb)
self.failUnlessEqual(rc, 0, verb)
self.failUnlessReallyEqual(err, "", verb)
self.failUnlessReallyEqual(rc, 0, verb)
return out
d.addCallback(_check)
return d
@ -59,29 +70,14 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
co.parseOptions(["--node-directory", self.get_clientdir(),
"--dbfile", "foo.db", "--backupfile", "backup", "--really",
"URI:DIR2:foo"])
self.failUnlessEqual(co["dbfile"], "foo.db")
self.failUnlessEqual(co["backupfile"], "backup")
self.failUnlessReallyEqual(co["dbfile"], "foo.db")
self.failUnlessReallyEqual(co["backupfile"], "backup")
self.failUnless(co["really"])
self.failUnlessEqual(co.where, "URI:DIR2:foo")
self.failUnlessReallyEqual(co.where, u"URI:DIR2:foo")
def OFF_test_basic(self):
# rename this method to enable the test. I've disabled it because, in
# my opinion:
#
# 1: 'tahoe debug consolidate' is useful enough to include in trunk,
# but not useful enough justify a lot of compatibility effort or
# extra test time
# 2: it requires sqlite3; I did not bother to make it work with
# pysqlite, nor did I bother making it fail gracefully when
# sqlite3 is not available
# 3: this test takes 30 seconds to run on my workstation, and it likely
# to take several minutes on the old slow dapper buildslave
# 4: I don't want other folks to see a SkipTest and wonder "oh no, what
# did I do wrong to not allow this test to run"
#
# These may not be strong arguments: I welcome feedback. In particular,
# this command may be more suitable for a plugin of some sort, if we
# had plugins of some sort. -warner 12-Mar-09
def test_basic(self):
if not have_sqlite3:
raise unittest.SkipTest("'tahoe debug consolidate' is not supported because sqlite3 is not available.")
self.basedir = "consolidate/Consolidate/basic"
self.set_up_grid(num_clients=1)
@ -175,7 +171,7 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
def _check_consolidate_output1(out):
lines = out.splitlines()
last = lines[-1]
self.failUnlessEqual(last.strip(),
self.failUnlessReallyEqual(last.strip(),
"system done, dircounts: "
"25/12 seen/used, 7 created, 2 as-is, 13 reused")
self.failUnless(os.path.exists(dbfile))
@ -185,7 +181,7 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
self.failUnless(u"fluxx" in backup["archives"])
adata = backup["archives"]["fluxx"]
kids = adata[u"children"]
self.failUnlessEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]),
self.failUnlessReallyEqual(str(kids[u"2009-03-01 01.01.01"][1][u"rw_uri"]),
c("1-b-start"))
d.addCallback(_check_consolidate_output1)
d.addCallback(lambda ign:
@ -196,11 +192,11 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
def _check_consolidate_output2(out):
lines = out.splitlines()
last = lines[-1]
self.failUnlessEqual(last.strip(),
self.failUnlessReallyEqual(last.strip(),
"system done, dircounts: "
"0/0 seen/used, 0 created, 0 as-is, 0 reused")
backup = pickle.load(open(backupfile, "rb"))
self.failUnlessEqual(backup, self.first_backup)
self.failUnlessReallyEqual(backup, self.first_backup)
self.failUnless(os.path.exists(backupfile + ".0"))
d.addCallback(_check_consolidate_output2)
@ -214,14 +210,13 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
# self.manifests[which][path])
# last snapshot should be untouched
self.failUnlessEqual(c("7-b"), c("7-b-start"))
self.failUnlessReallyEqual(c("7-b"), c("7-b-start"))
# first snapshot should be a readonly form of the original
from allmydata.scripts.tahoe_backup import readonly
self.failUnlessEqual(c("1-b-finish"), readonly(c("1-b-start")))
self.failUnlessEqual(c("1-bp-finish"), readonly(c("1-bp-start")))
self.failUnlessEqual(c("1-bps1-finish"), readonly(c("1-bps1-start")))
self.failUnlessEqual(c("1-bps2-finish"), readonly(c("1-bps2-start")))
self.failUnlessReallyEqual(c("1-b-finish"), consolidate.readonly(c("1-b-start")))
self.failUnlessReallyEqual(c("1-bp-finish"), consolidate.readonly(c("1-bp-start")))
self.failUnlessReallyEqual(c("1-bps1-finish"), consolidate.readonly(c("1-bps1-start")))
self.failUnlessReallyEqual(c("1-bps2-finish"), consolidate.readonly(c("1-bps2-start")))
# new directories should be different than the old ones
self.failIfEqual(c("1-b"), c("1-b-start"))
@ -246,33 +241,33 @@ class Consolidate(GridTestMixin, CLITestMixin, unittest.TestCase):
self.failIfEqual(c("5-bps2"), c("5-bps2-start"))
# snapshot 1 and snapshot 2 should be identical
self.failUnlessEqual(c("2-b"), c("1-b"))
self.failUnlessReallyEqual(c("2-b"), c("1-b"))
# snapshot 3 modified a file underneath parent/
self.failIfEqual(c("3-b"), c("2-b")) # 3 modified a file
self.failIfEqual(c("3-bp"), c("2-bp"))
# but the subdirs are the same
self.failUnlessEqual(c("3-bps1"), c("2-bps1"))
self.failUnlessEqual(c("3-bps2"), c("2-bps2"))
self.failUnlessReallyEqual(c("3-bps1"), c("2-bps1"))
self.failUnlessReallyEqual(c("3-bps2"), c("2-bps2"))
# snapshot 4 should be the same as 2
self.failUnlessEqual(c("4-b"), c("2-b"))
self.failUnlessEqual(c("4-bp"), c("2-bp"))
self.failUnlessEqual(c("4-bps1"), c("2-bps1"))
self.failUnlessEqual(c("4-bps2"), c("2-bps2"))
self.failUnlessReallyEqual(c("4-b"), c("2-b"))
self.failUnlessReallyEqual(c("4-bp"), c("2-bp"))
self.failUnlessReallyEqual(c("4-bps1"), c("2-bps1"))
self.failUnlessReallyEqual(c("4-bps2"), c("2-bps2"))
# snapshot 5 added a file under subdir1
self.failIfEqual(c("5-b"), c("4-b"))
self.failIfEqual(c("5-bp"), c("4-bp"))
self.failIfEqual(c("5-bps1"), c("4-bps1"))
self.failUnlessEqual(c("5-bps2"), c("4-bps2"))
self.failUnlessReallyEqual(c("5-bps2"), c("4-bps2"))
# snapshot 6 copied a directory; it should be shared
self.failIfEqual(c("6-b"), c("5-b"))
self.failIfEqual(c("6-bp"), c("5-bp"))
self.failUnlessEqual(c("6-bps1"), c("5-bps1"))
self.failUnlessReallyEqual(c("6-bps1"), c("5-bps1"))
self.failIfEqual(c("6-bps2"), c("5-bps2"))
self.failUnlessEqual(c("6-bps2c1"), c("6-bps1"))
self.failUnlessReallyEqual(c("6-bps2c1"), c("6-bps1"))
d.addCallback(check_consolidation)

View File

@ -30,10 +30,11 @@ from allmydata.util.consumer import download_to_data
from allmydata.immutable import upload
from allmydata.test.no_network import GridTestMixin
from allmydata.test.common import ShouldFailMixin
from allmydata.test.common_util import ReallyEqualMixin
timeout = 240
class Handler(GridTestMixin, ShouldFailMixin, unittest.TestCase):
class Handler(GridTestMixin, ShouldFailMixin, ReallyEqualMixin, unittest.TestCase):
"""This is a no-network unit test of the SFTPUserHandler and the abstractions it uses."""
if not have_pycrypto:

View File

@ -14,7 +14,7 @@ if __name__ == "__main__":
import tempfile
import shutil
import platform
if len(sys.argv) != 2:
print "Usage: %s lumière" % sys.argv[0]
sys.exit(1)
@ -22,10 +22,12 @@ if __name__ == "__main__":
print
print "class MyWeirdOS(StringUtils, unittest.TestCase):"
print " uname = '%s'" % ' '.join(platform.uname())
print " argv = %s" % repr(sys.argv[1])
if sys.platform != "win32":
print " argv = %s" % repr(sys.argv[1])
print " platform = '%s'" % sys.platform
print " filesystemencoding = '%s'" % sys.getfilesystemencoding()
print " stdoutencoding = '%s'" % sys.stdout.encoding
print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
print " output_encoding = '%s'" % sys.stdout.encoding
print " argv_encoding = '%s'" % (sys.platform == "win32" and 'utf-8' or sys.stdout.encoding)
try:
tmpdir = tempfile.mkdtemp()
@ -48,47 +50,65 @@ if __name__ == "__main__":
from twisted.trial import unittest
from mock import patch
import locale, sys
import sys
from allmydata.test.common_util import ReallyEqualMixin
from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
unicode_to_stdout, unicode_platform, listdir_unicode, open_unicode, \
FilenameEncodingError, get_term_encoding
unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \
FilenameEncodingError, get_output_encoding, _reload
from twisted.python import usage
class StringUtilsErrors(unittest.TestCase):
@patch('sys.stdout')
def test_get_term_encoding(self, mock):
mock.encoding = None
class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
def tearDown(self):
_reload()
self.failUnlessEqual(get_term_encoding().lower(), locale.getpreferredencoding().lower())
@patch('sys.stdout')
def test_get_output_encoding(self, mock_stdout):
mock_stdout.encoding = 'UTF-8'
_reload()
self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
mock_stdout.encoding = 'cp65001'
_reload()
self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
mock_stdout.encoding = 'koi8-r'
_reload()
self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
mock_stdout.encoding = 'nonexistent_encoding'
self.failUnlessRaises(AssertionError, _reload)
# TODO: mock_stdout.encoding = None
@patch('sys.stdout')
def test_argv_to_unicode(self, mock):
mock.encoding = 'utf-8'
_reload()
self.failUnlessRaises(usage.UsageError,
argv_to_unicode,
u'lumière'.encode('latin1'))
def test_unicode_to_url(self):
pass
@patch('sys.stdout')
def test_unicode_to_stdout(self, mock):
def test_unicode_to_output(self, mock):
# Encoding koi8-r cannot represent 'è'
mock.encoding = 'koi8-r'
self.failUnlessEqual(unicode_to_stdout(u'lumière'), 'lumi?re')
_reload()
self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, u'lumière')
@patch('os.listdir')
def test_unicode_normalization(self, mock):
# Pretend to run on an Unicode platform such as Windows
# Pretend to run on a Unicode platform
orig_platform = sys.platform
sys.platform = 'win32'
mock.return_value = [u'A\u0308rtonwall.mp3']
self.failUnlessEqual(listdir_unicode(u'/dummy'), [u'\xc4rtonwall.mp3'])
sys.platform = orig_platform
try:
sys.platform = 'darwin'
mock.return_value = [u'A\u0308rtonwall.mp3']
_reload()
self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [u'\xc4rtonwall.mp3'])
finally:
sys.platform = orig_platform
# The following tests applies only to platforms which don't store filenames as
# Unicode entities on the filesystem.
@ -100,18 +120,19 @@ class StringUtilsNonUnicodePlatform(unittest.TestCase):
def tearDown(self):
sys.platform = self.original_platform
_reload()
@patch('sys.getfilesystemencoding')
@patch('os.listdir')
def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
# What happen if a latin1-encoded filenames is encountered on an UTF-8
# What happens if latin1-encoded filenames are encountered on a UTF-8
# filesystem?
mock_listdir.return_value = [
u'lumière'.encode('utf-8'),
u'lumière'.encode('latin1')]
mock_getfilesystemencoding.return_value = 'utf-8'
_reload()
self.failUnlessRaises(FilenameEncodingError,
listdir_unicode,
u'/dummy')
@ -119,6 +140,7 @@ class StringUtilsNonUnicodePlatform(unittest.TestCase):
# We're trying to list a directory whose name cannot be represented in
# the filesystem encoding. This should fail.
mock_getfilesystemencoding.return_value = 'ascii'
_reload()
self.failUnlessRaises(FilenameEncodingError,
listdir_unicode,
u'/lumière')
@ -126,12 +148,12 @@ class StringUtilsNonUnicodePlatform(unittest.TestCase):
@patch('sys.getfilesystemencoding')
def test_open_unicode(self, mock):
mock.return_value = 'ascii'
_reload()
self.failUnlessRaises(FilenameEncodingError,
open_unicode,
u'lumière')
u'lumière', 'rb')
class StringUtils:
class StringUtils(ReallyEqualMixin):
def setUp(self):
# Mock sys.platform because unicode_platform() uses it
self.original_platform = sys.platform
@ -139,29 +161,30 @@ class StringUtils:
def tearDown(self):
sys.platform = self.original_platform
_reload()
@patch('sys.stdout')
def test_argv_to_unicode(self, mock):
if 'argv' not in dir(self):
raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")
mock.encoding = self.stdoutencoding
return
mock.encoding = self.output_encoding
argu = u'lumière'
argv = self.argv
self.failUnlessEqual(argv_to_unicode(argv), argu)
_reload()
self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
self.failUnless(unicode_to_url(u'lumière'), u'lumière'.encode('utf-8'))
self.failUnless(unicode_to_url(u'lumière'), "lumi\xc3\xa8re")
@patch('sys.stdout')
def test_unicode_to_stdout(self, mock):
if 'argv' not in dir(self):
raise unittest.SkipTest("There's no way to pass non-ASCII arguments in CLI on this (mocked) platform")
def test_unicode_to_output(self, mock):
if 'output' not in dir(self):
return
mock.encoding = self.stdoutencoding
self.failUnlessEqual(unicode_to_stdout(u'lumière'), self.argv)
mock.encoding = self.output_encoding
_reload()
self.failUnlessReallyEqual(unicode_to_output(u'lumière'), self.output)
def test_unicode_platform(self):
matrix = {
@ -171,113 +194,119 @@ class StringUtils:
'darwin': True,
}
self.failUnlessEqual(unicode_platform(), matrix[self.platform])
_reload()
self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
@patch('sys.getfilesystemencoding')
@patch('os.listdir')
def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
if 'dirlist' not in dir(self):
raise unittest.SkipTest("No way to write non-ASCII filenames on this system")
return
mock_listdir.return_value = self.dirlist
mock_getfilesystemencoding.return_value = self.filesystemencoding
mock_getfilesystemencoding.return_value = self.filesystem_encoding
_reload()
filenames = listdir_unicode(u'/dummy')
for fname in TEST_FILENAMES:
self.failUnless(isinstance(fname, unicode))
if fname not in filenames:
self.fail("Cannot find %r in %r" % (fname, filenames))
self.failUnlessIn(fname, filenames)
@patch('sys.getfilesystemencoding')
@patch('__builtin__.open')
def test_open_unicode(self, mock_open, mock_getfilesystemencoding):
mock_getfilesystemencoding.return_value = self.filesystemencoding
mock_getfilesystemencoding.return_value = self.filesystem_encoding
fn = u'/dummy_directory/lumière.txt'
_reload()
try:
open_unicode(fn)
open_unicode(fn, 'rb')
except FilenameEncodingError:
raise unittest.SkipTest("Cannot represent test filename on this (mocked) platform")
return
# Pass Unicode string to open() on Unicode platforms
if unicode_platform():
mock_open.assert_called_with(fn, 'r')
mock_open.assert_called_with(fn, 'rb')
# Pass correctly encoded bytestrings to open() on non-Unicode platforms
else:
fn_bytestring = fn.encode(self.filesystemencoding)
mock_open.assert_called_with(fn_bytestring, 'r')
fn_bytestring = fn.encode(self.filesystem_encoding)
mock_open.assert_called_with(fn_bytestring, 'rb')
class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
platform = 'linux2'
filesystemencoding = 'UTF-8'
stdoutencoding = 'UTF-8'
filesystem_encoding = 'UTF-8'
output_encoding = 'UTF-8'
argv_encoding = 'UTF-8'
dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
output = 'lumi\xe8re'
argv = 'lumi\xe8re'
platform = 'linux2'
filesystemencoding = 'ISO-8859-1'
stdoutencoding = 'ISO-8859-1'
filesystem_encoding = 'ISO-8859-1'
output_encoding = 'ISO-8859-1'
argv_encoding = 'ISO-8859-1'
dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
class WindowsXP(StringUtils, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Stepping 2, AuthenticAMD'
argv = 'lumi\xe8re'
output = 'lumi\x8are'
argv = 'lumi\xc3\xa8re'
platform = 'win32'
filesystemencoding = 'mbcs'
stdoutencoding = 'cp850'
filesystem_encoding = 'mbcs'
output_encoding = 'cp850'
argv_encoding = 'utf-8'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
class WindowsXP_UTF8(StringUtils, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Stepping 2, AuthenticAMD'
argv = 'lumi\xe8re'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
platform = 'win32'
filesystemencoding = 'mbcs'
stdoutencoding = 'cp65001'
filesystem_encoding = 'mbcs'
output_encoding = 'cp65001'
argv_encoding = 'utf-8'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
class WindowsVista(StringUtils, unittest.TestCase):
uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
argv = 'lumi\xe8re'
output = 'lumi\x8are'
argv = 'lumi\xc3\xa8re'
platform = 'win32'
filesystemencoding = 'mbcs'
stdoutencoding = 'cp850'
filesystem_encoding = 'mbcs'
output_encoding = 'cp850'
argv_encoding = 'utf-8'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
todo = "Unicode arguments on the command-line is not yet supported under Windows, see bug #565."
class MacOSXLeopard(StringUtils, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
platform = 'darwin'
filesystemencoding = 'utf-8'
stdoutencoding = 'UTF-8'
filesystem_encoding = 'utf-8'
output_encoding = 'UTF-8'
argv_encoding = 'UTF-8'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
#argv = 'lumiere'
platform = 'darwin'
filesystemencoding = 'utf-8'
stdoutencoding = 'US-ASCII'
filesystem_encoding = 'utf-8'
output_encoding = 'US-ASCII'
argv_encoding = 'US-ASCII'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(StringUtils, unittest.TestCase):
uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
#argv = 'lumiere'
platform = 'openbsd4'
filesystemencoding = '646'
stdoutencoding = '646'
filesystem_encoding = '646'
output_encoding = '646'
argv_encoding = '646'
# Oops, I cannot write filenames containing non-ascii characters

View File

@ -1416,7 +1416,7 @@ class SystemTest(SystemTestMixin, unittest.TestCase):
d.addCallback(run, "list-aliases")
def _check_aliases_1((out,err)):
self.failUnlessEqual(err, "")
self.failUnlessEqual(out, "tahoe: %s\n" % private_uri)
self.failUnlessEqual(out.strip(" \n"), "tahoe: %s" % private_uri)
d.addCallback(_check_aliases_1)
# now that that's out of the way, remove root_dir.cap and work with

View File

@ -5,125 +5,235 @@ unicode and back.
import sys
import os
import re
import unicodedata
from allmydata.util.assertutil import precondition
from twisted.python import usage
import locale
def get_term_encoding():
"""
Returns expected encoding for writing to the terminal and reading
arguments from the command-line.
"""
if sys.stdout.encoding:
return sys.stdout.encoding
def _canonical_encoding(encoding):
if encoding is None:
encoding = 'utf-8'
encoding = encoding.lower()
if encoding == "cp65001":
encoding = 'utf-8'
elif encoding == "us-ascii" or encoding == "646":
encoding = 'ascii'
# Sometimes Python returns an encoding name that it doesn't support for conversion;
# fail early if this happens.
try:
u"test".encode(encoding)
except LookupError:
raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,))
return encoding
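# For example (editor's sketch of the behaviour above):
#   _canonical_encoding(None)       == 'utf-8'
#   _canonical_encoding('CP65001')  == 'utf-8'    # Windows alias for UTF-8
#   _canonical_encoding('US-ASCII') == 'ascii'
# and an unknown name such as 'nonexistent_encoding' raises AssertionError.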
filesystem_encoding = None
output_encoding = None
argv_encoding = None
is_unicode_platform = False
def _reload():
global filesystem_encoding, output_encoding, argv_encoding, is_unicode_platform
filesystem_encoding = _canonical_encoding(sys.getfilesystemencoding())
output_encoding = _canonical_encoding(sys.stdout.encoding or locale.getpreferredencoding())
if sys.platform == 'win32':
# arguments are converted to utf-8 in windows/tahoe.py
argv_encoding = 'utf-8'
else:
return locale.getpreferredencoding()
argv_encoding = output_encoding
is_unicode_platform = sys.platform in ["win32", "darwin"]
_reload()
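# The values above are cached at import time. Tests that mock
# sys.stdout.encoding or sys.getfilesystemencoding() must call _reload()
# to recompute them, e.g. (sketch mirroring test_stringutils):
#   mock_stdout.encoding = 'koi8-r'
#   _reload()
#   assert get_output_encoding() == 'koi8-r'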
def get_filesystem_encoding():
"""
Returns expected encoding for local filenames.
"""
return filesystem_encoding
def get_output_encoding():
"""
Returns expected encoding for writing to stdout or stderr.
"""
return output_encoding
def get_argv_encoding():
"""
Returns expected encoding for command-line arguments.
"""
return argv_encoding
def argv_to_unicode(s):
"""
Decode given argv element to unicode.
Decode given argv element to unicode. If this fails, raise a UsageError.
"""
# Try to decode the command-line argument with the encoding returned by
# get_term_encoding(), if this fails print an error message to the user.
precondition(isinstance(s, str), s)
try:
return unicode(s, get_term_encoding())
return unicode(s, argv_encoding)
except UnicodeDecodeError:
raise usage.UsageError("Argument '%s' cannot be decoded as %s." %
(s, get_term_encoding()))
raise usage.UsageError("Argument %s cannot be decoded as %s." %
(quote_output(s), argv_encoding))
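# For example (mirroring test_stringutils): with argv_encoding 'utf-8',
#   argv_to_unicode('lumi\xc3\xa8re') == u'lumi\xe8re'
# whereas latin1 bytes such as 'lumi\xe8re' raise usage.UsageError.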
def unicode_to_url(s):
"""
Encode a unicode object used in a URL.
"""
# According to RFC 2718, non-ascii characters in url's must be UTF-8 encoded.
# According to RFC 2718, non-ascii characters in URLs must be UTF-8 encoded.
precondition(isinstance(s, unicode), s)
# FIXME
return to_str(s)
#precondition(isinstance(s, unicode), s)
#return s.encode('utf-8')
def to_str(s):
if s is None or isinstance(s, str):
return s
return s.encode('utf-8')
def unicode_to_stdout(s):
"""
Encode an unicode object for representation on stdout.
"""
def to_argv(s):
if isinstance(s, str):
return s
return s.encode(argv_encoding)
PRINTABLE_ASCII = re.compile(r'^[ -~\n\r]*$', re.DOTALL)
PRINTABLE_8BIT = re.compile(r'^[ -&(-~\n\r\x80-\xFF]*$', re.DOTALL)
def is_printable_ascii(s):
return PRINTABLE_ASCII.search(s) is not None
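# For example (editor's sketch):
#   is_printable_ascii("hello world\n") -> True
#   is_printable_ascii("caf\xc3\xa9")   -> False  (contains bytes >= 0x80)
# Note that PRINTABLE_8BIT allows \x80-\xFF but excludes the single-quote
# character (0x27), so output containing ' falls back to repr() below.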
def unicode_to_output(s):
"""
Encode a unicode object for representation on stdout or stderr.
"""
precondition(isinstance(s, unicode), s)
return s.encode(get_term_encoding(), 'replace')
try:
out = s.encode(output_encoding)
except UnicodeEncodeError:
raise UnicodeEncodeError(output_encoding, s, 0, 0,
"A string could not be encoded as %s for output to the terminal:\n%r" %
(output_encoding, repr(s)))
if PRINTABLE_8BIT.search(out) is None:
raise UnicodeEncodeError(output_encoding, s, 0, 0,
"A string encoded as %s for output to the terminal contained unsafe bytes:\n%r" %
(output_encoding, repr(s)))
return out
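# For example (mirroring the tests): with output_encoding 'koi8-r', which
# cannot represent u'\xe8', unicode_to_output(u'lumi\xe8re') now raises
# UnicodeEncodeError instead of silently producing 'lumi?re' as the old
# unicode_to_stdout did.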
def quote_output(s, quotemarks=True):
"""
Encode either a Unicode string or a UTF-8-encoded bytestring for representation
on stdout or stderr, tolerating errors. If 'quotemarks' is True, the string is
always surrounded by single quotes; otherwise, it is quoted only if necessary to
avoid ambiguity or control bytes in the output.
"""
precondition(isinstance(s, (str, unicode)), s)
if isinstance(s, str):
try:
s = s.decode('utf-8')
except UnicodeDecodeError:
return 'b' + repr(s)
try:
out = s.encode(output_encoding)
except UnicodeEncodeError:
return repr(s)
if PRINTABLE_8BIT.search(out) is None:
return repr(out)
if quotemarks:
return "'" + out.replace("\\", "\\\\").replace("'", "\'") + "'"
else:
return out
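# For example (editor's sketch, assuming a latin-1 output_encoding):
#   quote_output(u'lumi\xe8re')            -> "'lumi\xe8re'"
#   quote_output('\xff\xfe')               -> "b'\\xff\\xfe'"  (not valid UTF-8)
#   quote_output('good', quotemarks=False) -> 'good'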
def quote_path(path, quotemarks=True):
return quote_output("/".join(map(to_str, path)), quotemarks=quotemarks)
def unicode_platform():
"""
Does the current platform handle Unicode filenames natively ?
Does the current platform handle Unicode filenames natively?
"""
return sys.platform in ('win32', 'darwin')
return is_unicode_platform
class FilenameEncodingError(Exception):
"""
Filename cannot be encoded using the current encoding of your filesystem
(%s). Please configure your locale correctly or rename this file.
"""
pass
def listdir_unicode_unix(path):
def listdir_unicode_fallback(path):
"""
This function emulates an Unicode API under Unix similar to one available
This function emulates a fallback Unicode API similar to one available
under Windows or MacOS X.
If badly encoded filenames are encountered, an exception is raised.
"""
precondition(isinstance(path, unicode), path)
encoding = sys.getfilesystemencoding()
try:
byte_path = path.encode(encoding)
byte_path = path.encode(filesystem_encoding)
except UnicodeEncodeError:
raise FilenameEncodingError(path)
try:
return [unicode(fn, encoding) for fn in os.listdir(byte_path)]
return [unicode(fn, filesystem_encoding) for fn in os.listdir(byte_path)]
except UnicodeDecodeError:
raise FilenameEncodingError(fn)
def listdir_unicode(path, encoding = None):
def listdir_unicode(path):
"""
Wrapper around listdir() which provides safe access to the convenient
Unicode API even under Unix.
Unicode API even under platforms that don't provide one natively.
"""
precondition(isinstance(path, unicode), path)
# On Windows and MacOS X, the Unicode API is used
if unicode_platform():
dirlist = os.listdir(path)
# On other platforms (ie. Unix systems), the byte-level API is used
if is_unicode_platform:
dirlist = os.listdir(path)
else:
dirlist = listdir_unicode_unix(path)
dirlist = listdir_unicode_fallback(path)
# Normalize the resulting unicode filenames
#
# This prevents different OS from generating non-equal unicode strings for
# This prevents different OSes from generating non-equal unicode strings for
# the same filename representation
return [unicodedata.normalize('NFC', fname) for fname in dirlist]
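# For example (as in the tests above): a platform that lists the decomposed
# form u'A\u0308rtonwall.mp3' yields the NFC form u'\xc4rtonwall.mp3',
# since unicodedata.normalize('NFC', u'A\u0308') == u'\xc4'.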
def open_unicode(path, mode='r'):
def open_unicode(path, mode):
"""
Wrapper around open() which provides safe access to the convenient Unicode
API even under Unix.
"""
precondition(isinstance(path, unicode), path)
if unicode_platform():
return open(path, mode)
if is_unicode_platform:
return open(os.path.expanduser(path), mode)
else:
encoding = sys.getfilesystemencoding()
try:
return open(path.encode(encoding), mode)
return open(os.path.expanduser(path.encode(filesystem_encoding)), mode)
except UnicodeEncodeError:
raise FilenameEncodingError(path)
def abspath_expanduser_unicode(path):
precondition(isinstance(path, unicode), path)
if is_unicode_platform:
return os.path.abspath(os.path.expanduser(path))
else:
try:
pathstr = path.encode(filesystem_encoding)
return os.path.abspath(os.path.expanduser(pathstr)).decode(filesystem_encoding)
except (UnicodeEncodeError, UnicodeDecodeError):
raise FilenameEncodingError(path)
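# For example (editor's sketch): abspath_expanduser_unicode(u'~/.tahoe')
# might return u'/home/user/.tahoe'; on a non-Unicode platform, a path that
# the filesystem encoding cannot represent raises FilenameEncodingError.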

View File

@ -3,5 +3,17 @@ pkgresutil.install() # this is done before nevow is imported by depends
import depends # import dependencies so that py2exe finds them
_junk = depends # appease pyflakes
import sys
from ctypes import WINFUNCTYPE, POINTER, byref, c_wchar_p, c_int, windll
from allmydata.scripts import runner
runner.run(install_node_control=False)
GetCommandLineW = WINFUNCTYPE(c_wchar_p)(("GetCommandLineW", windll.kernel32))
CommandLineToArgvW = WINFUNCTYPE(POINTER(c_wchar_p), c_wchar_p, POINTER(c_int)) \
(("CommandLineToArgvW", windll.shell32))
argc = c_int(0)
argv = CommandLineToArgvW(GetCommandLineW(), byref(argc))
argv_utf8 = [argv[i].encode('utf-8') for i in xrange(1, argc.value)]
rc = runner.runner(argv_utf8, install_node_control=False)
sys.exit(rc)
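On win32 the UTF-16 command line is fetched with GetCommandLineW, split with CommandLineToArgvW, and re-encoded as UTF-8 before being handed to the runner; stringutils correspondingly sets argv_encoding to 'utf-8' on that platform, so Unicode arguments survive the round trip. A sketch of the invariant (editor's illustration):

    # What windows/tahoe.py encodes, argv_to_unicode decodes back:
    from allmydata.util.stringutils import argv_to_unicode
    assert argv_to_unicode(u'lumi\xe8re'.encode('utf-8')) == u'lumi\xe8re'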