From 5ef04ef59e8a5ae5756d86b84308cec9d5120e17 Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Mon, 26 Sep 2016 20:42:42 -0700 Subject: [PATCH] add hashutil.permute_server_hash which uses SHA1 to combine the file's storage index (known as "peer selection index" in this context) and each server's "server permutation seed". This is the only thing in tahoe that uses SHA1. With this change, we stop importing sha1 from anywhere else. --- src/allmydata/storage_client.py | 5 +++-- src/allmydata/test/no_network.py | 4 ++-- src/allmydata/test/test_util.py | 5 +++++ src/allmydata/util/hashutil.py | 12 ++++-------- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index b02f88fb5..64a6985f1 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -40,7 +40,7 @@ from allmydata.util import log, base32 from allmydata.util.assertutil import precondition from allmydata.util.observer import ObserverList from allmydata.util.rrefutil import add_version_to_remote_reference -from allmydata.util.hashutil import sha1 +from allmydata.util.hashutil import permute_server_hash # who is responsible for de-duplication? # both? @@ -200,7 +200,8 @@ class StorageFarmBroker(service.MultiService): def _permuted(server): seed = server.get_permutation_seed() is_unpreferred = server not in preferred_servers - return (is_unpreferred, sha1(peer_selection_index + seed).digest()) + return (is_unpreferred, + permute_server_hash(peer_selection_index, seed)) return sorted(connected_servers, key=_permuted) def get_all_serverids(self): diff --git a/src/allmydata/test/no_network.py b/src/allmydata/test/no_network.py index df3c88137..56ccfa5a9 100644 --- a/src/allmydata/test/no_network.py +++ b/src/allmydata/test/no_network.py @@ -27,7 +27,7 @@ from allmydata import uri as tahoe_uri from allmydata.client import Client from allmydata.storage.server import StorageServer, storage_index_to_dir from allmydata.util import fileutil, idlib, hashutil -from allmydata.util.hashutil import sha1 +from allmydata.util.hashutil import permute_server_hash from allmydata.test.common_web import HTTPClientGETFactory from allmydata.interfaces import IStorageBroker, IServer from .common import TEST_RSA_KEY_SIZE @@ -169,7 +169,7 @@ class NoNetworkStorageBroker: def get_servers_for_psi(self, peer_selection_index): def _permuted(server): seed = server.get_permutation_seed() - return sha1(peer_selection_index + seed).digest() + return permute_server_hash(peer_selection_index, seed) return sorted(self.get_connected_servers(), key=_permuted) def get_connected_servers(self): return self.client._servers diff --git a/src/allmydata/test/test_util.py b/src/allmydata/test/test_util.py index 470a5d822..b593dc588 100644 --- a/src/allmydata/test/test_util.py +++ b/src/allmydata/test/test_util.py @@ -943,6 +943,11 @@ class HashUtilTests(unittest.TestCase): self._testknown(hashutil.ssk_readkey_data_hash, "73wsaldnvdzqaf7v4pzbr2ae5a", "iv", "rk") self._testknown(hashutil.ssk_storage_index_hash, "j7icz6kigb6hxrej3tv4z7ayym", "") + self._testknown(hashutil.permute_server_hash, + "kb4354zeeurpo3ze5e275wzbynm6hlap", # b32(expected) + "SI", # peer selection index == storage_index + base32.a2b("u33m4y7klhz3bypswqkozwetvabelhxt"), # seed + ) class Abbreviate(unittest.TestCase): def test_time(self): diff --git a/src/allmydata/util/hashutil.py b/src/allmydata/util/hashutil.py index 81522ad5d..3c57cdf6b 100644 --- a/src/allmydata/util/hashutil.py +++ b/src/allmydata/util/hashutil.py @@ -1,15 +1,8 @@ from pycryptopp.hash.sha256 import SHA256 import os +import hashlib from allmydata.util.netstring import netstring -try: - import hashlib - sha1 = hashlib.sha1 -except ImportError: - # hashlib was added in Python 2.5 - import sha - sha1 = sha.new - # Be very very cautious when modifying this file. Almost any change will # cause a compatibility break, invalidating all outstanding URIs and making # any previously uploaded files become inaccessible. BE CONSERVATIVE AND TEST @@ -209,3 +202,6 @@ def timing_safe_compare(a, b): BACKUPDB_DIRHASH_TAG = "allmydata_backupdb_dirhash_v1" def backupdb_dirhash(contents): return tagged_hash(BACKUPDB_DIRHASH_TAG, contents) + +def permute_server_hash(peer_selection_index, server_permutation_seed): + return hashlib.sha1(peer_selection_index + server_permutation_seed).digest()