From a555c13f3873ac4aad53c61af544db7a2ff17d4b Mon Sep 17 00:00:00 2001 From: sgerodes Date: Wed, 8 Jan 2025 16:42:10 +0100 Subject: [PATCH 01/10] fix(parse_duration): resolve error when parsing durations in seconds - Added support for parsing durations specified in seconds (e.g., "10s"). - Fixed an issue where configuring seconds previously resulted in errors due to missing elif statement. --- src/allmydata/util/time_format.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/allmydata/util/time_format.py b/src/allmydata/util/time_format.py index fb4d735ab..c65072c0d 100644 --- a/src/allmydata/util/time_format.py +++ b/src/allmydata/util/time_format.py @@ -53,12 +53,14 @@ def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4}) def parse_duration(s): orig = s unit = None + SECOND = 1 DAY = 24*60*60 MONTH = 31*DAY YEAR = 365*DAY if s.endswith("s"): + unit = SECOND s = s[:-1] - if s.endswith("day"): + elif s.endswith("day"): unit = DAY s = s[:-len("day")] elif s.endswith("month"): From 875f7fa47e8832d6ab1d9d136b158817cf71ea54 Mon Sep 17 00:00:00 2001 From: sgerodes Date: Wed, 8 Jan 2025 16:50:29 +0100 Subject: [PATCH 02/10] test(parse_duration): add tests for seconds --- src/allmydata/test/test_time_format.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/allmydata/test/test_time_format.py b/src/allmydata/test/test_time_format.py index f3b9a8990..373b380a4 100644 --- a/src/allmydata/test/test_time_format.py +++ b/src/allmydata/test/test_time_format.py @@ -81,6 +81,8 @@ class TimeFormat(unittest.TestCase, TimezoneMixin): DAY = 24*60*60 MONTH = 31*DAY YEAR = 365*DAY + self.failUnlessEqual(p("1s"), 1) + self.failUnlessEqual(p("86400s"), DAY) self.failUnlessEqual(p("1 day"), DAY) self.failUnlessEqual(p("2 days"), 2*DAY) self.failUnlessEqual(p("3 months"), 3*MONTH) From c09a0ebeb2d560f3fc8aea6e3a377749590d56f3 Mon Sep 17 00:00:00 2001 From: sgerodes Date: Wed, 8 Jan 2025 16:56:03 +0100 Subject: [PATCH 03/10] 
feat(parse_duration): improve the error message --- src/allmydata/util/time_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/time_format.py b/src/allmydata/util/time_format.py index c65072c0d..fd86376d6 100644 --- a/src/allmydata/util/time_format.py +++ b/src/allmydata/util/time_format.py @@ -73,7 +73,7 @@ def parse_duration(s): unit = YEAR s = s[:-len("YEAR")] else: - raise ValueError("no unit (like day, month, or year) in '%s'" % orig) + raise ValueError("no unit (like s, day, mo, month, or year) in '%s'" % orig) s = s.strip() return int(s) * unit From 14bf5adadb4b5a070a069be78657b80b3e97aa98 Mon Sep 17 00:00:00 2001 From: sgerodes Date: Fri, 10 Jan 2025 00:48:00 +0100 Subject: [PATCH 04/10] refactor(time_format): enhance duration parsing with Enum and dynamic regex - Introduced `ParseDurationUnitFormat` Enum for cleaner unit handling. - Improved `parse_duration` to support case-insensitive matching and dynamic error messages. - Added detailed docstrings for better clarity and usability. 
- Refactored and added testcases --- src/allmydata/test/test_time_format.py | 32 +++++++--- src/allmydata/util/time_format.py | 83 +++++++++++++++++++------- 2 files changed, 86 insertions(+), 29 deletions(-) diff --git a/src/allmydata/test/test_time_format.py b/src/allmydata/test/test_time_format.py index 373b380a4..f89df10d2 100644 --- a/src/allmydata/test/test_time_format.py +++ b/src/allmydata/test/test_time_format.py @@ -81,24 +81,42 @@ class TimeFormat(unittest.TestCase, TimezoneMixin): DAY = 24*60*60 MONTH = 31*DAY YEAR = 365*DAY + + # seconds self.failUnlessEqual(p("1s"), 1) + self.failUnlessEqual(p("12 s"), 12) + self.failUnlessEqual(p("333second"), 333) + self.failUnlessEqual(p(" 333 second "), 333) + self.failUnlessEqual(p("5 seconds"), 5) + self.failUnlessEqual(p("60 SECONDS"), 60) self.failUnlessEqual(p("86400s"), DAY) + + # days self.failUnlessEqual(p("1 day"), DAY) self.failUnlessEqual(p("2 days"), 2*DAY) - self.failUnlessEqual(p("3 months"), 3*MONTH) - self.failUnlessEqual(p("4 mo"), 4*MONTH) - self.failUnlessEqual(p("5 years"), 5*YEAR) - e = self.failUnlessRaises(ValueError, p, "123") - self.failUnlessIn("no unit (like day, month, or year) in '123'", - str(e)) + self.failUnlessEqual(p("5days"), 5*DAY) self.failUnlessEqual(p("7days"), 7*DAY) self.failUnlessEqual(p("31day"), 31*DAY) self.failUnlessEqual(p("60 days"), 60*DAY) + self.failUnlessEqual(p("70 DAYS"), 70*DAY) + + # months + self.failUnlessEqual(p("4 mo"), 4*MONTH) self.failUnlessEqual(p("2mo"), 2*MONTH) self.failUnlessEqual(p("3 month"), 3*MONTH) + self.failUnlessEqual(p("3 months"), 3*MONTH) + + # years + self.failUnlessEqual(p("5 years"), 5*YEAR) + self.failUnlessEqual(p("8 year"), 8*YEAR) self.failUnlessEqual(p("2years"), 2*YEAR) + self.failUnlessEqual(p("11YEARS"), 11*YEAR) + + # errors + e = self.failUnlessRaises(ValueError, p, "123") + self.failUnlessIn("No valid unit in",str(e)) e = self.failUnlessRaises(ValueError, p, "2kumquats") - self.failUnlessIn("no unit (like day, month, or 
year) in '2kumquats'", str(e)) + self.failUnlessIn("No valid unit in", str(e)) def test_parse_date(self): p = time_format.parse_date diff --git a/src/allmydata/util/time_format.py b/src/allmydata/util/time_format.py index fd86376d6..4404b3f51 100644 --- a/src/allmydata/util/time_format.py +++ b/src/allmydata/util/time_format.py @@ -6,8 +6,26 @@ http://www.cl.cam.ac.uk/~mgk25/iso-time.html """ import calendar, datetime, re, time - from typing import Optional +from enum import Enum + + +class ParseDurationUnitFormat(Enum): + SECONDS0 = "s" + SECONDS1 = "second" + SECONDS2 = "seconds" + DAYS0 = "day" + DAYS1 = "days" + MONTHS0 = "mo" + MONTHS1 = "month" + MONTHS2 = "months" + YEARS0 = "year" + YEARS1 = "years" + + @classmethod + def list_values(cls): + return list(map(lambda c: c.value, cls)) + def format_time(t): return time.strftime("%Y-%m-%d %H:%M:%S", t) @@ -50,32 +68,53 @@ def iso_utc_time_to_seconds(isotime, _conversion_re=re.compile(r"(?P<year>\d{4}) return calendar.timegm( (year, month, day, hour, minute, second, 0, 1, 0) ) + subsecfloat + def parse_duration(s): - orig = s - unit = None + """ + Parses a duration string and converts it to seconds. The unit format is case insensitive + + Args: + s (str): The duration string to parse. Expected format: `<number><unit>` + where `unit` can be one of the values defined in `ParseDurationUnitFormat`. + + Returns: + int: The duration in seconds. + + Raises: + ValueError: If the input string does not match the expected format or contains invalid units. 
+ """ SECOND = 1 DAY = 24*60*60 MONTH = 31*DAY YEAR = 365*DAY - if s.endswith("s"): - unit = SECOND - s = s[:-1] - elif s.endswith("day"): - unit = DAY - s = s[:-len("day")] - elif s.endswith("month"): - unit = MONTH - s = s[:-len("month")] - elif s.endswith("mo"): - unit = MONTH - s = s[:-len("mo")] - elif s.endswith("year"): - unit = YEAR - s = s[:-len("YEAR")] - else: - raise ValueError("no unit (like s, day, mo, month, or year) in '%s'" % orig) - s = s.strip() - return int(s) * unit + time_map = { + ParseDurationUnitFormat.SECONDS0: SECOND, + ParseDurationUnitFormat.SECONDS1: SECOND, + ParseDurationUnitFormat.SECONDS2: SECOND, + ParseDurationUnitFormat.DAYS0: DAY, + ParseDurationUnitFormat.DAYS1: DAY, + ParseDurationUnitFormat.MONTHS0: MONTH, + ParseDurationUnitFormat.MONTHS1: MONTH, + ParseDurationUnitFormat.MONTHS2: MONTH, + ParseDurationUnitFormat.YEARS0: YEAR, + ParseDurationUnitFormat.YEARS1: YEAR, + } + + # Build a regex pattern dynamically from the list of valid values + unit_pattern = "|".join(re.escape(unit) for unit in ParseDurationUnitFormat.list_values()) + pattern = rf"^\s*(\d+)\s*({unit_pattern})\s*$" + + # case-insensitive regex matching + match = re.match(pattern, s, re.IGNORECASE) + if not match: + # Generate dynamic error message + valid_units = ", ".join(f"'{value}'" for value in ParseDurationUnitFormat.list_values()) + raise ValueError(f"No valid unit in '{s}'. 
Expected one of: ({valid_units})") + + number = int(match.group(1)) # Extract the numeric value + unit = match.group(2).lower() # Extract the unit & normalize the unit to lowercase + + return number * time_map[unit] def parse_date(s): # return seconds-since-epoch for the UTC midnight that starts the given From c73541a88bfc0e37180f8dde75b1577213ba8313 Mon Sep 17 00:00:00 2001 From: sgerodes Date: Fri, 10 Jan 2025 00:49:38 +0100 Subject: [PATCH 05/10] chore(news): add newsfragment for ticket #4155 to ensure codechecks pass --- newsfragments/4155.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/4155.minor diff --git a/newsfragments/4155.minor b/newsfragments/4155.minor new file mode 100644 index 000000000..e69de29bb From 8aed2d51c83aa091642f33561b2cf1afdc35e3fb Mon Sep 17 00:00:00 2001 From: sgerodes Date: Fri, 10 Jan 2025 01:00:54 +0100 Subject: [PATCH 06/10] fix(time_format): invalid comparison of strings to enums --- src/allmydata/util/time_format.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/util/time_format.py b/src/allmydata/util/time_format.py index 4404b3f51..652365787 100644 --- a/src/allmydata/util/time_format.py +++ b/src/allmydata/util/time_format.py @@ -10,7 +10,7 @@ from typing import Optional from enum import Enum -class ParseDurationUnitFormat(Enum): +class ParseDurationUnitFormat(str, Enum): SECONDS0 = "s" SECONDS1 = "second" SECONDS2 = "seconds" From a733ba6ef9603536edbcb6ab1d44725408d87b63 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Mon, 13 Jan 2025 21:25:09 +0000 Subject: [PATCH 07/10] Mark a couple of strings with backslashes in them as 'raw strings' Refs [ticket: 4158](https://tahoe-lafs.org/trac/tahoe-lafs/ticket/4158) --- src/allmydata/hashtree.py | 4 ++-- src/allmydata/interfaces.py | 2 +- src/allmydata/node.py | 2 +- src/allmydata/test/cli/test_backup.py | 6 +++--- src/allmydata/test/cli/test_cli.py | 2 +- src/allmydata/test/test_runner.py | 2 +- 
src/allmydata/test/web/test_web.py | 2 +- src/allmydata/util/i2p_provider.py | 2 +- src/allmydata/web/status.py | 4 ++-- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/allmydata/hashtree.py b/src/allmydata/hashtree.py index c9ae78970..b67f9a49e 100644 --- a/src/allmydata/hashtree.py +++ b/src/allmydata/hashtree.py @@ -70,7 +70,7 @@ def roundup_pow2(x): class CompleteBinaryTreeMixin: - """ + r""" Adds convenience methods to a complete binary tree. Assumes the total number of elements in the binary tree may be @@ -179,7 +179,7 @@ def pair_hash(a, b): return tagged_pair_hash(b'Merkle tree internal node', a, b) class HashTree(CompleteBinaryTreeMixin, list): - """ + r""" Compute Merkle hashes at any node in a complete binary tree. Tree is indexed like so:: diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index e44a0e8bb..40899c1d6 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -406,7 +406,7 @@ class IStorageBucketWriter(Interface): """ def put_uri_extension(data): - """This block of data contains integrity-checking information (hashes + r"""This block of data contains integrity-checking information (hashes of plaintext, crypttext, and shares), as well as encoding parameters that are necessary to recover the data. This is a serialized dict mapping strings to other strings. 
The hash of this data is kept in diff --git a/src/allmydata/node.py b/src/allmydata/node.py index e81a1e953..160d3e437 100644 --- a/src/allmydata/node.py +++ b/src/allmydata/node.py @@ -90,7 +90,7 @@ def _common_valid_config(): }) # group 1 will be addr (dotted quad string), group 3 if any will be portnum (string) -ADDR_RE = re.compile("^([1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*)(:([1-9][0-9]*))?$") +ADDR_RE = re.compile(r"^([1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*\.[1-9][0-9]*)(:([1-9][0-9]*))?$") # this is put into README in new node-directories (for client and introducers) PRIV_README = """ diff --git a/src/allmydata/test/cli/test_backup.py b/src/allmydata/test/cli/test_backup.py index 7ff1a14d0..bbed86fa0 100644 --- a/src/allmydata/test/cli/test_backup.py +++ b/src/allmydata/test/cli/test_backup.py @@ -36,9 +36,9 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def count_output(self, out): mo = re.search(r"(\d)+ files uploaded \((\d+) reused\), " - "(\d)+ files skipped, " - "(\d+) directories created \((\d+) reused\), " - "(\d+) directories skipped", out) + r"(\d)+ files skipped, " + r"(\d+) directories created \((\d+) reused\), " + r"(\d+) directories skipped", out) return [int(s) for s in mo.groups()] def count_output2(self, out): diff --git a/src/allmydata/test/cli/test_cli.py b/src/allmydata/test/cli/test_cli.py index e4d4c3781..f2fb13a41 100644 --- a/src/allmydata/test/cli/test_cli.py +++ b/src/allmydata/test/cli/test_cli.py @@ -764,7 +764,7 @@ class Errors(GridTestMixin, CLITestMixin, unittest.TestCase): # enough shares. The one remaining share might be in either the # COMPLETE or the PENDING state. 
in_complete_msg = "ran out of shares: complete=sh0 pending= overdue= unused= need 3" - in_pending_msg_regex = "ran out of shares: complete= pending=Share\(.+\) overdue= unused= need 3" + in_pending_msg_regex = r"ran out of shares: complete= pending=Share\(.+\) overdue= unused= need 3" d.addCallback(lambda ign: self.do_cli("get", self.uri_1share)) def _check1(args): diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py index bc55d507d..4a4f727ef 100644 --- a/src/allmydata/test/test_runner.py +++ b/src/allmydata/test/test_runner.py @@ -254,7 +254,7 @@ class CreateNode(unittest.TestCase): # Fail if there is a non-empty line that doesn't end with a # punctuation mark. for line in err.splitlines(): - self.failIf(re.search("[\S][^\.!?]$", line), (line,)) + self.failIf(re.search(r"[\S][^\.!?]$", line), (line,)) # test that the non --basedir form works too n2 = os.path.join(basedir, command + "-n2") diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index 505c99a92..fb507d5ce 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -1968,7 +1968,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi def test_CSS_FILE(self): d = self.GET("/tahoe.css", followRedirect=True) def _check(res): - CSS_STYLE=re.compile(b'toolbar\s{.+text-align:\scenter.+toolbar-item.+display:\sinline',re.DOTALL) + CSS_STYLE=re.compile(r'toolbar\s{.+text-align:\scenter.+toolbar-item.+display:\sinline',re.DOTALL) self.failUnless(CSS_STYLE.search(res), res) d.addCallback(_check) return d diff --git a/src/allmydata/util/i2p_provider.py b/src/allmydata/util/i2p_provider.py index c480cd2f1..6aabdfa5c 100644 --- a/src/allmydata/util/i2p_provider.py +++ b/src/allmydata/util/i2p_provider.py @@ -189,7 +189,7 @@ class _Provider(service.MultiService): privkeyfile = self._get_i2p_config("dest.private_key_file") external_port = self._get_i2p_config("dest.port") sam_port = 
self._get_i2p_config("sam.port") - escaped_sam_port = sam_port.replace(':', '\:') + escaped_sam_port = sam_port.replace(':', r'\:') # for now, this returns a string, which then gets passed to # endpoints.serverFromString . But it can also return an Endpoint # directly, which means we don't need to encode all these options diff --git a/src/allmydata/web/status.py b/src/allmydata/web/status.py index 1737a4d1b..fa6c1607d 100644 --- a/src/allmydata/web/status.py +++ b/src/allmydata/web/status.py @@ -1556,8 +1556,8 @@ class Statistics(MultiFormatResource): def mangle_name(name): return re.sub( - u"_(\d\d)_(\d)_percentile", - u'{quantile="0.\g<1>\g<2>"}', + r"_(\d\d)_(\d)_percentile", + r'{quantile="0.\g<1>\g<2>"}', name.replace(u".", u"_") ) From ce1326176a0ea85030ac7d17ad780e9f8b650aff Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Mon, 13 Jan 2025 21:41:06 +0000 Subject: [PATCH 08/10] Compiled regexes don't like raw strings, only byte strings --- src/allmydata/test/web/test_web.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/allmydata/test/web/test_web.py b/src/allmydata/test/web/test_web.py index fb507d5ce..c213b1983 100644 --- a/src/allmydata/test/web/test_web.py +++ b/src/allmydata/test/web/test_web.py @@ -1968,7 +1968,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi def test_CSS_FILE(self): d = self.GET("/tahoe.css", followRedirect=True) def _check(res): - CSS_STYLE=re.compile(r'toolbar\s{.+text-align:\scenter.+toolbar-item.+display:\sinline',re.DOTALL) + CSS_STYLE=re.compile(b'toolbar\\s{.+text-align:\\scenter.+toolbar-item.+display:\\sinline',re.DOTALL) self.failUnless(CSS_STYLE.search(res), res) d.addCallback(_check) return d From ad1e48666541409e8cabafaaaf039e391e411016 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Mon, 13 Jan 2025 21:44:02 +0000 Subject: [PATCH 09/10] (No) news. 
--- newsfragments/4158.minor | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 newsfragments/4158.minor diff --git a/newsfragments/4158.minor b/newsfragments/4158.minor new file mode 100644 index 000000000..e69de29bb From dc65ecf98c411232dc4a7820f449d523ff5b9500 Mon Sep 17 00:00:00 2001 From: Florian Sesser Date: Wed, 15 Jan 2025 19:08:39 +0000 Subject: [PATCH 10/10] Add detail to newsfragment and trigger CI in the process --- newsfragments/4158.minor | 1 + 1 file changed, 1 insertion(+) diff --git a/newsfragments/4158.minor b/newsfragments/4158.minor index e69de29bb..56eaa76ca 100644 --- a/newsfragments/4158.minor +++ b/newsfragments/4158.minor @@ -0,0 +1 @@ +Fixed a couple of SyntaxWarnings from invalid escape sequences (shown under Python 3.12 and 3.13).