From 5973196931d2143f68a34d9b01857339582ec5c0 Mon Sep 17 00:00:00 2001
From: meejah <meejah@meejah.ca>
Date: Wed, 21 Sep 2022 19:00:27 -0600
Subject: [PATCH] refactor: use filelock and test it

---
 setup.py                          |   1 +
 src/allmydata/test/test_runner.py |  47 ++++++++++++
 src/allmydata/util/pid.py         | 117 ++++++++++++++++++------------
 3 files changed, 119 insertions(+), 46 deletions(-)

diff --git a/setup.py b/setup.py
index bd16a61ce..d99831347 100644
--- a/setup.py
+++ b/setup.py
@@ -141,6 +141,7 @@ install_requires = [
 
     # for pid-file support
     "psutil",
+    "filelock",
 ]
 
 setup_requires = [
diff --git a/src/allmydata/test/test_runner.py b/src/allmydata/test/test_runner.py
index cf6e9f3b5..5a8311649 100644
--- a/src/allmydata/test/test_runner.py
+++ b/src/allmydata/test/test_runner.py
@@ -50,6 +50,11 @@ from twisted.internet.defer import (
 from twisted.python.filepath import FilePath
 from allmydata.util import fileutil, pollmixin
 from allmydata.util.encodingutil import unicode_to_argv
+from allmydata.util.pid import (
+    check_pid_process,
+    _pidfile_to_lockpath,
+    ProcessInTheWay,
+)
 from allmydata.test import common_util
 import allmydata
 from allmydata.scripts.runner import (
@@ -617,3 +622,45 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin):
         # What's left is a perfect indicator that the process has exited and
         # we won't get blamed for leaving the reactor dirty.
         yield client_running
+
+
+class PidFileLocking(SyncTestCase):
+    """
+    Direct tests for allmydata.util.pid functions
+    """
+
+    def test_locking(self):
+        """
+        Fail to create a pidfile if another process has the lock already.
+        """
+        # this can't just be "our" process because the locking library
+        # allows the same process to acquire a lock multiple times.
+        pidfile = FilePath("foo")
+        lockfile = _pidfile_to_lockpath(pidfile)
+
+        with open("code.py", "w") as f:
+            f.write(
+                "\n".join([
+                    "import filelock, time",
+                    "with filelock.FileLock('{}', timeout=1):".format(lockfile.path),
+                    "    print('.', flush=True)",
+                    "    time.sleep(5)",
+                ])
+            )
+        proc = Popen(
+            [sys.executable, "code.py"],
+            stdout=PIPE,
+            stderr=PIPE,
+            start_new_session=True,
+        )
+        # make sure our subprocess has had time to acquire the lock
+        # for sure (from the "." it prints)
+        self.assertThat(
+            proc.stdout.read(1),
+            Equals(b".")
+        )
+
+        # we should not be able to acuire this corresponding lock as well
+        with self.assertRaises(ProcessInTheWay):
+            check_pid_process(pidfile)
+        proc.terminate()
diff --git a/src/allmydata/util/pid.py b/src/allmydata/util/pid.py
index ff8129bbc..e256615d6 100644
--- a/src/allmydata/util/pid.py
+++ b/src/allmydata/util/pid.py
@@ -1,6 +1,10 @@
 import os
 import psutil
 
+# the docs are a little misleading, but this is either WindowsFileLock
+# or UnixFileLock depending upon the platform we're currently on
+from filelock import FileLock, Timeout
+
 
 class ProcessInTheWay(Exception):
     """
@@ -20,6 +24,14 @@ class CannotRemovePidFile(Exception):
     """
 
 
+def _pidfile_to_lockpath(pidfile):
+    """
+    internal helper.
+    :returns FilePath: a path to use for file-locking the given pidfile
+    """
+    return pidfile.sibling("{}.lock".format(pidfile.basename()))
+
+
 def check_pid_process(pidfile, find_process=None):
     """
     If another instance appears to be running already, raise an
@@ -34,57 +46,70 @@ def check_pid_process(pidfile, find_process=None):
     :raises ProcessInTheWay: if a running process exists at our PID
     """
     find_process = psutil.Process if find_process is None else find_process
-    # check if we have another instance running already
-    if pidfile.exists():
-        with pidfile.open("r") as f:
-            content = f.read().decode("utf8").strip()
-        try:
-            pid, starttime = content.split()
-            pid = int(pid)
-            starttime = float(starttime)
-        except ValueError:
-            raise InvalidPidFile(
-                "found invalid PID file in {}".format(
-                    pidfile
-                )
-            )
-        try:
-            # if any other process is running at that PID, let the
-            # user decide if this is another legitimate
-            # instance. Automated programs may use the start-time to
-            # help decide this (if the PID is merely recycled, the
-            # start-time won't match).
-            find_process(pid)
-            raise ProcessInTheWay(
-                "A process is already running as PID {}".format(pid)
-            )
-        except psutil.NoSuchProcess:
-            print(
-                "'{pidpath}' refers to {pid} that isn't running".format(
-                    pidpath=pidfile.path,
-                    pid=pid,
-                )
-            )
-            # nothing is running at that PID so it must be a stale file
-            pidfile.remove()
+    lock_path = _pidfile_to_lockpath(pidfile)
 
-    # write our PID + start-time to the pid-file
-    pid = os.getpid()
-    starttime = find_process(pid).create_time()
-    with pidfile.open("w") as f:
-        f.write("{} {}\n".format(pid, starttime).encode("utf8"))
+    try:
+        # a short timeout is fine, this lock should only be active
+        # while someone is reading or deleting the pidfile .. and
+        # facilitates testing the locking itself.
+        with FileLock(lock_path.path, timeout=2):
+            # check if we have another instance running already
+            if pidfile.exists():
+                with pidfile.open("r") as f:
+                    content = f.read().decode("utf8").strip()
+                try:
+                    pid, starttime = content.split()
+                    pid = int(pid)
+                    starttime = float(starttime)
+                except ValueError:
+                    raise InvalidPidFile(
+                        "found invalid PID file in {}".format(
+                            pidfile
+                        )
+                    )
+                try:
+                    # if any other process is running at that PID, let the
+                    # user decide if this is another legitimate
+                    # instance. Automated programs may use the start-time to
+                    # help decide this (if the PID is merely recycled, the
+                    # start-time won't match).
+                    find_process(pid)
+                    raise ProcessInTheWay(
+                        "A process is already running as PID {}".format(pid)
+                    )
+                except psutil.NoSuchProcess:
+                    print(
+                        "'{pidpath}' refers to {pid} that isn't running".format(
+                            pidpath=pidfile.path,
+                            pid=pid,
+                        )
+                    )
+                    # nothing is running at that PID so it must be a stale file
+                    pidfile.remove()
+
+            # write our PID + start-time to the pid-file
+            pid = os.getpid()
+            starttime = find_process(pid).create_time()
+            with pidfile.open("w") as f:
+                f.write("{} {}\n".format(pid, starttime).encode("utf8"))
+    except Timeout:
+        raise ProcessInTheWay(
+            "Another process is still locking {}".format(pidfile.path)
+        )
 
 
 def cleanup_pidfile(pidfile):
     """
     Safely clean up a PID-file
     """
-    try:
-        pidfile.remove()
-    except Exception as e:
-        raise CannotRemovePidFile(
-            "Couldn't remove '{pidfile}': {err}.".format(
-                pidfile=pidfile.path,
-                err=e,
+    lock_path = _pidfile_to_lockpath(pidfile)
+    with FileLock(lock_path.path):
+        try:
+            pidfile.remove()
+        except Exception as e:
+            raise CannotRemovePidFile(
+                "Couldn't remove '{pidfile}': {err}.".format(
+                    pidfile=pidfile.path,
+                    err=e,
+                )
             )
-        )