refactor: use filelock and test it

2025-02-20 09:46:18 +00:00 · 2022-09-21 19:00:27 -06:00 · 2022-09-21 19:00:27 -06:00 · 5973196931
commit 5973196931
parent 1f29cc9c29
3 changed files with 119 additions and 46 deletions
--- a/setup.py
+++ b/setup.py
@ -141,6 +141,7 @@ install_requires = [

    # for pid-file support
    "psutil",
+    "filelock",
 ]

 setup_requires = [
--- a/src/allmydata/test/test_runner.py
+++ b/src/allmydata/test/test_runner.py
@ -50,6 +50,11 @@ from twisted.internet.defer import (
 from twisted.python.filepath import FilePath
 from allmydata.util import fileutil, pollmixin
 from allmydata.util.encodingutil import unicode_to_argv
+from allmydata.util.pid import (
+    check_pid_process,
+    _pidfile_to_lockpath,
+    ProcessInTheWay,
+)
 from allmydata.test import common_util
 import allmydata
 from allmydata.scripts.runner import (
@ -617,3 +622,45 @@ class RunNode(common_util.SignalMixin, unittest.TestCase, pollmixin.PollMixin):
        # What's left is a perfect indicator that the process has exited and
        # we won't get blamed for leaving the reactor dirty.
        yield client_running
+
+
+class PidFileLocking(SyncTestCase):
+    """
+    Direct tests for allmydata.util.pid functions
+    """
+
+    def test_locking(self):
+        """
+        Fail to create a pidfile if another process has the lock already.
+        """
+        # this can't just be "our" process because the locking library
+        # allows the same process to acquire a lock multiple times.
+        pidfile = FilePath("foo")
+        lockfile = _pidfile_to_lockpath(pidfile)
+
+        with open("code.py", "w") as f:
+            f.write(
+                "\n".join([
+                    "import filelock, time",
+                    "with filelock.FileLock('{}', timeout=1):".format(lockfile.path),
+                    "    print('.', flush=True)",
+                    "    time.sleep(5)",
+                ])
+            )
+        proc = Popen(
+            [sys.executable, "code.py"],
+            stdout=PIPE,
+            stderr=PIPE,
+            start_new_session=True,
+        )
+        # make sure our subprocess has had time to acquire the lock
+        # for sure (from the "." it prints)
+        self.assertThat(
+            proc.stdout.read(1),
+            Equals(b".")
+        )
+
+        # we should not be able to acuire this corresponding lock as well
+        with self.assertRaises(ProcessInTheWay):
+            check_pid_process(pidfile)
+        proc.terminate()
--- a/src/allmydata/util/pid.py
+++ b/src/allmydata/util/pid.py
@ -1,6 +1,10 @@
 import os
 import psutil

+# the docs are a little misleading, but this is either WindowsFileLock
+# or UnixFileLock depending upon the platform we're currently on
+from filelock import FileLock, Timeout
+

 class ProcessInTheWay(Exception):
    """
@ -20,6 +24,14 @@ class CannotRemovePidFile(Exception):
    """


+def _pidfile_to_lockpath(pidfile):
+    """
+    internal helper.
+    :returns FilePath: a path to use for file-locking the given pidfile
+    """
+    return pidfile.sibling("{}.lock".format(pidfile.basename()))
+
+
 def check_pid_process(pidfile, find_process=None):
    """
    If another instance appears to be running already, raise an
@ -34,57 +46,70 @@ def check_pid_process(pidfile, find_process=None):
    :raises ProcessInTheWay: if a running process exists at our PID
    """
    find_process = psutil.Process if find_process is None else find_process
-    # check if we have another instance running already
-    if pidfile.exists():
-        with pidfile.open("r") as f:
-            content = f.read().decode("utf8").strip()
-        try:
-            pid, starttime = content.split()
-            pid = int(pid)
-            starttime = float(starttime)
-        except ValueError:
-            raise InvalidPidFile(
-                "found invalid PID file in {}".format(
-                    pidfile
-                )
-            )
-        try:
-            # if any other process is running at that PID, let the
-            # user decide if this is another legitimate
-            # instance. Automated programs may use the start-time to
-            # help decide this (if the PID is merely recycled, the
-            # start-time won't match).
-            find_process(pid)
-            raise ProcessInTheWay(
-                "A process is already running as PID {}".format(pid)
-            )
-        except psutil.NoSuchProcess:
-            print(
-                "'{pidpath}' refers to {pid} that isn't running".format(
-                    pidpath=pidfile.path,
-                    pid=pid,
-                )
-            )
-            # nothing is running at that PID so it must be a stale file
-            pidfile.remove()
+    lock_path = _pidfile_to_lockpath(pidfile)

-    # write our PID + start-time to the pid-file
-    pid = os.getpid()
-    starttime = find_process(pid).create_time()
-    with pidfile.open("w") as f:
-        f.write("{} {}\n".format(pid, starttime).encode("utf8"))
+    try:
+        # a short timeout is fine, this lock should only be active
+        # while someone is reading or deleting the pidfile .. and
+        # facilitates testing the locking itself.
+        with FileLock(lock_path.path, timeout=2):
+            # check if we have another instance running already
+            if pidfile.exists():
+                with pidfile.open("r") as f:
+                    content = f.read().decode("utf8").strip()
+                try:
+                    pid, starttime = content.split()
+                    pid = int(pid)
+                    starttime = float(starttime)
+                except ValueError:
+                    raise InvalidPidFile(
+                        "found invalid PID file in {}".format(
+                            pidfile
+                        )
+                    )
+                try:
+                    # if any other process is running at that PID, let the
+                    # user decide if this is another legitimate
+                    # instance. Automated programs may use the start-time to
+                    # help decide this (if the PID is merely recycled, the
+                    # start-time won't match).
+                    find_process(pid)
+                    raise ProcessInTheWay(
+                        "A process is already running as PID {}".format(pid)
+                    )
+                except psutil.NoSuchProcess:
+                    print(
+                        "'{pidpath}' refers to {pid} that isn't running".format(
+                            pidpath=pidfile.path,
+                            pid=pid,
+                        )
+                    )
+                    # nothing is running at that PID so it must be a stale file
+                    pidfile.remove()
+
+            # write our PID + start-time to the pid-file
+            pid = os.getpid()
+            starttime = find_process(pid).create_time()
+            with pidfile.open("w") as f:
+                f.write("{} {}\n".format(pid, starttime).encode("utf8"))
+    except Timeout:
+        raise ProcessInTheWay(
+            "Another process is still locking {}".format(pidfile.path)
+        )


 def cleanup_pidfile(pidfile):
    """
    Safely clean up a PID-file
    """
-    try:
-        pidfile.remove()
-    except Exception as e:
-        raise CannotRemovePidFile(
-            "Couldn't remove '{pidfile}': {err}.".format(
-                pidfile=pidfile.path,
-                err=e,
+    lock_path = _pidfile_to_lockpath(pidfile)
+    with FileLock(lock_path.path):
+        try:
+            pidfile.remove()
+        except Exception as e:
+            raise CannotRemovePidFile(
+                "Couldn't remove '{pidfile}': {err}.".format(
+                    pidfile=pidfile.path,
+                    err=e,
+                )
            )
-        )