2007-03-30 12:12:23 -07:00
"""
Futz with files like a pro .
"""
2010-07-11 14:37:21 -07:00
import sys , exceptions , os , stat , tempfile , time , binascii
2007-03-30 12:12:23 -07:00
from twisted . python import log
2010-07-11 14:37:21 -07:00
from pycryptopp . cipher . aes import AES
2007-03-30 12:12:23 -07:00
def rename(src, dst, tries=4, basedelay=0.1):
    """
    Rename src to dst, retrying with a doubling delay when the OS refuses.

    This is a superkludge to work around the fact that occasionally on
    Windows some other process (e.g. an anti-virus scanner, a local search
    engine, etc.) is looking at your file when you want to delete or move it,
    and hence you can't. The horrible workaround is to sit and spin, trying
    again for a short time, and then give up.

    With the default values of tries and basedelay this can block for less
    than a second.

    @param src: path of the file to move
    @param dst: path to move it to
    @param tries: number of tries -- each time after the first we wait twice
        as long as the previous wait
    @param basedelay: how long to wait before the second try
    @return: the result of os.rename()
    @raise EnvironmentError: if the last try fails
    """
    delay = basedelay
    for _attempt in range(tries - 1):
        try:
            return os.rename(src, dst)
        except EnvironmentError as err:
            # XXX Tighten this to check if this is a permission denied error
            # (possibly due to another Windows process having the file open)
            # and execute the superkludge only in this case.
            log.msg("XXX KLUDGE Attempting to move file %s => %s; got %s; sleeping %s seconds" % (src, dst, err, delay,))
            time.sleep(delay)
            delay *= 2
    # The last try: a failure here propagates to the caller.
    return os.rename(src, dst)
def remove(f, tries=4, basedelay=0.1):
    """
    Delete the file f, retrying with a doubling delay when the OS refuses.

    This is a superkludge to work around the fact that occasionally on
    Windows some other process (e.g. an anti-virus scanner, a local search
    engine, etc.) is looking at your file when you want to delete or move it,
    and hence you can't. The horrible workaround is to sit and spin, trying
    to delete it, for a short time and then give up.

    With the default values of tries and basedelay this can block for less
    than a second.

    If a retried attempt fails but the file no longer exists, this returns
    silently: the goal (the file being gone) has been reached.

    @param f: path of the file to delete
    @param tries: number of tries -- each time after the first we wait twice
        as long as the previous wait
    @param basedelay: how long to wait before the second try
    @raise EnvironmentError: if the last try fails
    """
    try:
        # Best effort: make the file writable first so that a read-only
        # attribute does not block the deletion.
        os.chmod(f, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
    except Exception:
        # Deliberately ignored (the remove below reports real problems), but
        # unlike a bare "except:" this lets KeyboardInterrupt and SystemExit
        # through.
        pass
    delay = basedelay
    for _attempt in range(tries - 1):
        try:
            return os.remove(f)
        except EnvironmentError as err:
            # XXX Tighten this to check if this is a permission denied error
            # (possibly due to another Windows process having the file open)
            # and execute the superkludge only in this case.
            if not os.path.exists(f):
                return
            log.msg("XXX KLUDGE Attempting to remove file %s; got %s; sleeping %s seconds" % (f, err, delay,))
            time.sleep(delay)
            delay *= 2
    # The last try: a failure here propagates to the caller.
    return os.remove(f)
2008-11-04 17:47:15 -07:00
class ReopenableNamedTemporaryFile:
    """
    This uses tempfile.mkstemp() to generate a secure temp file. It then
    closes the file, leaving a zero-length file as a placeholder. You can get
    the filename with ReopenableNamedTemporaryFile.name. When the
    ReopenableNamedTemporaryFile instance is garbage collected or its
    shutdown() method is called, it deletes the file.
    """

    def __init__(self, *args, **kwargs):
        """All arguments are passed through to tempfile.mkstemp()."""
        fd, self.name = tempfile.mkstemp(*args, **kwargs)
        os.close(fd)

    def __repr__(self):
        return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)

    def __str__(self):
        return self.__repr__()

    def __del__(self):
        self.shutdown()

    def shutdown(self):
        """Delete the placeholder file, if one was ever created."""
        # 'name' is missing when mkstemp() raised in __init__; without this
        # guard __del__ would then fail with an AttributeError.
        if hasattr(self, 'name'):
            remove(self.name)
2007-03-30 12:12:23 -07:00
class NamedTemporaryDirectory:
    """
    Wrapper around tempfile.mkdtemp(): the name of the created directory is
    kept in self.name, and the directory is rmrf'ed when the object is
    garbage collected or "shutdown()" is called.
    """

    def __init__(self, cleanup=True, *args, **kwargs):
        """
        If cleanup, then the directory will be rmrf'ed when the object is
        shutdown. Remaining arguments are passed to tempfile.mkdtemp().
        """
        # Set before mkdtemp() so shutdown() can always read it.
        self.cleanup = cleanup
        self.name = tempfile.mkdtemp(*args, **kwargs)

    def __repr__(self):
        return "<%s instance at %x %s>" % (self.__class__.__name__, id(self), self.name)

    def __str__(self):
        return self.__repr__()

    def __del__(self):
        # Nothing may escape a finalizer, so report any failure and carry on.
        try:
            self.shutdown()
        except:
            import traceback
            traceback.print_exc()

    def shutdown(self):
        """Remove the directory tree if cleanup was requested and it was created."""
        if not self.cleanup:
            return
        if not hasattr(self, 'name'):
            # mkdtemp() failed in __init__, so there is nothing to remove.
            return
        rm_dir(self.name)
2010-07-11 14:37:21 -07:00
class EncryptedTemporaryFile:
    """
    A file-like wrapper around tempfile.TemporaryFile() whose on-disk
    contents are passed through AES with a random per-instance key.
    """
    # not implemented: next, readline, readlines, xreadlines, writelines

    def __init__(self):
        self.file = tempfile.TemporaryFile()
        self.key = os.urandom(16)  # AES-128

    def _crypt(self, offset, data):
        """
        Run 'data' through the cipher as if it were located at byte 'offset'
        of the file, and return the result.
        """
        # The IV is the index of the 16-byte block containing 'offset',
        # written as 16 big-endian bytes.
        block_index, within_block = divmod(offset, 16)
        iv = binascii.unhexlify("%032x" % block_index)
        cipher = AES(self.key, iv=iv)
        # Feed throwaway bytes to advance the cipher to the position inside
        # the block. NOTE(review): presumably AES here is a counter-mode
        # stream, so the same call both encrypts and decrypts -- confirm.
        cipher.process("\x00" * within_block)
        return cipher.process(data)

    def close(self):
        self.file.close()

    def flush(self):
        self.file.flush()

    def seek(self, offset, whence=0):  # 0 = SEEK_SET
        self.file.seek(offset, whence)

    def tell(self):
        return self.file.tell()

    def read(self, size=-1):
        """A read must not follow a write, or vice-versa, without an intervening seek."""
        start = self.file.tell()
        return self._crypt(start, self.file.read(size))

    def write(self, plaintext):
        """
        A read must not follow a write, or vice-versa, without an intervening seek.
        If seeking and then writing causes a 'hole' in the file, the contents of the
        hole are unspecified.
        """
        start = self.file.tell()
        self.file.write(self._crypt(start, plaintext))

    def truncate(self, newsize):
        """
        Truncate or extend the file to 'newsize'. If it is extended, the contents after the
        old end-of-file are unspecified. The file position after this operation is unspecified.
        """
        self.file.truncate(newsize)
2007-12-17 16:39:54 -07:00
def make_dirs(dirname, mode=0o777):
    """
    An idempotent version of os.makedirs(). If the dir already exists, do
    nothing and return without raising an exception. If this call creates the
    dir, return without raising an exception. If there is an error that
    prevents creation or if the directory gets deleted after make_dirs() creates
    it and before make_dirs() checks that it exists, raise an exception.

    @param dirname: the directory to create, including missing parents
    @param mode: the mode passed to os.makedirs()
    @raise OSError: the error from os.makedirs(), if the dir does not exist afterwards
    @raise IOError: if the dir does not exist afterwards although os.makedirs()
        reported no error
    """
    makedirs_error = None
    try:
        os.makedirs(dirname, mode)
    except OSError as e:
        # Not necessarily fatal: the directory may simply exist already.
        makedirs_error = e

    # Only the final state of the filesystem decides success.
    if os.path.isdir(dirname):
        return
    if makedirs_error:
        raise makedirs_error
    # careful not to construct an IOError with a 2-tuple, as that has a special meaning...
    raise IOError("unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirname)
def rm_dir(dirname):
    """
    An idempotent version of shutil.rmtree() that tolerates "No such file or
    directory" errors. If the dir is already gone, do nothing and return
    without raising an exception. If this call removes the dir, return
    without raising an exception. If there is an error that prevents deletion
    or if the directory gets created again after rm_dir() deletes it and
    before rm_dir() checks that it is gone, raise an exception.

    @param dirname: the directory tree to remove
    @raise Exception: whatever error interrupted the removal, if the
        directory still exists afterwards
    @raise OSError: if the directory still exists afterwards although no
        error was recorded
    """
    import errno

    failure = None
    try:
        # Make sure we are allowed to list and modify the directory.
        os.chmod(dirname, stat.S_IWRITE | stat.S_IEXEC | stat.S_IREAD)
        for entry in os.listdir(dirname):
            fullname = os.path.join(dirname, entry)
            if os.path.isdir(fullname):
                rm_dir(fullname)
            else:
                remove(fullname)
        os.rmdir(dirname)
    except Exception as le:
        # Ignore "No such file or directory": something else removed it
        # first, which is the outcome we wanted anyway. Slicing args keeps
        # this check safe for an OSError constructed without arguments.
        if not isinstance(le, OSError) or le.args[:1] != (errno.ENOENT,):
            failure = le

    # Okay, now we've recursively removed everything, ignoring any "No
    # such file or directory" errors, and remembering any other error.
    if os.path.exists(dirname):
        if failure is not None:
            raise failure
        raise OSError("Failed to remove dir for unknown reason.")
def remove_if_possible(f):
    """
    Try to delete the file f with remove(), ignoring any error.

    @param f: path of the file to delete
    """
    try:
        remove(f)
    except Exception:
        # Failure to delete is acceptable here, but unlike a bare "except:"
        # this does not swallow KeyboardInterrupt or SystemExit.
        pass
2007-04-17 20:14:26 -07:00
2007-07-03 15:49:45 -07:00
def du(basedir):
    """
    Return the total size in bytes, as reported by os.path.getsize(), of all
    files found by os.walk() under basedir.
    """
    return sum(
        os.path.getsize(os.path.join(root, name))
        for root, _dirs, files in os.walk(basedir)
        for name in files
    )
2009-02-18 23:13:10 -07:00
def move_into_place(source, dest):
    """
    Atomically replace a file, or as near to it as the platform allows.
    The dest file may or may not exist.

    @param source: path of the new file
    @param dest: path it should end up at
    """
    on_windows = "win32" in sys.platform.lower()
    if on_windows:
        # The destination is cleared out of the way first on this platform.
        remove_if_possible(dest)
    os.rename(source, dest)
2010-02-05 17:37:27 -08:00
write node.url and portnum files atomically, to fix race in test_runner
Previously, test_runner sometimes failed because the _node_has_started()
poller fired after the portnum file had been opened, but before it had
actually been filled, allowing the test process to observe an empty file,
which flunked the test.
This adds a new fileutil.write_atomically() function (using the usual
write-to-.tmp-then-rename approach), and uses it for both node.url and
client.port. These files are written a bit before the node is really up and
running, but they're late enough for test_runner's purposes, which is to know
when it's safe to read client.port and use 'tahoe restart' (and therefore
SIGINT) to restart the node.
The current node/client code doesn't offer any better "are you really done
with startup" indicator; the ideal approach would be to either watch the
logfile, or connect to its flogport, but both are a hassle. Changing the node
to write out a new "all done" file would be intrusive for regular
operations.
2012-05-14 13:32:03 -07:00
def write_atomically(target, contents, mode="b"):
    """
    Write contents to target using the usual write-to-.tmp-then-rename
    approach: the data is written in full to target + ".tmp", which is then
    moved over target with move_into_place(). This keeps a process that polls
    for target from observing a file that has been opened but not yet filled.

    @param target: path of the file to create or replace
    @param contents: the data to write
    @param mode: suffix appended to "w" to form the open() mode
    """
    tmp_path = target + ".tmp"
    with open(tmp_path, "w" + mode) as f:
        f.write(contents)
    # Only reached when the write succeeded and the temp file is closed.
    move_into_place(tmp_path, target)
2013-03-18 17:30:57 -07:00
def write(path, data, mode="wb"):
    """
    Open path with the given mode, write data to it, and close it again
    (also when the write fails).
    """
    with open(path, mode) as wf:
        wf.write(data)
def read(path):
    """Return the entire contents of the file at path, read in binary mode."""
    with open(path, "rb") as rf:
        return rf.read()
2010-07-11 17:30:15 -07:00
2015-01-30 00:50:18 +00:00
def put_file(path, inf):
    """
    Copy everything that can be read from the file-like object inf into the
    file at path, in chunks of 32768 bytes.

    @param path: destination; must satisfy precondition_abspath()
    @param inf: an object with a read(size) method
    """
    precondition_abspath(path)

    # TODO: create temporary file and move into place?
    with open(path, "wb") as outf:
        data = inf.read(32768)
        while data:
            outf.write(data)
            data = inf.read(32768)
2010-07-21 16:15:07 -07:00
2015-01-30 00:47:09 +00:00
def precondition_abspath(path):
    """
    Check that path is a Unicode string holding an absolute path in the form
    this module expects.

    @raise AssertionError: if path is not a Unicode string, or is not absolute
    """
    if not isinstance(path, unicode):
        raise AssertionError("an abspath must be a Unicode string")

    if sys.platform == "win32":
        # This intentionally doesn't view absolute paths starting with a drive specification, or
        # paths relative to the current drive, as acceptable.
        acceptable = path.startswith("\\\\")
    else:
        # This intentionally doesn't view the path '~' or paths starting with '~/' as acceptable.
        acceptable = os.path.isabs(path)

    if not acceptable:
        raise AssertionError("an abspath should be normalized using abspath_expanduser_unicode")
2010-07-21 16:15:07 -07:00
# Work around <http://bugs.python.org/issue3426>. This code is adapted from
# <http://svn.python.org/view/python/trunk/Lib/ntpath.py?revision=78247&view=markup>
# with some simplifications.
try:
    from nt import _getfullpathname
except ImportError:
    # The 'nt' module could not provide it; callers test this name for
    # truth before using it.
    _getfullpathname = None
2015-01-30 00:47:09 +00:00
def abspath_expanduser_unicode(path, base=None):
    """
    Return the absolute version of a path. If 'base' is given and 'path' is relative,
    the path will be expanded relative to 'base'.
    'path' must be a Unicode string. 'base', if given, must be a Unicode string
    corresponding to an absolute path as returned by a previous call to
    abspath_expanduser_unicode.

    @raise AssertionError: if 'path' is not a Unicode string, or 'base' fails
        precondition_abspath()
    """
    if not isinstance(path, unicode):
        raise AssertionError("paths must be Unicode strings")
    if base is not None:
        precondition_abspath(base)

    expanded = expanduser(path)

    if _getfullpathname:
        # On Windows, os.path.isabs will return True for paths without a drive letter,
        # e.g. "\\". See <http://bugs.python.org/issue1669539>.
        try:
            expanded = _getfullpathname(expanded or u".")
        except OSError:
            pass

    if not os.path.isabs(expanded):
        anchor = os.getcwdu() if base is None else base
        expanded = os.path.join(anchor, expanded)

    # We won't hit <http://bugs.python.org/issue5827> because
    # there is always at least one Unicode path component.
    normalized = os.path.normpath(expanded)

    if sys.platform == "win32":
        return to_windows_long_path(normalized)
    return normalized
def to_windows_long_path(path):
    """
    Return path rewritten to use the Windows long-path prefix syntax.
    Paths that already carry a long-path or device prefix are returned with
    only their forward slashes converted.
    """
    # '/' is normally a perfectly valid path component separator in Windows.
    # However, when using the "\\?\" syntax it is not recognized, so we
    # replace it with '\' here.
    backslashed = path.replace(u"/", u"\\")

    # Note that other normalizations such as removing '.' and '..' should
    # be done outside this function.

    if backslashed.startswith((u"\\\\?\\", u"\\\\.\\")):
        return backslashed
    if backslashed.startswith(u"\\\\"):
        # A path starting with two backslashes gets the UNC form of the
        # prefix in place of its own leading backslashes.
        return u"\\\\?\\UNC\\" + backslashed[2:]
    return u"\\\\?\\" + backslashed
2010-09-10 08:35:20 -08:00
2011-01-18 23:59:11 -08:00
# Whether GetDiskFreeSpaceExW was successfully bound below; stays False on
# every platform other than win32.
have_GetDiskFreeSpaceExW = False

if sys.platform == "win32":
    from ctypes import WINFUNCTYPE, windll, POINTER, byref, c_ulonglong, create_unicode_buffer
    from ctypes.wintypes import BOOL, DWORD, LPCWSTR, LPWSTR

    # <http://msdn.microsoft.com/en-us/library/ms679360%28v=VS.85%29.aspx>
    GetLastError = WINFUNCTYPE(DWORD)(("GetLastError", windll.kernel32))

    # <http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188%28v=vs.85%29.aspx>
    GetEnvironmentVariableW = WINFUNCTYPE(DWORD, LPCWSTR, LPWSTR, DWORD)(
        ("GetEnvironmentVariableW", windll.kernel32))

    try:
        # <http://msdn.microsoft.com/en-us/library/aa383742%28v=VS.85%29.aspx>
        PULARGE_INTEGER = POINTER(c_ulonglong)

        # <http://msdn.microsoft.com/en-us/library/aa364937%28VS.85%29.aspx>
        GetDiskFreeSpaceExW = WINFUNCTYPE(BOOL, LPCWSTR, PULARGE_INTEGER, PULARGE_INTEGER, PULARGE_INTEGER)(
            ("GetDiskFreeSpaceExW", windll.kernel32))
    except Exception:
        # Binding failed: report it and leave the flag False.
        import traceback
        traceback.print_exc()
    else:
        have_GetDiskFreeSpaceExW = True
2010-09-10 08:35:20 -08:00
2015-01-29 18:32:05 +00:00
def expanduser(path):
    """
    Expand a leading '~' in path: with windows_expanduser() on win32, and
    with os.path.expanduser() everywhere else.
    """
    if sys.platform != "win32":
        return os.path.expanduser(path)
    # os.path.expanduser is hopelessly broken for Unicode paths on Windows (ticket #1674).
    return windows_expanduser(path)
def windows_expanduser(path):
    """
    Expand a leading '~' using the HOMEDRIVE and HOMEPATH environment
    variables.

    Only the path '~' itself and paths starting with '~/' or '~\\' are
    expanded. Every other path (including '~user' forms) is returned
    unchanged, without touching the environment.

    @raise OSError: from windows_getenv(), if an expansion is needed and a
        variable cannot be read
    """
    if path == '~':
        remainder = None
    elif path.startswith('~/') or path.startswith('~\\'):
        remainder = path[2:]
    else:
        # Nothing to expand. Returning here, before the environment lookups,
        # keeps a missing HOMEDRIVE/HOMEPATH from failing a call that never
        # needed them.
        return path

    home_drive = windows_getenv(u'HOMEDRIVE')
    home_path = windows_getenv(u'HOMEPATH')
    if remainder is None:
        return os.path.join(home_drive, home_path)
    return os.path.join(home_drive, home_path, remainder)
def windows_getenv(name):
    """
    Return the value of the environment variable 'name' as read through
    GetEnvironmentVariableW.

    @param name: the variable name; must be a Unicode string
    @raise AssertionError: if name is not a Unicode string
    @raise OSError: if either GetEnvironmentVariableW call returns a value <= 0
    """
    # Based on <http://stackoverflow.com/questions/2608200/problems-with-umlauts-in-python-appdata-environvent-variable/2608368#2608368>,
    # with improved error handling.
    if not isinstance(name, unicode):
        raise AssertionError("name must be Unicode")

    failure_template = "Windows error %d attempting to read environment variable %r"

    # First call, with no buffer, asks how much room the value needs.
    needed = GetEnvironmentVariableW(name, None, 0)
    if needed <= 0:
        raise OSError(failure_template % (GetLastError(), name))

    # Second call copies the value into a buffer of that size.
    buf = create_unicode_buffer(u'\0' * needed)
    copied = GetEnvironmentVariableW(name, buf, needed)
    if copied <= 0:
        raise OSError(failure_template % (GetLastError(), name))

    return buf.value
2010-09-10 08:35:20 -08:00
def get_disk_stats(whichdir, reserved_space=0):
    """
    Return disk statistics for the storage disk, in the form of a dict
    with the following fields.
      total:            total bytes on disk
      free_for_root:    bytes actually free on disk
      free_for_nonroot: bytes free for "a non-privileged user" [Unix] or
                          the current user [Windows]; might take into
                          account quotas depending on platform
      used:             bytes used on disk
      avail:            bytes available excluding reserved space
    An AttributeError can occur if the OS has no API to get disk information.
    An EnvironmentError can occur if the OS call fails.

    whichdir is a directory on the filesystem in question -- the
    answer is about the filesystem, not about the directory, so the
    directory is used only to specify which filesystem.

    reserved_space is how many bytes to subtract from the answer, so
    you can pass how many bytes you would like to leave unused on this
    filesystem as reserved_space.
    """
    if have_GetDiskFreeSpaceExW:
        # If this is a Windows system and GetDiskFreeSpaceExW is available, use it.
        # (This might put up an error dialog unless
        # SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX) has been called,
        # which we do in allmydata.windows.fixups.initialize().)
        nonroot_out = c_ulonglong(0)
        total_out = c_ulonglong(0)
        root_out = c_ulonglong(0)
        succeeded = GetDiskFreeSpaceExW(whichdir,
                                        byref(nonroot_out),
                                        byref(total_out),
                                        byref(root_out))
        if succeeded == 0:
            raise OSError("Windows error %d attempting to get disk statistics for %r"
                          % (GetLastError(), whichdir))
        free_for_nonroot = nonroot_out.value
        total = total_out.value
        free_for_root = root_out.value
    else:
        # For Unix-like systems.
        # <http://docs.python.org/library/os.html#os.statvfs>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/fstatvfs.html>
        # <http://opengroup.org/onlinepubs/7990989799/xsh/sysstatvfs.h.html>
        vfs = os.statvfs(whichdir)

        # on my mac laptop:
        #  statvfs(2) is a wrapper around statfs(2).
        #    statvfs.f_frsize = statfs.f_bsize :
        #     "minimum unit of allocation" (statvfs)
        #     "fundamental file system block size" (statfs)
        #    statvfs.f_bsize = statfs.f_iosize = stat.st_blocks : preferred IO size
        # on an encrypted home directory ("FileVault"), it gets f_blocks
        # wrong, and s.f_blocks*s.f_frsize is twice the size of my disk,
        # but s.f_bavail*s.f_frsize is correct
        total = vfs.f_frsize * vfs.f_blocks
        free_for_root = vfs.f_frsize * vfs.f_bfree
        free_for_nonroot = vfs.f_frsize * vfs.f_bavail

    # valid for all platforms:
    stats = {
        'total': total,
        'free_for_root': free_for_root,
        'free_for_nonroot': free_for_nonroot,
        'used': total - free_for_root,
        # Never report a negative amount of available space.
        'avail': max(free_for_nonroot - reserved_space, 0),
    }
    return stats
2010-09-10 08:35:20 -08:00
def get_available_space(whichdir, reserved_space):
    """
    Returns available space for share storage in bytes, or None if no
    API to get this information is available. If the OS call itself fails,
    that is logged and 0 is returned.

    whichdir is a directory on the filesystem in question -- the
    answer is about the filesystem, not about the directory, so the
    directory is used only to specify which filesystem.

    reserved_space is how many bytes to subtract from the answer, so
    you can pass how many bytes you would like to leave unused on this
    filesystem as reserved_space.
    """
    try:
        available = get_disk_stats(whichdir, reserved_space)['avail']
    except AttributeError:
        # The platform offers no way to ask.
        return None
    except EnvironmentError:
        log.msg("OS call to get disk statistics failed")
        return 0
    return available