p4scripts/p4RemoveUnversioned.py
Brian 97da25ce38 Threaded console, threaded cleanup. Yes!
Made the threaded console to batch messages and so I could manually
flush or clear them.  At some point I would consider a safety maximum
buffer size that triggers it to auto flush. This worked out really well,
though I have to see why in some cases lines appear to double up still,
could be something with the process not completing when I expect it to.

This is possibly a naive thread implementation, since it pushes a
directory for every thread which seems too drastic. I'd like to see how
much better it works without all the context switches.  It's also a
matter of figuring out how much to handle yourself before letting
another thread join in.  Right now the threads don't branch out too much
since I think they basically do a breadth-first search, though I have to
double check on that.

Still to come, trying to safely work with fstat across multiple
directories. It's fast, but on the console the script would appear to
stall as it parses everything, so I'd still want to break it down
somewhat so you can see the script making visible progress.  I would
also prefer this because then console messages wouldn't be so short and
blocky.

Improvements to come!
2014-05-08 22:55:12 -06:00

300 lines
No EOL
10 KiB
Python

#!/usr/bin/python
# -*- coding: utf8 -*-
# author : Brian Ernst
# python_version : 2.7.6 and 3.4.0
# =================================
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output
# todo: add option of using send2trash
# todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether
import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback
# Matches a '#' comment and everything after it on a single line.
re_remove_comment = re.compile( "#.*$" )

def remove_comment( s ):
    """Return *s* with any trailing '#' comment stripped."""
    return re_remove_comment.sub( "", s )
# Python 2/3 compatibility: on Python 2, make input() behave like
# raw_input() (return the raw string instead of evaluating it).  On
# Python 3, raw_input does not exist; catch only the NameError that
# signals that, instead of a bare except that hides real errors.
try:
    input = raw_input
except NameError:
    pass
def enum( *sequential, **named ):
    """Create a simple enum type.

    Positional names receive the values 0..n-1; keyword arguments
    supply explicit values.  Returns a new class with the names as
    class attributes.
    """
    values = dict( zip( sequential, range( len( sequential ) ) ) )
    values.update( named )
    return type( 'Enum', ( ), values )
# Message types understood by Worker.run() via the shared job queue.
MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')
# Name of the per-directory Perforce ignore file honored by this script.
p4_ignore = ".p4ignore"
# Process id of the launching process (workers are threads, so they share it).
main_pid = os.getpid( )
def PressEnter( ):
    """Block until the user presses ENTER (used to keep the console open)."""
    print( "\nPress ENTER to continue..." )
    input( "" )
def get_ignore_list( path, files_to_ignore ):
    """Collect the ignore regexes that apply to *path*.

    *files_to_ignore* maps a directory to the list of regexes declared by
    the .p4ignore found there; *path* inherits the entries of every one of
    its ancestor directories (including itself).
    """
    parts = path.split( os.sep )
    matched = [ ]
    for depth in range( len( parts ) ):
        prefix = os.sep.join( parts[ : depth + 1 ] )
        if prefix in files_to_ignore:
            matched.extend( files_to_ignore[ prefix ] )
    return matched
def match_in_ignore_list( path, ignore_list ):
    """Return True when *path* matches any regex in *ignore_list*."""
    return any( re.match( pattern, path ) for pattern in ignore_list )
class PTable( list ):
    """A list carrying a semaphore so callers can serialize access to it."""
    def __init__( self, *args ):
        super( PTable, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
class PDict( dict ):
    """A dict carrying a semaphore so callers can serialize access to it."""
    def __init__( self, *args ):
        super( PDict, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
class Console( threading.Thread ):
    """Thread that serializes console output from concurrent workers.

    Writers queue lines keyed by their writer id; lines are buffered per
    writer and printed only on an explicit flush, so concurrent output is
    not interleaved mid-message.
    """
    MSG = enum( 'WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )

    def __init__( self ):
        threading.Thread.__init__( self )
        self.buffers = { }      # writer id -> list of pending lines
        self.running = True
        self.queue = multiprocessing.JoinableQueue( )

    @staticmethod
    def _writer_id( ):
        # Workers are threads within one process, so os.getpid() gave every
        # writer the same key and merged their buffers (a likely cause of the
        # doubled/mixed output); key buffers by thread identity instead.
        return threading.current_thread( ).ident

    def write( self, data ):
        self.queue.put( ( Console.MSG.WRITE, self._writer_id( ), data ) )

    def flush( self ):
        self.queue.put( ( Console.MSG.FLUSH, self._writer_id( ) ) )

    def clear( self ):
        self.queue.put( ( Console.MSG.CLEAR, self._writer_id( ) ) )

    def __enter__( self ):
        self.start( )
        return self

    def __exit__( self, type, value, tb ):
        # Deliver SHUTDOWN and wait for the thread.  Previously this only set
        # a flag the run loop never read, so the console thread (and hence
        # the program) never terminated.
        self.queue.put( ( Console.MSG.SHUTDOWN, ) )
        self.join( )

    def run( self ):
        while True:
            data = self.queue.get( )
            event = data[ 0 ]
            if event == Console.MSG.SHUTDOWN:
                # Flush whatever is still buffered before stopping.
                # items() works on both Python 2 and 3 (iteritems() is 2-only).
                for pid, buffer in self.buffers.items( ):
                    for line in buffer:
                        print( line )
                self.buffers.clear( )
                break
            elif event == Console.MSG.WRITE:
                pid, s = data[ 1 : ]
                self.buffers.setdefault( pid, [ ] ).append( s )
            elif event == Console.MSG.FLUSH:
                pid = data[ 1 ]
                # Print and DROP the buffer: leaving it in place made every
                # later flush re-print the same lines (doubled output).
                for line in self.buffers.pop( pid, [ ] ):
                    print( line )
            elif event == Console.MSG.CLEAR:
                pid = data[ 1 ]
                self.buffers.pop( pid, None )
class Worker( threading.Thread ):
    """Crawls directories from a shared queue and deletes files that
    Perforce does not know about, honoring .p4ignore patterns.

    Sub-directories found while crawling are pushed back onto the queue so
    other workers can pick them up (an effectively breadth-first crawl).
    """

    def __init__( self, console, queue, files_to_ignore ):
        threading.Thread.__init__( self )
        self.console = console            # Console for serialized output
        self.queue = queue                # shared JoinableQueue of (cmd, data)
        self.files_to_ignore = files_to_ignore  # shared PDict: dir -> regexes

    def run( self ):
        while True:
            ( cmd, data ) = self.queue.get( )
            if cmd == MSG.SHUTDOWN:
                self.console.write( "SHUTDOWN" )
                self.queue.task_done( )
                self.console.flush( )
                break
            if cmd != MSG.PARSE_DIRECTORY or data is None:
                # Unknown or malformed message: acknowledge it and move on.
                self.console.flush( )
                self.queue.task_done( )
                continue
            directory = data
            self.console.write( "Working on " + directory )
            dir_contents = os.listdir( directory )
            # Register any .p4ignore patterns found here (rooted at this
            # directory) in the shared ignore table.
            if p4_ignore in dir_contents:
                file_regexes = []
                # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
                path = os.path.join( directory, p4_ignore )
                with open( path ) as f:
                    for line in f:
                        new_line = remove_comment( line.strip( ) )
                        if len( new_line ) > 0:
                            file_regexes.append( re.compile( os.path.join( re.escape( directory + os.sep ), new_line ) ) )
                self.console.write( "|Appending ignores from " + path )
                with self.files_to_ignore.mutex:
                    if directory not in self.files_to_ignore:
                        self.files_to_ignore[ directory ] = []
                    self.files_to_ignore[ directory ].extend( file_regexes )
            ignore_list = get_ignore_list( directory, self.files_to_ignore )
            files = []
            # 'p4 fstat *' reports files unknown to Perforce on stderr as
            # "<name> - <message>" lines; those are the deletion candidates.
            # NOTE(review): passing the command as one string (with '*' left
            # for p4 itself to expand) relies on Windows Popen semantics --
            # on POSIX this would need an argument list or shell=True.
            command = "p4 fstat *"
            proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=directory )
            ( out, err ) = proc.communicate( )
            # splitlines() handles both \n and \r\n endings; splitting on
            # os.linesep broke whenever the pipe's endings didn't match the
            # host convention.
            for line in err.decode( 'utf-8' ).splitlines( ):
                if len( line ) == 0:
                    continue
                # Grab the filename from the end-anchored " - " separator;
                # rfind keeps this cheap compared to a verifying regex.
                i = line.rfind( ' - ')
                if i >= 0:
                    basename = line[ : i ]
                    if basename == "*":
                        # Directory is empty; it is removed in a later pass.
                        continue
                    path = os.path.join( directory, basename )
                    if not os.path.isdir( path ):
                        files.append( basename )
            # Queue sub-directories for the worker pool (unless ignored).
            for content in dir_contents:
                path = os.path.join( directory, content )
                if os.path.isdir( path ):
                    if match_in_ignore_list( path, ignore_list ):
                        self.console.write( "| Ignoring " + content )
                    else:
                        self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )
            # Delete unversioned files that no ignore rule covers.
            for file in files:
                path = os.path.join( directory, file )
                if match_in_ignore_list( path, ignore_list ):
                    self.console.write( "| Ignoring " + path )
                    continue
                self.console.write( "| " + file + " is unversioned, removing it." )
                os.chmod( path, stat.S_IWRITE )  # clear read-only before delete
                os.remove( path )
            self.console.write( "|Done." )
            self.console.flush( )
            self.queue.task_done( )
def main( args ):
    """Crawl the tree, delete files Perforce does not track, then prune
    directories left empty.

    Command line:
      -d/--dir      root directory to crawl (default: current directory)
      -t/--threads  number of crawler threads (default: 2)
    """
    # Require the p4 command-line client.
    # NOTE(review): 'Nul' is the Windows null device; this check (like the
    # string-form Popen in Worker) assumes a Windows host.
    if os.system( 'p4 > Nul' ) != 0:
        print( 'Perforce Command-line Client(p4) is required for this script.' )
        sys.exit( 1 )
    # http://docs.python.org/library/optparse.html
    parser = optparse.OptionParser( )
    parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
    # type="int" so a command-line value arrives as an int; previously it
    # stayed a string, breaking the comparison below on Python 3.
    parser.add_option( "-t", "--threads", dest="thread_count", type="int", help="Number of threads to crawl your drive and poll p4.", default=2 )
    parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )
    ( options, args ) = parser.parse_args( )

    root_full_path = os.getcwd( )

    # Ignore table: directory -> compiled regexes from the .p4ignore found
    # there.  Seeded so the script never deletes itself.
    files_to_ignore = PDict()
    with files_to_ignore.mutex:
        files_to_ignore[ root_full_path ] = [ re.compile( os.path.join( re.escape( root_full_path + os.sep ), os.path.basename( __file__ ) ) ) ]

    # Setup threading.  Fall back to one thread per CPU on a non-positive
    # request (the original added an empty list to the count: TypeError).
    threads = []
    thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( )
    queue = multiprocessing.JoinableQueue( )

    with Console() as c:
        for i in range( thread_count ):
            t = Worker( c, queue, files_to_ignore )
            threads.append( t )
            t.start( )
        if len( threads ) == 1:
            print( "Spawned %s thread." % len( threads ) )
        else:
            print( "Spawned %s threads." % len( threads ) )
        queue.put( ( MSG.PARSE_DIRECTORY, options.directory if options.directory is not None else os.getcwd( ) ) )
        queue.join( )
        # One SHUTDOWN per worker so every thread exits its loop.
        for i in range( thread_count ):
            queue.put( ( MSG.SHUTDOWN, None ) )
        print( os.linesep + "Removing empty directories...")
        # Walk bottom-up so children are removed before their parents.
        for root, dirs, files in os.walk( root_full_path, topdown=False ):
            ignore_list = get_ignore_list( root, files_to_ignore )
            # Iterate a copy: removing from the list being iterated skips
            # the entry following each removal.
            for d in dirs[ : ]:
                path = os.path.join( root, d )
                if match_in_ignore_list( path, ignore_list ):
                    # add option of using send2trash
                    print( "| ignoring " + d )
                    dirs.remove( d )
                    continue
                try:
                    os.rmdir(path)
                    print( "| " + d + " was removed." )
                except OSError:
                    # Fails on non-empty directory
                    pass
        print( "|Done." )
        for t in threads:
            t.join( )
if __name__ == "__main__":
try:
main( sys.argv )
except:
print( "Unexpected error!" )
traceback.print_exc( file = sys.stdout )
PressEnter()