Working on threaded support.

This commit is contained in:
Brian 2014-05-08 21:05:55 -06:00
parent 1f4b52e3a9
commit 2b14c4a273
1 changed files with 181 additions and 96 deletions

View File

@ -1,14 +1,16 @@
#!/usr/bin/python #!/usr/bin/python
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
# author : Brian Ernst # author : Brian Ernst
# python_version : 2.7.6 # python_version : 2.7.6 and 3.4.0
# ================================= # =================================
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file. # todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output # todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output
# todo: add option of using send2trash # todo: add option of using send2trash
# todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether
import inspect, os, re, stat, subprocess, sys, traceback import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback
re_remove_comment = re.compile( "#.*$" ) re_remove_comment = re.compile( "#.*$" )
@ -19,21 +21,20 @@ def remove_comment( s ):
# Python 2 compatibility: bind raw_input to the name input so prompts return
# the typed text as-is. Python 3 has no raw_input, so the lookup raises
# NameError and the built-in input is kept. Only NameError is caught so that
# unrelated failures are not silently hidden.
try:
    input = raw_input
except NameError:
    pass
def enum(*sequential, **named):
    """Build a lightweight enumeration class.

    Positional names are numbered 0, 1, 2, ... in the order given. Keyword
    arguments supply explicit name/value pairs and override a positional
    name that is spelled the same.

    Returns a new class named 'Enum' whose class attributes are the members.
    """
    members = {}
    for index, name in enumerate(sequential):
        members[name] = index
    # Explicit values are applied last so they win over positional numbering.
    members.update(named)
    return type('Enum', (), members)
# Command codes carried as the first item of the ( cmd, data ) tuples that
# Worker.run reads from its queue.
MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')
# File name looked for in each directory; its non-comment lines are compiled
# into ignore regexes.
p4_ignore = ".p4ignore"
def PressEnter( ):
    """Print a prompt and block until the user submits a line (presses ENTER)."""
    print( "\nPress ENTER to continue..." )
    # The entered text is irrelevant; the call only waits for the key press.
    input( "" )
def main( ): def get_ignore_list( path, files_to_ignore ):
# check requirement
if os.system( 'p4 > Nul' ) != 0:
print( 'Perforce Command-line Client(p4) is required for this script.' )
sys.exit( 1 )
# Files are added from .p4ignore
# Key is the file root, the value is the table of file regexes for that directory.
files_to_ignore = {}
def get_ignore_list( path ):
# have to split path and test top directory # have to split path and test top directory
dirs = path.split( os.sep ) dirs = path.split( os.sep )
@ -43,7 +44,7 @@ def main( ):
path_to_find = os.sep.join( dirs[ : i + 1] ) path_to_find = os.sep.join( dirs[ : i + 1] )
if path_to_find in files_to_ignore: if path_to_find in files_to_ignore:
ignore_list = ignore_list + files_to_ignore[ path_to_find ] ignore_list.extend( files_to_ignore[ path_to_find ] )
return ignore_list return ignore_list
@ -53,52 +54,80 @@ def main( ):
return True return True
return False return False
class Console:
    """Console writer whose output calls are serialised by a mutex."""

    def __init__( self ):
        # A semaphore created with its default count acts as the mutex.
        self.mutex = multiprocessing.Semaphore( )

    def Write( self, data ):
        """Print data while holding the console mutex."""
        self.mutex.acquire( )
        try:
            print( data )
        finally:
            # Always release, even if printing raised.
            self.mutex.release( )
class PTable( list ):
    """A list that carries a mutex alongside its items.

    Every positional constructor argument becomes one element, so
    PTable( 1, 2 ) holds [1, 2] and PTable( ) is empty.

    The list never acquires the mutex itself; presumably callers are expected
    to hold .mutex around accesses that need protection (confirm at call sites).
    """

    def __init__( self, *args ):
        # args is the tuple of positional arguments; it seeds the list contents.
        super( PTable, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
class PDict( dict ):
    """A dict that carries a mutex alongside its entries.

    Each positional constructor argument must be a two-item ( key, value )
    sequence; the pairs become the initial entries. PDict( ) is empty.

    The dict never acquires the mutex itself; code that shares an instance
    takes .mutex explicitly (e.g. "with d.mutex:") around its updates.
    """

    def __init__( self, *args ):
        # args is the tuple of positional arguments, consumed as key/value pairs.
        super( PDict, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
for root, dirs, files in os.walk( root_path ):
print ( os.linesep + "Checking '" + root + "' ...") class Worker( threading.Thread ):
def __init__( self, queue, files_to_ignore ):
threading.Thread.__init__( self )
if p4_ignore in files: self.queue = queue
self.files_to_ignore = files_to_ignore
def run( self ):
while True:
( cmd, data ) = self.queue.get( )
if cmd == MSG.SHUTDOWN:
break
if cmd != MSG.PARSE_DIRECTORY or data is None:
self.queue.task_done( )
continue
directory = data
current_directory = os.getcwd( )
dir_contents = os.listdir( directory )
if p4_ignore in dir_contents:
file_regexes = [] file_regexes = []
# Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted. # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
path = os.path.join( current_directory, p4_ignore )
path = os.path.join( root, p4_ignore )
with open( path ) as f: with open( path ) as f:
for line in f: for line in f:
new_line = remove_comment( line.strip( ) ) new_line = remove_comment( line.strip( ) )
if len( new_line ) > 0: if len( new_line ) > 0:
file_regexes.append( re.compile( os.path.join( re.escape( root + os.sep ), new_line ) ) ) file_regexes.append( re.compile( os.path.join( re.escape( current_directory + os.sep ), new_line ) ) )
print( "|Appending ignores from " + path ) print( "|Appending ignores from " + path )
files_to_ignore[ root ] = files_to_ignore[ root ] + file_regexes with self.files_to_ignore.mutex:
if current_directory not in self.files_to_ignore:
self.files_to_ignore[ current_directory ] = []
self.files_to_ignore[ current_directory ].extend( file_regexes )
ignore_list = get_ignore_list( root ) ignore_list = get_ignore_list( current_directory, self.files_to_ignore )
#command = "p4 have \"" + root + os.sep + "*\""
files = []
command = "p4 fstat *" command = "p4 fstat *"
print("|" + command) os.chdir( directory )
os.chdir( root )
proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
(out, err) = proc.communicate() (out, err) = proc.communicate()
os.chdir( root_full_path ) os.chdir( current_directory )
# For ease we're doing a weird solution, rebuilding the file list. This is so we only need to parse unadded files. for line in err.decode('utf-8').split( os.linesep ):
files = []
for line in err.split( os.linesep ):
if len( line ) == 0: if len( line ) == 0:
continue continue
print(line)
# # dirty hack that grabs the filename from the ends of the printed out (not err) "depo_path - local_path" # # dirty hack that grabs the filename from the ends of the printed out (not err) "depo_path - local_path"
# # I could use regex to verify the expected string, but that will just slow us down. # # I could use regex to verify the expected string, but that will just slow us down.
# basename = os.path.basename( line ) # basename = os.path.basename( line )
@ -108,13 +137,21 @@ def main( ):
if basename == "*": if basename == "*":
# Directory is empty, we could delete it now # Directory is empty, we could delete it now
continue continue
path = os.path.join( root, basename ) path = os.path.join( current_directory, basename )
if not os.path.isdir( path ): if not os.path.isdir( path ):
files.append( basename ) files.append( basename )
for content in dir_contents:
if os.path.isdir( content ):
path = os.path.join( current_directory, content )
if match_in_ignore_list( path, ignore_list ):
print( "| Ignoring " + content )
else:
self.queue.put( ( MSG.PARSE_DIRECTORY, content ) )
for file in files: for file in files:
path = os.path.join( root, file ) path = os.path.join( current_directory, file )
if match_in_ignore_list( path, ignore_list ): if match_in_ignore_list( path, ignore_list ):
print( "| Ignoring " + path ) print( "| Ignoring " + path )
@ -124,21 +161,66 @@ def main( ):
os.chmod( path, stat.S_IWRITE ) os.chmod( path, stat.S_IWRITE )
os.remove( path ) os.remove( path )
dirs_copy = dirs
for d in dirs_copy:
path = os.path.join( root, d )
if match_in_ignore_list( path, ignore_list ):
# add option of using send2trash
print( "| Ignoring " + d )
dirs.remove( d )
print( "|Done." ) print( "|Done." )
self.queue.task_done( )
def main( args ):
# check requirements
if os.system( 'p4 > Nul' ) != 0:
print( 'Perforce Command-line Client(p4) is required for this script.' )
sys.exit( 1 )
#http://docs.python.org/library/optparse.html
parser = optparse.OptionParser( )
parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=10 )
parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )
( options, args ) = parser.parse_args( )
root_path = "."
root_full_path = os.getcwd( )
# Files are added from .p4ignore
# Key is the file root, the value is the table of file regexes for that directory.
files_to_ignore = PDict()
# make sure script doesn't delete itself
with files_to_ignore.mutex:
files_to_ignore[ root_path ] = [ re.compile( os.path.join( re.escape( root_path + os.sep ), os.path.basename( __file__ ) ) ) ]
# Setup threading
threads = []
thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( ) + threads
queue = multiprocessing.JoinableQueue( )
for i in range( thread_count ):
t = Worker( queue, files_to_ignore )
threads.append( t )
t.start( )
if len( threads ) == 1:
print( "Spawned %s thread." % len( threads ) )
else:
print( "Spawned %s threads." % len( threads ) )
queue.put( ( MSG.PARSE_DIRECTORY, "." ) )
queue.join( )
for i in range( thread_count ):
queue.put( ( MSG.SHUTDOWN, None ) )
print( os.linesep + "Removing empty directories...") print( os.linesep + "Removing empty directories...")
# remove empty directories # remove empty directories in reverse order
for root, dirs, files in os.walk( root_path, topdown=False ): for root, dirs, files in os.walk( root_path, topdown=False ):
ignore_list = get_ignore_list( root, files_to_ignore )
for d in dirs: for d in dirs:
path = os.path.join( root, d ) path = os.path.join( root, d )
if match_in_ignore_list( path, ignore_list ): if match_in_ignore_list( path, ignore_list ):
# add option of using send2trash # add option of using send2trash
print( "| ignoring " + d ) print( "| ignoring " + d )
@ -151,9 +233,12 @@ def main( ):
pass pass
print( "|Done." ) print( "|Done." )
for t in threads:
t.join( )
if __name__ == "__main__": if __name__ == "__main__":
try: try:
main( ) main( sys.argv )
except: except:
print( "Unexpected error!" ) print( "Unexpected error!" )
traceback.print_exc( file = sys.stdout ) traceback.print_exc( file = sys.stdout )