From 2b14c4a273fa47538f131eae532a2af364029803 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 8 May 2014 21:05:55 -0600 Subject: [PATCH] Working on threaded support. --- p4RemoveUnversioned.py | 277 +++++++++++++++++++++++++++-------------- 1 file changed, 181 insertions(+), 96 deletions(-) diff --git a/p4RemoveUnversioned.py b/p4RemoveUnversioned.py index a55963e..06c6bf5 100644 --- a/p4RemoveUnversioned.py +++ b/p4RemoveUnversioned.py @@ -1,14 +1,16 @@ #!/usr/bin/python # -*- coding: utf8 -*- # author : Brian Ernst -# python_version : 2.7.6 +# python_version : 2.7.6 and 3.4.0 # ================================= # todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file. # todo: switch to faster method of calling p4 fstat on an entire directory and parsing it's output # todo: add option of using send2trash +# todo: buffer output, after exceeding a certain amount print to the output. +# todo: allow logging output besides console output, or redirection altogether -import inspect, os, re, stat, subprocess, sys, traceback +import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback re_remove_comment = re.compile( "#.*$" ) @@ -19,126 +21,206 @@ def remove_comment( s ): try: input = raw_input except: pass +def enum(*sequential, **named): + enums = dict(zip(sequential, range(len(sequential))), **named) + return type('Enum', (), enums) + +MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY') + +p4_ignore = ".p4ignore" + + def PressEnter( ): print( "\nPress ENTER to continue..." ) s=input( "" ) -def main( ): - # check requirement +def get_ignore_list( path, files_to_ignore ): + # have to split path and test top directory + dirs = path.split( os.sep ) + + ignore_list = [ ] + + for i, val in enumerate( dirs ): + path_to_find = os.sep.join( dirs[ : i + 1] ) + + if path_to_find in files_to_ignore: + ignore_list.extend( files_to_ignore[ path_to_find ] ) + + return ignore_list + +def match_in_ignore_list( path, ignore_list ): + for r in ignore_list: + if re.match( r, path ): + return True + return False + +class Console: + def __init__( self ): + self.mutex = multiprocessing.Semaphore( ) + def Write( self, data ): + with self.mutex: + print( data ) + +class PTable( list ): + def __init__( self, *args ): + list.__init__( self, args ) + self.mutex = multiprocessing.Semaphore( ) + +class PDict( dict ): + def __init__( self, *args ): + dict.__init__( self, args ) + self.mutex = multiprocessing.Semaphore( ) + + +class Worker( threading.Thread ): + def __init__( self, queue, files_to_ignore ): + threading.Thread.__init__( self ) + + self.queue = queue + self.files_to_ignore = files_to_ignore + + def run( self ): + while True: + ( cmd, data ) = self.queue.get( ) + + if cmd == MSG.SHUTDOWN: + break + + if cmd != MSG.PARSE_DIRECTORY or data is None: + self.queue.task_done( ) + continue + + directory = data + + current_directory = os.getcwd( ) + + dir_contents = os.listdir( directory ) + + if p4_ignore in dir_contents: + file_regexes = [] + # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted. + path = os.path.join( current_directory, p4_ignore ) + with open( path ) as f: + for line in f: + new_line = remove_comment( line.strip( ) ) + if len( new_line ) > 0: + file_regexes.append( re.compile( os.path.join( re.escape( current_directory + os.sep ), new_line ) ) ) + + print( "|Appending ignores from " + path ) + with self.files_to_ignore.mutex: + if current_directory not in self.files_to_ignore: + self.files_to_ignore[ current_directory ] = [] + self.files_to_ignore[ current_directory ].extend( file_regexes ) + + + ignore_list = get_ignore_list( current_directory, self.files_to_ignore ) + + + files = [] + command = "p4 fstat *" + + os.chdir( directory ) + proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) + (out, err) = proc.communicate() + os.chdir( current_directory ) + + for line in err.decode('utf-8').split( os.linesep ): + if len( line ) == 0: + continue + print(line) + # # dirty hack that grabs the filename from the ends of the printed out (not err) "depo_path - local_path" + # # I could use regex to verify the expected string, but that will just slow us down. + # basename = os.path.basename( line ) + i = line.rfind( ' - ') + if i >= 0: + basename = line[ : i ] + if basename == "*": + # Directory is empty, we could delete it now + continue + path = os.path.join( current_directory, basename ) + + if not os.path.isdir( path ): + files.append( basename ) + + for content in dir_contents: + if os.path.isdir( content ): + path = os.path.join( current_directory, content ) + if match_in_ignore_list( path, ignore_list ): + print( "| Ignoring " + content ) + else: + self.queue.put( ( MSG.PARSE_DIRECTORY, content ) ) + + for file in files: + path = os.path.join( current_directory, file ) + + if match_in_ignore_list( path, ignore_list ): + print( "| Ignoring " + path ) + continue + + print( "| " + file + " is unversioned, removing it." ) + os.chmod( path, stat.S_IWRITE ) + os.remove( path ) + + print( "|Done." ) + + self.queue.task_done( ) + +def main( args ): + # check requirements if os.system( 'p4 > Nul' ) != 0: print( 'Perforce Command-line Client(p4) is required for this script.' ) sys.exit( 1 ) - # Files are added from .p4ignore - # Key is the file root, the value is the table of file regexes for that directory. - files_to_ignore = {} + #http://docs.python.org/library/optparse.html + parser = optparse.OptionParser( ) - def get_ignore_list( path ): - # have to split path and test top directory - dirs = path.split( os.sep ) + parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=10 ) + parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True ) - ignore_list = [ ] + ( options, args ) = parser.parse_args( ) - for i, val in enumerate( dirs ): - path_to_find = os.sep.join( dirs[ : i + 1] ) - - if path_to_find in files_to_ignore: - ignore_list = ignore_list + files_to_ignore[ path_to_find ] - - return ignore_list - - def match_in_ignore_list( path, ignore_list ): - for r in ignore_list: - if re.match( r, path ): - return True - return False root_path = "." root_full_path = os.getcwd( ) - p4_ignore = ".p4ignore" + + # Files are added from .p4ignore + # Key is the file root, the value is the table of file regexes for that directory. + files_to_ignore = PDict() # make sure script doesn't delete itself + with files_to_ignore.mutex: + files_to_ignore[ root_path ] = [ re.compile( os.path.join( re.escape( root_path + os.sep ), os.path.basename( __file__ ) ) ) ] - files_to_ignore[ root_path ] = [ re.compile( os.path.join( re.escape( root_path + os.sep ), os.path.basename( __file__ ) ) ) ] + # Setup threading + threads = [] + thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( ) + threads - for root, dirs, files in os.walk( root_path ): + queue = multiprocessing.JoinableQueue( ) - print ( os.linesep + "Checking '" + root + "' ...") + for i in range( thread_count ): + t = Worker( queue, files_to_ignore ) + threads.append( t ) + t.start( ) - if p4_ignore in files: - file_regexes = [] + if len( threads ) == 1: + print( "Spawned %s thread." % len( threads ) ) + else: + print( "Spawned %s threads." % len( threads ) ) - # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted. + queue.put( ( MSG.PARSE_DIRECTORY, "." ) ) + queue.join( ) - path = os.path.join( root, p4_ignore ) - with open( path ) as f: - for line in f: - new_line = remove_comment( line.strip( ) ) - if len( new_line ) > 0: - file_regexes.append( re.compile( os.path.join( re.escape( root + os.sep ), new_line ) ) ) - - print( "|Appending ignores from " + path ) - files_to_ignore[ root ] = files_to_ignore[ root ] + file_regexes - - - ignore_list = get_ignore_list( root ) - - #command = "p4 have \"" + root + os.sep + "*\"" - command = "p4 fstat *" - - print("|" + command) - - os.chdir( root ) - proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - (out, err) = proc.communicate() - os.chdir( root_full_path ) - - # For ease we're doing a weird solution, rebuilding the file list. This is so we only need to parse unadded files. - files = [] - - for line in err.split( os.linesep ): - if len(line) == 0: - continue - # # dirty hack that grabs the filename from the ends of the printed out (not err) "depo_path - local_path" - # # I could use regex to verify the expected string, but that will just slow us down. - # basename = os.path.basename( line ) - i = line.rfind( ' - ') - if i >= 0: - basename = line[ : i ] - if basename == "*": - # Directory is empty, we could delete it now - continue - path = os.path.join( root, basename ) - - if not os.path.isdir( path ): - files.append( basename ) - - for file in files: - path = os.path.join( root, file ) - - if match_in_ignore_list( path, ignore_list ): - print( "| Ignoring " + path ) - continue - - print( "| " + file + " is unversioned, removing it." ) - os.chmod( path, stat.S_IWRITE ) - os.remove( path ) - - dirs_copy = dirs - for d in dirs_copy: - path = os.path.join( root, d ) - if match_in_ignore_list( path, ignore_list ): - # add option of using send2trash - print( "| Ignoring " + d ) - dirs.remove( d ) - - print( "|Done." ) + for i in range( thread_count ): + queue.put( ( MSG.SHUTDOWN, None ) ) print( os.linesep + "Removing empty directories...") - # remove empty directories + # remove empty directories in reverse order for root, dirs, files in os.walk( root_path, topdown=False ): + ignore_list = get_ignore_list( root, files_to_ignore ) + for d in dirs: path = os.path.join( root, d ) + if match_in_ignore_list( path, ignore_list ): # add option of using send2trash print( "| ignoring " + d ) @@ -151,9 +233,12 @@ def main( ): pass print( "|Done." ) + for t in threads: + t.join( ) + if __name__ == "__main__": try: - main( ) + main( sys.argv ) except: print( "Unexpected error!" ) traceback.print_exc( file = sys.stdout )