#!/usr/bin/python
# -*- coding: utf8 -*-
# author              : Brian Ernst
# python_version      : 2.7.6 and 3.4.0
# =================================
"""Remove files that Perforce does not know about from a directory tree.

Worker threads crawl the tree, run ``p4 fstat *`` in every directory and
delete each file that p4 reports on stderr as ``<name> - <message>``.
Lines of any ``.p4ignore`` file found on the way are treated as regular
expressions (relative to the directory holding the file) for paths that
must be left alone.  Finally, empty directories are removed bottom-up.
"""

# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output
# todo: add option of using send2trash
# todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether

import inspect
import multiprocessing
import optparse
import os
import re
import stat
import subprocess
import sys
import threading
import traceback


re_remove_comment = re.compile( "#.*$" )


def remove_comment( s ):
    """Return `s` with everything from the first '#' to the end removed."""
    return re.sub( re_remove_comment, "", s )


# Python 2 compatibility: make `input` behave like Python 3's.
try:
    input = raw_input
except NameError:
    pass


def enum( *sequential, **named ):
    """Build a simple enum-like class.

    Positional names are numbered 0..n-1 in order; keyword arguments are
    added with the values given.
    """
    enums = dict( zip( sequential, range( len( sequential ) ) ), **named )
    return type( 'Enum', (), enums )


MSG = enum( 'SHUTDOWN', 'PARSE_DIRECTORY' )

p4_ignore = ".p4ignore"

main_pid = os.getpid( )


def PressEnter( ):
    """Block until the user presses ENTER."""
    print( "\nPress ENTER to continue..." )
    input( "" )


def get_ignore_list( path, files_to_ignore ):
    """Collect the ignore regexes that apply to `path`.

    `files_to_ignore` maps a directory to a list of regexes.  Every leading
    prefix of `path` (split on ``os.sep``), including `path` itself, that is
    a key in the mapping contributes its regexes, outermost first.
    """
    # have to split path and test top directory
    dirs = path.split( os.sep )
    ignore_list = [ ]
    for depth in range( 1, len( dirs ) + 1 ):
        path_to_find = os.sep.join( dirs[ : depth ] )
        if path_to_find in files_to_ignore:
            ignore_list.extend( files_to_ignore[ path_to_find ] )
    return ignore_list


def match_in_ignore_list( path, ignore_list ):
    """Return True if any regex in `ignore_list` matches the start of `path`."""
    return any( re.match( r, path ) for r in ignore_list )


class PTable( list ):
    """A list carrying a `mutex` attribute for callers that share it."""

    def __init__( self, *args ):
        # The positional arguments become the initial elements.
        list.__init__( self, args )
        self.mutex = multiprocessing.Semaphore( )


class PDict( dict ):
    """A dict carrying a `mutex` attribute for callers that share it."""

    def __init__( self, *args, **kwargs ):
        # Forward the arguments unchanged; the original passed the whole
        # `args` tuple as a single positional, which only worked when empty.
        dict.__init__( self, *args, **kwargs )
        self.mutex = multiprocessing.Semaphore( )


class Console( threading.Thread ):
    """Thread that serialises console output coming from several writers.

    Lines are buffered per writer and printed together when that writer
    calls `flush`, so the output of concurrent workers is not interleaved.
    Use as a context manager: entering starts the thread, leaving prints
    whatever is still buffered and stops the thread.
    """

    MSG = enum( 'WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )

    def __init__( self ):
        threading.Thread.__init__( self )
        self.buffers = {}
        self.running = True
        self.queue = multiprocessing.JoinableQueue( )

    @staticmethod
    def _writer_id( ):
        """Identify the calling writer.

        The process id alone is shared by every thread of a process, which
        would make all worker threads write into a single buffer, so the
        thread identifier is included as well.
        """
        return ( os.getpid( ), threading.current_thread( ).ident )

    def write( self, data ):
        """Append `data` to the calling writer's buffer."""
        self.queue.put( ( Console.MSG.WRITE, self._writer_id( ), data ) )

    def flush( self ):
        """Print and discard the calling writer's buffer."""
        self.queue.put( ( Console.MSG.FLUSH, self._writer_id( ) ) )

    def clear( self ):
        """Discard the calling writer's buffer without printing it."""
        self.queue.put( ( Console.MSG.CLEAR, self._writer_id( ) ) )

    def __enter__( self ):
        self.start( )
        return self

    def __exit__( self, exc_type, value, tb ):
        # run() blocks on queue.get(), so it has to be woken with an explicit
        # SHUTDOWN message; merely flipping a flag would leave this
        # non-daemon thread alive and keep the process from exiting.
        self.queue.put( ( Console.MSG.SHUTDOWN, ) )
        self.join( )
        self.running = False

    def run( self ):
        """Process queued messages until a SHUTDOWN message arrives."""
        self.running = True
        while True:
            data = self.queue.get( )
            event = data[ 0 ]

            if event == Console.MSG.SHUTDOWN:
                # flush remaining buffers before shutting down
                for lines in self.buffers.values( ):
                    for line in lines:
                        print( line )
                self.buffers.clear( )
                break
            elif event == Console.MSG.WRITE:
                writer, s = data[ 1 : ]
                self.buffers.setdefault( writer, [] ).append( s )
            elif event == Console.MSG.FLUSH:
                for line in self.buffers.pop( data[ 1 ], [] ):
                    print( line )
            elif event == Console.MSG.CLEAR:
                self.buffers.pop( data[ 1 ], None )


class Worker( threading.Thread ):
    """Thread that cleans the directories it receives through `queue`.

    Queue items are ``( cmd, data )`` tuples: ``MSG.PARSE_DIRECTORY`` with a
    directory path, or ``MSG.SHUTDOWN`` to end the thread.
    """

    def __init__( self, console, queue, files_to_ignore ):
        threading.Thread.__init__( self )

        self.console = console
        self.queue = queue
        self.files_to_ignore = files_to_ignore

    def run( self ):
        """Handle queue items until a SHUTDOWN command is received."""
        while True:
            ( cmd, data ) = self.queue.get( )
            try:
                if cmd == MSG.SHUTDOWN:
                    break
                if cmd == MSG.PARSE_DIRECTORY and data is not None:
                    self._process_directory( data )
            except Exception:
                # Report and keep going: a dead worker, or a missing
                # task_done(), would make queue.join() wait forever.
                self.console.write( "|Error: " + traceback.format_exc( ) )
            finally:
                self.console.flush( )
                self.queue.task_done( )

    def _load_ignore_file( self, directory ):
        """Register the regexes from `directory`'s .p4ignore file."""
        path = os.path.join( directory, p4_ignore )
        # Always protect the .p4ignore file itself, otherwise it would be
        # deleted whenever it is not versioned.
        file_regexes = [ re.compile( re.escape( path ) ) ]

        with open( path ) as f:
            for line in f:
                # Strip after removing the comment so "pattern  # note"
                # does not keep trailing blanks inside the regex.
                new_line = remove_comment( line ).strip( )
                if not new_line:
                    continue
                try:
                    file_regexes.append( re.compile( os.path.join( re.escape( directory + os.sep ), new_line ) ) )
                except re.error:
                    self.console.write( "|Skipping invalid ignore pattern: " + new_line )

        self.console.write( "|Appending ignores from " + path )
        with self.files_to_ignore.mutex:
            if directory not in self.files_to_ignore:
                self.files_to_ignore[ directory ] = []
            self.files_to_ignore[ directory ].extend( file_regexes )

    def _find_unversioned( self, directory ):
        """Return the names in `directory` that `p4 fstat *` reports on stderr."""
        proc = subprocess.Popen( [ "p4", "fstat", "*" ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=directory )
        ( out, err ) = proc.communicate( )

        files = []
        # splitlines() copes with both "\n" and "\r\n" output.
        for line in err.decode( 'utf-8', 'replace' ).splitlines( ):
            # dirty hack that grabs the filename from the front of the
            # printed (stderr) "<name> - <message>" line.
            i = line.rfind( ' - ' )
            if i < 0:
                # Not a per-file message; there is no file name to extract.
                continue
            basename = line[ : i ]
            if basename == "*":
                # Directory is empty, we could delete it now
                continue
            if not os.path.isdir( os.path.join( directory, basename ) ):
                files.append( basename )
        return files

    def _process_directory( self, directory ):
        """Queue sub-directories and delete unversioned files of `directory`."""
        self.console.write( "Working on " + directory )

        dir_contents = os.listdir( directory )

        if p4_ignore in dir_contents:
            self._load_ignore_file( directory )

        ignore_list = get_ignore_list( directory, self.files_to_ignore )
        files = self._find_unversioned( directory )

        for content in dir_contents:
            path = os.path.join( directory, content )
            if os.path.isdir( path ):
                if match_in_ignore_list( path, ignore_list ):
                    self.console.write( "| Ignoring " + content )
                else:
                    self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )

        for file in files:
            path = os.path.join( directory, file )

            if match_in_ignore_list( path, ignore_list ):
                self.console.write( "| Ignoring " + path )
                continue

            self.console.write( "| " + file + " is unversioned, removing it." )
            try:
                # Clear a read-only flag first so the removal can succeed.
                os.chmod( path, stat.S_IWRITE )
                os.remove( path )
            except OSError as e:
                # One stubborn file must not stop the rest of the directory.
                self.console.write( "| Could not remove " + path + ": " + str( e ) )

        self.console.write( "|Done." )


def _p4_available( ):
    """Return True if running `p4` succeeds (exit status 0)."""
    try:
        # os.devnull is portable; a literal "Nul" only works on Windows and
        # creates a stray file of that name elsewhere.
        with open( os.devnull, "w" ) as devnull:
            return subprocess.call( [ "p4" ], stdout=devnull ) == 0
    except OSError:
        # The executable could not be started at all.
        return False


def main( args ):
    """Parse the command line and clean the requested directory tree.

    `args` is an argv-style list (program name first), e.g. ``sys.argv``.
    Exits with status 1 if the p4 command-line client cannot be run.
    """
    # check requirements
    if not _p4_available( ):
        print( 'Perforce Command-line Client(p4) is required for this script.' )
        sys.exit( 1 )

    #http://docs.python.org/library/optparse.html
    parser = optparse.OptionParser( )

    parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
    parser.add_option( "-t", "--threads", dest="thread_count", type="int", help="Number of threads to crawl your drive and poll p4.", default=100 )
    parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )

    ( options, _ ) = parser.parse_args( args[ 1 : ] )

    # Crawl and clean the same tree: the requested directory if one was
    # given, the current directory otherwise.
    if options.directory is not None:
        root_full_path = os.path.abspath( options.directory )
    else:
        root_full_path = os.getcwd( )

    # Files are added from .p4ignore
    # Key is the file root, the value is the table of file regexes for that directory.
    files_to_ignore = PDict( )

    # make sure script doesn't delete itself
    script_name = os.path.basename( __file__ )
    script_path = os.path.abspath( __file__ )
    protected = (
        ( root_full_path, os.path.join( root_full_path, script_name ) ),
        ( os.path.dirname( script_path ), script_path ),
    )
    with files_to_ignore.mutex:
        for key, path in protected:
            files_to_ignore.setdefault( key, [] ).append( re.compile( re.escape( path ) ) )

    # Setup threading
    threads = []
    thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( )

    queue = multiprocessing.JoinableQueue( )

    with Console( ) as c:
        try:
            for _ in range( thread_count ):
                t = Worker( c, queue, files_to_ignore )
                threads.append( t )
                t.start( )

            if len( threads ) == 1:
                print( "Spawned %s thread." % len( threads ) )
            else:
                print( "Spawned %s threads." % len( threads ) )

            queue.put( ( MSG.PARSE_DIRECTORY, root_full_path ) )
            queue.join( )
        finally:
            # Always release the workers, even on error, so the process
            # is not kept alive by threads blocked on the queue.
            for _ in threads:
                queue.put( ( MSG.SHUTDOWN, None ) )
            for t in threads:
                t.join( )

        print( os.linesep + "Removing empty directories..." )
        # remove empty directories in reverse order
        for root, dirs, files in os.walk( root_full_path, topdown=False ):
            ignore_list = get_ignore_list( root, files_to_ignore )

            for d in dirs:
                path = os.path.join( root, d )

                if match_in_ignore_list( path, ignore_list ):
                    # add option of using send2trash
                    print( "| ignoring " + d )
                    # Leave ignored directories in place.
                    continue
                try:
                    os.rmdir( path )
                except OSError:
                    # Fails on non-empty directory
                    continue
                print( "| " + d + " was removed." )
        print( "|Done." )


if __name__ == "__main__":
    try:
        main( sys.argv )
    except Exception:
        print( "Unexpected error!" )
        traceback.print_exc( file = sys.stdout )
    finally:
        # Keep the console window open until the user has seen the output.
        PressEnter( )