#!/usr/bin/python
# -*- coding: utf8 -*-
# author : Brian Ernst
# python_version : 2.7.6 and 3.4.0
# =================================
"""Remove files that Perforce does not know about from a directory tree.

The script asks ``p4 fstat ...`` for the client files below a directory,
deletes every file on disk that is not in that set, and then removes the
directories that were left empty.
"""

# todo: switch to `p4 fstat ...`, and parse the output for clientFile and cache it.
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output
# todo: add option of using send2trash
# todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether

import datetime
import inspect
import multiprocessing
import optparse
import os
import re
import stat
import subprocess
import sys
import threading
import traceback

# trying ntpath, need to test on linux
import ntpath


re_remove_comment = re.compile("#.*$")


def remove_comment(s):
    """Return the single line *s* with a trailing ``#`` comment stripped."""
    return re_remove_comment.sub("", s)


try:
    # Python 2: make input() return the raw string like Python 3 does.
    input = raw_input
except NameError:
    pass


def enum(*sequential, **named):
    """Build a simple enum-like class.

    Positional names are numbered from 0 in order; keyword arguments are
    added as attributes with the given values.
    """
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')

p4_ignore = ".p4ignore"

main_pid = os.getpid()


# if os.name == 'nt' or sys.platform == 'cygwin'
def basename(path):
    """Return the final component of *path* using Windows path rules."""
    # TODO: import based on platform
    # https://docs.python.org/2/library/os.path.html
    # posixpath for UNIX-style paths
    # ntpath for Windows paths
    # macpath for old-style MacOS paths
    # os2emxpath for OS/2 EMX paths
    return ntpath.basename(path)


def normpath(path):
    """Normalize *path* using Windows path rules."""
    return ntpath.normpath(path)


def join(patha, pathb):
    """Join two path components using Windows path rules."""
    return ntpath.join(patha, pathb)


def splitdrive(path):
    """Split *path* into ``(drive, tail)`` using Windows path rules."""
    return ntpath.splitdrive(path)


def get_ignore_list(path, files_to_ignore):
    """Collect the ignore patterns that apply to *path*.

    *files_to_ignore* maps a directory to a list of regexes.  Every leading
    prefix of *path* (split on ``os.sep``) is looked up, outermost first,
    and the patterns found are concatenated in that order.
    """
    parts = path.split(os.sep)

    ignore_list = []
    for depth in range(1, len(parts) + 1):
        prefix = os.sep.join(parts[:depth])
        if prefix in files_to_ignore:
            ignore_list.extend(files_to_ignore[prefix])
    return ignore_list


def match_in_ignore_list(path, ignore_list):
    """Return True when any regex in *ignore_list* matches the start of *path*."""
    return any(re.match(pattern, path) for pattern in ignore_list)


def call_process(args):
    """Run the whitespace-separated command *args* and return its exit code.

    Output is captured and discarded.  ``communicate()`` is used so that a
    chatty child cannot block on a full pipe.
    """
    proc = subprocess.Popen(args.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    proc.communicate()
    return proc.returncode


# True on Python 3, where process output arrives as bytes rather than str.
use_bytearray_str_conversion = type(b"str") is not str


def get_str_from_process_stdout(line):
    """Convert one line of process output to ``str``.

    Bytes are mapped one-to-one onto code points (latin-1), which is what
    the previous per-byte ``chr`` conversion produced.
    """
    if use_bytearray_str_conversion and isinstance(line, bytes):
        return line.decode("latin-1")
    return line


def singular_pulural(val, singular, plural):
    """Return *singular* when *val* is 1, otherwise *plural*."""
    return singular if val == 1 else plural


# Keep these in mind if you have issues:
# https://stackoverflow.com/questions/16557908/getting-output-of-a-process-at-runtime
# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
def get_client_set(path):
    """Return the set of client file paths ``p4 fstat ...`` reports under *path*.

    Paths are normalized with :func:`normpath`; on Windows and Cygwin the
    drive letter is upper-cased.  The set is empty when p4 prints no
    ``... clientFile`` lines.
    """
    files = set()

    make_drive_upper = os.name == 'nt' or sys.platform == 'cygwin'
    client_file_tag = "... clientFile "

    # stderr is never inspected; send it to the null device so the child
    # cannot stall on an unread pipe.
    with open(os.devnull, 'w') as devnull:
        proc = subprocess.Popen(
            ["p4", "fstat", "..."],
            stdout=subprocess.PIPE,
            stderr=devnull,
            cwd=path,
        )
        try:
            for line in proc.stdout:
                line = get_str_from_process_stdout(line)
                if not line.startswith(client_file_tag):
                    continue

                local_path = normpath(line[len(client_file_tag):].strip())
                if make_drive_upper:
                    drive, tail = splitdrive(local_path)
                    local_path = drive.upper() + tail

                files.add(local_path)
        finally:
            proc.stdout.close()
            proc.wait()

    # TODO: check error to see if the path is not in the client view. Prompt anyway?

    return files


class PTable(list):
    """A list built from its positional arguments, carrying a ``mutex``."""

    def __init__(self, *args):
        list.__init__(self, args)
        self.mutex = multiprocessing.Semaphore()


class PDict(dict):
    """A dict carrying a ``mutex``; accepts the same positional arguments as ``dict``."""

    def __init__(self, *args):
        # Unpack so PDict(mapping) and PDict(pairs) behave like dict(...).
        dict.__init__(self, *args)
        self.mutex = multiprocessing.Semaphore()


# TODO: Create a child thread for triggering autoflush events
class Console(threading.Thread):
    """Thread that buffers output lines per pid and prints them on flush.

    Producers enqueue messages with :meth:`write`, :meth:`flush`,
    :meth:`writeflush` and :meth:`clear`; the thread consumes them in
    :meth:`run`.  Used as a context manager it starts on entry and is shut
    down (remaining buffers printed) on exit.
    """

    MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR')

    # auto_flush_time is time in milliseconds since last flush to trigger a flush when writing
    def __init__(self, auto_flush_num=None, auto_flush_time=None):
        """Create the console.

        auto_flush_num  -- flush a pid's buffer once it holds this many
                           lines (None disables).
        auto_flush_time -- flush on write once this many milliseconds have
                           passed since the pid's last flush (None disables).
        """
        threading.Thread.__init__(self)
        self.buffers = {}
        self.buffer_write_times = {}
        self.running = True
        self.queue = multiprocessing.JoinableQueue()
        self.auto_flush_num = auto_flush_num if auto_flush_num is not None else -1
        # Stored in microseconds; -1 means disabled.
        self.auto_flush_time = auto_flush_time * 1000 if auto_flush_time is not None else -1
        self.shutting_down = False

    def write(self, data, pid=None):
        """Queue *data* for the buffer of *pid* (default: the calling process)."""
        self.queue.put((Console.MSG.WRITE, pid if pid is not None else os.getpid(), data))

    def writeflush(self, data, pid=None):
        """Queue *data* followed by a flush of the same buffer."""
        pid = pid if pid is not None else os.getpid()
        self.queue.put((Console.MSG.WRITE, pid, data))
        self.queue.put((Console.MSG.FLUSH, pid))

    def flush(self, pid=None):
        """Queue a request to print and empty the buffer of *pid*."""
        self.queue.put((Console.MSG.FLUSH, pid if pid is not None else os.getpid()))

    def clear(self, pid=None):
        """Queue a request to discard the buffer of *pid* without printing."""
        self.queue.put((Console.MSG.CLEAR, pid if pid is not None else os.getpid()))

    def __enter__(self):
        self.start()
        return self

    def __exit__(self, type, value, tb):
        self.queue.put((Console.MSG.SHUTDOWN,))
        self.queue.join()
        # Wait for run() to return so no output trails the with-block.
        self.join()

    def _should_auto_flush(self, pid, now=None):
        """Return True when the buffer of *pid* has hit an auto-flush limit."""
        if 0 <= self.auto_flush_num <= len(self.buffers[pid]):
            return True
        if self.auto_flush_time < 0:
            return False
        if now is None:
            now = datetime.datetime.now()
        # Compare whole durations: timedelta.microseconds is only the
        # sub-second component and can never reach one second.
        elapsed = now - self.buffer_write_times[pid]
        return elapsed >= datetime.timedelta(microseconds=self.auto_flush_time)

    def _flush_now(self, pid):
        """Print and drop the buffered lines of *pid*, if any."""
        if pid in self.buffers:
            for line in self.buffers.pop(pid):
                print(line)
            self.buffer_write_times[pid] = datetime.datetime.now()

    def _handle(self, data):
        """Process one queued message; return False once shutdown is seen."""
        event = data[0]

        if event == Console.MSG.SHUTDOWN:
            # flush remaining buffers before shutting down
            for lines in self.buffers.values():
                for line in lines:
                    print(line)
            self.buffers.clear()
            self.buffer_write_times.clear()
            return False

        if event == Console.MSG.WRITE:
            pid, s = data[1:]
            self.buffers.setdefault(pid, []).append(s)
            self.buffer_write_times.setdefault(pid, datetime.datetime.now())
            # Flush in this thread instead of re-enqueueing a FLUSH: a
            # message queued behind SHUTDOWN would never be marked done
            # and would make queue.join() wait forever.
            if self._should_auto_flush(pid):
                self._flush_now(pid)
            # TODO: if buffer is not empty and we don't auto flush on write, sleep until a time then auto flush according to auto_flush_time
        elif event == Console.MSG.FLUSH:
            self._flush_now(data[1])
        elif event == Console.MSG.CLEAR:
            self.buffers.pop(data[1], None)
        return True

    def run(self):
        """Consume queued messages until a SHUTDOWN message arrives."""
        keep_running = True
        while keep_running:
            data = self.queue.get()
            try:
                keep_running = self._handle(data)
            finally:
                self.queue.task_done()


def _remove_unversioned_files(directory, files_in_depot, files_to_ignore, console, quiet):
    """Delete files under *directory* that are not in *files_in_depot*.

    Returns ``(removed_count, error_count)``.
    """
    removed = 0
    errors = 0

    for root, dirs, files in os.walk(directory):
        ignore_list = get_ignore_list(root, files_to_ignore)

        if not quiet:
            console.write("|Checking " + root)

        # Rebuild dirs in place (rather than removing while iterating, which
        # skips entries) so os.walk does not descend into ignored ones.
        kept_dirs = []
        for d in dirs:
            if match_in_ignore_list(join(root, d), ignore_list):
                # add option of using send2trash
                if not quiet:
                    console.write("| ignoring " + d)
            else:
                kept_dirs.append(d)
        dirs[:] = kept_dirs

        for f in files:
            path = normpath(join(root, f))
            if path in files_in_depot:
                continue

            if not quiet:
                console.write("| " + f + " is unversioned, removing it.")
            try:
                # Clear a read-only flag first so the delete can succeed.
                os.chmod(path, stat.S_IWRITE)
                os.remove(path)
            except OSError as ex:
                console.writeflush("| " + type(ex).__name__)
                console.writeflush("| " + repr(ex))
                console.writeflush("| ^ERROR^")
                errors += 1
            else:
                removed += 1

    return removed, errors


def _remove_empty_directories(directory, files_to_ignore, console, quiet):
    """Remove empty directories below *directory*, deepest first.

    Returns the number of directories removed.
    """
    removed = 0

    for root, dirs, files in os.walk(directory, topdown=False):
        ignore_list = get_ignore_list(root, files_to_ignore)

        for d in dirs:
            path = os.path.join(root, d)

            if match_in_ignore_list(path, ignore_list):
                # add option of using send2trash
                if not quiet:
                    console.write("| ignoring " + d)
                # Ignored directories must not be removed.
                continue

            try:
                os.rmdir(path)
            except OSError:
                # Fails on non-empty directory
                continue
            removed += 1
            if not quiet:
                console.write("| " + d + " was removed.")

    return removed


def main(args):
    """Clean the target directory of files Perforce does not track.

    *args* is an argv-style list (program name first).  Exits with status 1
    when the ``p4`` command-line client cannot be run.
    """
    # check requirements
    try:
        p4_available = call_process('p4 -V') == 0
    except OSError:
        # Raised when the p4 executable cannot be started at all.
        p4_available = False
    if not p4_available:
        print('Perforce Command-line Client(p4) is required for this script.')
        sys.exit(1)

    # http://docs.python.org/library/optparse.html
    parser = optparse.OptionParser()

    parser.add_option("-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None)
    parser.add_option("-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100)
    parser.add_option("-q", "--quiet", action="store_true", dest="quiet", help="This overrides verbose", default=False)
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=True)

    # Passing None makes optparse fall back to sys.argv[1:].
    options, _positional = parser.parse_args(args[1:] if args else None)

    directory = normpath(options.directory if options.directory is not None else os.getcwd())
    quiet = options.quiet

    # Files are added from .p4ignore
    # Key is the file root, the value is the table of file regexes for that directory.
    files_to_ignore = PDict()

    warning_count = 0

    with Console(auto_flush_num=20, auto_flush_time=1000) as c:
        if not quiet:
            c.writeflush("Caching files in depot, this may take a little while...")

        files_in_depot = get_client_set(directory)

        if not files_in_depot:
            # An empty set would mark every file on disk as unversioned; it
            # is what a failed `p4 fstat` produces, so refuse to delete.
            c.writeflush("p4 fstat reported no client files under " + directory + "; nothing was removed.")
            return

        if not quiet:
            c.writeflush("Checking " + directory)

        remove_file_count, error_count = _remove_unversioned_files(
            directory, files_in_depot, files_to_ignore, c, quiet)

        if not quiet:
            c.write("|Done.")

        if not quiet:
            c.write(os.linesep + "Removing empty directories...")

        # remove empty directories in reverse order
        remove_dir_count = _remove_empty_directories(directory, files_to_ignore, c, quiet)

        if not quiet:
            c.write("|Done.")

        if not quiet:
            output = "\nRemoved " + str(remove_file_count) + singular_pulural(remove_file_count, " file, ", " files, ")
            output += str(remove_dir_count) + singular_pulural(remove_dir_count, " directory", " directories")

            if warning_count > 0:
                output += " w/ " + str(warning_count) + singular_pulural(warning_count, " warning", " warnings")
            if error_count > 0:
                output += " w/ " + str(error_count) + singular_pulural(error_count, " error", " errors")

            c.write(output + ".")


if __name__ == "__main__":
    try:
        main(sys.argv)
    except Exception:
        # SystemExit and KeyboardInterrupt are not Exception subclasses, so
        # the deliberate sys.exit(1) in main() is not reported as unexpected.
        print("Unexpected error!")
        traceback.print_exc(file=sys.stdout)
        sys.exit(1)