#!/usr/bin/python
# -*- coding: utf8 -*-
# author : Brian Ernst
# python_version : 2.7.6 and 3.4.0
# =================================

# todo: switch to `p4 fstat ...`, and parse the output for clientFile and cache it.
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing it's output
# todo: add option of using send2trash
# todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether

import datetime, inspect, marshal, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, time, traceback

# trying ntpath, need to test on linux
import ntpath

# Pre-compiled pattern matching a '#' comment through end of line.
re_remove_comment = re.compile("#.*$")


def remove_comment(s):
    """Return *s* with any '#'-to-end-of-line comment stripped."""
    return re.sub(re_remove_comment, "", s)


# Python 2/3 compatibility shim so input() behaves like raw_input().
# FIX: catch only NameError (raw_input does not exist on Python 3) instead of
# a bare `except:` that would also hide unrelated failures.
try:
    input = raw_input
except NameError:
    pass


def enum(*sequential, **named):
    """Build a simple enum type.

    Positional names are numbered 0..n-1; keyword arguments supply explicit
    values, e.g. enum('A', 'B', C=10).
    """
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)


# Messages understood by the Worker threads.
MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY', 'RUN_FUNCTION')

# Name of the per-directory ignore file honored by this script.
p4_ignore = ".p4ignore"

# PID of the main process; used as the default buffer key for console output.
main_pid = os.getpid()


#if os.name == 'nt' or sys.platform == 'cygwin'
def basename(path):
    """Return the final component of *path* using Windows (ntpath) rules."""
    # TODO: import based on platform
    # https://docs.python.org/2/library/os.path.html
    # posixpath for UNIX-style paths
    # ntpath for Windows paths
    # macpath for old-style MacOS paths
    # os2emxpath for OS/2 EMX paths
    #return os.path.basename( path )
    return ntpath.basename(path)


def normpath(path):
    """Normalize *path* using Windows (ntpath) conventions."""
    return ntpath.normpath(path)


def join(patha, pathb):
    """Join two path fragments using Windows (ntpath) conventions."""
    return ntpath.join(patha, pathb)


def splitdrive(path):
    """Split *path* into (drive, tail) using Windows (ntpath) conventions."""
    return ntpath.splitdrive(path)


def get_ignore_list(path, files_to_ignore):
    """Collect the ignore regexes applicable to *path*.

    *files_to_ignore* maps a directory prefix to a list of regexes; every
    prefix of *path* (split on os.sep) contributes its regexes, so parent
    directories' ignore rules apply to children.
    """
    # have to split path and test top directory
    dirs = path.split(os.sep)
    ignore_list = []

    for i, _ in enumerate(dirs):  # the element itself is unused, only the index
        path_to_find = os.sep.join(dirs[:i + 1])
        if path_to_find in files_to_ignore:
            ignore_list.extend(files_to_ignore[path_to_find])

    return ignore_list
def match_in_ignore_list( path, ignore_list ): for r in ignore_list: if re.match( r, path ): return True return False def call_process( args ): return subprocess.call( args.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE ) def try_call_process( args, path=None ): try: subprocess.check_output( args.split( ), shell=False, cwd=path ) return 0 except subprocess.CalledProcessError: return 1 use_bytearray_str_conversion = type( b"str" ) is not str def get_str_from_process_stdout( line ): if use_bytearray_str_conversion: return ''.join( map( chr, line ) ) else: return line def singular_pulural( val, singular, plural ): return singular if val == 1 else plural def parse_info_from_command( args, value, path = None ): """ :rtype : string """ proc = subprocess.Popen( args.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=path ) for line in proc.stdout: line = get_str_from_process_stdout( line ) if not line.startswith( value ): continue return line[ len( value ) : ].strip( ) return None def get_p4_py_results( args, path = None ): results = [] proc = subprocess.Popen( [ 'p4', '-G' ] + args.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=path ) try: while True: output = marshal.load( proc.stdout ) results.append( output ) except EOFError: pass finally: proc.stdout.close() return results # Keep these in mind if you have issues: # https://stackoverflow.com/questions/16557908/getting-output-of-a-process-at-runtime # https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running def get_client_set( path ): files = set( [ ] ) make_drive_upper = True if os.name == 'nt' or sys.platform == 'cygwin' else False command = "p4 fstat ..." proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=path ) for line in proc.stdout: line = get_str_from_process_stdout( line ) clientFile_tag = "... 
clientFile " if not line.startswith( clientFile_tag ): continue local_path = normpath( line[ len( clientFile_tag ) : ].strip( ) ) if make_drive_upper: drive, path = splitdrive( local_path ) local_path = ''.join( [ drive.upper( ), path ] ) files.add( local_path ) proc.wait( ) for line in proc.stderr: raise Exception(line) return files def get_client_root( ): """ :rtype : string """ command = "p4 info" proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE ) for line in proc.stdout: line = get_str_from_process_stdout( line ) clientFile_tag = "Client root: " if not line.startswith( clientFile_tag ): continue local_path = normpath( line[ len( clientFile_tag ) : ].strip( ) ) return local_path return None class PTable( list ): def __init__( self, *args ): list.__init__( self, args ) self.mutex = multiprocessing.Semaphore( ) class PDict( dict ): def __init__( self, *args ): dict.__init__( self, args ) self.mutex = multiprocessing.Semaphore( ) # TODO: Create a child thread for triggering autoflush events class Console( threading.Thread ): MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' ) # auto_flush_time is time in milliseconds since last flush to trigger a flush when writing def __init__( self, auto_flush_num = None, auto_flush_time = None ): threading.Thread.__init__( self ) self.buffers = {} self.buffer_write_times = {} self.running = True self.queue = multiprocessing.JoinableQueue( ) self.auto_flush_num = auto_flush_num if auto_flush_num is not None else -1 self.auto_flush_time = auto_flush_time * 1000 if auto_flush_time is not None else -1 self.shutting_down = False def write( self, data, pid = None ): self.queue.put( ( Console.MSG.WRITE, pid if pid is not None else os.getpid(), data ) ) def writeflush( self, data, pid = None ): pid = pid if pid is not None else os.getpid() self.queue.put( ( Console.MSG.WRITE, pid, data ) ) self.queue.put( ( Console.MSG.FLUSH, pid ) ) def flush( self, pid = None ): self.queue.put( ( 
Console.MSG.FLUSH, pid if pid is not None else os.getpid() ) ) def clear( self, pid = None ): self.queue.put( ( Console.MSG.CLEAR, pid if pid is not None else os.getpid() ) ) def __enter__( self ): self.start( ) return self def __exit__( self, type, value, tb ): self.queue.put( ( Console.MSG.SHUTDOWN, ) ) self.queue.join( ) def run( self ): while True: data = self.queue.get( ) event = data[0] if event == Console.MSG.SHUTDOWN: # flush remaining buffers before shutting down for ( pid, buffer ) in self.buffers.items( ): for line in buffer: print( line ) self.buffers.clear( ) self.buffer_write_times.clear( ) self.queue.task_done( ) #print(self.queue.qsize()) #print(self.queue.empty()) break elif event == Console.MSG.WRITE: pid, s = data[ 1 : ] if pid not in self.buffers: self.buffers[ pid ] = [] if pid not in self.buffer_write_times: self.buffer_write_times[ pid ] = datetime.datetime.now( ) self.buffers[ pid ].append( s ) if self.auto_flush_num >= 0 and len( self.buffers[ pid ] ) >= self.auto_flush_num: self.flush( pid ) elif self.auto_flush_time >= 0 and ( datetime.datetime.now( ) - self.buffer_write_times[ pid ] ).microseconds >= self.auto_flush_time: self.flush( pid ) # TODO: if buffer is not empty and we don't auto flush on write, sleep until a time then auto flush according to auto_flush_time elif event == Console.MSG.FLUSH: pid = data[ 1 ] if pid in self.buffers: for line in self.buffers[ pid ]: print( line ) self.buffers.pop( pid, None ) self.buffer_write_times[ pid ] = datetime.datetime.now( ) elif event == Console.MSG.CLEAR: pid = data[ 1 ] if pid in self.buffers: self.buffers.pop( pid, None ) self.queue.task_done( ) # class Task( threading.Event ): # def __init__( data, cmd = None ): # threading.Event.__init__( self ) # self.cmd = cmd if cmd is None MSG.RUN_FUNCTION # self.data = data # def isDone( self ): # return self.isSet() # def join( self ): # self.wait( ) class Worker( threading.Thread ): def __init__( self, console, queue, files_to_ignore ): 
threading.Thread.__init__( self ) self.console = console self.queue = queue self.files_to_ignore = files_to_ignore def run( self ): while True: ( cmd, data ) = self.queue.get( ) if cmd == MSG.SHUTDOWN: self.console.flush( ) self.queue.task_done( ) break if cmd == MSG.RUN_FUNCTION: break if cmd != MSG.PARSE_DIRECTORY or data is None: self.console.flush( ) self.queue.task_done( ) continue directory = data # add threading stuffs self.queue.task_done( ) def main( args ): start = time.clock() # check requirements if call_process( 'p4 -V' ) != 0: print( 'Perforce Command-line Client(p4) is required for this script.' ) sys.exit( 1 ) #http://docs.python.org/library/optparse.html parser = optparse.OptionParser( ) parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None ) parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100 ) parser.add_option( "-q", "--quiet", action="store_true", dest="quiet", help="This overrides verbose", default=False ) parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True ) parser.add_option( "-i", "--interactive", action="store_true", dest="interactive", default=False ) ( options, args ) = parser.parse_args( args ) directory = normpath( options.directory if options.directory is not None else os.getcwd( ) ) # get user print("\nChecking p4 info...") result = get_p4_py_results('info') if len(result) == 0 or b'userName' not in result[0].keys(): print("Can't find perforce info, is it even setup?") sys.exit(1) username = get_str_from_process_stdout(result[0][b'userName']) client_host = get_str_from_process_stdout(result[0][b'clientHost']) print("|Done.") client_root = get_client_root() ldirectory = directory.lower() workspace_name = None if client_root is None or not ldirectory.startswith(client_root.lower()): print("\nCurrent directory not in client view, checking other workspaces for user '" + username 
+ "' ...") workspace_name = parse_info_from_command('p4 info', 'Client name: ') # get user workspaces result = get_p4_py_results('workspaces -u ' + username) workspaces = [] for r in result: whost = get_str_from_process_stdout(r[b'Host']) if whost is not None and len(whost) != 0 and client_host != whost: continue workspace = {'root': get_str_from_process_stdout(r[b'Root']), 'name': get_str_from_process_stdout(r[b'client'])} workspaces.append(workspace) del result # check current directory against current workspace, see if it matches existing workspaces. for w in workspaces: wname = w['name'] wlower = w['root'].lower() if ldirectory.startswith(wlower): # set current directory, don't forget to revert it back to the existing one print("|Setting client view to: " + wname) if try_call_process( 'p4 set P4CLIENT=' + wname ): print("|There was a problem trying to set the p4 client view (workspace).") sys.exit(1) break else: print( "|Couldn't find a workspace root that matches the current directory for the current user." ) sys.exit(1) print("|Done.") # Files are added from .p4ignore # Key is the file root, the value is the table of file regexes for that directory. files_to_ignore = PDict() processed_file_count = 0 processed_directory_count = 0 remove_file_count = 0 remove_dir_count = 0 warning_count = 0 error_count = 0 with Console( auto_flush_num=20, auto_flush_time=1000 ) as c: if not options.quiet: c.writeflush( "\nCaching files in depot, this may take a little while..." ) # TODO: push this off to a thread and walk the directory so we get a headstart. files_in_depot = get_client_set( directory ) c.writeflush( "|Done." ) # TODO: push a os.walk request off to a thread to build a list of files in the directory; create batch based on directory? # TODO: at this point join on both tasks to wait until they're done # TODO: kick off file removal, make batches from the files for threads to work on since testing has to be done for each. 
# need to figure out the best way to do this since the ignore list needs to be properly built for each directory; # will at least need to redo how the ignore lists are handled for efficiencies sake. if not options.quiet: c.writeflush( "\nChecking " + directory) for root, dirs, files in os.walk( directory ): ignore_list = get_ignore_list( root, files_to_ignore ) if not options.quiet: c.write( "|Checking " + os.path.relpath( root, directory ) ) for d in dirs: processed_directory_count += 1 path = join( root, d ) rel_path = os.path.relpath( path, directory ) if match_in_ignore_list( path, ignore_list ): # add option of using send2trash if not options.quiet: c.write( "| ignoring " + rel_path ) dirs.remove( d ) for f in files: processed_file_count += 1 path = normpath( join( root, f ) ) if path not in files_in_depot: if not options.quiet: c.write( "| " + f + " is unversioned, removing it." ) try: os.chmod( path, stat.S_IWRITE ) os.remove( path ) remove_file_count += 1 except OSError as ex: c.writeflush( "| " + type( ex ).__name__ ) c.writeflush( "| " + repr( ex ) ) c.writeflush( "| ^ERROR^" ) error_count += 1 if not options.quiet: c.write( "|Done." ) if not options.quiet: c.write( os.linesep + "Removing empty directories...") # remove empty directories in reverse order for root, dirs, files in os.walk( directory, topdown=False ): ignore_list = get_ignore_list( root, files_to_ignore ) for d in dirs: processed_directory_count += 1 path = os.path.join( root, d ) rel_path = os.path.relpath( path, directory ) if match_in_ignore_list( path, ignore_list ): # add option of using send2trash if not options.quiet: c.write( "| ignoring " + rel_path ) dirs.remove( d ) try: os.rmdir(path) remove_dir_count += 1 if not options.quiet: c.write( "| " + rel_path + " was removed." ) except OSError: # Fails on non-empty directory pass if not options.quiet: c.write( "|Done." ) # This needs to happen automatically even when an exception happens, when we leave scope. 
if workspace_name is not None: c.write("\nReverting back to original client view...") # set workspace back to the original one if try_call_process( 'p4 set P4CLIENT=' + workspace_name ): error_count += 1 if not options.quiet: c.write("|There was a problem trying to restore the set p4 client view (workspace).") else: if not options.quiet: c.write("|Reverted client view back to '" + workspace_name + "'.") if not options.quiet: c.write("|Done.") if not options.quiet: output = "\nChecked " + str( processed_file_count ) + singular_pulural( processed_file_count, " file, ", " files, " ) output += str( processed_directory_count ) + singular_pulural( processed_directory_count, " directory", " directories") output += "\nRemoved " + str( remove_file_count ) + singular_pulural( remove_file_count, " file, ", " files, " ) output += str( remove_dir_count ) + singular_pulural( remove_dir_count, " directory", " directories") if warning_count > 0: output += " w/ " + str( warning_count ) + singular_pulural( warning_count, " warning", " warnings" ) if error_count > 0: output += " w/ " + str( error_count ) + singular_pulural( error_count, " error", " errors" ) end = time.clock() delta = end - start output += "\nFinished in " + str(delta) + "s" c.write( output ) if __name__ == "__main__": try: main( sys.argv ) except: print( "Unexpected error!" ) traceback.print_exc( file = sys.stdout )