From 266c5555baeec72ab0b076a6d54b81e7b4ac28e4 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 8 May 2014 18:43:05 -0600 Subject: [PATCH] Improved crawling speed, also cleaned up the output. So the speed of the script is much faster than before, though it actually still has much room for improvement, it will just be more complicated. Calling 'p4 fstat' on the entire directory will give you everything you need up front, it's just they're in depot paths, which makes things a little annoying to parse when you have workspace mappings that move things around so the local path may differ from the depot path, and it becomes harder to determine 100% that you're referring to the same file. And I don't want to have to call p4 on every file to be sure of that, what I'm doing now is the easiest, safest way to be sure of that, as far as I know. Another way to speed this up is to add thread crawlers, I'm just not yet sure with HDDs and SSDs how many threads is a good idea to use. --- p4RemoveUnversioned.py | 63 +++++++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/p4RemoveUnversioned.py b/p4RemoveUnversioned.py index 81f357f..fc53418 100644 --- a/p4RemoveUnversioned.py +++ b/p4RemoveUnversioned.py @@ -8,9 +8,6 @@ import inspect, os, re, subprocess, sys, traceback -modules_path = os.path.normpath( os.path.realpath( __file__ ) ) - - re_remove_comment = re.compile( "#.*$" ) def remove_comment( s ): return re.sub( re_remove_comment, "", s ) @@ -55,12 +52,17 @@ def main( ): return True return False - + root = "." + root_full_path = os.getcwd() p4_ignore = ".p4ignore" - for root, dirs, files in os.walk( '.' 
): + # make sure script doesn't delete itself - print ("\nChecking '" + root + "' ...") + files_to_ignore[ root ] = [ re.compile( os.path.join( re.escape( root + os.sep ), os.path.basename( __file__ ) ) ) ] + + for root, dirs, files in os.walk( root ): + + print ( os.linesep + "Checking '" + root + "' ...") if p4_ignore in files: file_regexes = [] @@ -74,43 +76,72 @@ def main( ): if len( new_line ) > 0: file_regexes.append( re.compile( os.path.join( re.escape( root + os.sep ), new_line ) ) ) - print( " appending ignores from " + path ) + print( "|appending ignores from " + path ) files_to_ignore[ root ] = file_regexes ignore_list = get_ignore_list( root ) + #command = "p4 have \"" + root + os.sep + "*\"" + command = "p4 fstat *" + + print("|" + command) + + os.chdir( root ) + proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) + (out, err) = proc.communicate() + os.chdir( root_full_path ) + + # For ease we're doing a weird solution, rebuilding the file list. This is so we only need to parse unadded files. + files = [] + + for line in err.split( os.linesep ): + if len(line) == 0: + continue + # # dirty hack that grabs the filename from the ends of the printed out (not err) "depo_path - local_path" + # # I could use regex to verify the expected string, but that will just slow us down. + # basename = os.path.basename( line ) + i = line.rfind( ' - ') + if i >= 0: + basename = line[ : i ] + path = os.path.join( root, basename ) + if not os.path.isdir( path ): + for file in files: + if file == basename: + files.append( file ) + break + for file in files: path = os.path.join( root, file ) if match_in_ignore_list( path, ignore_list ): - print( " ignoring " + path ) + print( "| ignoring " + path ) continue - proc = subprocess.Popen( "p4 fstat \"" + path + "\"", stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - (out, err) = proc.communicate() - - if len( err ) > 0: - print( " " + file + " is unversioned, removing it." 
) - os.remove( path ) + print( "| " + file + " is unversioned, removing it." ) + os.remove( path ) dirs_copy = dirs for d in dirs_copy: path = os.path.join( root, d ) if match_in_ignore_list( path, ignore_list ): # add option of using send2trash - print( " ignoring " + d ) + print( "| ignoring " + d ) dirs.remove( d ) + print( "|Done." ) + + print( os.linesep + "Removing empty directories...") # remove empty directories for root, dirs, files in os.walk( '.', topdown=False ): for d in dirs: try: os.rmdir(d) - print( " " + d + " was removed." ) + print( "|" + d + " was removed." ) except OSError: # Fails on non-empty directory pass + print( "|Done." ) if __name__ == "__main__": try: