Grabs the depot tree up front to make looping through the directory faster.

The big catch right now is that this method is single-threaded; I haven't
made it multi-threaded yet, but it definitely looks like it could benefit
from that.
This commit is contained in:
Brian 2014-05-09 17:19:44 -06:00
parent 8d425d6413
commit c175b21dcf
1 changed file with 92 additions and 60 deletions

View File

@ -11,7 +11,7 @@
# todo: buffer output, after exceeding a certain amount print to the output. # todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether # todo: allow logging output besides console output, or redirection altogether
import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback import inspect, multiprocessing, optparse, os, platform, re, stat, subprocess, sys, threading, traceback
# trying ntpath, need to test on linux # trying ntpath, need to test on linux
import ntpath import ntpath
@ -36,10 +36,27 @@ p4_ignore = ".p4ignore"
main_pid = os.getpid( ) main_pid = os.getpid( )
#if os.name == 'nt' or sys.platform == 'cygwin'
def basename( path ): def basename( path ):
# TODO: import based on platform
# https://docs.python.org/2/library/os.path.html
# posixpath for UNIX-style paths
# ntpath for Windows paths
# macpath for old-style MacOS paths
# os2emxpath for OS/2 EMX paths
#return os.path.basename( path ) #return os.path.basename( path )
return ntpath.basename( path ) return ntpath.basename( path )
def normpath( path ):
    """Normalize *path* using Windows-style (ntpath) rules.

    Thin wrapper so the rest of the script can switch path flavors in
    one place (see the platform TODO on basename()).
    """
    return ntpath.normpath( path )
def join( patha, pathb, *extra ):
    """Join path components using Windows-style (ntpath) rules.

    Generalized to accept any number of trailing components (ntpath.join
    is variadic); existing two-argument callers are unaffected.
    """
    return ntpath.join( patha, pathb, *extra )
def splitdrive( path ):
    """Split *path* into (drive, tail) using Windows-style (ntpath) rules.

    Thin wrapper kept alongside normpath()/join() so the path flavor is
    chosen in a single place.
    """
    return ntpath.splitdrive( path )
def get_ignore_list( path, files_to_ignore ): def get_ignore_list( path, files_to_ignore ):
# have to split path and test top directory # have to split path and test top directory
dirs = path.split( os.sep ) dirs = path.split( os.sep )
@ -60,6 +77,31 @@ def match_in_ignore_list( path, ignore_list ):
return True return True
return False return False
# Keep these in mind if you have issues:
# https://stackoverflow.com/questions/16557908/getting-output-of-a-process-at-runtime
# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
def get_client_set( path ):
    """Return the set of local file paths known to the Perforce client.

    Runs ``p4 fstat ...`` with *path* as the working directory and collects
    every ``clientFile`` record, normalized through normpath().  On Windows
    and Cygwin the drive letter is upper-cased so that later membership
    tests compare paths consistently.

    NOTE(review): stdout is assumed to yield text lines matching the tag
    prefix; on Python 3 this would need ``universal_newlines=True`` (or
    decoding) — confirm which interpreter this targets.
    """
    files = set( )
    # Windows file systems are case-insensitive; canonicalize drive case.
    make_drive_upper = os.name == 'nt' or sys.platform == 'cygwin'

    command = "p4 fstat ..."
    # Loop-invariant: hoisted out of the per-line loop.
    clientFile_tag = "... clientFile "

    proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=None, cwd=path )
    try:
        for line in proc.stdout:
            if not line.startswith( clientFile_tag ):
                continue

            local_path = normpath( line[ len( clientFile_tag ) : ].strip( ) )

            if make_drive_upper:
                # Use a fresh name for the tail; the original clobbered the
                # 'path' parameter here.
                drive, tail = splitdrive( local_path )
                local_path = ''.join( [ drive.upper( ), tail ] )

            files.add( local_path )
    finally:
        # Close the pipe and reap the child so we don't leak a zombie
        # process or an open file descriptor.
        proc.stdout.close( )
        proc.wait( )

    return files
class PTable( list ): class PTable( list ):
def __init__( self, *args ): def __init__( self, *args ):
list.__init__( self, args ) list.__init__( self, args )
@ -73,20 +115,27 @@ class PDict( dict ):
class Console( threading.Thread ): class Console( threading.Thread ):
MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' ) MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )
def __init__( self ): def __init__( self, auto_flush_num = None, auto_flush_time = None ):
threading.Thread.__init__( self ) threading.Thread.__init__( self )
self.buffers = {} self.buffers = {}
self.running = True self.running = True
self.queue = multiprocessing.JoinableQueue( ) self.queue = multiprocessing.JoinableQueue( )
self.auto_flush_num = auto_flush_num if auto_flush_num is not None else -1
self.auto_flush_time = auto_flush_time if auto_flush_time is not None else -1
def write( self, data ): def write( self, data, pid = None ):
self.queue.put( ( Console.MSG.WRITE, os.getpid(), data ) ) self.queue.put( ( Console.MSG.WRITE, pid if pid is not None else os.getpid(), data ) )
def flush( self ): def writeflush( self, data, pid = None ):
self.queue.put( ( Console.MSG.FLUSH, os.getpid() ) ) pid = pid if pid is not None else os.getpid()
self.queue.put( ( Console.MSG.WRITE, pid, data ) )
self.queue.put( ( Console.MSG.FLUSH, pid ) )
def clear( self ): def flush( self, pid = None ):
self.queue.put( ( Console.MSG.CLEAR, os.getpid() ) ) self.queue.put( ( Console.MSG.FLUSH, pid if pid is not None else os.getpid() ) )
def clear( self, pid = None ):
self.queue.put( ( Console.MSG.CLEAR, pid if pid is not None else os.getpid() ) )
def __enter__( self ): def __enter__( self ):
self.start( ) self.start( )
@ -103,7 +152,7 @@ class Console( threading.Thread ):
if event == Console.MSG.SHUTDOWN: if event == Console.MSG.SHUTDOWN:
# flush remaining buffers before shutting down # flush remaining buffers before shutting down
for ( pid, buffer ) in self.buffers.iteritems( ): for ( pid, buffer ) in self.buffers.items( ):
for line in buffer: for line in buffer:
print( line ) print( line )
self.buffers.clear( ) self.buffers.clear( )
@ -116,6 +165,9 @@ class Console( threading.Thread ):
if pid not in self.buffers: if pid not in self.buffers:
self.buffers[ pid ] = [] self.buffers[ pid ] = []
self.buffers[ pid ].append( s ) self.buffers[ pid ].append( s )
if self.auto_flush_num >= 0 and len( self.buffers[ pid ] ) > self.auto_flush_num:
self.flush( pid )
elif event == Console.MSG.FLUSH: elif event == Console.MSG.FLUSH:
pid = data[ 1 ] pid = data[ 1 ]
if pid in self.buffers: if pid in self.buffers:
@ -170,12 +222,13 @@ class Worker( threading.Thread ):
if p4_ignore in dir_contents: if p4_ignore in dir_contents:
file_regexes = [] file_regexes = []
# Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted. # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
path = os.path.join( directory, p4_ignore ) path = join( directory, p4_ignore )
with open( path ) as f: with open( path ) as f:
for line in f: for line in f:
new_line = remove_comment( line.strip( ) ) new_line = remove_comment( line.strip( ) )
if len( new_line ) > 0: if len( new_line ) > 0:
file_regexes.append( re.compile( os.path.join( re.escape( directory + os.sep ), new_line ) ) ) # doesn't look quite right, fix it:
file_regexes.append( re.compile( join( re.escape( directory + os.sep ), new_line ) ) )
self.console.write( "| Appending ignores from " + path ) self.console.write( "| Appending ignores from " + path )
with self.files_to_ignore.mutex: with self.files_to_ignore.mutex:
@ -216,13 +269,13 @@ class Worker( threading.Thread ):
if base == "*" or len(base) == 0: if base == "*" or len(base) == 0:
# Directory is empty, we could delete it now # Directory is empty, we could delete it now
continue continue
path = os.path.join( directory, base ) path = join( directory, base )
if not os.path.isdir( path ): if not os.path.isdir( path ):
files.append( base ) files.append( base )
for content in dir_contents: for content in dir_contents:
path = os.path.join( directory, content ) path = join( directory, content )
if os.path.isdir( path ): if os.path.isdir( path ):
if match_in_ignore_list( path, ignore_list ): if match_in_ignore_list( path, ignore_list ):
self.console.write( "| Ignoring " + content ) self.console.write( "| Ignoring " + content )
@ -230,7 +283,7 @@ class Worker( threading.Thread ):
self.queue.put( ( MSG.PARSE_DIRECTORY, path ) ) self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )
for file in files: for file in files:
path = os.path.join( directory, file ) path = join( directory, file )
if match_in_ignore_list( path, ignore_list ): if match_in_ignore_list( path, ignore_list ):
self.console.write( "| Ignoring " + path ) self.console.write( "| Ignoring " + path )
@ -261,66 +314,45 @@ def main( args ):
parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None ) parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100 ) parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100 )
parser.add_option( "-q", "--quiet", action="store_false", dest="quiet", default=False ) parser.add_option( "-q", "--quiet", action="store_false", dest="quiet", help="This overrides verbose", default=False )
parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True ) parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )
( options, args ) = parser.parse_args( ) ( options, args ) = parser.parse_args( )
root_full_path = os.getcwd( ) directory = normpath( options.directory if options.directory is not None else os.getcwd( ) )
# Files are added from .p4ignore with Console( auto_flush_num=20, auto_flush_time=1000 ) as c:
# Key is the file root, the value is the table of file regexes for that directory. c.writeflush( "Caching files in depot..." )
files_to_ignore = PDict() files_in_depot = get_client_set( directory )
# make sure script doesn't delete itself c.writeflush( "Checking " + directory)
with files_to_ignore.mutex: for root, dirs, files in os.walk( directory ):
files_to_ignore[ root_full_path ] = [ re.compile( re.escape( os.path.join( root_full_path, basename( __file__ ) ) ) ) ] ignore_list = PDict()#get_ignore_list( root, files_to_ignore )
# Setup threading c.write( "|Checking " + root )
threads = []
thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( ) + threads
queue = multiprocessing.JoinableQueue( )
with Console() as c:
for i in range( thread_count ):
t = Worker( c, queue, files_to_ignore )
threads.append( t )
t.start( )
if len( threads ) == 1:
print( "Spawned %s thread." % len( threads ) )
else:
print( "Spawned %s threads." % len( threads ) )
queue.put( ( MSG.PARSE_DIRECTORY, options.directory if options.directory is not None else os.getcwd( ) ) )
queue.join( )
for i in range( thread_count ):
queue.put( ( MSG.SHUTDOWN, None ) )
print( os.linesep + "Removing empty directories...")
# remove empty directories in reverse order
for root, dirs, files in os.walk( root_full_path, topdown=False ):
ignore_list = get_ignore_list( root, files_to_ignore )
for d in dirs: for d in dirs:
path = os.path.join( root, d ) path = join( root, d )
if match_in_ignore_list( path, ignore_list ): if match_in_ignore_list( path, ignore_list ):
# add option of using send2trash # add option of using send2trash
print( "| ignoring " + d ) c.write( "| ignoring " + d )
dirs.remove( d ) dirs.remove( d )
try:
os.rmdir(path)
print( "| " + d + " was removed." )
except OSError:
# Fails on non-empty directory
pass
print( "|Done." )
for t in threads: for f in files:
t.join( ) path = normpath( join( root, f ) )
if path not in files_in_depot:
c.write( "| " + path )
c.write( "| " + f + " is unversioned, removing it." )
#try:
# os.chmod( path, stat.S_IWRITE )
# os.remove( path )
#except OSError as ex:
# c.writeflush( "| " + type( ex ).__name__ )
# c.writeflush( "| " + repr( ex ) )
# c.writeflush( "|ERROR." )
c.write( "|Done." )
if __name__ == "__main__": if __name__ == "__main__":
try: try: