Grabs the depot tree up front to make looping through the directory faster.
The big catch right now is that this method is single-threaded; I haven't made it multi-threaded yet, but it definitely looks like it could benefit from it.
This commit is contained in:
parent
8d425d6413
commit
c175b21dcf
|
@ -11,7 +11,7 @@
|
||||||
# todo: buffer output, after exceeding a certain amount print to the output.
|
# todo: buffer output, after exceeding a certain amount print to the output.
|
||||||
# todo: allow logging output besides console output, or redirection altogether
|
# todo: allow logging output besides console output, or redirection altogether
|
||||||
|
|
||||||
import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback
|
import inspect, multiprocessing, optparse, os, platform, re, stat, subprocess, sys, threading, traceback
|
||||||
|
|
||||||
# trying ntpath, need to test on linux
|
# trying ntpath, need to test on linux
|
||||||
import ntpath
|
import ntpath
|
||||||
|
@ -36,10 +36,27 @@ p4_ignore = ".p4ignore"
|
||||||
main_pid = os.getpid( )
|
main_pid = os.getpid( )
|
||||||
|
|
||||||
|
|
||||||
|
#if os.name == 'nt' or sys.platform == 'cygwin'
|
||||||
def basename( path ):
|
def basename( path ):
|
||||||
|
# TODO: import based on platform
|
||||||
|
# https://docs.python.org/2/library/os.path.html
|
||||||
|
# posixpath for UNIX-style paths
|
||||||
|
# ntpath for Windows paths
|
||||||
|
# macpath for old-style MacOS paths
|
||||||
|
# os2emxpath for OS/2 EMX paths
|
||||||
|
|
||||||
#return os.path.basename( path )
|
#return os.path.basename( path )
|
||||||
return ntpath.basename( path )
|
return ntpath.basename( path )
|
||||||
|
|
||||||
|
def normpath( path ):
|
||||||
|
return ntpath.normpath( path )
|
||||||
|
|
||||||
|
def join( patha, pathb ):
|
||||||
|
return ntpath.join( patha, pathb )
|
||||||
|
|
||||||
|
def splitdrive( path ):
|
||||||
|
return ntpath.splitdrive( path )
|
||||||
|
|
||||||
def get_ignore_list( path, files_to_ignore ):
|
def get_ignore_list( path, files_to_ignore ):
|
||||||
# have to split path and test top directory
|
# have to split path and test top directory
|
||||||
dirs = path.split( os.sep )
|
dirs = path.split( os.sep )
|
||||||
|
@ -60,6 +77,31 @@ def match_in_ignore_list( path, ignore_list ):
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Keep these in mind if you have issues:
|
||||||
|
# https://stackoverflow.com/questions/16557908/getting-output-of-a-process-at-runtime
|
||||||
|
# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
|
||||||
|
def get_client_set( path ):
|
||||||
|
files = set( [ ] )
|
||||||
|
|
||||||
|
make_drive_upper = True if os.name == 'nt' or sys.platform == 'cygwin' else False
|
||||||
|
|
||||||
|
command = "p4 fstat ..."
|
||||||
|
|
||||||
|
proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=None, cwd=path )
|
||||||
|
for line in proc.stdout:
|
||||||
|
clientFile_tag = "... clientFile "
|
||||||
|
if not line.startswith( clientFile_tag ):
|
||||||
|
continue
|
||||||
|
|
||||||
|
local_path = normpath( line[ len( clientFile_tag ) : ].strip( ) )
|
||||||
|
if make_drive_upper:
|
||||||
|
drive, path = splitdrive( local_path )
|
||||||
|
local_path = ''.join( [ drive.upper( ), path ] )
|
||||||
|
|
||||||
|
files.add( local_path )
|
||||||
|
|
||||||
|
return files
|
||||||
|
|
||||||
class PTable( list ):
|
class PTable( list ):
|
||||||
def __init__( self, *args ):
|
def __init__( self, *args ):
|
||||||
list.__init__( self, args )
|
list.__init__( self, args )
|
||||||
|
@ -73,20 +115,27 @@ class PDict( dict ):
|
||||||
class Console( threading.Thread ):
|
class Console( threading.Thread ):
|
||||||
MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )
|
MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )
|
||||||
|
|
||||||
def __init__( self ):
|
def __init__( self, auto_flush_num = None, auto_flush_time = None ):
|
||||||
threading.Thread.__init__( self )
|
threading.Thread.__init__( self )
|
||||||
self.buffers = {}
|
self.buffers = {}
|
||||||
self.running = True
|
self.running = True
|
||||||
self.queue = multiprocessing.JoinableQueue( )
|
self.queue = multiprocessing.JoinableQueue( )
|
||||||
|
self.auto_flush_num = auto_flush_num if auto_flush_num is not None else -1
|
||||||
|
self.auto_flush_time = auto_flush_time if auto_flush_time is not None else -1
|
||||||
|
|
||||||
def write( self, data ):
|
def write( self, data, pid = None ):
|
||||||
self.queue.put( ( Console.MSG.WRITE, os.getpid(), data ) )
|
self.queue.put( ( Console.MSG.WRITE, pid if pid is not None else os.getpid(), data ) )
|
||||||
|
|
||||||
def flush( self ):
|
def writeflush( self, data, pid = None ):
|
||||||
self.queue.put( ( Console.MSG.FLUSH, os.getpid() ) )
|
pid = pid if pid is not None else os.getpid()
|
||||||
|
self.queue.put( ( Console.MSG.WRITE, pid, data ) )
|
||||||
|
self.queue.put( ( Console.MSG.FLUSH, pid ) )
|
||||||
|
|
||||||
def clear( self ):
|
def flush( self, pid = None ):
|
||||||
self.queue.put( ( Console.MSG.CLEAR, os.getpid() ) )
|
self.queue.put( ( Console.MSG.FLUSH, pid if pid is not None else os.getpid() ) )
|
||||||
|
|
||||||
|
def clear( self, pid = None ):
|
||||||
|
self.queue.put( ( Console.MSG.CLEAR, pid if pid is not None else os.getpid() ) )
|
||||||
|
|
||||||
def __enter__( self ):
|
def __enter__( self ):
|
||||||
self.start( )
|
self.start( )
|
||||||
|
@ -103,7 +152,7 @@ class Console( threading.Thread ):
|
||||||
|
|
||||||
if event == Console.MSG.SHUTDOWN:
|
if event == Console.MSG.SHUTDOWN:
|
||||||
# flush remaining buffers before shutting down
|
# flush remaining buffers before shutting down
|
||||||
for ( pid, buffer ) in self.buffers.iteritems( ):
|
for ( pid, buffer ) in self.buffers.items( ):
|
||||||
for line in buffer:
|
for line in buffer:
|
||||||
print( line )
|
print( line )
|
||||||
self.buffers.clear( )
|
self.buffers.clear( )
|
||||||
|
@ -116,6 +165,9 @@ class Console( threading.Thread ):
|
||||||
if pid not in self.buffers:
|
if pid not in self.buffers:
|
||||||
self.buffers[ pid ] = []
|
self.buffers[ pid ] = []
|
||||||
self.buffers[ pid ].append( s )
|
self.buffers[ pid ].append( s )
|
||||||
|
|
||||||
|
if self.auto_flush_num >= 0 and len( self.buffers[ pid ] ) > self.auto_flush_num:
|
||||||
|
self.flush( pid )
|
||||||
elif event == Console.MSG.FLUSH:
|
elif event == Console.MSG.FLUSH:
|
||||||
pid = data[ 1 ]
|
pid = data[ 1 ]
|
||||||
if pid in self.buffers:
|
if pid in self.buffers:
|
||||||
|
@ -170,12 +222,13 @@ class Worker( threading.Thread ):
|
||||||
if p4_ignore in dir_contents:
|
if p4_ignore in dir_contents:
|
||||||
file_regexes = []
|
file_regexes = []
|
||||||
# Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
|
# Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
|
||||||
path = os.path.join( directory, p4_ignore )
|
path = join( directory, p4_ignore )
|
||||||
with open( path ) as f:
|
with open( path ) as f:
|
||||||
for line in f:
|
for line in f:
|
||||||
new_line = remove_comment( line.strip( ) )
|
new_line = remove_comment( line.strip( ) )
|
||||||
if len( new_line ) > 0:
|
if len( new_line ) > 0:
|
||||||
file_regexes.append( re.compile( os.path.join( re.escape( directory + os.sep ), new_line ) ) )
|
# doesn't look quite right, fix it:
|
||||||
|
file_regexes.append( re.compile( join( re.escape( directory + os.sep ), new_line ) ) )
|
||||||
|
|
||||||
self.console.write( "| Appending ignores from " + path )
|
self.console.write( "| Appending ignores from " + path )
|
||||||
with self.files_to_ignore.mutex:
|
with self.files_to_ignore.mutex:
|
||||||
|
@ -216,13 +269,13 @@ class Worker( threading.Thread ):
|
||||||
if base == "*" or len(base) == 0:
|
if base == "*" or len(base) == 0:
|
||||||
# Directory is empty, we could delete it now
|
# Directory is empty, we could delete it now
|
||||||
continue
|
continue
|
||||||
path = os.path.join( directory, base )
|
path = join( directory, base )
|
||||||
|
|
||||||
if not os.path.isdir( path ):
|
if not os.path.isdir( path ):
|
||||||
files.append( base )
|
files.append( base )
|
||||||
|
|
||||||
for content in dir_contents:
|
for content in dir_contents:
|
||||||
path = os.path.join( directory, content )
|
path = join( directory, content )
|
||||||
if os.path.isdir( path ):
|
if os.path.isdir( path ):
|
||||||
if match_in_ignore_list( path, ignore_list ):
|
if match_in_ignore_list( path, ignore_list ):
|
||||||
self.console.write( "| Ignoring " + content )
|
self.console.write( "| Ignoring " + content )
|
||||||
|
@ -230,7 +283,7 @@ class Worker( threading.Thread ):
|
||||||
self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )
|
self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
path = os.path.join( directory, file )
|
path = join( directory, file )
|
||||||
|
|
||||||
if match_in_ignore_list( path, ignore_list ):
|
if match_in_ignore_list( path, ignore_list ):
|
||||||
self.console.write( "| Ignoring " + path )
|
self.console.write( "| Ignoring " + path )
|
||||||
|
@ -261,66 +314,45 @@ def main( args ):
|
||||||
|
|
||||||
parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
|
parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
|
||||||
parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100 )
|
parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100 )
|
||||||
parser.add_option( "-q", "--quiet", action="store_false", dest="quiet", default=False )
|
parser.add_option( "-q", "--quiet", action="store_false", dest="quiet", help="This overrides verbose", default=False )
|
||||||
parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )
|
parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )
|
||||||
|
|
||||||
( options, args ) = parser.parse_args( )
|
( options, args ) = parser.parse_args( )
|
||||||
|
|
||||||
root_full_path = os.getcwd( )
|
directory = normpath( options.directory if options.directory is not None else os.getcwd( ) )
|
||||||
|
|
||||||
# Files are added from .p4ignore
|
with Console( auto_flush_num=20, auto_flush_time=1000 ) as c:
|
||||||
# Key is the file root, the value is the table of file regexes for that directory.
|
c.writeflush( "Caching files in depot..." )
|
||||||
files_to_ignore = PDict()
|
files_in_depot = get_client_set( directory )
|
||||||
|
|
||||||
# make sure script doesn't delete itself
|
c.writeflush( "Checking " + directory)
|
||||||
with files_to_ignore.mutex:
|
for root, dirs, files in os.walk( directory ):
|
||||||
files_to_ignore[ root_full_path ] = [ re.compile( re.escape( os.path.join( root_full_path, basename( __file__ ) ) ) ) ]
|
ignore_list = PDict()#get_ignore_list( root, files_to_ignore )
|
||||||
|
|
||||||
# Setup threading
|
c.write( "|Checking " + root )
|
||||||
threads = []
|
|
||||||
thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( ) + threads
|
|
||||||
|
|
||||||
queue = multiprocessing.JoinableQueue( )
|
|
||||||
|
|
||||||
with Console() as c:
|
|
||||||
for i in range( thread_count ):
|
|
||||||
t = Worker( c, queue, files_to_ignore )
|
|
||||||
threads.append( t )
|
|
||||||
t.start( )
|
|
||||||
|
|
||||||
if len( threads ) == 1:
|
|
||||||
print( "Spawned %s thread." % len( threads ) )
|
|
||||||
else:
|
|
||||||
print( "Spawned %s threads." % len( threads ) )
|
|
||||||
|
|
||||||
queue.put( ( MSG.PARSE_DIRECTORY, options.directory if options.directory is not None else os.getcwd( ) ) )
|
|
||||||
queue.join( )
|
|
||||||
|
|
||||||
for i in range( thread_count ):
|
|
||||||
queue.put( ( MSG.SHUTDOWN, None ) )
|
|
||||||
|
|
||||||
print( os.linesep + "Removing empty directories...")
|
|
||||||
# remove empty directories in reverse order
|
|
||||||
for root, dirs, files in os.walk( root_full_path, topdown=False ):
|
|
||||||
ignore_list = get_ignore_list( root, files_to_ignore )
|
|
||||||
|
|
||||||
for d in dirs:
|
for d in dirs:
|
||||||
path = os.path.join( root, d )
|
path = join( root, d )
|
||||||
|
|
||||||
if match_in_ignore_list( path, ignore_list ):
|
if match_in_ignore_list( path, ignore_list ):
|
||||||
# add option of using send2trash
|
# add option of using send2trash
|
||||||
print( "| ignoring " + d )
|
c.write( "| ignoring " + d )
|
||||||
dirs.remove( d )
|
dirs.remove( d )
|
||||||
try:
|
|
||||||
os.rmdir(path)
|
|
||||||
print( "| " + d + " was removed." )
|
|
||||||
except OSError:
|
|
||||||
# Fails on non-empty directory
|
|
||||||
pass
|
|
||||||
print( "|Done." )
|
|
||||||
|
|
||||||
for t in threads:
|
for f in files:
|
||||||
t.join( )
|
path = normpath( join( root, f ) )
|
||||||
|
|
||||||
|
if path not in files_in_depot:
|
||||||
|
c.write( "| " + path )
|
||||||
|
c.write( "| " + f + " is unversioned, removing it." )
|
||||||
|
#try:
|
||||||
|
# os.chmod( path, stat.S_IWRITE )
|
||||||
|
# os.remove( path )
|
||||||
|
#except OSError as ex:
|
||||||
|
# c.writeflush( "| " + type( ex ).__name__ )
|
||||||
|
# c.writeflush( "| " + repr( ex ) )
|
||||||
|
# c.writeflush( "|ERROR." )
|
||||||
|
c.write( "|Done." )
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Reference in New Issue