In case a specific directory takes a while, the console now auto-flushes after a specified period of time. Autoflush is currently disabled by default; you have to enable it when creating the console. TODO: I'll probably hook the console up to stdout and stderr so you can use ordinary print statements; we'll see. This is desirable for easily hooking it into an existing module.
369 lines
No EOL
14 KiB
Python
369 lines
No EOL
14 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf8 -*-
|
|
# author : Brian Ernst
|
|
# python_version : 2.7.6 and 3.4.0
|
|
# =================================
|
|
|
|
# todo: switch to `p4 fstat ...`, and parse the output for clientFile and cache it.
|
|
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
|
|
# todo: switch to a faster method of calling p4 fstat on an entire directory and parsing its output
|
|
# todo: add option of using send2trash
|
|
# todo: buffer output, after exceeding a certain amount print to the output.
|
|
# todo: allow logging output besides console output, or redirection altogether
|
|
|
|
import datetime, inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback
|
|
|
|
# trying ntpath, need to test on linux
|
|
import ntpath
|
|
|
|
|
|
# Matches a '#' and the rest of the text after it (input is expected to be a single line).
re_remove_comment = re.compile( "#.*$" )


def remove_comment( s ):
    """Return `s` with its trailing '#...' comment, if any, stripped off."""
    without_comment = re_remove_comment.sub( "", s )
    return without_comment
|
|
|
|
|
|
# Python 2 compatibility: alias `input` to raw_input so it always returns the typed string.
try:
    input = raw_input
except NameError:
    # Python 3: raw_input does not exist and the builtin input already behaves this way.
    pass
|
|
|
|
def enum(*sequential, **named):
    """Build a simple enum-like class.

    Each name in `sequential` becomes a class attribute numbered from 0 in order;
    each keyword in `named` becomes an attribute with the given value (and wins
    over a positional name that collides with it).
    """
    members = {}
    for index, name in enumerate(sequential):
        members[name] = index
    members.update(named)
    return type('Enum', (), members)
|
|
|
|
# Command kinds placed on the Worker queue; Worker.run compares each received command against these.
MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')
|
|
|
|
# Name of the per-directory file that Worker.run looks for and reads ignore patterns from.
p4_ignore = ".p4ignore"
|
|
|
|
# PID of the process that loaded this module (not referenced by the rest of the visible code).
main_pid = os.getpid( )
|
|
|
|
|
|
#if os.name == 'nt' or sys.platform == 'cygwin'
|
|
def basename( path ):
    """Return the last component of `path`, split using Windows (ntpath) rules."""
    # TODO: import the path module based on platform, see
    # https://docs.python.org/2/library/os.path.html
    #   posixpath  - UNIX-style paths
    #   ntpath     - Windows paths
    #   macpath    - old-style MacOS paths
    #   os2emxpath - OS/2 EMX paths
    # Alternative left disabled: os.path.basename( path )
    _head, tail = ntpath.split( path )
    return tail
|
|
|
|
def normpath( path ):
    """Return `path` normalized with Windows (ntpath) rules."""
    normalized = ntpath.normpath( path )
    return normalized
|
|
|
|
def join( patha, pathb ):
    """Join two path components using Windows (ntpath) rules and return the result."""
    joined = ntpath.join( patha, pathb )
    return joined
|
|
|
|
def splitdrive( path ):
    """Split `path` into a ( drive, remainder ) pair using Windows (ntpath) rules."""
    drive, remainder = ntpath.splitdrive( path )
    return drive, remainder
|
|
|
|
def get_ignore_list( path, files_to_ignore ):
    """Collect the ignore entries registered for `path` and for each of its ancestors.

    `files_to_ignore` maps a directory path to a list of entries. `path` is split on
    os.sep and every leading prefix (shortest first, ending with `path` itself) is
    looked up; the lists that are found are concatenated in that order.
    """
    collected = [ ]

    prefix = None
    for component in path.split( os.sep ):
        # Grow the prefix one component at a time: "a", "a/b", "a/b/c", ...
        prefix = component if prefix is None else prefix + os.sep + component

        entries = files_to_ignore.get( prefix ) if prefix in files_to_ignore else None
        if entries is not None:
            collected.extend( entries )

    return collected
|
|
|
|
def match_in_ignore_list( path, ignore_list ):
    """Return True if any pattern in `ignore_list` matches at the start of `path`.

    Patterns may be compiled regexes or pattern strings (anything re.match accepts).
    """
    return any( re.match( pattern, path ) for pattern in ignore_list )
|
|
|
|
# Keep these in mind if you have issues:
|
|
# https://stackoverflow.com/questions/16557908/getting-output-of-a-process-at-runtime
|
|
# https://stackoverflow.com/questions/4417546/constantly-print-subprocess-output-while-process-is-running
|
|
def get_client_set( path ):
    """Return the set of local file paths that Perforce reports under `path`.

    Runs `p4 fstat ...` with `path` as the working directory and collects the value of
    every "... clientFile" line of its output. Each value is normalized with normpath();
    on Windows and cygwin the drive part is upper-cased.

    Raises whatever subprocess.Popen raises if p4 cannot be started, and
    UnicodeDecodeError (Python 3) if the output is not valid UTF-8.
    """
    files = set( )

    make_drive_upper = os.name == 'nt' or sys.platform == 'cygwin'
    clientFile_tag = "... clientFile "
    command = "p4 fstat ..."

    proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=None, cwd=path )
    try:
        for line in proc.stdout:
            # On Python 3 the pipe yields bytes, which cannot be compared against the str tag.
            # Decode strictly: a silently mangled path would make a versioned file look unversioned.
            # On Python 2 the line is already a str and is left untouched.
            if not isinstance( line, str ):
                line = line.decode( 'utf-8' )

            if not line.startswith( clientFile_tag ):
                continue

            local_path = normpath( line[ len( clientFile_tag ) : ].strip( ) )
            if make_drive_upper:
                drive, remainder = splitdrive( local_path )
                local_path = ''.join( [ drive.upper( ), remainder ] )

            files.add( local_path )
    finally:
        # Release the pipe and reap the child so it does not linger after we return.
        proc.stdout.close( )
        proc.wait( )

    return files
|
|
|
|
class PTable( list ):
    """A list that carries a `mutex` semaphore callers can use to guard access to it."""

    def __init__( self, *args ):
        # Every positional argument becomes one element of the list.
        super( PTable, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
|
|
|
|
class PDict( dict ):
    """A dict that carries a `mutex` semaphore callers can use to guard access to it."""

    def __init__( self, *args ):
        # Every positional argument must be a ( key, value ) pair; together they seed the dict.
        super( PDict, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
|
|
|
|
class Console( threading.Thread ):
    """Thread that buffers output lines per process id and prints them in batches.

    write()/writeflush()/flush()/clear() only enqueue messages; the run() loop owns the
    buffers and does the printing. Used as a context manager, entering starts the thread
    and exiting enqueues a shutdown request and waits until every queued message has
    been processed.
    """

    MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )

    def __init__( self, auto_flush_num = None, auto_flush_time = None ):
        """Create the console; both auto-flush triggers are disabled when left as None.

        auto_flush_num:  flush a pid's buffer once it holds at least this many lines.
        auto_flush_time: time in milliseconds since the pid's last flush (or first write)
                         after which a write triggers a flush.
        """
        threading.Thread.__init__( self )
        self.buffers = {}
        self.buffer_write_times = {}
        self.running = True
        self.queue = multiprocessing.JoinableQueue( )
        self.auto_flush_num = auto_flush_num if auto_flush_num is not None else -1
        # Stored in microseconds; -1 means disabled.
        self.auto_flush_time = auto_flush_time * 1000 if auto_flush_time is not None else -1

    def write( self, data, pid = None ):
        """Queue `data` to be appended to the buffer of `pid` (default: the calling process)."""
        self.queue.put( ( Console.MSG.WRITE, pid if pid is not None else os.getpid(), data ) )

    def writeflush( self, data, pid = None ):
        """Queue `data` for `pid` followed immediately by a flush of that buffer."""
        pid = pid if pid is not None else os.getpid()
        self.queue.put( ( Console.MSG.WRITE, pid, data ) )
        self.queue.put( ( Console.MSG.FLUSH, pid ) )

    def flush( self, pid = None ):
        """Queue a request to print and empty the buffer of `pid`."""
        self.queue.put( ( Console.MSG.FLUSH, pid if pid is not None else os.getpid() ) )

    def clear( self, pid = None ):
        """Queue a request to discard the buffer of `pid` without printing it."""
        self.queue.put( ( Console.MSG.CLEAR, pid if pid is not None else os.getpid() ) )

    def __enter__( self ):
        self.start( )
        return self

    def __exit__( self, type, value, tb ):
        self.queue.put( ( Console.MSG.SHUTDOWN, ) )
        self.queue.join( )

    def _flush_buffer( self, pid ):
        # Print and drop everything buffered for pid, then restart its auto-flush timer.
        for line in self.buffers.pop( pid, [ ] ):
            print( line )
        self.buffer_write_times[ pid ] = datetime.datetime.now( )

    def _should_auto_flush( self, pid ):
        # Decide whether the buffer of pid has hit the line-count or the elapsed-time trigger.
        if self.auto_flush_num >= 0 and len( self.buffers[ pid ] ) >= self.auto_flush_num:
            return True
        if self.auto_flush_time >= 0:
            elapsed = datetime.datetime.now( ) - self.buffer_write_times[ pid ]
            # timedelta.microseconds is only the sub-second component (0..999999), so the
            # whole duration has to be converted before comparing against the threshold.
            return elapsed.total_seconds( ) * 1000000 >= self.auto_flush_time
        return False

    def run( self ):
        """Process queued messages until a SHUTDOWN message is received."""
        while True:
            data = self.queue.get( )
            event = data[0]

            if event == Console.MSG.SHUTDOWN:
                # flush remaining buffers before shutting down
                for pid in list( self.buffers ):
                    self._flush_buffer( pid )
                self.queue.task_done( )
                break

            elif event == Console.MSG.WRITE:
                pid, s = data[ 1 : ]

                if pid not in self.buffer_write_times:
                    self.buffer_write_times[ pid ] = datetime.datetime.now( )
                self.buffers.setdefault( pid, [ ] ).append( s )

                # Flush right here instead of enqueueing a FLUSH message: a message enqueued
                # by this thread can land behind an already queued SHUTDOWN, would never be
                # marked done, and would leave queue.join() in __exit__ waiting forever.
                if self._should_auto_flush( pid ):
                    self._flush_buffer( pid )

            elif event == Console.MSG.FLUSH:
                self._flush_buffer( data[ 1 ] )

            elif event == Console.MSG.CLEAR:
                self.buffers.pop( data[ 1 ], None )

            self.queue.task_done( )
|
|
|
|
class Worker( threading.Thread ):
    """Thread that takes directories off a shared queue and removes their unversioned files.

    For every ( MSG.PARSE_DIRECTORY, directory ) message it loads the directory's .p4ignore
    patterns (if present), runs `p4 fstat *` there, treats each name reported on stderr as
    unversioned, queues the non-ignored sub-directories for processing and deletes the
    non-ignored unversioned files. A ( MSG.SHUTDOWN, ... ) message ends the thread.

    console:         Console used for all output.
    queue:           joinable queue of ( command, data ) tuples shared by the workers.
    files_to_ignore: dict-like with a `mutex`, mapping directory -> list of compiled regexes.
    """

    def __init__( self, console, queue, files_to_ignore ):
        threading.Thread.__init__( self )

        self.console = console
        self.queue = queue
        self.files_to_ignore = files_to_ignore

    def _report_error( self, ex ):
        # Write the standard three-line error report for `ex` to this worker's console buffer.
        self.console.write( "| " + type( ex ).__name__ )
        # args can be anything, can't guarantee they'll convert to a string, so use repr
        self.console.write( "| " + repr( ex ) )
        self.console.write( "|ERROR." )

    def _load_ignores( self, directory ):
        # Compile the patterns of the directory's .p4ignore file and register them for `directory`.
        file_regexes = []
        # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
        path = join( directory, p4_ignore )
        with open( path ) as f:
            for line in f:
                new_line = remove_comment( line.strip( ) )
                if len( new_line ) > 0:
                    # doesn't look quite right, fix it:
                    file_regexes.append( re.compile( join( re.escape( directory + os.sep ), new_line ) ) )

        self.console.write( "| Appending ignores from " + path )
        with self.files_to_ignore.mutex:
            if directory not in self.files_to_ignore:
                self.files_to_ignore[ directory ] = []
            self.files_to_ignore[ directory ].extend( file_regexes )

    def _unversioned_files( self, directory, err ):
        # Extract the unversioned file names from the stderr bytes of `p4 fstat *`.
        files = []
        for line in err.decode('utf-8').split( os.linesep ):
            if len( line ) == 0:
                continue

            # Each useful line looks like "<name> - <message>"; keep what precedes the last ' - '.
            i = line.rfind( ' - ')
            if i < 0:
                # No separator: there is no name to extract, so skip the line instead of
                # reusing the name taken from a previous line (or failing on the first one).
                continue

            base = line[ : i ]
            if base == "*" or len(base) == 0:
                # Directory is empty, we could delete it now
                continue

            if not os.path.isdir( join( directory, base ) ):
                files.append( base )
        return files

    def _process_directory( self, directory ):
        # Handle one PARSE_DIRECTORY request; all output goes to the console buffer.
        self.console.write( "Working on " + directory )

        try:
            dir_contents = os.listdir( directory )
        except OSError as ex:
            self._report_error( ex )
            return

        if p4_ignore in dir_contents:
            self._load_ignores( directory )

        ignore_list = get_ignore_list( directory, self.files_to_ignore )

        command = "p4 fstat *"
        try:
            proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=directory )
            (out, err) = proc.communicate()
        except Exception as ex:
            self._report_error( ex )
            return

        files = self._unversioned_files( directory, err )

        # Queue every sub-directory that is not ignored so some worker descends into it.
        for content in dir_contents:
            path = join( directory, content )
            if os.path.isdir( path ):
                if match_in_ignore_list( path, ignore_list ):
                    self.console.write( "| Ignoring " + content )
                else:
                    self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )

        for name in files:
            path = join( directory, name )

            if match_in_ignore_list( path, ignore_list ):
                self.console.write( "| Ignoring " + path )
                continue

            self.console.write( "| " + name + " is unversioned, removing it." )
            try:
                # Make the file writable first so that removing it is permitted.
                os.chmod( path, stat.S_IWRITE )
                os.remove( path )
            except OSError as ex:
                self._report_error( ex )

        self.console.write( "|Done." )

    def run( self ):
        """Process queue messages until a MSG.SHUTDOWN command is received."""
        while True:
            ( cmd, data ) = self.queue.get( )

            if cmd == MSG.SHUTDOWN:
                self.queue.task_done( )
                self.console.flush( )
                break

            if cmd != MSG.PARSE_DIRECTORY or data is None:
                self.console.flush( )
                self.queue.task_done( )
                continue

            try:
                self._process_directory( data )
            finally:
                # Always flush and acknowledge the message, even on an unexpected error,
                # so that a join() on the queue cannot wait forever for this item.
                self.console.flush( )
                self.queue.task_done( )
|
|
|
|
def main( args ):
    """Report files under the chosen directory that Perforce does not know about.

    Options are parsed by optparse from sys.argv (the `args` parameter is not consulted):
    -d/--dir selects the directory (default: current working directory); -t/--threads,
    -q/--quiet and -v/--verbose are parsed but not used by this function yet.

    The set of versioned files is cached with get_client_set(), then the tree is walked and
    every file missing from that set is reported. The actual removal is currently commented
    out, so nothing is deleted. Exits with status 1 if the `p4` command cannot be run.
    """
    # check requirements
    # os.devnull is the platform's null device; a literal "Nul" would create a file of that
    # name on non-Windows systems.
    if os.system( 'p4 > ' + os.devnull ) != 0:
        print( 'Perforce Command-line Client(p4) is required for this script.' )
        sys.exit( 1 )

    #http://docs.python.org/library/optparse.html
    parser = optparse.OptionParser( )

    parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
    # type="int" so a value given on the command line is a number like the default, not a string.
    parser.add_option( "-t", "--threads", dest="thread_count", type="int", help="Number of threads to crawl your drive and poll p4.", default=100 )
    # store_true: with store_false and a False default the flag could never change anything.
    parser.add_option( "-q", "--quiet", action="store_true", dest="quiet", help="This overrides verbose", default=False )
    parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )

    ( options, args ) = parser.parse_args( )

    directory = normpath( options.directory if options.directory is not None else os.getcwd( ) )

    with Console( auto_flush_num=20, auto_flush_time=1000 ) as c:
        c.writeflush( "Caching files in depot..." )
        files_in_depot = get_client_set( directory )

        c.writeflush( "Checking " + directory)

        # Always empty for now: the per-directory lookup is disabled
        # (was: get_ignore_list( root, files_to_ignore )).
        ignore_list = PDict()

        for root, dirs, files in os.walk( directory ):
            c.write( "|Checking " + root )

            # Iterate over a copy: removing from `dirs` while iterating it skips entries.
            # Removing from `dirs` itself is what stops os.walk from descending into them.
            for d in list( dirs ):
                path = join( root, d )

                if match_in_ignore_list( path, ignore_list ):
                    # add option of using send2trash
                    c.write( "| ignoring " + d )
                    dirs.remove( d )

            for f in files:
                path = normpath( join( root, f ) )

                if path not in files_in_depot:
                    c.write( "| " + path )
                    c.write( "| " + f + " is unversioned, removing it." )
                    # Removal is intentionally disabled for now:
                    #try:
                    #    os.chmod( path, stat.S_IWRITE )
                    #    os.remove( path )
                    #except OSError as ex:
                    #    c.writeflush( "|  " + type( ex ).__name__ )
                    #    c.writeflush( "|  " + repr( ex ) )
                    #    c.writeflush( "|ERROR." )
        c.write( "|Done." )
|
|
|
|
if __name__ == "__main__":
    try:
        main( sys.argv )
    except Exception:
        # Only real errors are reported here: a bare except would also catch the SystemExit
        # raised by sys.exit() in main() and report it as an unexpected error.
        print( "Unexpected error!" )
        traceback.print_exc( file = sys.stdout )