314 lines
11 KiB
Python
314 lines
11 KiB
Python
#!/usr/bin/python
|
|
# -*- coding: utf8 -*-
|
|
# author : Brian Ernst
|
|
# python_version : 2.7.6 and 3.4.0
|
|
# =================================
|
|
|
|
# todo: switch to `p4 fstat ...`, and parse the output for clientFile and cache it.
|
|
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
|
|
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output
|
|
# todo: add option of using send2trash
|
|
# todo: buffer output, after exceeding a certain amount print to the output.
|
|
# todo: allow logging output besides console output, or redirection altogether
|
|
|
|
import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback
|
|
|
|
# trying ntpath, need to test on linux
|
|
import ntpath
|
|
|
|
|
|
# Matches a '#' and everything that follows it on the line.
re_remove_comment = re.compile( "#.*$" )

def remove_comment( s ):
    """Return `s` with the first '#' and everything after it removed.

    Whitespace that precedes the '#' is left in place.
    """
    return re_remove_comment.sub( "", s )
|
|
|
|
|
|
# Python 2 compatibility: rebind `input` to `raw_input` so it returns the typed
# text unevaluated. Python 3 has no `raw_input`; only that NameError is ignored,
# so any other failure is no longer silently swallowed.
try:
    input = raw_input
except NameError:
    pass
|
|
|
|
def enum(*sequential, **named):
    """Create a lightweight enumeration class.

    Each positional name becomes a class attribute whose value is its position
    (0, 1, 2, ...). Keyword arguments become attributes with the given values
    and take precedence over a positional name of the same spelling.

    Returns a new class named 'Enum' carrying those attributes.
    """
    members = dict( ( name, index ) for index, name in enumerate( sequential ) )
    members.update( named )
    return type('Enum', (), members)
|
|
|
|
# Commands that can be placed on the worker queue.
MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')

# Name of the per-directory file that lists ignore patterns.
p4_ignore = ".p4ignore"

# Process id captured when this module is loaded.
main_pid = os.getpid( )
|
|
|
|
|
|
def basename( path ):
    """Return the final component of `path`.

    ntpath is used instead of os.path (see the module-level note about testing
    on Linux); it accepts both forward slashes and backslashes as separators.
    """
    # os.path.basename( path ) would be the platform-specific alternative.
    tail = ntpath.basename( path )
    return tail
|
|
|
|
def get_ignore_list( path, files_to_ignore ):
    """Gather the ignore entries registered for `path` and all of its ancestors.

    path            -- directory path, split on os.sep
    files_to_ignore -- mapping of directory path -> list of ignore entries

    Returns a new list with the entries of every prefix of `path` that is a key
    in `files_to_ignore`, ordered from the shortest prefix to the full path.
    """
    parts = path.split( os.sep )

    # Every leading slice of the path, shortest first: "a", "a/b", "a/b/c", ...
    prefixes = [ os.sep.join( parts[ : depth ] ) for depth in range( 1, len( parts ) + 1 ) ]

    collected = [ ]
    for prefix in prefixes:
        if prefix in files_to_ignore:
            collected += files_to_ignore[ prefix ]

    return collected
|
|
|
|
def match_in_ignore_list( path, ignore_list ):
    """Return True if any pattern in `ignore_list` matches at the start of `path`.

    Patterns may be compiled regular expressions or pattern strings; an empty
    list yields False.
    """
    return any( re.match( pattern, path ) for pattern in ignore_list )
|
|
|
|
class PTable( list ):
    """A list that carries a `mutex` semaphore alongside its items.

    The positional arguments become the initial items. The semaphore is only
    stored here; callers are expected to acquire it themselves.
    """
    def __init__( self, *args ):
        super( PTable, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
|
|
|
|
class PDict( dict ):
    """A dict that carries a `mutex` semaphore alongside its entries.

    Each positional argument is treated as one (key, value) pair. The semaphore
    is only stored here; callers are expected to acquire it themselves.
    """
    def __init__( self, *args ):
        super( PDict, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
|
|
|
|
class Console( threading.Thread ):
    """Background thread that buffers output per producer and prints it on request.

    Producers call write() to queue a line, flush() to print everything they
    have queued so far, and clear() to discard it. Used as a context manager:
    entering starts the thread; exiting prints whatever is still buffered and
    waits until every queued message has been processed.
    """
    MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )

    def __init__( self ):
        threading.Thread.__init__( self )
        # Maps a producer key (see _producer_key) to its list of pending lines.
        self.buffers = {}
        self.running = True
        self.queue = multiprocessing.JoinableQueue( )

    @staticmethod
    def _producer_key( ):
        """Return a key identifying the calling process and thread."""
        # The pid alone is shared by every thread of a process, so keying on it
        # merged the buffers of concurrent threads and let one thread's flush
        # print another thread's half-written output.
        return ( os.getpid( ), threading.current_thread( ).ident )

    def write( self, data ):
        """Queue `data` as one output line for the calling producer."""
        self.queue.put( ( Console.MSG.WRITE, Console._producer_key( ), data ) )

    def flush( self ):
        """Ask the console thread to print the calling producer's pending lines."""
        self.queue.put( ( Console.MSG.FLUSH, Console._producer_key( ) ) )

    def clear( self ):
        """Ask the console thread to drop the calling producer's pending lines."""
        self.queue.put( ( Console.MSG.CLEAR, Console._producer_key( ) ) )

    def __enter__( self ):
        self.start( )
        return self

    def __exit__( self, type, value, tb ):
        self.queue.put( ( Console.MSG.SHUTDOWN, ) )
        # Returns once the console thread has acknowledged every message,
        # including the shutdown request queued just above.
        self.queue.join( )

    def _dispatch( self, event, data ):
        """Apply one queued message to the buffers, printing where requested."""
        if event == Console.MSG.SHUTDOWN:
            # flush remaining buffers before shutting down
            # values() exists on Python 2 and 3; iteritems() is Python 2 only.
            for buffer in self.buffers.values( ):
                for line in buffer:
                    print( line )
            self.buffers.clear( )

        elif event == Console.MSG.WRITE:
            key, s = data[ 1 : ]
            self.buffers.setdefault( key, [] ).append( s )

        elif event == Console.MSG.FLUSH:
            for line in self.buffers.pop( data[ 1 ], [] ):
                print( line )

        elif event == Console.MSG.CLEAR:
            self.buffers.pop( data[ 1 ], None )

    def run( self ):
        """Process queued messages until a SHUTDOWN message arrives."""
        while True:
            data = self.queue.get( )
            event = data[ 0 ]

            try:
                self._dispatch( event, data )
            except Exception:
                # A failure while printing must not end the thread: nothing
                # would call task_done() any more and __exit__ would block
                # forever in queue.join().
                traceback.print_exc( )
            finally:
                self.queue.task_done( )

            if event == Console.MSG.SHUTDOWN:
                break
|
|
|
|
class Worker( threading.Thread ):
    """Thread that takes directories off a shared queue and cleans them.

    For each directory it loads the local .p4ignore (if present), asks
    `p4 fstat *` which entries Perforce does not know, queues the
    sub-directories for further processing, and removes the unknown files that
    no ignore pattern matches.

    console         -- object with write( text ) and flush( ) used for output
    queue           -- joinable queue of ( command, data ) tuples
    files_to_ignore -- shared mapping of directory -> compiled ignore patterns;
                       must expose a `mutex` usable as a context manager
    """
    def __init__( self, console, queue, files_to_ignore ):
        threading.Thread.__init__( self )

        self.console = console
        self.queue = queue
        self.files_to_ignore = files_to_ignore

    def run( self ):
        """Handle queued commands until a SHUTDOWN command is received."""
        while True:
            ( cmd, data ) = self.queue.get( )

            if cmd == MSG.SHUTDOWN:
                self.queue.task_done( )
                self.console.flush( )
                break

            try:
                # Anything other than a directory to parse is acknowledged and skipped.
                if cmd == MSG.PARSE_DIRECTORY and data is not None:
                    self._parse_directory( data )
            except Exception as ex:
                # Report and keep going; letting the exception escape would end
                # this thread and leave the queue waiting on it forever.
                self._report_error( ex )
            finally:
                self.console.flush( )
                # Always acknowledge the item, otherwise queue.join( ) never returns.
                self.queue.task_done( )

    def _report_error( self, ex ):
        """Write a description of `ex` to the console."""
        # str( ) is required: concatenating a type, tuple or exception to a
        # string raises TypeError.
        self.console.write( "| " + str( type( ex ) ) )
        self.console.write( "| " + str( ex.args ) )
        self.console.write( "| " + str( ex ) )
        self.console.write( "|ERROR." )

    def _load_ignore_file( self, directory ):
        """Compile the patterns in `directory`'s .p4ignore and register them."""
        file_regexes = []
        # TODO: should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
        path = os.path.join( directory, p4_ignore )
        with open( path ) as f:
            for line in f:
                # Strip after dropping the comment so whitespace in front of
                # the '#' does not end up inside the pattern.
                new_line = remove_comment( line ).strip( )
                if len( new_line ) > 0:
                    # An invalid pattern raises here on purpose: the directory is
                    # then skipped rather than cleaned with incomplete ignores.
                    file_regexes.append( re.compile( os.path.join( re.escape( directory + os.sep ), new_line ) ) )

        self.console.write( "| Appending ignores from " + path )
        with self.files_to_ignore.mutex:
            self.files_to_ignore.setdefault( directory, [] ).extend( file_regexes )

    def _find_unversioned( self, directory ):
        """Return the names in `directory` that `p4 fstat *` reports on stderr."""
        proc = subprocess.Popen( [ "p4", "fstat", "*" ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=directory )
        ( out, err ) = proc.communicate( )

        names = []
        # splitlines( ) copes with both "\n" and "\r\n" regardless of platform.
        for line in err.decode( 'utf-8' ).splitlines( ):
            if len( line ) == 0:
                continue

            # dirty hack: stderr lines look like "<name> - <message>", so the
            # name is whatever precedes the last " - ".
            i = line.rfind( ' - ' )
            if i < 0:
                continue

            base = line[ : i ]
            if base == "*" or len( base ) == 0:
                # Directory is empty, we could delete it now
                continue

            if not os.path.isdir( os.path.join( directory, base ) ):
                names.append( base )

        return names

    def _parse_directory( self, directory ):
        """Clean one directory and queue its sub-directories."""
        self.console.write( "Working on " + directory )

        dir_contents = os.listdir( directory )

        if p4_ignore in dir_contents:
            self._load_ignore_file( directory )

        ignore_list = get_ignore_list( directory, self.files_to_ignore )

        unversioned = self._find_unversioned( directory )

        for content in dir_contents:
            path = os.path.join( directory, content )
            if os.path.isdir( path ):
                if match_in_ignore_list( path, ignore_list ):
                    self.console.write( "| Ignoring " + content )
                else:
                    self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )

        for name in unversioned:
            path = os.path.join( directory, name )

            if match_in_ignore_list( path, ignore_list ):
                self.console.write( "| Ignoring " + path )
                continue

            self.console.write( "| " + name + " is unversioned, removing it." )
            try:
                # Clear a read-only flag first, otherwise the removal can fail.
                os.chmod( path, stat.S_IWRITE )
                os.remove( path )
            except OSError as ex:
                # One stubborn file should not stop the rest of the directory.
                self.console.write( "| Could not remove " + name + ": " + str( ex ) )

        self.console.write( "|Done." )
|
|
|
|
def main( args ):
    """Remove files Perforce does not know about, then remove empty directories.

    args -- accepted for the caller's convenience but not consulted; options are
            parsed by optparse from the process command line:
              -d/--dir      directory to crawl (default: current directory)
              -t/--threads  number of worker threads (default 100; a value of
                            0 or less selects the CPU count)
              -q/--quiet, -v/--verbose  parsed but not acted upon here

    Exits with status 1 when the `p4` command cannot be run.
    """
    # check requirements
    # os.devnull keeps this portable; a literal "Nul" only works on Windows and
    # creates a stray file of that name elsewhere.
    if os.system( 'p4 > ' + os.devnull ) != 0:
        print( 'Perforce Command-line Client(p4) is required for this script.' )
        sys.exit( 1 )

    #http://docs.python.org/library/optparse.html
    parser = optparse.OptionParser( )

    parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
    # type="int" so a value given on the command line is not left as a string.
    parser.add_option( "-t", "--threads", dest="thread_count", type="int", help="Number of threads to crawl your drive and poll p4.", default=100 )
    # store_true: with store_false and a False default the flag could never change anything.
    parser.add_option( "-q", "--quiet", action="store_true", dest="quiet", default=False )
    parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )

    ( options, args ) = parser.parse_args( )

    # Crawl and clean the same tree: the requested directory when given,
    # otherwise the current one. An absolute path keeps the ignore-table keys
    # consistent with the paths the workers build.
    if options.directory is not None:
        root_full_path = os.path.abspath( options.directory )
    else:
        root_full_path = os.getcwd( )

    # Files are added from .p4ignore
    # Key is the file root, the value is the table of file regexes for that directory.
    files_to_ignore = PDict()

    # make sure script doesn't delete itself
    # Keyed on the directory the script really lives in, not the working directory.
    script_path = os.path.abspath( __file__ )
    with files_to_ignore.mutex:
        files_to_ignore[ os.path.dirname( script_path ) ] = [ re.compile( re.escape( script_path ) ) ]

    # Setup threading
    threads = []
    thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( )

    queue = multiprocessing.JoinableQueue( )

    with Console() as c:
        for i in range( thread_count ):
            t = Worker( c, queue, files_to_ignore )
            threads.append( t )
            t.start( )

        if len( threads ) == 1:
            print( "Spawned %s thread." % len( threads ) )
        else:
            print( "Spawned %s threads." % len( threads ) )

        queue.put( ( MSG.PARSE_DIRECTORY, root_full_path ) )
        # Returns once every queued directory, including those added by the
        # workers along the way, has been processed.
        queue.join( )

        for i in range( thread_count ):
            queue.put( ( MSG.SHUTDOWN, None ) )

        print( os.linesep + "Removing empty directories...")
        # remove empty directories in reverse order
        for root, dirs, files in os.walk( root_full_path, topdown=False ):
            ignore_list = get_ignore_list( root, files_to_ignore )

            for d in dirs:
                path = os.path.join( root, d )

                if match_in_ignore_list( path, ignore_list ):
                    # add option of using send2trash
                    print( "| ignoring " + d )
                    # Skip it outright. Removing it from `dirs` has no effect
                    # on a bottom-up walk and disturbs this loop.
                    continue

                try:
                    os.rmdir(path)
                    print( "| " + d + " was removed." )
                except OSError:
                    # Fails on non-empty directory
                    pass
        print( "|Done." )

        # Wait for the workers inside the console context so their final
        # flush is still serviced.
        for t in threads:
            t.join( )
|
|
|
|
if __name__ == "__main__":
    try:
        main( sys.argv )
    except Exception:
        # Only real errors are reported. A bare except also caught the
        # SystemExit raised by sys.exit( ) inside main( ), and Ctrl-C, and
        # presented them as unexpected errors.
        print( "Unexpected error!" )
        traceback.print_exc( file = sys.stdout )
        # Signal the failure to the calling shell.
        sys.exit( 1 )