Also removed PressEnter. Added a global basename function so we can override which version we're using; right now I'm checking whether ntpath.basename works for all cases.
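For reference, a quick interpreter check of the difference between the two (hypothetical paths; assumes the script runs on Linux, where os.path is posixpath):

>>> import os.path, ntpath
>>> os.path.basename( 'C:\\depot\\foo.txt' )   # posixpath only splits on '/'
'C:\\depot\\foo.txt'
>>> ntpath.basename( 'C:\\depot\\foo.txt' )    # ntpath splits on both '\\' and '/'
'foo.txt'
>>> ntpath.basename( '/depot/foo.txt' )
'foo.txt'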
#!/usr/bin/python
# -*- coding: utf8 -*-
# author : Brian Ernst
# python_version : 2.7.6 and 3.4.0
# =================================

# todo: switch to `p4 fstat ...`, and parse the output for clientFile and cache it.
# todo: have a backup feature; make sure files are moved to the recycle bin or a temporary location.
# todo: switch to a faster method of calling p4 fstat on an entire directory and parsing its output.
# todo: add option of using send2trash.
# todo: buffer output; after exceeding a certain amount, print to the output.
# todo: allow logging output besides console output, or redirection altogether.

import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback

# trying ntpath, need to test on linux
import ntpath


re_remove_comment = re.compile( "#.*$" )

def remove_comment( s ):
    return re.sub( re_remove_comment, "", s )


try: input = raw_input
except: pass


def enum(*sequential, **named):
    enums = dict(zip(sequential, range(len(sequential))), **named)
    return type('Enum', (), enums)

MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')
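# e.g. (illustrative) MSG.SHUTDOWN == 0 and MSG.PARSE_DIRECTORY == 1: enum( ) just builds a
# class whose attributes are numbered in the order the names are passed.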

p4_ignore = ".p4ignore"

main_pid = os.getpid( )


def basename( path ):
    #return os.path.basename( path )
    return ntpath.basename( path )
def get_ignore_list( path, files_to_ignore ):
    # have to split the path and test every parent directory
    dirs = path.split( os.sep )

    ignore_list = [ ]

    for i, val in enumerate( dirs ):
        path_to_find = os.sep.join( dirs[ : i + 1 ] )

        if path_to_find in files_to_ignore:
            ignore_list.extend( files_to_ignore[ path_to_find ] )

    return ignore_list
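# Illustrative example (hypothetical paths): if files_to_ignore holds regex lists for both
# "C:\work" and "C:\work\proj", then get_ignore_list( "C:\work\proj\sub", files_to_ignore )
# returns the regexes of both, since every leading sub-path of the argument is looked up.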

def match_in_ignore_list( path, ignore_list ):
    for r in ignore_list:
        if re.match( r, path ):
            return True
    return False


class PTable( list ):
    def __init__( self, *args ):
        list.__init__( self, args )
        self.mutex = multiprocessing.Semaphore( )


class PDict( dict ):
    def __init__( self, *args ):
        dict.__init__( self, args )
        self.mutex = multiprocessing.Semaphore( )

class Console( threading.Thread ):
    MSG = enum('WRITE', 'FLUSH', 'SHUTDOWN', 'CLEAR' )

    def __init__( self ):
        threading.Thread.__init__( self )
        self.buffers = {}
        self.running = True
        self.queue = multiprocessing.JoinableQueue( )

    def write( self, data ):
        self.queue.put( ( Console.MSG.WRITE, os.getpid(), data ) )

    def flush( self ):
        self.queue.put( ( Console.MSG.FLUSH, os.getpid() ) )

    def clear( self ):
        self.queue.put( ( Console.MSG.CLEAR, os.getpid() ) )

    def __enter__( self ):
        self.start( )
        return self

    def __exit__( self, type, value, tb ):
        self.running = False
        # wake the consumer loop so run( ) can flush and exit instead of blocking on queue.get( ) forever
        self.queue.put( ( Console.MSG.SHUTDOWN, ) )

    def run( self ):
        self.running = True
        while True:
            data = self.queue.get( )
            event = data[0]

            if event == Console.MSG.SHUTDOWN:
                # flush remaining buffers before shutting down
                for ( pid, buffer ) in self.buffers.items( ):
                    for line in buffer:
                        print( line )
                break

            elif event == Console.MSG.WRITE:
                pid, s = data[ 1 : ]

                if pid not in self.buffers:
                    self.buffers[ pid ] = []
                self.buffers[ pid ].append( s )
            elif event == Console.MSG.FLUSH:
                pid = data[ 1 ]
                if pid in self.buffers:
                    for line in self.buffers[ pid ]:
                        print( line )
                    self.buffers.pop( pid, None )
            elif event == Console.MSG.CLEAR:
                pid = data[ 1 ]
                if pid in self.buffers:
                    self.buffers.pop( pid, None )


class Worker( threading.Thread ):
    def __init__( self, console, queue, files_to_ignore ):
        threading.Thread.__init__( self )

        self.console = console
        self.queue = queue
        self.files_to_ignore = files_to_ignore

    def run( self ):
        while True:
            ( cmd, data ) = self.queue.get( )

            if cmd == MSG.SHUTDOWN:
                self.queue.task_done( )
                self.console.flush( )
                break

            if cmd != MSG.PARSE_DIRECTORY or data is None:
                self.console.flush( )
                self.queue.task_done( )
                continue

            directory = data

            self.console.write( "Working on " + directory )

            dir_contents = os.listdir( directory )

            if p4_ignore in dir_contents:
                file_regexes = []
                # Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
                path = os.path.join( directory, p4_ignore )
                with open( path ) as f:
                    for line in f:
                        new_line = remove_comment( line.strip( ) )
                        if len( new_line ) > 0:
                            file_regexes.append( re.compile( os.path.join( re.escape( directory + os.sep ), new_line ) ) )

                self.console.write( "| Appending ignores from " + path )
                with self.files_to_ignore.mutex:
                    if directory not in self.files_to_ignore:
                        self.files_to_ignore[ directory ] = []
                    self.files_to_ignore[ directory ].extend( file_regexes )
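            # Illustrative note: a hypothetical .p4ignore line like "build.*" is compiled above as a
            # regex anchored to this directory (roughly re.escape( directory + os.sep ) + "build.*"),
            # which match_in_ignore_list( ) then tests against full local paths.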

            ignore_list = get_ignore_list( directory, self.files_to_ignore )

            files = []
            command = "p4 fstat *"

            try:
                proc = subprocess.Popen( command.split( ), stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=directory )
                (out, err) = proc.communicate()
            except Exception as ex:
                self.console.write( "| " + str( type( ex ) ) )
                self.console.write( "| " + str( ex.args ) )
                self.console.write( "| " + str( ex ) )
                self.console.write( "|ERROR." )
                self.console.flush( )
                self.queue.task_done( )
                continue

            for line in err.decode('utf-8').split( os.linesep ):
                if len( line ) == 0:
                    continue

                # p4 fstat reports files it doesn't know about on stderr as "<local_path> - message",
                # so take everything before the last " - " as the local file name.
                # # dirty hack that grabs the filename from the ends of the printed out (not err) "depot_path - local_path"
                # # I could use regex to verify the expected string, but that will just slow us down.
                # base = basename( line )
                i = line.rfind( ' - ')
                if i >= 0:
                    base = line[ : i ]
                    if base == "*" or len(base) == 0:
                        # Directory is empty, we could delete it now
                        continue
                    path = os.path.join( directory, base )

                    if not os.path.isdir( path ):
                        files.append( base )

            for content in dir_contents:
                path = os.path.join( directory, content )
                if os.path.isdir( path ):
                    if match_in_ignore_list( path, ignore_list ):
                        self.console.write( "| Ignoring " + content )
                    else:
                        self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )

            for file in files:
                path = os.path.join( directory, file )

                if match_in_ignore_list( path, ignore_list ):
                    self.console.write( "| Ignoring " + path )
                    continue

                self.console.write( "| " + file + " is unversioned, removing it." )
                # clear the read-only flag Perforce leaves on synced files so os.remove( ) succeeds
                os.chmod( path, stat.S_IWRITE )
                os.remove( path )

            self.console.write( "|Done." )
            self.console.flush( )

            self.queue.task_done( )

def main( args ):
    # check requirements
    if os.system( 'p4 > ' + os.devnull ) != 0:
        print( 'Perforce Command-line Client(p4) is required for this script.' )
        sys.exit( 1 )

    #http://docs.python.org/library/optparse.html
    parser = optparse.OptionParser( )

    parser.add_option( "-d", "--dir", dest="directory", help="Desired directory to crawl.", default=None )
    parser.add_option( "-t", "--threads", type="int", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=100 )
    parser.add_option( "-q", "--quiet", action="store_true", dest="quiet", default=False )
    parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )

    ( options, args ) = parser.parse_args( )
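    # Example invocation (illustrative; the script file name below is just a placeholder):
    #   python p4_remove_unversioned.py -d C:\depot\project -t 16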

    root_full_path = os.getcwd( )

    # Files are added from .p4ignore
    # Key is the file root, the value is the table of file regexes for that directory.
    files_to_ignore = PDict()

    # make sure script doesn't delete itself
    with files_to_ignore.mutex:
        files_to_ignore[ root_full_path ] = [ re.compile( re.escape( os.path.join( root_full_path, basename( __file__ ) ) ) ) ]

    # Setup threading
    threads = []
    thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( )

    queue = multiprocessing.JoinableQueue( )

    with Console() as c:
        for i in range( thread_count ):
            t = Worker( c, queue, files_to_ignore )
            threads.append( t )
            t.start( )

        if len( threads ) == 1:
            print( "Spawned %s thread." % len( threads ) )
        else:
            print( "Spawned %s threads." % len( threads ) )

        queue.put( ( MSG.PARSE_DIRECTORY, options.directory if options.directory is not None else os.getcwd( ) ) )
        queue.join( )

        for i in range( thread_count ):
            queue.put( ( MSG.SHUTDOWN, None ) )

        print( os.linesep + "Removing empty directories...")
        # remove empty directories in reverse order
        for root, dirs, files in os.walk( root_full_path, topdown=False ):
            ignore_list = get_ignore_list( root, files_to_ignore )

            for d in dirs:
                path = os.path.join( root, d )

                if match_in_ignore_list( path, ignore_list ):
                    # add option of using send2trash
                    print( "| ignoring " + d )
                    continue

                try:
                    os.rmdir(path)
                    print( "| " + d + " was removed." )
                except OSError:
                    # Fails on non-empty directory
                    pass
        print( "|Done." )

        for t in threads:
            t.join( )

if __name__ == "__main__":
    try:
        main( sys.argv )
    except:
        print( "Unexpected error!" )
        traceback.print_exc( file = sys.stdout )