Working on threaded support.

This commit is contained in:
Brian 2014-05-08 21:05:55 -06:00
parent 1f4b52e3a9
commit 2b14c4a273
1 changed file with 181 additions and 96 deletions

View File

@ -1,14 +1,16 @@
#!/usr/bin/python
# -*- coding: utf8 -*-
# author : Brian Ernst
# python_version : 2.7.6
# python_version : 2.7.6 and 3.4.0
# =================================
# todo: have a backup feature, make sure files are moved to the recycle bin or a temporary file.
# todo: switch to faster method of calling p4 fstat on an entire directory and parsing its output
# todo: add option of using send2trash
# todo: buffer output, after exceeding a certain amount print to the output.
# todo: allow logging output besides console output, or redirection altogether
import inspect, os, re, stat, subprocess, sys, traceback
import inspect, multiprocessing, optparse, os, re, stat, subprocess, sys, threading, traceback
re_remove_comment = re.compile( "#.*$" )
@ -19,126 +21,206 @@ def remove_comment( s ):
# Python 2/3 compatibility: on Python 2 rebind `input` to `raw_input` so that
# PressEnter reads a plain line instead of evaluating what the user typed.
# On Python 3 `raw_input` does not exist; only that specific NameError is
# ignored, so unrelated errors (or Ctrl-C) are no longer silently swallowed.
try:
    input = raw_input
except NameError:
    pass
def enum(*sequential, **named):
    """Create a simple enumeration class.

    Each positional name is assigned its zero-based position as value;
    keyword arguments supply explicit name/value pairs and take precedence
    over a positional name with the same spelling.

    Returns a new class named 'Enum' whose class attributes are the members.
    """
    members = {name: index for index, name in enumerate(sequential)}
    members.update(named)
    return type('Enum', (), members)
MSG = enum('SHUTDOWN', 'PARSE_DIRECTORY')
p4_ignore = ".p4ignore"
def PressEnter( ):
    """Print a prompt and block until the user submits a line on stdin."""
    print( "\nPress ENTER to continue..." )
    # The typed text itself is irrelevant; we only wait for the line.
    input( "" )
def main( ):
# check requirement
def get_ignore_list( path, files_to_ignore ):
    """Collect the ignore entries that apply to `path`.

    `files_to_ignore` maps a directory path to a list of entries. Every
    leading prefix of `path` (split on os.sep, shortest first, ending with
    `path` itself) is looked up, and the entries of each prefix that is
    present are concatenated in that order.

    Returns a new list; the lists stored in `files_to_ignore` are not modified.
    """
    parts = path.split( os.sep )
    collected = [ ]
    for depth in range( 1, len( parts ) + 1 ):
        prefix = os.sep.join( parts[ : depth ] )
        if prefix in files_to_ignore:
            collected += files_to_ignore[ prefix ]
    return collected
def match_in_ignore_list( path, ignore_list ):
    """Return True if any pattern in `ignore_list` matches the start of `path`.

    Patterns may be regex strings or compiled patterns; matching uses
    re.match, so a pattern must match from the first character of `path`.
    An empty list yields False.
    """
    return any( re.match( pattern, path ) for pattern in ignore_list )
class Console:
    """Console writer whose output calls are serialised by a mutex."""

    def __init__( self ):
        # Created with the default initial value, so it is used as a mutex.
        self.mutex = multiprocessing.Semaphore( )

    def Write( self, data ):
        """Print `data` while holding the mutex; the mutex is always released."""
        self.mutex.acquire( )
        try:
            print( data )
        finally:
            self.mutex.release( )
class PTable( list ):
    """A list bundled with a mutex that callers can hold while using it.

    The positional arguments become the initial elements, in order.
    The list operations themselves do not take the mutex; callers do so
    explicitly with `with table.mutex:`.
    """

    def __init__( self, *args ):
        super( PTable, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
class PDict( dict ):
    """A dict bundled with a mutex that callers can hold while using it.

    The positional arguments are handed to dict as one sequence, so each
    argument must itself be a (key, value) pair; with no arguments the
    dict starts empty. The dict operations themselves do not take the
    mutex; callers do so explicitly with `with mapping.mutex:`.
    """

    def __init__( self, *args ):
        super( PDict, self ).__init__( args )
        self.mutex = multiprocessing.Semaphore( )
class Worker( threading.Thread ):
    """Thread that consumes ( command, data ) messages from a shared queue.

    MSG.PARSE_DIRECTORY messages carry a directory path. The worker loads
    that directory's .p4ignore (if any), asks p4 which entries it does not
    know, deletes the ones that are not ignored, and queues every
    non-ignored sub-directory for processing. MSG.SHUTDOWN ends the thread.

    queue           -- joinable queue shared by all workers (get/put/task_done).
    files_to_ignore -- shared mapping of directory path -> list of compiled
                       regexes, exposing a `mutex` context manager that is
                       held while the mapping is modified.

    All paths are built from the queued directory itself, never from the
    process working directory: the working directory is shared by every
    thread, so reading or changing it from a worker is a race.
    """

    def __init__( self, queue, files_to_ignore ):
        threading.Thread.__init__( self )
        self.queue = queue
        self.files_to_ignore = files_to_ignore

    def run( self ):
        """Handle queued messages until MSG.SHUTDOWN is received."""
        while True:
            ( cmd, data ) = self.queue.get( )
            try:
                if cmd == MSG.SHUTDOWN:
                    break
                # Unknown commands and empty payloads are acknowledged and skipped.
                if cmd == MSG.PARSE_DIRECTORY and data is not None:
                    self._parse_directory( data )
            except Exception:
                # Report and keep serving; one bad directory must not kill the worker.
                print( "Unexpected error!" )
                traceback.print_exc( file = sys.stdout )
            finally:
                # Always acknowledge the message, otherwise queue.join( ) never returns.
                self.queue.task_done( )

    def _parse_directory( self, directory ):
        """Clean one directory and queue its non-ignored sub-directories."""
        dir_contents = os.listdir( directory )

        if p4_ignore in dir_contents:
            self._load_ignore_file( directory )

        ignore_list = get_ignore_list( directory, self.files_to_ignore )
        files = self._find_unversioned_files( directory )

        for content in dir_contents:
            path = os.path.join( directory, content )
            if not os.path.isdir( path ):
                continue
            if match_in_ignore_list( path, ignore_list ):
                print( "| Ignoring " + content )
            else:
                # Queue the full path so the next worker can resolve it on its own.
                self.queue.put( ( MSG.PARSE_DIRECTORY, path ) )

        for name in files:
            path = os.path.join( directory, name )
            if match_in_ignore_list( path, ignore_list ):
                print( "| Ignoring " + path )
                continue

            print( "| " + name + " is unversioned, removing it." )
            # Make the file writable first so that os.remove is not refused.
            os.chmod( path, stat.S_IWRITE )
            os.remove( path )

        print( "|Done." )

    def _load_ignore_file( self, directory ):
        """Compile the patterns in directory's .p4ignore and register them under `directory`."""
        # TODO: should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
        path = os.path.join( directory, p4_ignore )
        prefix = re.escape( directory + os.sep )
        file_regexes = []
        with open( path ) as f:
            for line in f:
                new_line = remove_comment( line.strip( ) )
                if len( new_line ) > 0:
                    file_regexes.append( re.compile( os.path.join( prefix, new_line ) ) )

        print( "|Appending ignores from " + path )
        with self.files_to_ignore.mutex:
            self.files_to_ignore.setdefault( directory, [] ).extend( file_regexes )

    def _find_unversioned_files( self, directory ):
        """Run `p4 fstat *` in `directory` and return the names it reports on stderr.

        Every non-empty stderr line is echoed. A line of the form
        "<name> - <message>" is taken as an entry p4 does not know about
        (presumably its "no such file(s)" report -- confirm against p4).
        Entries that are directories, and the literal "*" reported for an
        empty directory, are left out.
        """
        command = "p4 fstat *"
        # cwd= only affects the child process; os.chdir would move every thread.
        proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=directory )
        ( out, err ) = proc.communicate( )

        files = []
        for line in err.decode( 'utf-8' ).split( os.linesep ):
            if len( line ) == 0:
                continue
            print( line )
            i = line.rfind( ' - ' )
            if i < 0:
                continue
            basename = line[ : i ]
            if basename == "*":
                # Directory is empty, we could delete it now
                continue
            if not os.path.isdir( os.path.join( directory, basename ) ):
                files.append( basename )
        return files
def main( args ):
# check requirements
if os.system( 'p4 > Nul' ) != 0:
print( 'Perforce Command-line Client(p4) is required for this script.' )
sys.exit( 1 )
# Files are added from .p4ignore
# Key is the file root, the value is the table of file regexes for that directory.
files_to_ignore = {}
#http://docs.python.org/library/optparse.html
parser = optparse.OptionParser( )
def get_ignore_list( path ):
# have to split path and test top directory
dirs = path.split( os.sep )
parser.add_option( "-t", "--threads", dest="thread_count", help="Number of threads to crawl your drive and poll p4.", default=10 )
parser.add_option( "-v", "--verbose", action="store_true", dest="verbose", default=True )
ignore_list = [ ]
( options, args ) = parser.parse_args( )
for i, val in enumerate( dirs ):
path_to_find = os.sep.join( dirs[ : i + 1] )
if path_to_find in files_to_ignore:
ignore_list = ignore_list + files_to_ignore[ path_to_find ]
return ignore_list
def match_in_ignore_list( path, ignore_list ):
for r in ignore_list:
if re.match( r, path ):
return True
return False
root_path = "."
root_full_path = os.getcwd( )
p4_ignore = ".p4ignore"
# Files are added from .p4ignore
# Key is the file root, the value is the table of file regexes for that directory.
files_to_ignore = PDict()
# make sure script doesn't delete itself
with files_to_ignore.mutex:
files_to_ignore[ root_path ] = [ re.compile( os.path.join( re.escape( root_path + os.sep ), os.path.basename( __file__ ) ) ) ]
files_to_ignore[ root_path ] = [ re.compile( os.path.join( re.escape( root_path + os.sep ), os.path.basename( __file__ ) ) ) ]
# Setup threading
threads = []
thread_count = options.thread_count if options.thread_count > 0 else multiprocessing.cpu_count( ) + threads
for root, dirs, files in os.walk( root_path ):
queue = multiprocessing.JoinableQueue( )
print ( os.linesep + "Checking '" + root + "' ...")
for i in range( thread_count ):
t = Worker( queue, files_to_ignore )
threads.append( t )
t.start( )
if p4_ignore in files:
file_regexes = []
if len( threads ) == 1:
print( "Spawned %s thread." % len( threads ) )
else:
print( "Spawned %s threads." % len( threads ) )
# Should automatically ignore .p4ignore even if it's not specified, otherwise it'll be deleted.
queue.put( ( MSG.PARSE_DIRECTORY, "." ) )
queue.join( )
path = os.path.join( root, p4_ignore )
with open( path ) as f:
for line in f:
new_line = remove_comment( line.strip( ) )
if len( new_line ) > 0:
file_regexes.append( re.compile( os.path.join( re.escape( root + os.sep ), new_line ) ) )
print( "|Appending ignores from " + path )
files_to_ignore[ root ] = files_to_ignore[ root ] + file_regexes
ignore_list = get_ignore_list( root )
#command = "p4 have \"" + root + os.sep + "*\""
command = "p4 fstat *"
print("|" + command)
os.chdir( root )
proc = subprocess.Popen( command, stdout=subprocess.PIPE, stderr=subprocess.PIPE )
(out, err) = proc.communicate()
os.chdir( root_full_path )
# For ease we're doing a weird solution, rebuilding the file list. This is so we only need to parse unadded files.
files = []
for line in err.split( os.linesep ):
if len(line) == 0:
continue
# # dirty hack that grabs the filename from the ends of the printed out (not err) "depo_path - local_path"
# # I could use regex to verify the expected string, but that will just slow us down.
# basename = os.path.basename( line )
i = line.rfind( ' - ')
if i >= 0:
basename = line[ : i ]
if basename == "*":
# Directory is empty, we could delete it now
continue
path = os.path.join( root, basename )
if not os.path.isdir( path ):
files.append( basename )
for file in files:
path = os.path.join( root, file )
if match_in_ignore_list( path, ignore_list ):
print( "| Ignoring " + path )
continue
print( "| " + file + " is unversioned, removing it." )
os.chmod( path, stat.S_IWRITE )
os.remove( path )
dirs_copy = dirs
for d in dirs_copy:
path = os.path.join( root, d )
if match_in_ignore_list( path, ignore_list ):
# add option of using send2trash
print( "| Ignoring " + d )
dirs.remove( d )
print( "|Done." )
for i in range( thread_count ):
queue.put( ( MSG.SHUTDOWN, None ) )
print( os.linesep + "Removing empty directories...")
# remove empty directories
# remove empty directories in reverse order
for root, dirs, files in os.walk( root_path, topdown=False ):
ignore_list = get_ignore_list( root, files_to_ignore )
for d in dirs:
path = os.path.join( root, d )
if match_in_ignore_list( path, ignore_list ):
# add option of using send2trash
print( "| ignoring " + d )
@ -151,9 +233,12 @@ def main( ):
pass
print( "|Done." )
for t in threads:
t.join( )
if __name__ == "__main__":
try:
main( )
main( sys.argv )
except:
print( "Unexpected error!" )
traceback.print_exc( file = sys.stdout )