|
| 1 | +from concurrent.futures import ThreadPoolExecutor |
| 2 | +from .utils import CountUpDownLatch |
| 3 | +import threading |
| 4 | +import logging |
| 5 | +import multiprocessing |
| 6 | +import os |
| 7 | +import logging.handlers |
| 8 | +from .exceptions import FileNotFoundError |
| 9 | + |
| 10 | + |
| 11 | +try: |
| 12 | + from queue import Empty # Python 3 |
| 13 | +except ImportError: |
| 14 | + from Queue import Empty # Python 2 |
| 15 | +end_queue_sentinel = [None, None] |
| 16 | + |
| 17 | +exception = None |
| 18 | +exception_lock = threading.Lock() |
| 19 | + |
| 20 | + |
| 21 | +threading |
| 22 | +def monitor_exception(exception_queue, process_ids): |
| 23 | + global exception |
| 24 | + logger = logging.getLogger(__name__) |
| 25 | + logger.setLevel(logging.DEBUG) |
| 26 | + |
| 27 | + while True: |
| 28 | + try: |
| 29 | + excep = exception_queue.get(timeout=0.1) |
| 30 | + if excep == end_queue_sentinel: |
| 31 | + break |
| 32 | + logger.log(logging.DEBUG, "Setting global exception") |
| 33 | + exception_lock.acquire() |
| 34 | + exception = excep |
| 35 | + exception_lock.release() |
| 36 | + logger.log(logging.DEBUG, "Closing processes") |
| 37 | + for p in process_ids: |
| 38 | + p.terminate() |
| 39 | + logger.log(logging.DEBUG, "Joining processes") |
| 40 | + for p in process_ids: |
| 41 | + p.join() |
| 42 | + import thread |
| 43 | + logger.log(logging.DEBUG, "Interrupting main") |
| 44 | + raise Exception(excep) |
| 45 | + except Empty: |
| 46 | + pass |
| 47 | + |
| 48 | + |
| 49 | +def log_listener_process(queue): |
| 50 | + while True: |
| 51 | + try: |
| 52 | + record = queue.get(timeout=0.1) |
| 53 | + queue.task_done() |
| 54 | + if record == end_queue_sentinel: # We send this as a sentinel to tell the listener to quit. |
| 55 | + break |
| 56 | + logger = logging.getLogger(record.name) |
| 57 | + logger.handlers.clear() |
| 58 | + logger.handle(record) # No level or filter logic applied - just do it! |
| 59 | + except Empty: # Try again |
| 60 | + pass |
| 61 | + except Exception as e: |
| 62 | + import sys, traceback |
| 63 | + print('Problems in logging') |
| 64 | + traceback.print_exc(file=sys.stderr) |
| 65 | + |
| 66 | + |
| 67 | +def multi_processor_change_acl(adl, path=None, method_name="", acl_spec="", number_of_sub_process=None): |
| 68 | + log_queue = multiprocessing.JoinableQueue() |
| 69 | + exception_queue = multiprocessing.Queue() |
| 70 | + logger = logging.getLogger(__name__) |
| 71 | + logger.setLevel(logging.DEBUG) |
| 72 | + queue_bucket_size = 10 |
| 73 | + worker_thread_num_per_process = 50 |
| 74 | + |
| 75 | + def launch_processes(number_of_processes): |
| 76 | + process_list = [] |
| 77 | + for i in range(number_of_processes): |
| 78 | + process_list.append(multiprocessing.Process(target=processor, |
| 79 | + args=(adl, file_path_queue, finish_queue_processing_flag, |
| 80 | + method_name, acl_spec, log_queue, exception_queue))) |
| 81 | + process_list[-1].start() |
| 82 | + return process_list |
| 83 | + |
| 84 | + def walk(walk_path): |
| 85 | + try: |
| 86 | + paths = [] |
| 87 | + all_files = adl._ls(path=walk_path) |
| 88 | + |
| 89 | + for files in all_files: |
| 90 | + if files['type'] == 'DIRECTORY': |
| 91 | + dir_processed_counter.increment() # A new directory to process |
| 92 | + walk_thread_pool.submit(walk, files['name']) |
| 93 | + paths.append(files['name']) |
| 94 | + if len(paths) == queue_bucket_size: |
| 95 | + file_path_queue.put(list(paths)) |
| 96 | + paths = [] |
| 97 | + if paths != []: |
| 98 | + file_path_queue.put(list(paths)) # For leftover paths < bucket_size |
| 99 | + except FileNotFoundError: |
| 100 | + pass # Continue in case the file was deleted in between |
| 101 | + except: |
| 102 | + import traceback |
| 103 | + logger.exception("Failed to walk for path: " + str(walk_path) + ". Exiting!") |
| 104 | + exception_queue.put(traceback.format_exc()) |
| 105 | + finally: |
| 106 | + dir_processed_counter.decrement() # Processing complete for this directory |
| 107 | + |
| 108 | + finish_queue_processing_flag = multiprocessing.Event() |
| 109 | + file_path_queue = multiprocessing.JoinableQueue() |
| 110 | + if number_of_sub_process == None: |
| 111 | + number_of_sub_process = max(2, multiprocessing.cpu_count()-1) |
| 112 | + |
| 113 | + child_processes = launch_processes(number_of_sub_process) |
| 114 | + exception_monitor_thread = threading.Thread(target=monitor_exception, args=(exception_queue, child_processes)) |
| 115 | + exception_monitor_thread.start() |
| 116 | + log_listener = threading.Thread(target=log_listener_process, args=(log_queue,)) |
| 117 | + log_listener.start() |
| 118 | + |
| 119 | + dir_processed_counter = CountUpDownLatch() |
| 120 | + walk_thread_pool = ThreadPoolExecutor(max_workers=worker_thread_num_per_process) |
| 121 | + |
| 122 | + file_path_queue.put([path]) # Root directory needs to be passed |
| 123 | + dir_processed_counter.increment() |
| 124 | + walk(path) # Start processing root directory |
| 125 | + |
| 126 | + if dir_processed_counter.is_zero(): # Done processing all directories. Blocking call. |
| 127 | + walk_thread_pool.shutdown() |
| 128 | + file_path_queue.close() # No new elements to add |
| 129 | + file_path_queue.join() # Wait for operations to be done |
| 130 | + logger.log(logging.DEBUG, "file path queue closed") |
| 131 | + finish_queue_processing_flag.set() # Set flag to break loop of child processes |
| 132 | + for child in child_processes: # Wait for all child process to finish |
| 133 | + logger.log(logging.DEBUG, "Joining process: "+str(child.pid)) |
| 134 | + child.join() |
| 135 | + |
| 136 | + # Cleanup |
| 137 | + logger.log(logging.DEBUG, "Sending exception sentinel") |
| 138 | + exception_queue.put(end_queue_sentinel) |
| 139 | + exception_monitor_thread.join() |
| 140 | + logger.log(logging.DEBUG, "Exception monitor thread finished") |
| 141 | + logger.log(logging.DEBUG, "Sending logger sentinel") |
| 142 | + log_queue.put(end_queue_sentinel) |
| 143 | + log_queue.join() |
| 144 | + log_queue.close() |
| 145 | + logger.log(logging.DEBUG, "Log queue closed") |
| 146 | + log_listener.join() |
| 147 | + logger.log(logging.DEBUG, "Log thread finished") |
| 148 | + |
| 149 | + |
| 150 | +def processor(adl, file_path_queue, finish_queue_processing_flag, method_name, acl_spec, log_queue, exception_queue): |
| 151 | + logger = logging.getLogger(__name__) |
| 152 | + |
| 153 | + try: |
| 154 | + logger.addHandler(logging.handlers.QueueHandler(log_queue)) |
| 155 | + logger.propagate = False # Prevents double logging |
| 156 | + except AttributeError: |
| 157 | + # Python 2 doesn't have Queue Handler. Default to best effort logging. |
| 158 | + pass |
| 159 | + logger.setLevel(logging.DEBUG) |
| 160 | + |
| 161 | + try: |
| 162 | + worker_thread_num_per_process = 50 |
| 163 | + func_table = {"mod_acl": adl.modify_acl_entries, "set_acl": adl.set_acl, "rem_acl": adl.remove_acl_entries} |
| 164 | + function_thread_pool = ThreadPoolExecutor(max_workers=worker_thread_num_per_process) |
| 165 | + adl_function = func_table[method_name] |
| 166 | + logger.log(logging.DEBUG, "Started processor pid:"+str(os.getpid())) |
| 167 | + |
| 168 | + def func_wrapper(func, path, spec): |
| 169 | + try: |
| 170 | + func(path=path, acl_spec=spec) |
| 171 | + except FileNotFoundError as e: |
| 172 | + logger.exception("File "+str(path)+" not found") |
| 173 | + pass # Exception is being logged in the relevant acl method. Do nothing here |
| 174 | + except: |
| 175 | + # TODO Raise to parent process |
| 176 | + pass |
| 177 | + |
| 178 | + logger.log(logging.DEBUG, "Completed running on path:" + str(path)) |
| 179 | + |
| 180 | + while finish_queue_processing_flag.is_set() == False: |
| 181 | + try: |
| 182 | + file_paths = file_path_queue.get(timeout=0.1) |
| 183 | + file_path_queue.task_done() # Will not be called if empty |
| 184 | + for file_path in file_paths: |
| 185 | + logger.log(logging.DEBUG, "Starting on path:" + str(file_path)) |
| 186 | + function_thread_pool.submit(func_wrapper, adl_function, file_path, acl_spec) |
| 187 | + except Empty: |
| 188 | + pass |
| 189 | + |
| 190 | + except Exception as e: |
| 191 | + import traceback |
| 192 | + # TODO Raise to parent process |
| 193 | + logger.exception("Exception in pid "+str(os.getpid())+"Exception: " + str(e)) |
| 194 | + exception_queue.put(traceback.format_exc()) |
| 195 | + finally: |
| 196 | + function_thread_pool.shutdown() # Blocking call. Will wait till all threads are done executing. |
| 197 | + logger.log(logging.DEBUG, "Finished processor pid: " + str(os.getpid())) |
0 commit comments