Source code for futureproof.task_manager

import logging
import queue
from typing import Any, Callable, List, Union, Iterable, Iterator
from enum import Enum
from functools import partial
from threading import Lock
from itertools import chain

import attr

from futureproof import executors

logger = logging.getLogger(__name__)

class ErrorPolicyEnum(Enum):
    """Error policy options.

    Selects what a task manager does when a gathered task result is an
    exception.
    """

    # Do nothing; the exception simply stays stored as the task's result.
    IGNORE = "ignore"
    # Log the exception and keep processing the remaining tasks.
    LOG = "log"
    # Shut the manager down and re-raise the exception to the caller.
    RAISE = "raise"
@attr.s(eq=False)
class Task:
    """Describe one execution (callable plus arguments) and hold its outcome.

    ``result`` and ``complete`` start out as ``None`` / ``False`` and are
    filled in once the callable has been executed; if the callable raised,
    the exception instance is stored as ``result``.

    ``eq=False`` keeps identity-based comparison and hashing, so two tasks
    with identical fields are still distinct objects.
    """

    # The callable to execute.
    fn = attr.ib()  # type: Callable
    # Positional arguments passed to ``fn``.
    args = attr.ib(default=())  # type: tuple
    # Keyword arguments passed to ``fn``. A factory is used so that each Task
    # gets its own dict: a literal ``{}`` default is a single object shared by
    # every Task created without kwargs, so mutating one would affect them all.
    kwargs = attr.ib(default=attr.Factory(dict))  # type: dict
    # Return value of ``fn``, or the exception it raised.
    result = attr.ib(default=None)  # type: Any
    # True once the execution has finished (successfully or not).
    complete = attr.ib(default=False)  # type: bool
class TaskManager:
    """Manages how tasks are created and handed to the executor.

    Tasks registered through :meth:`submit` and :meth:`map` are only kept in a
    lazy chain; they are passed to the executor when :meth:`run` or
    :meth:`as_completed` is iterated, never more than
    ``executor.max_workers`` at a time.

    :param executor: The executor that will execute the submitted tasks.
    :param error_policy: Error policy indicating what the behaviour should be
        if a task raises an exception. Defaults to ``raise``, which re-raises
        the exception when the failed task's result is gathered and stops
        submitting further tasks.
        ``log`` will only log the exception but continue the execution of the
        remaining tasks.
        ``ignore`` will not do anything, note, however, that the exception will
        be set as the result of the task so users can re-raise them if they so
        choose.
        A string is lower-cased and converted to :class:`ErrorPolicyEnum`.
    """

    def __init__(
        self,
        executor: executors._FutureProofExecutor,
        error_policy: Union[ErrorPolicyEnum, str] = ErrorPolicyEnum.RAISE,
    ):
        # Number of tasks handed to the executor whose result has not been
        # gathered yet.
        self._tasks_in_queue = 0
        self._error_policy = (
            error_policy
            if isinstance(error_policy, ErrorPolicyEnum)
            else ErrorPolicyEnum(error_policy.lower())
        )
        self._executor = executor
        # Set by ``_raise``; makes ``as_completed`` stop submitting tasks.
        self._shutdown = False
        # Lazy chain of pending tasks, extended by ``submit`` and ``map``.
        self._tasks = []  # type: Iterable
        self._submitted_task_count = 0  # type: int
        # Filled by ``_on_complete`` (executor side), drained by
        # ``_wait_for_result``.
        self._results_queue = queue.Queue()  # type: queue.Queue
        #: List of completed Task objects
        self.completed_tasks = []  # type: List[Task]
        self._completed_tasks_lock = Lock()  # type: Lock

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Leaving the context always waits for every submitted task.
        self.join()

    @property
    def results(self) -> List:
        """List of results for completed tasks.

        Note the contents may change as more tasks are completed.
        """
        with self._completed_tasks_lock:
            return [task.result for task in self.completed_tasks if task.complete]

    def submit(self, fn: Callable, *args: Any, **kwargs: Any) -> Task:
        """Register a task for execution.

        The task is only queued here; it is handed to the executor when
        :meth:`run` or :meth:`as_completed` is iterated.

        The newly created :class:`Task` will be returned.
        """
        task = Task(fn, args, kwargs)
        self._tasks = chain(self._tasks, [task])
        return task

    def map(self, fn: Callable, iterable: Iterable) -> None:
        """Register a set of tasks from a callable and an iterable of arguments.

        `iterable` may be any iterable of primitives or an iterable of argument
        tuples: a tuple item is unpacked as the positional arguments of `fn`,
        any other item is passed as its single argument.

        `iterable` is consumed lazily, one item per task, as tasks are
        submitted to the executor.
        """

        def gen():
            for i in iterable:
                args = i if isinstance(i, tuple) else (i,)
                yield Task(fn, args)

        self._tasks = chain(self._tasks, gen())

    def run(self) -> None:
        """Start the manager and wait until all tasks are completed before
        shutting down."""
        for _ in self.as_completed():
            pass

    def as_completed(self) -> Iterator[Task]:
        """Start the manager and return an iterator of completed tasks.

        When using the task manager as a context manager as_completed must be
        used *inside* the context, otherwise there will be no effect as the
        task manager will wait until all tasks are completed.
        """
        for task in self._tasks:
            if self._shutdown:
                break

            # Keep at most ``max_workers`` tasks in flight: gather (and yield)
            # one result before submitting another task.
            if self._tasks_in_queue == self._executor.max_workers:
                logger.debug("Queue full, waiting for result")
                yield self._wait_for_result()

            self._submit_task(task)

        # Everything has been submitted, drain the outstanding results.
        while len(self.completed_tasks) < self._submitted_task_count:
            yield self._wait_for_result()

        self._executor.join()

    def _submit_task(self, task: Task) -> None:
        """Submits a task to the executor, note this will block if the queue
        is full."""
        logger.debug(
            "Tasks in queue: %d, submitting task %r", self._tasks_in_queue, task
        )
        self._tasks_in_queue += 1
        fut = self._executor.submit(task.fn, *task.args, **task.kwargs)
        cb = partial(self._on_complete, task=task)
        fut.add_done_callback(cb)
        self._submitted_task_count += 1

    def _on_complete(self, future, task):
        """Called once per future to perform an operation over the result.

        Builds a completed copy of `task` holding either the future's result
        or the exception it raised, and puts it on the results queue.

        Note this function is called by the executing threads.
        """
        # NOTE(review): the outcome is recorded on a *new* Task, so the object
        # returned by ``submit`` is never marked complete nor given a result.
        # Confirm whether that is intended before relying on it.
        complete_task = Task(task.fn, task.args, task.kwargs)
        complete_task.complete = True
        try:
            complete_task.result = future.result()
        except Exception as exc:
            logger.debug("Exception on task %r", complete_task)
            complete_task.result = exc
        finally:
            logger.debug("Completed task %r", complete_task)
            self._results_queue.put(complete_task)

    def join(self) -> None:
        """Block until all submitted tasks are completed.

        Alternatively use the task manager as a context manager, upon exiting
        join will be called and results will be available.
        """
        while len(self.completed_tasks) < self._submitted_task_count:
            self._wait_for_result()

        self._executor.join()

    def _wait_for_result(self) -> Task:
        """Gather the result of one submitted task, applying the error policy.

        Blocks until a completed task is available on the results queue.

        Note a task whose result is an ``Exception`` instance is treated as
        failed, whether the exception was raised or merely returned.

        :raises Exception: the task's exception, when the policy is ``raise``.
        """
        completed_task = self._results_queue.get(block=True)
        logger.debug("Gathering result for completed task %r", completed_task)
        self._tasks_in_queue -= 1
        # Append under the same lock ``results`` reads with, otherwise the
        # lock guards nothing.
        with self._completed_tasks_lock:
            self.completed_tasks.append(completed_task)

        if isinstance(completed_task.result, Exception):
            if self._error_policy == ErrorPolicyEnum.RAISE:
                self._raise(completed_task.result)
            elif self._error_policy == ErrorPolicyEnum.LOG:
                logger.exception(
                    "Task %s raised an exception",
                    completed_task,
                    exc_info=completed_task.result,
                )

        return completed_task

    def _raise(self, exception) -> None:
        """Performs cleanup before raising an exception.

        Flags the manager as shut down so no further tasks are submitted,
        joins the executor and then raises `exception`.
        """
        logger.info("Raising exception and shutting down")
        self._shutdown = True
        self._executor.join()
        raise exception