Source code for my_code_base.filehandling.utils

# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Author: Markus Ritschel
# eMail:  git@markusritschel.de
# Date:   2024-03-03
# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
#
import filecmp
from functools import wraps
import logging
import pytest


log = logging.getLogger(__name__)


[docs] def check_input_for_duplicates(func): """ A decorator that checks a list of file paths for duplicates before processing them. This decorator takes a function as input and returns a wrapped function that performs the following steps: 1. Checks if the input is a list and contains more than one element. 2. Compares each pair of file paths in the list using the `os.stat` signatures (file type, size, and modification time). 3. Removes any duplicates from the list. 4. Calls the original function with the cleaned-up list of file paths. Parameters ---------- func (function): The function to be decorated. Returns ------- function: The wrapped function. Example ------- >>> pytest.skip() >>> @check_input_for_duplicates >>> def process_files(file_list): >>> # Process the files >>> pass .. note:: The wrapped function can still be parsed by Sphinx due to the :obj:`functools.wraps` decorator. """ @wraps(func) def wrapper(file_list): if not isinstance(file_list, list) or len(file_list) <= 1: return func(file_list) remove_idx = [] for i, f1 in enumerate(file_list): for f2 in file_list[i + 1:]: res = filecmp.cmp(f1,f2, shallow=True) # Note: shallow=False would compare the actual file contents if res: remove_idx.append(i) filtered_file_list = [i for j, i in enumerate(file_list) if j not in remove_idx] remove_files = [i for j, i in enumerate(file_list) if j in remove_idx] if remove_idx: log.info("Found and ignored %s duplicates in file list.", len(remove_idx)) for entry in remove_files: log.debug("Ignored %s", entry) return func(filtered_file_list) return wrapper