src.co_tools.co_utils

  1import os
  2from glob import glob
  3from multiprocessing import cpu_count
  4from pathlib import Path
  5
  6if os.getenv("CO_LOG").lower() == "true":
  7    from .get_logger import LOGGER
  8
  9    log = LOGGER
 10else:
 11    import logging
 12
 13    log = logging.getLogger(__name__)
 14
 15co_cpus = os.getenv("CO_CPUS")
 16aws_batch_job_id = os.getenv("AWS_BATCH_JOB_ID")
 17
 18
 19def get_cpu_limit(co_cpus=co_cpus, aws_batch_job_id=aws_batch_job_id):
 20    """This function returns an integer corresponding to the number of cores
 21
 22    Args:
 23        co_cpus (int, optional): _description_. Defaults to co_cpus.
 24        aws_batch_job_id (int, optional): _description_. Defaults to aws_batch_job_id.
 25
 26    Returns:
 27        int: number of cores available for compute
 28    """
 29    log.debug(f"co_cpus: {co_cpus} aws_batch_job_id: {aws_batch_job_id}")
 30    if co_cpus:
 31        return co_cpus
 32    if aws_batch_job_id:
 33        return 1
 34    with open("/sys/fs/cgroup/cpu/cpu.cfs_quota_us") as fp:
 35        cfs_quota_us = int(fp.read())
 36    with open("/sys/fs/cgroup/cpu/cpu.cfs_period_us") as fp:
 37        cfs_period_us = int(fp.read())
 38    container_cpus = cfs_quota_us // cfs_period_us
 39    # For physical machine, the `cfs_quota_us` could be '-1'
 40    log.debug(f"container_cpus: {container_cpus} cpu_count(): {cpu_count()}")
 41    return cpu_count() if container_cpus < 1 else container_cpus
 42
 43
 44def get_dir_contents(dir: str = "../data"):
 45    """This function finds all the files and folders in a dir
 46
 47    Args:
 48        dir (str, optional): The folder you want to search in. Defaults to "../data".
 49
 50    Returns:
 51        str: newline separated string of files and folders in the search dir.
 52    """
 53    if dir_contents := glob(str(f"{dir}/**/*"), recursive=True):
 54        log.debug(f"Found the following files in {dir} {dir_contents}")
 55        return "\n".join(dir_contents)
 56    log.warning(f"There are no files or folders in the {dir} folder.")
 57    return 0
 58
 59
 60def get_groups(filename: str = "../data/sample_sheet.csv"):
 61    """This function returns all the groups in a .csv
 62
 63    Args:
 64        filename (None): Path to a sample sheet. Will default to
 65        ../data/sample_sheet.csv if no path supplied. If a filename is supplied,
 66        this function will attempt to find the path to the file in the
 67        ../data folder
 68
 69    Returns:
 70        str: comma-separated string of groups in ascending alphabetical order.
 71    """
 72    # if not filename:
 73    #     sample_sheet = "../data/sample_sheet.csv"
 74    if Path(filename).is_file():
 75        log.debug(f"{filename} is a file.")
 76        sample_sheet = filename
 77    else:
 78        log.debug(f"type for {filename}: {type(filename)}")
 79        if files_found := glob(str(f"../data/{filename}"), recursive=True):
 80            if len(files_found) > 1:
 81                log.warning(f"Found multiple sample_sheets. Will use {files_found[0]}")
 82            log.debug(f"Searching found the following sample sheet(s): {files_found}")
 83            sample_sheet = files_found[0]
 84        else:
 85            log.warning(f"No sample sheet found for '{filename}'")
 86            return 0
 87    groups_set = set()
 88    try:
 89        with open(f"{sample_sheet}", "r") as infile:
 90            lines = infile.readlines()
 91            for line in lines:
 92                line = line.strip()
 93                line_group = line.split(",")[0]
 94                groups_set.add(line_group)
 95            groups = sorted(list(groups_set))
 96            log.debug(f"Returning the following groups from sample sheet: {groups}")
 97            return ",".join(groups)
 98    except Exception as e:
 99        log.error(f"Could not open {sample_sheet} due to error {e}.")
100        return 0
101
102
def is_pipeline():
    """Report whether the code is executing in a pipeline.

    The check is whether the AWS_BATCH_JOB_ID environment variable holds a
    non-empty value.

    Returns:
        int: Returns 1 if in a pipeline, 0 otherwise.
    """
    batch_job_id = os.getenv("AWS_BATCH_JOB_ID")
    return int(bool(batch_job_id))
110
111
def print_log_msg(msg=None, level="WARNING"):
    """Emit a message through the module logger at the requested level.

    Args:
        msg (optional): The message to log. Defaults to None.
        level (str, optional): One of DEBUG, INFO, WARNING, ERROR or CRITICAL,
            matched case-insensitively. Defaults to "WARNING".

    Returns:
        Whatever the underlying logger method returns.

    Raises:
        Exception: If ``level`` is not one of the five supported names.
    """
    wanted = level.upper()
    emitters = {
        "DEBUG": log.debug,
        "INFO": log.info,
        "WARNING": log.warning,
        "ERROR": log.error,
        "CRITICAL": log.critical,
    }
    if wanted not in emitters:
        raise Exception(
            "logging level is not one of [DEBUG, INFO, WARNING, ERROR, CRITICAL]"
        )
    return emitters[wanted](msg)
def get_cpu_limit(co_cpus=co_cpus, aws_batch_job_id=aws_batch_job_id):
    """Return the number of CPU cores available for compute.

    The value is resolved in this order:

    1. ``co_cpus`` when it is set and converts to an integer >= 1.
    2. ``1`` when ``aws_batch_job_id`` is set.
    3. The whole-CPU quota read from the container's cgroup files.
    4. ``multiprocessing.cpu_count()`` when no quota is set or readable.

    Args:
        co_cpus (str | int, optional): Requested core count. Defaults to the
            module-level ``co_cpus`` (the ``CO_CPUS`` environment variable,
            which arrives as a string).
        aws_batch_job_id (str, optional): When truthy, a single core is
            reported. Defaults to the module-level ``aws_batch_job_id``
            (the ``AWS_BATCH_JOB_ID`` environment variable).

    Returns:
        int: number of cores available for compute
    """
    log.debug(f"co_cpus: {co_cpus} aws_batch_job_id: {aws_batch_job_id}")
    if co_cpus:
        # Environment variables are strings; convert so the documented int
        # return type actually holds.
        try:
            requested = int(co_cpus)
        except (TypeError, ValueError):
            requested = 0
        if requested >= 1:
            return requested
        log.warning(f"Ignoring unusable co_cpus value: {co_cpus!r}")
    if aws_batch_job_id:
        return 1
    container_cpus = _cgroup_cpu_quota()
    # For physical machine, the `cfs_quota_us` could be '-1'
    log.debug(f"container_cpus: {container_cpus} cpu_count(): {cpu_count()}")
    return cpu_count() if container_cpus < 1 else container_cpus


def _cgroup_cpu_quota():
    """Return the whole-CPU cgroup quota, or 0 when none can be determined."""
    problems = (OSError, ValueError, ZeroDivisionError)
    try:
        # cgroup v1 keeps quota and period in separate files.
        with open("/sys/fs/cgroup/cpu/cpu.cfs_quota_us") as fp:
            cfs_quota_us = int(fp.read())
        with open("/sys/fs/cgroup/cpu/cpu.cfs_period_us") as fp:
            cfs_period_us = int(fp.read())
        return cfs_quota_us // cfs_period_us
    except problems:
        pass  # files absent or unparsable: try the cgroup v2 layout next
    try:
        # cgroup v2 uses a single file: "<quota-or-max> <period>".
        with open("/sys/fs/cgroup/cpu.max") as fp:
            quota, period = fp.read().split()
        if quota != "max":
            return int(quota) // int(period)
    except problems:
        pass  # no cgroup information at all (e.g. not Linux)
    return 0

This function returns an integer corresponding to the number of cores

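Args: co_cpus (str or int, optional): Requested core count; when set, it is what the function returns. Defaults to co_cpus (the CO_CPUS environment variable). aws_batch_job_id (str, optional): When set and co_cpus is not, the function returns 1. Defaults to aws_batch_job_id (the AWS_BATCH_JOB_ID environment variable).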

Returns: int: number of cores available for compute

def get_dir_contents(dir: str = "../data"):
    """List every file and folder found beneath a directory.

    The search is a recursive glob for ``<dir>/**/*``.

    Args:
        dir (str, optional): The folder to search in. Defaults to "../data".

    Returns:
        str: newline separated string of the paths that were found. When
        nothing matches, a warning is logged and the integer 0 is returned.
    """
    matches = glob(f"{dir}/**/*", recursive=True)
    if not matches:
        log.warning(f"There are no files or folders in the {dir} folder.")
        return 0
    log.debug(f"Found the following files in {dir} {matches}")
    return "\n".join(matches)

This function finds all the files and folders in a dir

Args: dir (str, optional): The folder you want to search in. Defaults to "../data".

Returns: str: newline separated string of files and folders in the search dir.

def get_groups(filename: str = "../data/sample_sheet.csv"):
    """Return the distinct groups (first CSV column) of a sample sheet.

    Args:
        filename (str, optional): Path to a sample sheet. Defaults to
            "../data/sample_sheet.csv". When the value is not an existing
            file it is treated as a glob pattern relative to the ../data
            folder and the first match is used.

    Returns:
        str: comma-separated string of groups in ascending alphabetical
        order. The integer 0 is returned when no sample sheet is found or
        the file cannot be read.
    """
    if Path(filename).is_file():
        log.debug(f"{filename} is a file.")
        sample_sheet = filename
    else:
        log.debug(f"type for {filename}: {type(filename)}")
        files_found = glob(f"../data/{filename}", recursive=True)
        if not files_found:
            log.warning(f"No sample sheet found for '{filename}'")
            return 0
        if len(files_found) > 1:
            log.warning(f"Found multiple sample_sheets. Will use {files_found[0]}")
        log.debug(f"Searching found the following sample sheet(s): {files_found}")
        sample_sheet = files_found[0]

    groups_set = set()
    try:
        with open(sample_sheet, "r") as infile:
            for line in infile:
                line_group = line.strip().split(",")[0]
                # Blank lines and rows with an empty first field would add an
                # empty group and yield output such as ",a,b"; skip them.
                if line_group:
                    groups_set.add(line_group)
    except (OSError, UnicodeError) as e:
        log.error(f"Could not open {sample_sheet} due to error {e}.")
        return 0

    groups = sorted(groups_set)
    log.debug(f"Returning the following groups from sample sheet: {groups}")
    return ",".join(groups)

This function returns all the groups in a .csv

Args: filename (str, optional): Path to a sample sheet. Defaults to ../data/sample_sheet.csv if no path is supplied. If the supplied value is not an existing file, the function looks for it inside the ../data folder and uses the first match.

Returns: str: comma-separated string of groups in ascending alphabetical order.

def is_pipeline():
    """Report whether the code is executing in a pipeline.

    The check is whether the AWS_BATCH_JOB_ID environment variable holds a
    non-empty value.

    Returns:
        int: Returns 1 if in a pipeline, 0 otherwise.
    """
    batch_job_id = os.getenv("AWS_BATCH_JOB_ID")
    return int(bool(batch_job_id))

This function confirms whether the code is executing in a pipeline

Returns: int: Returns 1 if in a pipeline, 0 otherwise.