# Source code for transposonmapper.processing.essential_genes_names

# =============================================================================
# To import the functions, type:
#    import sys
#    sys.path.insert(1,r'full\path\to\this\file')
#    from essential_genes_names import list_known_essentials
# =============================================================================

import os
from transposonmapper.importing import load_default_files

def list_known_essentials(input_files=None, headerlines=3, verbose=True):
    """Collect the known essential genes from one or more text files.

    Every input file is expected to hold one gene name per line, preceded
    by ``headerlines`` header lines that are skipped. The genes of all
    files are concatenated, in the order the files are given, into a
    single list. No de-duplication is performed.

    Note when using the default files: the length of the output list can
    exceed the number of known essential genes, as the list sometimes
    contains both the standard name and the systematic name of a gene.

    Parameters
    ----------
    input_files : str or list of str, optional
        Path, or list/tuple of paths, of the file(s) holding the essential
        genes. When None (the default), the essentials file returned by
        ``load_default_files`` is used.
    headerlines : int, optional
        Number of leading lines to skip in every file, by default 3.
        A file with fewer lines than this contributes no genes.
    verbose : bool, optional
        Print the name of each file before reading it, by default True.

    Returns
    -------
    list of str
        The lines of all files (after the header), with the trailing
        newline removed.

    Raises
    ------
    AssertionError
        If ``input_files`` is None and one of the default files returned
        by ``load_default_files`` is empty/falsy.
    """
    if input_files is None:
        # No explicit input: fall back to the package defaults.
        gff_file, essential_genes_files, gene_name_file = load_default_files(
            gff_file=None, essentials_file=None, gene_names_file=None
        )

        # Verify presence of files. Raised explicitly instead of through an
        # `assert` statement so the check is not stripped under `python -O`;
        # the AssertionError type is kept for backward compatibility.
        data_files = {
            "gff3": gff_file,
            "essentials": essential_genes_files,
            "gene_names": gene_name_file,
        }
        for filetype, file_path in data_files.items():
            if not file_path:
                raise AssertionError(f"{filetype} not found at {file_path}")

        input_files = essential_genes_files

    # Normalise to a list of paths; a single path is wrapped in a list.
    if isinstance(input_files, (list, tuple)):
        essential_genes_files = list(input_files)
    else:
        essential_genes_files = [input_files]

    known_essential_gene_list = []
    for gene_file in essential_genes_files:
        if verbose:
            print('Reading file :', gene_file)

        with open(gene_file) as f:
            # Skip the header; the default of `next` keeps a file that is
            # shorter than the header from leaking a StopIteration.
            for _ in range(headerlines):
                next(f, None)
            known_essential_gene_list.extend(line.rstrip('\n') for line in f)

    return known_essential_gene_list
# #%%
# if __name__ == '__main__':
#     known_essential_gene_list = list_known_essentials()