# Source code for transposonmapper.statistics.dataframe_from_pergene


import numpy as np
import pandas as pd



from transposonmapper.statistics.dataframe_from_pergene_helpers import (essential_genes, 
                                                                        read_pergene_file, reads_per_insertion)

def dataframe_from_pergenefile(pergenefile, verbose=True):
    """Create a dataframe with the information from a pergene.txt file.

    The gene_essentiality column is created based on the genes present in the
    Cerevisiae_EssentialGenes_List_1.txt and
    Cerevisiae_EssentialGenes_List_2.txt files.
    The number of reads per insertion (Nreadsperinsrt) is determined by
    dividing the read_per_gene column by the tn_per_gene column.

    Author: Gregory van Beek

    Parameters
    ----------
    pergenefile : str
        Absolute path to the pergene.txt file, one of the outputs of the
        transposonmapper module.
    verbose : bool, optional
        Not read anywhere in the body of this function; kept in the
        signature so callers that pass it keep working. By default True.

    Returns
    -------
    pandas.DataFrame
        Dataframe where each row is a single gene, with the following
        columns (in this order):

        - gene_names
        - gene_essentiality
        - tn_per_gene
        - read_per_gene
        - Nreadsperinsrt
    """
    # read file
    genenames_list, tnpergene_list, readpergene_list, lines = read_pergene_file(
        pergenefile
    )

    # determine number of reads per insertion per gene
    readperinspergene_list = reads_per_insertion(
        tnpergene_list, readpergene_list, lines
    )

    # determine essential genes
    geneessentiality_list = essential_genes(genenames_list, lines)

    # create dataframe; the dict's insertion order fixes the column order
    read_gene_dict = {
        "gene_names": genenames_list,
        "gene_essentiality": geneessentiality_list,
        "tn_per_gene": tnpergene_list,
        "read_per_gene": readpergene_list,
        "Nreadsperinsrt": readperinspergene_list,
    }
    read_gene_df = pd.DataFrame(read_gene_dict, columns=list(read_gene_dict))

    return read_gene_df