Source code for transposonmapper.processing.transposonread_profileplot_genome


import os
import numpy as np
import matplotlib.pyplot as plt


from transposonmapper.properties.get_chromosome_position import chromosome_position
from transposonmapper.properties.get_gene_position import  gene_position
from transposonmapper.processing.chromosome_names_in_files import chromosome_name_bedfile
from transposonmapper.processing.essential_genes_names import list_known_essentials

from transposonmapper.importing import load_default_files

from transposonmapper.plotting import profile_genome_plot

from transposonmapper.processing.profileplot_genome_helpers import (summed_chr,
length_genome,middle_chrom_pos,counts_genome,binned_list)


def profile_genome(bed_file=None, variable="transposons", bar_width=None, savefig=False, showfig=False):
    """Create a bar plot of transposon or read counts along the entire genome.

    The height of each bar represents the number of transposons or reads at
    the genomic position indicated on the x-axis. ``bar_width`` determines how
    many basepairs are put in one bin. Few basepairs per bin may be slow; too
    many basepairs in one bin may obscure areas with few transposons.

    Parameters
    ----------
    bed_file : str, optional
        The file path to the location of the bed file in your filesystem,
        by default None. It is passed to ``counts_genome`` and, when
        ``savefig`` is set, its path without extension is used as the prefix
        of the saved figure.
    variable : str, optional
        The variable for plotting throughout the genome, by default
        "transposons". A figure is only saved for "transposons" or "reads".
    bar_width : int, optional
        The width of the histogram bins, by default None, which means the
        length of the genome divided by 1000 is used internally.
    savefig : bool, optional
        Save the figure if True, by default False. The file is written next
        to ``bed_file`` as ``<bed_file without extension>_transposonplot_genome.png``
        or ``..._readplot_genome.png`` depending on ``variable``.
    showfig : bool, optional
        Show the figure if True, by default False.

    Returns
    -------
    numpy.ndarray
        All insertion sites: evenly spaced positions from 0 to the genome
        length, created with ``np.linspace``.
    object
        Binned insertion sites according to the width, as returned by
        ``binned_list``.

    Raises
    ------
    AssertionError
        If one of the default data files (gff3, essentials, gene_names)
        could not be resolved.

    Notes
    -----
    Created on Thu Mar 18 13:05:39 2021
    @author: gregoryvanbeek
    """
    # All three arguments are None so that the packaged default files are used.
    gff_file, essential_file, gene_name_file = load_default_files(
        gff_file=None, essentials_file=None, gene_names_file=None
    )

    # Verify presence of files. Raised explicitly (instead of a bare `assert`)
    # so the check also runs under `python -O`; the exception type is kept as
    # AssertionError for backward compatibility with existing callers.
    data_files = {
        "gff3": gff_file,
        "essentials": essential_file,
        "gene_names": gene_name_file,
    }
    for filetype, file_path in data_files.items():
        if not file_path:
            raise AssertionError(f"{filetype} not found at {file_path}")

    chrom_list = ['I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X',
                  'XI', 'XII', 'XIII', 'XIV', 'XV', 'XVI']

    # Only the chromosome lengths are needed; start/end positions are discarded.
    chr_length_dict, _, _ = chromosome_position(gff_file)

    summed_chr_length_dict = summed_chr(chr_length_dict)
    l_genome = length_genome(chr_length_dict)

    if bar_width is None:
        bar_width = l_genome/1000

    print('Genome length: ', l_genome)

    middle_chr_position = middle_chrom_pos(chr_length_dict)

    gene_pos_dict = gene_position(gff_file)
    genes_currentchrom_pos_list = list(gene_pos_dict)

    # NOTE(review): the returned list of essential genes is not used anywhere
    # in this function. The call is kept in case it has observable effects
    # (e.g. console output) -- confirm and remove if it does not.
    list_known_essentials(essential_file)

    allcounts_list = counts_genome(variable, bed_file, gff_file)
    allcounts_binnedlist = binned_list(allcounts_list, bar_width)

    # The number of x positions differs by one between the default bin width
    # and any other width.
    # NOTE(review): presumably this keeps the x positions the same length as
    # the output of binned_list -- verify against that helper.
    if bar_width == (l_genome/1000):
        n_positions = int(l_genome/bar_width + 1)
    else:
        n_positions = int(l_genome/bar_width + 2)
    allinsertionsites_list = np.linspace(0, l_genome, n_positions)

    ########## Plotting ##############
    profile_genome_plot(bar_width, l_genome, allinsertionsites_list, allcounts_binnedlist,
                        summed_chr_length_dict, middle_chr_position, chrom_list, variable,
                        genes_currentchrom_pos_list, gene_pos_dict)

    # Saving the plot. As before, nothing is saved for any other `variable`.
    suffix_by_variable = {
        "transposons": '_transposonplot_genome.png',
        "reads": '_readplot_genome.png',
    }
    suffix = suffix_by_variable.get(variable)
    figure_saved = False
    if savefig and suffix is not None:
        save_path = os.path.splitext(bed_file)[0] + suffix
        print('saving figure at %s' % save_path)
        plt.savefig(save_path, dpi=400)
        figure_saved = True

    # Show before closing: previously the figure was closed right after saving,
    # so requesting both savefig and showfig displayed nothing.
    if showfig:
        plt.show()
    elif figure_saved:
        plt.close()

    return allinsertionsites_list, allcounts_binnedlist