Source code for transposonmapper.exporting.save_per_gene_insertions

[docs]def save_per_gene_insertions( filename, tn_coordinates, gene_coordinates, chr_lengths_cumsum, ref_tid_roman, aliases_designation, ): """This function write in txt file 5 columns of information about transposon insertions separated by tabs. The tabs are: Gene name,Chromosome,Start location,End location,Insertion locations,Reads per insertion location Parameters ---------- filename : str Path with the filename extension included(e.g "data_file/file.txt") describing where do you want to store the results. By default it will be stored in the same location as the bamfile, with the same basename. Example, if the bamfile path is data_file/data_1.bam then the per_gene file will be data_file/data_1.bam_pergene_insertions.txt tn_coordinates : dict Last output of the function get_insertions _,_, tn_coordinates_per_gene = get_insertions_and_reads( gene_coordinates, tncoordinatescopy_array, readnumb_array) gene_coordinates : dict Output of the function add_chromosome_length: gene_coordinates = add_chromosome_length( gene_coordinates, chr_lengths_cumsum, ref_tid_roman) chr_lengths_cumsum : dict last output of the function get_sequence_length : _, chr_lengths_cumsum = get_sequence_length(bam) ref_tid_roman : dict Dictionary describing roman names as keys , ref_romannums = chromosomename_roman_to_arabic()[1] ref_tid_roman = {key: value for key, value in zip(ref_romannums, ref_tid)} aliases_designation : dict Last output of the function read_genes _, _, aliases_designation = read_genes( gff_file, essential_file, gene_name_file) """ with open(filename, "w") as f: f.write( "Gene name\tChromosome\tStart location\tEnd location\tInsertion locations\tReads per insertion location\n" ) for gene in tn_coordinates: gene_chrom = ref_tid_roman.get(gene_coordinates.get(gene)[0]) tncoordinates = [ ins - chr_lengths_cumsum.get(gene_chrom) for ins in tn_coordinates[gene][3] ] if gene in aliases_designation: gene_alias = aliases_designation.get(gene)[0] else: gene_alias = gene f.write( gene_alias + "\t" + str(tn_coordinates[gene][0]) + "\t" + str(tn_coordinates[gene][1] - chr_lengths_cumsum.get(gene_chrom)) + "\t" + str(tn_coordinates[gene][2] - chr_lengths_cumsum.get(gene_chrom)) + "\t" + str(tncoordinates) + "\t" + str(tn_coordinates[gene][4]) + "\n" )
[docs]def save_per_essential_insertions( filename, tn_coordinates, gene_coordinates, chr_lengths_cumsum, ref_tid_roman, aliases_designation, ): """This function generates a .txt file with the insertions in the annotated essential genes in WT. Parameters ---------- filename : str Path with the filename extension included(e.g "data_file/file.txt") describing where do you want to store the results. By default it will be stored in the same location as the bamfile, with the same basename. Example, if the bamfile path is data_file/data_1.bam then the file will be data_file/data_1.bam_peressential_insertions.txt tn_coordinates : dict Last output of the function get_insertions _,_, tn_coordinates_per_gene = get_insertions_and_reads( gene_coordinates, tncoordinatescopy_array, readnumb_array) gene_coordinates : dict Output of the function add_chromosome_length: gene_coordinates = add_chromosome_length( gene_coordinates, chr_lengths_cumsum, ref_tid_roman) chr_lengths_cumsum : dict last output of the function get_sequence_length : _, chr_lengths_cumsum = get_sequence_length(bam) ref_tid_roman : dict Dictionary describing roman names as keys , ref_romannums = chromosomename_roman_to_arabic()[1] ref_tid_roman = {key: value for key, value in zip(ref_romannums, ref_tid)} aliases_designation : dict Last output of the function read_genes _, _, aliases_designation = read_genes( gff_file, essential_file, gene_name_file) """ with open(filename, "w") as f: f.write( "Essential gene name\tChromosome\tStart location\tEnd location\tInsertion locations\tReads per insertion location\n" ) for gene in tn_coordinates: gene_chrom = ref_tid_roman.get(gene_coordinates.get(gene)[0]) tncoordinates = [ ins - chr_lengths_cumsum.get(gene_chrom) for ins in tn_coordinates[gene][3] ] if gene in aliases_designation: gene_alias = aliases_designation.get(gene)[0] else: gene_alias = gene f.write( gene_alias + "\t" + str(tn_coordinates[gene][0]) + "\t" + str(tn_coordinates[gene][1] - chr_lengths_cumsum.get(gene_chrom)) + "\t" + str(tn_coordinates[gene][2] - chr_lengths_cumsum.get(gene_chrom)) + "\t" + str(tncoordinates) + "\t" + str(tn_coordinates[gene][4]) + "\n" )