Source code for transposonmapper.exporting.save_as_wig

import os
import numpy as np


[docs]def save_as_wig(wigfile, tncoordinates_array, ref_tid, readnumb_array): """This function writes in a .wig file located in the same folder as the bamfile, the information abot how many transposons were found in each genomic location. Parameters ---------- wigfile : str Path with the filename extension included(e.g "data_file/wigfile_name.wig") describing where do you want to store the results. By default it will be stored in the same location as the bamfile, with the same basename. Example, if the bamfile path is data_file/data_1.bam then the wig file will be data_file/data_1.bam.wig tncoordinates_array : numpy array Second Output from the get_reads.py function : _,tncoordinates_array,_=get_reads(bam) ref_tid : dict Output from the get_chromosome_names function : ref_tid = get_chromosome_names(bam) readnumb_array : numpy array 1st Output from the get_reads.py function : readnumb_array,_,_=get_reads(bam) """ readnumbwig_array = readnumb_array ref_names = list(ref_tid.keys()) unique_index_array = np.array([], dtype=int) # =cc N_uniques_perchr_list = [] ll = 0 for kk in ref_names: # get indices for current chromosome. index = np.where(tncoordinates_array[:, 0] == int(ref_tid[kk] + 1)) # get all insertion locations (in tncoordinates, all rows, column 1) unique_index = np.unique(tncoordinates_array[index][:, 1], return_index=True)[1] unique_index_array = np.append(unique_index_array, (unique_index + ll), axis=0) # total amount unique indices found untill current chromosome ll += np.count_nonzero(tncoordinates_array[:, 0] == int(ref_tid[kk] + 1)) N_uniques_perchr_list.append(ll) # Collect duplicates duplicate_list = [] # =dd ll = 0 index_last_unique_previous_chromosome = 0 for ii in N_uniques_perchr_list: index_last_unique = np.where(unique_index_array <= ii)[0][-1] for jj in range(ll, ii): if ( int(jj) not in unique_index_array[ index_last_unique_previous_chromosome:index_last_unique ] ): duplicate_list.append(jj) index_last_unique_previous_chromosome = index_last_unique ll = ii # SUM READNUMB VALUES AT INDEX IN DUPLICATE_LIST AND DUPLICATE_LIST-1 for ii in duplicate_list: readnumbwig_array[ii - 1] = readnumbwig_array[ii - 1] + readnumbwig_array[ii] tncoordinateswig_duplicatesremoved_array = np.delete( tncoordinates_array, duplicate_list, axis=0 ) readnumbwig_duplicatesremoved_array = np.delete( readnumbwig_array, duplicate_list, axis=0 ) # Write wigfile with open(wigfile, "w") as f: base = os.path.basename(wigfile) filename = os.path.splitext(base)[0] f.write("track type=wiggle_0 ,maxheightPixels=60 name=" + filename + "\n") for kk in ref_names: f.write("variableStep chrom=chr" + kk + "\n") index = np.where( tncoordinateswig_duplicatesremoved_array[:, 0] == int(ref_tid[kk] + 1) ) # get indices for current chromosome. for ii in index[0]: f.write( str(tncoordinateswig_duplicatesremoved_array[ii][1]) + " " + str(readnumbwig_duplicatesremoved_array[ii]) + "\n" )