Source code for transposonmapper.exporting.save_as_wig

import os
import numpy as np


[docs]def save_as_wig(wigfile, tncoordinates_array, ref_tid, readnumb_array):
    """This function writes in a .wig file located in the same folder as the bamfile, the information abot how many transposons
    were found in each genomic location. 

    Parameters
    ----------
    wigfile : str 
        Path with the filename extension included(e.g "data_file/wigfile_name.wig") describing where do you want to store the results.
        By default it will be stored in the same location as the bamfile, with the same basename. 
        Example, if the bamfile path is data_file/data_1.bam then the wig file will be data_file/data_1.bam.wig
    tncoordinates_array : numpy array 
        Second Output from the get_reads.py function : _,tncoordinates_array,_=get_reads(bam)
    ref_tid : dict 
        Output from the get_chromosome_names function : ref_tid = get_chromosome_names(bam)
    readnumb_array : numpy array 
        1st Output from the get_reads.py function : readnumb_array,_,_=get_reads(bam)
    """

    readnumbwig_array = readnumb_array
    ref_names = list(ref_tid.keys())

    unique_index_array = np.array([], dtype=int)  # =cc
    N_uniques_perchr_list = []
    ll = 0
    for kk in ref_names:
        # get indices for current chromosome.
        index = np.where(tncoordinates_array[:, 0] == int(ref_tid[kk] + 1))

        # get all insertion locations (in tncoordinates, all rows, column 1)
        unique_index = np.unique(tncoordinates_array[index][:, 1], return_index=True)[1]

        unique_index_array = np.append(unique_index_array, (unique_index + ll), axis=0)

        # total amount unique indices found untill current chromosome
        ll += np.count_nonzero(tncoordinates_array[:, 0] == int(ref_tid[kk] + 1))
        N_uniques_perchr_list.append(ll)

    # Collect duplicates
    duplicate_list = []  # =dd
    ll = 0
    index_last_unique_previous_chromosome = 0
    for ii in N_uniques_perchr_list:
        index_last_unique = np.where(unique_index_array <= ii)[0][-1]
        for jj in range(ll, ii):
            if (
                int(jj)
                not in unique_index_array[
                    index_last_unique_previous_chromosome:index_last_unique
                ]
            ):
                duplicate_list.append(jj)
        index_last_unique_previous_chromosome = index_last_unique
        ll = ii

    # SUM READNUMB VALUES AT INDEX IN DUPLICATE_LIST AND DUPLICATE_LIST-1
    for ii in duplicate_list:
        readnumbwig_array[ii - 1] = readnumbwig_array[ii - 1] + readnumbwig_array[ii]

    tncoordinateswig_duplicatesremoved_array = np.delete(
        tncoordinates_array, duplicate_list, axis=0
    )
    readnumbwig_duplicatesremoved_array = np.delete(
        readnumbwig_array, duplicate_list, axis=0
    )

    # Write wigfile
    with open(wigfile, "w") as f:
        base = os.path.basename(wigfile)
        filename = os.path.splitext(base)[0]
        f.write("track type=wiggle_0 ,maxheightPixels=60 name=" + filename + "\n")
        for kk in ref_names:
            f.write("variableStep chrom=chr" + kk + "\n")

            index = np.where(
                tncoordinateswig_duplicatesremoved_array[:, 0] == int(ref_tid[kk] + 1)
            )  # get indices for current chromosome.
            for ii in index[0]:
                f.write(
                    str(tncoordinateswig_duplicatesremoved_array[ii][1])
                    + " "
                    + str(readnumbwig_duplicatesremoved_array[ii])
                    + "\n"
                )
SATAY pipeline at Delft :)

Source code for transposonmapper.exporting.save_as_wig