Source code for transposonmapper.properties.get_chromosome_position

from ..utils import chromosomename_roman_to_arabic
import pkg_resources
import os

def chromosome_position(gff_file=None):
    """Get the length, start and end position of each chromosome.

    The chromosome lengths are read from the header of a .gff file
    downloaded from
    https://www.ensembl.org/Saccharomyces_cerevisiae/Info/Index.
    The first line of the file is skipped and the following 17 lines are
    each parsed for a chromosome name and a chromosome length.

    The start and end positions are cumulative: each chromosome starts one
    position after the summed length of all preceding chromosomes.

    To get all dictionaries, use::

        a, b, c = chromosome_position()

    where ``a`` is the chromosome length, ``b`` the chromosome start
    position and ``c`` the chromosome end position.

    Parameters
    ----------
    gff_file : str, optional
        The file path of a .gff file downloaded from
        https://www.ensembl.org/Saccharomyces_cerevisiae/Info/Index.
        When None (the default), the file
        ``Saccharomyces_cerevisiae.R64-1-1.99.gff3`` from the
        ``data_files/`` folder of the ``transposonmapper`` package is used.

    Returns
    -------
    dict
        A dictionary relating each chromosome with its length. The keys are
        the chromosome names as they appear in the parsed header lines.
    dict
        A dictionary relating each chromosome with its start position. The
        keys are the keys of the roman-to-arabic dictionary returned by
        ``chromosomename_roman_to_arabic``.
    dict
        A dictionary relating each chromosome with its end position. The
        keys are the same as for the start position dictionary.
    """
    if gff_file is None:
        default_path = pkg_resources.resource_filename(
            "transposonmapper", "data_files/"
        )
        gff_file = os.path.join(
            default_path, "Saccharomyces_cerevisiae.R64-1-1.99.gff3"
        )

    # Get roman to arabic dictionary (second element of the helper's result).
    roman_to_arabic = chromosomename_roman_to_arabic()[1]

    # Number of header lines, after the first line, that are parsed for a
    # chromosome name and length.
    # NOTE(review): presumably 16 nuclear chromosomes plus 'Mito' - confirm.
    n_chromosome_lines = 17

    # GET END POSITIONS OF THE CHROMOSOMES FROM THE GFF FILE AND STORE THEM
    # IN A DICTIONARY
    chr_length_dict = {}
    chr_length_list = []
    with open(gff_file) as f:
        next(f)  # Skip the first line of the file.
        for _ in range(n_chromosome_lines):
            line = f.readline()
            # Spaces are turned into tabs before splitting, so consecutive
            # spaces yield empty fields; the indices 3 and 5 below rely on
            # that exact layout of the header lines.
            fields = line.strip("\n").replace(" ", "\t").split("\t")
            chr_number = fields[3]
            chr_length = int(fields[5])
            chr_length_list.append(chr_length)
            chr_length_dict[chr_number] = chr_length

    # DETERMINE START AND END POSITION OF EACH OF THE CHROMOSOMES
    # NOTE(review): the lengths are taken by position in the file while the
    # keys come from the roman-to-arabic dictionary; this assumes both list
    # the chromosomes in the same order - verify against the .gff header.
    chr_start_pos_dict = {}
    chr_end_pos_dict = {}
    for counter, roman in enumerate(roman_to_arabic):
        chr_start_pos_dict[roman] = sum(chr_length_list[:counter]) + 1
        chr_end_pos_dict[roman] = sum(chr_length_list[: counter + 1])
        if roman == "I":
            chr_start_pos_dict[roman] = 1

    return (chr_length_dict, chr_start_pos_dict, chr_end_pos_dict)