Source code for bioflow.bio_db_parsers.proteinRelParsers

"""
Protein relationships parser
"""
# from csv import reader
from csv import reader as csv_reader
from collections import defaultdict


def parse_bio_grid(bio_grid):
    """
    Parse a tab-separated BioGrid file into an interaction map and a flat
    list of the interactor identifiers encountered.

    The first line of the file is treated as a header and skipped. For each
    remaining row, columns 7 and 8 (0-based) form the key, column 17 is
    stored as the first annotation and column 18 is appended as a second
    annotation unless it equals ``'-'``.
    NOTE(review): these presumably correspond to the interactor symbols and
    the throughput / score columns of the BioGrid tab layout -- confirm
    against the file format actually used.

    :param bio_grid: location of the BioGrid file that needs to be parsed
    :return: tuple ``(ret_dict, base)`` where ``ret_dict`` maps
        ``(column 7, column 8)`` tuples to a list of one or two annotation
        strings (a later row with the same pair replaces an earlier one) and
        ``base`` lists every column 7 / column 8 value in file order,
        duplicates included
    :raise IndexError: if a non-blank row has fewer than 19 columns
    """
    ret_dict = {}
    base = []

    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for file objects passed to a reader.
    with open(bio_grid, 'rt', newline='') as source_file:
        biogrid_reader = csv_reader(source_file, 'excel-tab')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(biogrid_reader, None)

        for fields in biogrid_reader:
            if not fields:
                # The csv reader yields [] for blank lines (e.g. a trailing
                # empty line); there is nothing to record for them.
                continue

            pair = tuple(fields[7:9])
            annotations = [fields[17]]
            if fields[18] != '-':
                annotations.append(fields[18])

            ret_dict[pair] = annotations
            base.extend(pair)

    return ret_dict, base
def parse_hint(_hint_csv):
    """
    Read protein-protein relationships from a HiNT database file.

    The first line of the file is treated as a header and skipped. For each
    remaining row, the values in columns 2 and 3 (0-based) are recorded as
    linked to each other in both directions; rows where the two values are
    equal (self-interactions) are ignored.

    :param _hint_csv: location of the HiNT database tsv file
    :return: {UP_Identifier:[UP_ID1, UP_ID2, ...]}; partners are listed in
        file order and are repeated if the same pair occurs more than once
    :raise IndexError: if a non-blank row has fewer than 4 columns
    """
    local_relations = defaultdict(list)

    # newline='' lets the csv module do its own line-ending handling, as the
    # csv documentation requires for file objects passed to a reader.
    with open(_hint_csv, 'rt', newline='') as source_file:
        hint_reader = csv_reader(source_file, delimiter='\t')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(hint_reader, None)

        for fields in hint_reader:
            if not fields:
                # The csv reader yields [] for blank lines; skip them.
                continue

            first, second = fields[2], fields[3]
            if first != second:
                local_relations[second].append(first)
                local_relations[first].append(second)

    # Return a plain dict so lookups of unknown identifiers raise KeyError
    # instead of silently creating empty entries.
    return dict(local_relations)