Source code for bioflow.bio_db_parsers.ComplexPortalParser

from csv import reader as csv_reader


def parse_complex_portal(complex_portal_file):
    """Parse a tab-separated Complex Portal export into flattened complexes.

    The first row of the file is treated as a header and discarded. For every
    following row, column 0 is used as the complex ID, column 1 as its display
    name, and column 4 as a ``|``-separated list of components. Each component
    token is cut at its first ``(`` (presumably a stoichiometry annotation
    such as ``P12345(2)`` -- confirm against the export format).

    Components that are themselves IDs of complexes defined in the same file
    are replaced, recursively, by that complex's own components.

    :param complex_portal_file: path to the tab-separated file to read
    :return: tuple ``(new_nodes, base)``. ``new_nodes`` maps each complex ID
        to a dict with keys ``'ID'``, ``'displayName'`` and ``'components'``
        (the flattened component list). ``base`` is a list of the distinct
        components found across all complexes, in no particular order.
    :raises IndexError: if a non-empty data row has fewer than five columns
    """
    new_nodes = {}

    def unpack_complex_contents(complex_name, _active=frozenset()):
        """Return the flattened component list of ``complex_name``.

        ``_active`` holds the complexes currently being expanded on this
        recursion path, so that a complex that (directly or indirectly)
        contains itself is not expanded forever.
        """
        active = _active | {complex_name}
        unpacked_subnodes = []

        for sub_node in new_nodes[complex_name]['components']:
            # Membership must be tested against the known complex IDs, not
            # against the keys of the node dict itself.
            if sub_node in new_nodes:
                if sub_node not in active:
                    unpacked_subnodes += unpack_complex_contents(sub_node, active)
                continue

            # Empty tokens carry no identifier. Tokens containing ':' or
            # '_9606' are dropped (presumably namespaced non-protein IDs and
            # taxon-suffixed RNA IDs -- TODO confirm).
            if not sub_node or ':' in sub_node or '_9606' in sub_node:
                continue

            # Keep only what precedes the first '-' (e.g. an isoform suffix);
            # tokens without '-' are kept unchanged.
            unpacked_subnodes.append(sub_node.split('-')[0])

        return unpacked_subnodes

    with open(complex_portal_file, 'rt') as source:
        reader = csv_reader(source, delimiter='\t')
        next(reader, None)  # discard the header; tolerate an empty file
        for line in reader:
            if not line:
                # csv yields [] for blank lines; there is nothing to parse.
                continue
            legacy_id = line[0]
            display_name = line[1]
            components = [comp.split('(')[0] for comp in line[4].split('|')]
            new_nodes[legacy_id] = {'ID': legacy_id,
                                    'displayName': display_name,
                                    'components': components}

    # Expand every complex from the raw component lists *before* overwriting
    # any of them, so the result does not depend on row order.
    flattened = {complex_id: unpack_complex_contents(complex_id)
                 for complex_id in new_nodes}

    base = set()
    for complex_id, node in new_nodes.items():
        node['components'] = flattened[complex_id]
        base.update(node['components'])

    return new_nodes, list(base)