Source code for bioflow.algorithms_bank.flow_significance_evaluation

import numpy as np
from scipy.stats import gumbel_r
from typing import List

from bioflow.utils.log_behavior import get_logger


log = get_logger(__name__)


[docs]def get_neighboring_degrees(degree: int, max_array: np.array, nearest_degrees: int = 0, # currently inactive min_nodes: int = 10) -> List[float]: """ Recovers the maximum flow achieved by nodes of a given degree for each run. On case the user requests it with nearest_degrees or min_nodes parameters, also recovers maximum flow values for the nodes of similar degrees or looks fro flow values in nearest degrees until at least `min_nodes` are found `nearest_degrees` :param degree: degree of the nodes :param max_array: maximum nodes for a given degree in each run :param nearest_degrees: the minimum number of the nearest gedgrees to look for :param min_nodes: the minimum number of nodes until which to look for neighbours :return: """ max_set = max_array[:, max_array[1, :] == degree] max_set_red = max_set[0, :].tolist() if len(max_set_red) < min_nodes: temp_deg_plus = degree temp_deg_minus = degree while len(max_set_red) < min_nodes: temp_deg_minus -= 1 temp_deg_plus += 1 _max_set = max_array[:, max_array[1, :] == temp_deg_minus] max_set_red += _max_set[0, :].tolist() _max_set = max_array[:, max_array[1, :] == temp_deg_plus] max_set_red += _max_set[0, :].tolist() log.debug('deg: %d, list: %s' % (degree, max_set_red)) return max_set_red
[docs]def get_p_val_by_gumbel(entry: np.array, max_set_red: List[float]) -> np.array: """ Recovers the statistical significance (p-value equivalent) by performing a gumbel test :param entry: the values achieved in the real hits information flow computation :param max_set_red: background set of maximum values achieved during blanc sampling runs :return: """ params = gumbel_r.fit(max_set_red) mu = params[-2] beta = params[-1] frozen_gumbel = gumbel_r(loc=mu, scale=beta) a_95_low, a_95_high = frozen_gumbel.interval(0.95) log.debug('gumbel_r fit: mu %.2f, beta: %.2f, 95 alpha: %.2f, .%.2f' % (mu, beta, a_95_low, a_95_high)) p_vals = 1 - frozen_gumbel.cdf(entry[0, :]) return p_vals