Source code for bioflow.algorithms_bank.flow_significance_evaluation
import numpy as np
from scipy.stats import gumbel_r
from typing import List
from bioflow.utils.log_behavior import get_logger
# Module-level logger, obtained from the project's get_logger helper with this module's name;
# the functions below use it for debug output.
log = get_logger(__name__)
def get_neighboring_degrees(degree: int,
                            max_array: np.ndarray,
                            nearest_degrees: int = 0,  # currently inactive
                            min_nodes: int = 10) -> List[float]:
    """
    Recovers the maximum flow achieved by nodes of a given degree for each run.

    If fewer than `min_nodes` values are found for the exact degree, the search is widened
    symmetrically, one degree below and one degree above at a time, until at least
    `min_nodes` values are collected or every degree present in `max_array` has been
    covered. In the latter case fewer than `min_nodes` values are returned (possibly an
    empty list) instead of searching forever.

    :param degree: degree of the nodes
    :param max_array: 2D array; row 0 holds the maximum flow values and row 1 holds the
        degree each of those values was achieved for
    :param nearest_degrees: the minimum number of the nearest degrees to look for
        (currently inactive: the value is accepted but not used)
    :param min_nodes: the minimum number of values to collect before the search stops
        widening
    :return: flow values for the requested degree first, followed by the values of
        neighbouring degrees in the order they were visited (lower degree, then higher
        degree, for each widening step)
    """
    flows = max_array[0, :]
    degrees = max_array[1, :]

    max_set_red = flows[degrees == degree].tolist()

    # With no entries at all there is nothing to widen into (and min()/max() would raise).
    if degrees.size:
        lowest_degree = degrees.min()
        highest_degree = degrees.max()

        temp_deg_minus = degree
        temp_deg_plus = degree

        # Keep widening only while at least one side of the window can still reach
        # degrees that exist in max_array; otherwise the loop could never terminate.
        while (len(max_set_red) < min_nodes
               and (temp_deg_minus > lowest_degree or temp_deg_plus < highest_degree)):
            temp_deg_minus -= 1
            temp_deg_plus += 1
            max_set_red += flows[degrees == temp_deg_minus].tolist()
            max_set_red += flows[degrees == temp_deg_plus].tolist()

    log.debug('deg: %d, list: %s' % (degree, max_set_red))

    return max_set_red
def get_p_val_by_gumbel(entry: np.ndarray,
                        max_set_red: List[float]) -> np.ndarray:
    """
    Recovers the statistical significance (p-value equivalent) by performing a Gumbel test.

    A right-skewed Gumbel distribution is fitted to the background maxima and the upper-tail
    probability of each observed value under that fit is returned.

    :param entry: 2D array whose row 0 holds the values achieved in the real hits
        information flow computation
    :param max_set_red: background set of maximum values achieved during blank sampling runs
    :return: 1D array with one upper-tail probability per column of `entry`
    """
    params = gumbel_r.fit(max_set_red)
    # The last two fitted parameters are taken as location and scale.
    mu, beta = params[-2], params[-1]

    frozen_gumbel = gumbel_r(loc=mu, scale=beta)

    # The 95% interval is only reported in the debug log.
    a_95_low, a_95_high = frozen_gumbel.interval(0.95)
    log.debug('gumbel_r fit: mu %.2f, beta: %.2f, 95 alpha: %.2f, .%.2f'
              % (mu, beta, a_95_low, a_95_high))

    # Use the survival function rather than 1 - cdf: once the cdf rounds to 1.0 the
    # subtraction yields exactly 0, whereas sf keeps the small tail probabilities.
    p_vals = frozen_gumbel.sf(entry[0, :])

    return p_vals