Network¶

The Network class is the central object in NeKo. It holds a directed graph of biological nodes (genes, proteins, complexes) and edges (interactions), and exposes methods for expanding, connecting, querying, and exporting those graphs.

Import¶

from neko.core.network import Network

Quick example¶

from neko.core.network import Network
from neko.inputs import Universe

# Create the resource container and build it.
# NOTE(review): what `build()` loads is not shown here — confirm in neko.inputs.
resources = Universe()
resources.build()

# Seed a network with three gene symbols, using the interactions held by the
# Universe object as the resources database.
net = Network(["EGFR", "KRAS", "MYC"], resources=resources.interactions)
# Ask NeKo to connect the seed nodes (delegates to strategies.connect_nodes).
net.connect_nodes()
# `nodes` and `edges` are the pandas DataFrames held by the network.
print(net.nodes)
print(net.edges)

Class reference¶

Network ¶

Network(initial_nodes: list[str] = None, sif_file=None, resources=None)

A molecular interaction network.

The `Network` object is the central organizing component of the `neko`
module. It is the subject of all operations implemented here, including
topological algorithms, graph analysis, network visualization and
integration of database knowledge.

Args: initial_nodes: A list of initial nodes to be added to the network. sif_file: A SIF (Simple Interaction Format) file to load the network from. resources: A pandas DataFrame containing the resources database.

Methods:

Source code in neko/core/network.py

def __init__(
        self,
        initial_nodes: list[str] = None,
        sif_file=None,
        resources=None,
    ):
    """
    Set up an empty network and then populate it.

    Args:
        - initial_nodes: A list of initial nodes to be added to the network.
        - sif_file: A SIF (Simple Interaction Format) file to load the network from.
        - resources: A pandas DataFrame containing the resources database.

    The constructor arguments are kept in `_init_args`; the actual loading is
    performed by `_populate()`, which runs while `_is_initializing` is True.
    """

    # Remember the constructor arguments (everything except `self`).
    self._init_args = {
        "initial_nodes": initial_nodes,
        "sif_file": sif_file,
        "resources": resources,
    }

    # Tabular view of the network.
    node_columns = ["Genesymbol", "Uniprot", "Type"]
    edge_columns = ["source", "target", "Type", "Effect", "References"]
    self.nodes = pd.DataFrame(columns=node_columns)
    self.edges = pd.DataFrame(columns=edge_columns)

    # Object-based view of the network (sets of Node / Edge objects).
    self._node_objs = set()
    self._edge_objs = set()

    self.initial_nodes = initial_nodes
    self._ontology = Ontology()

    # Bookkeeping for the NetworkState history.
    self._states: dict[int, NetworkState] = {}
    self._state_metadata: dict[int, dict] = {}
    self._state_log: list[int] = []
    self._state_counter: int = 0
    self._current_state_id: Optional[int] = None
    self._root_state_id: Optional[int] = None
    self._auto_state_depth: int = 0
    self._history_enabled: bool = True
    self._max_history: Optional[int] = None

    # Flag the initial population so other code can tell it apart from
    # later, user-driven modifications.
    self._is_initializing = True
    self._populate()
    self._is_initializing = False

Functions¶

add_node ¶

add_node(node: str, from_sif: bool = False) -> bool

Adds a node to the network. The node is added to the nodes DataFrame of the network. The function checks the syntax for the genesymbol to ensure it is correct. If the node is a complex, it is added with the 'Genesymbol' as the complex string and 'Uniprot' as the node. Otherwise, it is added with the 'Genesymbol' as the genesymbol and 'Uniprot' as the uniprot. The 'Type' is set as 'NaN' for all new nodes.

Args: - node: A string representing the node to be added. The node can be represented by either its Genesymbol or Uniprot identifier.

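Returns: - True if the node was added to the network; False if the node is not present in the resources database (in which case nothing is added).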

Source code in neko/core/network.py

@_record_state_operation
def add_node(self, node: str, from_sif: bool = False) -> bool:
    """
    Adds a node to the network.

    The identifier is translated with `mapping_node_identifier`; the resulting row is appended to the nodes
    DataFrame (duplicates are dropped) and mirrored in the object-based storage through `_add_node_obj`.
    The 'Type' is set as 'NaN' for all new nodes.

    Args:
        - node: A string representing the node to be added. The node can be represented by either its
                Genesymbol or Uniprot identifier.
        - from_sif: If True, the node is added without being checked against the resources database.
                A node that cannot be translated is still added, using the raw identifier for both
                'Genesymbol' and 'Uniprot'; a node that can be translated is also recorded in
                `initial_nodes`.

    Returns:
        - True if the node was added, False if `check_node` rejects both its Uniprot and its Genesymbol
          (only possible when `from_sif` is False).
    """

    # Both code paths need the translation, so do it once.
    complex_string, genesymbol, uniprot = mapping_node_identifier(node)

    def register(symbol, accession):
        # Keep the DataFrame view and the object-based view in sync.
        self.nodes.loc[len(self.nodes)] = {"Genesymbol": symbol, "Uniprot": accession, "Type": "NaN"}
        self.nodes = self.nodes.drop_duplicates().reset_index(drop=True)
        self._add_node_obj(symbol, accession, "NaN")

    if from_sif:
        # If nothing could be translated, print an error message but add the node to the network anyway,
        # keeping the raw identifier.
        if not complex_string and not genesymbol and not uniprot:
            print("Error: node %s could not be automatically translated" % node)
            register(node, node)
            return True

        register(genesymbol, uniprot)
        # `initial_nodes` defaults to None in the constructor; start from an empty list in that case.
        # Building a new list (instead of appending) also avoids mutating the list the caller passed in.
        known = self.initial_nodes if self.initial_nodes is not None else []
        self.initial_nodes = list({*known, genesymbol})
        return True

    if not self.check_node(uniprot) and not self.check_node(genesymbol):
        print("Error: node %s is not present in the resources database" % node)
        return False

    if complex_string:
        # Complexes are stored with the complex string as 'Genesymbol' and the raw identifier as 'Uniprot'.
        register(complex_string, node)
    else:
        register(genesymbol, uniprot)
    return True

add_edge ¶

add_edge(edge: DataFrame) -> None

This method adds an interaction to the list of interactions while converting it to the NeKo-network format. It checks if the edge represents inhibition or stimulation and sets the effect accordingly. It also checks if the nodes involved in the interaction are already present in the network, if not, it adds them.

Args: - edge: A pandas DataFrame representing the interaction. The DataFrame should contain columns for 'source', 'target', 'type', and 'references'. The 'source' and 'target' columns represent the nodes involved in the interaction. The 'type' column represents the type of interaction. The 'references' column contains the references for the interaction.

Returns: - None

Source code in neko/core/network.py

@_record_state_operation
def add_edge(self, edge: pd.DataFrame) -> None:
    """
    This method adds an interaction to the list of interactions while converting it to the NeKo-network format.
    The effect is computed with `check_sign`. Nodes involved in the interaction that are not yet present in the
    'Uniprot' column of the nodes DataFrame are added through `add_node`.

    If the network already contains an edge with the same source, target and effect, no new row is created:
    the references of the new interaction (if any) are merged into the existing row instead.

    Args:
        - edge: A pandas DataFrame representing the interaction. The DataFrame should contain columns for
        'source', 'target', 'type', and 'references'. The 'source' and 'target' columns represent the nodes involved
        in the interaction. The 'type' column represents the type of interaction. The 'references' column contains
        the references for the interaction. 'type' and 'references' are optional; when a column is missing
        the corresponding value is None.

    Returns:
        - None
    """

    # Check if the edge represents inhibition or stimulation and set the effect accordingly
    effect = check_sign(edge)
    references = edge["references"].values[0] if "references" in edge.columns else None
    edge_type = edge["type"].values[0] if "type" in edge.columns else None
    source = edge["source"].values[0]
    target = edge["target"].values[0]

    # Convert the "Uniprot" column to a set for efficient membership test
    uniprot_nodes = set(self.nodes["Uniprot"].unique())

    # add the new nodes to the nodes dataframe
    if source not in uniprot_nodes:
        self.add_node(source)
    if target not in uniprot_nodes:
        self.add_node(target)

    # Look for an edge with the same source, target and effect
    existing_edge = self.edges[(self.edges["source"] == source) &
                               (self.edges["target"] == target) &
                               (self.edges["Effect"] == effect)]

    if existing_edge.empty:
        # Brand-new interaction: append it to the existing edges in the graph
        df_edge = pd.DataFrame({
            "source": edge["source"],
            "target": edge["target"],
            "Type": edge_type,
            "Effect": effect,
            "References": references
        })
        self.edges = pd.concat([self.edges, df_edge])
    elif references is not None:
        # Known interaction: merge the references. Rows whose stored reference is missing take the new
        # reference alone, because concatenating onto a missing value would leave it missing.
        new_reference = str(references)
        current = self.edges.loc[existing_edge.index, "References"]
        merged = current + ("; " + new_reference)
        self.edges.loc[existing_edge.index, "References"] = merged.where(current.notna(), new_reference)
    # else: known interaction and nothing to merge -> do not add a second row for the same edge

    self.edges = self.edges.drop_duplicates().reset_index(drop=True)
    return

remove_node ¶

remove_node(node: str) -> None

Removes a node from the network. The node is removed from both the list of nodes and the list of edges.

Args: - node: A string representing the node to be removed. The node can be represented by either its Genesymbol or Uniprot identifier.

Returns: - None

Source code in neko/core/network.py

@_record_state_operation
def remove_node(self, node: str) -> None:
    """
    Removes a node from the network. The node is removed from the nodes DataFrame and every edge that has it
    as source or target is removed from the edges DataFrame.

    Edges are matched against both the identifier as given and its Uniprot translation, so that nodes stored
    under their raw identifier (e.g. complexes, or SIF nodes that could not be translated) do not leave
    dangling edges behind.

    NOTE(review): only the `nodes` / `edges` DataFrames are updated here; confirm whether the object-based
    storage (`_node_objs` / `_edge_objs`) is synchronised elsewhere.

    Args:
        - node: A string representing the node to be removed. The node can be represented by either its
                Genesymbol or Uniprot identifier.

    Returns:
        - None
    """
    # Remove the node from the nodes DataFrame
    self.nodes = self.nodes[(self.nodes.Genesymbol != node) & (self.nodes.Uniprot != node)]

    # Translate the node identifier to Uniprot (third element of the mapping result)
    uniprot = mapping_node_identifier(node)[2]

    # Identifiers under which the node may appear in the edge list
    identifiers = [node]
    if uniprot is not None and uniprot != node:
        identifiers.append(uniprot)

    # Remove any edges associated with the node from the edges DataFrame
    touches_node = self.edges[['source', 'target']].isin(identifiers).any(axis=1)
    self.edges = self.edges[~touches_node]

    return

connect_nodes ¶

connect_nodes(only_signed: bool = False, consensus_only: bool = False) -> None

Delegates to strategies.connect_nodes.

Source code in neko/core/network.py

@_record_state_operation
def connect_nodes(self, only_signed: bool = False, consensus_only: bool = False) -> None:
    """
    Thin wrapper around `strategies.connect_nodes`.

    The network itself is passed as first argument; `only_signed` and `consensus_only` are forwarded
    unchanged as keyword arguments. The strategy's return value is returned as is.
    """
    # Imported at call time, as in the rest of the delegating methods.
    from .strategies import connect_nodes as _strategy

    options = {"only_signed": only_signed, "consensus_only": consensus_only}
    return _strategy(self, **options)

connect_subgroup ¶

connect_subgroup(group, maxlen: int = 1, only_signed: bool = False, consensus: bool = False) -> None

Delegates to strategies.connect_subgroup.

Source code in neko/core/network.py

@_record_state_operation
def connect_subgroup(self, group, maxlen: int = 1, only_signed: bool = False, consensus: bool = False) -> None:
    """
    Thin wrapper around `strategies.connect_subgroup`.

    The network and `group` are passed positionally; `maxlen`, `only_signed` and `consensus` are forwarded
    unchanged as keyword arguments. The strategy's return value is returned as is.
    """
    # Imported at call time, as in the rest of the delegating methods.
    from .strategies import connect_subgroup as _strategy

    options = {"maxlen": maxlen, "only_signed": only_signed, "consensus": consensus}
    return _strategy(self, group, **options)

connect_component ¶

connect_component(comp_A, comp_B, maxlen: int = 2, mode: Literal['OUT', 'IN', 'ALL'] = 'OUT', only_signed: bool = False, consensus: bool = False) -> None

Delegates to strategies.connect_component.

Source code in neko/core/network.py

@_record_state_operation
def connect_component(self, comp_A, comp_B, maxlen: int = 2, mode: Literal['OUT', 'IN', 'ALL'] = 'OUT', only_signed: bool = False, consensus: bool = False) -> None:
    """
    Thin wrapper around `strategies.connect_component`.

    The network, `comp_A` and `comp_B` are passed positionally; `maxlen`, `mode`, `only_signed` and
    `consensus` are forwarded unchanged as keyword arguments. The strategy's return value is returned as is.
    """
    # Imported at call time, as in the rest of the delegating methods.
    from .strategies import connect_component as _strategy

    options = {
        "maxlen": maxlen,
        "mode": mode,
        "only_signed": only_signed,
        "consensus": consensus,
    }
    return _strategy(self, comp_A, comp_B, **options)

connect_to_upstream_nodes ¶

connect_to_upstream_nodes(nodes_to_connect=None, depth: int = 1, rank: int = 1, only_signed: bool = True, consensus: bool = False) -> None

Delegates to strategies.connect_to_upstream_nodes.

Source code in neko/core/network.py

@_record_state_operation
def connect_to_upstream_nodes(self, nodes_to_connect=None, depth: int = 1, rank: int = 1, only_signed: bool = True, consensus: bool = False) -> None:
    """
    Thin wrapper around `strategies.connect_to_upstream_nodes`.

    The network is passed as first argument; `nodes_to_connect`, `depth`, `rank`, `only_signed` and
    `consensus` are forwarded unchanged as keyword arguments. The strategy's return value is returned as is.
    """
    # Imported at call time, as in the rest of the delegating methods.
    from .strategies import connect_to_upstream_nodes as _strategy

    options = {
        "nodes_to_connect": nodes_to_connect,
        "depth": depth,
        "rank": rank,
        "only_signed": only_signed,
        "consensus": consensus,
    }
    return _strategy(self, **options)

connect_genes_to_phenotype ¶

connect_genes_to_phenotype(phenotype: str = None, id_accession: str = None, sub_genes: list = None, maxlen: int = 2, only_signed: bool = False, compress: bool = False) -> None

Delegates to strategies.connect_genes_to_phenotype.

Source code in neko/core/network.py

@_record_state_operation
def connect_genes_to_phenotype(self, phenotype: str = None, id_accession: str = None, sub_genes: list = None, maxlen: int = 2, only_signed: bool = False, compress: bool = False) -> None:
    """
    Thin wrapper around `strategies.connect_genes_to_phenotype`.

    The network is passed as first argument; `phenotype`, `id_accession`, `sub_genes`, `maxlen`,
    `only_signed` and `compress` are forwarded unchanged as keyword arguments. The strategy's return value
    is returned as is.
    """
    # Imported at call time, as in the rest of the delegating methods.
    from .strategies import connect_genes_to_phenotype as _strategy

    options = {
        "phenotype": phenotype,
        "id_accession": id_accession,
        "sub_genes": sub_genes,
        "maxlen": maxlen,
        "only_signed": only_signed,
        "compress": compress,
    }
    return _strategy(self, **options)

complete_connection ¶

complete_connection(maxlen: Optional[int] = 2, algorithm: Literal['bfs', 'dfs'] = 'dfs', minimal: bool = True, only_signed: bool = False, consensus: bool = False, connect_with_bias: bool = False) -> None

Delegates to strategies.complete_connection.

Source code in neko/core/network.py

@_record_state_operation
def complete_connection(
        self,
        maxlen: Optional[int] = 2,
        algorithm: Literal['bfs', 'dfs'] = 'dfs',
        minimal: bool = True,
        only_signed: bool = False,
        consensus: bool = False,
        connect_with_bias: bool = False,
    ) -> None:
    """
    Thin wrapper around `strategies.complete_connection`.

    The network is passed as first argument; `maxlen`, `algorithm`, `minimal`, `only_signed`, `consensus`
    and `connect_with_bias` are forwarded unchanged as keyword arguments. The strategy's return value is
    returned as is.
    """
    # Imported at call time, as in the rest of the delegating methods.
    from .strategies import complete_connection as _strategy

    options = {
        "maxlen": maxlen,
        "algorithm": algorithm,
        "minimal": minimal,
        "only_signed": only_signed,
        "consensus": consensus,
        "connect_with_bias": connect_with_bias,
    }
    return _strategy(self, **options)

convert_edgelist_into_genesymbol ¶

convert_edgelist_into_genesymbol() -> pd.DataFrame

This function generates a new edges dataframe with the source and target identifiers translated (if possible) into Genesymbol format.

Args: - None

Returns: - A pandas DataFrame containing the edges with the source and target identifiers translated into Genesymbol format.

Source code in neko/core/network.py

def convert_edgelist_into_genesymbol(self) -> pd.DataFrame:
    """
    This function generates a new edges dataframe with the source and target identifiers translated (if possible)
    into Genesymbol format. The network's own edges DataFrame is left untouched.

    Each identifier is translated with `mapping_node_identifier`: the complex string is used when available,
    otherwise the gene symbol. An identifier that cannot be translated is kept as it is instead of being
    replaced by an empty value.

    Args:
         - None

    Returns:
        - A pandas DataFrame containing the edges with the source and target identifiers translated into Genesymbol
            format.
    """

    def to_genesymbol(identifier):
        # First two elements of the mapping result: complex string, gene symbol.
        complex_string, genesymbol = mapping_node_identifier(identifier)[:2]
        # Fall back to the original identifier when no translation is available.
        return complex_string or genesymbol or identifier

    # Work on a copy so that self.edges keeps its original identifiers.
    gs_edges = self.edges.copy()

    for column in ("source", "target"):
        gs_edges[column] = gs_edges[column].apply(to_genesymbol)

    return gs_edges