Source code for graphflow.hits.hitsrank


# Jon Kleinberg's HITS (Hyperlink-Induced Topic Search) algorithm

import networkx
import numpy as np
import networkx as nx
from nptyping import NDArray, Shape, Float

from .. import L1norm



[docs]
def hits(
        adjMatrix: NDArray[Shape["*, *"], Float],
        eps: float=1e-4,
        maxstep: int=1000
) -> tuple[NDArray[Shape["*"], Float], NDArray[Shape["*"], Float]]:
    """
    Compute the HITS (Hyperlink-Induced Topic Search) algorithm on an adjacency matrix.
    
    This function calculates the hub and authority vectors for a given adjacency matrix
    using the HITS algorithm. The algorithm iteratively computes these vectors until
    convergence or the maximum number of steps is reached.
    
    Parameters
    ----------
    adjMatrix : numpy.ndarray
        The adjacency matrix representing the graph structure.
    eps : float, optional
        The convergence threshold. The algorithm stops when the change in vectors
        is less than this value. Default is 1e-4.
    maxstep : int, optional
        The maximum number of iterations to perform. Default is 1000.
    
    Returns
    -------
    tuple
        A tuple containing:
        - hub vector (numpy.ndarray): The hub scores for each node.
        - authority vector (numpy.ndarray): The authority scores for each node.
    """
    nbnodes = adjMatrix.shape[0]
    # hub vector
    i = np.random.uniform(size=nbnodes).reshape((nbnodes, 1))
    # authority vector
    p = np.random.uniform(size=nbnodes).reshape((nbnodes, 1))

    step = 0
    converged = False

    while step < maxstep and not converged:
        newp = np.matmul(adjMatrix.T, i)
        newi = np.matmul(adjMatrix, p)

        newp = newp / np.linalg.norm(newp)
        newi = newi / np.linalg.norm(newi)

        converged = (L1norm(newp, p) < eps) and (L1norm(newi, i) < eps)
        i, p = newi, newp

    return i.reshape(nbnodes), p.reshape(nbnodes)




[docs]
def CalculateHITS(
        digraph: networkx.DiGraph,
        eps: float=1e-4,
        maxstep: int=1000
) -> tuple[NDArray[Shape["*"], Float], NDArray[Shape["*"], Float]]:
    """
    Compute the HITS (Hyperlink-Induced Topic Search) algorithm on a NetworkX digraph.
    
    This function calculates the hub and authority scores for each node in a directed graph
    using the HITS algorithm. It converts the graph to an adjacency matrix and then applies
    the HITS algorithm.
    
    Parameters
    ----------
    digraph : networkx.DiGraph
        The directed graph on which to compute the HITS algorithm.
    eps : float, optional
        The convergence threshold. The algorithm stops when the change in vectors
        is less than this value. Default is 1e-4.
    maxstep : int, optional
        The maximum number of iterations to perform. Default is 1000.
    
    Returns
    -------
    tuple
        A tuple containing:
        - hubdict (dict): A dictionary mapping node identifiers to their hub scores.
        - authdict (dict): A dictionary mapping node identifiers to their authority scores.
    """
    A = nx.adjacency_matrix(digraph).toarray()
    nodes = list(digraph.nodes())
    hubvec, authvec = hits(A, eps=eps, maxstep=maxstep)
    hubdict = {nodes[i]: hubvec[i] for i in range(len(hubvec))}
    authdict = {nodes[i]: authvec[i] for i in range(len(authvec))}
    return hubdict, authdict