Source code for graphflow.hits.hitsrank


# Jon Kleinberg's HITS (Hyperlink-Induced Topic Search) algorithm

import networkx
import numpy as np
import networkx as nx
from nptyping import NDArray, Shape, Float

from .. import L1norm


[docs] def hits( adjMatrix: NDArray[Shape["*, *"], Float], eps: float=1e-4, maxstep: int=1000 ) -> tuple[NDArray[Shape["*"], Float], NDArray[Shape["*"], Float]]: """ Compute the HITS (Hyperlink-Induced Topic Search) algorithm on an adjacency matrix. This function calculates the hub and authority vectors for a given adjacency matrix using the HITS algorithm. The algorithm iteratively computes these vectors until convergence or the maximum number of steps is reached. Parameters ---------- adjMatrix : numpy.ndarray The adjacency matrix representing the graph structure. eps : float, optional The convergence threshold. The algorithm stops when the change in vectors is less than this value. Default is 1e-4. maxstep : int, optional The maximum number of iterations to perform. Default is 1000. Returns ------- tuple A tuple containing: - hub vector (numpy.ndarray): The hub scores for each node. - authority vector (numpy.ndarray): The authority scores for each node. """ nbnodes = adjMatrix.shape[0] # hub vector i = np.random.uniform(size=nbnodes).reshape((nbnodes, 1)) # authority vector p = np.random.uniform(size=nbnodes).reshape((nbnodes, 1)) step = 0 converged = False while step < maxstep and not converged: newp = np.matmul(adjMatrix.T, i) newi = np.matmul(adjMatrix, p) newp = newp / np.linalg.norm(newp) newi = newi / np.linalg.norm(newi) converged = (L1norm(newp, p) < eps) and (L1norm(newi, i) < eps) i, p = newi, newp return i.reshape(nbnodes), p.reshape(nbnodes)
[docs] def CalculateHITS( digraph: networkx.DiGraph, eps: float=1e-4, maxstep: int=1000 ) -> tuple[NDArray[Shape["*"], Float], NDArray[Shape["*"], Float]]: """ Compute the HITS (Hyperlink-Induced Topic Search) algorithm on a NetworkX digraph. This function calculates the hub and authority scores for each node in a directed graph using the HITS algorithm. It converts the graph to an adjacency matrix and then applies the HITS algorithm. Parameters ---------- digraph : networkx.DiGraph The directed graph on which to compute the HITS algorithm. eps : float, optional The convergence threshold. The algorithm stops when the change in vectors is less than this value. Default is 1e-4. maxstep : int, optional The maximum number of iterations to perform. Default is 1000. Returns ------- tuple A tuple containing: - hubdict (dict): A dictionary mapping node identifiers to their hub scores. - authdict (dict): A dictionary mapping node identifiers to their authority scores. """ A = nx.adjacency_matrix(digraph).toarray() nodes = list(digraph.nodes()) hubvec, authvec = hits(A, eps=eps, maxstep=maxstep) hubdict = {nodes[i]: hubvec[i] for i in range(len(hubvec))} authdict = {nodes[i]: authvec[i] for i in range(len(authvec))} return hubdict, authdict