Source code for graphflow.pagerank.GooglePageRank


import warnings
from typing import Tuple

import networkx
import numpy as np
from nptyping import NDArray, Shape, Float

from .cpagerank import pagerank_cython
from .. import L1norm, PageRankLanguage


[docs] def GoogleMatrix( digraph: networkx.DiGraph, beta: float ) -> Tuple[NDArray[Shape["*, *"], Float], dict[str, int]]: """ Compute the Google Matrix for a directed graph. The Google Matrix is used in PageRank calculations and represents the probability transition matrix of a random walk on the graph. It incorporates a damping factor to handle dangling nodes and ensure convergence. Parameters ---------- digraph : networkx.DiGraph The directed graph for which to compute the Google Matrix. beta : float The damping factor (between 0 and 1). Typically set to 0.85. Returns ------- tuple A tuple containing: - A (numpy.ndarray): The Google Matrix. - nodedict (dict): A dictionary mapping node identifiers to their indices. """ nodedict = {node: idx for idx, node in enumerate(digraph.nodes())} A = (1 - beta) / float(len(digraph)) * np.ones(shape=(len(digraph), len(digraph))) for node1, node2 in digraph.edges(): A[nodedict[node2], nodedict[node1]] += beta / float(len(list(digraph.successors(node1)))) return A, nodedict
def CalculatePageRankFromAdjacencyMatrix_Cython( adjMatrix: NDArray[Shape["*, *"], Float], nodes: dict[str, int], eps: float=1e-4, maxstep: int=1000 ): return pagerank_cython(adjMatrix, nodes, eps, maxstep)
[docs] def CalculatePageRankFromAdjacencyMatrix_Python( adjMatrix: NDArray[Shape["*, *"], Float], nodes: dict[str, int], eps: float=1e-4, maxstep: int=1000 ) -> dict[str, float]: """ Calculate PageRank from an adjacency matrix using Python implementation. This function computes the PageRank scores for nodes in a graph represented by an adjacency matrix. It uses an iterative approach until convergence or maximum steps are reached. Parameters ---------- adjMatrix : numpy.ndarray The adjacency matrix representing the graph structure. nodes : dict A dictionary mapping node identifiers to their indices. eps : float, optional The convergence threshold. The algorithm stops when the change in vectors is less than this value. Default is 1e-4. maxstep : int, optional The maximum number of iterations to perform. Default is 1000. Returns ------- dict A dictionary mapping node identifiers to their PageRank scores. """ nbnodes = adjMatrix.shape[0] r = np.transpose([np.repeat(1 / float(nbnodes), nbnodes)]) converged = False stepid = 0 while not converged and stepid < maxstep: newr = np.matmul(adjMatrix, r) converged = (L1norm(newr, r) < eps) r = newr stepid += 1 nodepr = {node: r[nodes[node], 0] for node in nodes} return nodepr
[docs] def CalculatePageRankFromAdjacencyMatrix( adjMatrix: NDArray[Shape["*, *"], Float], nodes: dict[str, int], eps: float=1e-4, maxstep: int=1000, language: PageRankLanguage=PageRankLanguage.CYTHON ) -> dict[str, float]: """ Calculate PageRank from an adjacency matrix using specified implementation language. This function computes the PageRank scores for nodes in a graph represented by an adjacency matrix. It can use either Cython or Python implementation based on the language parameter. Parameters ---------- adjMatrix : numpy.ndarray The adjacency matrix representing the graph structure. nodes : dict A dictionary mapping node identifiers to their indices. eps : float, optional The convergence threshold. The algorithm stops when the change in vectors is less than this value. Default is 1e-4. maxstep : int, optional The maximum number of iterations to perform. Default is 1000. language : PageRankLanguage, optional The implementation language to use. Default is PageRankLanguage.CYTHON. Returns ------- dict A dictionary mapping node identifiers to their PageRank scores. """ if language == PageRankLanguage.CYTHON: return CalculatePageRankFromAdjacencyMatrix_Cython(adjMatrix, nodes, eps=eps, maxstep=maxstep) else: return CalculatePageRankFromAdjacencyMatrix_Python(adjMatrix, nodes, eps=eps, maxstep=maxstep)
[docs] def CalculatePageRank( digraph: networkx.DiGraph, beta: float, eps: float=1e-4, maxstep: int=1000 ) -> dict[str, float]: """ Calculate PageRank for a directed graph. This function computes the PageRank scores for nodes in a directed graph. It first computes the Google Matrix and then calculates the PageRank using the specified parameters. Parameters ---------- digraph : networkx.DiGraph The directed graph for which to compute PageRank. beta : float The damping factor (between 0 and 1). Typically set to 0.85. eps : float, optional The convergence threshold. The algorithm stops when the change in vectors is less than this value. Default is 1e-4. maxstep : int, optional The maximum number of iterations to perform. Default is 1000. Returns ------- dict A dictionary mapping node identifiers to their PageRank scores. """ A, nodes = GoogleMatrix(digraph, beta) return CalculatePageRankFromAdjacencyMatrix(A, nodes, eps=eps, maxstep=maxstep)