#######################
# PageRank of Google
#######################

# Compute PageRank scores for the pages described by a connectivity matrix.
#
# Follows the notation of the MATLAB article on PageRank at
# http://www.mathworks.com/company/newsletters/news_notes/clevescorner/oct02_cleve.html
#
# Arguments:
#   G       square connectivity matrix. Every column is divided by its column
#           sum and the result is used as a transition matrix A in x = A x,
#           so G[i, j] = 1 is treated as "page j links to page i".
#   method  "power" runs `niter` power iterations starting from a vector of
#           ones; any other value (default "eigen") takes the first
#           eigenvector returned by eigen().
#   d       damping factor; (1 - d) / n is added to every entry of A.
#   niter   number of power iterations (only used when method is "power").
#
# Returns a plain numeric vector with one score per page, scaled to sum to 1.
pagerank <- function(G, method = 'eigen', d = .85, niter = 100) {
  stopifnot(length(dim(G)) == 2L, nrow(G) == ncol(G))
  n <- nrow(G)

  # Column sums = number of links leaving each page. A page with no outgoing
  # links would cause a division by zero, so its divisor is set to 1; that
  # column of A then contains only the constant term `delta`.
  out_links <- colSums(G)
  out_links[out_links == 0] <- 1

  delta <- (1 - d) / n
  # A[i, j] = delta + d * G[i, j] / out_links[j]
  A <- delta + sweep(d * G, 2, out_links, "/")

  if (method == "power") {
    x <- rep(1, n)
    for (k in seq_len(niter)) {
      x <- A %*% x
    }
    # %*% yields an n x 1 matrix; return a plain vector like the eigen branch.
    x <- as.vector(x)
  } else {
    x <- Re(eigen(A)$vectors[, 1])
  }

  # Dividing by the sum fixes both the scale and the sign of the vector.
  x / sum(x)
}

G <- rbind(c(0, 1, 1), c(0, 0, 1), c(1, 0, 0))
pagerank(G)
pagerank(G, 'eigen')

# ==================
# An attempt to get at a "wikipedia" effect. Create 10 nodes, mostly
# with random links. Then add another 20 nodes that are linked with node 1
# in both directions and with nothing else. Then add random links within
# the new nodes 11...30.

# Warm-up: a pure hub, node 1 linked with every other node in both directions.
n <- 10
G <- matrix(0, n, n)
G[1, 2:n] <- 1
G[2:n, 1] <- 1
pagerank(G)

# Base graph: node 1 linked with nodes 2 and 3, plus 2 * n random links.
n <- 10
G <- matrix(0, n, n)
G[1, 2:3] <- G[2:3, 1] <- 1
for (k in seq_len(2 * n)) {
  i <- sample(1:n, 1)
  j <- sample(1:n, 1)
  G[i, j] <- 1
}
G

par(mfrow = c(3, 2))
image(G); barplot(pagerank(G), ylim = c(0, 1), col = "orange")

# Grow the graph by n2 extra nodes that are only linked with node 1.
n2 <- 20
G2 <- cbind(G, matrix(0, n, n2))
G2 <- rbind(G2, matrix(0, n2, n + n2))
G2[n + (1:n2), 1] <- G2[1, n + (1:n2)] <- 1
image(G2); barplot(pagerank(G2), ylim = c(0, 1), col = "orange")

# Add 2 * n2 random links among the extra nodes (indices n + 1 ... n + n2).
for (k in seq_len(2 * n2)) {
  i <- sample(1:n2, 1)
  j <- sample(1:n2, 1)
  G2[i + n, j + n] <- 1
}
image(G2); barplot(pagerank(G2), ylim = c(0, 1))
#