\documentclass{article}%
\usepackage{amsmath}%
\setcounter{MaxMatrixCols}{30}%
\usepackage{amsfonts}%
\usepackage{amssymb}%
\usepackage{graphicx}
%TCIDATA{OutputFilter=latex2.dll}
%TCIDATA{Version=5.00.0.2552}
%TCIDATA{CSTFile=40 LaTeX article.cst}
%TCIDATA{Created=Monday, October 27, 2014 10:04:07}
%TCIDATA{LastRevised=Monday, October 27, 2014 10:24:31}
%TCIDATA{}
%TCIDATA{}
%TCIDATA{}
\newtheorem{theorem}{Theorem}
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{case}[theorem]{Case}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{conclusion}[theorem]{Conclusion}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{criterion}[theorem]{Criterion}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{solution}[theorem]{Solution}
\newtheorem{summary}[theorem]{Summary}
\newenvironment{proof}[1][Proof]{\noindent\textbf{#1.} }{\ \rule{0.5em}{0.5em}}
\begin{document}
\section{Challenges to the algorithm}
In this worksheet, we explore some challenges to the PageRank algorithm.
\subsection{Nonuniqueness}
We would like our ranking to be unique, which means that we should have only
one eigenvector representing the eigenvalue 1. It turns out that this is true
if the web is a strongly connected digraph. We will show this later. However,
if the web is disconnected, then we can have a higher dimensional eigenspace
for eigenvalue $1.$ Consider the web in BL Figure 2.2. The link matrix is%
\[
A=\left[
\begin{array}
[c]{ccccc}%
0 & 1 & 0 & 0 & 0\\
1 & 0 & 0 & 0 & 0\\
0 & 0 & 0 & 1 & \frac{1}{2}\\
0 & 0 & 1 & 0 & \frac{1}{2}\\
0 & 0 & 0 & 0 & 0
\end{array}
\right] .
\]
1) Show that the vectors $\left[ \frac{1}{2},\frac{1}{2},0,0,0\right] ^{T}$
and $\left[ 0,0,\frac{1}{2},\frac{1}{2},0\right] ^{T}$ are both eigenvectors
with eigenvalue $1.$
2) Show that any linear combination of these are eigenvectors with eigenvalue
$1,$ and so we have vectors like $\left[ \frac{3}{8},\frac{3}{8},\frac{1}%
{8},\frac{1}{8},0\right] ^{T}$ as well as $\left[ \frac{1}{8},\frac{1}%
{8},\frac{3}{8},\frac{3}{8},0\right] ^{T},$ which would give different rankings!
3) We will show the following:
\begin{proposition}
Let $W$ be a web with $r$ components $W_{1},W_{2},\ldots,W_{r}.$ Then the
eigenspace of the eigenvalue $1$ is at least $r$-dimensional.
\end{proposition}
3A) Show that if we label the web by assigning the vertices in $W_{1}$ first,
then the vertices in $W_{2},$ etc., then the link matrix will have a block
diagonal form like%
\[
A=\left[
\begin{array}
[c]{cccc}%
A_{1} & 0 & 0 & 0\\
0 & A_{2} & 0 & 0\\
0 & 0 & \ddots & 0\\
0 & 0 & 0 & A_{r}%
\end{array}
\right] ,
\]
where $A_{k}$ is the link matrix for the web $W_{k}.$
3B) Show that if each is $A_{k}$ column stochastic, each has an eigenvector
$v_{k}$ with eigenvalue 1, and that can be expanded into a eigenvector $w_{k}$
for $A$ by letting
\[
w_{1}=\left(
\begin{array}
[c]{c}%
v_{1}\\
0\\
0\\
\vdots\\
0
\end{array}
\right) ,\;\;\;\;w_{2}=\left(
\begin{array}
[c]{c}%
0\\
v_{2}\\
0\\
\vdots\\
0
\end{array}
\right) ,
\]
etc. We can show that each of these is linearly independent and part of the
eigenspace $V_{1}$ of eigenvalue 1.
\section{Dealing with disconnected webs}
Let $S$ be the matrix $n\times n$ matrix with all entries $1/n.$ Notice that
this matrix is column stochastic. In terms of probabilities, this matrix
represents equal probabilities of jumping to any page on the web (including
the one you are already on).
4) Show that if $A$ is a column stochastic matrix, then
\[
M=\left( 1-m\right) A+mS
\]
is column stochastic for all values of $m$ between zero and one. Supposedly
the original value for $m$ used by Google was $0.15.$
We will show that the matrix $M$ has a one-dimensional eigenspace
$V_{1}\left( M\right) $ for the eigenvalue $1$ as long as $m>0.$ Note that
the matrix $M$ has all positive entries. This motivates:
\begin{definition}
A matrix $M=\left( M_{ij}\right) $ is positive if $M_{ij}>0$ for every $i$
and $j.$
\end{definition}
\begin{proposition}
If $M$ is a positive, column-stochastic matrix, then any eigenvector with
eigenvalue $1$ is a scalar multiple of a vector with all positive entries.
\end{proposition}
In the interest, we are omitting the proof, though it is in the notes and in
BL and uses only elemetary mathematics.
5) Explain why the matrix $M$ can be used to produce unique rankings if the
link matrix $A$ is column stochastic.
\subsection{Dangling nodes}
\begin{definition}
A \emph{dangling node} is a vertex in the web with outdegree zero (i.e., with
no links).
\end{definition}
6) Show that a dangling node produces a column of zeroes in the link matrix.
This means that the resulting link matrix is not column-stochastic, since some
columns may sum to zero. This means that we may not use our theorem that $1$
is an eigenvalue. In fact, it may not be true.
For dangling nodes, we have the following theorem of Perron:
\begin{theorem}
If $A$ is a positive matrix, then $A$ contains a real, positive eigenvalue
$\rho$ such that
\begin{enumerate}
\item For any other eigenvalue $\lambda,$ we have $\left\vert \lambda
\right\vert <\rho$ (recall that $\lambda$ could be complex).
\item The eigenspace of $\rho$ is one-dimensional and there is a unique
eigenvector $x=\left[ x_{1},x_{2},\ldots,x_{p}\right] ^{T}$ with eigenvalue
$\rho$ such that $x_{i}>0$ for all $i$ and
\[%
%TCIMACRO{\dsum \limits_{i=1}^{p}}%
%BeginExpansion
{\displaystyle\sum\limits_{i=1}^{p}}
%EndExpansion
x_{i}=1.
\]
\end{enumerate}
\end{theorem}
This eigenvector is called the \emph{Perron vector}. Thus, if we had a matrix
with all postive entries, as we got in the last section, we can use the Perron
vector as the ranking.
7) Explain how Perron's theorem can be used to give a unique ranking even if
there are dangling nodes.
\end{document}