\documentclass{article}%
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{graphicx}%
\setcounter{MaxMatrixCols}{30}
%TCIDATA{OutputFilter=latex2.dll}
%TCIDATA{Version=5.00.0.2552}
%TCIDATA{CSTFile=40 LaTeX article.cst}
%TCIDATA{Created=Thursday, August 21, 2008 14:03:59}
%TCIDATA{LastRevised=Monday, November 03, 2014 11:34:41}
%TCIDATA{}
%TCIDATA{}
%TCIDATA{}
%TCIDATA{Language=American English}
\newtheorem{theorem}{Theorem}
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{case}[theorem]{Case}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{conclusion}[theorem]{Conclusion}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{criterion}[theorem]{Criterion}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{notation}[theorem]{Notation}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{solution}[theorem]{Solution}
\newtheorem{summary}[theorem]{Summary}
\newenvironment{proof}[1][Proof]{\noindent\textbf{#1.} }{\ \rule{0.5em}{0.5em}}
\begin{document}
\title{Math 443/543 Graph Theory Notes 5: Graphs as matrices, spectral graph theory,
and PageRank }
\author{David Glickenstein}
\maketitle
\section{Representing graphs as matrices}
It will sometimes be useful to represent graphs as matrices. This section is
taken from C-10.1.
Let $G$ be a graph of order $p.$ We denote the vertices by $v_{1},\ldots
,v_{p}.$ We can then find an adjacency matrix $A=A\left( G\right) =\left[
a_{ij}\right] $ defined to be the $p\times p$ matrix such that $a_{ij}=1$ if
$v_{i}v_{j}\in E\left( G\right) $ and $a_{ij}=0$ otherwise. This matrix will be symmetric for an
undirected graph. We can easily consider the generalization to directed graphs
and multigraphs.
Note that two isomorphic graphs may have different adjacency matrices.
However, they are related by permutation matrices.
\begin{definition}
A permutation matrix is a matrix gotten from the identity by permuting the
columns (i.e., switching some of the columns).
\end{definition}
\begin{proposition}
The graphs $G$ and $G^{\prime}$ are isomorphic if and only if their adjacency
matrices are related by
\[
A=P^{T}A^{\prime}P
\]
for some permutation matrix $P$.
\end{proposition}
\begin{proof}
[Proof (sketch)]Given isomorphic graphs, the isomorphism gives a permutation
of the vertices, which leads to a permutation matrix. Similarly, the
permutation matrix gives an isomorphism.
\end{proof}
Now we see that the adjacency matrix can be used to count $uv$-walks.
\begin{theorem}
Let $A$ be the adjacency matrix of a graph $G$, where $V\left( G\right)
=\left\{ v_{1},v_{2},\ldots,v_{p}\right\} .$ Then the $\left( i,j\right) $
entry of $A^{n},$ where $n\geq1,$ is the number of different $v_{i}v_{j}%
$-walks of length $n$ in $G.$
\end{theorem}
\begin{proof}
We induct on $n.$ Certainly this is true for $n=1.$ Now suppose $A^{n}=\left(
a_{ij}^{\left( n\right) }\right) $ gives the number of $v_{i}v_{j}$-walks
of length $n.$ We can consider the entries of $A^{n+1}=A^{n}A.$ We have
\[
a_{ij}^{\left( n+1\right) }=\sum_{k=1}^{p}a_{ik}^{\left( n\right) }%
a_{kj}.
\]
This is the sum, over $k,$ of the number of walks of length $n$ between
$v_{i}$ and $v_{k}$
followed by a walk from $v_{k}$ to $v_{j}$ of length $1.$ All walks of length
$n+1$ are generated in this way, and so the theorem is proven.
\end{proof}
\section{Spectral graph theory}
Recall that an eigenvalue of a matrix $M$ is a number $\lambda$ such that
there is a vector $v$ (called the corresponding eigenvector) such that
\[
Mv=\lambda v.
\]
It turns out that symmetric $n\times n$ matrices have $n$ eigenvalues. Since
adjacency matrices of two isomorphic graphs are related by permutation
matrices as above, the set of eigenvalues of $A$ is an invariant of a graph.
We will actually use the Laplacian matrix instead of the adjacency matrix. The
Laplacian matrix is defined to be
\[
L=A-D
\]
where $D$ is the diagonal matrix whose entries are the degrees of the vertices
(called the degree matrix). The Laplacian matrix is also symmetric, and thus
it has a complete set of eigenvalues. The set of these eigenvalues is called
the spectrum of the Laplacian. Notice the following.
\begin{proposition}
Let $G$ be a finite graph. The eigenvalues of the matrix $L$ are all
nonpositive. Moreover, the constant vector $\vec{1}=\left( 1,1,1,\ldots
,1\right) $ is an eigenvector with eigenvalue zero.
\end{proposition}
\begin{proof}
It is clear that $\vec{1}$ is an eigenvector with eigenvalue $0$ since the sum
of the entries in each row must be zero. Now, notice that we can write
\begin{align*}
v^{T}Lv & =\sum v_{i}\left( Lv\right) _{i}\\
& =\sum_{i}v_{i}\sum_{j}L_{ij}v_{j}\\
& =\sum_{v_{i}v_{j}\in E}v_{i}\left( v_{j}-v_{i}\right) \\
& =\frac{1}{2}\left[ \sum_{v_{i}v_{j}\in E}v_{i}\left( v_{j}-v_{i}\right)
+\sum_{v_{i}v_{j}\in E}v_{j}\left( v_{i}-v_{j}\right) \right] \\
& =-\frac{1}{2}\sum_{v_{i}v_{j}\in E}\left( v_{i}-v_{j}\right) ^{2}\leq0.
\end{align*}
(The sums over $i$ are over all vertices, but the sums over $v_{i}v_{j}\in E$
are over the edges.) Now note that if $v$ is an eigenvector of $L$ with
eigenvalue $\lambda,$ then $Lv=\lambda v,$ and
\[
v^{T}Lv=\lambda v^{T}v=\lambda\sum_{i}v_{i}^{2}.
\]
Thus we have that
\[
\lambda=\frac{-\frac{1}{2}\sum_{v_{i}v_{j}\in E}\left( v_{i}-v_{j}\right)
^{2}}{\sum_{i}v_{i}^{2}}\leq0.
\]
\end{proof}
\begin{definition}
The eigenvalues of $-L$ can be arranged $0=\lambda_{0}\leq\lambda_{1}%
\leq\lambda_{2}\leq\cdots\leq\lambda_{p-1},$ where $p$ is the order of the
graph. The collection $\left( \lambda_{0},\lambda_{1},\ldots,\lambda
_{p-1}\right) $ is called the \emph{spectrum }of the Laplacian.
\end{definition}
\begin{remark}
Sometimes the Laplacian is taken to be $D^{-1/2}LD^{-1/2}.$ If there are no
isolated vertices, these are essentially equivalent.
\end{remark}
\begin{remark}
Note that the Laplacian matrix, much like the adjacency matrix, depends on the
ordering of the vertices and must be considered up to conjugation by
permutation matrices. Since eigenvalues are independent of conjugation by
permutation matrices, the spectrum is an isomorphism invariant of a graph.
\end{remark}
The following is an easy fact about the spectrum:
\begin{proposition}
For a graph $G$ of order $p$ and size $q$ (i.e., with $q$ edges),%
\[
\sum_{i=0}^{p-1}\lambda_{i}=2q.
\]
\end{proposition}
\begin{proof}
The sum of the eigenvalues of $-L$ is equal to the trace of $-L,$ which is the sum of the degrees, namely $2q.$
\end{proof}
We will be able to use the eigenvalues to determine some geometric properties
of a graph.
\section{Connectivity and spanning trees}
Recall that $\lambda_{0}=0,$ which means that the matrix $L$ is singular and
its determinant is zero. Recall the definition of the adjugate of a matrix.
\begin{definition}
If $M$ is a matrix, the adjugate is the matrix $M^{\dag}=\left[ M_{ij}^{\dag
}\right] $ where $M_{ij}^{\dag}$ is equal to $\left( -1\right) ^{i+j}%
\det\left( \hat{M}_{ij}\right) ,$ where $\hat{M}_{ij}$ is the matrix with
the $i$th row and $j$th column removed.
\end{definition}
The adjugate has the property that
\[
M\left( M^{\dag}\right) ^{T}=\left( \det M\right) I,
\]
where $I$ is the identity matrix. Applying this to $L$ (which is symmetric)
gives that
\[
LL^{\dag}=0.
\]
Now, the $p\times p$ matrix $L$ has rank less than $p.$ If it is less than or
equal to $p-2,$ then all determinants of $\left( p-1\right) \times\left(
p-1\right) $ submatrices are zero, and hence $L^{\dag}=0.$ If $L$ has rank
$p-1,$ then it has only one zero eigenvalue, whose eigenvector must be $\left(
1,1,\ldots,1\right) ^{T}.$ Since $LL^{\dag}=0,$ all columns of $L^{\dag}$
must be a multiple of $\left( 1,1,\ldots,1\right) ^{T}.$ But $L$ is
symmetric, so that means that $L^{\dag}$ must be a multiple of the matrix of
all ones. This motivates the following definition.
\begin{definition}
We define $t\left( G\right) $ by
\[
t\left( G\right) =\left( -1\right) ^{i+j}\det\left( -\hat{L}_{ij}\right)
\]
for any $i$ and $j$ (it does not matter since all are the same).
\end{definition}
\begin{remark}
It follows that $t\left( G\right) $ is an integer.
\end{remark}
\begin{proposition}
$t\left( G\right) =\frac{1}{p}\lambda_{1}\lambda_{2}\cdots\lambda_{p-1}.$
\end{proposition}
\begin{proof}
In general for a matrix $A$ with eigenvalues $\lambda_{0},\ldots,\lambda
_{p-1}$ we have that
\[
\sum_{k=0}^{p-1}\frac{\lambda_{0}\lambda_{1}\lambda_{2}\cdots\lambda_{p-1}%
}{\lambda_{k}}=\sum_{i=0}^{p-1}\det\hat{A}_{ii}.
\]
In our case, $\lambda_{0}=0$ and the right sum is the sum of $p$ of the same
entries, so the result follows.
\end{proof}
\begin{remark}
It follows that $t\left( G\right) \geq0.$
\end{remark}
Recall that a spanning tree of $G$ is a subgraph containing all of the
vertices of $G$ and is a tree.
\begin{theorem}
[Matrix Tree Theorem]The number $t\left( G\right) $ is equal to the number
of spanning trees of $G.$
\end{theorem}
\begin{proof}
Omitted, for now.
\end{proof}
We can apply this, however, as follows.
Example 1, consider the graph $K_{3}.$ Clearly this has Laplacian matrix
\[
L\left( G\right) =\left(
\begin{array}
[c]{ccc}%
-2 & 1 & 1\\
1 & -2 & 1\\
1 & 1 & -2
\end{array}
\right) .
\]
The number of spanning trees is equal to
\[
\det\left(
\begin{array}
[c]{cc}%
-2 & 1\\
1 & -2
\end{array}
\right) =3.
\]
It is clear that each spanning tree is given by omitting one edge, so it is
clear there are 3.\bigskip
Example 2: Consider the following graph.
\bigskip%
%TCIMACRO{\FRAME{dtbpF}{3.8331in}{2.1843in}{0pt}{}{}{laplacegraph1.jpg}%
%{\special{ language "Scientific Word"; type "GRAPHIC";
%maintain-aspect-ratio TRUE; display "USEDEF"; valid_file "F";
%width 3.8331in; height 2.1843in; depth 0pt; original-width 3.672in;
%original-height 2.0797in; cropleft "0"; croptop "1"; cropright "1";
%cropbottom "0";
%filename '../../2008-2009/math443f08/laplacegraph1.JPG';file-properties "XNPEU";}%
%} }%
%BeginExpansion
\begin{center}
\includegraphics[
natheight=2.079700in,
natwidth=3.672000in,
height=2.1843in,
width=3.8331in
]%
{../../2008-2009/math443f08/laplacegraph1.jpg}%
\end{center}
%EndExpansion
Its Laplacian matrix is
\[
L\left( G\right) =\left(
\begin{array}
[c]{ccccc}%
-4 & 1 & 1 & 1 & 1\\
1 & -2 & 1 & 0 & 0\\
1 & 1 & -3 & 1 & 0\\
1 & 0 & 1 & -2 & 0\\
1 & 0 & 0 & 0 & -1
\end{array}
\right) .
\]
The number of spanning trees is equal to
\begin{align*}
t\left( G\right) & =\det\left(
\begin{array}
[c]{cccc}%
-2 & 1 & 0 & 0\\
1 & -3 & 1 & 0\\
0 & 1 & -2 & 0\\
0 & 0 & 0 & -1
\end{array}
\right) \\
& =\det\left(
\begin{array}
[c]{ccc}%
2 & -1 & 0\\
-1 & 3 & -1\\
0 & -1 & 2
\end{array}
\right) \\
& =2\left( 6-1\right) +\left( -2\right) =8
\end{align*}
One can check directly that it has eight spanning trees.
\begin{corollary}
$\lambda_{1}\neq0$ if and only if $G$ is connected.
\end{corollary}
\begin{proof}
$\lambda_{1}=0$ if and only if $t\left( G\right) =0$ since $t\left(
G\right) $ is $\frac{1}{p}$ times the product of the eigenvalues $\lambda_{1}\lambda_{2}%
\cdots\lambda_{p-1}$ and $\lambda_{1}$ is the minimal eigenvalue after
$\lambda_{0}.$ But $t\left( G\right) =0$ means that there are no spanning
trees, so $G$ is not connected.
\end{proof}
Now we can consider the different components.
\begin{definition}
The disjoint union of two graphs $G=G_{1}\sqcup G_{2}$ is the graph gotten by
taking $V\left( G\right) =V\left( G_{1}\right) \sqcup V\left(
G_{2}\right) $ and $E\left( G\right) =E\left( G_{1}\right) \sqcup
E\left( G_{2}\right) $ where $\sqcup$ is the disjoint union of sets.
\end{definition}
It is not hard to see that if we number the vertices in $G$ by first numbering
the vertices of $G_{1}$ and then numbering the vertices of $G_{2},$ that the
Laplacian matrix takes the form%
\[
L\left( G\right) =\left(
\begin{array}
[c]{cc}%
L\left( G_{1}\right) & 0\\
0 & L\left( G_{2}\right)
\end{array}
\right) .
\]
This means that the eigenvalues of $L\left( G\right) $ are the union of the
eigenvalues of $L\left( G_{1}\right) $ and $L\left( G_{2}\right) .$ This
implies the following.
\begin{corollary}
If $\lambda_{n}=0,$ then there are at least $n+1$ connected components of $G.$
\end{corollary}
\begin{proof}
Induct on $n.$ We already know this is true for $n=1.$ Suppose $\lambda
_{n}=0.$ We know there must be at least $n$ components, since $\lambda_{n}=0$
implies $\lambda_{n-1}=0.$ We can then write the matrix $L\left( G\right) $
in the block diagonal form with $L\left( G_{i}\right) $ along the diagonal
for some graphs $G_{i}.$ Since $\lambda_{n}=0,$ one of these graphs must have
$\lambda_{1}\left( G_{i}\right) =0.$ But that means that there is another
connected component, completing the induction.
\end{proof}
\section{PageRank problem and idea of solution}
We will generally follow the paper by Bryan and Leise, denoted BL.
Search engines generally do three things:
\begin{enumerate}
\item Locate all webpages on the web.
\item Index the data so that it can be searched efficiently for relevant words.
\item Rate the importance of each page so that the most important pages can be
shown to the user first.
\end{enumerate}
We will discuss this third step.
We will assign a nonnegative score to each webpage such that more important
pages have higher scores. The first idea is:
\begin{itemize}
\item Derive the score for a page by the number of links to that page from
other pages (called the \textquotedblleft backlinks\textquotedblright\ for the page).
\end{itemize}
In this sense, other pages vote for the page. The linking of pages produces a
digraph. Denote the vertices by $v_{k}$ and the score of vertex $v_{k}$ by
$x_{k}.$
Approach 1: Let $x_{k}$ equal the number of backlinks for page $v_{k}.$ See
example in BL Figure 1 reproduced here:%
%TCIMACRO{\FRAME{dtbpF}{2.9921in}{2.2474in}{0pt}{}{}{web1.wmf}%
%{\special{ language "Scientific Word"; type "GRAPHIC";
%maintain-aspect-ratio TRUE; display "USEDEF"; valid_file "F";
%width 2.9921in; height 2.2474in; depth 0pt; original-width 9.5998in;
%original-height 7.1996in; cropleft "0"; croptop "1"; cropright "1";
%cropbottom "0"; filename 'pics/web1.wmf';file-properties "XNPEU";}} }%
%BeginExpansion
\begin{center}
\includegraphics[
natheight=7.199600in,
natwidth=9.599800in,
height=2.2474in,
width=2.9921in
]%
{pics/web1.wmf}%
\end{center}
%EndExpansion
We see that $x_{1}=2,$ $x_{2}=1,$ $x_{3}=3,$ and $x_{4}=2.$ Here are two
problems with this ranking:
Problem 1: Links from more important pages should increase the score more. For
instance, the scores of $v_{1}$ and $v_{4}$ are the same, but $v_{1}$ has a
link from $x_{3},$ which is a more important page, so maybe it should be
ranked higher. We will deal with this by, instead of letting $x_{i}$ equal the
total number of links to it, we will have it be equal to the sum of the scores
of the pages linking to it, so more important pages count more. Thus we get
the relations%
\begin{align*}
x_{1} & =x_{3}+x_{4}\\
x_{2} & =x_{1}\\
x_{3} & =x_{1}+x_{2}+x_{4}\\
x_{4} & =x_{1}+x_{2}.
\end{align*}
This doesn't quite work as stated, since to solve this linear system, we see
that we get $x_{1}=x_{2}=\frac{1}{2}x_{4}=\frac{1}{4}x_{3},$ which means that
if we look at the first equality, we must have that they are all equal to
zero. However, a slight modification in regard to the next problem will fix this.
Problem 2: One site should not be able to \emph{significantly} affect the
rankings by creating lots of links. Of course, creating links should affect
the rankings, but by creating thousands of links from one site, one should not
be able to boost the importance too much. So instead of giving one vote for
each link out, we will give equal votes to each outlink from a particular
page, but the total votes is equal to one. This changes the above system to
\begin{align*}
x_{1} & =x_{3}+\frac{1}{2}x_{4}\\
x_{2} & =\frac{1}{3}x_{1}\\
x_{3} & =\frac{1}{3}x_{1}+\frac{1}{2}x_{2}+\frac{1}{2}x_{4}\\
x_{4} & =\frac{1}{3}x_{1}+\frac{1}{2}x_{2}.
\end{align*}
This can be solved as follows.
\begin{align*}
x_{2} & =\frac{1}{3}x_{1}\\
x_{4} & =\frac{1}{3}x_{1}+\frac{1}{2}x_{2}=\frac{1}{2}x_{1}\\
x_{3} & =\frac{1}{3}x_{1}+\frac{1}{2}x_{2}+\frac{1}{2}x_{4}=\frac{1}{3}%
x_{1}+\frac{1}{6}x_{1}+\frac{1}{4}x_{1}=\frac{3}{4}x_{1}.
\end{align*}
Thus we can have a score of $x_{1}=1,$ $x_{2}=\frac{1}{3},$ $x_{3}=\frac{3}%
{4},$ $x_{4}=\frac{1}{2}.$ Notice that $x_{1}$ has the highest ranking! This
is because $x_{3}$ threw its whole vote to $x_{1}$ and so that even though
$x_{3}$ got votes from three different sites, they still do not total as much
as what $x_{1}$ gets. Note, usually we will rescale so that the sum is equal
to $1,$ and so we get
\[
x_{1}=\frac{12}{31},\;\;x_{2}=\frac{4}{31},\;\;x_{3}=\frac{9}{31}%
,\;\;x_{4}=\frac{6}{31}.
\]
\section{General formulation}
First, we introduce some terminology for directed graphs.
\begin{definition}
If $e=\left( v,w\right) \in E_{+}$ is a directed edge (we use $E_{+}$ to
denote the edges in a directed graph, and the ordered pair to denote that the
edge is from $v$ to $w,$ denoted in a picture as an arrow from $v$ to $w$), we
say $v$ is \emph{adjacent to} (or \emph{links to}) $w$ and $w$ is
\emph{adjacent from} (or \emph{links from}) $v.$
\end{definition}
\begin{definition}
The \emph{indegree} of a vertex $v,$ denoted $i\deg\left( v\right) ,$ is the
number of vertices adjacent to $v.$ The \emph{outdegree} of $v,$ denoted
$o\deg\left( v\right) $, is the number of vertices adjacent from $v.$
\end{definition}
We can state the PageRank algorithm in a more general way. We want to assign
scores so that
\[
x_{i}=\sum_{j\in L_{i}}\frac{x_{j}}{n_{j}}%
\]
where $L_{i}$ are the indices such that $v_{j}$ links to $v_{i}$ if $j\in
L_{i},$ and $n_{j}$ is equal to outdegree of $v_{j}.$ Note that $L_{i}$
contains $i\deg\left( v_{i}\right) $ elements. The set $L_{i}$ is called the
set of \emph{backlinks} of vertex $v_{i}.$ This can be rewritten as a vector
equation
\[
x=Ax,
\]
where $A$ is the matrix $A=\left( a_{ij}\right) $ given by
\[
a_{ij}=\left\{
\begin{array}
[c]{cc}%
\frac{1}{n_{j}} & \text{if }j\in L_{i}\\
0 & \text{otherwise}%
\end{array}
\right. .
\]
This matrix is called the \emph{link matrix}. We note that in the example, the
matrix $A$ was
\[
A=\left[
\begin{array}
[c]{cccc}%
0 & 0 & 1 & \frac{1}{2}\\
\frac{1}{3} & 0 & 0 & 0\\
\frac{1}{3} & \frac{1}{2} & 0 & \frac{1}{2}\\
\frac{1}{3} & \frac{1}{2} & 0 & 0
\end{array}
\right] .
\]
The problem of solving for the scores $x$ then amounts to finding an
eigenvector with eigenvalue $1$ for the matrix $A.$
We can consider the link matrix as giving the probabilities of traversing a
link from the page represented by the column to the page represented by the row.
Thus it makes sense that the sum of the entries in each column is equal to one.
\begin{definition}
A matrix is called a \emph{column stochastic} matrix if all of its entries are
nonnegative and the sum of the elements in each column is equal to $1.$
\end{definition}
Now the question is whether we can find an eigenvector for a column stochastic
matrix, and the answer is yes.
\begin{proposition}
If $A$ is a column stochastic matrix, then $1$ is an eigenvalue.
\end{proposition}
\begin{proof}
Let $e$ be the column vector of all ones. Since $A$ is column stochastic, we
clearly have that
\[
e^{T}A=e^{T}.
\]
Thus
\[
A^{T}e=e
\]
and $e$ is an eigenvector with eigenvalue $1$ for $A^{T}.$ However, $A$ and
$A^{T}$ have the same eigenvalues (not eigenvectors, though), so $A$ must have
an eigenvalue $1$, too.
\end{proof}
\begin{remark}
Do you remember why $A$ and $A^{T}$ have the same eigenvalues? The eigenvalues
of $A$ are the solutions $\lambda$ of $\det\left( A-\lambda I\right)
=\det\left( A^{T}-\lambda I\right) .$
\end{remark}
\section{Challenges to the algorithm}
See the worksheet.
\section{Computing the ranking}
The basic idea is that we can try to compute an eigenvector iteratively like
\[
x_{k+1}=Mx_{k}=M^{k}x_{0}.
\]
Certainly, if $Mx_{0}=x_{0},$ then this procedure fixes $x_{0}.$ In general,
if we replace this method with
\[
x_{k+1}=\frac{Mx_{k}}{\left\Vert Mx_{k}\right\Vert }%
\]
for any vector norm, we will generally find an eigenvector for the largest eigenvalue.
Before we proceed, let's define the one-norm of a vector.
\begin{definition}
The one-norm of a vector $v=\left( v_{i}\right) \in\mathbb{R}^{n}$ is equal
to
\[
\left\Vert v\right\Vert _{1}=\sum_{i=1}^{n}\left\vert v_{i}\right\vert ,
\]
where $\left\vert v_{i}\right\vert $ is the absolute value of the $i$th
component of $v.$
\end{definition}
\begin{proposition}
\label{normestimate}Let $M$ be a positive column-stochastic $n\times n$ matrix
and let $V$ denote the subspace of $\mathbb{R}^{n}$ consisting of vectors $v$
such that
\[
\sum_{i=1}^{n}v_{i}=0.
\]
Then for any $v\in V$ we have $Mv\in V$ and
\[
\left\Vert Mv\right\Vert _{1}\leq c\left\Vert v\right\Vert _{1},
\]
where $c<1.$
\end{proposition}
\begin{corollary}
In the situation in the proposition,
\[
\left\Vert M^{k}v\right\Vert _{1}\leq c^{k}\left\Vert v\right\Vert _{1}.
\]
\end{corollary}
\begin{proof}
This is a simple induction on $k,$ using the fact that $Mv\in V$ and
\[
\left\Vert M^{k}v\right\Vert _{1}\leq c\left\Vert M^{k-1}v\right\Vert _{1}.
\]
\end{proof}
This is essentially showing that the iteration is a contraction mapping, and
that will allow us to show that the method works.
\begin{proposition}
Every positive column-stochastic matrix $M$ has a unique vector $q$ with
positive components such that $Mq=q$ and $\left\Vert q\right\Vert _{1}=1.$ The
vector can be computed as
\[
q=\lim_{k\rightarrow\infty}M^{k}x_{0}%
\]
for any initial guess $x_{0}$ with positive components such that $\left\Vert
x_{0}\right\Vert _{1}=1.$
\end{proposition}
\begin{proof}
We already know that $M$ has $1$ as an eigenvalue and that the subspace
$V_{1}\left( M\right) $ is one-dimensional. All eigenvectors have all
positive or all negative components, so we can choose a unique representative
$q$ with positive components and norm 1 by rescaling. Now let $x_{0}$ be any
vector in $\mathbb{R}^{n}$ with positive components and $\left\Vert
x_{0}\right\Vert _{1}=1.$ We can write%
\[
x_{0}=q+v
\]
for some vector $v.$ We note that if we sum the components of $x_{0}$ or the
components of $q,$ we get one since both have positive components and 1-norm
equal to one. Thus $v\in V$ as in the previous proposition. Now we see that
\begin{align*}
M^{k}x_{0} & =M^{k}q+M^{k}v\\
& =q+M^{k}v.
\end{align*}
Thus
\[
\left\Vert M^{k}x_{0}-q\right\Vert _{1}=\left\Vert M^{k}v\right\Vert _{1}\leq
c^{k}\left\Vert v\right\Vert _{1}.
\]
Since $c<1,$ we get that $\left\Vert M^{k}x_{0}-q\right\Vert _{1}\rightarrow0$
as $k\rightarrow\infty.$
\end{proof}
We now go back and prove Proposition \ref{normestimate}.
\begin{proof}
[Proof of Proposition \ref{normestimate}]It is pretty clear that $Mv\in V$
since
\begin{align*}
\sum\left( Mv\right) _{j} & =\sum_{j}\sum_{i}M_{ji}v_{i}\\
& =\sum_{i}\sum_{j}M_{ji}v_{i}\\
& =\sum_{i}v_{i}=0
\end{align*}
since $M$ is column-stochastic. Now we consider
\begin{align*}
\left\Vert Mv\right\Vert _{1} & =\sum_{j}\left\vert \sum_{i}M_{ji}%
v_{i}\right\vert \\
& =\sum_{j}e_{j}\sum_{i}M_{ji}v_{i}\\
& =\sum_{i}a_{i}v_{i}%
\end{align*}
where
\[
e_{j}=\operatorname{sgn}\left( \sum_{i}M_{ji}v_{i}\right)
\]
and%
\[
a_{i}=\sum_{j}e_{j}M_{ji}.
\]
Note that if $\left\vert a_{i}\right\vert \leq c$ for all $i,$ then
\[
\left\Vert Mv\right\Vert _{1}\leq c\left\Vert v\right\Vert _{1}.
\]
We can see that
\begin{align*}
\left\vert a_{i}\right\vert & =\left\vert \sum_{j}e_{j}M_{ji}\right\vert \\
& =\left\vert \sum_{j}M_{ji}+\sum_{j}\left( e_{j}-1\right) M_{ji}%
\right\vert \\
& =\left\vert 1+\sum_{j}\left( e_{j}-1\right) M_{ji}\right\vert .
\end{align*}
Each term in the sum is nonpositive, and since $M_{ji}$ are positive and
$e_{j}$ are not all the same sign, the largest this can be is if most $e_{j}$
are 1 except for a single $e_{j}$ which is negative and corresponds to the
smallest $M_{ji}.$ Thus we see that
\[
\left\vert a_{i}\right\vert \leq1-2\min_{j}M_{ji}\leq1-2\min_{i,j}M_{ji}<1.
\]
\end{proof}
\section{Appendix: Approximating the Laplacian on a lattice}
Recall that the Laplacian is the operator
\[
\triangle=\frac{\partial^{2}}{\partial x^{2}}+\frac{\partial^{2}}{\partial
y^{2}}+\frac{\partial^{2}}{\partial z^{2}}%
\]
acting on functions $f\left( x,y,z\right) ,$ with analogues in other
dimensions. Let's first consider a way to approximate the one-dimensional
Laplacian. Suppose $f\left( x\right) $ is a function and I want to
approximate the second derivative $\frac{d^{2}f}{dx^{2}}\left( x\right) .$
We can take a centered difference approximation to get this as
\begin{align*}
\frac{d^{2}f}{dx^{2}}\left( x\right) & \approx\frac{f^{\prime}\left(
x+\frac{1}{2}\Delta x\right) -f^{\prime}\left( x-\frac{1}{2}\Delta x\right)
}{\Delta x}\\
& \approx\frac{1}{\Delta x}\left[ \frac{f\left( x+\Delta x\right)
-f\left( x\right) }{\Delta x}-\frac{f\left( x\right) -f\left( x-\Delta
x\right) }{\Delta x}\right] \\
& =\frac{1}{\left( \Delta x\right) ^{2}}\left[ f\left( x+\Delta x\right)
-f\left( x\right) +\left( f\left( x-\Delta x\right) -f\left( x\right)
\right) \right] \\
& =\frac{1}{\left( \Delta x\right) ^{2}}\left[ f\left( x+\Delta x\right)
+f\left( x-\Delta x\right) -2f\left( x\right) \right]
\end{align*}
Note that if we take $\Delta x=1,$ then this only depends on the value of the
function at the integer points.
Now consider the graph consisting of vertices on the integers of the real line
and edges between consecutive integers. Given a function $f$ on the vertices,
we can compute the Laplacian as
\[
\triangle f\left( v_{i}\right) =f\left( v_{i+1}\right) +f\left(
v_{i-1}\right) -2f\left( v_{i}\right)
\]
for any vertex $v_{i}.$ Notice that the Laplacian is an infinite matrix of the
form%
\[
\triangle f=\left(
\begin{array}
[c]{ccccccc}%
\cdots & \cdots & & & & & \\
\cdots & -2 & 1 & 0 & & & \\
& 1 & -2 & 1 & 0 & & \\
& 0 & 1 & -2 & 1 & 0 & \\
& & 0 & 1 & -2 & 1 & 0\\
& & & 0 & 1 & -2 & \cdots\\
& & & & & \cdots & \cdots
\end{array}
\right) \left(
\begin{array}
[c]{c}%
\cdots\\
f\left( v_{2}\right) \\
f\left( v_{1}\right) \\
f\left( v_{0}\right) \\
f\left( v_{-1}\right) \\
f\left( v_{-2}\right) \\
\cdots
\end{array}
\right) .
\]
Also note that this matrix is exactly equal to the adjacency matrix minus
twice the identity. The number 2 is the degree of each vertex, so we can write
the matrix, which is called the Laplacian matrix, as
\[
L=A-D
\]
where $A$ is the adjacency matrix and $D$ is the diagonal matrix consisting of
degrees (called the degree matrix).
Note that something similar can be done for a two-dimensional grid. We see that%
\begin{align*}
\left( \frac{\partial^{2}}{\partial x^{2}}+\frac{\partial^{2}}{\partial
y^{2}}\right) f\left( x,y\right) & \approx\frac{f_{x}\left( x+\frac
{1}{2}\Delta x,y\right) -f_{x}\left( x-\frac{1}{2}\Delta x,y\right)
}{\Delta x}+\frac{f_{y}\left( x,y+\frac{1}{2}\Delta y\right) -f_{y}\left(
x,y-\frac{1}{2}\Delta y\right) }{\Delta y}\\
& \approx\frac{1}{\Delta x}\frac{f\left( x+\Delta x,y\right) -f\left(
x,y\right) -\left[ f\left( x,y\right) -f\left( x-\Delta x,y\right)
\right] }{\Delta x}\\
& +\frac{1}{\Delta y}\frac{f\left( x,y+\Delta y\right) -f\left(
x,y\right) -\left[ f\left( x,y\right) -f\left( x,y-\Delta y\right)
\right] }{\Delta y}\\
& =\frac{1}{\left( \Delta x\right) ^{2}}\left[ f\left( x+\Delta
x,y\right) -f\left( x,y\right) +\left( f\left( x-\Delta x,y\right)
-f\left( x,y\right) \right) \right] \\
& +\frac{1}{\left( \Delta y\right) ^{2}}\left[ f\left( x,y+\Delta
y\right) -f\left( x,y\right) +\left( f\left( x,y-\Delta y\right)
-f\left( x,y\right) \right) \right] \\
& =\frac{1}{\left( \Delta x\right) ^{2}}\left[ f\left( x+\Delta
x,y\right) +\left( f\left( x-\Delta x,y\right) -2f\left( x,y\right)
\right) \right] \\
& +\frac{1}{\left( \Delta y\right) ^{2}}\left[ f\left( x,y+\Delta
y\right) +\left( f\left( x,y-\Delta y\right) -2f\left( x,y\right)
\right) \right] .
\end{align*}
If we let $\Delta x=\Delta y=1,$ then we get
\[
\left( \frac{\partial^{2}}{\partial x^{2}}+\frac{\partial^{2}}{\partial
y^{2}}\right) f\left( x,y\right) \approx f\left( x+1,y\right) +f\left(
x-1,y\right) +f\left( x,y+1\right) +f\left( x,y-1\right) -4f\left(
x,y\right) .
\]
Note that on the integer grid, this translates to the sum of the value of $f$
for the four vertices neighboring the vertex, minus $4$ times the value at the
vertex. This is precisely the same as the last time, and we see that this
operator can again be written as
\[
L=A-D.
\]
In general we will call this matrix the Laplacian matrix. It can be thought of
as a linear operator on functions on the vertices. Sometimes the Laplacian
will denote the negative of this operator (which gives positive eigenvalues
instead of negative ones), and sometimes a slight variation is used in the
graph theory literature.
\section{Appendix: Electrical networks}
One finds applications of the Laplacian in the theory of electrical networks.
Recall that the current through a circuit is proportional to the change in
voltage, and that constant of proportionality is called the conductance (or
resistance, depending on where the constant is placed). Thus, for a wire with
conductance $C$ between points $v$ and $w$ with voltages $f\left( v\right) $
and $f\left( w\right) $ respectively, the current from $v$ to $w$ is
$C\left( f\left( v\right) -f\left( w\right) \right) .$ Kirchhoff's law
says that if we have a network of wires, each with conductance $C,$ the total
current through any given point is zero. Thus, we get that
\[
C\sum_{v\text{ adjacent to }w}\left( f\left( w\right) -f\left( v\right)
\right) =0
\]
which is the same as $\triangle f=0.$ Note that if the conductances are
different, then we get an equation like
\[
\sum_{v\text{ adjacent to }w}c_{vw}\left( f\left( w\right) -f\left(
v\right) \right) =0,
\]
which is quite similar to the Laplacian. Hence we can use the Laplacian to
understand graphs by attaching voltages to some of the vertices and seeing
what happens at the other vertices. This is very much like solving a boundary
value problem for a partial differential equation!
\section{Appendix: Proof of the matrix tree theorem}
In order to prove the Matrix Tree Theorem, we need another characterization of
the Laplacian.
\begin{definition}
Let $G$ be a directed $\left( p,q\right) $-graph. The oriented vertex-edge
incidence matrix is a $p\times q$ matrix $Q=\left[ q_{ir}\right] ,$ such that
$q_{ir}=1$ if $e_{r}=\left( v_{i},v_{j}\right) $ for some $j,$
$q_{ir}=-1$ if $e_{r}=\left( v_{j},v_{i}\right) $ for some $j,$ and
$q_{ir}=0$ otherwise.
\end{definition}
\begin{proposition}
The Laplacian matrix $L$ satisfies
\[
-L=QQ^{T}%
\]
for any oriented vertex-edge incidence matrix (so, given an undirected graph,
we can take any orientation), i.e.,
\[
L_{ij}=-\sum_{r=1}^{q}q_{ir}q_{jr}.
\]
\end{proposition}
\begin{proof}
If $i=j,$ then we see that $L_{ii}=-\deg v_{i}.$ Notice that $q_{ir}$ is
nonzero if $v_{i}$ is in edge $e_{r}.$ Thus
\[
\sum_{r=1}^{q}\left( q_{ir}\right) ^{2}=\deg v_{i}.
\]
Now for $i\neq j,$ we have that $q_{ir}q_{jr}=-1$ if $e_{r}=v_{i}v_{j},$
giving the result.
\end{proof}
\begin{remark}
This observation can be used to give a different proof that $L_{ij}$ has all
nonpositive eigenvalues.
\end{remark}
The proof of the matrix tree theorem proceeds as follows. We need only
consider the case
\[
\det\left( -\hat{L}_{11}\right) .
\]
A property of determinants allows us to use the fact that $-L=QQ^{T}$ to
compute this determinant in terms of determinants of submatrices of $Q.$
\begin{proposition}
[Binet-Cauchy Formula]Let $A$ be an $n\times m$ matrix and $B$ be an $m\times
n$ matrix (usually we think $n<m$).
Note that the matrix $M$ has all positive entries. This motivates:
\begin{definition}
A matrix $M=\left( M_{ij}\right) $ is positive if $M_{ij}>0$ for every $i$
and $j.$
\end{definition}
For future use, we define the following.
\begin{definition}
Given a matrix $M,$ we write $V_{\lambda}\left( M\right) $ for the
eigenspace of eigenvalue $\lambda.$
\end{definition}
We will show that a positive column-stochastic matrix has a one dimensional
eigenspace $V_{1}$ for eigenvalue $1.$
\begin{proposition}
If $M$ is a positive, column-stochastic matrix, then $V_{1}\left( M\right) $
has dimension $1.$
\end{proposition}
\begin{proof}
Suppose $v$ and $w$ are in $V_{1}\left( M\right) .$ Then we know that
$sv+tw\in V_{1}\left( M\right) $ for any real numbers $s$ and $t.$ We will
now show that (1) any eigenvector in $V_{1}\left( M\right) $ has all
positive or all negative components and that (2) if $x$ and $y$ are any two
linearly independent vectors, then there is some $s$ and some $t$ such that
$sx+ty$ has both positive and negative components. If $v$ and $w$ were
linearly independent, then by (2) some $sv+tw$ would have both positive and
negative components, contradicting (1); thus $v$ and $w$ must
be linearly dependent.
\end{proof}
First we prove the following:
\begin{proposition}
Any eigenvector in $V_{1}\left( M\right) $ has all positive or all negative components.
\end{proposition}
\begin{proof}
Suppose $Mv=v.$ Since $M$ is column-stochastic, we know that
\[
\sum_{i=1}^{n}M_{ij}=1
\]
for each $j,$ and since $M$ is positive, we know that
\[
\left\vert M_{ij}\right\vert =M_{ij}%
\]
for each $i$ and $j.$ Therefore, we see that
\begin{align*}
\left\Vert v\right\Vert _{1} & =\left\Vert Mv\right\Vert _{1}\\
& =\sum_{i=1}^{n}\left\vert \sum_{j=1}^{n}M_{ij}v_{j}\right\vert \\
& \leq\sum_{i=1}^{n}\sum_{j=1}^{n}\left\vert M_{ij}\right\vert \left\vert
v_{j}\right\vert \\
& =\sum_{j=1}^{n}\sum_{i=1}^{n}M_{ij}\left\vert v_{j}\right\vert \\
& =\sum_{j=1}^{n}\left\vert v_{j}\right\vert =\left\Vert v\right\Vert _{1}.
\end{align*}
That means that the inequality must be an equality, meaning that
\[
\left\vert \sum_{j=1}^{n}M_{ij}v_{j}\right\vert =\sum_{j=1}^{n}\left\vert
M_{ij}\right\vert \left\vert v_{j}\right\vert .
\]
This is only true if $M_{ij}v_{j}\geq0$ for each $i$ and $j$ (or $M_{ij}%
v_{j}\leq0$ for each $i$ and $j$). However, since $M_{ij}>0,$ this implies
that $v_{j}\geq0$ for each $j$ (or $v_{j}\leq0$ for each $j$). Furthermore,
since
\[
v_{i}=\sum_{j=1}^{n}M_{ij}v_{j}%
\]
with $v_{j}\geq0$ (respectively $v_{j}\leq0$) and $M_{ij}>0,$ we must have
that either all components of $v$ are zero or all are positive (respectively
negative). Since $v$ is an eigenvector, it
is not the zero vector.
\end{proof}
\begin{remark}
A similar argument shows that for any positive, column-stochastic matrix, all
eigenvalues $\lambda$ satisfy $\left\vert \lambda\right\vert \leq1.$
\end{remark}
\begin{proposition}
For any linearly independent vectors $x$ and $y\in\mathbb{R}^{n},$ there are
real values of $s$ and $t$ such that $sx+ty$ has both negative and positive components.
\end{proposition}
\begin{proof}
Certainly this is true if either $x$ or $y$ have both positive and negative
components, so we may assume both have only positive components (the other
cases of both negative or one positive and one negative are handled by
adjusting the signs of $s$ and $t$ appropriately). We may now consider the
vector
\[
z=\left( \sum_{i=1}^{n}y_{i}\right) x-\left( \sum_{i=1}^{n}x_{i}\right)
y,
\]
i.e., we take $s=\sum_{i=1}^{n}y_{i}$ and $t=-\sum_{i=1}^{n}x_{i}.$
Both the sums in the above expression are nonzero by assumption (in fact,
positive). Also $z$ is nonzero since $x$ and $y$ are linearly independent.
Notice that
\[
\sum_{i=1}^{n}z_{i}=0.
\]
Since $z$ is not the zero vector, this implies that $z$ must have both
positive and negative components.
\end{proof}
Thus, the matrix $M$ can be used to produce unique rankings if there are no
dangling nodes.
\subsection{Dealing with dangling nodes}
For dangling nodes, we have the following theorem of Perron:
\begin{theorem}
If $A$ is a matrix with all positive entries, then $A$ contains a real,
positive eigenvalue $\rho$ such that
\begin{enumerate}
\item For any other eigenvalue $\lambda,$ we have $\left\vert \lambda
\right\vert <\rho$ (recall that $\lambda$ could be complex).
\item The eigenspace of $\rho$ is one-dimensional and there is a unique
eigenvector $x=\left[ x_{1},x_{2},\ldots,x_{p}\right] ^{T}$ with eigenvalue
$\rho$ such that $x_{i}>0$ for all $i$ and
\[%
%TCIMACRO{\dsum \limits_{i=1}^{p}}%
%BeginExpansion
{\displaystyle\sum\limits_{i=1}^{p}}
%EndExpansion
x_{i}=1.
\]
\end{enumerate}
\end{theorem}
This eigenvector is called the \emph{Perron vector}. Thus, if we had a matrix
with all positive entries, as we obtained in the last section, we can use the
Perron vector as the ranking.
\end{document}