-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsupplementary.tex
150 lines (124 loc) · 6.53 KB
/
supplementary.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
\section{Supplementary material}
\paragraph{Simplicial distance and the localizing property of the Laplacian.}
Suppose that $\sigma$ and $\tau$ are $p$-simplices for which $(\nu_0, \nu_1, \dotsc, \nu_d)$ is the shortest sequence of $p$-simplices with the property that $\nu_0=\sigma$, $\nu_d=\tau$, and each $\nu_i$ shares a face or a coface with $\nu_{i-1}$, and a face or a coface with $\nu_{i+1}$. We say that $d$ is the \emph{simplicial distance} between $\sigma$ and $\tau$. Then for all $N<d$, the entry of $L_p^N$ corresponding to $\sigma$ and $\tau$ is $0$, and so the filter does not cause interaction between $c(\sigma)$ and $c(\tau)$. This is analogous to a size-$d$ ordinary CNN layer not distributing information between pixels that are more than $d$ pixels apart. We will refer to $N$ as the \emph{degree} of the convolutional layer, but one may well wish to keep in mind the notion of \emph{size} from traditional CNNs.
\paragraph{Simplicial complexes as the projections of bipartite graphs.}
Given a bipartite graph $X$-$Y$, the simplicial projection on $Y$ is the simplicial complex whose $(k-1)$-simplices are the sets of $k$ vertices in $Y$ that have at least one common neighbor in $X$.
Cochains on the simplicial projection are naturally given by weights on $X$: Given any $(k-1)$-simplex $[y_1,\dots,y_k]$ and its neighboring vertices $\{x_1,\dots,x_j\}\subseteq X$, one can define a $(k-1)$-cochain as $\phi(\{x_1,\dots,x_j\})$, for any function $\phi: \mathcal{P}(X)\to\RR$.
In our coauthorship application, $\phi$ is the sum and the weight of a paper is the number of times it is cited.
See Figure~\ref{fig:bipartite}.
\begin{figure}[htpb]
%\begin{table*}[!t]
\savebox{\tempbox}{
\begin{tikzpicture}[font=\scriptsize]
\coordinate (I) at (0,0);
\coordinate (II) at (0,-0.5);
\coordinate (III) at (0,-1);
\coordinate (IV) at (0,-1.5);
\coordinate (A) at ($ (I) + (2,0) $);
\coordinate (B) at ($ (II) + (2,0) $);
\coordinate (C) at ($ (III) + (2,0) $);
\coordinate (D) at ($ (IV) + (2,0) $);
\fill[color=black] (I) circle (2pt);
\fill[color=black] (II) circle (2pt);
\fill[color=black] (III) circle (2pt);
\fill[color=black] (IV) circle (2pt);
\fill[color=black] (A) circle (2pt);
\fill[color=black] (B) circle (2pt);
\fill[color=black] (C) circle (2pt);
\fill[color=black] (D) circle (2pt);
\node[anchor=east] at (I) {Paper \MakeUppercase{\romannumeral 1}, $100$ citations};
\node[anchor=east] at (II) {Paper \MakeUppercase{\romannumeral 2}, $50$ citations};
\node[anchor=east] at (III) {Paper \MakeUppercase{\romannumeral 3}, $10$ citations};
\node[anchor=east] at (IV) {Paper \MakeUppercase{\romannumeral 4}, $4$ citations};
\node[anchor=west] at (A) {Author $A$};
\node[anchor=west] at (B) {Author $B$};
\node[anchor=west] at (C) {Author $C$};
\node[anchor=west] at (D) {Author $D$};
\draw (I) -- (A);
\draw (I) -- (B);
\draw (I) -- (C);
\draw (II) -- (A);
\draw (II) -- (B);
\draw (III) -- (A);
\draw (III) -- (D);
\draw (IV) -- (C);
\draw (IV) -- (D);
\end{tikzpicture}
}%
\settowidth{\tempwidth}{\usebox{\tempbox}}%
\hfil\begin{minipage}[b]{\tempwidth}%
\raisebox{-\height}{\usebox{\tempbox}}%
%\vspace{-7pt}
\scriptsize{\caption*{(a)}}%
\end{minipage}%
\savebox{\tempbox}{
\begin{tikzpicture}[font=\scriptsize]
\coordinate (A) at (0,0);
\coordinate (B) at (2,0);
\coordinate (C) at ($ (A) + (300:1) $);
\node[anchor=north] at (C) {$100$};
\coordinate (D) at ($ (B) + (240:1) $);
\node[anchor=north] at (D) {$50$};
\draw[dashed] (A) -- (C) -- (B);
\draw[dashed] (A) -- (D) -- (B);
\coordinate (AA) at ($ (A) + (0, -1.5) $);
\coordinate (BB) at ($ (B) + (0, -1.5) $);
\fill[color=black] (A) circle (2pt);
\fill[color=black] (B) circle (2pt);
\fill[color=black] (AA) circle (2pt);
\fill[color=black] (BB) circle (2pt);
\draw (A) -- (B);
\draw (AA) -- node[below, align=center] {$1$-cochain\\ $150=100+50$} (BB);
\node[anchor=east] at (A) {$A$};
\node[anchor=west] at (B) {$B$};
\node[anchor=east] at (AA) {$A$};
\node[anchor=west] at (BB) {$B$};
\end{tikzpicture}
}%
\settowidth{\tempwidth}{\usebox{\tempbox}}%
\hfil\begin{minipage}[b]{\tempwidth}%
\raisebox{-\height}{\usebox{\tempbox}}%
\scriptsize{\captionof*{figure}{(b)}}%
\end{minipage}%
\vspace{5pt}
%\end{table*}
\savebox{\tempbox}{
\input{figures/coauthorship_complex.tex}
}%%
\settowidth{\tempwidth}{\usebox{\tempbox}}%
\hfil\begin{minipage}[b]{\tempwidth}%
\raisebox{-\height}{\usebox{\tempbox}}%
\scriptsize{\captionof*{figure}{(c)}}%
\end{minipage}%
%\end{table*}
\caption{%
Constructing a simplicial complex and its cochain from a bipartite graph.
(a)~Paper-author bipartite graph (same data as in Figure~\ref{fig:data2complex}).
(b)~The $1$-simplex $[A,B]$ is included in the coauthorship complex since authors $A$ and $B$ collaborated on papers I and II.
The 1-cochain on $[A,B]$ is given by the sum of their common papers' citations.
(c)~Resulting coauthorship complex with cochains.
}\label{fig:bipartite}
\end{figure}
\begin{table}[htbp]
\centering
\scriptsize{
\begin{tabular}{lrrrrrrrrrrr}
\toprule
Dimension & 0 & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10\\
\midrule
CC1 & 352 & 1474 & 3285 & 5019 & 5559 & 4547 & 2732 & 1175 & 343 & 61 & 5\\
CC2 & 1126 & 5059 & 11840 & 18822 & 21472 & 17896 & 10847 & 4673 & 1357 & 238 & 19\\
\bottomrule
\end{tabular}}
\vspace{2pt}
\caption{%
Number of simplices of the two coauthorship complexes sampled from Semantic Scholar.
} \label{table:Simplices-coauthor}
\end{table}
\paragraph{Sampling papers.}
From the Semantic Scholar Open Research Corpus~\cite{ammar18NAACL}, we excluded papers with fewer than $5$ citations or more than $10$ authors.
To sample a CC, we sampled $80$ papers (corresponding to maximal simplices in the CC) by performing a random walk (of length $80$, from a randomly chosen starting paper) on the graph whose vertices represent papers and edges connect papers sharing at least one author.
\paragraph{Mean accuracy and absolute error.}
A missing value is considered to be correctly imputed if the imputed value differs by at most $10\%$ from the true value.
The \emph{accuracy} is the percentage of missing values that has been correctly imputed and the \emph{absolute error} is the magnitude of the difference between the imputed and true value.
For each rate of missing values, we compute the \emph{mean accuracy} $\pm$ standard deviation over 5 samples with randomly damaged portions.