Skip to content

Commit

Permalink
Updated quick start vignette to highlight net_emd options
Browse files Browse the repository at this point in the history
  • Loading branch information
martintoreilly committed Jan 25, 2017
1 parent 8e4042d commit 070e4d3
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 21 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# netdist
An R package implementing the NetEMD network comparison measure

## Usage
See "Quick start" vignette in documentation for example usage.

## Installing package from source
When published to the CRAN package repository, the library and all documentation
will be installed in the standard manner using `install.packages("netdist")`,
Expand Down
26 changes: 20 additions & 6 deletions inst/doc/Quick_start.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,20 +6,34 @@ virus_edges <- read_all_graphs_as_orca_edge_lists(
format = "ncol", pattern = ".txt")
attr(virus_edges, "names")

# Calculate graphlet orbit degree distributions up to 4 nodes for all graphs
# This only needs to be done once per graph
virus_godd <- purrr::map(virus_edges, godd)
# Calculate graphlet orbit degree distributions for graphlets comprising up to
# 4 nodes for all graphs. This only needs to be done once per graph.
# If type is set to "node5", graphlet orbit degree distributions will be
# calculated for graphlets comprising up to 5 nodes
virus_godd <- purrr::map(virus_edges, godd, type = "node4")

# Generate a cross-comparison matrix listing all combinations of graphs
comp_spec <- graph_cross_comparison_spec(virus_edges)
comp_spec[1:5,]

# Compute NetEMD between all virus PPI graphs based on all graphlet orbir
# degree distributions up to 4 nodes
# Compute NetEMD between all virus PPI graphs based on the computed graphlet
# orbit degree distributions, using the fast "optimise" method (default). This
# method uses the built-in R optimise method to efficiently find the offset
# with the minimum EMD, but is not guaranteed to find the global minimum if
# EMD as a function of offset is multimodal.
net_emds <- purrr::simplify(
purrr::map2(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
net_emd(virus_godd[[index_a]], virus_godd[[index_b]])
net_emd(virus_godd[[index_a]], virus_godd[[index_b]], method = "optimise")
}))
print(net_emds)

# You can also specify method = "fixed_step" to use the much slower method of
# exhaustively evaluating the EMD at all offsets separated by a fixed step.
# The default step size is 1/2 the minimum spacing between locations in
# either histogram after normalising to unit variance. However, you can
# specify your own fixed step using the optional "step_size" parameter.
# Note that this step size is applied to the histograms after they have been
# normalised to unit variance

# Link NetEMDs with their respective comp_specs
comp_spec$net_emd = net_emds
Expand Down
26 changes: 20 additions & 6 deletions inst/doc/Quick_start.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,34 @@ virus_edges <- read_all_graphs_as_orca_edge_lists(
format = "ncol", pattern = ".txt")
attr(virus_edges, "names")
# Calculate graphlet orbit degree distributions up to 4 nodes for all graphs
# This only needs to be done once per graph
virus_godd <- purrr::map(virus_edges, godd)
# Calculate graphlet orbit degree distributions for graphlets comprising up to
# 4 nodes for all graphs. This only needs to be done once per graph.
# If type is set to "node5", graphlet orbit degree distributions will be
# calculated for graphlets comprising up to 5 nodes
virus_godd <- purrr::map(virus_edges, godd, type = "node4")
# Generate a cross-comparison matrix listing all combinations of graphs
comp_spec <- graph_cross_comparison_spec(virus_edges)
comp_spec[1:5,]
# Compute NetEMD between all virus PPI graphs based on all graphlet orbir
# degree distributions up to 4 nodes
# Compute NetEMD between all virus PPI graphs based on the computed graphlet
# orbit degree distributions, using the fast "optimise" method (default). This
# method uses the built-in R optimise method to efficiently find the offset
# with the minimum EMD, but is not guaranteed to find the global minimum if
# EMD as a function of offset is multimodal.
net_emds <- purrr::simplify(
purrr::map2(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
net_emd(virus_godd[[index_a]], virus_godd[[index_b]])
net_emd(virus_godd[[index_a]], virus_godd[[index_b]], method = "optimise")
}))
print(net_emds)
# You can also specify method = "fixed_step" to use the much slower method of
# exhaustively evaluating the EMD at all offsets separated by a fixed step.
# The default step size is 1/2 the minimum spacing between locations in
# either histogram after normalising to unit variance. However, you can
# specify your own fixed step using the optional "step_size" parameter.
# Note that this step size is applied to the histograms after they have been
# normalised to unit variance
# Link NetEMDs with their respective comp_specs
comp_spec$net_emd = net_emds
Expand Down
27 changes: 21 additions & 6 deletions inst/doc/Quick_start.html
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,11 @@ <h2>Virus PPI example (from sample data)</h2>
<span class="dt">format =</span> <span class="st">&quot;ncol&quot;</span>, <span class="dt">pattern =</span> <span class="st">&quot;.txt&quot;</span>)
<span class="kw">attr</span>(virus_edges, <span class="st">&quot;names&quot;</span>)</code></pre></div>
<pre><code>## [1] &quot;EBV-1.txt&quot; &quot;ECL-1.txt&quot; &quot;HSV-1-1.txt&quot; &quot;KSHV-1.txt&quot; &quot;VZV-1.txt&quot;</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Calculate graphlet orbit degree distributions up to 4 nodes for all graphs </span>
<span class="co"># This only needs to be done once per graph</span>
virus_godd &lt;-<span class="st"> </span>purrr::<span class="kw">map</span>(virus_edges, godd)
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Calculate graphlet orbit degree distributions for graphlets comprising up to </span>
<span class="co"># 4 nodes for all graphs. This only needs to be done once per graph. </span>
<span class="co"># If type is set to &quot;node5&quot;, graphlet orbit degree distributions will be </span>
<span class="co"># calculated for graphlets comprising up to 5 nodes</span>
virus_godd &lt;-<span class="st"> </span>purrr::<span class="kw">map</span>(virus_edges, godd, <span class="dt">type =</span> <span class="st">&quot;node4&quot;</span>)

<span class="co"># Generate a cross-comparison matrix listing all combinations of graphs</span>
comp_spec &lt;-<span class="st"> </span><span class="kw">graph_cross_comparison_spec</span>(virus_edges)
Expand All @@ -96,12 +98,25 @@ <h2>Virus PPI example (from sample data)</h2>
## 3 EBV-1.txt KSHV-1.txt 1 4
## 4 EBV-1.txt VZV-1.txt 1 5
## 5 ECL-1.txt HSV-1-1.txt 2 3</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Compute NetEMD between all virus PPI graphs based on all graphlet orbir</span>
<span class="co"># degree distributions up to 4 nodes</span>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Compute NetEMD between all virus PPI graphs based on the computed graphlet </span>
<span class="co"># orbit degree distributions, using the fast &quot;optimise&quot; method (default). This</span>
<span class="co"># method uses the built-in R optimise method to efficiently find the offset</span>
<span class="co"># with the minimum EMD, but is not guaranteed to find the global minimum if</span>
<span class="co"># EMD as a function of offset is multimodal.</span>
net_emds &lt;-<span class="st"> </span>purrr::<span class="kw">simplify</span>(
purrr::<span class="kw">map2</span>(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
<span class="kw">net_emd</span>(virus_godd[[index_a]], virus_godd[[index_b]])
<span class="kw">net_emd</span>(virus_godd[[index_a]], virus_godd[[index_b]], <span class="dt">method =</span> <span class="st">&quot;optimise&quot;</span>)
}))
<span class="kw">print</span>(net_emds)</code></pre></div>
<pre><code>## [1] 0.4876042 0.1662899 0.1607306 0.1994613 0.3986293 0.4024197 0.4029355
## [8] 0.1581551 0.2164011 0.2323951</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># You can also specify method = &quot;fixed_step&quot; to use the much slower method of </span>
<span class="co"># exhaustively evaluating the EMD at all offsets separated by a fixed step. </span>
<span class="co"># The default step size is 1/2 the minimum spacing between locations in </span>
<span class="co"># either histogram after normalising to unit variance. However, you can </span>
<span class="co"># specify your own fixed step using the optional &quot;step_size&quot; parameter.</span>
<span class="co"># Note that this step size is applied to the histograms after they have been </span>
<span class="co"># normalised to unit variance</span>

<span class="co"># Link NetEMDs with their respective comp_specs</span>
comp_spec$net_emd =<span class="st"> </span>net_emds
Expand Down
18 changes: 15 additions & 3 deletions vignettes/Quick_start.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,24 @@ virus_godd <- purrr::map(virus_edges, godd, type = "node4")
comp_spec <- graph_cross_comparison_spec(virus_edges)
comp_spec[1:5,]
# Compute NetEMD between all virus PPI graphs based on all graphlet orbir
# degree distributions up to 4 nodes
# Compute NetEMD between all virus PPI graphs based on the computed graphlet
# orbit degree distributions, using the fast "optimise" method (default). This
# method uses the built-in R optimise method to efficiently find the offset
# with the minimum EMD, but is not guaranteed to find the global minimum if
# EMD as a function of offset is multimodal.
net_emds <- purrr::simplify(
purrr::map2(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
net_emd(virus_godd[[index_a]], virus_godd[[index_b]])
net_emd(virus_godd[[index_a]], virus_godd[[index_b]], method = "optimise")
}))
print(net_emds)
# You can also specify method = "fixed_step" to use the much slower method of
# exhaustively evaluating the EMD at all offsets separated by a fixed step.
# The default step size is 1/2 the minimum spacing between locations in
# either histogram after normalising to unit variance. However, you can
# specify your own fixed step using the optional "step_size" parameter.
# Note that this step size is applied to the histograms after they have been
# normalised to unit variance
# Link NetEMDs with their respective comp_specs
comp_spec$net_emd = net_emds
Expand Down

0 comments on commit 070e4d3

Please sign in to comment.