Editing changes. Fix script transforms.

PATRIC3 · Nov 3, 2017 · 0d3091c · 0d3091c
1 parent cf6d758
commit 0d3091c
Show file tree

Hide file tree

Showing 67 changed files with 1,395 additions and 469 deletions.
diff --git a/cli/p3-module-list.txt b/cli/p3-module-list.txt
@@ -0,0 +1,3 @@
+GenomeTypeObject.pm
+Hsp.pm
+Sim.pm
diff --git a/cli/p3-script-list.txt b/cli/p3-script-list.txt
@@ -1,3 +1,6 @@
+p3-signature-families.pl
+p3-closest-seqs.pl
+p3-identify-clusters.pl
 p3-all-drugs.pl
 p3-all-genomes.pl
 p3-blast.pl

diff --git a/cli/transform-p3-scripts b/cli/transform-p3-scripts
@@ -21,36 +21,29 @@ my %link_map  = ('P3Utils/ih_options' => ":ref:`cli-input-options`",
 
 my $ua = LWP::UserAgent->new;
 
-my $url_base = "https://raw.githubusercontent.com/SEEDtk/RASTtk/master/scripts";
+my $url_top = "https://raw.githubusercontent.com/SEEDtk/RASTtk/master";
 my $out_base = "../docroot/cli_tutorial/command_list";
 
-open(S, "<", "p3-script-list.txt") or die "Cannot open p3-script-list.txt: $!";
+my %unmapped_links;
 
-my $conv = Pod::POM::View::Restructured->new({namespace => 'cli'});
+#
+# read the module list first to find the modules we can link to.
+#
 
-my %unmapped_links;
 
-while (my $script = <S>)
+my %modules;
+open(M, "<", "p3-module-list.txt") or die "cannot read p3-module-list.txt: $!";
+while (<M>)
 {
-    chomp $script;
-    print "$script\n";
-    my $url = "$url_base/$script";
-    my $res = $ua->get($url);
-    if (!$res->is_success)
-    {
-	die "Error " . $res->code . " fetching $url: " . $res->content;
-    }
-
-    my $txt = $res->content;
-    my $tmp = File::Temp->new();
-    print $tmp $txt;
-    close($tmp);
+    chomp;
+    s/\.pm$//;
+    $modules{$_} = 1;
+}
+close(M);
 
-    my $base = basename($script, ".pl");
-    my $out_file = "$out_base/$base.rst";
+transform("p3-script-list.txt", "$url_top/scripts");
+transform("p3-module-list.txt", "$url_top/lib");
 
-    $conv->convert_file("$tmp", $base, $out_file, { link => \&handle_link });
-}
 
 if (%unmapped_links)
 {
@@ -61,6 +54,39 @@ if (%unmapped_links)
     }
 }
 
+sub transform
+{
+    my($file, $url_base) = @_;
+    open(S, "<", $file) or die "Cannot open $file $!";
+
+    my $conv = Pod::POM::View::Restructured->new(); # {namespace => 'cli'});
+
+    while (my $script = <S>)
+    {
+	chomp $script;
+	print "$script\n";
+	my $url = "$url_base/$script";
+	my $res = $ua->get($url);
+	if (!$res->is_success)
+	{
+	    die "Error " . $res->code . " fetching $url: " . $res->content;
+	}
+
+	my $txt = $res->content;
+	my $tmp = File::Temp->new();
+	print $tmp $txt;
+	close($tmp);
+
+	my $base = basename($script, ".pl", ".pm");
+	my $out_file = "$out_base/$base.rst";
+
+	open(my $fh, ">", $out_file) or die "Cannot write $out_file: $!";
+	print $fh ".. _cli::$base:\n\n";
+
+	$conv->convert_file("$tmp", $base, $fh, { link => \&handle_link });
+    }
+}
+
 sub handle_link
 {
     my($txt) = @_;
@@ -71,6 +97,10 @@ sub handle_link
     {
 	return ('', $link);
     }
+    elsif ($modules{$txt})
+    {
+	return ('', ":ref:`cli::$txt`");
+    }
     elsif ($txt =~ /^(p3.*)\.pl$/)
     {
 	return ('', ":ref:`cli::$1`");

diff --git a/docroot/cli_tutorial/cli_getting_started.rst b/docroot/cli_tutorial/cli_getting_started.rst
@@ -1887,7 +1887,7 @@ get the genome data.
 How close are fig\|1302.21.peg.966 and fig\|1302.21.peg.1019 on the chromosome?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The script :ref:`cli::p3-get-feature-gap` gives us this information. Since
+The script :ref:`cli::p3-feature-gap` gives us this information. Since
 it expects two feature IDs on the same input line, we use a
 :ref:`cli::p3-echo` with two titles to put its two parameters on a single
 line.

diff --git a/docroot/cli_tutorial/command_list/GenomeTypeObject.rst b/docroot/cli_tutorial/command_list/GenomeTypeObject.rst
@@ -0,0 +1,245 @@
+.. _cli::GenomeTypeObject:
+
+
+################
+GenomeTypeObject
+################
+
+.. highlight:: perl
+
+
+****
+NAME
+****
+
+
+GenomeTypeObject - a helper class for manipulating GenomeAnnotation service genome objects.
+
+
+********
+SYNOPSIS
+********
+
+
+
+.. code-block:: perl
+
+   $obj = GenomeTypeObject->new()
+ 
+   $obj = GenomeTypeObject->initialize($raw_genome_object)
+
+
+
+***********
+DESCRIPTION
+***********
+
+
+The \ ``GenomeTypeObject``\  class wraps a number of common operations to be performed
+against the genome object as defined in the KBase GenomeAnnotation service.
+
+To use the methods here it is sufficient to just bless the JSON object containing
+the genome data into the GenomeTypeObject class, but it is more efficient to initialize
+it using the initialize method:
+
+
+.. code-block:: perl
+
+   $obj = GenomeTypeObject->initialize($raw_json)
+
+
+Doing this will create internal indexes on the feature and contig data structures
+to accelerate access to individual data items.
+
+Before using the genome object as a raw JSON object again, however, you must invoke
+the \ ``prepare_for_return()``\  method which strips these indexes out of the data object.
+
+$obj = GenomeTypeObject->new()
+==============================
+
+
+Create a new empty genome object.
+
+
+$obj = GenomeTypeObject->create_from_file($filename)
+====================================================
+
+
+Load the given file, assumed to contain the JSON form of a genome object, and
+return as a GenomeTypeObject instance.
+
+The resulting object has not had the \ ``initialize``\  method invoked on it.
+
+
+$obj->destroy_to_file($filename)
+================================
+
+
+Write the given object in JSON form to the specified file.
+The object will be rendered unusable (i.e., unblessed).
+
+
+$obj->set_metadata({ ... });
+============================
+
+
+Set the metadata fields on this genome object based on a metadata
+object as defined in the GenomeAnnotation typespec:
+
+
+.. code-block:: perl
+
+  typedef structure
+  {
+   genome_id id;
+   string scientific_name;
+   string domain;
+   int genetic_code;
+   string source;
+   string source_id;
+   int ncbi_taxonomy_id;
+   string taxonomy;
+   string owner;
+  } genome_metadata
+
+
+
+$obj->add_contigs($contigs)
+===========================
+
+
+Add the given set of contigs to this genome object. \ ``$contigs``\  is a list of contig
+objects, which we add to the genome object without further inspection.
+
+
+$obj->add_features_from_list($features)
+=======================================
+
+
+Add the given features to the genome. Features here are instances of the compact_feature type:
+
+
+.. code-block:: perl
+
+  typedef tuple <string id, string location, string feature_type, string function, string aliases> compact_feature;
+
+
+used in the importation of features from an external source via a tab-separated text file.
+
+We create an event for this import so that the source of the added features is tracked.
+
+Returns a hash mapping from the feature ID in the list to the allocated feature ID.
+
+
+$obj->add_feature($params)
+==========================
+
+
+Add a new feature. The details of the feature are defined in the parameters hash. It has the following
+keys:
+
+
+-id
+
+ Identifier for this feature. If not provided, a new identifier will be
+ created based on the genome id, the type of the feature and the current largest identifier for
+ that feature type.
+
+
+
+
+$obj->write_protein_translations_to_file($filename)
+===================================================
+
+
+Write the protein translations to a FASTA file.
+
+
+$obj->write_contigs_to_file($filename)
+======================================
+
+
+Write the contigs to a FASTA file.
+
+metrics
+-------
+
+
+
+.. code-block:: perl
+
+     my $metricHash = $gto->metrics();
+
+
+Return a hash of metrics about this GTO. The metrics returned will include N50, N70, N90, total DNA length, and
+probable completeness.
+
+
+RETURN
+
+ Returns a reference to a hash with the following keys.
+
+
+ N50
+
+  The N50 of the contig lengths (see `n_metric`_).
+
+
+
+ N70
+
+  The N70 of the contig lengths.
+
+
+
+ N90
+
+  The N90 of the contig lengths.
+
+
+
+ totlen
+
+  The total DNA length.
+
+
+
+ complete
+
+  \ ``1``\  if the genome is mostly complete, else \ ``0``\ .
+
+
+
+
+
+
+n_metric
+--------
+
+
+
+.. code-block:: perl
+
+     my $length = $gto->n_metric($thresh);
+
+
+Compute the N\ *XX*\  metric for the contig lengths, where \ *XX*\  is a percentage (usually 50, 70, or 90). A higher value
+for the metric indicates a higher-quality assembly. The N70 metric is the length of the shortest contig in the set of
+longest contigs comprising 70% of the total contig lengths. Similarly, the N50 metric is the length of the shortest contig
+in the set of longest contigs comprising 50% of the total contig lengths.
+
+
+thresh
+
+ The threshold to use for the desired metric. For example, specify \ ``70``\  for an N70 metric.
+
+
+
+RETURN
+
+ Returns the length of the contig at the desired metric level.
+
+
+
+
+