Commit 0a551d7

Issue #0: Release 1.0.1
1 parent cf5a415 commit 0a551d7

10 files changed: +5462 −764 lines

.gitignore

−1 line changed

@@ -75,7 +75,6 @@ target/
 .ipynb_checkpoints
 
 # PDFs and Images
-*.pdf
 *.jpg
 
 # pyenv

CITATION.cff

+61 −17 lines changed

@@ -1,19 +1,63 @@
 cff-version: 1.2.0
-message: "If you use this software, please cite it as below."
+title: >-
+  BSK-RL: Modular, High-Fidelity Reinforcement Learning
+  Environments for Spacecraft Tasking
+message: >-
+  If you use this software, please cite it using the
+  metadata from this file.
+type: software
 authors:
-- family-names: "Stephenson"
-  given-names: "Mark"
-  orcid: "https://orcid.org/0009-0004-3438-8127"
-- family-names: "Mantovani"
-  given-names: "Lorenzzo"
-  orcid: "https://orcid.org/0000-0001-7244-7491"
-- family-names: "Herrmann"
-  given-names: "Adam"
-  orcid: "https://orcid.org/0000-0001-6179-7728"
-- family-names: "Schaub"
-  given-names: "Hanspeter"
-  orcid: "https://orcid.org/0000-0003-0002-6035"
-title: "BSK-RL"
-version: 0.0.0
-date-released: 2023
-url: "https://github.com/AVSLab/bsk_rl/"
+  - given-names: Mark
+    family-names: Stephenson
+
+    affiliation: 'University of Colorado, Boulder'
+    orcid: 'https://orcid.org/0009-0004-3438-8127'
+  - given-names: Hanspeter
+    family-names: Schaub
+    orcid: 'https://orcid.org/0000-0003-0002-6035'
+    affiliation: 'University of Colorado, Boulder'
+
+identifiers:
+  - type: url
+    value: 'https://hanspeterschaub.info/Papers/Stephenson2024c.pdf'
+repository-code: 'https://github.com/AVSLab/bsk_rl/'
+url: 'https://avslab.github.io/bsk_rl/'
+abstract: >-
+  Reinforcement learning (RL) is a highly adaptable
+  framework for generating autonomous agents across a wide
+  domain of problems. While RL has been successfully applied
+  to highly complex, real-world systems, a significant
+  amount of the literature studies abstractions and
+  idealized versions of problems. This is especially the
+  case for the field of spacecraft tasking, in which even
+  traditional preplanning approaches tend to use highly
+  simplified models of spacecraft dynamics and operations.
+  When simplified methods are tested in a full-fidelity
+  simulation, they often lead to conservative solutions that
+  are suboptimal or aggressive solutions that are
+  infeasible. As a result, there is a need for a
+  high-fidelity spacecraft simulation environment to
+  evaluate RL-based and other tasking algorithms. This paper
+  introduces BSK-RL, an open-source Python package for
+  creating and customizing reinforcement learning
+  environments for spacecraft tasking problems. It combines
+  Basilisk --- a high-speed and high-fidelity spacecraft
+  simulation framework --- with abstractions of satellite
+  tasks and operational objectives within the standard
+  Gymnasium API wrapper for RL environments. The package is
+  designed to meet the needs of RL and spacecraft operations
+  researchers: Environment parameters are easily
+  reproducible, customizable, and randomizable. Environments
+  are highly modular: satellite state and action spaces can
+  be specified, mission objectives and rewards can be
+  defined, and the satellite dynamics and flight software
+  can be configured, implicitly introducing operational
+  limitations and safety constraints. Heterogeneous
+  multi-agent environments can be created for more complex
+  mission scenarios that consider communication and
+  collaboration. Training and deployment using the package
+  are demonstrated for an Earth-observing satellite with
+  resource constraints.
+license: MIT
+version: 1.0.1
+date-released: '2024-08-27'

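The updated metadata above is plain YAML, so it can be read directly. A minimal sketch, assuming PyYAML is installed and CITATION.cff sits in the repository root; no CFF-specific tooling is used here:

    # Minimal sketch: read the citation metadata from CITATION.cff with PyYAML.
    import yaml

    with open("CITATION.cff", "r", encoding="utf-8") as f:
        cff = yaml.safe_load(f)

    # Join the author names from the metadata above.
    authors = ", ".join(
        f"{a['given-names']} {a['family-names']}" for a in cff["authors"]
    )
    # For this release, this should report version 1.0.1, released 2024-08-27.
    print(f"{cff['title']} v{cff['version']} ({cff['date-released']}) by {authors}")
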
docs/build/doctrees/nbsphinx/examples/rllib_training.ipynb

+5,356 −715 lines changed (large diff not rendered by default)

568 KB binary file not shown

docs/source/citation.rst

+11 −8 lines changed

@@ -1,21 +1,24 @@
 Citation
 ========
-If you use this code in your research, please cite the repository as follows:
+If you use this code in your research, please cite the :download:`IAC 2024 conference paper <_static/stephenson_bskrl_2024.pdf>`.
 
 APA
 ---
 .. code-block::
 
-    Stephenson, M., Mantovani, L., Herrmann, A., & Schaub, H. BSK-RL (Version 0.0.0) [Computer software]. https://github.com/AVSLab/bsk_rl/
+    M. Stephenson and H. Schaub, “BSK-RL: Modular, High-Fidelity Reinforcement Learning Environments for Spacecraft Tasking,” International Astronautical Congress, Milan, Italy, Oct. 14–18 2024.
 
 BibTeX
 ------
 .. code-block::
 
-    @software{
-        Stephenson_BSK-RL,
-        author = {Stephenson, Mark and Mantovani, Lorenzzo and Herrmann, Adam and Schaub, Hanspeter},
-        title = {{BSK-RL}},
-        url = {https://github.com/AVSLab/bsk_rl/},
-        version = {0.0.0}
+    @inproceedings{stephenson_bskrl_2024,
+        title = {{{BSK-RL}}: {{Modular}}, {{High-Fidelity Reinforcement Learning Environments}} for {{Spacecraft Tasking}}},
+        booktitle = {75th {{International Astronautical Congress}}},
+        author = {Stephenson, Mark A and Schaub, Hanspeter},
+        year = {2024},
+        month = oct,
+        publisher = {IAF},
+        address = {Milan, Italy},
+        langid = {english},
     }

docs/source/conf.py

+1 −1 lines changed

@@ -53,7 +53,7 @@
 }
 autodoc_typehints = "both"
 # nbsphinx_execute = "never"
-nbsphinx_allow_errors = True
+nbsphinx_allow_errors = False
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

docs/source/index.rst

+3 lines changed

@@ -28,6 +28,9 @@ simulation framework, making the simulation environments high-fidelity and computationally
 efficient. BSK-RL also includes a collection of utilities and examples
 for working with these environments.
 
+A whitepaper on the design philosophy behind BSK-RL and an example use case can be
+:download:`downloaded here <_static/stephenson_bskrl_2024.pdf>`.
+
 Quickstart
 ----------
 Installation

docs/source/release_notes.rst

+7 −1 lines changed

@@ -1,9 +1,15 @@
 Release Notes
 =============
 
+Development Version
+-------------------
+*Release Date: MMM. DD, YYYY*
+
+* None.
+
 Version 1.0.1
 -------------
-*Release Date: MMM. DD, YYYY*
+*Release Date: Aug. 29, 2024*
 
 * Change the :class:`~bsk_rl.ConstellationTasking` environment info dictionary to include
   all non-agent information in ``info['__common__']``, which is expected by RLlib's

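To illustrate the info-dictionary layout described in the 1.0.1 note above, a small self-contained sketch; the agent names and values here are made up for illustration, and in practice the dictionary comes from a ConstellationTasking step:

    # Illustrative only: a step() info dictionary shaped as described above.
    info = {
        "EO-1": {"requires_retasking": True},    # hypothetical per-agent entry
        "EO-2": {"requires_retasking": False},   # hypothetical per-agent entry
        "__common__": {"sim_time": 300.0},       # hypothetical shared entry
    }

    # Non-agent (shared) information now lives under '__common__' ...
    shared_info = info["__common__"]
    # ... and the remaining entries are keyed by agent name.
    per_agent_info = {k: v for k, v in info.items() if k != "__common__"}
    print(shared_info, per_agent_info)
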
examples/rllib_training.ipynb

+22 −20 lines changed

@@ -13,8 +13,8 @@
 "\n",
 "<div class=\"alert alert-warning\">\n",
 "\n",
-"**Warning:** RLlib currently has a bug that results in an undesirable timeout which stops\n",
-"training. Check here to see if it has been resolved: https://github.com/ray-project/ray/pull/45147\n",
+"**Warning:** RLlib had a bug that resulted in an undesirable timeout which stopped\n",
+"training. It has since been resolved: https://github.com/ray-project/ray/pull/45147\n",
 "\n",
 "</div>\n",
 "\n",
@@ -65,18 +65,18 @@
 "            dict(prop=\"instrument_pointing_error\", norm=np.pi),\n",
 "            dict(prop=\"solar_pointing_error\", norm=np.pi)\n",
 "        ),\n",
-"        obs.Eclipse(),\n",
 "        obs.OpportunityProperties(\n",
 "            dict(prop=\"opportunity_open\", norm=5700),\n",
 "            dict(prop=\"opportunity_close\", norm=5700),\n",
 "            type=\"ground_station\",\n",
 "            n_ahead_observe=1,\n",
 "        ),\n",
+"        obs.Eclipse(norm=5700),\n",
 "        obs.Time(),\n",
 "    ]\n",
 "    action_spec = [\n",
 "        act.Scan(duration=180.0),\n",
-"        act.Charge(duration=180.0),\n",
+"        act.Charge(duration=120.0),\n",
 "        act.Downlink(duration=60.0),\n",
 "        act.Desat(duration=60.0),\n",
 "    ]\n",
@@ -102,25 +102,25 @@
 "    \"Scanner-1\",\n",
 "    sat_args=dict(\n",
 "        # Data\n",
-"        dataStorageCapacity=5000 * 8e6,  # MB to bits\n",
-"        storageInit=lambda: np.random.uniform(0, 5000 * 8e6),\n",
-"        instrumentBaudRate=0.5e6,\n",
-"        transmitterBaudRate=-112e6,\n",
+"        dataStorageCapacity=5000 * 8e6,  # bits\n",
+"        storageInit=lambda: np.random.uniform(0.0, 0.8) * 5000 * 8e6,\n",
+"        instrumentBaudRate=0.5 * 8e6,\n",
+"        transmitterBaudRate=-50 * 8e6,\n",
 "        # Power\n",
-"        batteryStorageCapacity=400 * 3600,  # Wh to W*s\n",
-"        storedCharge_Init=lambda: np.random.uniform(400 * 3600 * 0.2, 400 * 3600 * 0.8),\n",
-"        basePowerDraw=-10.0,\n",
-"        instrumentPowerDraw=-30.0,\n",
-"        transmitterPowerDraw=-25.0,\n",
-"        thrusterPowerDraw=-80.0,\n",
+"        batteryStorageCapacity=200 * 3600,  # W*s\n",
+"        storedCharge_Init=lambda: np.random.uniform(0.3, 1.0) * 200 * 3600,\n",
+"        basePowerDraw=-10.0,  # W\n",
+"        instrumentPowerDraw=-30.0,  # W\n",
+"        transmitterPowerDraw=-25.0,  # W\n",
+"        thrusterPowerDraw=-80.0,  # W\n",
+"        panelArea=0.25,\n",
 "        # Attitude\n",
 "        imageAttErrorRequirement=0.1,\n",
 "        imageRateErrorRequirement=0.1,\n",
-"        disturbance_vector=lambda: np.random.normal(scale=0.0001, size=3),\n",
+"        disturbance_vector=lambda: np.random.normal(scale=0.0001, size=3),  # N*m\n",
 "        maxWheelSpeed=6000.0,  # RPM\n",
 "        wheelSpeeds=lambda: np.random.uniform(-3000, 3000, 3),\n",
 "        desatAttitude=\"nadir\",\n",
-"        nHat_B=np.array([0, 0, -1]),  # Solar panel orientation\n",
 "    )\n",
 ")"
 ]
@@ -139,7 +139,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"duration = 2 * 5700.0  # About 2 orbits\n",
+"duration = 5 * 5700.0  # About 5 orbits\n",
 "env_args = dict(\n",
 "    satellite=sat,\n",
 "    scenario=scene.UniformNadirScanning(value_per_second=1/duration),\n",
@@ -210,10 +210,12 @@
 "from bsk_rl.utils.rllib import unpack_config\n",
 "from ray.rllib.algorithms.ppo import PPOConfig\n",
 "\n",
+"N_CPUS = 3\n",
+"\n",
 "training_args = dict(\n",
 "    lr=0.00003,\n",
 "    gamma=0.999,\n",
-"    train_batch_size=250,  # In practice, usually a bigger number\n",
+"    train_batch_size=2500,\n",
 "    num_sgd_iter=10,\n",
 "    model=dict(fcnet_hiddens=[512, 512], vf_share_layers=False),\n",
 "    lambda_=0.95,\n",
@@ -225,7 +227,7 @@
 "config = (\n",
 "    PPOConfig()\n",
 "    .training(**training_args)\n",
-"    .env_runners(num_env_runners=2, sample_timeout_s=1000.0)\n",
+"    .env_runners(num_env_runners=N_CPUS-1, sample_timeout_s=1000.0)\n",
 "    .environment(\n",
 "        env=unpack_config(SatelliteTasking),\n",
 "        env_config=env_args,\n",
@@ -269,7 +271,7 @@
 "\n",
 "ray.init(\n",
 "    ignore_reinit_error=True,\n",
-"    num_cpus=3,\n",
+"    num_cpus=N_CPUS,\n",
 "    object_store_memory=2_000_000_000,  # 2 GB\n",
 ")\n",
 "\n",

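The revised sat_args comments above specify data storage in bits and battery capacity in watt-seconds. A short, self-contained sketch of that unit bookkeeping; the constant names are illustrative and not part of the package:

    # Illustrative unit bookkeeping for the sat_args values shown above.
    MB_TO_BITS = 8e6   # 1 MB = 8e6 bits
    WH_TO_WS = 3600    # 1 Wh = 3600 W*s (joules)

    dataStorageCapacity = 5000 * MB_TO_BITS   # 5000 MB of storage, expressed in bits
    batteryStorageCapacity = 200 * WH_TO_WS   # 200 Wh battery, expressed in W*s

    print(f"{dataStorageCapacity:.3e} bits, {batteryStorageCapacity} W*s")
    # 4.000e+10 bits, 720000 W*s
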
pyproject.toml

+1 −1 lines changed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "bsk_rl"
-version = "1.0.0"
+version = "1.0.1"
 authors = [
     { name = "Adam Herrmann", email = "[email protected]" },
     { name = "Mark Stephenson", email = "[email protected]" },

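After upgrading, the installed version can be checked against the bump above. A minimal sketch using only the Python standard library, assuming the package is installed under the distribution name bsk_rl from pyproject.toml:

    # Print the installed bsk_rl version; expected to report 1.0.1 for this release.
    from importlib.metadata import version

    print(version("bsk_rl"))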