Commit

Merge branch 'main' into jysun-dev
Skylark0924 authored Oct 24, 2023
2 parents cd718fe + 650cc62 commit bd54b6a
Showing 34 changed files with 313 additions and 237 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -175,7 +175,7 @@ rofunc/.DS_Store
.DS_Store
others/

-doc/source/auto_examples
+doc/source/examples
doc/source/apidocs
.vscode/
*.pth
24 changes: 12 additions & 12 deletions README.md
@@ -22,17 +22,17 @@ processing, learning, and its deployment on robots.
![](doc/img/pipeline.png)

- [Rofunc: The Full Process Python Package for Robot Learning from Demonstration and Robot Manipulation](#rofunc-the-full-process-python-package-for-robot-learning-from-demonstration-and-robot-manipulation)
-- [Installation](#installation)
-- [Install from PyPI (stable version)](#install-from-pypi-stable-version)
-- [Install from Source (nightly version, recommended)](#install-from-source-nightly-version-recommended)
-- [Documentation](#documentation)
-- [Star History](#star-history)
-- [Citation](#citation)
-- [Related Papers](#related-papers)
-- [The Team](#the-team)
-- [Acknowledge](#acknowledge)
-- [Learning from Demonstration](#learning-from-demonstration)
-- [Planning and Control](#planning-and-control)
+- [Installation](#installation)
+- [Install from PyPI (stable version)](#install-from-pypi-stable-version)
+- [Install from Source (nightly version, recommended)](#install-from-source-nightly-version-recommended)
+- [Documentation](#documentation)
+- [Star History](#star-history)
+- [Citation](#citation)
+- [Related Papers](#related-papers)
+- [The Team](#the-team)
+- [Acknowledge](#acknowledge)
+- [Learning from Demonstration](#learning-from-demonstration)
+- [Planning and Control](#planning-and-control)

## Installation

@@ -85,7 +85,7 @@ sh ./scripts/mac_install.sh
## Documentation

[![Documentation](https://img.shields.io/badge/Documentation-Access-brightgreen?style=for-the-badge)](https://rofunc.readthedocs.io/en/latest/)
-[![Example Gallery](https://img.shields.io/badge/Example%20Gallery-Access-brightgreen?style=for-the-badge)](https://rofunc.readthedocs.io/en/latest/auto_examples/index.html)
+[![Example Gallery](https://img.shields.io/badge/Example%20Gallery-Access-brightgreen?style=for-the-badge)](https://rofunc.readthedocs.io/en/latest/examples/index.html)

To give you a quick overview of the pipeline of `rofunc`, we provide an interesting example of learning to play Taichi
from human demonstration. You can find it in the [Quick start](https://rofunc.readthedocs.io/en/latest/quickstart.html)
1 change: 1 addition & 0 deletions doc/requirements_doc.txt
@@ -30,3 +30,4 @@ numpy==1.21.6
open3d
kinpy
transformers
+astroid<3
8 changes: 6 additions & 2 deletions doc/source/conf.py
@@ -29,7 +29,7 @@
author = 'Junjia Liu'

# The full version, including alpha/beta/rc tags
-release = '0.0.2.4'
+release = '0.0.2.5'

# -- General configuration ---------------------------------------------------

@@ -61,8 +61,9 @@
]

sphinx_gallery_conf = {
+'filename_pattern': '/plot_',
'examples_dirs': '../../examples', # path to your example scripts
-'gallery_dirs': 'auto_examples', # path to where to save gallery generated output
+'gallery_dirs': 'examples', # path to where to save gallery generated output
'ignore_pattern': r'__init__\.py',
# 'thumbnail_size': (600, 600),
# 'image_srcset': ["2x"],
@@ -73,6 +74,9 @@
'../../examples/robolab',
'../../examples/simulator']),
'within_subsection_order': ExampleTitleSortKey,
+'image_scrapers': ('matplotlib',),
+'matplotlib_animations': True,
+'only_warn_on_example_error': True
}

# Prefix document path to section labels, otherwise autogenerated labels would look like 'heading'
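For orientation, the directory rename above only works if the sphinx-gallery output path, the `.gitignore` entry, and the docs toctree all agree on the same name. A consolidated sketch of the gallery configuration after this commit (values copied from the hunks in this diff; only the comments are added here):

```python
# doc/source/conf.py (sketch) -- sphinx-gallery reads scripts from `examples_dirs`
# and writes the generated gallery into `gallery_dirs`, relative to doc/source/.
sphinx_gallery_conf = {
    'filename_pattern': '/plot_',
    'examples_dirs': '../../examples',    # source example scripts in the repo
    'gallery_dirs': 'examples',           # generated output: doc/source/examples/
    'ignore_pattern': r'__init__\.py',
    'image_scrapers': ('matplotlib',),    # capture matplotlib figures for thumbnails
    'matplotlib_animations': True,        # embed animations rather than static frames
    'only_warn_on_example_error': True,   # a failing example warns instead of aborting the build
}
# The generated doc/source/examples directory is ignored in .gitignore, and the
# toctree in doc/source/index.md points at examples/index accordingly.
```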
2 changes: 1 addition & 1 deletion doc/source/index.md
@@ -135,7 +135,7 @@ the [CLOVER Lab (Collaborative and Versatile Robots Laboratory)](https://feichen
installation
quickstart
-auto_examples/index
+examples/index
```

```{toctree}
2 changes: 1 addition & 1 deletion doc/source/tutorial/config_system.md
@@ -61,7 +61,7 @@ if __name__ == '__main__':
```

:::{tip}
-More examples can be found in [Example/RofuncRL](https://rofunc.readthedocs.io/en/latest/auto_examples/learning_rl/index.html).
+More examples can be found in [Example/RofuncRL](https://rofunc.readthedocs.io/en/latest/examples/learning_rl/index.html).
:::

## Customize configurations
2 changes: 1 addition & 1 deletion doc/source/tutorial/customizeRL.md
@@ -99,5 +99,5 @@ if __name__ == '__main__':

:::{tip}
1. For more details about the configuration system, please refer to [Configuration System](https://rofunc.readthedocs.io/en/latest/tutorial/config_system.html).
-2. More examples can be found in [Example/RofuncRL](https://rofunc.readthedocs.io/en/latest/auto_examples/learning_rl/index.html).
+2. More examples can be found in [Example/RofuncRL](https://rofunc.readthedocs.io/en/latest/examples/learning_rl/index.html).
:::
151 changes: 150 additions & 1 deletion examples/learning_rl/README.rst
@@ -1,4 +1,153 @@
Reinforcement learning class
-----------------------------

The following are examples of reinforcement learning methods for robot learning.

.. tabs::

    .. tab:: OpenAI Gym

        .. tabs::

            .. tab:: Pendulum

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_GymTasks_RofuncRL.py --task=Gym_Pendulum-v1 --agent=[ppo|a2c|td3|sac]

            .. tab:: CartPole

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_GymTasks_RofuncRL.py --task=Gym_CartPole-v1 --agent=[ppo|a2c|td3|sac]

            .. tab:: Acrobot

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_GymTasks_RofuncRL.py --task=Gym_Acrobot-v1 --agent=[ppo|a2c|td3|sac]

    .. tab:: IsaacGym

        .. tabs::

            .. tab:: Ant

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_Ant_RofuncRL.py --agent=[ppo|a2c|td3|sac]
                    '''Inference with pre-trained model in model zoo'''
                    python examples/learning_rl/example_Ant_RofuncRL.py --agent=ppo --inference

            .. tab:: CURICabinet

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_CURICabinet_RofuncRL.py --agent=ppo
                    '''Inference with pre-trained model in model zoo'''
                    python examples/learning_rl/example_CURICabinet_RofuncRL.py --agent=ppo --inference

            .. tab:: D4RL

                .. code-block:: shell

                    '''Training'''
                    # Hopper
                    python examples/learning_rl/example_D4RL_RofuncRL.py --task=Hopper --agent=dtrans
                    # Walker2d
                    python examples/learning_rl/example_D4RL_RofuncRL.py --task=Walker2d --agent=dtrans
                    # HalfCheetah
                    python examples/learning_rl/example_D4RL_RofuncRL.py --task=HalfCheetah --agent=dtrans
                    # Reacher2d
                    python examples/learning_rl/example_D4RL_RofuncRL.py --task=Reacher2d --agent=dtrans

            .. tab:: FrankaCabinet

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_FrankaCabinet_RofuncRL.py --agent=ppo
                    '''Inference with pre-trained model in model zoo'''
                    python examples/learning_rl/example_FrankaCabinet_RofuncRL.py --agent=ppo --inference

            .. tab:: Humanoid

                .. code-block:: shell

                    '''Training'''
                    python examples/learning_rl/example_Humanoid_RofuncRL.py --agent=ppo
                    '''Inference with pre-trained model in model zoo'''
                    python examples/learning_rl/example_Humanoid_RofuncRL.py --agent=ppo --inference

            .. tab:: HumanoidAMP

                .. code-block:: shell

                    '''Training'''
                    # Backflip
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_backflip --agent=amp
                    # Walk
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_walk --agent=amp
                    # Run
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_run --agent=amp
                    # Dance
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_dance --agent=amp
                    # Hop
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_hop --agent=amp
                    '''Inference with pre-trained model in model zoo'''
                    # Backflip
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_backflip --agent=amp --inference
                    # Walk
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_walk --agent=amp --inference
                    # Run
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_run --agent=amp --inference
                    # Dance
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_dance --agent=amp --inference
                    # Hop
                    python examples/learning_rl/example_HumanoidAMP_RofuncRL.py --task=HumanoidAMP_hop --agent=amp --inference

            .. tab:: HumanoidASE

                .. code-block:: shell

                    '''Training'''
                    # Getup
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEGetupSwordShield --agent=ase
                    # Getup with perturbation
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEPerturbSwordShield --agent=ase
                    # Heading
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEHeadingSwordShield --agent=ase
                    # Reach
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEReachSwordShield --agent=ase
                    # Location
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASELocationSwordShield --agent=ase
                    # Strike
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEStrikeSwordShield --agent=ase
                    '''Inference with pre-trained model in model zoo'''
                    # Getup
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEGetupSwordShield --agent=ase --inference
                    # Getup with perturbation
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEPerturbSwordShield --agent=ase --inference
                    # Heading
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEHeadingSwordShield --agent=ase --inference
                    # Reach
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEReachSwordShield --agent=ase --inference
                    # Location
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASELocationSwordShield --agent=ase --inference
                    # Strike
                    python examples/learning_rl/example_HumanoidASE_RofuncRL.py --task=HumanoidASEStrikeSwordShield --agent=ase --inference

    .. tab:: OmniIsaacGym
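All of the commands above drive small example scripts that share one command-line pattern: pick a task with `--task`, an algorithm with `--agent`, and switch from training to rollout with `--inference`. A minimal sketch of that pattern (the argument names are taken from the example scripts in this commit; the branch bodies are placeholders, not the actual implementation):

```python
# Sketch of the CLI shared by the example scripts above; argument names come from
# the examples in this commit, the branch bodies are placeholders.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--task", type=str, default="HumanoidAMP_dance")  # which environment/config to load
parser.add_argument("--agent", type=str, default="amp")               # which RL algorithm to run
parser.add_argument("--num_envs", type=int, default=4096)             # parallel simulation environments
parser.add_argument("--sim_device", type=str, default="cuda:0")       # device used for physics simulation
parser.add_argument("--inference", action="store_true")               # roll out a pre-trained model instead of training
custom_args = parser.parse_args()

if custom_args.inference:
    pass  # load a checkpoint from the model zoo and run the policy
else:
    pass  # build the task and agent from the configs and start training
```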
2 changes: 1 addition & 1 deletion examples/learning_rl/example_AntOmni_RofuncRL.py
@@ -1,5 +1,5 @@
"""
-Ant (RofuncRL)
+AntOmni (RofuncRL)
===========================
Ant RL using RofuncRL
4 changes: 2 additions & 2 deletions examples/learning_rl/example_GymTasks_RofuncRL.py
@@ -1,8 +1,8 @@
"""
-Gym Tasks (RofuncRL)
+OpenAIGym Tasks (RofuncRL)
===========================
-Gym Tasks RL using RofuncRL
+OpenAIGym Tasks RL using RofuncRL
"""

import argparse
4 changes: 2 additions & 2 deletions examples/learning_rl/example_GymTasks_SKRL.py
@@ -1,8 +1,8 @@
"""
-Gym Tasks (SKRL)
+OpenAIGym Tasks (SKRL)
===========================
-Gym Tasks RL using SKRL
+OpenAIGym Tasks RL using SKRL
"""

import argparse
6 changes: 3 additions & 3 deletions examples/learning_rl/example_HumanoidAMP_RofuncRL.py
@@ -2,7 +2,7 @@
HumanoidAMP (RofuncRL)
===========================
-Humanoid walking, trained by RofuncRL
+Humanoid backflip/walk/run/dance/hop, trained by RofuncRL
"""

import argparse
@@ -88,11 +88,11 @@ def inference(custom_args):


if __name__ == '__main__':
-gpu_id = 3
+gpu_id = 0

parser = argparse.ArgumentParser()
# Available tasks: HumanoidAMP_backflip, HumanoidAMP_walk, HumanoidAMP_run, HumanoidAMP_dance, HumanoidAMP_hop
parser.add_argument("--task", type=str, default="HumanoidAMP_run")
parser.add_argument("--task", type=str, default="HumanoidAMP_dance")
parser.add_argument("--agent", type=str, default="amp") # Available agent: amp
parser.add_argument("--num_envs", type=int, default=4096)
parser.add_argument("--sim_device", type=str, default="cuda:{}".format(gpu_id))
2 changes: 1 addition & 1 deletion examples/learning_rl/example_HumanoidAMP_SKRL.py
@@ -2,7 +2,7 @@
HumanoidAMP (SKRL)
===========================
-Humanoid walking, trained by SKRL
+Humanoid backflip/walk/run/dance/hop, trained by SKRL
"""

import argparse
2 changes: 1 addition & 1 deletion examples/learning_rl/example_HumanoidASE_RofuncRL.py
@@ -2,7 +2,7 @@
HumanoidASE (RofuncRL)
===========================
-Humanoid soldier, trained by RofuncRL
+Humanoid soldier Getup/Perturb/Heading/Location/Reach/Strike, trained by RofuncRL
"""

import argparse
@@ -13,6 +13,8 @@
filter_indices = [0, 1, 5, 10, 22, 36]
via_points = via_points[filter_indices]

-controller = rf.planning_control.lqt.LQT(via_points)
+cfg = rf.config.utils.get_config("./planning", "lqt")
+
+controller = rf.planning_control.lqt.LQT(via_points, cfg)
u_hat, x_hat, mu, idx_slices = controller.solve()
rf.lqt.plot_3d_uni(x_hat, mu, idx_slices, ori=False, save=False)
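Put together, the updated planning example follows a load-config / build-controller / solve / plot pattern. A minimal standalone sketch of that flow, using only the calls visible in this diff (the via-point array is a made-up placeholder, not data from the repository):

```python
# Sketch of the updated LQT workflow; the API calls are the ones shown in this diff,
# while the via-point array below is a made-up placeholder.
import numpy as np
import rofunc as rf

# Placeholder via points: a few 7-D poses (position + quaternion), purely illustrative.
via_points = np.array([
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
    [0.2, 0.1, 0.3, 0.0, 0.0, 0.0, 1.0],
    [0.4, 0.3, 0.2, 0.0, 0.0, 0.0, 1.0],
])

cfg = rf.config.utils.get_config("./planning", "lqt")             # load the LQT config group
controller = rf.planning_control.lqt.LQT(via_points, cfg)         # controller now takes the config explicitly
u_hat, x_hat, mu, idx_slices = controller.solve()                 # solve for control inputs and states
rf.lqt.plot_3d_uni(x_hat, mu, idx_slices, ori=False, save=False)  # visualize the planned trajectory
```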
File renamed without changes.
@@ -9,10 +9,10 @@
import rofunc as rf
from scipy.interpolate import interp1d

+cfg = rf.config.utils.get_config('./planning', 'lqt_cp_dmp')
+cfg.nbDeriv = 3
x = np.load('../data/LQT_LQR/S.npy')[0, :, :2].T

-cfg = rf.config.utils.get_config('./planning', 'lqt_cp_dmp')
-
f_pos = interp1d(np.linspace(0, np.size(x, 1) - 1, np.size(x, 1), dtype=int), x, kind='cubic')
MuPos = f_pos(np.linspace(0, np.size(x, 1) - 1, cfg.nbData)) # Position
MuVel = np.gradient(MuPos)[1] / cfg.dt
@@ -21,7 +21,7 @@
via_points = np.vstack((MuPos, MuVel, MuAcc))
cfg.nbData = len(via_points[0])

state_noise = np.vstack((np.array([[3], [-0.5]]), np.zeros((cfg.nbVarX - cfg.nbVarU, 1))))

controller = rf.planning_control.lqt.lqt_cp_dmp.LQTCPDMP(via_points, cfg)
# u_hat, x_hat, mu, idx_slices = controller.solve()
# rf.lqt.plot_3d_uni([x_hat], mu, idx_slices)
# rf.lqt.uni_cp_dmp(via_points, cfg)
u_hat, x_hat = controller.solve(state_noise=state_noise)