From 0029b2966e66660ad2bda427cad99b010f62be74 Mon Sep 17 00:00:00 2001
From: Junjia Liu
Date: Fri, 18 Aug 2023 22:02:22 +0800
Subject: [PATCH] Remove useless files

---
 .../agents/mixline/for_test/__init__.py       |   0
 .../agents/mixline/for_test/amp_agent.py      | 664 -----------------
 .../agents/mixline/for_test/amp_datasets.py   |  59 --
 .../agents/mixline/for_test/amp_models.py     |  65 --
 .../mixline/for_test/amp_network_builder.py   | 154 ----
 .../agents/mixline/for_test/amp_players.py    | 111 ---
 .../agents/mixline/for_test/ase_agent.py      | 567 --------------
 .../mixline/for_test/ase_humanoid_hrl.yaml    | 114 ---
 .../agents/mixline/for_test/ase_models.py     |  56 --
 .../mixline/for_test/ase_network_builder.py   | 379 ----------
 .../agents/mixline/for_test/ase_players.py    | 179 -----
 .../agents/mixline/for_test/common_agent.py   | 592 ---------------
 .../agents/mixline/for_test/common_player.py  | 216 ------
 .../agents/mixline/for_test/config.py         | 259 -------
 .../agents/mixline/for_test/hrl_agent.py      | 356 ---------
 .../agents/mixline/for_test/hrl_humanoid.yaml |  76 --
 .../agents/mixline/for_test/hrl_models.py     |  46 --
 .../mixline/for_test/hrl_network_builder.py   |  67 --
 .../agents/mixline/for_test/hrl_players.py    | 345 ---------
 .../humanoid_sword_shield_heading.yaml        |  53 --
 .../humanoid_sword_shield_strike.yaml         |  49 --
 .../agents/mixline/for_test/observer.py       |  36 -
 .../agents/mixline/for_test/parse_task.py     |  73 --
 .../agents/mixline/for_test/replay_buffer.py  | 113 ---
 .../RofuncRL/agents/mixline/for_test/run.py   | 252 -------
 .../agents/mixline/for_test/tasks/__init__.py |   6 -
 .../mixline/for_test/tasks/base_task.py       | 428 -----------
 .../agents/mixline/for_test/tasks/humanoid.py | 692 ------------------
 .../mixline/for_test/tasks/humanoid_amp.py    | 344 ---------
 .../for_test/tasks/humanoid_amp_getup.py      | 170 -----
 .../for_test/tasks/humanoid_amp_task.py       | 101 ---
 .../for_test/tasks/humanoid_heading.py        | 313 --------
 .../for_test/tasks/humanoid_location.py       | 256 -------
 .../for_test/tasks/humanoid_perturb.py        | 273 -------
 .../mixline/for_test/tasks/humanoid_reach.py  | 223 ------
 .../mixline/for_test/tasks/humanoid_strike.py | 323 --------
 .../for_test/tasks/humanoid_view_motion.py    | 125 ----
 .../agents/mixline/for_test/tasks/vec_task.py | 139 ----
 .../for_test/tasks/vec_task_wrappers.py       |  61 --
 .../agents/mixline/for_test/utils/__init__.py |   6 -
 .../agents/mixline/for_test/utils/gym_util.py | 240 ------
 .../mixline/for_test/utils/torch_utils.py     | 182 -----
 .../agents/mixline/for_test/vec_task.py       | 139 ----
 .../mixline/for_test/vec_task_wrappers.py     |  61 --
 44 files changed, 8963 deletions(-)
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/__init__.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/config.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/run.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py

diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/__init__.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py
deleted file mode 100644
index 5e3b683f..00000000
--- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py
+++ /dev/null
@@ -1,664 +0,0 @@
-# Copyright (c) 2018-2022, NVIDIA Corporation
-# All rights reserved.
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.algos_torch import torch_ext -from rl_games.common import a2c_common -from rl_games.common import schedulers -from rl_games.common import vecenv - -from isaacgym.torch_utils import * - -import time -from datetime import datetime -import numpy as np -from torch import optim -import torch -from torch import nn - -import replay_buffer -import common_agent - -from tensorboardX import SummaryWriter - - -class AMPAgent(common_agent.CommonAgent): - def __init__(self, base_name, config): - super().__init__(base_name, config) - - if self._normalize_amp_input: - self._amp_input_mean_std = RunningMeanStd(self._amp_observation_space.shape).to(self.ppo_device) - - return - - def init_tensors(self): - super().init_tensors() - self._build_amp_buffers() - return - - def set_eval(self): - super().set_eval() - if self._normalize_amp_input: - self._amp_input_mean_std.eval() - return - - def set_train(self): - super().set_train() - if self._normalize_amp_input: - self._amp_input_mean_std.train() - return - - def get_stats_weights(self): - state = super().get_stats_weights() - if self._normalize_amp_input: - state['amp_input_mean_std'] = self._amp_input_mean_std.state_dict() - - return state - - def set_stats_weights(self, weights): - super().set_stats_weights(weights) - if self._normalize_amp_input: - self._amp_input_mean_std.load_state_dict(weights['amp_input_mean_std']) - - return - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, masks) - else: - res_dict = self.get_action_values(self.obs, self._rand_action_probs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if 
self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - self.experience_buffer.update_data('amp_obs', n, infos['amp_obs']) - self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask']) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - if (self.vec_env.env.task.viewer): - self._amp_debug(infos) - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs'] - amp_rewards = self._calc_amp_rewards(mb_amp_obs) - mb_rewards = self._combine_rewards(mb_rewards, amp_rewards) - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - for k, v in amp_rewards.items(): - batch_dict[k] = a2c_common.swap_and_flatten01(v) - - return batch_dict - - def get_action_values(self, obs_dict, rand_action_probs): - processed_obs = self._preproc_obs(obs_dict['obs']) - - self.model.eval() - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs': processed_obs, - 'rnn_states': self.rnn_states - } - - with torch.no_grad(): - res_dict = self.model(input_dict) - if self.has_central_value: - states = obs_dict['states'] - input_dict = { - 'is_train': False, - 'states': states, - } - value = self.get_central_value(input_dict) - res_dict['values'] = value - - if self.normalize_value: - res_dict['values'] = self.value_mean_std(res_dict['values'], True) - - rand_action_mask = torch.bernoulli(rand_action_probs) - det_action_mask = rand_action_mask == 0.0 - res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask] - res_dict['rand_action_mask'] = rand_action_mask - - return res_dict - - def prepare_dataset(self, batch_dict): - super().prepare_dataset(batch_dict) - self.dataset.values_dict['amp_obs'] = batch_dict['amp_obs'] - self.dataset.values_dict['amp_obs_demo'] = batch_dict['amp_obs_demo'] - self.dataset.values_dict['amp_obs_replay'] = batch_dict['amp_obs_replay'] - - rand_action_mask = batch_dict['rand_action_mask'] - 
self.dataset.values_dict['rand_action_mask'] = rand_action_mask - return - - def train_epoch(self): - play_time_start = time.time() - - with torch.no_grad(): - if self.is_rnn: - batch_dict = self.play_steps_rnn() - else: - batch_dict = self.play_steps() - - play_time_end = time.time() - update_time_start = time.time() - rnn_masks = batch_dict.get('rnn_masks', None) - - self._update_amp_demos() - num_obs_samples = batch_dict['amp_obs'].shape[0] - amp_obs_demo = self._amp_obs_demo_buffer.sample(num_obs_samples)['amp_obs'] - batch_dict['amp_obs_demo'] = amp_obs_demo - - if (self._amp_replay_buffer.get_total_count() == 0): - batch_dict['amp_obs_replay'] = batch_dict['amp_obs'] - else: - batch_dict['amp_obs_replay'] = self._amp_replay_buffer.sample(num_obs_samples)['amp_obs'] - - self.set_train() - - self.curr_frames = batch_dict.pop('played_frames') - self.prepare_dataset(batch_dict) - self.algo_observer.after_steps() - - if self.has_central_value: - self.train_central_value() - - train_info = None - - if self.is_rnn: - frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) - print(frames_mask_ratio) - - for _ in range(0, self.mini_epochs_num): - ep_kls = [] - for i in range(len(self.dataset)): - curr_train_info = self.train_actor_critic(self.dataset[i]) - - if self.schedule_type == 'legacy': - if self.multi_gpu: - curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, - self.epoch_num, 0, - curr_train_info['kl'].item()) - self.update_lr(self.last_lr) - - if (train_info is None): - train_info = dict() - for k, v in curr_train_info.items(): - train_info[k] = [v] - else: - for k, v in curr_train_info.items(): - train_info[k].append(v) - - av_kls = torch_ext.mean_list(train_info['kl']) - - if self.schedule_type == 'standard': - if self.multi_gpu: - av_kls = self.hvd.average_value(av_kls, 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, - 0, av_kls.item()) - self.update_lr(self.last_lr) - - if self.schedule_type == 'standard_epoch': - if self.multi_gpu: - av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, - av_kls.item()) - self.update_lr(self.last_lr) - - update_time_end = time.time() - play_time = play_time_end - play_time_start - update_time = update_time_end - update_time_start - total_time = update_time_end - play_time_start - - self._store_replay_amp_obs(batch_dict['amp_obs']) - - train_info['play_time'] = play_time - train_info['update_time'] = update_time - train_info['total_time'] = total_time - self._record_train_batch_info(batch_dict, train_info) - - return train_info - - def calc_gradients(self, input_dict): - self.set_train() - - value_preds_batch = input_dict['old_values'] - old_action_log_probs_batch = input_dict['old_logp_actions'] - advantage = input_dict['advantages'] - old_mu_batch = input_dict['mu'] - old_sigma_batch = input_dict['sigma'] - return_batch = input_dict['returns'] - actions_batch = input_dict['actions'] - obs_batch = input_dict['obs'] - obs_batch = self._preproc_obs(obs_batch) - - amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size] - amp_obs = self._preproc_amp_obs(amp_obs) - amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size] - amp_obs_replay = self._preproc_amp_obs(amp_obs_replay) - - amp_obs_demo = 
input_dict['amp_obs_demo'][0:self._amp_minibatch_size] - amp_obs_demo = self._preproc_amp_obs(amp_obs_demo) - amp_obs_demo.requires_grad_(True) - - rand_action_mask = input_dict['rand_action_mask'] - rand_action_sum = torch.sum(rand_action_mask) - - lr = self.last_lr - kl = 1.0 - lr_mul = 1.0 - curr_e_clip = lr_mul * self.e_clip - - batch_dict = { - 'is_train': True, - 'prev_actions': actions_batch, - 'obs': obs_batch, - 'amp_obs': amp_obs, - 'amp_obs_replay': amp_obs_replay, - 'amp_obs_demo': amp_obs_demo - } - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - with torch.cuda.amp.autocast(enabled=self.mixed_precision): - res_dict = self.model(batch_dict) - action_log_probs = res_dict['prev_neglogp'] - values = res_dict['values'] - entropy = res_dict['entropy'] - mu = res_dict['mus'] - sigma = res_dict['sigmas'] - disc_agent_logit = res_dict['disc_agent_logit'] - disc_agent_replay_logit = res_dict['disc_agent_replay_logit'] - disc_demo_logit = res_dict['disc_demo_logit'] - - a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) - a_loss = a_info['actor_loss'] - a_clipped = a_info['actor_clipped'].float() - - c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) - c_loss = c_info['critic_loss'] - - b_loss = self.bound_loss(mu) - - c_loss = torch.mean(c_loss) - a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum - entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum - b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum - a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum - - disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0) - disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo) - disc_loss = disc_info['disc_loss'] - - loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \ - + self._disc_coef * disc_loss - - a_info['actor_loss'] = a_loss - a_info['actor_clip_frac'] = a_clip_frac - c_info['critic_loss'] = c_loss - - if self.multi_gpu: - self.optimizer.zero_grad() - else: - for param in self.model.parameters(): - param.grad = None - - self.scaler.scale(loss).backward() - # TODO: Refactor this ugliest code of the year - if self.truncate_grads: - if self.multi_gpu: - self.optimizer.synchronize() - self.scaler.unscale_(self.optimizer) - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - with self.optimizer.skip_synchronize(): - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.unscale_(self.optimizer) - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.step(self.optimizer) - self.scaler.update() - - with torch.no_grad(): - reduce_kl = not self.is_rnn - kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) - if self.is_rnn: - kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() # / sum_mask - - self.train_result = { - 'entropy': entropy, - 'kl': kl_dist, - 'last_lr': self.last_lr, - 'lr_mul': lr_mul, - 'b_loss': b_loss - } - self.train_result.update(a_info) - self.train_result.update(c_info) - self.train_result.update(disc_info) - - return - - def _load_config_params(self, config): - super()._load_config_params(config) - - # when 
eps greedy is enabled, rollouts will be generated using a mixture of - # a deterministic and stochastic actions. The deterministic actions help to - # produce smoother, less noisy, motions that can be used to train a better - # discriminator. If the discriminator is only trained with jittery motions - # from noisy actions, it can learn to phone in on the jitteriness to - # differential between real and fake samples. - self._enable_eps_greedy = bool(config['enable_eps_greedy']) - - self._task_reward_w = config['task_reward_w'] - self._disc_reward_w = config['disc_reward_w'] - - self._amp_observation_space = self.env_info['amp_observation_space'] - self._amp_batch_size = int(config['amp_batch_size']) - self._amp_minibatch_size = int(config['amp_minibatch_size']) - assert (self._amp_minibatch_size <= self.minibatch_size) - - self._disc_coef = config['disc_coef'] - self._disc_logit_reg = config['disc_logit_reg'] - self._disc_grad_penalty = config['disc_grad_penalty'] - self._disc_weight_decay = config['disc_weight_decay'] - self._disc_reward_scale = config['disc_reward_scale'] - self._normalize_amp_input = config.get('normalize_amp_input', True) - return - - def _build_net_config(self): - config = super()._build_net_config() - config['amp_input_shape'] = self._amp_observation_space.shape - return config - - def _build_rand_action_probs(self): - num_envs = self.vec_env.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.float32, device=self.ppo_device) - - self._rand_action_probs = 1.0 - torch.exp(10 * (env_ids / (num_envs - 1.0) - 1.0)) - self._rand_action_probs[0] = 1.0 - self._rand_action_probs[-1] = 0.0 - - if not self._enable_eps_greedy: - self._rand_action_probs[:] = 1.0 - - return - - def _init_train(self): - super()._init_train() - self._init_amp_demo_buf() - return - - def _disc_loss(self, disc_agent_logit, disc_demo_logit, obs_demo): - # prediction loss - disc_loss_agent = self._disc_loss_neg(disc_agent_logit) - disc_loss_demo = self._disc_loss_pos(disc_demo_logit) - disc_loss = 0.5 * (disc_loss_agent + disc_loss_demo) - - # logit reg - logit_weights = self.model.a2c_network.get_disc_logit_weights() - disc_logit_loss = torch.sum(torch.square(logit_weights)) - disc_loss += self._disc_logit_reg * disc_logit_loss - - # grad penalty - disc_demo_grad = torch.autograd.grad(disc_demo_logit, obs_demo, grad_outputs=torch.ones_like(disc_demo_logit), - create_graph=True, retain_graph=True, only_inputs=True) - disc_demo_grad = disc_demo_grad[0] - disc_demo_grad = torch.sum(torch.square(disc_demo_grad), dim=-1) - disc_grad_penalty = torch.mean(disc_demo_grad) - disc_loss += self._disc_grad_penalty * disc_grad_penalty - - # weight decay - if (self._disc_weight_decay != 0): - disc_weights = self.model.a2c_network.get_disc_weights() - disc_weights = torch.cat(disc_weights, dim=-1) - disc_weight_decay = torch.sum(torch.square(disc_weights)) - disc_loss += self._disc_weight_decay * disc_weight_decay - - disc_agent_acc, disc_demo_acc = self._compute_disc_acc(disc_agent_logit, disc_demo_logit) - - disc_info = { - 'disc_loss': disc_loss, - 'disc_grad_penalty': disc_grad_penalty.detach(), - 'disc_logit_loss': disc_logit_loss.detach(), - 'disc_agent_acc': disc_agent_acc.detach(), - 'disc_demo_acc': disc_demo_acc.detach(), - 'disc_agent_logit': disc_agent_logit.detach(), - 'disc_demo_logit': disc_demo_logit.detach() - } - return disc_info - - def _disc_loss_neg(self, disc_logits): - bce = torch.nn.BCEWithLogitsLoss() - loss = bce(disc_logits, torch.zeros_like(disc_logits)) - return loss - 
- def _disc_loss_pos(self, disc_logits): - bce = torch.nn.BCEWithLogitsLoss() - loss = bce(disc_logits, torch.ones_like(disc_logits)) - return loss - - def _compute_disc_acc(self, disc_agent_logit, disc_demo_logit): - agent_acc = disc_agent_logit < 0 - agent_acc = torch.mean(agent_acc.float()) - demo_acc = disc_demo_logit > 0 - demo_acc = torch.mean(demo_acc.float()) - return agent_acc, demo_acc - - def _fetch_amp_obs_demo(self, num_samples): - amp_obs_demo = self.vec_env.env.fetch_amp_obs_demo(num_samples) - return amp_obs_demo - - def _build_amp_buffers(self): - batch_shape = self.experience_buffer.obs_base_shape - self.experience_buffer.tensor_dict['amp_obs'] = torch.zeros(batch_shape + self._amp_observation_space.shape, - device=self.ppo_device) - self.experience_buffer.tensor_dict['rand_action_mask'] = torch.zeros(batch_shape, dtype=torch.float32, - device=self.ppo_device) - - amp_obs_demo_buffer_size = int(self.config['amp_obs_demo_buffer_size']) - self._amp_obs_demo_buffer = replay_buffer.ReplayBuffer(amp_obs_demo_buffer_size, self.ppo_device) - - self._amp_replay_keep_prob = self.config['amp_replay_keep_prob'] - replay_buffer_size = int(self.config['amp_replay_buffer_size']) - self._amp_replay_buffer = replay_buffer.ReplayBuffer(replay_buffer_size, self.ppo_device) - - self._build_rand_action_probs() - - self.tensor_list += ['amp_obs', 'rand_action_mask'] - return - - def _init_amp_demo_buf(self): - buffer_size = self._amp_obs_demo_buffer.get_buffer_size() - num_batches = int(np.ceil(buffer_size / self._amp_batch_size)) - - for i in range(num_batches): - curr_samples = self._fetch_amp_obs_demo(self._amp_batch_size) - self._amp_obs_demo_buffer.store({'amp_obs': curr_samples}) - - return - - def _update_amp_demos(self): - new_amp_obs_demo = self._fetch_amp_obs_demo(self._amp_batch_size) - self._amp_obs_demo_buffer.store({'amp_obs': new_amp_obs_demo}) - return - - def _preproc_amp_obs(self, amp_obs): - if self._normalize_amp_input: - amp_obs = self._amp_input_mean_std(amp_obs) - return amp_obs - - def _combine_rewards(self, task_rewards, amp_rewards): - disc_r = amp_rewards['disc_rewards'] - - combined_rewards = self._task_reward_w * task_rewards + \ - + self._disc_reward_w * disc_r - return combined_rewards - - def _eval_disc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_disc(proc_amp_obs) - - def _calc_advs(self, batch_dict): - returns = batch_dict['returns'] - values = batch_dict['values'] - rand_action_mask = batch_dict['rand_action_mask'] - - advantages = returns - values - advantages = torch.sum(advantages, axis=1) - if self.normalize_advantage: - advantages = torch_ext.normalization_with_masks(advantages, rand_action_mask) - - return advantages - - def _calc_amp_rewards(self, amp_obs): - disc_r = self._calc_disc_rewards(amp_obs) - output = { - 'disc_rewards': disc_r - } - return output - - def _calc_disc_rewards(self, amp_obs): - with torch.no_grad(): - disc_logits = self._eval_disc(amp_obs) - prob = 1 / (1 + torch.exp(-disc_logits)) - disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.ppo_device))) - disc_r *= self._disc_reward_scale - - return disc_r - - def _store_replay_amp_obs(self, amp_obs): - buf_size = self._amp_replay_buffer.get_buffer_size() - buf_total_count = self._amp_replay_buffer.get_total_count() - if (buf_total_count > buf_size): - keep_probs = to_torch(np.array([self._amp_replay_keep_prob] * amp_obs.shape[0]), device=self.ppo_device) - keep_mask = torch.bernoulli(keep_probs) == 
1.0 - amp_obs = amp_obs[keep_mask] - - if (amp_obs.shape[0] > buf_size): - rand_idx = torch.randperm(amp_obs.shape[0]) - rand_idx = rand_idx[:buf_size] - amp_obs = amp_obs[rand_idx] - - self._amp_replay_buffer.store({'amp_obs': amp_obs}) - return - - def _record_train_batch_info(self, batch_dict, train_info): - super()._record_train_batch_info(batch_dict, train_info) - train_info['disc_rewards'] = batch_dict['disc_rewards'] - return - - def _log_train_info(self, train_info, frame): - super()._log_train_info(train_info, frame) - - self.writer.add_scalar('losses/disc_loss', torch_ext.mean_list(train_info['disc_loss']).item(), frame) - - self.writer.add_scalar('info/disc_agent_acc', torch_ext.mean_list(train_info['disc_agent_acc']).item(), frame) - self.writer.add_scalar('info/disc_demo_acc', torch_ext.mean_list(train_info['disc_demo_acc']).item(), frame) - self.writer.add_scalar('info/disc_agent_logit', torch_ext.mean_list(train_info['disc_agent_logit']).item(), - frame) - self.writer.add_scalar('info/disc_demo_logit', torch_ext.mean_list(train_info['disc_demo_logit']).item(), frame) - self.writer.add_scalar('info/disc_grad_penalty', torch_ext.mean_list(train_info['disc_grad_penalty']).item(), - frame) - self.writer.add_scalar('info/disc_logit_loss', torch_ext.mean_list(train_info['disc_logit_loss']).item(), frame) - - disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards']) - self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame) - self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame) - return - - def _amp_debug(self, info): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs[0:1] - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs) - disc_reward = amp_rewards['disc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py deleted file mode 100644 index e960b2eb..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch -from rl_games.common import datasets - -class AMPDataset(datasets.PPODataset): - def __init__(self, batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len): - super().__init__(batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len) - self._idx_buf = torch.randperm(batch_size) - return - - def update_mu_sigma(self, mu, sigma): - raise NotImplementedError() - return - - def _get_item(self, idx): - start = idx * self.minibatch_size - end = (idx + 1) * self.minibatch_size - sample_idx = self._idx_buf[start:end] - - input_dict = {} - for k,v in self.values_dict.items(): - if k not in self.special_names and v is not None: - input_dict[k] = v[sample_idx] - - if (end >= self.batch_size): - self._shuffle_idx_buf() - - return input_dict - - def _shuffle_idx_buf(self): - self._idx_buf[:] = torch.randperm(self.batch_size) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py deleted file mode 100644 index 5ff1d008..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import torch.nn as nn -from rl_games.algos_torch.models import ModelA2CContinuousLogStd - -class ModelAMPContinuous(ModelA2CContinuousLogStd): - def __init__(self, network): - super().__init__(network) - return - - def build(self, config): - net = self.network_builder.build('amp', **config) - for name, _ in net.named_parameters(): - print(name) - return ModelAMPContinuous.Network(net) - - class Network(ModelA2CContinuousLogStd.Network): - def __init__(self, a2c_network): - super().__init__(a2c_network) - return - - def forward(self, input_dict): - is_train = input_dict.get('is_train', True) - result = super().forward(input_dict) - - if (is_train): - amp_obs = input_dict['amp_obs'] - disc_agent_logit = self.a2c_network.eval_disc(amp_obs) - result["disc_agent_logit"] = disc_agent_logit - - amp_obs_replay = input_dict['amp_obs_replay'] - disc_agent_replay_logit = self.a2c_network.eval_disc(amp_obs_replay) - result["disc_agent_replay_logit"] = disc_agent_replay_logit - - amp_demo_obs = input_dict['amp_obs_demo'] - disc_demo_logit = self.a2c_network.eval_disc(amp_demo_obs) - result["disc_demo_logit"] = disc_demo_logit - - return result \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py deleted file mode 100644 index f3d5155f..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import layers -from rl_games.algos_torch import network_builder - -import torch -import torch.nn as nn -import numpy as np - -DISC_LOGIT_INIT_SCALE = 1.0 - -class AMPBuilder(network_builder.A2CBuilder): - def __init__(self, **kwargs): - super().__init__(**kwargs) - return - - class Network(network_builder.A2CBuilder.Network): - def __init__(self, params, **kwargs): - super().__init__(params, **kwargs) - - if self.is_continuous: - if (not self.space_config['learn_sigma']): - actions_num = kwargs.get('actions_num') - sigma_init = self.init_factory.create(**self.space_config['sigma_init']) - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False) - sigma_init(self.sigma) - - amp_input_shape = kwargs.get('amp_input_shape') - self._build_disc(amp_input_shape) - - return - - def load(self, params): - super().load(params) - - self._disc_units = params['disc']['units'] - self._disc_activation = params['disc']['activation'] - self._disc_initializer = params['disc']['initializer'] - return - - def forward(self, obs_dict): - obs = obs_dict['obs'] - states = obs_dict.get('rnn_states', None) - - actor_outputs = self.eval_actor(obs) - value = self.eval_critic(obs) - - output = actor_outputs + (value, states) - - return output - - def eval_actor(self, obs): - a_out = self.actor_cnn(obs) - a_out = a_out.contiguous().view(a_out.size(0), -1) - a_out = self.actor_mlp(a_out) - - if self.is_discrete: - logits = self.logits(a_out) - return logits - - if self.is_multi_discrete: - logits = [logit(a_out) for logit in self.logits] - return logits - - if self.is_continuous: - mu = self.mu_act(self.mu(a_out)) - if self.space_config['fixed_sigma']: - sigma = mu * 0.0 + self.sigma_act(self.sigma) - else: - sigma = self.sigma_act(self.sigma(a_out)) - - return mu, sigma - return - - def eval_critic(self, obs): - c_out = self.critic_cnn(obs) - c_out = c_out.contiguous().view(c_out.size(0), -1) - c_out = self.critic_mlp(c_out) - value = self.value_act(self.value(c_out)) - return value - - def eval_disc(self, amp_obs): - disc_mlp_out = self._disc_mlp(amp_obs) - disc_logits = self._disc_logits(disc_mlp_out) - return disc_logits - - def get_disc_logit_weights(self): - return torch.flatten(self._disc_logits.weight) - - def get_disc_weights(self): - weights = [] - for m in self._disc_mlp.modules(): - if isinstance(m, nn.Linear): - weights.append(torch.flatten(m.weight)) - - weights.append(torch.flatten(self._disc_logits.weight)) - return weights - - def _build_disc(self, input_shape): - self._disc_mlp = nn.Sequential() - - mlp_args = { - 'input_size' : input_shape[0], - 'units' : self._disc_units, - 'activation' : self._disc_activation, - 'dense_func' : torch.nn.Linear - } - self._disc_mlp = self._build_mlp(**mlp_args) - - mlp_out_size = self._disc_units[-1] - self._disc_logits = torch.nn.Linear(mlp_out_size, 1) - - mlp_init = self.init_factory.create(**self._disc_initializer) - for m in self._disc_mlp.modules(): - if isinstance(m, nn.Linear): - mlp_init(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - - torch.nn.init.uniform_(self._disc_logits.weight, -DISC_LOGIT_INIT_SCALE, DISC_LOGIT_INIT_SCALE) - torch.nn.init.zeros_(self._disc_logits.bias) - - return - - def build(self, name, **kwargs): - net = AMPBuilder.Network(self.params, **kwargs) - return net \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py 
b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py deleted file mode 100644 index 7c6e2749..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import torch - -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch.running_mean_std import RunningMeanStd - -import common_player - -class AMPPlayerContinuous(common_player.CommonPlayer): - def __init__(self, config): - self._normalize_amp_input = config.get('normalize_amp_input', True) - self._disc_reward_scale = config['disc_reward_scale'] - - super().__init__(config) - return - - def restore(self, fn): - if (fn != 'Base'): - super().restore(fn) - if self._normalize_amp_input: - checkpoint = torch_ext.load_checkpoint(fn) - self._amp_input_mean_std.load_state_dict(checkpoint['amp_input_mean_std']) - return - - def _build_net(self, config): - super()._build_net(config) - - if self._normalize_amp_input: - self._amp_input_mean_std = RunningMeanStd(config['amp_input_shape']).to(self.device) - self._amp_input_mean_std.eval() - - return - - def _post_step(self, info): - super()._post_step(info) - if (self.env.task.viewer): - self._amp_debug(info) - return - - def _build_net_config(self): - config = super()._build_net_config() - if (hasattr(self, 'env')): - config['amp_input_shape'] = self.env.amp_observation_space.shape - else: - config['amp_input_shape'] = self.env_info['amp_observation_space'] - return config - - def _amp_debug(self, info): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs[0:1] - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs) - disc_reward = amp_rewards['disc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward) - - return - - def _preproc_amp_obs(self, amp_obs): - if self._normalize_amp_input: - amp_obs = self._amp_input_mean_std(amp_obs) - return amp_obs - - def _eval_disc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_disc(proc_amp_obs) - - def _calc_amp_rewards(self, amp_obs): - disc_r = self._calc_disc_rewards(amp_obs) - output = { - 'disc_rewards': disc_r - } - return output - - def _calc_disc_rewards(self, amp_obs): - with torch.no_grad(): - disc_logits = self._eval_disc(amp_obs) - prob = 1 / (1 + torch.exp(-disc_logits)) - disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.device))) - disc_r *= self._disc_reward_scale - return disc_r diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py deleted file mode 100644 index d605af32..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import amp_agent - -import torch -from isaacgym.torch_utils import * -from rl_games.algos_torch import torch_ext -from rl_games.common import a2c_common -# from rl_games.algos_torch.running_mean_std import RunningMeanStd -# -# from utils import torch_utils -# from learning import ase_network_builder - -class ASEAgent(amp_agent.AMPAgent): - def __init__(self, base_name, config): - super().__init__(base_name, config) - return - - def init_tensors(self): - super().init_tensors() - - batch_shape = self.experience_buffer.obs_base_shape - self.experience_buffer.tensor_dict['ase_latents'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - - self._ase_latents = torch.zeros((batch_shape[-1], self._latent_dim), dtype=torch.float32, - device=self.ppo_device) - - self.tensor_list += ['ase_latents'] - - self._latent_reset_steps = torch.zeros(batch_shape[-1], dtype=torch.int32, device=self.ppo_device) - num_envs = self.vec_env.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device) - self._reset_latent_step_count(env_ids) - - return - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - self._update_latents() - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, self._ase_latents, masks) - else: - res_dict = self.get_action_values(self.obs, self._ase_latents, self._rand_action_probs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - self.experience_buffer.update_data('amp_obs', n, infos['amp_obs']) - self.experience_buffer.update_data('ase_latents', n, self._ase_latents) - self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask']) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs, self._ase_latents) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = 
self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - if (self.vec_env.env.task.viewer): - self._amp_debug(infos, self._ase_latents) - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs'] - mb_ase_latents = self.experience_buffer.tensor_dict['ase_latents'] - amp_rewards = self._calc_amp_rewards(mb_amp_obs, mb_ase_latents) - mb_rewards = self._combine_rewards(mb_rewards, amp_rewards) - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - for k, v in amp_rewards.items(): - batch_dict[k] = a2c_common.swap_and_flatten01(v) - - return batch_dict - - def get_action_values(self, obs_dict, ase_latents, rand_action_probs): - processed_obs = self._preproc_obs(obs_dict['obs']) - - self.model.eval() - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs' : processed_obs, - 'rnn_states' : self.rnn_states, - 'ase_latents': ase_latents - } - - with torch.no_grad(): - res_dict = self.model(input_dict) - if self.has_central_value: - states = obs_dict['states'] - input_dict = { - 'is_train': False, - 'states' : states, - } - value = self.get_central_value(input_dict) - res_dict['values'] = value - - if self.normalize_value: - res_dict['values'] = self.value_mean_std(res_dict['values'], True) - - rand_action_mask = torch.bernoulli(rand_action_probs) - det_action_mask = rand_action_mask == 0.0 - res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask] - res_dict['rand_action_mask'] = rand_action_mask - - return res_dict - - def prepare_dataset(self, batch_dict): - super().prepare_dataset(batch_dict) - - ase_latents = batch_dict['ase_latents'] - self.dataset.values_dict['ase_latents'] = ase_latents - - return - - - def calc_gradients(self, input_dict): - self.set_train() - - value_preds_batch = input_dict['old_values'] - old_action_log_probs_batch = input_dict['old_logp_actions'] - advantage = input_dict['advantages'] - old_mu_batch = input_dict['mu'] - old_sigma_batch = input_dict['sigma'] - return_batch = input_dict['returns'] - actions_batch = input_dict['actions'] - obs_batch = input_dict['obs'] - obs_batch = self._preproc_obs(obs_batch) - - amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size] - amp_obs = self._preproc_amp_obs(amp_obs) - if (self._enable_enc_grad_penalty()): - amp_obs.requires_grad_(True) - - amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size] - amp_obs_replay = self._preproc_amp_obs(amp_obs_replay) - - amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size] - amp_obs_demo = self._preproc_amp_obs(amp_obs_demo) - amp_obs_demo.requires_grad_(True) 
- - ase_latents = input_dict['ase_latents'] - - rand_action_mask = input_dict['rand_action_mask'] - rand_action_sum = torch.sum(rand_action_mask) - - lr = self.last_lr - kl = 1.0 - lr_mul = 1.0 - curr_e_clip = lr_mul * self.e_clip - - batch_dict = { - 'is_train': True, - 'prev_actions': actions_batch, - 'obs' : obs_batch, - 'amp_obs' : amp_obs, - 'amp_obs_replay' : amp_obs_replay, - 'amp_obs_demo' : amp_obs_demo, - 'ase_latents': ase_latents - } - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - with torch.cuda.amp.autocast(enabled=self.mixed_precision): - res_dict = self.model(batch_dict) - action_log_probs = res_dict['prev_neglogp'] - values = res_dict['values'] - entropy = res_dict['entropy'] - mu = res_dict['mus'] - sigma = res_dict['sigmas'] - disc_agent_logit = res_dict['disc_agent_logit'] - disc_agent_replay_logit = res_dict['disc_agent_replay_logit'] - disc_demo_logit = res_dict['disc_demo_logit'] - enc_pred = res_dict['enc_pred'] - - a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) - a_loss = a_info['actor_loss'] - a_clipped = a_info['actor_clipped'].float() - - c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) - c_loss = c_info['critic_loss'] - - b_loss = self.bound_loss(mu) - - c_loss = torch.mean(c_loss) - a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum - entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum - b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum - a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum - - disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0) - disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo) - disc_loss = disc_info['disc_loss'] - - enc_latents = batch_dict['ase_latents'][0:self._amp_minibatch_size] - enc_loss_mask = rand_action_mask[0:self._amp_minibatch_size] - enc_info = self._enc_loss(enc_pred, enc_latents, batch_dict['amp_obs'], enc_loss_mask) - enc_loss = enc_info['enc_loss'] - - loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \ - + self._disc_coef * disc_loss + self._enc_coef * enc_loss - - if (self._enable_amp_diversity_bonus()): - diversity_loss = self._diversity_loss(batch_dict['obs'], mu, batch_dict['ase_latents']) - diversity_loss = torch.sum(rand_action_mask * diversity_loss) / rand_action_sum - loss += self._amp_diversity_bonus * diversity_loss - a_info['amp_diversity_loss'] = diversity_loss - - a_info['actor_loss'] = a_loss - a_info['actor_clip_frac'] = a_clip_frac - c_info['critic_loss'] = c_loss - - if self.multi_gpu: - self.optimizer.zero_grad() - else: - for param in self.model.parameters(): - param.grad = None - - self.scaler.scale(loss).backward() - #TODO: Refactor this ugliest code of the year - if self.truncate_grads: - if self.multi_gpu: - self.optimizer.synchronize() - self.scaler.unscale_(self.optimizer) - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - with self.optimizer.skip_synchronize(): - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.unscale_(self.optimizer) - 
nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.step(self.optimizer) - self.scaler.update() - - with torch.no_grad(): - reduce_kl = not self.is_rnn - kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) - if self.is_rnn: - kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() #/ sum_mask - - self.train_result = { - 'entropy': entropy, - 'kl': kl_dist, - 'last_lr': self.last_lr, - 'lr_mul': lr_mul, - 'b_loss': b_loss - } - self.train_result.update(a_info) - self.train_result.update(c_info) - self.train_result.update(disc_info) - self.train_result.update(enc_info) - - return - - def env_reset(self, env_ids=None): - obs = super().env_reset(env_ids) - - if (env_ids is None): - num_envs = self.vec_env.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device) - - if (len(env_ids) > 0): - self._reset_latents(env_ids) - self._reset_latent_step_count(env_ids) - - return obs - - def _reset_latent_step_count(self, env_ids): - self._latent_reset_steps[env_ids] = torch.randint_like(self._latent_reset_steps[env_ids], low=self._latent_steps_min, - high=self._latent_steps_max) - return - - def _load_config_params(self, config): - super()._load_config_params(config) - - self._latent_dim = config['latent_dim'] - self._latent_steps_min = config.get('latent_steps_min', np.inf) - self._latent_steps_max = config.get('latent_steps_max', np.inf) - self._latent_dim = config['latent_dim'] - self._amp_diversity_bonus = config['amp_diversity_bonus'] - self._amp_diversity_tar = config['amp_diversity_tar'] - - self._enc_coef = config['enc_coef'] - self._enc_weight_decay = config['enc_weight_decay'] - self._enc_reward_scale = config['enc_reward_scale'] - self._enc_grad_penalty = config['enc_grad_penalty'] - - self._enc_reward_w = config['enc_reward_w'] - - return - - def _build_net_config(self): - config = super()._build_net_config() - config['ase_latent_shape'] = (self._latent_dim,) - return config - - def _reset_latents(self, env_ids): - n = len(env_ids) - z = self._sample_latents(n) - self._ase_latents[env_ids] = z - - if (self.vec_env.env.task.viewer): - self._change_char_color(env_ids) - - return - - def _sample_latents(self, n): - z = self.model.a2c_network.sample_latents(n) - return z - - def _update_latents(self): - new_latent_envs = self._latent_reset_steps <= self.vec_env.env.task.progress_buf - - need_update = torch.any(new_latent_envs) - if (need_update): - new_latent_env_ids = new_latent_envs.nonzero(as_tuple=False).flatten() - self._reset_latents(new_latent_env_ids) - self._latent_reset_steps[new_latent_env_ids] += torch.randint_like(self._latent_reset_steps[new_latent_env_ids], - low=self._latent_steps_min, - high=self._latent_steps_max) - if (self.vec_env.env.task.viewer): - self._change_char_color(new_latent_env_ids) - - return - - def _eval_actor(self, obs, ase_latents): - output = self.model.a2c_network.eval_actor(obs=obs, ase_latents=ase_latents) - return output - - def _eval_critic(self, obs_dict, ase_latents): - self.model.eval() - obs = obs_dict['obs'] - processed_obs = self._preproc_obs(obs) - value = self.model.a2c_network.eval_critic(processed_obs, ase_latents) - - if self.normalize_value: - value = self.value_mean_std(value, True) - return value - - def _calc_amp_rewards(self, amp_obs, ase_latents): - disc_r = self._calc_disc_rewards(amp_obs) - enc_r = self._calc_enc_rewards(amp_obs, ase_latents) - 
output = { - 'disc_rewards': disc_r, - 'enc_rewards': enc_r - } - return output - - def _calc_enc_rewards(self, amp_obs, ase_latents): - with torch.no_grad(): - enc_pred = self._eval_enc(amp_obs) - err = self._calc_enc_error(enc_pred, ase_latents) - enc_r = torch.clamp_min(-err, 0.0) - enc_r *= self._enc_reward_scale - - return enc_r - - def _enc_loss(self, enc_pred, ase_latent, enc_obs, loss_mask): - enc_err = self._calc_enc_error(enc_pred, ase_latent) - #mask_sum = torch.sum(loss_mask) - #enc_err = enc_err.squeeze(-1) - #enc_loss = torch.sum(loss_mask * enc_err) / mask_sum - enc_loss = torch.mean(enc_err) - - # weight decay - if (self._enc_weight_decay != 0): - enc_weights = self.model.a2c_network.get_enc_weights() - enc_weights = torch.cat(enc_weights, dim=-1) - enc_weight_decay = torch.sum(torch.square(enc_weights)) - enc_loss += self._enc_weight_decay * enc_weight_decay - - enc_info = { - 'enc_loss': enc_loss - } - - if (self._enable_enc_grad_penalty()): - enc_obs_grad = torch.autograd.grad(enc_err, enc_obs, grad_outputs=torch.ones_like(enc_err), - create_graph=True, retain_graph=True, only_inputs=True) - enc_obs_grad = enc_obs_grad[0] - enc_obs_grad = torch.sum(torch.square(enc_obs_grad), dim=-1) - #enc_grad_penalty = torch.sum(loss_mask * enc_obs_grad) / mask_sum - enc_grad_penalty = torch.mean(enc_obs_grad) - - enc_loss += self._enc_grad_penalty * enc_grad_penalty - - enc_info['enc_grad_penalty'] = enc_grad_penalty.detach() - - return enc_info - - def _diversity_loss(self, obs, action_params, ase_latents): - assert(self.model.a2c_network.is_continuous) - - n = obs.shape[0] - assert(n == action_params.shape[0]) - - new_z = self._sample_latents(n) - mu, sigma = self._eval_actor(obs=obs, ase_latents=new_z) - - clipped_action_params = torch.clamp(action_params, -1.0, 1.0) - clipped_mu = torch.clamp(mu, -1.0, 1.0) - - a_diff = clipped_action_params - clipped_mu - a_diff = torch.mean(torch.square(a_diff), dim=-1) - - z_diff = new_z * ase_latents - z_diff = torch.sum(z_diff, dim=-1) - z_diff = 0.5 - 0.5 * z_diff - - diversity_bonus = a_diff / (z_diff + 1e-5) - diversity_loss = torch.square(self._amp_diversity_tar - diversity_bonus) - - return diversity_loss - - def _calc_enc_error(self, enc_pred, ase_latent): - err = enc_pred * ase_latent - err = -torch.sum(err, dim=-1, keepdim=True) - return err - - def _enable_enc_grad_penalty(self): - return self._enc_grad_penalty != 0 - - def _enable_amp_diversity_bonus(self): - return self._amp_diversity_bonus != 0 - - def _eval_enc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_enc(proc_amp_obs) - - def _combine_rewards(self, task_rewards, amp_rewards): - disc_r = amp_rewards['disc_rewards'] - enc_r = amp_rewards['enc_rewards'] - combined_rewards = self._task_reward_w * task_rewards \ - + self._disc_reward_w * disc_r \ - + self._enc_reward_w * enc_r - return combined_rewards - - def _record_train_batch_info(self, batch_dict, train_info): - super()._record_train_batch_info(batch_dict, train_info) - train_info['enc_rewards'] = batch_dict['enc_rewards'] - return - - def _log_train_info(self, train_info, frame): - super()._log_train_info(train_info, frame) - - self.writer.add_scalar('losses/enc_loss', torch_ext.mean_list(train_info['enc_loss']).item(), frame) - - if (self._enable_amp_diversity_bonus()): - self.writer.add_scalar('losses/amp_diversity_loss', torch_ext.mean_list(train_info['amp_diversity_loss']).item(), frame) - - enc_reward_std, enc_reward_mean = 
torch.std_mean(train_info['enc_rewards']) - self.writer.add_scalar('info/enc_reward_mean', enc_reward_mean.item(), frame) - self.writer.add_scalar('info/enc_reward_std', enc_reward_std.item(), frame) - - if (self._enable_enc_grad_penalty()): - self.writer.add_scalar('info/enc_grad_penalty', torch_ext.mean_list(train_info['enc_grad_penalty']).item(), frame) - - return - - def _change_char_color(self, env_ids): - base_col = np.array([0.4, 0.4, 0.4]) - range_col = np.array([0.0706, 0.149, 0.2863]) - range_sum = np.linalg.norm(range_col) - - rand_col = np.random.uniform(0.0, 1.0, size=3) - rand_col = range_sum * rand_col / np.linalg.norm(rand_col) - rand_col += base_col - self.vec_env.env.task.set_char_color(rand_col, env_ids) - return - - def _amp_debug(self, info, ase_latents): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs - ase_latents = ase_latents - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents) - disc_reward = amp_rewards['disc_rewards'] - enc_reward = amp_rewards['enc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - enc_reward = enc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward, enc_reward) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml deleted file mode 100644 index d58ece91..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml +++ /dev/null @@ -1,114 +0,0 @@ -params: - seed: 42 - - algo: - name: ase - - model: - name: ase - - network: - name: ase - separate: True - - space: - continuous: - mu_activation: None - sigma_activation: None - mu_init: - name: default - sigma_init: - name: const_initializer - val: -2.9 - fixed_sigma: True - learn_sigma: False - - mlp: - units: [1024, 1024, 512] - activation: relu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - disc: - units: [1024, 1024, 512] - activation: relu - - initializer: - name: default - - enc: - units: [1024, 512] - activation: relu - separate: False - - initializer: - name: default - - load_checkpoint: False - - config: - name: Humanoid - env_name: rlgpu - multi_gpu: False - ppo: True - mixed_precision: False - normalize_input: True - normalize_value: True - reward_shaper: - scale_value: 1 - normalize_advantage: True - gamma: 0.99 - tau: 0.95 - learning_rate: 2e-5 - lr_schedule: constant - score_to_win: 20000 - max_epochs: 100000 - save_best_after: 50 - save_frequency: 50 - print_stats: True - grad_norm: 1.0 - entropy_coef: 0.0 - truncate_grads: False - ppo: True - e_clip: 0.2 - horizon_length: 32 - minibatch_size: 1 - mini_epochs: 6 - critic_coef: 5 - clip_value: False - seq_len: 4 - bounds_loss_coef: 10 - amp_obs_demo_buffer_size: 200000 - amp_replay_buffer_size: 200000 - amp_replay_keep_prob: 0.01 - amp_batch_size: 32 - amp_minibatch_size: 1 - disc_coef: 5 - disc_logit_reg: 0.01 - disc_grad_penalty: 5 - disc_reward_scale: 2 - disc_weight_decay: 0.0001 - normalize_amp_input: True - enable_eps_greedy: False - - latent_dim: 64 - latent_steps_min: 1 - latent_steps_max: 150 - - amp_latent_grad_bonus: 0.00 - amp_latent_grad_bonus_max: 100.0 - amp_diversity_bonus: 0.01 - amp_diversity_tar: 1.0 - - enc_coef: 5 - enc_weight_decay: 0.0000 - enc_reward_scale: 1 - enc_grad_penalty: 0 - - task_reward_w: 0.0 - disc_reward_w: 0.5 - enc_reward_w: 0.5 diff --git 
a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py deleted file mode 100644 index db71a606..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import amp_models - -class ModelASEContinuous(amp_models.ModelAMPContinuous): - def __init__(self, network): - super().__init__(network) - return - - def build(self, config): - net = self.network_builder.build('ase', **config) - for name, _ in net.named_parameters(): - print(name) - return ModelASEContinuous.Network(net) - - class Network(amp_models.ModelAMPContinuous.Network): - def __init__(self, a2c_network): - super().__init__(a2c_network) - return - - def forward(self, input_dict): - is_train = input_dict.get('is_train', True) - result = super().forward(input_dict) - - if (is_train): - amp_obs = input_dict['amp_obs'] - enc_pred = self.a2c_network.eval_enc(amp_obs) - result["enc_pred"] = enc_pred - - return result \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py deleted file mode 100644 index c61fae76..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import layers -from rl_games.algos_torch import network_builder - -import torch -import torch.nn as nn -import numpy as np -import enum - -import amp_network_builder - -ENC_LOGIT_INIT_SCALE = 0.1 - -class LatentType(enum.Enum): - uniform = 0 - sphere = 1 - -class ASEBuilder(amp_network_builder.AMPBuilder): - def __init__(self, **kwargs): - super().__init__(**kwargs) - return - - class Network(amp_network_builder.AMPBuilder.Network): - def __init__(self, params, **kwargs): - actions_num = kwargs.get('actions_num') - input_shape = kwargs.get('input_shape') - self.value_size = kwargs.get('value_size', 1) - self.num_seqs = num_seqs = kwargs.get('num_seqs', 1) - amp_input_shape = kwargs.get('amp_input_shape') - self._ase_latent_shape = kwargs.get('ase_latent_shape') - - network_builder.NetworkBuilder.BaseNetwork.__init__(self) - - self.load(params) - - actor_out_size, critic_out_size = self._build_actor_critic_net(input_shape, self._ase_latent_shape) - - self.value = torch.nn.Linear(critic_out_size, self.value_size) - self.value_act = self.activations_factory.create(self.value_activation) - - if self.is_discrete: - self.logits = torch.nn.Linear(actor_out_size, actions_num) - ''' - for multidiscrete actions num is a tuple - ''' - if self.is_multi_discrete: - self.logits = torch.nn.ModuleList([torch.nn.Linear(actor_out_size, num) for num in actions_num]) - if self.is_continuous: - self.mu = torch.nn.Linear(actor_out_size, actions_num) - self.mu_act = self.activations_factory.create(self.space_config['mu_activation']) - mu_init = self.init_factory.create(**self.space_config['mu_init']) - self.sigma_act = self.activations_factory.create(self.space_config['sigma_activation']) - - sigma_init = self.init_factory.create(**self.space_config['sigma_init']) - - if (not self.space_config['learn_sigma']): - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False) - elif self.space_config['fixed_sigma']: - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) - else: - self.sigma = torch.nn.Linear(actor_out_size, actions_num) - - mlp_init = self.init_factory.create(**self.initializer) - if self.has_cnn: - cnn_init = self.init_factory.create(**self.cnn['initializer']) - - for m in self.modules(): - if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): - cnn_init(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - if 
isinstance(m, nn.Linear): - mlp_init(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - - self.actor_mlp.init_params() - self.critic_mlp.init_params() - - if self.is_continuous: - mu_init(self.mu.weight) - if self.space_config['fixed_sigma']: - sigma_init(self.sigma) - else: - sigma_init(self.sigma.weight) - - self._build_disc(amp_input_shape) - self._build_enc(amp_input_shape) - - return - - def load(self, params): - super().load(params) - - self._enc_units = params['enc']['units'] - self._enc_activation = params['enc']['activation'] - self._enc_initializer = params['enc']['initializer'] - self._enc_separate = params['enc']['separate'] - - return - - def forward(self, obs_dict): - obs = obs_dict['obs'] - ase_latents = obs_dict['ase_latents'] - states = obs_dict.get('rnn_states', None) - use_hidden_latents = obs_dict.get('use_hidden_latents', False) - - actor_outputs = self.eval_actor(obs, ase_latents, use_hidden_latents) - value = self.eval_critic(obs, ase_latents, use_hidden_latents) - - output = actor_outputs + (value, states) - - return output - - def eval_actor(self, obs, ase_latents, use_hidden_latents=False): - a_out = self.actor_cnn(obs) - a_out = a_out.contiguous().view(a_out.size(0), -1) - a_out = self.actor_mlp(a_out, ase_latents, use_hidden_latents) - - if self.is_discrete: - logits = self.logits(a_out) - return logits - - if self.is_multi_discrete: - logits = [logit(a_out) for logit in self.logits] - return logits - - if self.is_continuous: - mu = self.mu_act(self.mu(a_out)) - if self.space_config['fixed_sigma']: - sigma = mu * 0.0 + self.sigma_act(self.sigma) - else: - sigma = self.sigma_act(self.sigma(a_out)) - - return mu, sigma - return - - def eval_critic(self, obs, ase_latents, use_hidden_latents=False): - c_out = self.critic_cnn(obs) - c_out = c_out.contiguous().view(c_out.size(0), -1) - - c_out = self.critic_mlp(c_out, ase_latents, use_hidden_latents) - value = self.value_act(self.value(c_out)) - return value - - def get_enc_weights(self): - weights = [] - for m in self._enc_mlp.modules(): - if isinstance(m, nn.Linear): - weights.append(torch.flatten(m.weight)) - - weights.append(torch.flatten(self._enc.weight)) - return weights - - def _build_actor_critic_net(self, input_shape, ase_latent_shape): - style_units = [512, 256] - style_dim = ase_latent_shape[-1] - - self.actor_cnn = nn.Sequential() - self.critic_cnn = nn.Sequential() - - act_fn = self.activations_factory.create(self.activation) - initializer = self.init_factory.create(**self.initializer) - - self.actor_mlp = AMPStyleCatNet1(obs_size=input_shape[-1], - ase_latent_size=ase_latent_shape[-1], - units=self.units, - activation=act_fn, - style_units=style_units, - style_dim=style_dim, - initializer=initializer) - - if self.separate: - self.critic_mlp = AMPMLPNet(obs_size=input_shape[-1], - ase_latent_size=ase_latent_shape[-1], - units=self.units, - activation=act_fn, - initializer=initializer) - - actor_out_size = self.actor_mlp.get_out_size() - critic_out_size = self.critic_mlp.get_out_size() - - return actor_out_size, critic_out_size - - def _build_enc(self, input_shape): - if (self._enc_separate): - self._enc_mlp = nn.Sequential() - mlp_args = { - 'input_size' : input_shape[0], - 'units' : self._enc_units, - 'activation' : self._enc_activation, - 'dense_func' : torch.nn.Linear - } - self._enc_mlp = self._build_mlp(**mlp_args) - - mlp_init = self.init_factory.create(**self._enc_initializer) - for m in self._enc_mlp.modules(): - if isinstance(m, nn.Linear): - mlp_init(m.weight) 
- if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - else: - self._enc_mlp = self._disc_mlp - - mlp_out_layer = list(self._enc_mlp.modules())[-2] - mlp_out_size = mlp_out_layer.out_features - self._enc = torch.nn.Linear(mlp_out_size, self._ase_latent_shape[-1]) - - torch.nn.init.uniform_(self._enc.weight, -ENC_LOGIT_INIT_SCALE, ENC_LOGIT_INIT_SCALE) - torch.nn.init.zeros_(self._enc.bias) - - return - - def eval_enc(self, amp_obs): - enc_mlp_out = self._enc_mlp(amp_obs) - enc_output = self._enc(enc_mlp_out) - enc_output = torch.nn.functional.normalize(enc_output, dim=-1) - - return enc_output - - def sample_latents(self, n): - device = next(self._enc.parameters()).device - z = torch.normal(torch.zeros([n, self._ase_latent_shape[-1]], device=device)) - z = torch.nn.functional.normalize(z, dim=-1) - return z - - def build(self, name, **kwargs): - net = ASEBuilder.Network(self.params, **kwargs) - return net - - -class AMPMLPNet(torch.nn.Module): - def __init__(self, obs_size, ase_latent_size, units, activation, initializer): - super().__init__() - - input_size = obs_size + ase_latent_size - print('build amp mlp net:', input_size) - - self._units = units - self._initializer = initializer - self._mlp = [] - - in_size = input_size - for i in range(len(units)): - unit = units[i] - curr_dense = torch.nn.Linear(in_size, unit) - self._mlp.append(curr_dense) - self._mlp.append(activation) - in_size = unit - - self._mlp = nn.Sequential(*self._mlp) - self.init_params() - return - - def forward(self, obs, latent, skip_style): - inputs = [obs, latent] - input = torch.cat(inputs, dim=-1) - output = self._mlp(input) - return output - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Linear): - self._initializer(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - return - - def get_out_size(self): - out_size = self._units[-1] - return out_size - -class AMPStyleCatNet1(torch.nn.Module): - def __init__(self, obs_size, ase_latent_size, units, activation, - style_units, style_dim, initializer): - super().__init__() - - print('build amp style cat net:', obs_size, ase_latent_size) - - self._activation = activation - self._initializer = initializer - self._dense_layers = [] - self._units = units - self._style_dim = style_dim - self._style_activation = torch.tanh - - self._style_mlp = self._build_style_mlp(style_units, ase_latent_size) - self._style_dense = torch.nn.Linear(style_units[-1], style_dim) - - in_size = obs_size + style_dim - for i in range(len(units)): - unit = units[i] - out_size = unit - curr_dense = torch.nn.Linear(in_size, out_size) - self._dense_layers.append(curr_dense) - - in_size = out_size - - self._dense_layers = nn.ModuleList(self._dense_layers) - - self.init_params() - - return - - def forward(self, obs, latent, skip_style): - if (skip_style): - style = latent - else: - style = self.eval_style(latent) - - h = torch.cat([obs, style], dim=-1) - - for i in range(len(self._dense_layers)): - curr_dense = self._dense_layers[i] - h = curr_dense(h) - h = self._activation(h) - - return h - - def eval_style(self, latent): - style_h = self._style_mlp(latent) - style = self._style_dense(style_h) - style = self._style_activation(style) - return style - - def init_params(self): - scale_init_range = 1.0 - - for m in self.modules(): - if isinstance(m, nn.Linear): - self._initializer(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - - nn.init.uniform_(self._style_dense.weight, 
-scale_init_range, scale_init_range) - return - - def get_out_size(self): - out_size = self._units[-1] - return out_size - - def _build_style_mlp(self, style_units, input_size): - in_size = input_size - layers = [] - for unit in style_units: - layers.append(torch.nn.Linear(in_size, unit)) - layers.append(self._activation) - in_size = unit - - enc_mlp = nn.Sequential(*layers) - return enc_mlp \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py deleted file mode 100644 index 63d2809e..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
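For reference, the ase_network_builder.py removed above draws skill latents by normalizing a Gaussian sample onto the unit hypersphere (sample_latents) and trains an encoder head that maps AMP observations back onto that sphere (eval_enc). A minimal standalone sketch of that sampling and of the dot-product encoder error it feeds, assuming nothing beyond PyTorch; the function names here are illustrative rather than part of the removed module:

import torch
import torch.nn.functional as F

def sample_latents(n, latent_dim, device="cpu"):
    # Standard normal draw projected onto the unit hypersphere, as in
    # ASEBuilder.Network.sample_latents above.
    z = torch.randn(n, latent_dim, device=device)
    return F.normalize(z, dim=-1)

def enc_error(enc_pred, ase_latent):
    # Negative inner product between the normalized encoder prediction and
    # the latent; directions that agree give a large negative error.
    return -torch.sum(enc_pred * ase_latent, dim=-1, keepdim=True)

z = sample_latents(4, 64)
pred = F.normalize(torch.randn(4, 64), dim=-1)           # stand-in for eval_enc output
enc_reward = torch.clamp_min(-enc_error(pred, z), 0.0)   # clamped as in _calc_enc_rewards, before scaling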
- -import torch - -from isaacgym.torch_utils import * -from rl_games.algos_torch import players - -import amp_players - -class ASEPlayer(amp_players.AMPPlayerContinuous): - def __init__(self, config): - self._latent_dim = config['latent_dim'] - self._latent_steps_min = config.get('latent_steps_min', np.inf) - self._latent_steps_max = config.get('latent_steps_max', np.inf) - - self._enc_reward_scale = config['enc_reward_scale'] - - super().__init__(config) - - if (hasattr(self, 'env')): - batch_size = self.env.task.num_envs - else: - batch_size = self.env_info['num_envs'] - self._ase_latents = torch.zeros((batch_size, self._latent_dim), dtype=torch.float32, - device=self.device) - - return - - def run(self): - self._reset_latent_step_count() - super().run() - return - - def get_action(self, obs_dict, is_determenistic=False): - self._update_latents() - - obs = obs_dict['obs'] - if len(obs.size()) == len(self.obs_shape): - obs = obs.unsqueeze(0) - obs = self._preproc_obs(obs) - ase_latents = self._ase_latents - - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs' : obs, - 'rnn_states' : self.states, - 'ase_latents': ase_latents - } - with torch.no_grad(): - res_dict = self.model(input_dict) - mu = res_dict['mus'] - action = res_dict['actions'] - self.states = res_dict['rnn_states'] - if is_determenistic: - current_action = mu - else: - current_action = action - current_action = torch.squeeze(current_action.detach()) - return players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0)) - - def env_reset(self, env_ids=None): - obs = super().env_reset(env_ids) - self._reset_latents(env_ids) - return obs - - def _build_net_config(self): - config = super()._build_net_config() - config['ase_latent_shape'] = (self._latent_dim,) - return config - - def _reset_latents(self, done_env_ids=None): - if (done_env_ids is None): - num_envs = self.env.task.num_envs - done_env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device) - - rand_vals = self.model.a2c_network.sample_latents(len(done_env_ids)) - self._ase_latents[done_env_ids] = rand_vals - self._change_char_color(done_env_ids) - - return - - def _update_latents(self): - if (self._latent_step_count <= 0): - self._reset_latents() - self._reset_latent_step_count() - - if (self.env.task.viewer): - print("Sampling new amp latents------------------------------") - num_envs = self.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device) - self._change_char_color(env_ids) - else: - self._latent_step_count -= 1 - return - - def _reset_latent_step_count(self): - self._latent_step_count = np.random.randint(self._latent_steps_min, self._latent_steps_max) - return - - def _calc_amp_rewards(self, amp_obs, ase_latents): - disc_r = self._calc_disc_rewards(amp_obs) - enc_r = self._calc_enc_rewards(amp_obs, ase_latents) - output = { - 'disc_rewards': disc_r, - 'enc_rewards': enc_r - } - return output - - def _calc_enc_rewards(self, amp_obs, ase_latents): - with torch.no_grad(): - enc_pred = self._eval_enc(amp_obs) - err = self._calc_enc_error(enc_pred, ase_latents) - enc_r = torch.clamp_min(-err, 0.0) - enc_r *= self._enc_reward_scale - - return enc_r - - def _calc_enc_error(self, enc_pred, ase_latent): - err = enc_pred * ase_latent - err = -torch.sum(err, dim=-1, keepdim=True) - return err - - def _eval_enc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_enc(proc_amp_obs) - - def _amp_debug(self, 
info): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs - ase_latents = self._ase_latents - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents) - disc_reward = amp_rewards['disc_rewards'] - enc_reward = amp_rewards['enc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - enc_reward = enc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward, enc_reward) - return - - def _change_char_color(self, env_ids): - base_col = np.array([0.4, 0.4, 0.4]) - range_col = np.array([0.0706, 0.149, 0.2863]) - range_sum = np.linalg.norm(range_col) - - rand_col = np.random.uniform(0.0, 1.0, size=3) - rand_col = range_sum * rand_col / np.linalg.norm(rand_col) - rand_col += base_col - self.env.task.set_char_color(rand_col, env_ids) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py deleted file mode 100644 index 835b9400..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py +++ /dev/null @@ -1,592 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
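The agent and player code above both fold the discriminator and encoder outputs into a single reward via _combine_rewards, using the task_reward_w / disc_reward_w / enc_reward_w weights from ase_humanoid_hrl.yaml (0.0 / 0.5 / 0.5, i.e. a pure style-and-skill reward with no task term). A compact sketch of that weighting, with placeholder tensors standing in for the real rollout buffers:

import torch

def combine_rewards(task_r, disc_r, enc_r, task_w=0.0, disc_w=0.5, enc_w=0.5):
    # Weighted sum used by _combine_rewards; the defaults mirror the yaml above.
    return task_w * task_r + disc_w * disc_r + enc_w * enc_r

task_r = torch.zeros(8, 1)        # placeholder task reward
disc_r = torch.rand(8, 1) * 2.0   # placeholder discriminator reward (already scaled)
enc_r = torch.rand(8, 1)          # placeholder encoder (skill-consistency) reward
total = combine_rewards(task_r, disc_r, enc_r)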
- -import copy -from datetime import datetime -from gym import spaces -import numpy as np -import os -import time -import yaml - -from rl_games.algos_torch import a2c_continuous -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import central_value -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common import a2c_common -from rl_games.common import datasets -from rl_games.common import schedulers -from rl_games.common import vecenv - -import torch -from torch import optim - -import amp_datasets - -from tensorboardX import SummaryWriter - -class CommonAgent(a2c_continuous.A2CAgent): - def __init__(self, base_name, config): - a2c_common.A2CBase.__init__(self, base_name, config) - - self._load_config_params(config) - - self.is_discrete = False - self._setup_action_space() - self.bounds_loss_coef = config.get('bounds_loss_coef', None) - self.clip_actions = config.get('clip_actions', True) - self._save_intermediate = config.get('save_intermediate', False) - - net_config = self._build_net_config() - self.model = self.network.build(net_config) - self.model.to(self.ppo_device) - self.states = None - - self.init_rnn_from_model(self.model) - self.last_lr = float(self.last_lr) - - self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay) - - if self.normalize_input: - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - self.running_mean_std = RunningMeanStd(obs_shape).to(self.ppo_device) - - if self.has_central_value: - cv_config = { - 'state_shape' : torch_ext.shape_whc_to_cwh(self.state_shape), - 'value_size' : self.value_size, - 'ppo_device' : self.ppo_device, - 'num_agents' : self.num_agents, - 'horizon_length' : self.horizon_length, - 'num_actors' : self.num_actors, - 'num_actions' : self.actions_num, - 'seq_len' : self.seq_len, - 'model' : self.central_value_config['network'], - 'config' : self.central_value_config, - 'writter' : self.writer, - 'multi_gpu' : self.multi_gpu - } - self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device) - - self.use_experimental_cv = self.config.get('use_experimental_cv', True) - self.dataset = amp_datasets.AMPDataset(self.batch_size, self.minibatch_size, self.is_discrete, self.is_rnn, self.ppo_device, self.seq_len) - self.algo_observer.after_init(self) - - return - - def init_tensors(self): - super().init_tensors() - self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(self.experience_buffer.tensor_dict['obses']) - self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(self.experience_buffer.tensor_dict['values']) - - self.tensor_list += ['next_obses'] - return - - def train(self): - self.init_tensors() - self.last_mean_rewards = -100500 - start_time = time.time() - total_time = 0 - rep_count = 0 - self.frame = 0 - self.obs = self.env_reset() - self.curr_frames = self.batch_size_envs - - model_output_file = os.path.join(self.nn_dir, self.config['name']) - - if self.multi_gpu: - self.hvd.setup_algo(self) - - self._init_train() - - while True: - epoch_num = self.update_epoch() - train_info = self.train_epoch() - - sum_time = train_info['total_time'] - total_time += sum_time - frame = self.frame - if self.multi_gpu: - self.hvd.sync_stats(self) - - if self.rank == 0: - scaled_time = sum_time - scaled_play_time = train_info['play_time'] - curr_frames = self.curr_frames - self.frame += curr_frames - if self.print_stats: - fps_step = curr_frames / scaled_play_time - fps_total = curr_frames / 
scaled_time - print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}') - - self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame) - self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame) - self.writer.add_scalar('info/epochs', epoch_num, frame) - self._log_train_info(train_info, frame) - - self.algo_observer.after_print_stats(frame, epoch_num, total_time) - - if self.game_rewards.current_size > 0: - mean_rewards = self._get_mean_rewards() - mean_lengths = self.game_lengths.get_mean() - - for i in range(self.value_size): - self.writer.add_scalar('rewards{0}/frame'.format(i), mean_rewards[i], frame) - self.writer.add_scalar('rewards{0}/iter'.format(i), mean_rewards[i], epoch_num) - self.writer.add_scalar('rewards{0}/time'.format(i), mean_rewards[i], total_time) - - self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame) - self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) - - if self.has_self_play_config: - self.self_play_manager.update(self) - - if self.save_freq > 0: - if (epoch_num % self.save_freq == 0): - self.save(model_output_file) - - if (self._save_intermediate): - int_model_output_file = model_output_file + '_' + str(epoch_num).zfill(8) - self.save(int_model_output_file) - - if epoch_num > self.max_epochs: - self.save(model_output_file) - print('MAX EPOCHS NUM!') - return self.last_mean_rewards, epoch_num - - update_time = 0 - return - - def set_full_state_weights(self, weights): - self.set_weights(weights) - self.epoch_num = weights['epoch'] - if self.has_central_value: - self.central_value_net.load_state_dict(weights['assymetric_vf_nets']) - self.optimizer.load_state_dict(weights['optimizer']) - self.frame = weights.get('frame', 0) - self.last_mean_rewards = weights.get('last_mean_rewards', -100500) - - if (hasattr(self, 'vec_env')): - env_state = weights.get('env_state', None) - self.vec_env.set_env_state(env_state) - - return - - def train_epoch(self): - play_time_start = time.time() - with torch.no_grad(): - if self.is_rnn: - batch_dict = self.play_steps_rnn() - else: - batch_dict = self.play_steps() - - play_time_end = time.time() - update_time_start = time.time() - rnn_masks = batch_dict.get('rnn_masks', None) - - self.set_train() - - self.curr_frames = batch_dict.pop('played_frames') - self.prepare_dataset(batch_dict) - self.algo_observer.after_steps() - - if self.has_central_value: - self.train_central_value() - - train_info = None - - if self.is_rnn: - frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) - print(frames_mask_ratio) - - for _ in range(0, self.mini_epochs_num): - ep_kls = [] - for i in range(len(self.dataset)): - curr_train_info = self.train_actor_critic(self.dataset[i]) - - if self.schedule_type == 'legacy': - if self.multi_gpu: - curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item()) - self.update_lr(self.last_lr) - - if (train_info is None): - train_info = dict() - for k, v in curr_train_info.items(): - train_info[k] = [v] - else: - for k, v in curr_train_info.items(): - train_info[k].append(v) - - av_kls = torch_ext.mean_list(train_info['kl']) - - if self.schedule_type == 'standard': - if self.multi_gpu: - av_kls = self.hvd.average_value(av_kls, 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) - 
self.update_lr(self.last_lr) - - if self.schedule_type == 'standard_epoch': - if self.multi_gpu: - av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) - self.update_lr(self.last_lr) - - update_time_end = time.time() - play_time = play_time_end - play_time_start - update_time = update_time_end - update_time_start - total_time = update_time_end - play_time_start - - train_info['play_time'] = play_time - train_info['update_time'] = update_time - train_info['total_time'] = total_time - self._record_train_batch_info(batch_dict, train_info) - - return train_info - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, masks) - else: - res_dict = self.get_action_values(self.obs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - return batch_dict - - def prepare_dataset(self, batch_dict): - obses = batch_dict['obses'] - returns = batch_dict['returns'] - dones = batch_dict['dones'] - values = batch_dict['values'] - actions = batch_dict['actions'] - neglogpacs = batch_dict['neglogpacs'] - mus = batch_dict['mus'] - sigmas = batch_dict['sigmas'] - rnn_states = batch_dict.get('rnn_states', None) - rnn_masks = batch_dict.get('rnn_masks', None) - - advantages = self._calc_advs(batch_dict) - - if self.normalize_value: - 
values = self.value_mean_std(values) - returns = self.value_mean_std(returns) - - dataset_dict = {} - dataset_dict['old_values'] = values - dataset_dict['old_logp_actions'] = neglogpacs - dataset_dict['advantages'] = advantages - dataset_dict['returns'] = returns - dataset_dict['actions'] = actions - dataset_dict['obs'] = obses - dataset_dict['rnn_states'] = rnn_states - dataset_dict['rnn_masks'] = rnn_masks - dataset_dict['mu'] = mus - dataset_dict['sigma'] = sigmas - - self.dataset.update_values_dict(dataset_dict) - - if self.has_central_value: - dataset_dict = {} - dataset_dict['old_values'] = values - dataset_dict['advantages'] = advantages - dataset_dict['returns'] = returns - dataset_dict['actions'] = actions - dataset_dict['obs'] = batch_dict['states'] - dataset_dict['rnn_masks'] = rnn_masks - self.central_value_net.update_dataset(dataset_dict) - - return - - def calc_gradients(self, input_dict): - self.set_train() - - value_preds_batch = input_dict['old_values'] - old_action_log_probs_batch = input_dict['old_logp_actions'] - advantage = input_dict['advantages'] - old_mu_batch = input_dict['mu'] - old_sigma_batch = input_dict['sigma'] - return_batch = input_dict['returns'] - actions_batch = input_dict['actions'] - obs_batch = input_dict['obs'] - obs_batch = self._preproc_obs(obs_batch) - - lr = self.last_lr - kl = 1.0 - lr_mul = 1.0 - curr_e_clip = lr_mul * self.e_clip - - batch_dict = { - 'is_train': True, - 'prev_actions': actions_batch, - 'obs' : obs_batch - } - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - with torch.cuda.amp.autocast(enabled=self.mixed_precision): - res_dict = self.model(batch_dict) - action_log_probs = res_dict['prev_neglogp'] - values = res_dict['values'] - entropy = res_dict['entropy'] - mu = res_dict['mus'] - sigma = res_dict['sigmas'] - - a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) - a_loss = a_info['actor_loss'] - - c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) - c_loss = c_info['critic_loss'] - - b_loss = self.bound_loss(mu) - - a_loss = torch.mean(a_loss) - c_loss = torch.mean(c_loss) - b_loss = torch.mean(b_loss) - entropy = torch.mean(entropy) - - loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss - - a_clip_frac = torch.mean(a_info['actor_clipped'].float()) - - a_info['actor_loss'] = a_loss - a_info['actor_clip_frac'] = a_clip_frac - - if self.multi_gpu: - self.optimizer.zero_grad() - else: - for param in self.model.parameters(): - param.grad = None - - self.scaler.scale(loss).backward() - self.scaler.step(self.optimizer) - self.scaler.update() - - with torch.no_grad(): - reduce_kl = not self.is_rnn - kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) - - self.train_result = { - 'entropy': entropy, - 'kl': kl_dist, - 'last_lr': self.last_lr, - 'lr_mul': lr_mul, - 'b_loss': b_loss - } - self.train_result.update(a_info) - self.train_result.update(c_info) - - return - - def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values): - lastgaelam = 0 - mb_advs = torch.zeros_like(mb_rewards) - - for t in reversed(range(self.horizon_length)): - not_done = 1.0 - mb_fdones[t] - not_done = not_done.unsqueeze(1) - - delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t] - lastgaelam = delta + 
self.gamma * self.tau * not_done * lastgaelam - mb_advs[t] = lastgaelam - - return mb_advs - - def env_reset(self, env_ids=None): - obs = self.vec_env.reset(env_ids) - obs = self.obs_to_tensors(obs) - return obs - - def bound_loss(self, mu): - if self.bounds_loss_coef is not None: - soft_bound = 1.0 - mu_loss_high = torch.clamp_min(mu - soft_bound, 0.0)**2 - mu_loss_low = torch.clamp_max(mu + soft_bound, 0.0)**2 - b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1) - else: - b_loss = 0 - return b_loss - - def _get_mean_rewards(self): - return self.game_rewards.get_mean() - - def _load_config_params(self, config): - self.last_lr = config['learning_rate'] - return - - def _build_net_config(self): - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - config = { - 'actions_num' : self.actions_num, - 'input_shape' : obs_shape, - 'num_seqs' : self.num_actors * self.num_agents, - 'value_size': self.env_info.get('value_size', 1), - } - return config - - def _setup_action_space(self): - action_space = self.env_info['action_space'] - self.actions_num = action_space.shape[0] - - # todo introduce device instead of cuda() - self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device) - self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device) - return - - def _init_train(self): - return - - def _eval_critic(self, obs_dict): - self.model.eval() - obs = obs_dict['obs'] - processed_obs = self._preproc_obs(obs) - value = self.model.a2c_network.eval_critic(processed_obs) - - if self.normalize_value: - value = self.value_mean_std(value, True) - return value - - def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip): - ratio = torch.exp(old_action_log_probs_batch - action_log_probs) - surr1 = advantage * ratio - surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip, - 1.0 + curr_e_clip) - a_loss = torch.max(-surr1, -surr2) - - clipped = torch.abs(ratio - 1.0) > curr_e_clip - clipped = clipped.detach() - - info = { - 'actor_loss': a_loss, - 'actor_clipped': clipped.detach() - } - return info - - def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value): - if clip_value: - value_pred_clipped = value_preds_batch + \ - (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip) - value_losses = (values - return_batch)**2 - value_losses_clipped = (value_pred_clipped - return_batch)**2 - c_loss = torch.max(value_losses, value_losses_clipped) - else: - c_loss = (return_batch - values)**2 - - info = { - 'critic_loss': c_loss - } - return info - - def _calc_advs(self, batch_dict): - returns = batch_dict['returns'] - values = batch_dict['values'] - - advantages = returns - values - advantages = torch.sum(advantages, axis=1) - - if self.normalize_advantage: - advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) - - return advantages - - def _record_train_batch_info(self, batch_dict, train_info): - return - - def _log_train_info(self, train_info, frame): - self.writer.add_scalar('performance/update_time', train_info['update_time'], frame) - self.writer.add_scalar('performance/play_time', train_info['play_time'], frame) - self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame) - self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame) - - self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame) - 
self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame) - self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame) - self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame) - self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame) - self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame) - self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py deleted file mode 100644 index ac4e626a..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
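discount_values in the common_agent.py removed above is a standard GAE(lambda) backward pass over the rollout, with tau playing the role of lambda and next_values already zeroed for terminated episodes. A self-contained version of the same recursion, using illustrative tensor shapes of [horizon, num_envs, 1]:

import torch

def gae_advantages(rewards, values, next_values, dones, gamma=0.99, tau=0.95):
    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    # A_t     = delta_t + gamma * tau * (1 - done_t) * A_{t+1}
    advs = torch.zeros_like(rewards)
    lastgaelam = torch.zeros_like(rewards[0])
    for t in reversed(range(rewards.shape[0])):
        not_done = 1.0 - dones[t]
        delta = rewards[t] + gamma * next_values[t] - values[t]
        lastgaelam = delta + gamma * tau * not_done * lastgaelam
        advs[t] = lastgaelam
    return advs

T, N = 32, 4
rewards = torch.rand(T, N, 1)
values = torch.rand(T, N, 1)
next_values = torch.rand(T, N, 1)
dones = torch.zeros(T, N, 1)
returns = gae_advantages(rewards, values, next_values, dones) + values  # returns = advs + values, as in play_steps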
- -import torch - -from rl_games.algos_torch import players -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common.player import BasePlayer - -import numpy as np - -class CommonPlayer(players.PpoPlayerContinuous): - def __init__(self, config): - BasePlayer.__init__(self, config) - self.network = config['network'] - - self._setup_action_space() - self.mask = [False] - - self.normalize_input = self.config['normalize_input'] - - net_config = self._build_net_config() - self._build_net(net_config) - - return - - def run(self): - n_games = self.games_num - render = self.render_env - n_game_life = self.n_game_life - is_determenistic = self.is_determenistic - sum_rewards = 0 - sum_steps = 0 - sum_game_res = 0 - n_games = n_games * n_game_life - games_played = 0 - has_masks = False - has_masks_func = getattr(self.env, "has_action_mask", None) is not None - - op_agent = getattr(self.env, "create_agent", None) - if op_agent: - agent_inited = True - - if has_masks_func: - has_masks = self.env.has_action_mask() - - need_init_rnn = self.is_rnn - for _ in range(n_games): - if games_played >= n_games: - break - - obs_dict = self.env_reset() - batch_size = 1 - batch_size = self.get_batch_size(obs_dict['obs'], batch_size) - - if need_init_rnn: - self.init_rnn() - need_init_rnn = False - - cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device) - steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device) - - print_game_res = False - - done_indices = [] - - for n in range(self.max_steps): - obs_dict = self.env_reset(done_indices) - - if has_masks: - masks = self.env.get_action_mask() - action = self.get_masked_action(obs_dict, masks, is_determenistic) - else: - action = self.get_action(obs_dict, is_determenistic) - obs_dict, r, done, info = self.env_step(self.env, action) - cr += r - steps += 1 - - self._post_step(info) - - if render: - self.env.render(mode = 'human') - time.sleep(self.render_sleep) - - all_done_indices = done.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - done_count = len(done_indices) - games_played += done_count - - if done_count > 0: - if self.is_rnn: - for s in self.states: - s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0 - - cur_rewards = cr[done_indices].sum().item() - cur_steps = steps[done_indices].sum().item() - - cr = cr * (1.0 - done.float()) - steps = steps * (1.0 - done.float()) - sum_rewards += cur_rewards - sum_steps += cur_steps - - game_res = 0.0 - if isinstance(info, dict): - if 'battle_won' in info: - print_game_res = True - game_res = info.get('battle_won', 0.5) - if 'scores' in info: - print_game_res = True - game_res = info.get('scores', 0.5) - if self.print_stats: - if print_game_res: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res) - else: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count) - - sum_game_res += game_res - if batch_size//self.num_agents == 1 or games_played >= n_games: - break - - done_indices = done_indices[:, 0] - - print(sum_rewards) - if print_game_res: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life) - else: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life) - - return - - def obs_to_torch(self, obs): - obs = super().obs_to_torch(obs) - obs_dict = { - 
'obs': obs - } - return obs_dict - - def get_action(self, obs_dict, is_determenistic = False): - output = super().get_action(obs_dict['obs'], is_determenistic) - return output - - def env_step(self, env, actions): - if not self.is_tensor_obses: - actions = actions.cpu().numpy() - obs, rewards, dones, infos = env.step(actions) - - if hasattr(obs, 'dtype') and obs.dtype == np.float64: - obs = np.float32(obs) - if self.value_size > 1: - rewards = rewards[0] - if self.is_tensor_obses: - return obs, rewards.to(self.device), dones.to(self.device), infos - else: - if np.isscalar(dones): - rewards = np.expand_dims(np.asarray(rewards), 0) - dones = np.expand_dims(np.asarray(dones), 0) - return self.obs_to_torch(obs), torch.from_numpy(rewards), torch.from_numpy(dones), infos - - def _build_net(self, config): - self.model = self.network.build(config) - self.model.to(self.device) - self.model.eval() - self.is_rnn = self.model.is_rnn() - if self.normalize_input: - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - self.running_mean_std = RunningMeanStd(obs_shape).to(self.device) - self.running_mean_std.eval() - return - - def env_reset(self, env_ids=None): - obs = self.env.reset(env_ids) - return self.obs_to_torch(obs) - - def _post_step(self, info): - return - - def _build_net_config(self): - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - config = { - 'actions_num' : self.actions_num, - 'input_shape' : obs_shape, - 'num_seqs' : self.num_agents - } - return config - - def _setup_action_space(self): - self.actions_num = self.action_space.shape[0] - self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device) - self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/config.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/config.py deleted file mode 100644 index b77c3a64..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/config.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import sys -import yaml - -from isaacgym import gymapi -from isaacgym import gymutil - -import numpy as np -import random -import torch - -SIM_TIMESTEP = 1.0 / 60.0 - -def set_np_formatting(): - np.set_printoptions(edgeitems=30, infstr='inf', - linewidth=4000, nanstr='nan', precision=2, - suppress=False, threshold=10000, formatter=None) - - -def warn_task_name(): - raise Exception( - "Unrecognized task!\nTask should be one of: [BallBalance, Cartpole, CartpoleYUp, Ant, Humanoid, Anymal, FrankaCabinet, Quadcopter, ShadowHand, ShadowHandLSTM, ShadowHandFFOpenAI, ShadowHandFFOpenAITest, ShadowHandOpenAI, ShadowHandOpenAITest, Ingenuity]") - - -def set_seed(seed, torch_deterministic=False): - if seed == -1 and torch_deterministic: - seed = 42 - elif seed == -1: - seed = np.random.randint(0, 10000) - print("Setting seed: {}".format(seed)) - - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - os.environ['PYTHONHASHSEED'] = str(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - - if torch_deterministic: - # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility - os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' - torch.backends.cudnn.benchmark = False - torch.backends.cudnn.deterministic = True - torch.set_deterministic(True) - else: - torch.backends.cudnn.benchmark = True - torch.backends.cudnn.deterministic = False - - return seed - - -def load_cfg(args): - with open(args.cfg_train, 'r') as f: - cfg_train = yaml.load(f, Loader=yaml.SafeLoader) - - with open(os.path.join(os.getcwd(), args.cfg_env), 'r') as f: - cfg = yaml.load(f, Loader=yaml.SafeLoader) - - # Override number of environments if passed on the command line - if args.num_envs > 0: - cfg["env"]["numEnvs"] = args.num_envs - - if args.episode_length > 0: - cfg["env"]["episodeLength"] = args.episode_length - - cfg["name"] = args.task - cfg["headless"] = args.headless - - # Set physics domain randomization - if "task" in cfg: - if "randomize" not in cfg["task"]: - cfg["task"]["randomize"] = args.randomize - else: - cfg["task"]["randomize"] = args.randomize or cfg["task"]["randomize"] - else: - cfg["task"] = {"randomize": False} - - logdir = args.logdir - # Set deterministic mode - if args.torch_deterministic: - cfg_train["params"]["torch_deterministic"] = True - - exp_name = cfg_train["params"]["config"]['name'] - - if args.experiment != 'Base': - if args.metadata: - exp_name = "{}_{}_{}_{}".format(args.experiment, args.task_type, args.device, str(args.physics_engine).split("_")[-1]) - - if cfg["task"]["randomize"]: - exp_name += "_DR" - else: - exp_name = args.experiment - - # Override config name - cfg_train["params"]["config"]['name'] = exp_name - - if args.resume > 0: - cfg_train["params"]["load_checkpoint"] = True - - if args.checkpoint != "Base": - cfg_train["params"]["load_path"] = args.checkpoint - - if args.llc_checkpoint != "": - cfg_train["params"]["config"]["llc_checkpoint"] = args.llc_checkpoint - - # Set maximum number 
of training iterations (epochs) - if args.max_iterations > 0: - cfg_train["params"]["config"]['max_epochs'] = args.max_iterations - - cfg_train["params"]["config"]["num_actors"] = cfg["env"]["numEnvs"] - - seed = cfg_train["params"].get("seed", -1) - if args.seed is not None: - seed = args.seed - cfg["seed"] = seed - cfg_train["params"]["seed"] = seed - - cfg["args"] = args - - return cfg, cfg_train, logdir - - -def parse_sim_params(args, cfg, cfg_train): - # initialize sim - sim_params = gymapi.SimParams() - sim_params.dt = SIM_TIMESTEP - sim_params.num_client_threads = args.slices - - if args.physics_engine == gymapi.SIM_FLEX: - if args.device != "cpu": - print("WARNING: Using Flex with GPU instead of PHYSX!") - sim_params.flex.shape_collision_margin = 0.01 - sim_params.flex.num_outer_iterations = 4 - sim_params.flex.num_inner_iterations = 10 - elif args.physics_engine == gymapi.SIM_PHYSX: - sim_params.physx.solver_type = 1 - sim_params.physx.num_position_iterations = 4 - sim_params.physx.num_velocity_iterations = 0 - sim_params.physx.num_threads = 4 - sim_params.physx.use_gpu = args.use_gpu - sim_params.physx.num_subscenes = args.subscenes - sim_params.physx.max_gpu_contact_pairs = 8 * 1024 * 1024 - - sim_params.use_gpu_pipeline = args.use_gpu_pipeline - sim_params.physx.use_gpu = args.use_gpu - - # if sim options are provided in cfg, parse them and update/override above: - if "sim" in cfg: - gymutil.parse_sim_config(cfg["sim"], sim_params) - - # Override num_threads if passed on the command line - if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0: - sim_params.physx.num_threads = args.num_threads - - return sim_params - - -def get_args(benchmark=False): - custom_parameters = [ - {"name": "--test", "action": "store_true", "default": False, - "help": "Run trained policy, no training"}, - {"name": "--play", "action": "store_true", "default": False, - "help": "Run trained policy, the same as test, can be used only by rl_games RL library"}, - {"name": "--resume", "type": int, "default": 0, - "help": "Resume training or start testing from a checkpoint"}, - {"name": "--checkpoint", "type": str, "default": "Base", - "help": "Path to the saved weights, only for rl_games RL library"}, - {"name": "--headless", "action": "store_false", "default": True, - "help": "Force display off at all times"}, - {"name": "--horovod", "action": "store_true", "default": False, - "help": "Use horovod for multi-gpu training, have effect only with rl_games RL library"}, - {"name": "--task", "type": str, "default": "HumanoidStrike", - "help": "Can be BallBalance, Cartpole, CartpoleYUp, Ant, Humanoid, Anymal, FrankaCabinet, Quadcopter, ShadowHand, Ingenuity"}, - {"name": "--task_type", "type": str, - "default": "Python", "help": "Choose Python or C++"}, - {"name": "--rl_device", "type": str, "default": "cuda:0", - "help": "Choose CPU or GPU device for inferencing policy network"}, - {"name": "--logdir", "type": str, "default": "logs/"}, - {"name": "--experiment", "type": str, "default": "Base", - "help": "Experiment name. 
If used with --metadata flag an additional information about physics engine, sim device, pipeline and domain randomization will be added to the name"}, - {"name": "--metadata", "action": "store_true", "default": False, - "help": "Requires --experiment flag, adds physics engine, sim device, pipeline info and if domain randomization is used to the experiment name provided by user"}, - {"name": "--cfg_env", "type": str, "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/learning/RofuncRL/agents/mixline/utils/humanoid_sword_shield_strike.yaml", "help": "Environment configuration file (.yaml)"}, - {"name": "--cfg_train", "type": str, "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/learning/RofuncRL/agents/mixline/utils/hrl_humanoid.yaml", "help": "Training configuration file (.yaml)"}, - {"name": "--motion_file", "type": str, - "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/data/amp/reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy", "help": "Specify reference motion file"}, - {"name": "--num_envs", "type": int, "default": 0, - "help": "Number of environments to create - override config file"}, - {"name": "--episode_length", "type": int, "default": 0, - "help": "Episode length, by default is read from yaml config"}, - {"name": "--seed", "type": int, "help": "Random seed"}, - {"name": "--max_iterations", "type": int, "default": 0, - "help": "Set a maximum number of training iterations"}, - {"name": "--horizon_length", "type": int, "default": -1, - "help": "Set number of simulation steps per 1 PPO iteration. Supported only by rl_games. If not -1 overrides the config settings."}, - {"name": "--minibatch_size", "type": int, "default": -1, - "help": "Set batch size for PPO optimization step. Supported only by rl_games. 
If not -1 overrides the config settings."}, - {"name": "--randomize", "action": "store_true", "default": False, - "help": "Apply physics domain randomization"}, - {"name": "--torch_deterministic", "action": "store_true", "default": False, - "help": "Apply additional PyTorch settings for more deterministic behaviour"}, - {"name": "--output_path", "type": str, "default": "output/", "help": "Specify output directory"}, - {"name": "--llc_checkpoint", "type": str, "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/learning_rl/runs/RofuncRL_ASETrainer_HumanoidASEGetupSwordShield_23-06-26_12-49-35-111331/checkpoints/ckpt_87000.pth", - "help": "Path to the saved weights for the low-level controller of an HRL agent."}] - - if benchmark: - custom_parameters += [{"name": "--num_proc", "type": int, "default": 1, "help": "Number of child processes to launch"}, - {"name": "--random_actions", "action": "store_true", - "help": "Run benchmark with random actions instead of inferencing"}, - {"name": "--bench_len", "type": int, "default": 10, - "help": "Number of timing reports"}, - {"name": "--bench_file", "action": "store", "help": "Filename to store benchmark results"}] - - # parse arguments - args = gymutil.parse_arguments( - description="RL Policy", - custom_parameters=custom_parameters) - - # allignment with examples - args.device_id = args.compute_device_id - args.device = args.sim_device_type if args.use_gpu_pipeline else 'cpu' - - if args.test: - args.play = args.test - args.train = False - elif args.play: - args.train = False - else: - args.train = True - - return args diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py deleted file mode 100644 index aca92850..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py +++ /dev/null @@ -1,356 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import copy -from datetime import datetime -from gym import spaces -import numpy as np -import os -import time -import yaml - -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import central_value -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common import a2c_common -from rl_games.common import datasets -from rl_games.common import schedulers -from rl_games.common import vecenv - -import torch -from torch import optim - -import common_agent -import ase_agent -import ase_models -import ase_network_builder - -from tensorboardX import SummaryWriter - - -class HRLAgent(common_agent.CommonAgent): - def __init__(self, base_name, config): - with open(config['llc_config'], 'r') as f: - llc_config = yaml.load(f, Loader=yaml.SafeLoader) - llc_config_params = llc_config['params'] - self._latent_dim = llc_config_params['config']['latent_dim'] - - super().__init__(base_name, config) - - self._task_size = self.vec_env.env.task.get_task_obs_size() - - self._llc_steps = config['llc_steps'] - llc_checkpoint = config['llc_checkpoint'] - assert (llc_checkpoint != "") - self._build_llc(llc_config_params, llc_checkpoint) - - return - - def env_step(self, actions): - actions = self.preprocess_actions(actions) - obs = self.obs['obs'] - - rewards = 0.0 - disc_rewards = 0.0 - done_count = 0.0 - terminate_count = 0.0 - for t in range(self._llc_steps): - llc_actions = self._compute_llc_action(obs, actions) - obs, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions) - - rewards += curr_rewards - done_count += curr_dones - terminate_count += infos['terminate'] - - amp_obs = infos['amp_obs'] - curr_disc_reward = self._calc_disc_reward(amp_obs) - disc_rewards += curr_disc_reward - - rewards /= self._llc_steps - disc_rewards /= self._llc_steps - - dones = torch.zeros_like(done_count) - dones[done_count > 0] = 1.0 - terminate = torch.zeros_like(terminate_count) - terminate[terminate_count > 0] = 1.0 - infos['terminate'] = terminate - infos['disc_rewards'] = disc_rewards - - if self.is_tensor_obses: - if self.value_size == 1: - rewards = rewards.unsqueeze(1) - return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos - else: - if self.value_size == 1: - rewards = np.expand_dims(rewards, axis=1) - return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy( - dones).to(self.ppo_device), infos - - def cast_obs(self, obs): - obs = super().cast_obs(obs) - self._llc_agent.is_tensor_obses = self.is_tensor_obses - return obs - - def preprocess_actions(self, actions): - clamped_actions = torch.clamp(actions, -1.0, 1.0) - if not self.is_tensor_obses: - clamped_actions = clamped_actions.cpu().numpy() - return clamped_actions - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, masks) - else: - res_dict = self.get_action_values(self.obs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - 
self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - - self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards']) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards'] - mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards) - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - return batch_dict - - def _load_config_params(self, config): - super()._load_config_params(config) - - self._task_reward_w = config['task_reward_w'] - self._disc_reward_w = config['disc_reward_w'] - return - - def _get_mean_rewards(self): - rewards = super()._get_mean_rewards() - rewards *= self._llc_steps - return rewards - - def _setup_action_space(self): - super()._setup_action_space() - self.actions_num = self._latent_dim - return - - def init_tensors(self): - super().init_tensors() - - del self.experience_buffer.tensor_dict['actions'] - del self.experience_buffer.tensor_dict['mus'] - del self.experience_buffer.tensor_dict['sigmas'] - - batch_shape = self.experience_buffer.obs_base_shape - self.experience_buffer.tensor_dict['actions'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - self.experience_buffer.tensor_dict['mus'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - self.experience_buffer.tensor_dict['sigmas'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - - self.experience_buffer.tensor_dict['disc_rewards'] = torch.zeros_like( - self.experience_buffer.tensor_dict['rewards']) - self.tensor_list += ['disc_rewards'] - - return - - # def _build_llc(self, config_params, checkpoint_file): - # network_params = config_params['network'] - # network_builder = ase_network_builder.ASEBuilder() - # network_builder.load(network_params) - # - # network = ase_models.ModelASEContinuous(network_builder) - # llc_agent_config = self._build_llc_agent_config(config_params, network) - # - # self._llc_agent = ase_agent.ASEAgent('llc', llc_agent_config) - # 
self._llc_agent.restore(checkpoint_file) - # print("Loaded LLC checkpoint from {:s}".format(checkpoint_file)) - # self._llc_agent.set_eval() - # return - - def _build_llc(self, config_params, checkpoint_file): - from hydra.core.global_hydra import GlobalHydra - from rofunc.config.utils import get_config - from rofunc.learning.RofuncRL.utils.memory import RandomMemory - from rofunc.learning.RofuncRL.agents.mixline.ase_agent import ASEAgent - import rofunc as rf - from rofunc.utils.logger.beauty_logger import BeautyLogger - - GlobalHydra.instance().clear() - args_overrides = ["task=HumanoidASEGetupSwordShield", "train=HumanoidASEGetupSwordShieldASERofuncRL"] - self.llc_config = get_config('./learning/rl', 'config', args=args_overrides) - llc_ckpt_path = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/learning_rl/runs/RofuncRL_ASETrainer_HumanoidASEGetupSwordShield_23-06-26_12-49-35-111331/checkpoints/ckpt_87000.pth" - - llc_env_info = copy.deepcopy(self.env_info) - obs_space = llc_env_info['observation_space'] - obs_size = obs_space.shape[0] - obs_size -= self._task_size - llc_observation_space = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - llc_memory = RandomMemory(memory_size=32, num_envs=4096, device=self.ppo_device) - motion_dataset = RandomMemory(memory_size=200000, device=self.ppo_device) - replay_buffer = RandomMemory(memory_size=1000000, device=self.ppo_device) - collect_reference_motions = lambda num_samples: self.vec_env.env.task.fetch_amp_obs_demo(num_samples) - - directory = os.path.join(os.getcwd(), "runs") - exp_name = datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f") - exp_dir = os.path.join(directory, exp_name) - rf.utils.create_dir(exp_dir) - rofunc_logger = BeautyLogger(exp_dir, verbose=True) - self._llc_agent = ASEAgent(self.llc_config.train, llc_observation_space, llc_env_info['action_space'], - llc_memory, - self.ppo_device, exp_dir, rofunc_logger, llc_env_info['amp_observation_space'], - motion_dataset, replay_buffer, collect_reference_motions) - self._llc_agent.load_ckpt(llc_ckpt_path) - return - - # def _build_llc_agent_config(self, config_params, network): - # llc_env_info = copy.deepcopy(self.env_info) - # obs_space = llc_env_info['observation_space'] - # obs_size = obs_space.shape[0] - # obs_size -= self._task_size - # llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - # - # config = config_params['config'] - # config['network'] = network - # config['num_actors'] = self.num_actors - # config['features'] = {'observer' : self.algo_observer} - # config['env_info'] = llc_env_info - # - # return config - - # def _compute_llc_action(self, obs, actions): - # llc_obs = self._extract_llc_obs(obs) - # processed_obs = self._llc_agent._preproc_obs(llc_obs) - # - # z = torch.nn.functional.normalize(actions, dim=-1) - # mu, _ = self._llc_agent.model.a2c_network.eval_actor(obs=processed_obs, ase_latents=z) - # llc_action = mu - # llc_action = self._llc_agent.preprocess_actions(llc_action) - # - # return llc_action - - def _compute_llc_action(self, obs, actions): - llc_obs = self._extract_llc_obs(obs) - z = torch.nn.functional.normalize(actions, dim=-1) - mu, _ = self._llc_agent.act(llc_obs, ase_latents=z) - llc_action = mu - # llc_action = self._llc_agent.preprocess_actions(llc_action) - - return llc_action - - def _extract_llc_obs(self, obs): - obs_size = obs.shape[-1] - llc_obs = obs[..., :obs_size - self._task_size] - return llc_obs - - # def _calc_disc_reward(self, 
amp_obs): - # disc_reward = self._llc_agent._calc_disc_rewards(amp_obs) - # return disc_reward - - def _calc_disc_reward(self, amp_obs): - with torch.no_grad(): - amp_logits = self._llc_agent.discriminator(self._llc_agent._amp_state_preprocessor(amp_obs)) - if self._llc_agent._least_square_discriminator: - style_rewards = torch.maximum(torch.tensor(1 - 0.25 * torch.square(1 - amp_logits)), - torch.tensor(0.0001, device=self.device)) - else: - style_rewards = -torch.log(torch.maximum(torch.tensor(1 - 1 / (1 + torch.exp(-amp_logits))), - torch.tensor(0.0001, device=self.device))) - style_rewards *= self._llc_agent._discriminator_reward_scale - return style_rewards - - def _combine_rewards(self, task_rewards, disc_rewards): - combined_rewards = self._task_reward_w * task_rewards + \ - + self._disc_reward_w * disc_rewards - - # combined_rewards = task_rewards * disc_rewards - return combined_rewards - - def _record_train_batch_info(self, batch_dict, train_info): - super()._record_train_batch_info(batch_dict, train_info) - train_info['disc_rewards'] = batch_dict['disc_rewards'] - return - - def _log_train_info(self, train_info, frame): - super()._log_train_info(train_info, frame) - - disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards']) - self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame) - self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml deleted file mode 100644 index 3818fb33..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml +++ /dev/null @@ -1,76 +0,0 @@ -params: - seed: 42 - - algo: - name: hrl - - model: - name: hrl - - network: - name: hrl - separate: True - - space: - continuous: - mu_activation: None - sigma_activation: None - mu_init: - name: default - sigma_init: - name: const_initializer - val: -2.3 - fixed_sigma: True - learn_sigma: False - - mlp: - units: [1024, 512] - activation: relu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - load_checkpoint: False - - config: - name: Humanoid - env_name: rlgpu - multi_gpu: False - ppo: True - mixed_precision: False - normalize_input: True - normalize_value: True - reward_shaper: - scale_value: 1 - normalize_advantage: True - gamma: 0.99 - tau: 0.95 - learning_rate: 2e-5 - lr_schedule: constant - score_to_win: 20000 - max_epochs: 10000 - save_best_after: 10 - save_frequency: 50 - print_stats: True - grad_norm: 1.0 - entropy_coef: 0.0 - truncate_grads: False - ppo: True - e_clip: 0.2 - horizon_length: 32 - minibatch_size: 16384 - mini_epochs: 6 - critic_coef: 5 - clip_value: False - seq_len: 4 - bounds_loss_coef: 10 - - task_reward_w: 0.9 - disc_reward_w: 0.1 - - llc_steps: 5 - llc_config: /home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/learning/RofuncRL/agents/mixline/utils/ase_humanoid_hrl.yaml - diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py deleted file mode 100644 index e8191f46..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. 
Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch.nn as nn -from rl_games.algos_torch.models import ModelA2CContinuousLogStd - -class ModelHRLContinuous(ModelA2CContinuousLogStd): - def __init__(self, network): - super().__init__(network) - return - - def build(self, config): - net = self.network_builder.build('amp', **config) - for name, _ in net.named_parameters(): - print(name) - return ModelHRLContinuous.Network(net) - - class Network(ModelA2CContinuousLogStd.Network): - def __init__(self, a2c_network): - super().__init__(a2c_network) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py deleted file mode 100644 index a9d1fe78..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from rl_games.algos_torch import network_builder - -import torch -import torch.nn as nn - - -class HRLBuilder(network_builder.A2CBuilder): - def __init__(self, **kwargs): - super().__init__(**kwargs) - return - - class Network(network_builder.A2CBuilder.Network): - def __init__(self, params, **kwargs): - super().__init__(params, **kwargs) - - if self.is_continuous: - if (not self.space_config['learn_sigma']): - actions_num = kwargs.get('actions_num') - sigma_init = self.init_factory.create(**self.space_config['sigma_init']) - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False) - sigma_init(self.sigma) - - return - - def forward(self, obs_dict): - mu, sigma, value, states = super().forward(obs_dict) - norm_mu = torch.tanh(mu) - return norm_mu, sigma, value, states - - def eval_critic(self, obs): - c_out = self.critic_cnn(obs) - c_out = c_out.contiguous().view(c_out.size(0), -1) - c_out = self.critic_mlp(c_out) - value = self.value_act(self.value(c_out)) - return value - - def build(self, name, **kwargs): - net = HRLBuilder.Network(self.params, **kwargs) - return net \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py deleted file mode 100644 index c493246a..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py +++ /dev/null @@ -1,345 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import copy -from gym import spaces -import numpy as np -import os -import torch -import yaml - -from rl_games.algos_torch import players -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common.player import BasePlayer - -import common_player as common_player -import ase_models as ase_models -import ase_network_builder as ase_network_builder -import ase_players as ase_players - -class HRLPlayer(common_player.CommonPlayer): - def __init__(self, config): - with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f: - llc_config = yaml.load(f, Loader=yaml.SafeLoader) - llc_config_params = llc_config['params'] - self._latent_dim = llc_config_params['config']['latent_dim'] - - super().__init__(config) - - self._task_size = self.env.task.get_task_obs_size() - - self._llc_steps = config['llc_steps'] - llc_checkpoint = config['llc_checkpoint'] - assert(llc_checkpoint != "") - self._build_llc(llc_config_params, llc_checkpoint) - - return - - def get_action(self, obs_dict, is_determenistic = False): - obs = obs_dict['obs'] - - if len(obs.size()) == len(self.obs_shape): - obs = obs.unsqueeze(0) - proc_obs = self._preproc_obs(obs) - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs' : proc_obs, - 'rnn_states' : self.states - } - with torch.no_grad(): - res_dict = self.model(input_dict) - mu = res_dict['mus'] - action = res_dict['actions'] - self.states = res_dict['rnn_states'] - if is_determenistic: - current_action = mu - else: - current_action = action - current_action = torch.squeeze(current_action.detach()) - clamped_actions = torch.clamp(current_action, -1.0, 1.0) - - return clamped_actions - - def run(self): - n_games = self.games_num - render = self.render_env - n_game_life = self.n_game_life - is_determenistic = self.is_determenistic - sum_rewards = 0 - sum_steps = 0 - sum_game_res = 0 - n_games = n_games * n_game_life - games_played = 0 - has_masks = False - has_masks_func = getattr(self.env, "has_action_mask", None) is not None - - op_agent = getattr(self.env, "create_agent", None) - if op_agent: - agent_inited = True - - if has_masks_func: - has_masks = self.env.has_action_mask() - - need_init_rnn = self.is_rnn - for _ in range(n_games): - if games_played >= n_games: - break - - obs_dict = self.env_reset() - batch_size = 1 - if len(obs_dict['obs'].size()) > len(self.obs_shape): - batch_size = obs_dict['obs'].size()[0] - self.batch_size = batch_size - - if need_init_rnn: - self.init_rnn() - need_init_rnn = False - - cr = torch.zeros(batch_size, dtype=torch.float32) - steps = torch.zeros(batch_size, dtype=torch.float32) - - print_game_res = False - - done_indices = [] - - for n in range(self.max_steps): - obs_dict = self.env_reset(done_indices) - - if has_masks: - masks = self.env.get_action_mask() - action = self.get_masked_action(obs_dict, masks, is_determenistic) - else: - action = self.get_action(obs_dict, is_determenistic) - obs_dict, r, done, info = 
self.env_step(self.env, obs_dict, action) - cr += r - steps += 1 - - self._post_step(info) - - if render: - self.env.render(mode = 'human') - time.sleep(self.render_sleep) - - all_done_indices = done.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - done_count = len(done_indices) - games_played += done_count - - if done_count > 0: - if self.is_rnn: - for s in self.states: - s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0 - - cur_rewards = cr[done_indices].sum().item() - cur_steps = steps[done_indices].sum().item() - - cr = cr * (1.0 - done.float()) - steps = steps * (1.0 - done.float()) - sum_rewards += cur_rewards - sum_steps += cur_steps - - game_res = 0.0 - if isinstance(info, dict): - if 'battle_won' in info: - print_game_res = True - game_res = info.get('battle_won', 0.5) - if 'scores' in info: - print_game_res = True - game_res = info.get('scores', 0.5) - if self.print_stats: - if print_game_res: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res) - else: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count) - - sum_game_res += game_res - if batch_size//self.num_agents == 1 or games_played >= n_games: - break - - done_indices = done_indices[:, 0] - - print(sum_rewards) - if print_game_res: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life) - else: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life) - - return - - def env_step(self, env, obs_dict, action): - if not self.is_tensor_obses: - actions = actions.cpu().numpy() - - obs = obs_dict['obs'] - rewards = 0.0 - done_count = 0.0 - disc_rewards = 0.0 - for t in range(self._llc_steps): - llc_actions = self._compute_llc_action(obs, action) - obs, curr_rewards, curr_dones, infos = env.step(llc_actions) - - rewards += curr_rewards - done_count += curr_dones - - amp_obs = infos['amp_obs'] - curr_disc_reward = self._calc_disc_reward(amp_obs) - curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy() - disc_rewards += curr_disc_reward - - rewards /= self._llc_steps - dones = torch.zeros_like(done_count) - dones[done_count > 0] = 1.0 - - disc_rewards /= self._llc_steps - #print("disc_reward", disc_rewards) - - if isinstance(obs, dict): - obs = obs['obs'] - if obs.dtype == np.float64: - obs = np.float32(obs) - if self.value_size > 1: - rewards = rewards[0] - if self.is_tensor_obses: - return obs, rewards.cpu(), dones.cpu(), infos - else: - if np.isscalar(dones): - rewards = np.expand_dims(np.asarray(rewards), 0) - dones = np.expand_dims(np.asarray(dones), 0) - return torch.from_numpy(obs).to(self.device), torch.from_numpy(rewards), torch.from_numpy(dones), infos - - # def _build_llc(self, config_params, checkpoint_file): - # network_params = config_params['network'] - # network_builder = ase_network_builder.ASEBuilder() - # network_builder.load(network_params) - # - # network = ase_models.ModelASEContinuous(network_builder) - # llc_agent_config = self._build_llc_agent_config(config_params, network) - # - # self._llc_agent = ase_players.ASEPlayer(llc_agent_config) - # self._llc_agent.restore(checkpoint_file) - # print("Loaded LLC checkpoint from {:s}".format(checkpoint_file)) - # return - # - # def _build_llc_agent_config(self, config_params, network): - # llc_env_info = copy.deepcopy(self.env_info) - # obs_space = llc_env_info['observation_space'] - # obs_size = 
obs_space.shape[0] - # obs_size -= self._task_size - # llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - # llc_env_info['amp_observation_space'] = self.env.amp_observation_space.shape - # llc_env_info['num_envs'] = self.env.task.num_envs - # - # config = config_params['config'] - # config['network'] = network - # config['env_info'] = llc_env_info - # - # return config - - def _build_llc(self, config_params, checkpoint_file): - from hydra.core.global_hydra import GlobalHydra - from rofunc.config.utils import get_config - from rofunc.learning.RofuncRL.utils.memory import RandomMemory - from rofunc.learning.RofuncRL.agents.mixline.ase_agent import ASEAgent - import rofunc as rf - from rofunc.utils.logger.beauty_logger import BeautyLogger - from datetime import datetime - - GlobalHydra.instance().clear() - args_overrides = ["task=HumanoidASEGetupSwordShield", "train=HumanoidASEGetupSwordShieldASERofuncRL"] - self.llc_config = get_config('./learning/rl', 'config', args=args_overrides) - llc_ckpt_path = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/learning_rl/runs/RofuncRL_ASETrainer_HumanoidASEGetupSwordShield_23-06-26_12-49-35-111331/checkpoints/ckpt_87000.pth" - - llc_env_info = copy.deepcopy(self.env_info) - obs_space = llc_env_info['observation_space'] - obs_size = obs_space.shape[0] - obs_size -= self._task_size - llc_observation_space = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - llc_memory = RandomMemory(memory_size=32, num_envs=4096) - motion_dataset = RandomMemory(memory_size=200000) - replay_buffer = RandomMemory(memory_size=1000000) - collect_reference_motions = lambda num_samples: self.env.task.fetch_amp_obs_demo(num_samples) - - directory = os.path.join(os.getcwd(), "runs") - exp_name = datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f") - exp_dir = os.path.join(directory, exp_name) - rf.utils.create_dir(exp_dir) - rofunc_logger = BeautyLogger(exp_dir, verbose=True) - amp_observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(1400,)) - - self._llc_agent = ASEAgent(self.llc_config.train, llc_observation_space, llc_env_info['action_space'], - llc_memory, - 'cuda:0', exp_dir, rofunc_logger, amp_observation_space, - motion_dataset, replay_buffer, collect_reference_motions) - self._llc_agent.load_ckpt(llc_ckpt_path) - return - - - def _setup_action_space(self): - super()._setup_action_space() - self.actions_num = self._latent_dim - return - - # def _compute_llc_action(self, obs, actions): - # llc_obs = self._extract_llc_obs(obs) - # processed_obs = self._llc_agent._preproc_obs(llc_obs) - # - # z = torch.nn.functional.normalize(actions, dim=-1) - # mu, _ = self._llc_agent.model.a2c_network.eval_actor(obs=processed_obs, ase_latents=z) - # llc_action = players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(mu, -1.0, 1.0)) - # - # return llc_action - - def _compute_llc_action(self, obs, actions): - llc_obs = self._extract_llc_obs(obs) - z = torch.nn.functional.normalize(actions, dim=-1) - mu, _ = self._llc_agent.act(llc_obs, ase_latents=z) - llc_action = mu - return llc_action - - def _extract_llc_obs(self, obs): - obs_size = obs.shape[-1] - llc_obs = obs[..., :obs_size - self._task_size] - return llc_obs - - # def _calc_disc_reward(self, amp_obs): - # disc_reward = self._llc_agent._calc_disc_rewards(amp_obs) - # return disc_reward - - def _calc_disc_reward(self, amp_obs): - with torch.no_grad(): - amp_logits = 
self._llc_agent.discriminator(self._llc_agent._amp_state_preprocessor(amp_obs)) - if self._llc_agent._least_square_discriminator: - style_rewards = torch.maximum(torch.tensor(1 - 0.25 * torch.square(1 - amp_logits)), - torch.tensor(0.0001, device=self.device)) - else: - style_rewards = -torch.log(torch.maximum(torch.tensor(1 - 1 / (1 + torch.exp(-amp_logits))), - torch.tensor(0.0001, device=self.device))) - style_rewards *= self._llc_agent._discriminator_reward_scale - return style_rewards \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml deleted file mode 100644 index 20c1b35e..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# if given, will override the device setting in gym. -env: - numEnvs: 4096 - envSpacing: 5 - episodeLength: 300 - isFlagrun: False - enableDebugVis: False - - pdControl: True - powerScale: 1.0 - controlFrequencyInv: 2 # 30 Hz - stateInit: "Default" - hybridInitProb: 0.5 - numAMPObsSteps: 10 - - localRootObs: True - keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"] - contactBodies: ["right_foot", "left_foot"] - terminationHeight: 0.15 - enableEarlyTermination: True - - tarSpeedMin: 1.5 - tarSpeedMax: 1.6 - headingChangeStepsMin: 100 - headingChangeStepsMax: 200 - enableRandHeading: True - enableTaskObs: True - - asset: - assetRoot: "ase/data/assets" - assetFileName: "mjcf/amp_humanoid_sword_shield.xml" - - plane: - staticFriction: 1.0 - dynamicFriction: 1.0 - restitution: 0.0 - -sim: - substeps: 2 - physx: - num_threads: 4 - solver_type: 1 # 0: pgs, 1: tgs - num_position_iterations: 4 - num_velocity_iterations: 0 - contact_offset: 0.02 - rest_offset: 0.0 - bounce_threshold_velocity: 0.2 - max_depenetration_velocity: 10.0 - default_buffer_size_multiplier: 10.0 - - flex: - num_inner_iterations: 10 - warm_start: 0.25 diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml deleted file mode 100644 index 9f0bbafc..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# if given, will override the device setting in gym. 
-env: - numEnvs: 4096 - envSpacing: 5 - episodeLength: 300 - isFlagrun: False - enableDebugVis: False - - pdControl: True - powerScale: 1.0 - controlFrequencyInv: 2 # 30 Hz - stateInit: "Default" - hybridInitProb: 0.5 - numAMPObsSteps: 10 - - localRootObs: True - keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"] - contactBodies: ["right_foot", "left_foot"] - terminationHeight: 0.15 - enableEarlyTermination: True - - strikeBodyNames: ["sword", "right_hand", "right_lower_arm"] - enableTaskObs: True - - asset: - assetRoot: "ase/data/assets" - assetFileName: "mjcf/amp_humanoid_sword_shield.xml" - - plane: - staticFriction: 1.0 - dynamicFriction: 1.0 - restitution: 0.0 - -sim: - substeps: 2 - physx: - num_threads: 4 - solver_type: 1 # 0: pgs, 1: tgs - num_position_iterations: 4 - num_velocity_iterations: 0 - contact_offset: 0.02 - rest_offset: 0.0 - bounce_threshold_velocity: 0.2 - max_depenetration_velocity: 10.0 - default_buffer_size_multiplier: 10.0 - - flex: - num_inner_iterations: 10 - warm_start: 0.25 diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py deleted file mode 100644 index 0f81a47d..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py +++ /dev/null @@ -1,36 +0,0 @@ -from rl_games.common.algo_observer import AlgoObserver -from rl_games.algos_torch import torch_ext - - -class RLGPUAlgoObserver(AlgoObserver): - def __init__(self, use_successes=True): - self.use_successes = use_successes - return - - def after_init(self, algo): - self.algo = algo - self.consecutive_successes = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) - self.writer = self.algo.writer - return - - def process_infos(self, infos, done_indices): - if isinstance(infos, dict): - if (self.use_successes == False) and 'consecutive_successes' in infos: - cons_successes = infos['consecutive_successes'].clone() - self.consecutive_successes.update(cons_successes.to(self.algo.ppo_device)) - if self.use_successes and 'successes' in infos: - successes = infos['successes'].clone() - self.consecutive_successes.update(successes[done_indices].to(self.algo.ppo_device)) - return - - def after_clear_stats(self): - self.mean_scores.clear() - return - - def after_print_stats(self, frame, epoch_num, total_time): - if self.consecutive_successes.current_size > 0: - mean_con_successes = self.consecutive_successes.get_mean() - self.writer.add_scalar('successes/consecutive_successes/mean', mean_con_successes, frame) - self.writer.add_scalar('successes/consecutive_successes/iter', mean_con_successes, epoch_num) - self.writer.add_scalar('successes/consecutive_successes/time', mean_con_successes, total_time) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py deleted file mode 100644 index da72c040..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from tasks.humanoid import Humanoid -from tasks.humanoid_amp import HumanoidAMP -from tasks.humanoid_amp_getup import HumanoidAMPGetup -from tasks.humanoid_heading import HumanoidHeading -from tasks.humanoid_location import HumanoidLocation -from tasks.humanoid_strike import HumanoidStrike -from tasks.humanoid_reach import HumanoidReach -from tasks.humanoid_perturb import HumanoidPerturb -from tasks.humanoid_view_motion import HumanoidViewMotion -from vec_task_wrappers import VecTaskPythonWrapper - -from isaacgym import rlgpu - -import json -import numpy as np - - -def warn_task_name(): - raise Exception( - "Unrecognized task!\nTask should be one of: [BallBalance, Cartpole, CartpoleYUp, Ant, Humanoid, Anymal, FrankaCabinet, Quadcopter, ShadowHand, ShadowHandLSTM, ShadowHandFFOpenAI, ShadowHandFFOpenAITest, ShadowHandOpenAI, ShadowHandOpenAITest, Ingenuity]") - -def parse_task(args, cfg, cfg_train, sim_params): - - # create native task and pass custom config - device_id = args.device_id - rl_device = args.rl_device - - cfg["seed"] = cfg_train.get("seed", -1) - cfg_task = cfg["env"] - cfg_task["seed"] = cfg["seed"] - - try: - task = eval(args.task)( - cfg=cfg, - sim_params=sim_params, - physics_engine=args.physics_engine, - device_type=args.device, - device_id=device_id, - headless=args.headless) - except NameError as e: - print(e) - warn_task_name() - env = VecTaskPythonWrapper(task, rl_device, cfg_train.get("clip_observations", np.inf), cfg_train.get("clip_actions", 1.0)) - - return task, env diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py deleted file mode 100644 index 5c5e7c77..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch - -class ReplayBuffer(): - def __init__(self, buffer_size, device): - self._head = 0 - self._total_count = 0 - self._buffer_size = buffer_size - self._device = device - self._data_buf = None - self._sample_idx = torch.randperm(buffer_size) - self._sample_head = 0 - - return - - def reset(self): - self._head = 0 - self._total_count = 0 - self._reset_sample_idx() - return - - def get_buffer_size(self): - return self._buffer_size - - def get_total_count(self): - return self._total_count - - def store(self, data_dict): - if (self._data_buf is None): - self._init_data_buf(data_dict) - - n = next(iter(data_dict.values())).shape[0] - buffer_size = self.get_buffer_size() - assert(n <= buffer_size) - - for key, curr_buf in self._data_buf.items(): - curr_n = data_dict[key].shape[0] - assert(n == curr_n) - - store_n = min(curr_n, buffer_size - self._head) - curr_buf[self._head:(self._head + store_n)] = data_dict[key][:store_n] - - remainder = n - store_n - if (remainder > 0): - curr_buf[0:remainder] = data_dict[key][store_n:] - - self._head = (self._head + n) % buffer_size - self._total_count += n - - return - - def sample(self, n): - total_count = self.get_total_count() - buffer_size = self.get_buffer_size() - - idx = torch.arange(self._sample_head, self._sample_head + n) - idx = idx % buffer_size - rand_idx = self._sample_idx[idx] - if (total_count < buffer_size): - rand_idx = rand_idx % self._head - - samples = dict() - for k, v in self._data_buf.items(): - samples[k] = v[rand_idx] - - self._sample_head += n - if (self._sample_head >= buffer_size): - self._reset_sample_idx() - - return samples - - def _reset_sample_idx(self): - buffer_size = self.get_buffer_size() - self._sample_idx[:] = torch.randperm(buffer_size) - self._sample_head = 0 - return - - def _init_data_buf(self, data_dict): - buffer_size = self.get_buffer_size() - self._data_buf = dict() - - for k, v in data_dict.items(): - v_shape = v.shape[1:] - self._data_buf[k] = torch.zeros((buffer_size,) + v_shape, device=self._device) - - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/run.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/run.py deleted file mode 100644 index a4a88d60..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/run.py +++ /dev/null @@ -1,252 +0,0 @@ -# 
Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import isaacgym -from config import set_np_formatting, set_seed, get_args, parse_sim_params, load_cfg -from parse_task import parse_task - -from rl_games.algos_torch import players -from rl_games.algos_torch import torch_ext -from rl_games.common import env_configurations, experiment, vecenv -from rl_games.common.algo_observer import AlgoObserver -from rl_games.torch_runner import Runner - -import numpy as np -import copy -import torch - -import amp_agent -import amp_players -import amp_models -import amp_network_builder - -import ase_agent -import ase_players -import ase_models -import ase_network_builder - -import hrl_agent -import hrl_players -import hrl_models -import hrl_network_builder - -args = None -cfg = None -cfg_train = None - - -def create_rlgpu_env(**kwargs): - use_horovod = cfg_train['params']['config'].get('multi_gpu', False) - if use_horovod: - import horovod.torch as hvd - - rank = hvd.rank() - print("Horovod rank: ", rank) - - cfg_train['params']['seed'] = cfg_train['params']['seed'] + rank - - args.device = 'cuda' - args.device_id = rank - args.rl_device = 'cuda:' + str(rank) - - cfg['rank'] = rank - cfg['rl_device'] = 'cuda:' + str(rank) - - sim_params = parse_sim_params(args, cfg, cfg_train) - task, env = parse_task(args, cfg, cfg_train, sim_params) - - print('num_envs: {:d}'.format(env.num_envs)) - print('num_actions: {:d}'.format(env.num_actions)) - print('num_obs: {:d}'.format(env.num_obs)) - print('num_states: {:d}'.format(env.num_states)) - - frames = kwargs.pop('frames', 1) - if frames > 1: - env = wrappers.FrameStack(env, frames, False) - return env - - -class RLGPUAlgoObserver(AlgoObserver): - def __init__(self, use_successes=True): - self.use_successes = use_successes - return - - def after_init(self, algo): - self.algo = algo - self.consecutive_successes = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) - self.writer = self.algo.writer - return - - def process_infos(self, infos, 
done_indices): - if isinstance(infos, dict): - if (self.use_successes == False) and 'consecutive_successes' in infos: - cons_successes = infos['consecutive_successes'].clone() - self.consecutive_successes.update(cons_successes.to(self.algo.ppo_device)) - if self.use_successes and 'successes' in infos: - successes = infos['successes'].clone() - self.consecutive_successes.update(successes[done_indices].to(self.algo.ppo_device)) - return - - def after_clear_stats(self): - self.mean_scores.clear() - return - - def after_print_stats(self, frame, epoch_num, total_time): - if self.consecutive_successes.current_size > 0: - mean_con_successes = self.consecutive_successes.get_mean() - self.writer.add_scalar('successes/consecutive_successes/mean', mean_con_successes, frame) - self.writer.add_scalar('successes/consecutive_successes/iter', mean_con_successes, epoch_num) - self.writer.add_scalar('successes/consecutive_successes/time', mean_con_successes, total_time) - return - - -class RLGPUEnv(vecenv.IVecEnv): - def __init__(self, config_name, num_actors, **kwargs): - self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs) - self.use_global_obs = (self.env.num_states > 0) - - self.full_state = {} - self.full_state["obs"] = self.reset() - if self.use_global_obs: - self.full_state["states"] = self.env.get_state() - return - - def step(self, action): - next_obs, reward, is_done, info = self.env.step(action) - - # todo: improve, return only dictinary - self.full_state["obs"] = next_obs - if self.use_global_obs: - self.full_state["states"] = self.env.get_state() - return self.full_state, reward, is_done, info - else: - return self.full_state["obs"], reward, is_done, info - - def reset(self, env_ids=None): - self.full_state["obs"] = self.env.reset(env_ids) - if self.use_global_obs: - self.full_state["states"] = self.env.get_state() - return self.full_state - else: - return self.full_state["obs"] - - def get_number_of_agents(self): - return self.env.get_number_of_agents() - - def get_env_info(self): - info = {} - info['action_space'] = self.env.action_space - info['observation_space'] = self.env.observation_space - info['amp_observation_space'] = self.env.amp_observation_space - - if self.use_global_obs: - info['state_space'] = self.env.state_space - print(info['action_space'], info['observation_space'], info['state_space']) - else: - print(info['action_space'], info['observation_space']) - - return info - - -vecenv.register('RLGPU', lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs)) -env_configurations.register('rlgpu', { - 'env_creator': lambda **kwargs: create_rlgpu_env(**kwargs), - 'vecenv_type': 'RLGPU'}) - - -def build_alg_runner(algo_observer): - runner = Runner(algo_observer) - runner.algo_factory.register_builder('amp', lambda **kwargs: amp_agent.AMPAgent(**kwargs)) - runner.player_factory.register_builder('amp', lambda **kwargs: amp_players.AMPPlayerContinuous(**kwargs)) - runner.model_builder.model_factory.register_builder('amp', lambda network, **kwargs: amp_models.ModelAMPContinuous( - network)) - runner.model_builder.network_factory.register_builder('amp', lambda **kwargs: amp_network_builder.AMPBuilder()) - - runner.algo_factory.register_builder('ase', lambda **kwargs: ase_agent.ASEAgent(**kwargs)) - runner.player_factory.register_builder('ase', lambda **kwargs: ase_players.ASEPlayer(**kwargs)) - runner.model_builder.model_factory.register_builder('ase', lambda network, **kwargs: ase_models.ModelASEContinuous( - network)) - 
runner.model_builder.network_factory.register_builder('ase', lambda **kwargs: ase_network_builder.ASEBuilder()) - - runner.algo_factory.register_builder('hrl', lambda **kwargs: hrl_agent.HRLAgent(**kwargs)) - runner.player_factory.register_builder('hrl', lambda **kwargs: hrl_players.HRLPlayer(**kwargs)) - runner.model_builder.model_factory.register_builder('hrl', lambda network, **kwargs: hrl_models.ModelHRLContinuous( - network)) - runner.model_builder.network_factory.register_builder('hrl', lambda **kwargs: hrl_network_builder.HRLBuilder()) - - return runner - - -def main(): - global args - global cfg - global cfg_train - - set_np_formatting() - args = get_args() - gpu_id = 0 - args.device_id = gpu_id - args.rl_device = 'cuda:' + str(gpu_id) - args.compute_device_id = gpu_id - args.graphics_device_id = gpu_id - args.sim_device = 'cuda:' + str(gpu_id) - cfg, cfg_train, logdir = load_cfg(args) - - cfg_train['params']['seed'] = set_seed(cfg_train['params'].get("seed", -1), - cfg_train['params'].get("torch_deterministic", False)) - - if args.horovod: - cfg_train['params']['config']['multi_gpu'] = args.horovod - - if args.horizon_length != -1: - cfg_train['params']['config']['horizon_length'] = args.horizon_length - - if args.minibatch_size != -1: - cfg_train['params']['config']['minibatch_size'] = args.minibatch_size - - if args.motion_file: - cfg['env']['motion_file'] = args.motion_file - - # Create default directories for weights and statistics - cfg_train['params']['config']['train_dir'] = args.output_path - - vargs = vars(args) - - algo_observer = RLGPUAlgoObserver() - - runner = build_alg_runner(algo_observer) - runner.load(cfg_train) - runner.reset() - runner.run(vargs) - - return - - -if __name__ == '__main__': - main() diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py deleted file mode 100644 index bc6ee169..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py deleted file mode 100644 index 9587440f..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py +++ /dev/null @@ -1,428 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. 
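For orientation, the build_alg_runner() helper in the deleted run.py above extends the rl_games Runner by registering the AMP, ASE, and HRL agents, players, models, and network builders under string keys that cfg_train selects at load time. A rough, self-contained sketch of that keyed-registration pattern is given below; it uses plain Python with illustrative names (ObjectFactory, DummyAMPAgent) and is not the actual rl_games API.

class ObjectFactory:
    """Minimal stand-in for the keyed builder registries used by the Runner."""
    def __init__(self):
        self._builders = {}

    def register_builder(self, name, builder):
        # builder: any callable that returns a fresh object (agent, player, model, ...)
        self._builders[name] = builder

    def create(self, name, **kwargs):
        if name not in self._builders:
            raise KeyError(f"no builder registered under {name!r}")
        return self._builders[name](**kwargs)


class DummyAMPAgent:
    """Placeholder agent; in the deleted code the real AMPAgent comes from amp_agent.py."""
    def __init__(self, horizon_length=32, **kwargs):
        self.horizon_length = horizon_length


algo_factory = ObjectFactory()
algo_factory.register_builder('amp', lambda **kwargs: DummyAMPAgent(**kwargs))
agent = algo_factory.create('amp', horizon_length=16)  # key chosen by the training config
assert agent.horizon_length == 16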
- -import sys -import os -import operator -from copy import deepcopy -import random - -from isaacgym import gymapi -from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples - -import numpy as np -import torch - - -# Base class for RL tasks -class BaseTask(): - - def __init__(self, cfg, enable_camera_sensors=False): - self.gym = gymapi.acquire_gym() - - self.device_type = cfg.get("device_type", "cuda") - self.device_id = cfg.get("device_id", 0) - - self.device = "cpu" - if self.device_type == "cuda" or self.device_type == "GPU": - self.device = "cuda" + ":" + str(self.device_id) - - self.headless = cfg["headless"] - - # double check! - self.graphics_device_id = self.device_id - if enable_camera_sensors == False and self.headless == True: - self.graphics_device_id = -1 - - self.num_envs = cfg["env"]["numEnvs"] - self.num_obs = cfg["env"]["numObservations"] - self.num_states = cfg["env"].get("numStates", 0) - self.num_actions = cfg["env"]["numActions"] - - self.control_freq_inv = cfg["env"].get("controlFrequencyInv", 1) - - # optimization flags for pytorch JIT - torch._C._jit_set_profiling_mode(False) - torch._C._jit_set_profiling_executor(False) - - # allocate buffers - self.obs_buf = torch.zeros( - (self.num_envs, self.num_obs), device=self.device, dtype=torch.float) - self.states_buf = torch.zeros( - (self.num_envs, self.num_states), device=self.device, dtype=torch.float) - self.rew_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.float) - self.reset_buf = torch.ones( - self.num_envs, device=self.device, dtype=torch.long) - self.progress_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.long) - self.randomize_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.long) - self.extras = {} - - self.original_props = {} - self.dr_randomizations = {} - self.first_randomization = True - self.actor_params_generator = None - self.extern_actor_params = {} - for env_id in range(self.num_envs): - self.extern_actor_params[env_id] = None - - self.last_step = -1 - self.last_rand_step = -1 - - # create envs, sim and viewer - self.create_sim() - self.gym.prepare_sim(self.sim) - - # todo: read from config - self.enable_viewer_sync = True - self.viewer = None - - # if running with a viewer, set up keyboard shortcuts and camera - if self.headless == False: - # subscribe to keyboard shortcuts - self.viewer = self.gym.create_viewer( - self.sim, gymapi.CameraProperties()) - self.gym.subscribe_viewer_keyboard_event( - self.viewer, gymapi.KEY_ESCAPE, "QUIT") - self.gym.subscribe_viewer_keyboard_event( - self.viewer, gymapi.KEY_V, "toggle_viewer_sync") - - # set the camera position based on up axis - sim_params = self.gym.get_sim_params(self.sim) - if sim_params.up_axis == gymapi.UP_AXIS_Z: - cam_pos = gymapi.Vec3(20.0, 25.0, 3.0) - cam_target = gymapi.Vec3(10.0, 15.0, 0.0) - else: - cam_pos = gymapi.Vec3(20.0, 3.0, 25.0) - cam_target = gymapi.Vec3(10.0, 0.0, 15.0) - - self.gym.viewer_camera_look_at( - self.viewer, None, cam_pos, cam_target) - - # set gravity based on up axis and return axis index - def set_sim_params_up_axis(self, sim_params, axis): - if axis == 'z': - sim_params.up_axis = gymapi.UP_AXIS_Z - sim_params.gravity.x = 0 - sim_params.gravity.y = 0 - sim_params.gravity.z = -9.81 - return 2 - return 1 - - def create_sim(self, compute_device, graphics_device, physics_engine, sim_params): - sim = self.gym.create_sim(compute_device, graphics_device, 
physics_engine, sim_params) - if sim is None: - print("*** Failed to create sim") - quit() - - return sim - - def step(self, actions): - if self.dr_randomizations.get('actions', None): - actions = self.dr_randomizations['actions']['noise_lambda'](actions) - - # apply actions - self.pre_physics_step(actions) - - # step physics and render each frame - self._physics_step() - - # to fix! - if self.device == 'cpu': - self.gym.fetch_results(self.sim, True) - - # compute observations, rewards, resets, ... - self.post_physics_step() - - if self.dr_randomizations.get('observations', None): - self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf) - - def get_states(self): - return self.states_buf - - def render(self, sync_frame_time=False): - if self.viewer: - # check for window closed - if self.gym.query_viewer_has_closed(self.viewer): - sys.exit() - - # check for keyboard events - for evt in self.gym.query_viewer_action_events(self.viewer): - if evt.action == "QUIT" and evt.value > 0: - sys.exit() - elif evt.action == "toggle_viewer_sync" and evt.value > 0: - self.enable_viewer_sync = not self.enable_viewer_sync - - # fetch results - if self.device != 'cpu': - self.gym.fetch_results(self.sim, True) - - # step graphics - if self.enable_viewer_sync: - self.gym.step_graphics(self.sim) - self.gym.draw_viewer(self.viewer, self.sim, True) - else: - self.gym.poll_viewer_events(self.viewer) - - def get_actor_params_info(self, dr_params, env): - """Returns a flat array of actor params, their names and ranges.""" - if "actor_params" not in dr_params: - return None - params = [] - names = [] - lows = [] - highs = [] - param_getters_map = get_property_getter_map(self.gym) - for actor, actor_properties in dr_params["actor_params"].items(): - handle = self.gym.find_actor_handle(env, actor) - for prop_name, prop_attrs in actor_properties.items(): - if prop_name == 'color': - continue # this is set randomly - props = param_getters_map[prop_name](env, handle) - if not isinstance(props, list): - props = [props] - for prop_idx, prop in enumerate(props): - for attr, attr_randomization_params in prop_attrs.items(): - name = prop_name+'_'+str(prop_idx)+'_'+attr - lo_hi = attr_randomization_params['range'] - distr = attr_randomization_params['distribution'] - if 'uniform' not in distr: - lo_hi = (-1.0*float('Inf'), float('Inf')) - if isinstance(prop, np.ndarray): - for attr_idx in range(prop[attr].shape[0]): - params.append(prop[attr][attr_idx]) - names.append(name+'_'+str(attr_idx)) - lows.append(lo_hi[0]) - highs.append(lo_hi[1]) - else: - params.append(getattr(prop, attr)) - names.append(name) - lows.append(lo_hi[0]) - highs.append(lo_hi[1]) - return params, names, lows, highs - - # Apply randomizations only on resets, due to current PhysX limitations - def apply_randomizations(self, dr_params): - # If we don't have a randomization frequency, randomize every step - rand_freq = dr_params.get("frequency", 1) - - # First, determine what to randomize: - # - non-environment parameters when > frequency steps have passed since the last non-environment - # - physical environments in the reset buffer, which have exceeded the randomization frequency threshold - # - on the first call, randomize everything - self.last_step = self.gym.get_frame_count(self.sim) - if self.first_randomization: - do_nonenv_randomize = True - env_ids = list(range(self.num_envs)) - else: - do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq - rand_envs = torch.where(self.randomize_buf >= rand_freq, 
torch.ones_like(self.randomize_buf), torch.zeros_like(self.randomize_buf)) - rand_envs = torch.logical_and(rand_envs, self.reset_buf) - env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist() - self.randomize_buf[rand_envs] = 0 - - if do_nonenv_randomize: - self.last_rand_step = self.last_step - - param_setters_map = get_property_setter_map(self.gym) - param_setter_defaults_map = get_default_setter_args(self.gym) - param_getters_map = get_property_getter_map(self.gym) - - # On first iteration, check the number of buckets - if self.first_randomization: - check_buckets(self.gym, self.envs, dr_params) - - for nonphysical_param in ["observations", "actions"]: - if nonphysical_param in dr_params and do_nonenv_randomize: - dist = dr_params[nonphysical_param]["distribution"] - op_type = dr_params[nonphysical_param]["operation"] - sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None - sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None - op = operator.add if op_type == 'additive' else operator.mul - - if sched_type == 'linear': - sched_scaling = 1.0 / sched_step * \ - min(self.last_step, sched_step) - elif sched_type == 'constant': - sched_scaling = 0 if self.last_step < sched_step else 1 - else: - sched_scaling = 1 - - if dist == 'gaussian': - mu, var = dr_params[nonphysical_param]["range"] - mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.]) - - if op_type == 'additive': - mu *= sched_scaling - var *= sched_scaling - mu_corr *= sched_scaling - var_corr *= sched_scaling - elif op_type == 'scaling': - var = var * sched_scaling # scale up var over time - mu = mu * sched_scaling + 1.0 * \ - (1.0 - sched_scaling) # linearly interpolate - - var_corr = var_corr * sched_scaling # scale up var over time - mu_corr = mu_corr * sched_scaling + 1.0 * \ - (1.0 - sched_scaling) # linearly interpolate - - def noise_lambda(tensor, param_name=nonphysical_param): - params = self.dr_randomizations[param_name] - corr = params.get('corr', None) - if corr is None: - corr = torch.randn_like(tensor) - params['corr'] = corr - corr = corr * params['var_corr'] + params['mu_corr'] - return op( - tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu']) - - self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr, 'var_corr': var_corr, 'noise_lambda': noise_lambda} - - elif dist == 'uniform': - lo, hi = dr_params[nonphysical_param]["range"] - lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.]) - - if op_type == 'additive': - lo *= sched_scaling - hi *= sched_scaling - lo_corr *= sched_scaling - hi_corr *= sched_scaling - elif op_type == 'scaling': - lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling) - hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling) - lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling) - hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling) - - def noise_lambda(tensor, param_name=nonphysical_param): - params = self.dr_randomizations[param_name] - corr = params.get('corr', None) - if corr is None: - corr = torch.randn_like(tensor) - params['corr'] = corr - corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr'] - return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo']) - - self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr, 'hi_corr': hi_corr, 
'noise_lambda': noise_lambda} - - if "sim_params" in dr_params and do_nonenv_randomize: - prop_attrs = dr_params["sim_params"] - prop = self.gym.get_sim_params(self.sim) - - if self.first_randomization: - self.original_props["sim_params"] = { - attr: getattr(prop, attr) for attr in dir(prop)} - - for attr, attr_randomization_params in prop_attrs.items(): - apply_random_samples( - prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step) - - self.gym.set_sim_params(self.sim, prop) - - # If self.actor_params_generator is initialized: use it to - # sample actor simulation params. This gives users the - # freedom to generate samples from arbitrary distributions, - # e.g. use full-covariance distributions instead of the DR's - # default of treating each simulation parameter independently. - extern_offsets = {} - if self.actor_params_generator is not None: - for env_id in env_ids: - self.extern_actor_params[env_id] = \ - self.actor_params_generator.sample() - extern_offsets[env_id] = 0 - - for actor, actor_properties in dr_params["actor_params"].items(): - for env_id in env_ids: - env = self.envs[env_id] - handle = self.gym.find_actor_handle(env, actor) - extern_sample = self.extern_actor_params[env_id] - - for prop_name, prop_attrs in actor_properties.items(): - if prop_name == 'color': - num_bodies = self.gym.get_actor_rigid_body_count( - env, handle) - for n in range(num_bodies): - self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL, - gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1))) - continue - if prop_name == 'scale': - attr_randomization_params = prop_attrs - sample = generate_random_samples(attr_randomization_params, 1, - self.last_step, None) - og_scale = 1 - if attr_randomization_params['operation'] == 'scaling': - new_scale = og_scale * sample - elif attr_randomization_params['operation'] == 'additive': - new_scale = og_scale + sample - self.gym.set_actor_scale(env, handle, new_scale) - continue - - prop = param_getters_map[prop_name](env, handle) - if isinstance(prop, list): - if self.first_randomization: - self.original_props[prop_name] = [ - {attr: getattr(p, attr) for attr in dir(p)} for p in prop] - for p, og_p in zip(prop, self.original_props[prop_name]): - for attr, attr_randomization_params in prop_attrs.items(): - smpl = None - if self.actor_params_generator is not None: - smpl, extern_offsets[env_id] = get_attr_val_from_sample( - extern_sample, extern_offsets[env_id], p, attr) - apply_random_samples( - p, og_p, attr, attr_randomization_params, - self.last_step, smpl) - else: - if self.first_randomization: - self.original_props[prop_name] = deepcopy(prop) - for attr, attr_randomization_params in prop_attrs.items(): - smpl = None - if self.actor_params_generator is not None: - smpl, extern_offsets[env_id] = get_attr_val_from_sample( - extern_sample, extern_offsets[env_id], prop, attr) - apply_random_samples( - prop, self.original_props[prop_name], attr, - attr_randomization_params, self.last_step, smpl) - - setter = param_setters_map[prop_name] - default_args = param_setter_defaults_map[prop_name] - setter(env, handle, prop, *default_args) - - if self.actor_params_generator is not None: - for env_id in env_ids: # check that we used all dims in sample - if extern_offsets[env_id] > 0: - extern_sample = self.extern_actor_params[env_id] - if extern_offsets[env_id] != extern_sample.shape[0]: - print('env_id', env_id, - 'extern_offset', extern_offsets[env_id], - 'vs extern_sample.shape', extern_sample.shape) 
- raise Exception("Invalid extern_sample size") - - self.first_randomization = False - - def pre_physics_step(self, actions): - raise NotImplementedError - - def _physics_step(self): - for i in range(self.control_freq_inv): - self.render() - self.gym.simulate(self.sim) - return - - def post_physics_step(self): - raise NotImplementedError - - -def get_attr_val_from_sample(sample, offset, prop, attr): - """Retrieves param value for the given prop and attr from the sample.""" - if sample is None: - return None, 0 - if isinstance(prop, np.ndarray): - smpl = sample[offset:offset+prop[attr].shape[0]] - return smpl, offset+prop[attr].shape[0] - else: - return sample[offset], offset+1 diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py deleted file mode 100644 index 037fe95b..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py +++ /dev/null @@ -1,692 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
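The apply_randomizations() method in the deleted base_task.py above builds per-parameter noise_lambda closures for observation and action noise, with the noise magnitude ramped in by a schedule. A simplified, self-contained sketch of the additive-uniform case with a linear schedule follows (correlated noise omitted; torch assumed available, as elsewhere in these files).

import torch

def make_uniform_noise_lambda(lo, hi, sched_steps, get_step):
    """Additive uniform noise whose range scales linearly from 0 up to [lo, hi]."""
    def noise_lambda(tensor):
        scaling = min(get_step(), sched_steps) / sched_steps   # 0 -> 1 over sched_steps
        lo_s, hi_s = lo * scaling, hi * scaling
        return tensor + torch.rand_like(tensor) * (hi_s - lo_s) + lo_s
    return noise_lambda

step_counter = {'frame': 0}
obs_noise = make_uniform_noise_lambda(-0.05, 0.05, sched_steps=1000,
                                      get_step=lambda: step_counter['frame'])

obs = torch.zeros(4)
step_counter['frame'] = 500    # halfway through the schedule
noisy_obs = obs_noise(obs)     # values drawn uniformly from [-0.025, 0.025)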
- -import numpy as np -import os -import torch - -from isaacgym import gymtorch -from isaacgym import gymapi -from isaacgym.torch_utils import * - -from utils import torch_utils - -from tasks.base_task import BaseTask - -class Humanoid(BaseTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self.cfg = cfg - self.sim_params = sim_params - self.physics_engine = physics_engine - - self._pd_control = self.cfg["env"]["pdControl"] - self.power_scale = self.cfg["env"]["powerScale"] - - self.debug_viz = self.cfg["env"]["enableDebugVis"] - self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] - self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] - self.plane_restitution = self.cfg["env"]["plane"]["restitution"] - - self.max_episode_length = self.cfg["env"]["episodeLength"] - self._local_root_obs = self.cfg["env"]["localRootObs"] - self._root_height_obs = self.cfg["env"].get("rootHeightObs", True) - self._enable_early_termination = self.cfg["env"]["enableEarlyTermination"] - - key_bodies = self.cfg["env"]["keyBodies"] - self._setup_character_props(key_bodies) - - self.cfg["env"]["numObservations"] = self.get_obs_size() - self.cfg["env"]["numActions"] = self.get_action_size() - - self.cfg["device_type"] = device_type - self.cfg["device_id"] = device_id - self.cfg["headless"] = headless - - super().__init__(cfg=self.cfg) - - self.dt = self.control_freq_inv * sim_params.dt - - # get gym GPU state tensors - actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) - dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) - sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) - rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim) - contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) - - sensors_per_env = 2 - self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, sensors_per_env * 6) - - dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) - self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, self.num_dof) - - self.gym.refresh_dof_state_tensor(self.sim) - self.gym.refresh_actor_root_state_tensor(self.sim) - self.gym.refresh_rigid_body_state_tensor(self.sim) - self.gym.refresh_net_contact_force_tensor(self.sim) - - self._root_states = gymtorch.wrap_tensor(actor_root_state) - num_actors = self.get_num_actors_per_env() - - self._humanoid_root_states = self._root_states.view(self.num_envs, num_actors, actor_root_state.shape[-1])[..., 0, :] - self._initial_humanoid_root_states = self._humanoid_root_states.clone() - self._initial_humanoid_root_states[:, 7:13] = 0 - - self._humanoid_actor_ids = num_actors * torch.arange(self.num_envs, device=self.device, dtype=torch.int32) - - # create some wrapper tensors for different slices - self._dof_state = gymtorch.wrap_tensor(dof_state_tensor) - dofs_per_env = self._dof_state.shape[0] // self.num_envs - self._dof_pos = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 0] - self._dof_vel = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 1] - - self._initial_dof_pos = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float) - self._initial_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) - - self._rigid_body_state = gymtorch.wrap_tensor(rigid_body_state) - bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs - rigid_body_state_reshaped = 
self._rigid_body_state.view(self.num_envs, bodies_per_env, 13) - - self._rigid_body_pos = rigid_body_state_reshaped[..., :self.num_bodies, 0:3] - self._rigid_body_rot = rigid_body_state_reshaped[..., :self.num_bodies, 3:7] - self._rigid_body_vel = rigid_body_state_reshaped[..., :self.num_bodies, 7:10] - self._rigid_body_ang_vel = rigid_body_state_reshaped[..., :self.num_bodies, 10:13] - - contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor) - self._contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., :self.num_bodies, :] - - self._terminate_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) - - self._build_termination_heights() - - contact_bodies = self.cfg["env"]["contactBodies"] - self._key_body_ids = self._build_key_body_ids_tensor(key_bodies) - self._contact_body_ids = self._build_contact_body_ids_tensor(contact_bodies) - - if self.viewer != None: - self._init_camera() - - return - - def get_obs_size(self): - return self._num_obs - - def get_action_size(self): - return self._num_actions - - def get_num_actors_per_env(self): - num_actors = self._root_states.shape[0] // self.num_envs - return num_actors - - def create_sim(self): - self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z') - self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) - - self._create_ground_plane() - self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs))) - return - - def reset(self, env_ids=None): - if (env_ids is None): - env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long) - self._reset_envs(env_ids) - return - - def set_char_color(self, col, env_ids): - for env_id in env_ids: - env_ptr = self.envs[env_id] - handle = self.humanoid_handles[env_id] - - for j in range(self.num_bodies): - self.gym.set_rigid_body_color(env_ptr, handle, j, gymapi.MESH_VISUAL, - gymapi.Vec3(col[0], col[1], col[2])) - - return - - def _reset_envs(self, env_ids): - if (len(env_ids) > 0): - self._reset_actors(env_ids) - self._reset_env_tensors(env_ids) - self._refresh_sim_tensors() - self._compute_observations(env_ids) - return - - def _reset_env_tensors(self, env_ids): - env_ids_int32 = self._humanoid_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - self.gym.set_dof_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - - self.progress_buf[env_ids] = 0 - self.reset_buf[env_ids] = 0 - self._terminate_buf[env_ids] = 0 - return - - def _create_ground_plane(self): - plane_params = gymapi.PlaneParams() - plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) - plane_params.static_friction = self.plane_static_friction - plane_params.dynamic_friction = self.plane_dynamic_friction - plane_params.restitution = self.plane_restitution - self.gym.add_ground(self.sim, plane_params) - return - - def _setup_character_props(self, key_bodies): - asset_file = self.cfg["env"]["asset"]["assetFileName"] - num_key_bodies = len(key_bodies) - - if (asset_file == "mjcf/amp_humanoid.xml"): - self._dof_body_ids = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14] - self._dof_offsets = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28] - self._dof_obs_size = 72 - self._num_actions = 28 - self._num_obs = 1 + 15 * (3 + 6 + 3 + 3) - 3 - - elif (asset_file == 
"mjcf/amp_humanoid_sword_shield.xml"): - self._dof_body_ids = [1, 2, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16] - self._dof_offsets = [0, 3, 6, 9, 10, 13, 16, 17, 20, 21, 24, 27, 28, 31] - self._dof_obs_size = 78 - self._num_actions = 31 - self._num_obs = 1 + 17 * (3 + 6 + 3 + 3) - 3 - - else: - print("Unsupported character config file: {s}".format(asset_file)) - assert(False) - - return - - def _build_termination_heights(self): - head_term_height = 0.3 - shield_term_height = 0.32 - - termination_height = self.cfg["env"]["terminationHeight"] - self._termination_heights = np.array([termination_height] * self.num_bodies) - - head_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "head") - self._termination_heights[head_id] = max(head_term_height, self._termination_heights[head_id]) - - asset_file = self.cfg["env"]["asset"]["assetFileName"] - if (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): - left_arm_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "left_lower_arm") - self._termination_heights[left_arm_id] = max(shield_term_height, self._termination_heights[left_arm_id]) - - self._termination_heights = to_torch(self._termination_heights, device=self.device) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - lower = gymapi.Vec3(-spacing, -spacing, 0.0) - upper = gymapi.Vec3(spacing, spacing, spacing) - - asset_root = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/simulator/assets" - asset_file = self.cfg["env"]["asset"]["assetFileName"] - - asset_path = os.path.join(asset_root, asset_file) - asset_root = os.path.dirname(asset_path) - asset_file = os.path.basename(asset_path) - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - #asset_options.fix_base_link = True - humanoid_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - actuator_props = self.gym.get_asset_actuator_properties(humanoid_asset) - motor_efforts = [prop.motor_effort for prop in actuator_props] - - # create force sensors at the feet - right_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "right_foot") - left_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "left_foot") - sensor_pose = gymapi.Transform() - - self.gym.create_asset_force_sensor(humanoid_asset, right_foot_idx, sensor_pose) - self.gym.create_asset_force_sensor(humanoid_asset, left_foot_idx, sensor_pose) - - self.max_motor_effort = max(motor_efforts) - self.motor_efforts = to_torch(motor_efforts, device=self.device) - - self.torso_index = 0 - self.num_bodies = self.gym.get_asset_rigid_body_count(humanoid_asset) - self.num_dof = self.gym.get_asset_dof_count(humanoid_asset) - self.num_joints = self.gym.get_asset_joint_count(humanoid_asset) - - self.humanoid_handles = [] - self.envs = [] - self.dof_limits_lower = [] - self.dof_limits_upper = [] - - for i in range(self.num_envs): - # create env instance - env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row) - self._build_env(i, env_ptr, humanoid_asset) - self.envs.append(env_ptr) - - dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.humanoid_handles[0]) - for j in range(self.num_dof): - if dof_prop['lower'][j] > dof_prop['upper'][j]: - self.dof_limits_lower.append(dof_prop['upper'][j]) - self.dof_limits_upper.append(dof_prop['lower'][j]) - else: - 
self.dof_limits_lower.append(dof_prop['lower'][j]) - self.dof_limits_upper.append(dof_prop['upper'][j]) - - self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) - self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) - - if (self._pd_control): - self._build_pd_action_offset_scale() - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - col_group = env_id - col_filter = self._get_humanoid_collision_filter() - segmentation_id = 0 - - start_pose = gymapi.Transform() - asset_file = self.cfg["env"]["asset"]["assetFileName"] - char_h = 0.89 - - start_pose.p = gymapi.Vec3(*get_axis_params(char_h, self.up_axis_idx)) - start_pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0) - - humanoid_handle = self.gym.create_actor(env_ptr, humanoid_asset, start_pose, "humanoid", col_group, col_filter, segmentation_id) - - self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle) - - for j in range(self.num_bodies): - self.gym.set_rigid_body_color(env_ptr, humanoid_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.54, 0.85, 0.2)) - - if (self._pd_control): - dof_prop = self.gym.get_asset_dof_properties(humanoid_asset) - dof_prop["driveMode"] = gymapi.DOF_MODE_POS - self.gym.set_actor_dof_properties(env_ptr, humanoid_handle, dof_prop) - - self.humanoid_handles.append(humanoid_handle) - - return - - def _build_pd_action_offset_scale(self): - num_joints = len(self._dof_offsets) - 1 - - lim_low = self.dof_limits_lower.cpu().numpy() - lim_high = self.dof_limits_upper.cpu().numpy() - - for j in range(num_joints): - dof_offset = self._dof_offsets[j] - dof_size = self._dof_offsets[j + 1] - self._dof_offsets[j] - - if (dof_size == 3): - curr_low = lim_low[dof_offset:(dof_offset + dof_size)] - curr_high = lim_high[dof_offset:(dof_offset + dof_size)] - curr_low = np.max(np.abs(curr_low)) - curr_high = np.max(np.abs(curr_high)) - curr_scale = max([curr_low, curr_high]) - curr_scale = 1.2 * curr_scale - curr_scale = min([curr_scale, np.pi]) - - lim_low[dof_offset:(dof_offset + dof_size)] = -curr_scale - lim_high[dof_offset:(dof_offset + dof_size)] = curr_scale - - #lim_low[dof_offset:(dof_offset + dof_size)] = -np.pi - #lim_high[dof_offset:(dof_offset + dof_size)] = np.pi - - - elif (dof_size == 1): - curr_low = lim_low[dof_offset] - curr_high = lim_high[dof_offset] - curr_mid = 0.5 * (curr_high + curr_low) - - # extend the action range to be a bit beyond the joint limits so that the motors - # don't lose their strength as they approach the joint limits - curr_scale = 0.7 * (curr_high - curr_low) - curr_low = curr_mid - curr_scale - curr_high = curr_mid + curr_scale - - lim_low[dof_offset] = curr_low - lim_high[dof_offset] = curr_high - - self._pd_action_offset = 0.5 * (lim_high + lim_low) - self._pd_action_scale = 0.5 * (lim_high - lim_low) - self._pd_action_offset = to_torch(self._pd_action_offset, device=self.device) - self._pd_action_scale = to_torch(self._pd_action_scale, device=self.device) - - return - - def _get_humanoid_collision_filter(self): - return 0 - - def _compute_reward(self, actions): - self.rew_buf[:] = compute_humanoid_reward(self.obs_buf) - return - - def _compute_reset(self): - self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf, - self._contact_forces, self._contact_body_ids, - self._rigid_body_pos, self.max_episode_length, - self._enable_early_termination, self._termination_heights) - return - - def _refresh_sim_tensors(self): - self.gym.refresh_dof_state_tensor(self.sim) - 
self.gym.refresh_actor_root_state_tensor(self.sim) - self.gym.refresh_rigid_body_state_tensor(self.sim) - - self.gym.refresh_force_sensor_tensor(self.sim) - self.gym.refresh_dof_force_tensor(self.sim) - self.gym.refresh_net_contact_force_tensor(self.sim) - return - - def _compute_observations(self, env_ids=None): - obs = self._compute_humanoid_obs(env_ids) - - if (env_ids is None): - self.obs_buf[:] = obs - else: - self.obs_buf[env_ids] = obs - - return - - def _compute_humanoid_obs(self, env_ids=None): - if (env_ids is None): - body_pos = self._rigid_body_pos - body_rot = self._rigid_body_rot - body_vel = self._rigid_body_vel - body_ang_vel = self._rigid_body_ang_vel - else: - body_pos = self._rigid_body_pos[env_ids] - body_rot = self._rigid_body_rot[env_ids] - body_vel = self._rigid_body_vel[env_ids] - body_ang_vel = self._rigid_body_ang_vel[env_ids] - - obs = compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, self._local_root_obs, - self._root_height_obs) - return obs - - def _reset_actors(self, env_ids): - self._humanoid_root_states[env_ids] = self._initial_humanoid_root_states[env_ids] - self._dof_pos[env_ids] = self._initial_dof_pos[env_ids] - self._dof_vel[env_ids] = self._initial_dof_vel[env_ids] - return - - def pre_physics_step(self, actions): - self.actions = actions.to(self.device).clone() - if (self._pd_control): - pd_tar = self._action_to_pd_targets(self.actions) - pd_tar_tensor = gymtorch.unwrap_tensor(pd_tar) - self.gym.set_dof_position_target_tensor(self.sim, pd_tar_tensor) - else: - forces = self.actions * self.motor_efforts.unsqueeze(0) * self.power_scale - force_tensor = gymtorch.unwrap_tensor(forces) - self.gym.set_dof_actuation_force_tensor(self.sim, force_tensor) - - return - - def post_physics_step(self): - self.progress_buf += 1 - - self._refresh_sim_tensors() - self._compute_observations() - self._compute_reward(self.actions) - self._compute_reset() - - self.extras["terminate"] = self._terminate_buf - - # debug viz - if self.viewer and self.debug_viz: - self._update_debug_viz() - - return - - def render(self, sync_frame_time=False): - if self.viewer: - self._update_camera() - - super().render(sync_frame_time) - return - - def _build_key_body_ids_tensor(self, key_body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in key_body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _build_contact_body_ids_tensor(self, contact_body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in contact_body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _action_to_pd_targets(self, action): - pd_tar = self._pd_action_offset + self._pd_action_scale * action - return pd_tar - - def _init_camera(self): - self.gym.refresh_actor_root_state_tensor(self.sim) - self._cam_prev_char_pos = self._humanoid_root_states[0, 0:3].cpu().numpy() - - cam_pos = gymapi.Vec3(self._cam_prev_char_pos[0], - self._cam_prev_char_pos[1] - 3.0, - 1.0) - cam_target = gymapi.Vec3(self._cam_prev_char_pos[0], - self._cam_prev_char_pos[1], - 1.0) - self.gym.viewer_camera_look_at(self.viewer, None, 
cam_pos, cam_target) - return - - def _update_camera(self): - self.gym.refresh_actor_root_state_tensor(self.sim) - char_root_pos = self._humanoid_root_states[0, 0:3].cpu().numpy() - - cam_trans = self.gym.get_viewer_camera_transform(self.viewer, None) - cam_pos = np.array([cam_trans.p.x, cam_trans.p.y, cam_trans.p.z]) - cam_delta = cam_pos - self._cam_prev_char_pos - - new_cam_target = gymapi.Vec3(char_root_pos[0], char_root_pos[1], 1.0) - new_cam_pos = gymapi.Vec3(char_root_pos[0] + cam_delta[0], - char_root_pos[1] + cam_delta[1], - cam_pos[2]) - - self.gym.viewer_camera_look_at(self.viewer, None, new_cam_pos, new_cam_target) - - self._cam_prev_char_pos[:] = char_root_pos - return - - def _update_debug_viz(self): - self.gym.clear_lines(self.viewer) - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def dof_to_obs(pose, dof_obs_size, dof_offsets): - # type: (Tensor, int, List[int]) -> Tensor - joint_obs_size = 6 - num_joints = len(dof_offsets) - 1 - - dof_obs_shape = pose.shape[:-1] + (dof_obs_size,) - dof_obs = torch.zeros(dof_obs_shape, device=pose.device) - dof_obs_offset = 0 - - for j in range(num_joints): - dof_offset = dof_offsets[j] - dof_size = dof_offsets[j + 1] - dof_offsets[j] - joint_pose = pose[:, dof_offset:(dof_offset + dof_size)] - - # assume this is a spherical joint - if (dof_size == 3): - joint_pose_q = torch_utils.exp_map_to_quat(joint_pose) - elif (dof_size == 1): - axis = torch.tensor([0.0, 1.0, 0.0], dtype=joint_pose.dtype, device=pose.device) - joint_pose_q = quat_from_angle_axis(joint_pose[..., 0], axis) - else: - joint_pose_q = None - assert(False), "Unsupported joint type" - - joint_dof_obs = torch_utils.quat_to_tan_norm(joint_pose_q) - dof_obs[:, (j * joint_obs_size):((j + 1) * joint_obs_size)] = joint_dof_obs - - assert((num_joints * joint_obs_size) == dof_obs_size) - - return dof_obs - -@torch.jit.script -def compute_humanoid_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos, - local_root_obs, root_height_obs, dof_obs_size, dof_offsets): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor - root_h = root_pos[:, 2:3] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - if (local_root_obs): - root_rot_obs = quat_mul(heading_rot, root_rot) - else: - root_rot_obs = root_rot - root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs) - - if (not root_height_obs): - root_h_obs = torch.zeros_like(root_h) - else: - root_h_obs = root_h - - local_root_vel = quat_rotate(heading_rot, root_vel) - local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel) - - root_pos_expand = root_pos.unsqueeze(-2) - local_key_body_pos = key_body_pos - root_pos_expand - - heading_rot_expand = heading_rot.unsqueeze(-2) - heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1)) - flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1], local_key_body_pos.shape[2]) - flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1], - heading_rot_expand.shape[2]) - local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos) - flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0], local_key_body_pos.shape[1] * local_key_body_pos.shape[2]) - - dof_obs = dof_to_obs(dof_pos, dof_obs_size, 
dof_offsets) - - obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel, flat_local_key_pos), dim=-1) - return obs - -@torch.jit.script -def compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, local_root_obs, root_height_obs): - # type: (Tensor, Tensor, Tensor, Tensor, bool, bool) -> Tensor - root_pos = body_pos[:, 0, :] - root_rot = body_rot[:, 0, :] - - root_h = root_pos[:, 2:3] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - if (not root_height_obs): - root_h_obs = torch.zeros_like(root_h) - else: - root_h_obs = root_h - - heading_rot_expand = heading_rot.unsqueeze(-2) - heading_rot_expand = heading_rot_expand.repeat((1, body_pos.shape[1], 1)) - flat_heading_rot = heading_rot_expand.reshape(heading_rot_expand.shape[0] * heading_rot_expand.shape[1], - heading_rot_expand.shape[2]) - - root_pos_expand = root_pos.unsqueeze(-2) - local_body_pos = body_pos - root_pos_expand - flat_local_body_pos = local_body_pos.reshape(local_body_pos.shape[0] * local_body_pos.shape[1], local_body_pos.shape[2]) - flat_local_body_pos = quat_rotate(flat_heading_rot, flat_local_body_pos) - local_body_pos = flat_local_body_pos.reshape(local_body_pos.shape[0], local_body_pos.shape[1] * local_body_pos.shape[2]) - local_body_pos = local_body_pos[..., 3:] # remove root pos - - flat_body_rot = body_rot.reshape(body_rot.shape[0] * body_rot.shape[1], body_rot.shape[2]) - flat_local_body_rot = quat_mul(flat_heading_rot, flat_body_rot) - flat_local_body_rot_obs = torch_utils.quat_to_tan_norm(flat_local_body_rot) - local_body_rot_obs = flat_local_body_rot_obs.reshape(body_rot.shape[0], body_rot.shape[1] * flat_local_body_rot_obs.shape[1]) - - if (local_root_obs): - root_rot_obs = torch_utils.quat_to_tan_norm(root_rot) - local_body_rot_obs[..., 0:6] = root_rot_obs - - flat_body_vel = body_vel.reshape(body_vel.shape[0] * body_vel.shape[1], body_vel.shape[2]) - flat_local_body_vel = quat_rotate(flat_heading_rot, flat_body_vel) - local_body_vel = flat_local_body_vel.reshape(body_vel.shape[0], body_vel.shape[1] * body_vel.shape[2]) - - flat_body_ang_vel = body_ang_vel.reshape(body_ang_vel.shape[0] * body_ang_vel.shape[1], body_ang_vel.shape[2]) - flat_local_body_ang_vel = quat_rotate(flat_heading_rot, flat_body_ang_vel) - local_body_ang_vel = flat_local_body_ang_vel.reshape(body_ang_vel.shape[0], body_ang_vel.shape[1] * body_ang_vel.shape[2]) - - obs = torch.cat((root_h_obs, local_body_pos, local_body_rot_obs, local_body_vel, local_body_ang_vel), dim=-1) - return obs - - -@torch.jit.script -def compute_humanoid_reward(obs_buf): - # type: (Tensor) -> Tensor - reward = torch.ones_like(obs_buf[:, 0]) - return reward - -@torch.jit.script -def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos, - max_episode_length, enable_early_termination, termination_heights): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor] - terminated = torch.zeros_like(reset_buf) - - if (enable_early_termination): - masked_contact_buf = contact_buf.clone() - masked_contact_buf[:, contact_body_ids, :] = 0 - fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1) - fall_contact = torch.any(fall_contact, dim=-1) - - body_height = rigid_body_pos[..., 2] - fall_height = body_height < termination_heights - fall_height[:, contact_body_ids] = False - fall_height = torch.any(fall_height, dim=-1) - - has_fallen = torch.logical_and(fall_contact, fall_height) - - # first timestep can 
sometimes still have nonzero contact forces - # so only check after first couple of steps - has_fallen *= (progress_buf > 1) - terminated = torch.where(has_fallen, torch.ones_like(reset_buf), terminated) - - reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated) - - return reset, terminated diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py deleted file mode 100644 index 7022bf76..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py +++ /dev/null @@ -1,344 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
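The compute_humanoid_reset() function at the end of the deleted humanoid.py above flags an environment as terminated when some body other than an allowed contact body both reports a contact force above 0.1 and has dropped below its termination height, skipping the first simulation step. A toy-sized walkthrough of that test, with an illustrative two-body layout and made-up numbers, is shown below.

import torch

# one env, two bodies: body 0 is a foot (allowed contact), body 1 is not
contact_buf = torch.tensor([[[0.0, 0.0, 0.0],
                             [0.3, 0.0, 0.0]]])   # body 1 reports a contact force
body_height = torch.tensor([[0.05, 0.40]])        # z-height of each body
termination_heights = torch.tensor([0.15, 0.90])  # body 1 is below its threshold
contact_body_ids = torch.tensor([0])              # feet may touch the ground
progress_buf = torch.tensor([10])                 # past the first step

masked_contact = contact_buf.clone()
masked_contact[:, contact_body_ids, :] = 0        # ignore allowed contacts
fall_contact = torch.any(torch.any(torch.abs(masked_contact) > 0.1, dim=-1), dim=-1)

fall_height = body_height < termination_heights
fall_height[:, contact_body_ids] = False          # allowed bodies never trigger the height test
fall_height = torch.any(fall_height, dim=-1)

has_fallen = torch.logical_and(fall_contact, fall_height) & (progress_buf > 1)
print(has_fallen)   # tensor([True]) -> this env is reset with its terminate flag set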
- -from enum import Enum -import numpy as np -import torch - -from isaacgym import gymapi -from isaacgym import gymtorch - -from tasks.humanoid import Humanoid, dof_to_obs -from utils import gym_util -from utils.motion_lib import MotionLib -from isaacgym.torch_utils import * - -from utils import torch_utils - -class HumanoidAMP(Humanoid): - class StateInit(Enum): - Default = 0 - Start = 1 - Random = 2 - Hybrid = 3 - - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - state_init = cfg["env"]["stateInit"] - self._state_init = HumanoidAMP.StateInit[state_init] - self._hybrid_init_prob = cfg["env"]["hybridInitProb"] - self._num_amp_obs_steps = cfg["env"]["numAMPObsSteps"] - assert(self._num_amp_obs_steps >= 2) - - self._reset_default_env_ids = [] - self._reset_ref_env_ids = [] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - motion_file = cfg['env']['motion_file'] - self._load_motion(motion_file) - - self._amp_obs_buf = torch.zeros((self.num_envs, self._num_amp_obs_steps, self._num_amp_obs_per_step), device=self.device, dtype=torch.float) - self._curr_amp_obs_buf = self._amp_obs_buf[:, 0] - self._hist_amp_obs_buf = self._amp_obs_buf[:, 1:] - - self._amp_obs_demo_buf = None - - return - - def post_physics_step(self): - super().post_physics_step() - - self._update_hist_amp_obs() - self._compute_amp_observations() - - amp_obs_flat = self._amp_obs_buf.view(-1, self.get_num_amp_obs()) - self.extras["amp_obs"] = amp_obs_flat - - return - - def get_num_amp_obs(self): - return self._num_amp_obs_steps * self._num_amp_obs_per_step - - def fetch_amp_obs_demo(self, num_samples): - - if (self._amp_obs_demo_buf is None): - self._build_amp_obs_demo_buf(num_samples) - else: - assert(self._amp_obs_demo_buf.shape[0] == num_samples) - - motion_ids = self._motion_lib.sample_motions(num_samples) - - # since negative times are added to these values in build_amp_obs_demo, - # we shift them into the range [0 + truncate_time, end of clip] - truncate_time = self.dt * (self._num_amp_obs_steps - 1) - motion_times0 = self._motion_lib.sample_time(motion_ids, truncate_time=truncate_time) - motion_times0 += truncate_time - - amp_obs_demo = self.build_amp_obs_demo(motion_ids, motion_times0) - self._amp_obs_demo_buf[:] = amp_obs_demo.view(self._amp_obs_demo_buf.shape) - amp_obs_demo_flat = self._amp_obs_demo_buf.view(-1, self.get_num_amp_obs()) - - return amp_obs_demo_flat - - def build_amp_obs_demo(self, motion_ids, motion_times0): - dt = self.dt - - motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps]) - motion_times = motion_times0.unsqueeze(-1) - time_steps = -dt * torch.arange(0, self._num_amp_obs_steps, device=self.device) - motion_times = motion_times + time_steps - - motion_ids = motion_ids.view(-1) - motion_times = motion_times.view(-1) - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, - dof_pos, dof_vel, key_pos, - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - return amp_obs_demo - - def _build_amp_obs_demo_buf(self, num_samples): - self._amp_obs_demo_buf = torch.zeros((num_samples, self._num_amp_obs_steps, self._num_amp_obs_per_step), device=self.device, dtype=torch.float32) - return - - def _setup_character_props(self, 
key_bodies): - super()._setup_character_props(key_bodies) - - asset_file = self.cfg["env"]["asset"]["assetFileName"] - num_key_bodies = len(key_bodies) - - if (asset_file == "mjcf/amp_humanoid.xml"): - self._num_amp_obs_per_step = 13 + self._dof_obs_size + 28 + 3 * num_key_bodies # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos] - elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): - self._num_amp_obs_per_step = 13 + self._dof_obs_size + 31 + 3 * num_key_bodies # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos] - else: - print("Unsupported character config file: {s}".format(asset_file)) - assert(False) - - return - - def _load_motion(self, motion_file): - assert(self._dof_offsets[-1] == self.num_dof) - self._motion_lib = MotionLib(motion_file=motion_file, - dof_body_ids=self._dof_body_ids, - dof_offsets=self._dof_offsets, - key_body_ids=self._key_body_ids.cpu().numpy(), - device=self.device) - return - - def _reset_envs(self, env_ids): - self._reset_default_env_ids = [] - self._reset_ref_env_ids = [] - - super()._reset_envs(env_ids) - self._init_amp_obs(env_ids) - - return - - def _reset_actors(self, env_ids): - if (self._state_init == HumanoidAMP.StateInit.Default): - self._reset_default(env_ids) - elif (self._state_init == HumanoidAMP.StateInit.Start - or self._state_init == HumanoidAMP.StateInit.Random): - self._reset_ref_state_init(env_ids) - elif (self._state_init == HumanoidAMP.StateInit.Hybrid): - self._reset_hybrid_state_init(env_ids) - else: - assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init)) - return - - def _reset_default(self, env_ids): - self._humanoid_root_states[env_ids] = self._initial_humanoid_root_states[env_ids] - self._dof_pos[env_ids] = self._initial_dof_pos[env_ids] - self._dof_vel[env_ids] = self._initial_dof_vel[env_ids] - self._reset_default_env_ids = env_ids - return - - def _reset_ref_state_init(self, env_ids): - num_envs = env_ids.shape[0] - motion_ids = self._motion_lib.sample_motions(num_envs) - - if (self._state_init == HumanoidAMP.StateInit.Random - or self._state_init == HumanoidAMP.StateInit.Hybrid): - motion_times = self._motion_lib.sample_time(motion_ids) - elif (self._state_init == HumanoidAMP.StateInit.Start): - motion_times = torch.zeros(num_envs, device=self.device) - else: - assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init)) - - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - - self._set_env_state(env_ids=env_ids, - root_pos=root_pos, - root_rot=root_rot, - dof_pos=dof_pos, - root_vel=root_vel, - root_ang_vel=root_ang_vel, - dof_vel=dof_vel) - - self._reset_ref_env_ids = env_ids - self._reset_ref_motion_ids = motion_ids - self._reset_ref_motion_times = motion_times - return - - def _reset_hybrid_state_init(self, env_ids): - num_envs = env_ids.shape[0] - ref_probs = to_torch(np.array([self._hybrid_init_prob] * num_envs), device=self.device) - ref_init_mask = torch.bernoulli(ref_probs) == 1.0 - - ref_reset_ids = env_ids[ref_init_mask] - if (len(ref_reset_ids) > 0): - self._reset_ref_state_init(ref_reset_ids) - - default_reset_ids = env_ids[torch.logical_not(ref_init_mask)] - if (len(default_reset_ids) > 0): - self._reset_default(default_reset_ids) - - return - - def _init_amp_obs(self, env_ids): - self._compute_amp_observations(env_ids) - - if (len(self._reset_default_env_ids) > 0): - 
self._init_amp_obs_default(self._reset_default_env_ids) - - if (len(self._reset_ref_env_ids) > 0): - self._init_amp_obs_ref(self._reset_ref_env_ids, self._reset_ref_motion_ids, - self._reset_ref_motion_times) - - return - - def _init_amp_obs_default(self, env_ids): - curr_amp_obs = self._curr_amp_obs_buf[env_ids].unsqueeze(-2) - self._hist_amp_obs_buf[env_ids] = curr_amp_obs - return - - def _init_amp_obs_ref(self, env_ids, motion_ids, motion_times): - dt = self.dt - motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps - 1]) - motion_times = motion_times.unsqueeze(-1) - time_steps = -dt * (torch.arange(0, self._num_amp_obs_steps - 1, device=self.device) + 1) - motion_times = motion_times + time_steps - - motion_ids = motion_ids.view(-1) - motion_times = motion_times.view(-1) - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, - dof_pos, dof_vel, key_pos, - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - self._hist_amp_obs_buf[env_ids] = amp_obs_demo.view(self._hist_amp_obs_buf[env_ids].shape) - return - - def _set_env_state(self, env_ids, root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel): - self._humanoid_root_states[env_ids, 0:3] = root_pos - self._humanoid_root_states[env_ids, 3:7] = root_rot - self._humanoid_root_states[env_ids, 7:10] = root_vel - self._humanoid_root_states[env_ids, 10:13] = root_ang_vel - - self._dof_pos[env_ids] = dof_pos - self._dof_vel[env_ids] = dof_vel - return - - def _update_hist_amp_obs(self, env_ids=None): - if (env_ids is None): - for i in reversed(range(self._amp_obs_buf.shape[1] - 1)): - self._amp_obs_buf[:, i + 1] = self._amp_obs_buf[:, i] - else: - for i in reversed(range(self._amp_obs_buf.shape[1] - 1)): - self._amp_obs_buf[env_ids, i + 1] = self._amp_obs_buf[env_ids, i] - return - - def _compute_amp_observations(self, env_ids=None): - key_body_pos = self._rigid_body_pos[:, self._key_body_ids, :] - if (env_ids is None): - self._curr_amp_obs_buf[:] = build_amp_observations(self._rigid_body_pos[:, 0, :], - self._rigid_body_rot[:, 0, :], - self._rigid_body_vel[:, 0, :], - self._rigid_body_ang_vel[:, 0, :], - self._dof_pos, self._dof_vel, key_body_pos, - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - else: - self._curr_amp_obs_buf[env_ids] = build_amp_observations(self._rigid_body_pos[env_ids][:, 0, :], - self._rigid_body_rot[env_ids][:, 0, :], - self._rigid_body_vel[env_ids][:, 0, :], - self._rigid_body_ang_vel[env_ids][:, 0, :], - self._dof_pos[env_ids], self._dof_vel[env_ids], key_body_pos[env_ids], - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - return - - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos, - local_root_obs, root_height_obs, dof_obs_size, dof_offsets): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor - root_h = root_pos[:, 2:3] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - if (local_root_obs): - root_rot_obs = quat_mul(heading_rot, root_rot) - else: - 
root_rot_obs = root_rot - root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs) - - if (not root_height_obs): - root_h_obs = torch.zeros_like(root_h) - else: - root_h_obs = root_h - - local_root_vel = quat_rotate(heading_rot, root_vel) - local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel) - - root_pos_expand = root_pos.unsqueeze(-2) - local_key_body_pos = key_body_pos - root_pos_expand - - heading_rot_expand = heading_rot.unsqueeze(-2) - heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1)) - flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1], local_key_body_pos.shape[2]) - flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1], - heading_rot_expand.shape[2]) - local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos) - flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0], local_key_body_pos.shape[1] * local_key_body_pos.shape[2]) - - dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets) - obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel, flat_local_key_pos), dim=-1) - return obs diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py deleted file mode 100644 index 1091a801..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
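For reference, the flattened AMP observation width implied by the deleted _setup_character_props and build_amp_observations above follows directly from the per-step layout [root_h, tan-norm root_rot, root_vel, root_ang_vel, dof_obs, dof_vel, key_body_pos]. A minimal sketch of that arithmetic, assuming the usual amp_humanoid.xml settings (dof_obs_size = 72 and 4 key bodies are assumptions, not values taken from this patch):

    # Illustrative sketch only: reproduces the size arithmetic of the deleted
    # _setup_character_props (amp_humanoid.xml branch). dof_obs_size=72 and
    # num_key_bodies=4 are assumed defaults, not read from this patch.
    def amp_obs_size(num_amp_obs_steps, dof_obs_size=72, num_dof_vel=28, num_key_bodies=4):
        root_terms = 1 + 6 + 3 + 3        # root_h, tan-norm root_rot, root_vel, root_ang_vel = 13
        per_step = root_terms + dof_obs_size + num_dof_vel + 3 * num_key_bodies
        return num_amp_obs_steps * per_step

    print(amp_obs_size(num_amp_obs_steps=2))   # 2 * 125 = 250 features per env

fetch_amp_obs_demo above returns demo tensors flattened to exactly this width via get_num_amp_obs().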
- -import torch - -from isaacgym import gymapi -from isaacgym import gymtorch - -from tasks.humanoid_amp import HumanoidAMP -from isaacgym.torch_utils import * - -from utils import torch_utils -from utils import gym_util - - -class HumanoidAMPGetup(HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - - self._recovery_episode_prob = cfg["env"]["recoveryEpisodeProb"] - self._recovery_steps = cfg["env"]["recoverySteps"] - self._fall_init_prob = cfg["env"]["fallInitProb"] - - self._reset_fall_env_ids = [] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._recovery_counter = torch.zeros(self.num_envs, device=self.device, dtype=torch.int) - - self._generate_fall_states() - - return - - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - - self._update_recovery_count() - return - - def _generate_fall_states(self): - max_steps = 150 - - env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long) - root_states = self._initial_humanoid_root_states[env_ids].clone() - root_states[..., 3:7] = torch.randn_like(root_states[..., 3:7]) - root_states[..., 3:7] = torch.nn.functional.normalize(root_states[..., 3:7], dim=-1) - self._humanoid_root_states[env_ids] = root_states - - env_ids_int32 = self._humanoid_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - self.gym.set_dof_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - - - rand_actions = np.random.uniform(-0.5, 0.5, size=[self.num_envs, self.get_action_size()]) - rand_actions = to_torch(rand_actions, device=self.device) - self.pre_physics_step(rand_actions) - - # step physics and render each frame - for i in range(max_steps): - self.render() - self.gym.simulate(self.sim) - - self._refresh_sim_tensors() - - self._fall_root_states = self._humanoid_root_states.clone() - self._fall_root_states[:, 7:13] = 0 - self._fall_dof_pos = self._dof_pos.clone() - self._fall_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) - - return - - def _reset_actors(self, env_ids): - num_envs = env_ids.shape[0] - recovery_probs = to_torch(np.array([self._recovery_episode_prob] * num_envs), device=self.device) - recovery_mask = torch.bernoulli(recovery_probs) == 1.0 - terminated_mask = (self._terminate_buf[env_ids] == 1) - recovery_mask = torch.logical_and(recovery_mask, terminated_mask) - - recovery_ids = env_ids[recovery_mask] - if (len(recovery_ids) > 0): - self._reset_recovery_episode(recovery_ids) - - - nonrecovery_ids = env_ids[torch.logical_not(recovery_mask)] - fall_probs = to_torch(np.array([self._fall_init_prob] * nonrecovery_ids.shape[0]), device=self.device) - fall_mask = torch.bernoulli(fall_probs) == 1.0 - fall_ids = nonrecovery_ids[fall_mask] - if (len(fall_ids) > 0): - self._reset_fall_episode(fall_ids) - - - nonfall_ids = nonrecovery_ids[torch.logical_not(fall_mask)] - if (len(nonfall_ids) > 0): - super()._reset_actors(nonfall_ids) - self._recovery_counter[nonfall_ids] = 0 - - return - - def _reset_recovery_episode(self, env_ids): - self._recovery_counter[env_ids] = self._recovery_steps - return - - def _reset_fall_episode(self, env_ids): - fall_state_ids = torch.randint_like(env_ids, low=0, 
high=self._fall_root_states.shape[0]) - self._humanoid_root_states[env_ids] = self._fall_root_states[fall_state_ids] - self._dof_pos[env_ids] = self._fall_dof_pos[fall_state_ids] - self._dof_vel[env_ids] = self._fall_dof_vel[fall_state_ids] - self._recovery_counter[env_ids] = self._recovery_steps - self._reset_fall_env_ids = env_ids - return - - def _reset_envs(self, env_ids): - self._reset_fall_env_ids = [] - super()._reset_envs(env_ids) - return - - def _init_amp_obs(self, env_ids): - super()._init_amp_obs(env_ids) - - if (len(self._reset_fall_env_ids) > 0): - self._init_amp_obs_default(self._reset_fall_env_ids) - - return - - def _update_recovery_count(self): - self._recovery_counter -= 1 - self._recovery_counter = torch.clamp_min(self._recovery_counter, 0) - return - - def _compute_reset(self): - super()._compute_reset() - - is_recovery = self._recovery_counter > 0 - self.reset_buf[is_recovery] = 0 - self._terminate_buf[is_recovery] = 0 - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py deleted file mode 100644 index 6f266e4c..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
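The reset flow removed with HumanoidAMPGetup above partitions the environments being reset into recovery, fall-state and regular resets via two Bernoulli draws. A standalone sketch of that partitioning, with arbitrary example probabilities (the helper name and sizes are illustrative, not from the deleted file):

    # Sketch of the partitioning in the deleted _reset_actors: terminated envs
    # may continue as "recovery" episodes; the remainder is split between
    # fall-state initialisation and the parent class's regular reset.
    import torch

    def partition_resets(env_ids, terminated, recovery_prob=0.2, fall_prob=0.1):
        recovery_mask = (torch.rand(env_ids.shape[0]) < recovery_prob) & terminated[env_ids].bool()
        recovery_ids = env_ids[recovery_mask]

        rest = env_ids[~recovery_mask]
        fall_mask = torch.rand(rest.shape[0]) < fall_prob
        return recovery_ids, rest[fall_mask], rest[~fall_mask]

    env_ids = torch.arange(8)
    terminated = torch.randint(0, 2, (8,))
    print(partition_resets(env_ids, terminated))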
- -import torch - -import tasks.humanoid_amp as humanoid_amp - -class HumanoidAMPTask(humanoid_amp.HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._enable_task_obs = cfg["env"]["enableTaskObs"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - return - - - def get_obs_size(self): - obs_size = super().get_obs_size() - if (self._enable_task_obs): - task_obs_size = self.get_task_obs_size() - obs_size += task_obs_size - return obs_size - - def get_task_obs_size(self): - return 0 - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._update_task() - return - - def render(self, sync_frame_time=False): - super().render(sync_frame_time) - - if self.viewer: - self._draw_task() - return - - def _update_task(self): - return - - def _reset_envs(self, env_ids): - super()._reset_envs(env_ids) - self._reset_task(env_ids) - return - - def _reset_task(self, env_ids): - return - - def _compute_observations(self, env_ids=None): - humanoid_obs = self._compute_humanoid_obs(env_ids) - - if (self._enable_task_obs): - task_obs = self._compute_task_obs(env_ids) - obs = torch.cat([humanoid_obs, task_obs], dim=-1) - else: - obs = humanoid_obs - - if (env_ids is None): - self.obs_buf[:] = obs - else: - self.obs_buf[env_ids] = obs - return - - def _compute_task_obs(self, env_ids=None): - return NotImplemented - - def _compute_reward(self, actions): - return NotImplemented - - def _draw_task(self): - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py deleted file mode 100644 index 0b9d420f..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
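HumanoidAMPTask above only appends task observations to the humanoid observations when enableTaskObs is set, with subclasses supplying get_task_obs_size() and _compute_task_obs(). A shape-only sketch of that concatenation, using placeholder sizes (105 is an arbitrary humanoid observation width; 5 matches the heading task defined below):

    # Shape-only sketch of the deleted _compute_observations: task observations
    # are concatenated after the humanoid observations.
    import torch

    num_envs, humanoid_obs_size, task_obs_size = 4, 105, 5
    obs = torch.cat([torch.zeros(num_envs, humanoid_obs_size),
                     torch.zeros(num_envs, task_obs_size)], dim=-1)
    assert obs.shape == (num_envs, humanoid_obs_size + task_obs_size)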
- -import torch - -import tasks.humanoid as humanoid -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -from isaacgym import gymapi -from isaacgym import gymtorch -from isaacgym.torch_utils import * - -TAR_ACTOR_ID = 1 -TAR_FACING_ACTOR_ID = 2 - -class HumanoidHeading(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._tar_speed_min = cfg["env"]["tarSpeedMin"] - self._tar_speed_max = cfg["env"]["tarSpeedMax"] - self._heading_change_steps_min = cfg["env"]["headingChangeStepsMin"] - self._heading_change_steps_max = cfg["env"]["headingChangeStepsMax"] - self._enable_rand_heading = cfg["env"]["enableRandHeading"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._heading_change_steps = torch.zeros([self.num_envs], device=self.device, dtype=torch.int64) - self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - self._tar_speed = torch.ones([self.num_envs], device=self.device, dtype=torch.float) - self._tar_dir = torch.zeros([self.num_envs, 2], device=self.device, dtype=torch.float) - self._tar_dir[..., 0] = 1.0 - - self._tar_facing_dir = torch.zeros([self.num_envs, 2], device=self.device, dtype=torch.float) - self._tar_facing_dir[..., 0] = 1.0 - - if (not self.headless): - self._build_marker_state_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 5 - return obs_size - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._prev_root_pos[:] = self._humanoid_root_states[..., 0:3] - return - - def _update_marker(self): - humanoid_root_pos = self._humanoid_root_states[..., 0:3] - self._marker_pos[..., 0:2] = humanoid_root_pos[..., 0:2] + self._tar_dir - self._marker_pos[..., 2] = 0.0 - - heading_theta = torch.atan2(self._tar_dir[..., 1], self._tar_dir[..., 0]) - heading_axis = torch.zeros_like(self._marker_pos) - heading_axis[..., -1] = 1.0 - heading_q = quat_from_angle_axis(heading_theta, heading_axis) - self._marker_rot[:] = heading_q - - self._face_marker_pos[..., 0:2] = humanoid_root_pos[..., 0:2] + self._tar_facing_dir - self._face_marker_pos[..., 2] = 0.0 - - face_theta = torch.atan2(self._tar_facing_dir[..., 1], self._tar_facing_dir[..., 0]) - face_axis = torch.zeros_like(self._marker_pos) - face_axis[..., -1] = 1.0 - face_q = quat_from_angle_axis(face_theta, heading_axis) - self._face_marker_rot[:] = face_q - - marker_ids = torch.cat([self._marker_actor_ids, self._face_marker_actor_ids], dim=0) - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(marker_ids), len(marker_ids)) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - if (not self.headless): - self._marker_handles = [] - self._face_marker_handles = [] - self._load_marker_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _load_marker_asset(self): - asset_root = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/simulator/assets/mjcf" - asset_file = "heading_marker.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 1.0 - 
asset_options.fix_base_link = True - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._marker_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - - if (not self.headless): - self._build_marker(env_id, env_ptr) - - return - - def _build_marker(self, env_id, env_ptr): - col_group = env_id - col_filter = 2 - segmentation_id = 0 - - default_pose = gymapi.Transform() - default_pose.p.x = 1.0 - default_pose.p.z = 0.0 - - marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.8, 0.0, 0.0)) - self._marker_handles.append(marker_handle) - - face_marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "face_marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, face_marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.0, 0.0, 0.8)) - self._face_marker_handles.append(face_marker_handle) - - return - - def _build_marker_state_tensors(self): - num_actors = self._root_states.shape[0] // self.num_envs - - self._marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., TAR_ACTOR_ID, :] - self._marker_pos = self._marker_states[..., :3] - self._marker_rot = self._marker_states[..., 3:7] - self._marker_actor_ids = self._humanoid_actor_ids + TAR_ACTOR_ID - - self._face_marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., TAR_FACING_ACTOR_ID, :] - self._face_marker_pos = self._face_marker_states[..., :3] - self._face_marker_rot = self._face_marker_states[..., 3:7] - self._face_marker_actor_ids = self._humanoid_actor_ids + TAR_FACING_ACTOR_ID - - return - - def _update_task(self): - reset_task_mask = self.progress_buf >= self._heading_change_steps - rest_env_ids = reset_task_mask.nonzero(as_tuple=False).flatten() - if len(rest_env_ids) > 0: - self._reset_task(rest_env_ids) - return - - def _reset_task(self, env_ids): - n = len(env_ids) - if (self._enable_rand_heading): - rand_theta = 2 * np.pi * torch.rand(n, device=self.device) - np.pi - rand_face_theta = 2 * np.pi * torch.rand(n, device=self.device) - np.pi - else: - rand_theta = torch.zeros(n, device=self.device) - rand_face_theta = torch.zeros(n, device=self.device) - - tar_dir = torch.stack([torch.cos(rand_theta), torch.sin(rand_theta)], dim=-1) - tar_speed = (self._tar_speed_max - self._tar_speed_min) * torch.rand(n, device=self.device) + self._tar_speed_min - change_steps = torch.randint(low=self._heading_change_steps_min, high=self._heading_change_steps_max, - size=(n,), device=self.device, dtype=torch.int64) - - face_tar_dir = torch.stack([torch.cos(rand_face_theta), torch.sin(rand_face_theta)], dim=-1) - - self._tar_speed[env_ids] = tar_speed - self._tar_dir[env_ids] = tar_dir - self._tar_facing_dir[env_ids] = face_tar_dir - self._heading_change_steps[env_ids] = self.progress_buf[env_ids] + change_steps - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_dir = self._tar_dir - tar_speed = self._tar_speed - tar_face_dir = self._tar_facing_dir - else: - root_states = self._humanoid_root_states[env_ids] - tar_dir = self._tar_dir[env_ids] - tar_speed = self._tar_speed[env_ids] - tar_face_dir = 
self._tar_facing_dir[env_ids] - - obs = compute_heading_observations(root_states, tar_dir, tar_speed, tar_face_dir) - return obs - - def _compute_reward(self, actions): - root_pos = self._humanoid_root_states[..., 0:3] - root_rot = self._humanoid_root_states[..., 3:7] - self.rew_buf[:] = compute_heading_reward(root_pos, self._prev_root_pos, root_rot, - self._tar_dir, self._tar_speed, - self._tar_facing_dir, self.dt) - return - - def _draw_task(self): - self._update_marker() - - vel_scale = 0.2 - heading_cols = np.array([[0.0, 1.0, 0.0], - [1.0, 0.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - root_pos = self._humanoid_root_states[..., 0:3] - prev_root_pos = self._prev_root_pos - sim_vel = (root_pos - prev_root_pos) / self.dt - sim_vel[..., -1] = 0 - - starts = root_pos - tar_ends = torch.clone(starts) - tar_ends[..., 0:2] += vel_scale * self._tar_speed.unsqueeze(-1) * self._tar_dir - sim_ends = starts + vel_scale * sim_vel - - verts = torch.cat([starts, tar_ends, starts, sim_ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i:i+1] - curr_verts = curr_verts.reshape([2, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, heading_cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_heading_observations(root_states, tar_dir, tar_speed, tar_face_dir): - # type: (Tensor, Tensor, Tensor, Tensor) -> Tensor - root_rot = root_states[:, 3:7] - - tar_dir3d = torch.cat([tar_dir, torch.zeros_like(tar_dir[..., 0:1])], dim=-1) - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - local_tar_dir = quat_rotate(heading_rot, tar_dir3d) - local_tar_dir = local_tar_dir[..., 0:2] - tar_speed = tar_speed.unsqueeze(-1) - - tar_face_dir3d = torch.cat([tar_face_dir, torch.zeros_like(tar_face_dir[..., 0:1])], dim=-1) - local_tar_face_dir = quat_rotate(heading_rot, tar_face_dir3d) - local_tar_face_dir = local_tar_face_dir[..., 0:2] - - obs = torch.cat([local_tar_dir, tar_speed, local_tar_face_dir], dim=-1) - return obs - -@torch.jit.script -def compute_heading_reward(root_pos, prev_root_pos, root_rot, tar_dir, tar_speed, tar_face_dir, dt): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, float) -> Tensor - vel_err_scale = 0.25 - tangent_err_w = 0.1 - - dir_reward_w = 0.7 - facing_reward_w = 0.3 - - delta_root_pos = root_pos - prev_root_pos - root_vel = delta_root_pos / dt - tar_dir_speed = torch.sum(tar_dir * root_vel[..., :2], dim=-1) - - tar_dir_vel = tar_dir_speed.unsqueeze(-1) * tar_dir - tangent_vel = root_vel[..., :2] - tar_dir_vel - - tangent_speed = torch.sum(tangent_vel, dim=-1) - - tar_vel_err = tar_speed - tar_dir_speed - tangent_vel_err = tangent_speed - dir_reward = torch.exp(-vel_err_scale * (tar_vel_err * tar_vel_err + - tangent_err_w * tangent_vel_err * tangent_vel_err)) - - speed_mask = tar_dir_speed <= 0 - dir_reward[speed_mask] = 0 - - heading_rot = torch_utils.calc_heading_quat(root_rot) - facing_dir = torch.zeros_like(root_pos) - facing_dir[..., 0] = 1.0 - facing_dir = quat_rotate(heading_rot, facing_dir) - facing_err = torch.sum(tar_face_dir * facing_dir[..., 0:2], dim=-1) - facing_reward = torch.clamp_min(facing_err, 0.0) - - reward = dir_reward_w * dir_reward + facing_reward_w * facing_reward - - return reward diff --git 
a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py deleted file mode 100644 index c203074e..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
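The compute_heading_reward jit function deleted above (humanoid_heading.py) combines a velocity term and a facing term as reward = 0.7 * exp(-0.25 * (vel_err**2 + 0.1 * tangent_err**2)) + 0.3 * max(facing_alignment, 0), with the velocity term zeroed when the root moves against the target direction. A worked example with made-up numbers:

    # Worked example only; the speeds and alignment below are invented.
    import math

    tar_speed, tar_dir_speed, tangent_speed = 1.5, 1.2, 0.1
    vel_err = tar_speed - tar_dir_speed              # 0.3 m/s short of the target speed
    dir_reward = math.exp(-0.25 * (vel_err ** 2 + 0.1 * tangent_speed ** 2))
    if tar_dir_speed <= 0:                           # moving against the target direction
        dir_reward = 0.0

    facing_alignment = 0.9                           # dot(tar_face_dir, root facing direction)
    reward = 0.7 * dir_reward + 0.3 * max(facing_alignment, 0.0)
    print(round(reward, 3))                          # ~0.954

The 5-dimensional task observation for that task is simply [local target direction (2), target speed (1), local facing direction (2)], matching get_task_obs_size above.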
- -import torch - -import tasks.humanoid as humanoid -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -from isaacgym import gymapi -from isaacgym import gymtorch -from isaacgym.torch_utils import * - -class HumanoidLocation(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._tar_speed = cfg["env"]["tarSpeed"] - self._tar_change_steps_min = cfg["env"]["tarChangeStepsMin"] - self._tar_change_steps_max = cfg["env"]["tarChangeStepsMax"] - self._tar_dist_max = cfg["env"]["tarDistMax"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._tar_change_steps = torch.zeros([self.num_envs], device=self.device, dtype=torch.int64) - self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - self._tar_pos = torch.zeros([self.num_envs, 2], device=self.device, dtype=torch.float) - - if (not self.headless): - self._build_marker_state_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 2 - return obs_size - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._prev_root_pos[:] = self._humanoid_root_states[..., 0:3] - return - - def _update_marker(self): - self._marker_pos[..., 0:2] = self._tar_pos - self._marker_pos[..., 2] = 0.0 - - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(self._marker_actor_ids), len(self._marker_actor_ids)) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - if (not self.headless): - self._marker_handles = [] - self._load_marker_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _load_marker_asset(self): - asset_root = "ase/data/assets/mjcf/" - asset_file = "location_marker.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 1.0 - asset_options.fix_base_link = True - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._marker_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - - if (not self.headless): - self._build_marker(env_id, env_ptr) - - return - - def _build_marker(self, env_id, env_ptr): - col_group = env_id - col_filter = 2 - segmentation_id = 0 - default_pose = gymapi.Transform() - - marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.8, 0.0, 0.0)) - self._marker_handles.append(marker_handle) - - return - - def _build_marker_state_tensors(self): - num_actors = self._root_states.shape[0] // self.num_envs - self._marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., 1, :] - self._marker_pos = self._marker_states[..., :3] - - self._marker_actor_ids = self._humanoid_actor_ids + 1 - - return - - def _update_task(self): - reset_task_mask = self.progress_buf >= self._tar_change_steps - rest_env_ids = 
reset_task_mask.nonzero(as_tuple=False).flatten() - if len(rest_env_ids) > 0: - self._reset_task(rest_env_ids) - return - - def _reset_task(self, env_ids): - n = len(env_ids) - - char_root_pos = self._humanoid_root_states[env_ids, 0:2] - rand_pos = self._tar_dist_max * (2.0 * torch.rand([n, 2], device=self.device) - 1.0) - - change_steps = torch.randint(low=self._tar_change_steps_min, high=self._tar_change_steps_max, - size=(n,), device=self.device, dtype=torch.int64) - - self._tar_pos[env_ids] = char_root_pos + rand_pos - self._tar_change_steps[env_ids] = self.progress_buf[env_ids] + change_steps - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_pos = self._tar_pos - else: - root_states = self._humanoid_root_states[env_ids] - tar_pos = self._tar_pos[env_ids] - - obs = compute_location_observations(root_states, tar_pos) - return obs - - def _compute_reward(self, actions): - root_pos = self._humanoid_root_states[..., 0:3] - root_rot = self._humanoid_root_states[..., 3:7] - self.rew_buf[:] = compute_location_reward(root_pos, self._prev_root_pos, root_rot, - self._tar_pos, self._tar_speed, - self.dt) - return - - def _draw_task(self): - self._update_marker() - - cols = np.array([[0.0, 1.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._humanoid_root_states[..., 0:3] - ends = self._marker_pos - - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_location_observations(root_states, tar_pos): - # type: (Tensor, Tensor) -> Tensor - root_pos = root_states[:, 0:3] - root_rot = root_states[:, 3:7] - - tar_pos3d = torch.cat([tar_pos, torch.zeros_like(tar_pos[..., 0:1])], dim=-1) - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - local_tar_pos = quat_rotate(heading_rot, tar_pos3d - root_pos) - local_tar_pos = local_tar_pos[..., 0:2] - - obs = local_tar_pos - return obs - -@torch.jit.script -def compute_location_reward(root_pos, prev_root_pos, root_rot, tar_pos, tar_speed, dt): - # type: (Tensor, Tensor, Tensor, Tensor, float, float) -> Tensor - dist_threshold = 0.5 - - pos_err_scale = 0.5 - vel_err_scale = 4.0 - - pos_reward_w = 0.5 - vel_reward_w = 0.4 - face_reward_w = 0.1 - - pos_diff = tar_pos - root_pos[..., 0:2] - pos_err = torch.sum(pos_diff * pos_diff, dim=-1) - pos_reward = torch.exp(-pos_err_scale * pos_err) - - tar_dir = tar_pos - root_pos[..., 0:2] - tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1) - - - delta_root_pos = root_pos - prev_root_pos - root_vel = delta_root_pos / dt - tar_dir_speed = torch.sum(tar_dir * root_vel[..., :2], dim=-1) - tar_vel_err = tar_speed - tar_dir_speed - tar_vel_err = torch.clamp_min(tar_vel_err, 0.0) - vel_reward = torch.exp(-vel_err_scale * (tar_vel_err * tar_vel_err)) - speed_mask = tar_dir_speed <= 0 - vel_reward[speed_mask] = 0 - - - heading_rot = torch_utils.calc_heading_quat(root_rot) - facing_dir = torch.zeros_like(root_pos) - facing_dir[..., 0] = 1.0 - facing_dir = quat_rotate(heading_rot, facing_dir) - facing_err = torch.sum(tar_dir * facing_dir[..., 0:2], dim=-1) - 
facing_reward = torch.clamp_min(facing_err, 0.0) - - - dist_mask = pos_err < dist_threshold - facing_reward[dist_mask] = 1.0 - vel_reward[dist_mask] = 1.0 - - reward = pos_reward_w * pos_reward + vel_reward_w * vel_reward + face_reward_w * facing_reward - - return reward \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py deleted file mode 100644 index 40934988..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
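Similarly, the compute_location_reward function deleted above (humanoid_location.py) weights position, velocity-towards-target and facing terms 0.5 / 0.4 / 0.1, and saturates the latter two to 1 once the squared XY distance to the target drops below 0.5. A worked example with made-up numbers:

    # Worked example only; distances, speeds and alignment below are invented.
    import math

    pos_err = 1.0 ** 2 + 0.5 ** 2                    # squared XY distance to the target
    pos_reward = math.exp(-0.5 * pos_err)

    tar_speed, tar_dir_speed = 1.5, 1.0
    vel_err = max(tar_speed - tar_dir_speed, 0.0)
    vel_reward = math.exp(-4.0 * vel_err ** 2) if tar_dir_speed > 0 else 0.0

    facing_reward = max(0.8, 0.0)                    # alignment of root facing with the target direction

    if pos_err < 0.5:                                # close enough: remaining terms saturate
        vel_reward = facing_reward = 1.0

    reward = 0.5 * pos_reward + 0.4 * vel_reward + 0.1 * facing_reward
    print(round(reward, 3))                          # ~0.495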
- -import torch - -from isaacgym import gymapi, gymtorch -from isaacgym.torch_utils import * - -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_getup as humanoid_amp_getup -import tasks.humanoid_strike as humanoid_strike -import tasks.humanoid_location as humanoid_location -from utils import torch_utils - -PERTURB_OBJS = [ - ["small", 60], - ["small", 7], - ["small", 10], - ["small", 35], - ["small", 2], - ["small", 2], - ["small", 3], - ["small", 2], - ["small", 2], - ["small", 3], - ["small", 2], - ["large", 60], - ["small", 300], -] - -class HumanoidPerturb(humanoid_amp.HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._proj_dist_min = 4 - self._proj_dist_max = 5 - self._proj_h_min = 0.25 - self._proj_h_max = 2 - self._proj_steps = 150 - self._proj_warmup_steps = 1 - self._proj_speed_min = 30 - self._proj_speed_max = 40 - assert(self._proj_warmup_steps < self._proj_steps) - - self._build_proj_tensors() - self._calc_perturb_times() - - return - - def _create_envs(self, num_envs, spacing, num_per_row): - self._proj_handles = [] - self._load_proj_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - self._build_proj(env_id, env_ptr) - return - - def _load_proj_asset(self): - asset_root = "ase/data/assets/mjcf/" - - small_asset_file = "block_projectile.urdf" - small_asset_options = gymapi.AssetOptions() - small_asset_options.angular_damping = 0.01 - small_asset_options.linear_damping = 0.01 - small_asset_options.max_angular_velocity = 100.0 - small_asset_options.density = 200.0 - small_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - self._small_proj_asset = self.gym.load_asset(self.sim, asset_root, small_asset_file, small_asset_options) - - large_asset_file = "block_projectile_large.urdf" - large_asset_options = gymapi.AssetOptions() - large_asset_options.angular_damping = 0.01 - large_asset_options.linear_damping = 0.01 - large_asset_options.max_angular_velocity = 100.0 - large_asset_options.density = 100.0 - large_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - self._large_proj_asset = self.gym.load_asset(self.sim, asset_root, large_asset_file, large_asset_options) - return - - def _build_proj(self, env_id, env_ptr): - col_group = env_id - col_filter = 0 - segmentation_id = 0 - - for i, obj in enumerate(PERTURB_OBJS): - default_pose = gymapi.Transform() - default_pose.p.x = 200 + i - default_pose.p.z = 1 - obj_type = obj[0] - if (obj_type == "small"): - proj_asset = self._small_proj_asset - elif (obj_type == "large"): - proj_asset = self._large_proj_asset - - proj_handle = self.gym.create_actor(env_ptr, proj_asset, default_pose, "proj{:d}".format(i), col_group, col_filter, segmentation_id) - self._proj_handles.append(proj_handle) - - return - - def _build_body_ids_tensor(self, env_ptr, actor_handle, body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _build_proj_tensors(self): - num_actors 
= self.get_num_actors_per_env() - num_objs = self._get_num_objs() - self._proj_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., (num_actors - num_objs):, :] - - self._proj_actor_ids = num_actors * np.arange(self.num_envs) - self._proj_actor_ids = np.expand_dims(self._proj_actor_ids, axis=-1) - self._proj_actor_ids = self._proj_actor_ids + np.reshape(np.array(self._proj_handles), [self.num_envs, num_objs]) - self._proj_actor_ids = self._proj_actor_ids.flatten() - self._proj_actor_ids = to_torch(self._proj_actor_ids, device=self.device, dtype=torch.int32) - - bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs - contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) - contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor) - self._proj_contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., (num_actors - num_objs):, :] - - return - - def _calc_perturb_times(self): - self._perturb_timesteps = [] - total_steps = 0 - for i, obj in enumerate(PERTURB_OBJS): - curr_time = obj[1] - total_steps += curr_time - self._perturb_timesteps.append(total_steps) - - self._perturb_timesteps = np.array(self._perturb_timesteps) - - return - - def _reset_env_tensors(self, env_ids): - super()._reset_env_tensors(env_ids) - - env_ids_int32 = self._proj_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - return - - def _compute_reset(self): - self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf, - self._contact_forces, self._contact_body_ids, - self._rigid_body_pos, self.max_episode_length, - self._enable_early_termination, self._termination_heights) - return - - def post_physics_step(self): - self._update_proj() - super().post_physics_step() - return - - def _get_num_objs(self): - return len(PERTURB_OBJS) - - def _update_proj(self): - - curr_timestep = self.progress_buf.cpu().numpy()[0] - curr_timestep = curr_timestep % (self._perturb_timesteps[-1] + 1) - perturb_step = np.where(self._perturb_timesteps == curr_timestep)[0] - - if (len(perturb_step) > 0): - perturb_id = perturb_step[0] - n = self.num_envs - humanoid_root_pos = self._humanoid_root_states[..., 0:3] - - rand_theta = torch.rand([n], dtype=self._proj_states.dtype, device=self._proj_states.device) - rand_theta *= 2 * np.pi - rand_dist = (self._proj_dist_max - self._proj_dist_min) * torch.rand([n], dtype=self._proj_states.dtype, device=self._proj_states.device) + self._proj_dist_min - pos_x = rand_dist * torch.cos(rand_theta) - pos_y = -rand_dist * torch.sin(rand_theta) - pos_z = (self._proj_h_max - self._proj_h_min) * torch.rand([n], dtype=self._proj_states.dtype, device=self._proj_states.device) + self._proj_h_min - - self._proj_states[..., perturb_id, 0] = humanoid_root_pos[..., 0] + pos_x - self._proj_states[..., perturb_id, 1] = humanoid_root_pos[..., 1] + pos_y - self._proj_states[..., perturb_id, 2] = pos_z - self._proj_states[..., perturb_id, 3:6] = 0.0 - self._proj_states[..., perturb_id, 6] = 1.0 - - tar_body_idx = np.random.randint(self.num_bodies) - tar_body_idx = 1 - - launch_tar_pos = self._rigid_body_pos[..., tar_body_idx, :] - launch_dir = launch_tar_pos - self._proj_states[..., perturb_id, 0:3] - launch_dir += 0.1 * torch.randn_like(launch_dir) - launch_dir = torch.nn.functional.normalize(launch_dir, dim=-1) - launch_speed = (self._proj_speed_max - 
self._proj_speed_min) * torch.rand_like(launch_dir[:, 0:1]) + self._proj_speed_min - launch_vel = launch_speed * launch_dir - launch_vel[..., 0:2] += self._rigid_body_vel[..., tar_body_idx, 0:2] - self._proj_states[..., perturb_id, 7:10] = launch_vel - self._proj_states[..., perturb_id, 10:13] = 0.0 - - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(self._proj_actor_ids), - len(self._proj_actor_ids)) - - return - - def _draw_task(self): - super()._draw_task() - - cols = np.array([[1.0, 0.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._humanoid_root_states[..., 0:3] - ends = self._proj_states[..., 0:3] - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos, - max_episode_length, enable_early_termination, termination_heights): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor] - - terminated = torch.zeros_like(reset_buf) - reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated) - - return reset, terminated diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py deleted file mode 100644 index cf578aac..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
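The projectile logic removed with HumanoidPerturb above spawns a block on a ring around the character and launches it at a target body with aim noise and a random speed, compensating for the body's planar velocity. A self-contained sketch of just the launch-velocity step (the function name and example tensors are illustrative, with ranges mirroring the constants above):

    # Sketch of the launch computation in the deleted _update_proj.
    import torch

    def launch_velocity(proj_pos, target_pos, target_vel, speed_min=30.0, speed_max=40.0):
        direction = target_pos - proj_pos
        direction = direction + 0.1 * torch.randn_like(direction)       # aim noise
        direction = torch.nn.functional.normalize(direction, dim=-1)
        speed = (speed_max - speed_min) * torch.rand(direction.shape[:-1] + (1,)) + speed_min
        velocity = speed * direction
        velocity[..., 0:2] += target_vel[..., 0:2]                       # lead the moving body
        return velocity

    proj_pos = torch.tensor([[4.0, 0.0, 1.0]])
    target_pos = torch.tensor([[0.0, 0.0, 1.0]])
    target_vel = torch.tensor([[0.5, 0.0, 0.0]])
    print(launch_velocity(proj_pos, target_pos, target_vel))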
- -import torch - -import tasks.humanoid as humanoid -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -from isaacgym import gymapi -from isaacgym import gymtorch -from isaacgym.torch_utils import * - -class HumanoidReach(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._tar_speed = cfg["env"]["tarSpeed"] - self._tar_change_steps_min = cfg["env"]["tarChangeStepsMin"] - self._tar_change_steps_max = cfg["env"]["tarChangeStepsMax"] - self._tar_dist_max = cfg["env"]["tarDistMax"] - self._tar_height_min = cfg["env"]["tarHeightMin"] - self._tar_height_max = cfg["env"]["tarHeightMax"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._tar_change_steps = torch.zeros([self.num_envs], device=self.device, dtype=torch.int64) - self._tar_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - - reach_body_name = cfg["env"]["reachBodyName"] - self._reach_body_id = self._build_reach_body_id_tensor(self.envs[0], self.humanoid_handles[0], reach_body_name) - - if (not self.headless): - self._build_marker_state_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 3 - return obs_size - - def _update_marker(self): - self._marker_pos[..., :] = self._tar_pos - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(self._marker_actor_ids), len(self._marker_actor_ids)) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - if (not self.headless): - self._marker_handles = [] - self._load_marker_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _load_marker_asset(self): - asset_root = "ase/data/assets/mjcf/" - asset_file = "location_marker.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 1.0 - asset_options.fix_base_link = True - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._marker_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - - if (not self.headless): - self._build_marker(env_id, env_ptr) - - return - - def _build_marker(self, env_id, env_ptr): - col_group = env_id - col_filter = 2 - segmentation_id = 0 - - default_pose = gymapi.Transform() - - marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.8, 0.0, 0.0)) - self._marker_handles.append(marker_handle) - - return - - def _build_marker_state_tensors(self): - num_actors = self._root_states.shape[0] // self.num_envs - self._marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., 1, :] - self._marker_pos = self._marker_states[..., :3] - - self._marker_actor_ids = self._humanoid_actor_ids + 1 - - return - - def _build_reach_body_id_tensor(self, env_ptr, actor_handle, body_name): - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - 
assert(body_id != -1) - body_id = to_torch(body_id, device=self.device, dtype=torch.long) - return body_id - - def _update_task(self): - reset_task_mask = self.progress_buf >= self._tar_change_steps - rest_env_ids = reset_task_mask.nonzero(as_tuple=False).flatten() - if len(rest_env_ids) > 0: - self._reset_task(rest_env_ids) - return - - def _reset_task(self, env_ids): - n = len(env_ids) - - rand_pos = torch.rand([n, 3], device=self.device) - rand_pos[..., 0:2] = self._tar_dist_max * (2.0 * rand_pos[..., 0:2] - 1.0) - rand_pos[..., 2] = (self._tar_height_max - self._tar_height_min) * rand_pos[..., 2] + self._tar_height_min - - change_steps = torch.randint(low=self._tar_change_steps_min, high=self._tar_change_steps_max, - size=(n,), device=self.device, dtype=torch.int64) - - self._tar_pos[env_ids, :] = rand_pos - self._tar_change_steps[env_ids] = self.progress_buf[env_ids] + change_steps - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_pos = self._tar_pos - else: - root_states = self._humanoid_root_states[env_ids] - tar_pos = self._tar_pos[env_ids] - - obs = compute_location_observations(root_states, tar_pos) - return obs - - def _compute_reward(self, actions): - reach_body_pos = self._rigid_body_pos[:, self._reach_body_id, :] - root_rot = self._humanoid_root_states[..., 3:7] - self.rew_buf[:] = compute_reach_reward(reach_body_pos, root_rot, - self._tar_pos, self._tar_speed, - self.dt) - return - - def _draw_task(self): - self._update_marker() - - cols = np.array([[0.0, 1.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._rigid_body_pos[:, self._reach_body_id, :] - ends = self._tar_pos - - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_location_observations(root_states, tar_pos): - # type: (Tensor, Tensor) -> Tensor - root_rot = root_states[:, 3:7] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - local_tar_pos = quat_rotate(heading_rot, tar_pos) - - obs = local_tar_pos - return obs - -@torch.jit.script -def compute_reach_reward(reach_body_pos, root_rot, tar_pos, tar_speed, dt): - # type: (Tensor, Tensor, Tensor, float, float) -> Tensor - pos_err_scale = 4.0 - - pos_diff = tar_pos - reach_body_pos - pos_err = torch.sum(pos_diff * pos_diff, dim=-1) - pos_reward = torch.exp(-pos_err_scale * pos_err) - - reward = pos_reward - - return reward \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py deleted file mode 100644 index 6c3a31be..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. 
Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch - -from isaacgym import gymapi, gymtorch -from isaacgym.torch_utils import * - -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -class HumanoidStrike(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._tar_dist_min = 0.5 - self._tar_dist_max = 10.0 - self._near_dist = 1.5 - self._near_prob = 0.5 - - self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - - strike_body_names = cfg["env"]["strikeBodyNames"] - self._strike_body_ids = self._build_strike_body_ids_tensor(self.envs[0], self.humanoid_handles[0], strike_body_names) - self._build_target_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 15 - return obs_size - - def _create_envs(self, num_envs, spacing, num_per_row): - self._target_handles = [] - self._load_target_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - self._build_target(env_id, env_ptr) - return - - def _load_target_asset(self): - asset_root = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/simulator/assets/mjcf" - asset_file = "strike_target.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 30.0 - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._target_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - return - - def _build_target(self, env_id, env_ptr): - col_group = env_id - col_filter = 0 - segmentation_id = 0 - - default_pose = gymapi.Transform() - default_pose.p.x = 1.0 - - target_handle = self.gym.create_actor(env_ptr, self._target_asset, default_pose, "target", 
col_group, col_filter, segmentation_id) - self._target_handles.append(target_handle) - - return - - def _build_strike_body_ids_tensor(self, env_ptr, actor_handle, body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _build_target_tensors(self): - num_actors = self.get_num_actors_per_env() - self._target_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., 1, :] - - self._tar_actor_ids = to_torch(num_actors * np.arange(self.num_envs), device=self.device, dtype=torch.int32) + 1 - - bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs - contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) - contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor) - self._tar_contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., self.num_bodies, :] - - return - - def _reset_actors(self, env_ids): - super()._reset_actors(env_ids) - self._reset_target(env_ids) - return - - def _reset_target(self, env_ids): - n = len(env_ids) - - init_near = torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) < self._near_prob - dist_max = self._tar_dist_max * torch.ones([n], dtype=self._target_states.dtype, device=self._target_states.device) - dist_max[init_near] = self._near_dist - rand_dist = (dist_max - self._tar_dist_min) * torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) + self._tar_dist_min - - rand_theta = 2 * np.pi * torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) - self._target_states[env_ids, 0] = rand_dist * torch.cos(rand_theta) + self._humanoid_root_states[env_ids, 0] - self._target_states[env_ids, 1] = rand_dist * torch.sin(rand_theta) + self._humanoid_root_states[env_ids, 1] - self._target_states[env_ids, 2] = 0.9 - - rand_rot_theta = 2 * np.pi * torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) - axis = torch.tensor([0.0, 0.0, 1.0], dtype=self._target_states.dtype, device=self._target_states.device) - rand_rot = quat_from_angle_axis(rand_rot_theta, axis) - - self._target_states[env_ids, 3:7] = rand_rot - self._target_states[env_ids, 7:10] = 0.0 - self._target_states[env_ids, 10:13] = 0.0 - return - - def _reset_env_tensors(self, env_ids): - super()._reset_env_tensors(env_ids) - - env_ids_int32 = self._tar_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - return - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._prev_root_pos[:] = self._humanoid_root_states[..., 0:3] - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_states = self._target_states - else: - root_states = self._humanoid_root_states[env_ids] - tar_states = self._target_states[env_ids] - - obs = compute_strike_observations(root_states, tar_states) - return obs - - def _compute_reward(self, actions): - tar_pos = self._target_states[..., 0:3] - tar_rot = self._target_states[..., 3:7] - char_root_state = self._humanoid_root_states - strike_body_vel = 
self._rigid_body_vel[..., self._strike_body_ids[0], :] - - self.rew_buf[:] = compute_strike_reward(tar_pos, tar_rot, char_root_state, - self._prev_root_pos, strike_body_vel, - self.dt, self._near_dist) - return - - def _compute_reset(self): - self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf, - self._contact_forces, self._contact_body_ids, - self._rigid_body_pos, self._tar_contact_forces, - self._strike_body_ids, self.max_episode_length, - self._enable_early_termination, self._termination_heights) - return - - def _draw_task(self): - cols = np.array([[0.0, 1.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._humanoid_root_states[..., 0:3] - ends = self._target_states[..., 0:3] - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_strike_observations(root_states, tar_states): - # type: (Tensor, Tensor) -> Tensor - root_pos = root_states[:, 0:3] - root_rot = root_states[:, 3:7] - - tar_pos = tar_states[:, 0:3] - tar_rot = tar_states[:, 3:7] - tar_vel = tar_states[:, 7:10] - tar_ang_vel = tar_states[:, 10:13] - - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - local_tar_pos = tar_pos - root_pos - local_tar_pos[..., -1] = tar_pos[..., -1] - local_tar_pos = quat_rotate(heading_rot, local_tar_pos) - local_tar_vel = quat_rotate(heading_rot, tar_vel) - local_tar_ang_vel = quat_rotate(heading_rot, tar_ang_vel) - - local_tar_rot = quat_mul(heading_rot, tar_rot) - local_tar_rot_obs = torch_utils.quat_to_tan_norm(local_tar_rot) - - obs = torch.cat([local_tar_pos, local_tar_rot_obs, local_tar_vel, local_tar_ang_vel], dim=-1) - return obs - -@torch.jit.script -def compute_strike_reward(tar_pos, tar_rot, root_state, prev_root_pos, strike_body_vel, dt, near_dist): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, float) -> Tensor - tar_speed = 1.0 - vel_err_scale = 4.0 - - tar_rot_w = 0.6 - vel_reward_w = 0.4 - - up = torch.zeros_like(tar_pos) - up[..., -1] = 1 - tar_up = quat_rotate(tar_rot, up) - tar_rot_err = torch.sum(up * tar_up, dim=-1) - tar_rot_r = torch.clamp_min(1.0 - tar_rot_err, 0.0) - - root_pos = root_state[..., 0:3] - tar_dir = tar_pos[..., 0:2] - root_pos[..., 0:2] - tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1) - delta_root_pos = root_pos - prev_root_pos - root_vel = delta_root_pos / dt - tar_dir_speed = torch.sum(tar_dir * root_vel[..., :2], dim=-1) - tar_vel_err = tar_speed - tar_dir_speed - tar_vel_err = torch.clamp_min(tar_vel_err, 0.0) - vel_reward = torch.exp(-vel_err_scale * (tar_vel_err * tar_vel_err)) - speed_mask = tar_dir_speed <= 0 - vel_reward[speed_mask] = 0 - - - reward = tar_rot_w * tar_rot_r + vel_reward_w * vel_reward - - succ = tar_rot_err < 0.2 - reward = torch.where(succ, torch.ones_like(reward), reward) - - return reward - - -@torch.jit.script -def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos, - tar_contact_forces, strike_body_ids, max_episode_length, - enable_early_termination, termination_heights): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, 
Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor] - contact_force_threshold = 1.0 - - terminated = torch.zeros_like(reset_buf) - - if (enable_early_termination): - masked_contact_buf = contact_buf.clone() - masked_contact_buf[:, contact_body_ids, :] = 0 - fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1) - fall_contact = torch.any(fall_contact, dim=-1) - - body_height = rigid_body_pos[..., 2] - fall_height = body_height < termination_heights - fall_height[:, contact_body_ids] = False - fall_height = torch.any(fall_height, dim=-1) - - has_fallen = torch.logical_and(fall_contact, fall_height) - - tar_has_contact = torch.any(torch.abs(tar_contact_forces[..., 0:2]) > contact_force_threshold, dim=-1) - #strike_body_force = contact_buf[:, strike_body_id, :] - #strike_body_has_contact = torch.any(torch.abs(strike_body_force) > contact_force_threshold, dim=-1) - nonstrike_body_force = masked_contact_buf - nonstrike_body_force[:, strike_body_ids, :] = 0 - nonstrike_body_has_contact = torch.any(torch.abs(nonstrike_body_force) > contact_force_threshold, dim=-1) - nonstrike_body_has_contact = torch.any(nonstrike_body_has_contact, dim=-1) - - tar_fail = torch.logical_and(tar_has_contact, nonstrike_body_has_contact) - - has_failed = torch.logical_or(has_fallen, tar_fail) - - # first timestep can sometimes still have nonzero contact forces - # so only check after first couple of steps - has_failed *= (progress_buf > 1) - terminated = torch.where(has_failed, torch.ones_like(reset_buf), terminated) - - reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated) - - return reset, terminated \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py deleted file mode 100644 index 0cae80ed..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch - -from isaacgym import gymtorch - -from tasks.humanoid_amp import HumanoidAMP - - -class HumanoidViewMotion(HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - control_freq_inv = cfg["env"]["controlFrequencyInv"] - self._motion_dt = control_freq_inv * sim_params.dt - - cfg["env"]["controlFrequencyInv"] = 1 - cfg["env"]["pdControl"] = False - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - num_motions = self._motion_lib.num_motions() - self._motion_ids = torch.arange(self.num_envs, device=self.device, dtype=torch.long) - self._motion_ids = torch.remainder(self._motion_ids, num_motions) - - return - - def pre_physics_step(self, actions): - self.actions = actions.to(self.device).clone() - forces = torch.zeros_like(self.actions) - force_tensor = gymtorch.unwrap_tensor(forces) - self.gym.set_dof_actuation_force_tensor(self.sim, force_tensor) - return - - def post_physics_step(self): - super().post_physics_step() - self._motion_sync() - return - - def _get_humanoid_collision_filter(self): - return 1 # disable self collisions - - def _motion_sync(self): - num_motions = self._motion_lib.num_motions() - motion_ids = self._motion_ids - motion_times = self.progress_buf * self._motion_dt - - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - - root_vel = torch.zeros_like(root_vel) - root_ang_vel = torch.zeros_like(root_ang_vel) - dof_vel = torch.zeros_like(dof_vel) - - env_ids = torch.arange(self.num_envs, dtype=torch.long, device=self.device) - self._set_env_state(env_ids=env_ids, - root_pos=root_pos, - root_rot=root_rot, - dof_pos=dof_pos, - root_vel=root_vel, - root_ang_vel=root_ang_vel, - dof_vel=dof_vel) - - env_ids_int32 = self._humanoid_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - self.gym.set_dof_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - return - - def _compute_reset(self): - motion_lengths = self._motion_lib.get_motion_length(self._motion_ids) - self.reset_buf[:], self._terminate_buf[:] = compute_view_motion_reset(self.reset_buf, motion_lengths, self.progress_buf, self._motion_dt) - return - - def _reset_actors(self, env_ids): - return - - def _reset_env_tensors(self, env_ids): - num_motions = self._motion_lib.num_motions() - self._motion_ids[env_ids] = torch.remainder(self._motion_ids[env_ids] + self.num_envs, num_motions) - - self.progress_buf[env_ids] = 0 - self.reset_buf[env_ids] = 0 - self._terminate_buf[env_ids] = 0 - return - -@torch.jit.script -def compute_view_motion_reset(reset_buf, motion_lengths, progress_buf, dt): - # type: (Tensor, Tensor, Tensor, float) -> 
Tuple[Tensor, Tensor] - terminated = torch.zeros_like(reset_buf) - motion_times = progress_buf * dt - reset = torch.where(motion_times > motion_lengths, torch.ones_like(reset_buf), terminated) - return reset, terminated \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py deleted file mode 100644 index 356e9e47..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. - -from gym import spaces - -from isaacgym import gymtorch -from isaacgym.torch_utils import to_torch -import torch -import numpy as np - - -# VecEnv Wrapper for RL training -class VecTask(): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - self.task = task - - self.num_environments = task.num_envs - self.num_agents = 1 # used for multi-agent environments - self.num_observations = task.num_obs - self.num_states = task.num_states - self.num_actions = task.num_actions - - self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf) - self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf) - self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.) - - self.clip_obs = clip_observations - self.clip_actions = clip_actions - self.rl_device = rl_device - - print("RL device: ", rl_device) - - def step(self, actions): - raise NotImplementedError - - def reset(self): - raise NotImplementedError - - def get_number_of_agents(self): - return self.num_agents - - @property - def observation_space(self): - return self.obs_space - - @property - def action_space(self): - return self.act_space - - @property - def num_envs(self): - return self.num_environments - - @property - def num_acts(self): - return self.num_actions - - @property - def num_obs(self): - return self.num_observations - - -# C++ CPU Class -class VecTaskCPU(VecTask): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - self.sync_frame_time = sync_frame_time - - def step(self, actions): - actions = actions.cpu().numpy() - self.task.render(self.sync_frame_time) - - obs, rewards, resets, extras = self.task.step(np.clip(actions, -self.clip_actions, self.clip_actions)) - - return (to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device), - to_torch(rewards, dtype=torch.float, device=self.rl_device), - to_torch(resets, dtype=torch.uint8, device=self.rl_device), []) - - def reset(self): - actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f') - - # step the simulator - obs, rewards, resets, extras = self.task.step(actions) - - return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device) - - -# C++ GPU Class -class VecTaskGPU(VecTask): - def __init__(self, task, rl_device, 
clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - - self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(self.task.num_envs, self.task.num_obs)) - self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(self.task.num_envs,)) - self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(self.task.num_envs,)) - - def step(self, actions): - self.task.render(False) - actions_clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions) - actions_tensor = gymtorch.unwrap_tensor(actions_clipped) - - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, [] - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - actions_tensor = gymtorch.unwrap_tensor(actions) - - # step the simulator - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs) - - -# Python CPU/GPU Class -class VecTaskPython(VecTask): - - def get_state(self): - return torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - def step(self, actions): - actions_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions) - - self.task.step(actions_tensor) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device), self.task.rew_buf.to(self.rl_device), self.task.reset_buf.to(self.rl_device), self.task.extras - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - - # step the simulator - self.task.step(actions) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py deleted file mode 100644 index a5c8160d..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from gym import spaces -import numpy as np -import torch -from tasks.vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython - -class VecTaskCPUWrapper(VecTaskCPU): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions) - return - -class VecTaskGPUWrapper(VecTaskGPU): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - return - - -class VecTaskPythonWrapper(VecTaskPython): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - - self._amp_obs_space = spaces.Box(np.ones(task.get_num_amp_obs()) * -np.Inf, np.ones(task.get_num_amp_obs()) * np.Inf) - return - - def reset(self, env_ids=None): - self.task.reset(env_ids) - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - @property - def amp_observation_space(self): - return self._amp_obs_space - - def fetch_amp_obs_demo(self, num_samples): - return self.task.fetch_amp_obs_demo(num_samples) \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py deleted file mode 100644 index bc6ee169..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py deleted file mode 100644 index 204344c1..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from utils import logger -from isaacgym import gymapi -import numpy as np -import torch -from isaacgym.torch_utils import * -from isaacgym import gymtorch - -def setup_gym_viewer(config): - gym = initialize_gym(config) - sim, viewer = configure_gym(gym, config) - return gym, sim, viewer - - -def initialize_gym(config): - gym = gymapi.acquire_gym() - if not gym.initialize(): - logger.warn("*** Failed to initialize gym") - quit() - - return gym - - -def configure_gym(gym, config): - engine, render = config['engine'], config['render'] - - # physics engine settings - if(engine == 'FLEX'): - sim_engine = gymapi.SIM_FLEX - elif(engine == 'PHYSX'): - sim_engine = gymapi.SIM_PHYSX - else: - logger.warn("Uknown physics engine. defaulting to FLEX") - sim_engine = gymapi.SIM_FLEX - - # gym viewer - if render: - # create viewer - sim = gym.create_sim(0, 0, sim_type=sim_engine) - viewer = gym.create_viewer( - sim, int(gymapi.DEFAULT_VIEWER_WIDTH / 1.25), - int(gymapi.DEFAULT_VIEWER_HEIGHT / 1.25) - ) - - if viewer is None: - logger.warn("*** Failed to create viewer") - quit() - - # enable left mouse click or space bar for throwing projectiles - if config['add_projectiles']: - gym.subscribe_viewer_mouse_event(viewer, gymapi.MOUSE_LEFT_BUTTON, "shoot") - gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_SPACE, "shoot") - - else: - sim = gym.create_sim(0, -1) - viewer = None - - # simulation params - scene_config = config['env']['scene'] - sim_params = gymapi.SimParams() - sim_params.solver_type = scene_config['SolverType'] - sim_params.num_outer_iterations = scene_config['NumIterations'] - sim_params.num_inner_iterations = scene_config['NumInnerIterations'] - sim_params.relaxation = scene_config.get('Relaxation', 0.75) - sim_params.warm_start = scene_config.get('WarmStart', 0.25) - sim_params.geometric_stiffness = scene_config.get('GeometricStiffness', 1.0) - sim_params.shape_collision_margin = 0.01 - - sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0) - gym.set_sim_params(sim, sim_params) - - return sim, viewer - - -def parse_states_from_reference_states(reference_states, progress): - # parse reference states from DeepMimicState - global_quats_ref = torch.tensor( - reference_states._global_rotation[(progress,)].numpy(), - dtype=torch.double - ).cuda() - ts_ref = torch.tensor( - reference_states._translation[(progress,)].numpy(), - dtype=torch.double - ).cuda() - vels_ref = torch.tensor( - reference_states._velocity[(progress,)].numpy(), - dtype=torch.double - ).cuda() - avels_ref = torch.tensor( - 
reference_states._angular_velocity[(progress,)].numpy(), - dtype=torch.double - ).cuda() - return global_quats_ref, ts_ref, vels_ref, avels_ref - - -def parse_states_from_reference_states_with_motion_id(precomputed_state, - progress, motion_id): - assert len(progress) == len(motion_id) - # get the global id - global_id = precomputed_state['motion_offset'][motion_id] + progress - global_id = np.minimum(global_id, - precomputed_state['global_quats_ref'].shape[0] - 1) - - # parse reference states from DeepMimicState - global_quats_ref = precomputed_state['global_quats_ref'][global_id] - ts_ref = precomputed_state['ts_ref'][global_id] - vels_ref = precomputed_state['vels_ref'][global_id] - avels_ref = precomputed_state['avels_ref'][global_id] - return global_quats_ref, ts_ref, vels_ref, avels_ref - - -def parse_dof_state_with_motion_id(precomputed_state, dof_state, - progress, motion_id): - assert len(progress) == len(motion_id) - # get the global id - global_id = precomputed_state['motion_offset'][motion_id] + progress - # NOTE: it should never reach the dof_state.shape, cause the episode is - # terminated 2 steps before - global_id = np.minimum(global_id, dof_state.shape[0] - 1) - - # parse reference states from DeepMimicState - return dof_state[global_id] - - -def get_flatten_ids(precomputed_state): - motion_offsets = precomputed_state['motion_offset'] - init_state_id, init_motion_id, global_id = [], [], [] - for i_motion in range(len(motion_offsets) - 1): - i_length = motion_offsets[i_motion + 1] - motion_offsets[i_motion] - init_state_id.extend(range(i_length)) - init_motion_id.extend([i_motion] * i_length) - if len(global_id) == 0: - global_id.extend(range(0, i_length)) - else: - global_id.extend(range(global_id[-1] + 1, - global_id[-1] + i_length + 1)) - return np.array(init_state_id), np.array(init_motion_id), \ - np.array(global_id) - - -def parse_states_from_reference_states_with_global_id(precomputed_state, - global_id): - # get the global id - global_id = global_id % precomputed_state['global_quats_ref'].shape[0] - - # parse reference states from DeepMimicState - global_quats_ref = precomputed_state['global_quats_ref'][global_id] - ts_ref = precomputed_state['ts_ref'][global_id] - vels_ref = precomputed_state['vels_ref'][global_id] - avels_ref = precomputed_state['avels_ref'][global_id] - return global_quats_ref, ts_ref, vels_ref, avels_ref - - -def get_robot_states_from_torch_tensor(config, ts, global_quats, vels, avels, - init_rot, progress, motion_length=-1, - actions=None, relative_rot=None, - motion_id=None, num_motion=None, - motion_onehot_matrix=None): - info = {} - # the observation with quaternion-based representation - torso_height = ts[..., 0, 1].cpu().numpy() - gttrny, gqny, vny, avny, info['root_yaw_inv'] = \ - quaternion_math.compute_observation_return_info(global_quats, ts, - vels, avels) - joint_obs = np.concatenate([gttrny.cpu().numpy(), gqny.cpu().numpy(), - vny.cpu().numpy(), avny.cpu().numpy()], axis=-1) - joint_obs = joint_obs.reshape(joint_obs.shape[0], -1) - num_envs = joint_obs.shape[0] - obs = np.concatenate([torso_height[:, np.newaxis], joint_obs], -1) - - # the previous action - if config['env_action_ob']: - obs = np.concatenate([obs, actions], axis=-1) - - # the orientation - if config['env_orientation_ob']: - if relative_rot is not None: - obs = np.concatenate([obs, relative_rot], axis=-1) - else: - curr_rot = global_quats[np.arange(num_envs)][:, 0] - curr_rot = curr_rot.reshape(num_envs, -1, 4) - relative_rot = 
quaternion_math.compute_orientation_drift( - init_rot, curr_rot - ).cpu().numpy() - obs = np.concatenate([obs, relative_rot], axis=-1) - - if config['env_frame_ob']: - if type(motion_length) == np.ndarray: - motion_length = motion_length.astype(np.float) - progress_ob = np.expand_dims(progress.astype(np.float) / - motion_length, axis=-1) - else: - progress_ob = np.expand_dims(progress.astype(np.float) / - float(motion_length), axis=-1) - obs = np.concatenate([obs, progress_ob], axis=-1) - - if config['env_motion_ob'] and not config['env_motion_ob_onehot']: - motion_id_ob = np.expand_dims(motion_id.astype(np.float) / - float(num_motion), axis=-1) - obs = np.concatenate([obs, motion_id_ob], axis=-1) - elif config['env_motion_ob'] and config['env_motion_ob_onehot']: - motion_id_ob = motion_onehot_matrix[motion_id] - obs = np.concatenate([obs, motion_id_ob], axis=-1) - - return obs, info - - -def get_xyzoffset(start_ts, end_ts, root_yaw_inv): - xyoffset = (end_ts - start_ts)[:, [0], :].reshape(1, -1, 1, 3) - ryinv = root_yaw_inv.reshape(1, -1, 1, 4) - - calibrated_xyz_offset = quaternion_math.quat_apply(ryinv, xyoffset)[0, :, 0, :] - return calibrated_xyz_offset diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py deleted file mode 100644 index bbe273ab..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import torch -import numpy as np - -from isaacgym.torch_utils import * - -@torch.jit.script -def quat_to_angle_axis(q): - # type: (Tensor) -> Tuple[Tensor, Tensor] - # computes axis-angle representation from quaternion q - # q must be normalized - min_theta = 1e-5 - qx, qy, qz, qw = 0, 1, 2, 3 - - sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw]) - angle = 2 * torch.acos(q[..., qw]) - angle = normalize_angle(angle) - sin_theta_expand = sin_theta.unsqueeze(-1) - axis = q[..., qx:qw] / sin_theta_expand - - mask = torch.abs(sin_theta) > min_theta - default_axis = torch.zeros_like(axis) - default_axis[..., -1] = 1 - - angle = torch.where(mask, angle, torch.zeros_like(angle)) - mask_expand = mask.unsqueeze(-1) - axis = torch.where(mask_expand, axis, default_axis) - return angle, axis - -@torch.jit.script -def angle_axis_to_exp_map(angle, axis): - # type: (Tensor, Tensor) -> Tensor - # compute exponential map from axis-angle - angle_expand = angle.unsqueeze(-1) - exp_map = angle_expand * axis - return exp_map - -@torch.jit.script -def quat_to_exp_map(q): - # type: (Tensor) -> Tensor - # compute exponential map from quaternion - # q must be normalized - angle, axis = quat_to_angle_axis(q) - exp_map = angle_axis_to_exp_map(angle, axis) - return exp_map - -@torch.jit.script -def quat_to_tan_norm(q): - # type: (Tensor) -> Tensor - # represents a rotation using the tangent and normal vectors - ref_tan = torch.zeros_like(q[..., 0:3]) - ref_tan[..., 0] = 1 - tan = quat_rotate(q, ref_tan) - - ref_norm = torch.zeros_like(q[..., 0:3]) - ref_norm[..., -1] = 1 - norm = quat_rotate(q, ref_norm) - - norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1) - return norm_tan - -@torch.jit.script -def euler_xyz_to_exp_map(roll, pitch, yaw): - # type: (Tensor, Tensor, Tensor) -> Tensor - q = quat_from_euler_xyz(roll, pitch, yaw) - exp_map = quat_to_exp_map(q) - return exp_map - -@torch.jit.script -def exp_map_to_angle_axis(exp_map): - min_theta = 1e-5 - - angle = torch.norm(exp_map, dim=-1) - angle_exp = torch.unsqueeze(angle, dim=-1) - axis = exp_map / angle_exp - angle = normalize_angle(angle) - - default_axis = torch.zeros_like(exp_map) - default_axis[..., -1] = 1 - - mask = torch.abs(angle) > min_theta - angle = torch.where(mask, angle, torch.zeros_like(angle)) - mask_expand = mask.unsqueeze(-1) - axis = torch.where(mask_expand, axis, default_axis) - - return angle, axis - -@torch.jit.script -def exp_map_to_quat(exp_map): - angle, axis = exp_map_to_angle_axis(exp_map) - q = quat_from_angle_axis(angle, axis) - return q - -@torch.jit.script -def slerp(q0, q1, t): - # type: (Tensor, Tensor, Tensor) -> Tensor - cos_half_theta = torch.sum(q0 * q1, dim=-1) - - neg_mask = cos_half_theta < 0 - q1 = q1.clone() - q1[neg_mask] = -q1[neg_mask] - cos_half_theta = torch.abs(cos_half_theta) - cos_half_theta = torch.unsqueeze(cos_half_theta, dim=-1) - - half_theta = torch.acos(cos_half_theta); - sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta); - - ratioA = torch.sin((1 - t) * half_theta) / sin_half_theta; - ratioB = torch.sin(t * half_theta) / sin_half_theta; - - new_q = ratioA * q0 + ratioB * q1 - - new_q = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, new_q) - new_q = torch.where(torch.abs(cos_half_theta) >= 1, q0, new_q) - - return new_q - -@torch.jit.script -def calc_heading(q): - # type: (Tensor) -> Tensor - # calculate heading direction from quaternion - # the heading is the direction on the xy plane - # q must be normalized - ref_dir = torch.zeros_like(q[..., 0:3]) - 
ref_dir[..., 0] = 1 - rot_dir = quat_rotate(q, ref_dir) - - heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0]) - return heading - -@torch.jit.script -def calc_heading_quat(q): - # type: (Tensor) -> Tensor - # calculate heading rotation from quaternion - # the heading is the direction on the xy plane - # q must be normalized - heading = calc_heading(q) - axis = torch.zeros_like(q[..., 0:3]) - axis[..., 2] = 1 - - heading_q = quat_from_angle_axis(heading, axis) - return heading_q - -@torch.jit.script -def calc_heading_quat_inv(q): - # type: (Tensor) -> Tensor - # calculate heading rotation from quaternion - # the heading is the direction on the xy plane - # q must be normalized - heading = calc_heading(q) - axis = torch.zeros_like(q[..., 0:3]) - axis[..., 2] = 1 - - heading_q = quat_from_angle_axis(-heading, axis) - return heading_q \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py deleted file mode 100644 index 356e9e47..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. - -from gym import spaces - -from isaacgym import gymtorch -from isaacgym.torch_utils import to_torch -import torch -import numpy as np - - -# VecEnv Wrapper for RL training -class VecTask(): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - self.task = task - - self.num_environments = task.num_envs - self.num_agents = 1 # used for multi-agent environments - self.num_observations = task.num_obs - self.num_states = task.num_states - self.num_actions = task.num_actions - - self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf) - self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf) - self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.) 
- - self.clip_obs = clip_observations - self.clip_actions = clip_actions - self.rl_device = rl_device - - print("RL device: ", rl_device) - - def step(self, actions): - raise NotImplementedError - - def reset(self): - raise NotImplementedError - - def get_number_of_agents(self): - return self.num_agents - - @property - def observation_space(self): - return self.obs_space - - @property - def action_space(self): - return self.act_space - - @property - def num_envs(self): - return self.num_environments - - @property - def num_acts(self): - return self.num_actions - - @property - def num_obs(self): - return self.num_observations - - -# C++ CPU Class -class VecTaskCPU(VecTask): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - self.sync_frame_time = sync_frame_time - - def step(self, actions): - actions = actions.cpu().numpy() - self.task.render(self.sync_frame_time) - - obs, rewards, resets, extras = self.task.step(np.clip(actions, -self.clip_actions, self.clip_actions)) - - return (to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device), - to_torch(rewards, dtype=torch.float, device=self.rl_device), - to_torch(resets, dtype=torch.uint8, device=self.rl_device), []) - - def reset(self): - actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f') - - # step the simulator - obs, rewards, resets, extras = self.task.step(actions) - - return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device) - - -# C++ GPU Class -class VecTaskGPU(VecTask): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - - self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(self.task.num_envs, self.task.num_obs)) - self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(self.task.num_envs,)) - self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(self.task.num_envs,)) - - def step(self, actions): - self.task.render(False) - actions_clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions) - actions_tensor = gymtorch.unwrap_tensor(actions_clipped) - - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, [] - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - actions_tensor = gymtorch.unwrap_tensor(actions) - - # step the simulator - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs) - - -# Python CPU/GPU Class -class VecTaskPython(VecTask): - - def get_state(self): - return torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - def step(self, actions): - actions_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions) - - self.task.step(actions_tensor) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device), self.task.rew_buf.to(self.rl_device), self.task.reset_buf.to(self.rl_device), self.task.extras - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - - # 
step the simulator - self.task.step(actions) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py deleted file mode 100644 index 00af127c..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from gym import spaces -import numpy as np -import torch -from vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython - -class VecTaskCPUWrapper(VecTaskCPU): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions) - return - -class VecTaskGPUWrapper(VecTaskGPU): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - return - - -class VecTaskPythonWrapper(VecTaskPython): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - - self._amp_obs_space = spaces.Box(np.ones(task.get_num_amp_obs()) * -np.Inf, np.ones(task.get_num_amp_obs()) * np.Inf) - return - - def reset(self, env_ids=None): - self.task.reset(env_ids) - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - @property - def amp_observation_space(self): - return self._amp_obs_space - - def fetch_amp_obs_demo(self, num_samples): - return self.task.fetch_amp_obs_demo(num_samples) \ No newline at end of file
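
For reference, the step() path shared by the two removed VecTaskPython/VecTaskPythonWrapper copies reduces to: clamp incoming actions, step the wrapped task, clamp the observation buffer, and move the results to the RL device. The sketch below illustrates that pattern in plain PyTorch; it is an illustrative reduction only, and DummyTask together with its buffer names are hypothetical stand-ins rather than code recovered from the deleted files.

import torch


class DummyTask:
    """Hypothetical stand-in exposing the buffers the removed wrapper expects."""

    def __init__(self, num_envs=4, num_obs=8, num_actions=3, device="cpu"):
        self.num_envs, self.num_obs, self.num_actions = num_envs, num_obs, num_actions
        self.obs_buf = torch.zeros(num_envs, num_obs, device=device)
        self.rew_buf = torch.zeros(num_envs, device=device)
        self.reset_buf = torch.zeros(num_envs, dtype=torch.uint8, device=device)
        self.extras = {}

    def step(self, actions):
        # Fake dynamics: observations drift with the applied actions.
        self.obs_buf[:, : self.num_actions] += actions
        self.rew_buf = -actions.abs().sum(dim=-1)


class ClampedVecEnv:
    """Clamp actions on the way in, clamp observations on the way out,
    then relocate results to the RL device (the core of VecTaskPython.step)."""

    def __init__(self, task, rl_device="cpu", clip_observations=5.0, clip_actions=1.0):
        self.task = task
        self.rl_device = rl_device
        self.clip_obs = clip_observations
        self.clip_actions = clip_actions

    def step(self, actions):
        actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
        self.task.step(actions)
        obs = torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
        return (obs,
                self.task.rew_buf.to(self.rl_device),
                self.task.reset_buf.to(self.rl_device),
                self.task.extras)


if __name__ == "__main__":
    env = ClampedVecEnv(DummyTask())
    obs, rew, reset, extras = env.step(torch.randn(4, 3))
    print(obs.shape, rew.shape, reset.shape)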