From 0029b2966e66660ad2bda427cad99b010f62be74 Mon Sep 17 00:00:00 2001
From: Junjia Liu
Date: Fri, 18 Aug 2023 22:02:22 +0800
Subject: [PATCH] Remove useless files

---
 .../agents/mixline/for_test/__init__.py       |   0
 .../agents/mixline/for_test/amp_agent.py      | 664 -----------------
 .../agents/mixline/for_test/amp_datasets.py   |  59 --
 .../agents/mixline/for_test/amp_models.py     |  65 --
 .../mixline/for_test/amp_network_builder.py   | 154 ----
 .../agents/mixline/for_test/amp_players.py    | 111 ---
 .../agents/mixline/for_test/ase_agent.py      | 567 --------------
 .../mixline/for_test/ase_humanoid_hrl.yaml    | 114 ---
 .../agents/mixline/for_test/ase_models.py     |  56 --
 .../mixline/for_test/ase_network_builder.py   | 379 ----------
 .../agents/mixline/for_test/ase_players.py    | 179 -----
 .../agents/mixline/for_test/common_agent.py   | 592 ---------------
 .../agents/mixline/for_test/common_player.py  | 216 ------
 .../agents/mixline/for_test/config.py         | 259 -------
 .../agents/mixline/for_test/hrl_agent.py      | 356 ---------
 .../agents/mixline/for_test/hrl_humanoid.yaml |  76 --
 .../agents/mixline/for_test/hrl_models.py     |  46 --
 .../mixline/for_test/hrl_network_builder.py   |  67 --
 .../agents/mixline/for_test/hrl_players.py    | 345 ---------
 .../humanoid_sword_shield_heading.yaml        |  53 --
 .../humanoid_sword_shield_strike.yaml         |  49 --
 .../agents/mixline/for_test/observer.py       |  36 -
 .../agents/mixline/for_test/parse_task.py     |  73 --
 .../agents/mixline/for_test/replay_buffer.py  | 113 ---
 .../RofuncRL/agents/mixline/for_test/run.py   | 252 -------
 .../agents/mixline/for_test/tasks/__init__.py |   6 -
 .../mixline/for_test/tasks/base_task.py       | 428 -----------
 .../agents/mixline/for_test/tasks/humanoid.py | 692 ------------------
 .../mixline/for_test/tasks/humanoid_amp.py    | 344 ---------
 .../for_test/tasks/humanoid_amp_getup.py      | 170 -----
 .../for_test/tasks/humanoid_amp_task.py       | 101 ---
 .../for_test/tasks/humanoid_heading.py        | 313 --------
 .../for_test/tasks/humanoid_location.py       | 256 -------
 .../for_test/tasks/humanoid_perturb.py        | 273 -------
 .../mixline/for_test/tasks/humanoid_reach.py  | 223 ------
 .../mixline/for_test/tasks/humanoid_strike.py | 323 --------
 .../for_test/tasks/humanoid_view_motion.py    | 125 ----
 .../agents/mixline/for_test/tasks/vec_task.py | 139 ----
 .../for_test/tasks/vec_task_wrappers.py       |  61 --
 .../agents/mixline/for_test/utils/__init__.py |   6 -
 .../agents/mixline/for_test/utils/gym_util.py | 240 ------
 .../mixline/for_test/utils/torch_utils.py     | 182 -----
 .../agents/mixline/for_test/vec_task.py       | 139 ----
 .../mixline/for_test/vec_task_wrappers.py     |  61 --
 44 files changed, 8963 deletions(-)
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/__init__.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/config.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/run.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py
 delete mode 100644 rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py

diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/__init__.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py
deleted file mode 100644
index 5e3b683f..00000000
--- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_agent.py
+++ /dev/null
@@ -1,664 +0,0 @@
-# Copyright (c) 2018-2022, NVIDIA Corporation
-# All rights reserved.
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.algos_torch import torch_ext -from rl_games.common import a2c_common -from rl_games.common import schedulers -from rl_games.common import vecenv - -from isaacgym.torch_utils import * - -import time -from datetime import datetime -import numpy as np -from torch import optim -import torch -from torch import nn - -import replay_buffer -import common_agent - -from tensorboardX import SummaryWriter - - -class AMPAgent(common_agent.CommonAgent): - def __init__(self, base_name, config): - super().__init__(base_name, config) - - if self._normalize_amp_input: - self._amp_input_mean_std = RunningMeanStd(self._amp_observation_space.shape).to(self.ppo_device) - - return - - def init_tensors(self): - super().init_tensors() - self._build_amp_buffers() - return - - def set_eval(self): - super().set_eval() - if self._normalize_amp_input: - self._amp_input_mean_std.eval() - return - - def set_train(self): - super().set_train() - if self._normalize_amp_input: - self._amp_input_mean_std.train() - return - - def get_stats_weights(self): - state = super().get_stats_weights() - if self._normalize_amp_input: - state['amp_input_mean_std'] = self._amp_input_mean_std.state_dict() - - return state - - def set_stats_weights(self, weights): - super().set_stats_weights(weights) - if self._normalize_amp_input: - self._amp_input_mean_std.load_state_dict(weights['amp_input_mean_std']) - - return - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, masks) - else: - res_dict = self.get_action_values(self.obs, self._rand_action_probs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if 
self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - self.experience_buffer.update_data('amp_obs', n, infos['amp_obs']) - self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask']) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - if (self.vec_env.env.task.viewer): - self._amp_debug(infos) - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs'] - amp_rewards = self._calc_amp_rewards(mb_amp_obs) - mb_rewards = self._combine_rewards(mb_rewards, amp_rewards) - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - for k, v in amp_rewards.items(): - batch_dict[k] = a2c_common.swap_and_flatten01(v) - - return batch_dict - - def get_action_values(self, obs_dict, rand_action_probs): - processed_obs = self._preproc_obs(obs_dict['obs']) - - self.model.eval() - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs': processed_obs, - 'rnn_states': self.rnn_states - } - - with torch.no_grad(): - res_dict = self.model(input_dict) - if self.has_central_value: - states = obs_dict['states'] - input_dict = { - 'is_train': False, - 'states': states, - } - value = self.get_central_value(input_dict) - res_dict['values'] = value - - if self.normalize_value: - res_dict['values'] = self.value_mean_std(res_dict['values'], True) - - rand_action_mask = torch.bernoulli(rand_action_probs) - det_action_mask = rand_action_mask == 0.0 - res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask] - res_dict['rand_action_mask'] = rand_action_mask - - return res_dict - - def prepare_dataset(self, batch_dict): - super().prepare_dataset(batch_dict) - self.dataset.values_dict['amp_obs'] = batch_dict['amp_obs'] - self.dataset.values_dict['amp_obs_demo'] = batch_dict['amp_obs_demo'] - self.dataset.values_dict['amp_obs_replay'] = batch_dict['amp_obs_replay'] - - rand_action_mask = batch_dict['rand_action_mask'] - 
self.dataset.values_dict['rand_action_mask'] = rand_action_mask - return - - def train_epoch(self): - play_time_start = time.time() - - with torch.no_grad(): - if self.is_rnn: - batch_dict = self.play_steps_rnn() - else: - batch_dict = self.play_steps() - - play_time_end = time.time() - update_time_start = time.time() - rnn_masks = batch_dict.get('rnn_masks', None) - - self._update_amp_demos() - num_obs_samples = batch_dict['amp_obs'].shape[0] - amp_obs_demo = self._amp_obs_demo_buffer.sample(num_obs_samples)['amp_obs'] - batch_dict['amp_obs_demo'] = amp_obs_demo - - if (self._amp_replay_buffer.get_total_count() == 0): - batch_dict['amp_obs_replay'] = batch_dict['amp_obs'] - else: - batch_dict['amp_obs_replay'] = self._amp_replay_buffer.sample(num_obs_samples)['amp_obs'] - - self.set_train() - - self.curr_frames = batch_dict.pop('played_frames') - self.prepare_dataset(batch_dict) - self.algo_observer.after_steps() - - if self.has_central_value: - self.train_central_value() - - train_info = None - - if self.is_rnn: - frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) - print(frames_mask_ratio) - - for _ in range(0, self.mini_epochs_num): - ep_kls = [] - for i in range(len(self.dataset)): - curr_train_info = self.train_actor_critic(self.dataset[i]) - - if self.schedule_type == 'legacy': - if self.multi_gpu: - curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, - self.epoch_num, 0, - curr_train_info['kl'].item()) - self.update_lr(self.last_lr) - - if (train_info is None): - train_info = dict() - for k, v in curr_train_info.items(): - train_info[k] = [v] - else: - for k, v in curr_train_info.items(): - train_info[k].append(v) - - av_kls = torch_ext.mean_list(train_info['kl']) - - if self.schedule_type == 'standard': - if self.multi_gpu: - av_kls = self.hvd.average_value(av_kls, 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, - 0, av_kls.item()) - self.update_lr(self.last_lr) - - if self.schedule_type == 'standard_epoch': - if self.multi_gpu: - av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, - av_kls.item()) - self.update_lr(self.last_lr) - - update_time_end = time.time() - play_time = play_time_end - play_time_start - update_time = update_time_end - update_time_start - total_time = update_time_end - play_time_start - - self._store_replay_amp_obs(batch_dict['amp_obs']) - - train_info['play_time'] = play_time - train_info['update_time'] = update_time - train_info['total_time'] = total_time - self._record_train_batch_info(batch_dict, train_info) - - return train_info - - def calc_gradients(self, input_dict): - self.set_train() - - value_preds_batch = input_dict['old_values'] - old_action_log_probs_batch = input_dict['old_logp_actions'] - advantage = input_dict['advantages'] - old_mu_batch = input_dict['mu'] - old_sigma_batch = input_dict['sigma'] - return_batch = input_dict['returns'] - actions_batch = input_dict['actions'] - obs_batch = input_dict['obs'] - obs_batch = self._preproc_obs(obs_batch) - - amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size] - amp_obs = self._preproc_amp_obs(amp_obs) - amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size] - amp_obs_replay = self._preproc_amp_obs(amp_obs_replay) - - amp_obs_demo = 
input_dict['amp_obs_demo'][0:self._amp_minibatch_size] - amp_obs_demo = self._preproc_amp_obs(amp_obs_demo) - amp_obs_demo.requires_grad_(True) - - rand_action_mask = input_dict['rand_action_mask'] - rand_action_sum = torch.sum(rand_action_mask) - - lr = self.last_lr - kl = 1.0 - lr_mul = 1.0 - curr_e_clip = lr_mul * self.e_clip - - batch_dict = { - 'is_train': True, - 'prev_actions': actions_batch, - 'obs': obs_batch, - 'amp_obs': amp_obs, - 'amp_obs_replay': amp_obs_replay, - 'amp_obs_demo': amp_obs_demo - } - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - with torch.cuda.amp.autocast(enabled=self.mixed_precision): - res_dict = self.model(batch_dict) - action_log_probs = res_dict['prev_neglogp'] - values = res_dict['values'] - entropy = res_dict['entropy'] - mu = res_dict['mus'] - sigma = res_dict['sigmas'] - disc_agent_logit = res_dict['disc_agent_logit'] - disc_agent_replay_logit = res_dict['disc_agent_replay_logit'] - disc_demo_logit = res_dict['disc_demo_logit'] - - a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) - a_loss = a_info['actor_loss'] - a_clipped = a_info['actor_clipped'].float() - - c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) - c_loss = c_info['critic_loss'] - - b_loss = self.bound_loss(mu) - - c_loss = torch.mean(c_loss) - a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum - entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum - b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum - a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum - - disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0) - disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo) - disc_loss = disc_info['disc_loss'] - - loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \ - + self._disc_coef * disc_loss - - a_info['actor_loss'] = a_loss - a_info['actor_clip_frac'] = a_clip_frac - c_info['critic_loss'] = c_loss - - if self.multi_gpu: - self.optimizer.zero_grad() - else: - for param in self.model.parameters(): - param.grad = None - - self.scaler.scale(loss).backward() - # TODO: Refactor this ugliest code of the year - if self.truncate_grads: - if self.multi_gpu: - self.optimizer.synchronize() - self.scaler.unscale_(self.optimizer) - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - with self.optimizer.skip_synchronize(): - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.unscale_(self.optimizer) - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.step(self.optimizer) - self.scaler.update() - - with torch.no_grad(): - reduce_kl = not self.is_rnn - kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) - if self.is_rnn: - kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() # / sum_mask - - self.train_result = { - 'entropy': entropy, - 'kl': kl_dist, - 'last_lr': self.last_lr, - 'lr_mul': lr_mul, - 'b_loss': b_loss - } - self.train_result.update(a_info) - self.train_result.update(c_info) - self.train_result.update(disc_info) - - return - - def _load_config_params(self, config): - super()._load_config_params(config) - - # when 
eps greedy is enabled, rollouts will be generated using a mixture of - # a deterministic and stochastic actions. The deterministic actions help to - # produce smoother, less noisy, motions that can be used to train a better - # discriminator. If the discriminator is only trained with jittery motions - # from noisy actions, it can learn to phone in on the jitteriness to - # differential between real and fake samples. - self._enable_eps_greedy = bool(config['enable_eps_greedy']) - - self._task_reward_w = config['task_reward_w'] - self._disc_reward_w = config['disc_reward_w'] - - self._amp_observation_space = self.env_info['amp_observation_space'] - self._amp_batch_size = int(config['amp_batch_size']) - self._amp_minibatch_size = int(config['amp_minibatch_size']) - assert (self._amp_minibatch_size <= self.minibatch_size) - - self._disc_coef = config['disc_coef'] - self._disc_logit_reg = config['disc_logit_reg'] - self._disc_grad_penalty = config['disc_grad_penalty'] - self._disc_weight_decay = config['disc_weight_decay'] - self._disc_reward_scale = config['disc_reward_scale'] - self._normalize_amp_input = config.get('normalize_amp_input', True) - return - - def _build_net_config(self): - config = super()._build_net_config() - config['amp_input_shape'] = self._amp_observation_space.shape - return config - - def _build_rand_action_probs(self): - num_envs = self.vec_env.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.float32, device=self.ppo_device) - - self._rand_action_probs = 1.0 - torch.exp(10 * (env_ids / (num_envs - 1.0) - 1.0)) - self._rand_action_probs[0] = 1.0 - self._rand_action_probs[-1] = 0.0 - - if not self._enable_eps_greedy: - self._rand_action_probs[:] = 1.0 - - return - - def _init_train(self): - super()._init_train() - self._init_amp_demo_buf() - return - - def _disc_loss(self, disc_agent_logit, disc_demo_logit, obs_demo): - # prediction loss - disc_loss_agent = self._disc_loss_neg(disc_agent_logit) - disc_loss_demo = self._disc_loss_pos(disc_demo_logit) - disc_loss = 0.5 * (disc_loss_agent + disc_loss_demo) - - # logit reg - logit_weights = self.model.a2c_network.get_disc_logit_weights() - disc_logit_loss = torch.sum(torch.square(logit_weights)) - disc_loss += self._disc_logit_reg * disc_logit_loss - - # grad penalty - disc_demo_grad = torch.autograd.grad(disc_demo_logit, obs_demo, grad_outputs=torch.ones_like(disc_demo_logit), - create_graph=True, retain_graph=True, only_inputs=True) - disc_demo_grad = disc_demo_grad[0] - disc_demo_grad = torch.sum(torch.square(disc_demo_grad), dim=-1) - disc_grad_penalty = torch.mean(disc_demo_grad) - disc_loss += self._disc_grad_penalty * disc_grad_penalty - - # weight decay - if (self._disc_weight_decay != 0): - disc_weights = self.model.a2c_network.get_disc_weights() - disc_weights = torch.cat(disc_weights, dim=-1) - disc_weight_decay = torch.sum(torch.square(disc_weights)) - disc_loss += self._disc_weight_decay * disc_weight_decay - - disc_agent_acc, disc_demo_acc = self._compute_disc_acc(disc_agent_logit, disc_demo_logit) - - disc_info = { - 'disc_loss': disc_loss, - 'disc_grad_penalty': disc_grad_penalty.detach(), - 'disc_logit_loss': disc_logit_loss.detach(), - 'disc_agent_acc': disc_agent_acc.detach(), - 'disc_demo_acc': disc_demo_acc.detach(), - 'disc_agent_logit': disc_agent_logit.detach(), - 'disc_demo_logit': disc_demo_logit.detach() - } - return disc_info - - def _disc_loss_neg(self, disc_logits): - bce = torch.nn.BCEWithLogitsLoss() - loss = bce(disc_logits, torch.zeros_like(disc_logits)) - return loss - 
- def _disc_loss_pos(self, disc_logits): - bce = torch.nn.BCEWithLogitsLoss() - loss = bce(disc_logits, torch.ones_like(disc_logits)) - return loss - - def _compute_disc_acc(self, disc_agent_logit, disc_demo_logit): - agent_acc = disc_agent_logit < 0 - agent_acc = torch.mean(agent_acc.float()) - demo_acc = disc_demo_logit > 0 - demo_acc = torch.mean(demo_acc.float()) - return agent_acc, demo_acc - - def _fetch_amp_obs_demo(self, num_samples): - amp_obs_demo = self.vec_env.env.fetch_amp_obs_demo(num_samples) - return amp_obs_demo - - def _build_amp_buffers(self): - batch_shape = self.experience_buffer.obs_base_shape - self.experience_buffer.tensor_dict['amp_obs'] = torch.zeros(batch_shape + self._amp_observation_space.shape, - device=self.ppo_device) - self.experience_buffer.tensor_dict['rand_action_mask'] = torch.zeros(batch_shape, dtype=torch.float32, - device=self.ppo_device) - - amp_obs_demo_buffer_size = int(self.config['amp_obs_demo_buffer_size']) - self._amp_obs_demo_buffer = replay_buffer.ReplayBuffer(amp_obs_demo_buffer_size, self.ppo_device) - - self._amp_replay_keep_prob = self.config['amp_replay_keep_prob'] - replay_buffer_size = int(self.config['amp_replay_buffer_size']) - self._amp_replay_buffer = replay_buffer.ReplayBuffer(replay_buffer_size, self.ppo_device) - - self._build_rand_action_probs() - - self.tensor_list += ['amp_obs', 'rand_action_mask'] - return - - def _init_amp_demo_buf(self): - buffer_size = self._amp_obs_demo_buffer.get_buffer_size() - num_batches = int(np.ceil(buffer_size / self._amp_batch_size)) - - for i in range(num_batches): - curr_samples = self._fetch_amp_obs_demo(self._amp_batch_size) - self._amp_obs_demo_buffer.store({'amp_obs': curr_samples}) - - return - - def _update_amp_demos(self): - new_amp_obs_demo = self._fetch_amp_obs_demo(self._amp_batch_size) - self._amp_obs_demo_buffer.store({'amp_obs': new_amp_obs_demo}) - return - - def _preproc_amp_obs(self, amp_obs): - if self._normalize_amp_input: - amp_obs = self._amp_input_mean_std(amp_obs) - return amp_obs - - def _combine_rewards(self, task_rewards, amp_rewards): - disc_r = amp_rewards['disc_rewards'] - - combined_rewards = self._task_reward_w * task_rewards + \ - + self._disc_reward_w * disc_r - return combined_rewards - - def _eval_disc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_disc(proc_amp_obs) - - def _calc_advs(self, batch_dict): - returns = batch_dict['returns'] - values = batch_dict['values'] - rand_action_mask = batch_dict['rand_action_mask'] - - advantages = returns - values - advantages = torch.sum(advantages, axis=1) - if self.normalize_advantage: - advantages = torch_ext.normalization_with_masks(advantages, rand_action_mask) - - return advantages - - def _calc_amp_rewards(self, amp_obs): - disc_r = self._calc_disc_rewards(amp_obs) - output = { - 'disc_rewards': disc_r - } - return output - - def _calc_disc_rewards(self, amp_obs): - with torch.no_grad(): - disc_logits = self._eval_disc(amp_obs) - prob = 1 / (1 + torch.exp(-disc_logits)) - disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.ppo_device))) - disc_r *= self._disc_reward_scale - - return disc_r - - def _store_replay_amp_obs(self, amp_obs): - buf_size = self._amp_replay_buffer.get_buffer_size() - buf_total_count = self._amp_replay_buffer.get_total_count() - if (buf_total_count > buf_size): - keep_probs = to_torch(np.array([self._amp_replay_keep_prob] * amp_obs.shape[0]), device=self.ppo_device) - keep_mask = torch.bernoulli(keep_probs) == 
1.0 - amp_obs = amp_obs[keep_mask] - - if (amp_obs.shape[0] > buf_size): - rand_idx = torch.randperm(amp_obs.shape[0]) - rand_idx = rand_idx[:buf_size] - amp_obs = amp_obs[rand_idx] - - self._amp_replay_buffer.store({'amp_obs': amp_obs}) - return - - def _record_train_batch_info(self, batch_dict, train_info): - super()._record_train_batch_info(batch_dict, train_info) - train_info['disc_rewards'] = batch_dict['disc_rewards'] - return - - def _log_train_info(self, train_info, frame): - super()._log_train_info(train_info, frame) - - self.writer.add_scalar('losses/disc_loss', torch_ext.mean_list(train_info['disc_loss']).item(), frame) - - self.writer.add_scalar('info/disc_agent_acc', torch_ext.mean_list(train_info['disc_agent_acc']).item(), frame) - self.writer.add_scalar('info/disc_demo_acc', torch_ext.mean_list(train_info['disc_demo_acc']).item(), frame) - self.writer.add_scalar('info/disc_agent_logit', torch_ext.mean_list(train_info['disc_agent_logit']).item(), - frame) - self.writer.add_scalar('info/disc_demo_logit', torch_ext.mean_list(train_info['disc_demo_logit']).item(), frame) - self.writer.add_scalar('info/disc_grad_penalty', torch_ext.mean_list(train_info['disc_grad_penalty']).item(), - frame) - self.writer.add_scalar('info/disc_logit_loss', torch_ext.mean_list(train_info['disc_logit_loss']).item(), frame) - - disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards']) - self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame) - self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame) - return - - def _amp_debug(self, info): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs[0:1] - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs) - disc_reward = amp_rewards['disc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py deleted file mode 100644 index e960b2eb..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_datasets.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch -from rl_games.common import datasets - -class AMPDataset(datasets.PPODataset): - def __init__(self, batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len): - super().__init__(batch_size, minibatch_size, is_discrete, is_rnn, device, seq_len) - self._idx_buf = torch.randperm(batch_size) - return - - def update_mu_sigma(self, mu, sigma): - raise NotImplementedError() - return - - def _get_item(self, idx): - start = idx * self.minibatch_size - end = (idx + 1) * self.minibatch_size - sample_idx = self._idx_buf[start:end] - - input_dict = {} - for k,v in self.values_dict.items(): - if k not in self.special_names and v is not None: - input_dict[k] = v[sample_idx] - - if (end >= self.batch_size): - self._shuffle_idx_buf() - - return input_dict - - def _shuffle_idx_buf(self): - self._idx_buf[:] = torch.randperm(self.batch_size) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py deleted file mode 100644 index 5ff1d008..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_models.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import torch.nn as nn -from rl_games.algos_torch.models import ModelA2CContinuousLogStd - -class ModelAMPContinuous(ModelA2CContinuousLogStd): - def __init__(self, network): - super().__init__(network) - return - - def build(self, config): - net = self.network_builder.build('amp', **config) - for name, _ in net.named_parameters(): - print(name) - return ModelAMPContinuous.Network(net) - - class Network(ModelA2CContinuousLogStd.Network): - def __init__(self, a2c_network): - super().__init__(a2c_network) - return - - def forward(self, input_dict): - is_train = input_dict.get('is_train', True) - result = super().forward(input_dict) - - if (is_train): - amp_obs = input_dict['amp_obs'] - disc_agent_logit = self.a2c_network.eval_disc(amp_obs) - result["disc_agent_logit"] = disc_agent_logit - - amp_obs_replay = input_dict['amp_obs_replay'] - disc_agent_replay_logit = self.a2c_network.eval_disc(amp_obs_replay) - result["disc_agent_replay_logit"] = disc_agent_replay_logit - - amp_demo_obs = input_dict['amp_obs_demo'] - disc_demo_logit = self.a2c_network.eval_disc(amp_demo_obs) - result["disc_demo_logit"] = disc_demo_logit - - return result \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py deleted file mode 100644 index f3d5155f..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_network_builder.py +++ /dev/null @@ -1,154 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import layers -from rl_games.algos_torch import network_builder - -import torch -import torch.nn as nn -import numpy as np - -DISC_LOGIT_INIT_SCALE = 1.0 - -class AMPBuilder(network_builder.A2CBuilder): - def __init__(self, **kwargs): - super().__init__(**kwargs) - return - - class Network(network_builder.A2CBuilder.Network): - def __init__(self, params, **kwargs): - super().__init__(params, **kwargs) - - if self.is_continuous: - if (not self.space_config['learn_sigma']): - actions_num = kwargs.get('actions_num') - sigma_init = self.init_factory.create(**self.space_config['sigma_init']) - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False) - sigma_init(self.sigma) - - amp_input_shape = kwargs.get('amp_input_shape') - self._build_disc(amp_input_shape) - - return - - def load(self, params): - super().load(params) - - self._disc_units = params['disc']['units'] - self._disc_activation = params['disc']['activation'] - self._disc_initializer = params['disc']['initializer'] - return - - def forward(self, obs_dict): - obs = obs_dict['obs'] - states = obs_dict.get('rnn_states', None) - - actor_outputs = self.eval_actor(obs) - value = self.eval_critic(obs) - - output = actor_outputs + (value, states) - - return output - - def eval_actor(self, obs): - a_out = self.actor_cnn(obs) - a_out = a_out.contiguous().view(a_out.size(0), -1) - a_out = self.actor_mlp(a_out) - - if self.is_discrete: - logits = self.logits(a_out) - return logits - - if self.is_multi_discrete: - logits = [logit(a_out) for logit in self.logits] - return logits - - if self.is_continuous: - mu = self.mu_act(self.mu(a_out)) - if self.space_config['fixed_sigma']: - sigma = mu * 0.0 + self.sigma_act(self.sigma) - else: - sigma = self.sigma_act(self.sigma(a_out)) - - return mu, sigma - return - - def eval_critic(self, obs): - c_out = self.critic_cnn(obs) - c_out = c_out.contiguous().view(c_out.size(0), -1) - c_out = self.critic_mlp(c_out) - value = self.value_act(self.value(c_out)) - return value - - def eval_disc(self, amp_obs): - disc_mlp_out = self._disc_mlp(amp_obs) - disc_logits = self._disc_logits(disc_mlp_out) - return disc_logits - - def get_disc_logit_weights(self): - return torch.flatten(self._disc_logits.weight) - - def get_disc_weights(self): - weights = [] - for m in self._disc_mlp.modules(): - if isinstance(m, nn.Linear): - weights.append(torch.flatten(m.weight)) - - weights.append(torch.flatten(self._disc_logits.weight)) - return weights - - def _build_disc(self, input_shape): - self._disc_mlp = nn.Sequential() - - mlp_args = { - 'input_size' : input_shape[0], - 'units' : self._disc_units, - 'activation' : self._disc_activation, - 'dense_func' : torch.nn.Linear - } - self._disc_mlp = self._build_mlp(**mlp_args) - - mlp_out_size = self._disc_units[-1] - self._disc_logits = torch.nn.Linear(mlp_out_size, 1) - - mlp_init = self.init_factory.create(**self._disc_initializer) - for m in self._disc_mlp.modules(): - if isinstance(m, nn.Linear): - mlp_init(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - - torch.nn.init.uniform_(self._disc_logits.weight, -DISC_LOGIT_INIT_SCALE, DISC_LOGIT_INIT_SCALE) - torch.nn.init.zeros_(self._disc_logits.bias) - - return - - def build(self, name, **kwargs): - net = AMPBuilder.Network(self.params, **kwargs) - return net \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py 
b/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py deleted file mode 100644 index 7c6e2749..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/amp_players.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import torch - -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch.running_mean_std import RunningMeanStd - -import common_player - -class AMPPlayerContinuous(common_player.CommonPlayer): - def __init__(self, config): - self._normalize_amp_input = config.get('normalize_amp_input', True) - self._disc_reward_scale = config['disc_reward_scale'] - - super().__init__(config) - return - - def restore(self, fn): - if (fn != 'Base'): - super().restore(fn) - if self._normalize_amp_input: - checkpoint = torch_ext.load_checkpoint(fn) - self._amp_input_mean_std.load_state_dict(checkpoint['amp_input_mean_std']) - return - - def _build_net(self, config): - super()._build_net(config) - - if self._normalize_amp_input: - self._amp_input_mean_std = RunningMeanStd(config['amp_input_shape']).to(self.device) - self._amp_input_mean_std.eval() - - return - - def _post_step(self, info): - super()._post_step(info) - if (self.env.task.viewer): - self._amp_debug(info) - return - - def _build_net_config(self): - config = super()._build_net_config() - if (hasattr(self, 'env')): - config['amp_input_shape'] = self.env.amp_observation_space.shape - else: - config['amp_input_shape'] = self.env_info['amp_observation_space'] - return config - - def _amp_debug(self, info): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs[0:1] - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs) - disc_reward = amp_rewards['disc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward) - - return - - def _preproc_amp_obs(self, amp_obs): - if self._normalize_amp_input: - amp_obs = self._amp_input_mean_std(amp_obs) - return amp_obs - - def _eval_disc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_disc(proc_amp_obs) - - def _calc_amp_rewards(self, amp_obs): - disc_r = self._calc_disc_rewards(amp_obs) - output = { - 'disc_rewards': disc_r - } - return output - - def _calc_disc_rewards(self, amp_obs): - with torch.no_grad(): - disc_logits = self._eval_disc(amp_obs) - prob = 1 / (1 + torch.exp(-disc_logits)) - disc_r = -torch.log(torch.maximum(1 - prob, torch.tensor(0.0001, device=self.device))) - disc_r *= self._disc_reward_scale - return disc_r diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py deleted file mode 100644 index d605af32..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_agent.py +++ /dev/null @@ -1,567 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import amp_agent - -import torch -from isaacgym.torch_utils import * -from rl_games.algos_torch import torch_ext -from rl_games.common import a2c_common -# from rl_games.algos_torch.running_mean_std import RunningMeanStd -# -# from utils import torch_utils -# from learning import ase_network_builder - -class ASEAgent(amp_agent.AMPAgent): - def __init__(self, base_name, config): - super().__init__(base_name, config) - return - - def init_tensors(self): - super().init_tensors() - - batch_shape = self.experience_buffer.obs_base_shape - self.experience_buffer.tensor_dict['ase_latents'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - - self._ase_latents = torch.zeros((batch_shape[-1], self._latent_dim), dtype=torch.float32, - device=self.ppo_device) - - self.tensor_list += ['ase_latents'] - - self._latent_reset_steps = torch.zeros(batch_shape[-1], dtype=torch.int32, device=self.ppo_device) - num_envs = self.vec_env.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device) - self._reset_latent_step_count(env_ids) - - return - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - self._update_latents() - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, self._ase_latents, masks) - else: - res_dict = self.get_action_values(self.obs, self._ase_latents, self._rand_action_probs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - self.experience_buffer.update_data('amp_obs', n, infos['amp_obs']) - self.experience_buffer.update_data('ase_latents', n, self._ase_latents) - self.experience_buffer.update_data('rand_action_mask', n, res_dict['rand_action_mask']) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs, self._ase_latents) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = 
self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - if (self.vec_env.env.task.viewer): - self._amp_debug(infos, self._ase_latents) - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - mb_amp_obs = self.experience_buffer.tensor_dict['amp_obs'] - mb_ase_latents = self.experience_buffer.tensor_dict['ase_latents'] - amp_rewards = self._calc_amp_rewards(mb_amp_obs, mb_ase_latents) - mb_rewards = self._combine_rewards(mb_rewards, amp_rewards) - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - for k, v in amp_rewards.items(): - batch_dict[k] = a2c_common.swap_and_flatten01(v) - - return batch_dict - - def get_action_values(self, obs_dict, ase_latents, rand_action_probs): - processed_obs = self._preproc_obs(obs_dict['obs']) - - self.model.eval() - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs' : processed_obs, - 'rnn_states' : self.rnn_states, - 'ase_latents': ase_latents - } - - with torch.no_grad(): - res_dict = self.model(input_dict) - if self.has_central_value: - states = obs_dict['states'] - input_dict = { - 'is_train': False, - 'states' : states, - } - value = self.get_central_value(input_dict) - res_dict['values'] = value - - if self.normalize_value: - res_dict['values'] = self.value_mean_std(res_dict['values'], True) - - rand_action_mask = torch.bernoulli(rand_action_probs) - det_action_mask = rand_action_mask == 0.0 - res_dict['actions'][det_action_mask] = res_dict['mus'][det_action_mask] - res_dict['rand_action_mask'] = rand_action_mask - - return res_dict - - def prepare_dataset(self, batch_dict): - super().prepare_dataset(batch_dict) - - ase_latents = batch_dict['ase_latents'] - self.dataset.values_dict['ase_latents'] = ase_latents - - return - - - def calc_gradients(self, input_dict): - self.set_train() - - value_preds_batch = input_dict['old_values'] - old_action_log_probs_batch = input_dict['old_logp_actions'] - advantage = input_dict['advantages'] - old_mu_batch = input_dict['mu'] - old_sigma_batch = input_dict['sigma'] - return_batch = input_dict['returns'] - actions_batch = input_dict['actions'] - obs_batch = input_dict['obs'] - obs_batch = self._preproc_obs(obs_batch) - - amp_obs = input_dict['amp_obs'][0:self._amp_minibatch_size] - amp_obs = self._preproc_amp_obs(amp_obs) - if (self._enable_enc_grad_penalty()): - amp_obs.requires_grad_(True) - - amp_obs_replay = input_dict['amp_obs_replay'][0:self._amp_minibatch_size] - amp_obs_replay = self._preproc_amp_obs(amp_obs_replay) - - amp_obs_demo = input_dict['amp_obs_demo'][0:self._amp_minibatch_size] - amp_obs_demo = self._preproc_amp_obs(amp_obs_demo) - amp_obs_demo.requires_grad_(True) 
- - ase_latents = input_dict['ase_latents'] - - rand_action_mask = input_dict['rand_action_mask'] - rand_action_sum = torch.sum(rand_action_mask) - - lr = self.last_lr - kl = 1.0 - lr_mul = 1.0 - curr_e_clip = lr_mul * self.e_clip - - batch_dict = { - 'is_train': True, - 'prev_actions': actions_batch, - 'obs' : obs_batch, - 'amp_obs' : amp_obs, - 'amp_obs_replay' : amp_obs_replay, - 'amp_obs_demo' : amp_obs_demo, - 'ase_latents': ase_latents - } - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - with torch.cuda.amp.autocast(enabled=self.mixed_precision): - res_dict = self.model(batch_dict) - action_log_probs = res_dict['prev_neglogp'] - values = res_dict['values'] - entropy = res_dict['entropy'] - mu = res_dict['mus'] - sigma = res_dict['sigmas'] - disc_agent_logit = res_dict['disc_agent_logit'] - disc_agent_replay_logit = res_dict['disc_agent_replay_logit'] - disc_demo_logit = res_dict['disc_demo_logit'] - enc_pred = res_dict['enc_pred'] - - a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) - a_loss = a_info['actor_loss'] - a_clipped = a_info['actor_clipped'].float() - - c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) - c_loss = c_info['critic_loss'] - - b_loss = self.bound_loss(mu) - - c_loss = torch.mean(c_loss) - a_loss = torch.sum(rand_action_mask * a_loss) / rand_action_sum - entropy = torch.sum(rand_action_mask * entropy) / rand_action_sum - b_loss = torch.sum(rand_action_mask * b_loss) / rand_action_sum - a_clip_frac = torch.sum(rand_action_mask * a_clipped) / rand_action_sum - - disc_agent_cat_logit = torch.cat([disc_agent_logit, disc_agent_replay_logit], dim=0) - disc_info = self._disc_loss(disc_agent_cat_logit, disc_demo_logit, amp_obs_demo) - disc_loss = disc_info['disc_loss'] - - enc_latents = batch_dict['ase_latents'][0:self._amp_minibatch_size] - enc_loss_mask = rand_action_mask[0:self._amp_minibatch_size] - enc_info = self._enc_loss(enc_pred, enc_latents, batch_dict['amp_obs'], enc_loss_mask) - enc_loss = enc_info['enc_loss'] - - loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss \ - + self._disc_coef * disc_loss + self._enc_coef * enc_loss - - if (self._enable_amp_diversity_bonus()): - diversity_loss = self._diversity_loss(batch_dict['obs'], mu, batch_dict['ase_latents']) - diversity_loss = torch.sum(rand_action_mask * diversity_loss) / rand_action_sum - loss += self._amp_diversity_bonus * diversity_loss - a_info['amp_diversity_loss'] = diversity_loss - - a_info['actor_loss'] = a_loss - a_info['actor_clip_frac'] = a_clip_frac - c_info['critic_loss'] = c_loss - - if self.multi_gpu: - self.optimizer.zero_grad() - else: - for param in self.model.parameters(): - param.grad = None - - self.scaler.scale(loss).backward() - #TODO: Refactor this ugliest code of the year - if self.truncate_grads: - if self.multi_gpu: - self.optimizer.synchronize() - self.scaler.unscale_(self.optimizer) - nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - with self.optimizer.skip_synchronize(): - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.unscale_(self.optimizer) - 
nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_norm) - self.scaler.step(self.optimizer) - self.scaler.update() - else: - self.scaler.step(self.optimizer) - self.scaler.update() - - with torch.no_grad(): - reduce_kl = not self.is_rnn - kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) - if self.is_rnn: - kl_dist = (kl_dist * rnn_masks).sum() / rnn_masks.numel() #/ sum_mask - - self.train_result = { - 'entropy': entropy, - 'kl': kl_dist, - 'last_lr': self.last_lr, - 'lr_mul': lr_mul, - 'b_loss': b_loss - } - self.train_result.update(a_info) - self.train_result.update(c_info) - self.train_result.update(disc_info) - self.train_result.update(enc_info) - - return - - def env_reset(self, env_ids=None): - obs = super().env_reset(env_ids) - - if (env_ids is None): - num_envs = self.vec_env.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.ppo_device) - - if (len(env_ids) > 0): - self._reset_latents(env_ids) - self._reset_latent_step_count(env_ids) - - return obs - - def _reset_latent_step_count(self, env_ids): - self._latent_reset_steps[env_ids] = torch.randint_like(self._latent_reset_steps[env_ids], low=self._latent_steps_min, - high=self._latent_steps_max) - return - - def _load_config_params(self, config): - super()._load_config_params(config) - - self._latent_dim = config['latent_dim'] - self._latent_steps_min = config.get('latent_steps_min', np.inf) - self._latent_steps_max = config.get('latent_steps_max', np.inf) - self._latent_dim = config['latent_dim'] - self._amp_diversity_bonus = config['amp_diversity_bonus'] - self._amp_diversity_tar = config['amp_diversity_tar'] - - self._enc_coef = config['enc_coef'] - self._enc_weight_decay = config['enc_weight_decay'] - self._enc_reward_scale = config['enc_reward_scale'] - self._enc_grad_penalty = config['enc_grad_penalty'] - - self._enc_reward_w = config['enc_reward_w'] - - return - - def _build_net_config(self): - config = super()._build_net_config() - config['ase_latent_shape'] = (self._latent_dim,) - return config - - def _reset_latents(self, env_ids): - n = len(env_ids) - z = self._sample_latents(n) - self._ase_latents[env_ids] = z - - if (self.vec_env.env.task.viewer): - self._change_char_color(env_ids) - - return - - def _sample_latents(self, n): - z = self.model.a2c_network.sample_latents(n) - return z - - def _update_latents(self): - new_latent_envs = self._latent_reset_steps <= self.vec_env.env.task.progress_buf - - need_update = torch.any(new_latent_envs) - if (need_update): - new_latent_env_ids = new_latent_envs.nonzero(as_tuple=False).flatten() - self._reset_latents(new_latent_env_ids) - self._latent_reset_steps[new_latent_env_ids] += torch.randint_like(self._latent_reset_steps[new_latent_env_ids], - low=self._latent_steps_min, - high=self._latent_steps_max) - if (self.vec_env.env.task.viewer): - self._change_char_color(new_latent_env_ids) - - return - - def _eval_actor(self, obs, ase_latents): - output = self.model.a2c_network.eval_actor(obs=obs, ase_latents=ase_latents) - return output - - def _eval_critic(self, obs_dict, ase_latents): - self.model.eval() - obs = obs_dict['obs'] - processed_obs = self._preproc_obs(obs) - value = self.model.a2c_network.eval_critic(processed_obs, ase_latents) - - if self.normalize_value: - value = self.value_mean_std(value, True) - return value - - def _calc_amp_rewards(self, amp_obs, ase_latents): - disc_r = self._calc_disc_rewards(amp_obs) - enc_r = self._calc_enc_rewards(amp_obs, ase_latents) - 
output = { - 'disc_rewards': disc_r, - 'enc_rewards': enc_r - } - return output - - def _calc_enc_rewards(self, amp_obs, ase_latents): - with torch.no_grad(): - enc_pred = self._eval_enc(amp_obs) - err = self._calc_enc_error(enc_pred, ase_latents) - enc_r = torch.clamp_min(-err, 0.0) - enc_r *= self._enc_reward_scale - - return enc_r - - def _enc_loss(self, enc_pred, ase_latent, enc_obs, loss_mask): - enc_err = self._calc_enc_error(enc_pred, ase_latent) - #mask_sum = torch.sum(loss_mask) - #enc_err = enc_err.squeeze(-1) - #enc_loss = torch.sum(loss_mask * enc_err) / mask_sum - enc_loss = torch.mean(enc_err) - - # weight decay - if (self._enc_weight_decay != 0): - enc_weights = self.model.a2c_network.get_enc_weights() - enc_weights = torch.cat(enc_weights, dim=-1) - enc_weight_decay = torch.sum(torch.square(enc_weights)) - enc_loss += self._enc_weight_decay * enc_weight_decay - - enc_info = { - 'enc_loss': enc_loss - } - - if (self._enable_enc_grad_penalty()): - enc_obs_grad = torch.autograd.grad(enc_err, enc_obs, grad_outputs=torch.ones_like(enc_err), - create_graph=True, retain_graph=True, only_inputs=True) - enc_obs_grad = enc_obs_grad[0] - enc_obs_grad = torch.sum(torch.square(enc_obs_grad), dim=-1) - #enc_grad_penalty = torch.sum(loss_mask * enc_obs_grad) / mask_sum - enc_grad_penalty = torch.mean(enc_obs_grad) - - enc_loss += self._enc_grad_penalty * enc_grad_penalty - - enc_info['enc_grad_penalty'] = enc_grad_penalty.detach() - - return enc_info - - def _diversity_loss(self, obs, action_params, ase_latents): - assert(self.model.a2c_network.is_continuous) - - n = obs.shape[0] - assert(n == action_params.shape[0]) - - new_z = self._sample_latents(n) - mu, sigma = self._eval_actor(obs=obs, ase_latents=new_z) - - clipped_action_params = torch.clamp(action_params, -1.0, 1.0) - clipped_mu = torch.clamp(mu, -1.0, 1.0) - - a_diff = clipped_action_params - clipped_mu - a_diff = torch.mean(torch.square(a_diff), dim=-1) - - z_diff = new_z * ase_latents - z_diff = torch.sum(z_diff, dim=-1) - z_diff = 0.5 - 0.5 * z_diff - - diversity_bonus = a_diff / (z_diff + 1e-5) - diversity_loss = torch.square(self._amp_diversity_tar - diversity_bonus) - - return diversity_loss - - def _calc_enc_error(self, enc_pred, ase_latent): - err = enc_pred * ase_latent - err = -torch.sum(err, dim=-1, keepdim=True) - return err - - def _enable_enc_grad_penalty(self): - return self._enc_grad_penalty != 0 - - def _enable_amp_diversity_bonus(self): - return self._amp_diversity_bonus != 0 - - def _eval_enc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_enc(proc_amp_obs) - - def _combine_rewards(self, task_rewards, amp_rewards): - disc_r = amp_rewards['disc_rewards'] - enc_r = amp_rewards['enc_rewards'] - combined_rewards = self._task_reward_w * task_rewards \ - + self._disc_reward_w * disc_r \ - + self._enc_reward_w * enc_r - return combined_rewards - - def _record_train_batch_info(self, batch_dict, train_info): - super()._record_train_batch_info(batch_dict, train_info) - train_info['enc_rewards'] = batch_dict['enc_rewards'] - return - - def _log_train_info(self, train_info, frame): - super()._log_train_info(train_info, frame) - - self.writer.add_scalar('losses/enc_loss', torch_ext.mean_list(train_info['enc_loss']).item(), frame) - - if (self._enable_amp_diversity_bonus()): - self.writer.add_scalar('losses/amp_diversity_loss', torch_ext.mean_list(train_info['amp_diversity_loss']).item(), frame) - - enc_reward_std, enc_reward_mean = 
torch.std_mean(train_info['enc_rewards']) - self.writer.add_scalar('info/enc_reward_mean', enc_reward_mean.item(), frame) - self.writer.add_scalar('info/enc_reward_std', enc_reward_std.item(), frame) - - if (self._enable_enc_grad_penalty()): - self.writer.add_scalar('info/enc_grad_penalty', torch_ext.mean_list(train_info['enc_grad_penalty']).item(), frame) - - return - - def _change_char_color(self, env_ids): - base_col = np.array([0.4, 0.4, 0.4]) - range_col = np.array([0.0706, 0.149, 0.2863]) - range_sum = np.linalg.norm(range_col) - - rand_col = np.random.uniform(0.0, 1.0, size=3) - rand_col = range_sum * rand_col / np.linalg.norm(rand_col) - rand_col += base_col - self.vec_env.env.task.set_char_color(rand_col, env_ids) - return - - def _amp_debug(self, info, ase_latents): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs - ase_latents = ase_latents - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents) - disc_reward = amp_rewards['disc_rewards'] - enc_reward = amp_rewards['enc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - enc_reward = enc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward, enc_reward) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml deleted file mode 100644 index d58ece91..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_humanoid_hrl.yaml +++ /dev/null @@ -1,114 +0,0 @@ -params: - seed: 42 - - algo: - name: ase - - model: - name: ase - - network: - name: ase - separate: True - - space: - continuous: - mu_activation: None - sigma_activation: None - mu_init: - name: default - sigma_init: - name: const_initializer - val: -2.9 - fixed_sigma: True - learn_sigma: False - - mlp: - units: [1024, 1024, 512] - activation: relu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - disc: - units: [1024, 1024, 512] - activation: relu - - initializer: - name: default - - enc: - units: [1024, 512] - activation: relu - separate: False - - initializer: - name: default - - load_checkpoint: False - - config: - name: Humanoid - env_name: rlgpu - multi_gpu: False - ppo: True - mixed_precision: False - normalize_input: True - normalize_value: True - reward_shaper: - scale_value: 1 - normalize_advantage: True - gamma: 0.99 - tau: 0.95 - learning_rate: 2e-5 - lr_schedule: constant - score_to_win: 20000 - max_epochs: 100000 - save_best_after: 50 - save_frequency: 50 - print_stats: True - grad_norm: 1.0 - entropy_coef: 0.0 - truncate_grads: False - ppo: True - e_clip: 0.2 - horizon_length: 32 - minibatch_size: 1 - mini_epochs: 6 - critic_coef: 5 - clip_value: False - seq_len: 4 - bounds_loss_coef: 10 - amp_obs_demo_buffer_size: 200000 - amp_replay_buffer_size: 200000 - amp_replay_keep_prob: 0.01 - amp_batch_size: 32 - amp_minibatch_size: 1 - disc_coef: 5 - disc_logit_reg: 0.01 - disc_grad_penalty: 5 - disc_reward_scale: 2 - disc_weight_decay: 0.0001 - normalize_amp_input: True - enable_eps_greedy: False - - latent_dim: 64 - latent_steps_min: 1 - latent_steps_max: 150 - - amp_latent_grad_bonus: 0.00 - amp_latent_grad_bonus_max: 100.0 - amp_diversity_bonus: 0.01 - amp_diversity_tar: 1.0 - - enc_coef: 5 - enc_weight_decay: 0.0000 - enc_reward_scale: 1 - enc_grad_penalty: 0 - - task_reward_w: 0.0 - disc_reward_w: 0.5 - enc_reward_w: 0.5 diff --git 
a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py deleted file mode 100644 index db71a606..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_models.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import amp_models - -class ModelASEContinuous(amp_models.ModelAMPContinuous): - def __init__(self, network): - super().__init__(network) - return - - def build(self, config): - net = self.network_builder.build('ase', **config) - for name, _ in net.named_parameters(): - print(name) - return ModelASEContinuous.Network(net) - - class Network(amp_models.ModelAMPContinuous.Network): - def __init__(self, a2c_network): - super().__init__(a2c_network) - return - - def forward(self, input_dict): - is_train = input_dict.get('is_train', True) - result = super().forward(input_dict) - - if (is_train): - amp_obs = input_dict['amp_obs'] - enc_pred = self.a2c_network.eval_enc(amp_obs) - result["enc_pred"] = enc_pred - - return result \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py deleted file mode 100644 index c61fae76..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_network_builder.py +++ /dev/null @@ -1,379 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import layers -from rl_games.algos_torch import network_builder - -import torch -import torch.nn as nn -import numpy as np -import enum - -import amp_network_builder - -ENC_LOGIT_INIT_SCALE = 0.1 - -class LatentType(enum.Enum): - uniform = 0 - sphere = 1 - -class ASEBuilder(amp_network_builder.AMPBuilder): - def __init__(self, **kwargs): - super().__init__(**kwargs) - return - - class Network(amp_network_builder.AMPBuilder.Network): - def __init__(self, params, **kwargs): - actions_num = kwargs.get('actions_num') - input_shape = kwargs.get('input_shape') - self.value_size = kwargs.get('value_size', 1) - self.num_seqs = num_seqs = kwargs.get('num_seqs', 1) - amp_input_shape = kwargs.get('amp_input_shape') - self._ase_latent_shape = kwargs.get('ase_latent_shape') - - network_builder.NetworkBuilder.BaseNetwork.__init__(self) - - self.load(params) - - actor_out_size, critic_out_size = self._build_actor_critic_net(input_shape, self._ase_latent_shape) - - self.value = torch.nn.Linear(critic_out_size, self.value_size) - self.value_act = self.activations_factory.create(self.value_activation) - - if self.is_discrete: - self.logits = torch.nn.Linear(actor_out_size, actions_num) - ''' - for multidiscrete actions num is a tuple - ''' - if self.is_multi_discrete: - self.logits = torch.nn.ModuleList([torch.nn.Linear(actor_out_size, num) for num in actions_num]) - if self.is_continuous: - self.mu = torch.nn.Linear(actor_out_size, actions_num) - self.mu_act = self.activations_factory.create(self.space_config['mu_activation']) - mu_init = self.init_factory.create(**self.space_config['mu_init']) - self.sigma_act = self.activations_factory.create(self.space_config['sigma_activation']) - - sigma_init = self.init_factory.create(**self.space_config['sigma_init']) - - if (not self.space_config['learn_sigma']): - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False) - elif self.space_config['fixed_sigma']: - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), requires_grad=True) - else: - self.sigma = torch.nn.Linear(actor_out_size, actions_num) - - mlp_init = self.init_factory.create(**self.initializer) - if self.has_cnn: - cnn_init = self.init_factory.create(**self.cnn['initializer']) - - for m in self.modules(): - if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d): - cnn_init(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - if 
isinstance(m, nn.Linear): - mlp_init(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - - self.actor_mlp.init_params() - self.critic_mlp.init_params() - - if self.is_continuous: - mu_init(self.mu.weight) - if self.space_config['fixed_sigma']: - sigma_init(self.sigma) - else: - sigma_init(self.sigma.weight) - - self._build_disc(amp_input_shape) - self._build_enc(amp_input_shape) - - return - - def load(self, params): - super().load(params) - - self._enc_units = params['enc']['units'] - self._enc_activation = params['enc']['activation'] - self._enc_initializer = params['enc']['initializer'] - self._enc_separate = params['enc']['separate'] - - return - - def forward(self, obs_dict): - obs = obs_dict['obs'] - ase_latents = obs_dict['ase_latents'] - states = obs_dict.get('rnn_states', None) - use_hidden_latents = obs_dict.get('use_hidden_latents', False) - - actor_outputs = self.eval_actor(obs, ase_latents, use_hidden_latents) - value = self.eval_critic(obs, ase_latents, use_hidden_latents) - - output = actor_outputs + (value, states) - - return output - - def eval_actor(self, obs, ase_latents, use_hidden_latents=False): - a_out = self.actor_cnn(obs) - a_out = a_out.contiguous().view(a_out.size(0), -1) - a_out = self.actor_mlp(a_out, ase_latents, use_hidden_latents) - - if self.is_discrete: - logits = self.logits(a_out) - return logits - - if self.is_multi_discrete: - logits = [logit(a_out) for logit in self.logits] - return logits - - if self.is_continuous: - mu = self.mu_act(self.mu(a_out)) - if self.space_config['fixed_sigma']: - sigma = mu * 0.0 + self.sigma_act(self.sigma) - else: - sigma = self.sigma_act(self.sigma(a_out)) - - return mu, sigma - return - - def eval_critic(self, obs, ase_latents, use_hidden_latents=False): - c_out = self.critic_cnn(obs) - c_out = c_out.contiguous().view(c_out.size(0), -1) - - c_out = self.critic_mlp(c_out, ase_latents, use_hidden_latents) - value = self.value_act(self.value(c_out)) - return value - - def get_enc_weights(self): - weights = [] - for m in self._enc_mlp.modules(): - if isinstance(m, nn.Linear): - weights.append(torch.flatten(m.weight)) - - weights.append(torch.flatten(self._enc.weight)) - return weights - - def _build_actor_critic_net(self, input_shape, ase_latent_shape): - style_units = [512, 256] - style_dim = ase_latent_shape[-1] - - self.actor_cnn = nn.Sequential() - self.critic_cnn = nn.Sequential() - - act_fn = self.activations_factory.create(self.activation) - initializer = self.init_factory.create(**self.initializer) - - self.actor_mlp = AMPStyleCatNet1(obs_size=input_shape[-1], - ase_latent_size=ase_latent_shape[-1], - units=self.units, - activation=act_fn, - style_units=style_units, - style_dim=style_dim, - initializer=initializer) - - if self.separate: - self.critic_mlp = AMPMLPNet(obs_size=input_shape[-1], - ase_latent_size=ase_latent_shape[-1], - units=self.units, - activation=act_fn, - initializer=initializer) - - actor_out_size = self.actor_mlp.get_out_size() - critic_out_size = self.critic_mlp.get_out_size() - - return actor_out_size, critic_out_size - - def _build_enc(self, input_shape): - if (self._enc_separate): - self._enc_mlp = nn.Sequential() - mlp_args = { - 'input_size' : input_shape[0], - 'units' : self._enc_units, - 'activation' : self._enc_activation, - 'dense_func' : torch.nn.Linear - } - self._enc_mlp = self._build_mlp(**mlp_args) - - mlp_init = self.init_factory.create(**self._enc_initializer) - for m in self._enc_mlp.modules(): - if isinstance(m, nn.Linear): - mlp_init(m.weight) 
- if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - else: - self._enc_mlp = self._disc_mlp - - mlp_out_layer = list(self._enc_mlp.modules())[-2] - mlp_out_size = mlp_out_layer.out_features - self._enc = torch.nn.Linear(mlp_out_size, self._ase_latent_shape[-1]) - - torch.nn.init.uniform_(self._enc.weight, -ENC_LOGIT_INIT_SCALE, ENC_LOGIT_INIT_SCALE) - torch.nn.init.zeros_(self._enc.bias) - - return - - def eval_enc(self, amp_obs): - enc_mlp_out = self._enc_mlp(amp_obs) - enc_output = self._enc(enc_mlp_out) - enc_output = torch.nn.functional.normalize(enc_output, dim=-1) - - return enc_output - - def sample_latents(self, n): - device = next(self._enc.parameters()).device - z = torch.normal(torch.zeros([n, self._ase_latent_shape[-1]], device=device)) - z = torch.nn.functional.normalize(z, dim=-1) - return z - - def build(self, name, **kwargs): - net = ASEBuilder.Network(self.params, **kwargs) - return net - - -class AMPMLPNet(torch.nn.Module): - def __init__(self, obs_size, ase_latent_size, units, activation, initializer): - super().__init__() - - input_size = obs_size + ase_latent_size - print('build amp mlp net:', input_size) - - self._units = units - self._initializer = initializer - self._mlp = [] - - in_size = input_size - for i in range(len(units)): - unit = units[i] - curr_dense = torch.nn.Linear(in_size, unit) - self._mlp.append(curr_dense) - self._mlp.append(activation) - in_size = unit - - self._mlp = nn.Sequential(*self._mlp) - self.init_params() - return - - def forward(self, obs, latent, skip_style): - inputs = [obs, latent] - input = torch.cat(inputs, dim=-1) - output = self._mlp(input) - return output - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Linear): - self._initializer(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - return - - def get_out_size(self): - out_size = self._units[-1] - return out_size - -class AMPStyleCatNet1(torch.nn.Module): - def __init__(self, obs_size, ase_latent_size, units, activation, - style_units, style_dim, initializer): - super().__init__() - - print('build amp style cat net:', obs_size, ase_latent_size) - - self._activation = activation - self._initializer = initializer - self._dense_layers = [] - self._units = units - self._style_dim = style_dim - self._style_activation = torch.tanh - - self._style_mlp = self._build_style_mlp(style_units, ase_latent_size) - self._style_dense = torch.nn.Linear(style_units[-1], style_dim) - - in_size = obs_size + style_dim - for i in range(len(units)): - unit = units[i] - out_size = unit - curr_dense = torch.nn.Linear(in_size, out_size) - self._dense_layers.append(curr_dense) - - in_size = out_size - - self._dense_layers = nn.ModuleList(self._dense_layers) - - self.init_params() - - return - - def forward(self, obs, latent, skip_style): - if (skip_style): - style = latent - else: - style = self.eval_style(latent) - - h = torch.cat([obs, style], dim=-1) - - for i in range(len(self._dense_layers)): - curr_dense = self._dense_layers[i] - h = curr_dense(h) - h = self._activation(h) - - return h - - def eval_style(self, latent): - style_h = self._style_mlp(latent) - style = self._style_dense(style_h) - style = self._style_activation(style) - return style - - def init_params(self): - scale_init_range = 1.0 - - for m in self.modules(): - if isinstance(m, nn.Linear): - self._initializer(m.weight) - if getattr(m, "bias", None) is not None: - torch.nn.init.zeros_(m.bias) - - nn.init.uniform_(self._style_dense.weight, 
-scale_init_range, scale_init_range) - return - - def get_out_size(self): - out_size = self._units[-1] - return out_size - - def _build_style_mlp(self, style_units, input_size): - in_size = input_size - layers = [] - for unit in style_units: - layers.append(torch.nn.Linear(in_size, unit)) - layers.append(self._activation) - in_size = unit - - enc_mlp = nn.Sequential(*layers) - return enc_mlp \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py deleted file mode 100644 index 63d2809e..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/ase_players.py +++ /dev/null @@ -1,179 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
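For reference, the ase_network_builder.py removed above draws skill latents by normalizing a Gaussian sample onto the unit hypersphere (sample_latents) and trains an encoder head that maps AMP observations back onto that sphere (eval_enc). A minimal standalone sketch of that sampling and of the dot-product encoder error it feeds, assuming nothing beyond PyTorch; the function names here are illustrative rather than part of the removed module:

import torch
import torch.nn.functional as F

def sample_latents(n, latent_dim, device="cpu"):
    # Standard normal draw projected onto the unit hypersphere, as in
    # ASEBuilder.Network.sample_latents above.
    z = torch.randn(n, latent_dim, device=device)
    return F.normalize(z, dim=-1)

def enc_error(enc_pred, ase_latent):
    # Negative inner product between the normalized encoder prediction and
    # the latent; directions that agree give a large negative error.
    return -torch.sum(enc_pred * ase_latent, dim=-1, keepdim=True)

z = sample_latents(4, 64)
pred = F.normalize(torch.randn(4, 64), dim=-1)           # stand-in for eval_enc output
enc_reward = torch.clamp_min(-enc_error(pred, z), 0.0)   # clamped as in _calc_enc_rewards, before scaling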
- -import torch - -from isaacgym.torch_utils import * -from rl_games.algos_torch import players - -import amp_players - -class ASEPlayer(amp_players.AMPPlayerContinuous): - def __init__(self, config): - self._latent_dim = config['latent_dim'] - self._latent_steps_min = config.get('latent_steps_min', np.inf) - self._latent_steps_max = config.get('latent_steps_max', np.inf) - - self._enc_reward_scale = config['enc_reward_scale'] - - super().__init__(config) - - if (hasattr(self, 'env')): - batch_size = self.env.task.num_envs - else: - batch_size = self.env_info['num_envs'] - self._ase_latents = torch.zeros((batch_size, self._latent_dim), dtype=torch.float32, - device=self.device) - - return - - def run(self): - self._reset_latent_step_count() - super().run() - return - - def get_action(self, obs_dict, is_determenistic=False): - self._update_latents() - - obs = obs_dict['obs'] - if len(obs.size()) == len(self.obs_shape): - obs = obs.unsqueeze(0) - obs = self._preproc_obs(obs) - ase_latents = self._ase_latents - - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs' : obs, - 'rnn_states' : self.states, - 'ase_latents': ase_latents - } - with torch.no_grad(): - res_dict = self.model(input_dict) - mu = res_dict['mus'] - action = res_dict['actions'] - self.states = res_dict['rnn_states'] - if is_determenistic: - current_action = mu - else: - current_action = action - current_action = torch.squeeze(current_action.detach()) - return players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(current_action, -1.0, 1.0)) - - def env_reset(self, env_ids=None): - obs = super().env_reset(env_ids) - self._reset_latents(env_ids) - return obs - - def _build_net_config(self): - config = super()._build_net_config() - config['ase_latent_shape'] = (self._latent_dim,) - return config - - def _reset_latents(self, done_env_ids=None): - if (done_env_ids is None): - num_envs = self.env.task.num_envs - done_env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device) - - rand_vals = self.model.a2c_network.sample_latents(len(done_env_ids)) - self._ase_latents[done_env_ids] = rand_vals - self._change_char_color(done_env_ids) - - return - - def _update_latents(self): - if (self._latent_step_count <= 0): - self._reset_latents() - self._reset_latent_step_count() - - if (self.env.task.viewer): - print("Sampling new amp latents------------------------------") - num_envs = self.env.task.num_envs - env_ids = to_torch(np.arange(num_envs), dtype=torch.long, device=self.device) - self._change_char_color(env_ids) - else: - self._latent_step_count -= 1 - return - - def _reset_latent_step_count(self): - self._latent_step_count = np.random.randint(self._latent_steps_min, self._latent_steps_max) - return - - def _calc_amp_rewards(self, amp_obs, ase_latents): - disc_r = self._calc_disc_rewards(amp_obs) - enc_r = self._calc_enc_rewards(amp_obs, ase_latents) - output = { - 'disc_rewards': disc_r, - 'enc_rewards': enc_r - } - return output - - def _calc_enc_rewards(self, amp_obs, ase_latents): - with torch.no_grad(): - enc_pred = self._eval_enc(amp_obs) - err = self._calc_enc_error(enc_pred, ase_latents) - enc_r = torch.clamp_min(-err, 0.0) - enc_r *= self._enc_reward_scale - - return enc_r - - def _calc_enc_error(self, enc_pred, ase_latent): - err = enc_pred * ase_latent - err = -torch.sum(err, dim=-1, keepdim=True) - return err - - def _eval_enc(self, amp_obs): - proc_amp_obs = self._preproc_amp_obs(amp_obs) - return self.model.a2c_network.eval_enc(proc_amp_obs) - - def _amp_debug(self, 
info): - with torch.no_grad(): - amp_obs = info['amp_obs'] - amp_obs = amp_obs - ase_latents = self._ase_latents - disc_pred = self._eval_disc(amp_obs) - amp_rewards = self._calc_amp_rewards(amp_obs, ase_latents) - disc_reward = amp_rewards['disc_rewards'] - enc_reward = amp_rewards['enc_rewards'] - - disc_pred = disc_pred.detach().cpu().numpy()[0, 0] - disc_reward = disc_reward.cpu().numpy()[0, 0] - enc_reward = enc_reward.cpu().numpy()[0, 0] - print("disc_pred: ", disc_pred, disc_reward, enc_reward) - return - - def _change_char_color(self, env_ids): - base_col = np.array([0.4, 0.4, 0.4]) - range_col = np.array([0.0706, 0.149, 0.2863]) - range_sum = np.linalg.norm(range_col) - - rand_col = np.random.uniform(0.0, 1.0, size=3) - rand_col = range_sum * rand_col / np.linalg.norm(rand_col) - rand_col += base_col - self.env.task.set_char_color(rand_col, env_ids) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py deleted file mode 100644 index 835b9400..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_agent.py +++ /dev/null @@ -1,592 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
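The agent and player code above both fold the discriminator and encoder outputs into a single reward via _combine_rewards, using the task_reward_w / disc_reward_w / enc_reward_w weights from ase_humanoid_hrl.yaml (0.0 / 0.5 / 0.5, i.e. a pure style-and-skill reward with no task term). A compact sketch of that weighting, with placeholder tensors standing in for the real rollout buffers:

import torch

def combine_rewards(task_r, disc_r, enc_r, task_w=0.0, disc_w=0.5, enc_w=0.5):
    # Weighted sum used by _combine_rewards; the defaults mirror the yaml above.
    return task_w * task_r + disc_w * disc_r + enc_w * enc_r

task_r = torch.zeros(8, 1)        # placeholder task reward
disc_r = torch.rand(8, 1) * 2.0   # placeholder discriminator reward (already scaled)
enc_r = torch.rand(8, 1)          # placeholder encoder (skill-consistency) reward
total = combine_rewards(task_r, disc_r, enc_r)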
- -import copy -from datetime import datetime -from gym import spaces -import numpy as np -import os -import time -import yaml - -from rl_games.algos_torch import a2c_continuous -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import central_value -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common import a2c_common -from rl_games.common import datasets -from rl_games.common import schedulers -from rl_games.common import vecenv - -import torch -from torch import optim - -import amp_datasets - -from tensorboardX import SummaryWriter - -class CommonAgent(a2c_continuous.A2CAgent): - def __init__(self, base_name, config): - a2c_common.A2CBase.__init__(self, base_name, config) - - self._load_config_params(config) - - self.is_discrete = False - self._setup_action_space() - self.bounds_loss_coef = config.get('bounds_loss_coef', None) - self.clip_actions = config.get('clip_actions', True) - self._save_intermediate = config.get('save_intermediate', False) - - net_config = self._build_net_config() - self.model = self.network.build(net_config) - self.model.to(self.ppo_device) - self.states = None - - self.init_rnn_from_model(self.model) - self.last_lr = float(self.last_lr) - - self.optimizer = optim.Adam(self.model.parameters(), float(self.last_lr), eps=1e-08, weight_decay=self.weight_decay) - - if self.normalize_input: - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - self.running_mean_std = RunningMeanStd(obs_shape).to(self.ppo_device) - - if self.has_central_value: - cv_config = { - 'state_shape' : torch_ext.shape_whc_to_cwh(self.state_shape), - 'value_size' : self.value_size, - 'ppo_device' : self.ppo_device, - 'num_agents' : self.num_agents, - 'horizon_length' : self.horizon_length, - 'num_actors' : self.num_actors, - 'num_actions' : self.actions_num, - 'seq_len' : self.seq_len, - 'model' : self.central_value_config['network'], - 'config' : self.central_value_config, - 'writter' : self.writer, - 'multi_gpu' : self.multi_gpu - } - self.central_value_net = central_value.CentralValueTrain(**cv_config).to(self.ppo_device) - - self.use_experimental_cv = self.config.get('use_experimental_cv', True) - self.dataset = amp_datasets.AMPDataset(self.batch_size, self.minibatch_size, self.is_discrete, self.is_rnn, self.ppo_device, self.seq_len) - self.algo_observer.after_init(self) - - return - - def init_tensors(self): - super().init_tensors() - self.experience_buffer.tensor_dict['next_obses'] = torch.zeros_like(self.experience_buffer.tensor_dict['obses']) - self.experience_buffer.tensor_dict['next_values'] = torch.zeros_like(self.experience_buffer.tensor_dict['values']) - - self.tensor_list += ['next_obses'] - return - - def train(self): - self.init_tensors() - self.last_mean_rewards = -100500 - start_time = time.time() - total_time = 0 - rep_count = 0 - self.frame = 0 - self.obs = self.env_reset() - self.curr_frames = self.batch_size_envs - - model_output_file = os.path.join(self.nn_dir, self.config['name']) - - if self.multi_gpu: - self.hvd.setup_algo(self) - - self._init_train() - - while True: - epoch_num = self.update_epoch() - train_info = self.train_epoch() - - sum_time = train_info['total_time'] - total_time += sum_time - frame = self.frame - if self.multi_gpu: - self.hvd.sync_stats(self) - - if self.rank == 0: - scaled_time = sum_time - scaled_play_time = train_info['play_time'] - curr_frames = self.curr_frames - self.frame += curr_frames - if self.print_stats: - fps_step = curr_frames / scaled_play_time - fps_total = curr_frames / 
scaled_time - print(f'fps step: {fps_step:.1f} fps total: {fps_total:.1f}') - - self.writer.add_scalar('performance/total_fps', curr_frames / scaled_time, frame) - self.writer.add_scalar('performance/step_fps', curr_frames / scaled_play_time, frame) - self.writer.add_scalar('info/epochs', epoch_num, frame) - self._log_train_info(train_info, frame) - - self.algo_observer.after_print_stats(frame, epoch_num, total_time) - - if self.game_rewards.current_size > 0: - mean_rewards = self._get_mean_rewards() - mean_lengths = self.game_lengths.get_mean() - - for i in range(self.value_size): - self.writer.add_scalar('rewards{0}/frame'.format(i), mean_rewards[i], frame) - self.writer.add_scalar('rewards{0}/iter'.format(i), mean_rewards[i], epoch_num) - self.writer.add_scalar('rewards{0}/time'.format(i), mean_rewards[i], total_time) - - self.writer.add_scalar('episode_lengths/frame', mean_lengths, frame) - self.writer.add_scalar('episode_lengths/iter', mean_lengths, epoch_num) - - if self.has_self_play_config: - self.self_play_manager.update(self) - - if self.save_freq > 0: - if (epoch_num % self.save_freq == 0): - self.save(model_output_file) - - if (self._save_intermediate): - int_model_output_file = model_output_file + '_' + str(epoch_num).zfill(8) - self.save(int_model_output_file) - - if epoch_num > self.max_epochs: - self.save(model_output_file) - print('MAX EPOCHS NUM!') - return self.last_mean_rewards, epoch_num - - update_time = 0 - return - - def set_full_state_weights(self, weights): - self.set_weights(weights) - self.epoch_num = weights['epoch'] - if self.has_central_value: - self.central_value_net.load_state_dict(weights['assymetric_vf_nets']) - self.optimizer.load_state_dict(weights['optimizer']) - self.frame = weights.get('frame', 0) - self.last_mean_rewards = weights.get('last_mean_rewards', -100500) - - if (hasattr(self, 'vec_env')): - env_state = weights.get('env_state', None) - self.vec_env.set_env_state(env_state) - - return - - def train_epoch(self): - play_time_start = time.time() - with torch.no_grad(): - if self.is_rnn: - batch_dict = self.play_steps_rnn() - else: - batch_dict = self.play_steps() - - play_time_end = time.time() - update_time_start = time.time() - rnn_masks = batch_dict.get('rnn_masks', None) - - self.set_train() - - self.curr_frames = batch_dict.pop('played_frames') - self.prepare_dataset(batch_dict) - self.algo_observer.after_steps() - - if self.has_central_value: - self.train_central_value() - - train_info = None - - if self.is_rnn: - frames_mask_ratio = rnn_masks.sum().item() / (rnn_masks.nelement()) - print(frames_mask_ratio) - - for _ in range(0, self.mini_epochs_num): - ep_kls = [] - for i in range(len(self.dataset)): - curr_train_info = self.train_actor_critic(self.dataset[i]) - - if self.schedule_type == 'legacy': - if self.multi_gpu: - curr_train_info['kl'] = self.hvd.average_value(curr_train_info['kl'], 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, curr_train_info['kl'].item()) - self.update_lr(self.last_lr) - - if (train_info is None): - train_info = dict() - for k, v in curr_train_info.items(): - train_info[k] = [v] - else: - for k, v in curr_train_info.items(): - train_info[k].append(v) - - av_kls = torch_ext.mean_list(train_info['kl']) - - if self.schedule_type == 'standard': - if self.multi_gpu: - av_kls = self.hvd.average_value(av_kls, 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) - 
self.update_lr(self.last_lr) - - if self.schedule_type == 'standard_epoch': - if self.multi_gpu: - av_kls = self.hvd.average_value(torch_ext.mean_list(kls), 'ep_kls') - self.last_lr, self.entropy_coef = self.scheduler.update(self.last_lr, self.entropy_coef, self.epoch_num, 0, av_kls.item()) - self.update_lr(self.last_lr) - - update_time_end = time.time() - play_time = play_time_end - play_time_start - update_time = update_time_end - update_time_start - total_time = update_time_end - play_time_start - - train_info['play_time'] = play_time - train_info['update_time'] = update_time - train_info['total_time'] = total_time - self._record_train_batch_info(batch_dict, train_info) - - return train_info - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, masks) - else: - res_dict = self.get_action_values(self.obs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - return batch_dict - - def prepare_dataset(self, batch_dict): - obses = batch_dict['obses'] - returns = batch_dict['returns'] - dones = batch_dict['dones'] - values = batch_dict['values'] - actions = batch_dict['actions'] - neglogpacs = batch_dict['neglogpacs'] - mus = batch_dict['mus'] - sigmas = batch_dict['sigmas'] - rnn_states = batch_dict.get('rnn_states', None) - rnn_masks = batch_dict.get('rnn_masks', None) - - advantages = self._calc_advs(batch_dict) - - if self.normalize_value: - 
values = self.value_mean_std(values) - returns = self.value_mean_std(returns) - - dataset_dict = {} - dataset_dict['old_values'] = values - dataset_dict['old_logp_actions'] = neglogpacs - dataset_dict['advantages'] = advantages - dataset_dict['returns'] = returns - dataset_dict['actions'] = actions - dataset_dict['obs'] = obses - dataset_dict['rnn_states'] = rnn_states - dataset_dict['rnn_masks'] = rnn_masks - dataset_dict['mu'] = mus - dataset_dict['sigma'] = sigmas - - self.dataset.update_values_dict(dataset_dict) - - if self.has_central_value: - dataset_dict = {} - dataset_dict['old_values'] = values - dataset_dict['advantages'] = advantages - dataset_dict['returns'] = returns - dataset_dict['actions'] = actions - dataset_dict['obs'] = batch_dict['states'] - dataset_dict['rnn_masks'] = rnn_masks - self.central_value_net.update_dataset(dataset_dict) - - return - - def calc_gradients(self, input_dict): - self.set_train() - - value_preds_batch = input_dict['old_values'] - old_action_log_probs_batch = input_dict['old_logp_actions'] - advantage = input_dict['advantages'] - old_mu_batch = input_dict['mu'] - old_sigma_batch = input_dict['sigma'] - return_batch = input_dict['returns'] - actions_batch = input_dict['actions'] - obs_batch = input_dict['obs'] - obs_batch = self._preproc_obs(obs_batch) - - lr = self.last_lr - kl = 1.0 - lr_mul = 1.0 - curr_e_clip = lr_mul * self.e_clip - - batch_dict = { - 'is_train': True, - 'prev_actions': actions_batch, - 'obs' : obs_batch - } - - rnn_masks = None - if self.is_rnn: - rnn_masks = input_dict['rnn_masks'] - batch_dict['rnn_states'] = input_dict['rnn_states'] - batch_dict['seq_length'] = self.seq_len - - with torch.cuda.amp.autocast(enabled=self.mixed_precision): - res_dict = self.model(batch_dict) - action_log_probs = res_dict['prev_neglogp'] - values = res_dict['values'] - entropy = res_dict['entropy'] - mu = res_dict['mus'] - sigma = res_dict['sigmas'] - - a_info = self._actor_loss(old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip) - a_loss = a_info['actor_loss'] - - c_info = self._critic_loss(value_preds_batch, values, curr_e_clip, return_batch, self.clip_value) - c_loss = c_info['critic_loss'] - - b_loss = self.bound_loss(mu) - - a_loss = torch.mean(a_loss) - c_loss = torch.mean(c_loss) - b_loss = torch.mean(b_loss) - entropy = torch.mean(entropy) - - loss = a_loss + self.critic_coef * c_loss - self.entropy_coef * entropy + self.bounds_loss_coef * b_loss - - a_clip_frac = torch.mean(a_info['actor_clipped'].float()) - - a_info['actor_loss'] = a_loss - a_info['actor_clip_frac'] = a_clip_frac - - if self.multi_gpu: - self.optimizer.zero_grad() - else: - for param in self.model.parameters(): - param.grad = None - - self.scaler.scale(loss).backward() - self.scaler.step(self.optimizer) - self.scaler.update() - - with torch.no_grad(): - reduce_kl = not self.is_rnn - kl_dist = torch_ext.policy_kl(mu.detach(), sigma.detach(), old_mu_batch, old_sigma_batch, reduce_kl) - - self.train_result = { - 'entropy': entropy, - 'kl': kl_dist, - 'last_lr': self.last_lr, - 'lr_mul': lr_mul, - 'b_loss': b_loss - } - self.train_result.update(a_info) - self.train_result.update(c_info) - - return - - def discount_values(self, mb_fdones, mb_values, mb_rewards, mb_next_values): - lastgaelam = 0 - mb_advs = torch.zeros_like(mb_rewards) - - for t in reversed(range(self.horizon_length)): - not_done = 1.0 - mb_fdones[t] - not_done = not_done.unsqueeze(1) - - delta = mb_rewards[t] + self.gamma * mb_next_values[t] - mb_values[t] - lastgaelam = delta + 
self.gamma * self.tau * not_done * lastgaelam - mb_advs[t] = lastgaelam - - return mb_advs - - def env_reset(self, env_ids=None): - obs = self.vec_env.reset(env_ids) - obs = self.obs_to_tensors(obs) - return obs - - def bound_loss(self, mu): - if self.bounds_loss_coef is not None: - soft_bound = 1.0 - mu_loss_high = torch.clamp_min(mu - soft_bound, 0.0)**2 - mu_loss_low = torch.clamp_max(mu + soft_bound, 0.0)**2 - b_loss = (mu_loss_low + mu_loss_high).sum(axis=-1) - else: - b_loss = 0 - return b_loss - - def _get_mean_rewards(self): - return self.game_rewards.get_mean() - - def _load_config_params(self, config): - self.last_lr = config['learning_rate'] - return - - def _build_net_config(self): - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - config = { - 'actions_num' : self.actions_num, - 'input_shape' : obs_shape, - 'num_seqs' : self.num_actors * self.num_agents, - 'value_size': self.env_info.get('value_size', 1), - } - return config - - def _setup_action_space(self): - action_space = self.env_info['action_space'] - self.actions_num = action_space.shape[0] - - # todo introduce device instead of cuda() - self.actions_low = torch.from_numpy(action_space.low.copy()).float().to(self.ppo_device) - self.actions_high = torch.from_numpy(action_space.high.copy()).float().to(self.ppo_device) - return - - def _init_train(self): - return - - def _eval_critic(self, obs_dict): - self.model.eval() - obs = obs_dict['obs'] - processed_obs = self._preproc_obs(obs) - value = self.model.a2c_network.eval_critic(processed_obs) - - if self.normalize_value: - value = self.value_mean_std(value, True) - return value - - def _actor_loss(self, old_action_log_probs_batch, action_log_probs, advantage, curr_e_clip): - ratio = torch.exp(old_action_log_probs_batch - action_log_probs) - surr1 = advantage * ratio - surr2 = advantage * torch.clamp(ratio, 1.0 - curr_e_clip, - 1.0 + curr_e_clip) - a_loss = torch.max(-surr1, -surr2) - - clipped = torch.abs(ratio - 1.0) > curr_e_clip - clipped = clipped.detach() - - info = { - 'actor_loss': a_loss, - 'actor_clipped': clipped.detach() - } - return info - - def _critic_loss(self, value_preds_batch, values, curr_e_clip, return_batch, clip_value): - if clip_value: - value_pred_clipped = value_preds_batch + \ - (values - value_preds_batch).clamp(-curr_e_clip, curr_e_clip) - value_losses = (values - return_batch)**2 - value_losses_clipped = (value_pred_clipped - return_batch)**2 - c_loss = torch.max(value_losses, value_losses_clipped) - else: - c_loss = (return_batch - values)**2 - - info = { - 'critic_loss': c_loss - } - return info - - def _calc_advs(self, batch_dict): - returns = batch_dict['returns'] - values = batch_dict['values'] - - advantages = returns - values - advantages = torch.sum(advantages, axis=1) - - if self.normalize_advantage: - advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8) - - return advantages - - def _record_train_batch_info(self, batch_dict, train_info): - return - - def _log_train_info(self, train_info, frame): - self.writer.add_scalar('performance/update_time', train_info['update_time'], frame) - self.writer.add_scalar('performance/play_time', train_info['play_time'], frame) - self.writer.add_scalar('losses/a_loss', torch_ext.mean_list(train_info['actor_loss']).item(), frame) - self.writer.add_scalar('losses/c_loss', torch_ext.mean_list(train_info['critic_loss']).item(), frame) - - self.writer.add_scalar('losses/bounds_loss', torch_ext.mean_list(train_info['b_loss']).item(), frame) - 
self.writer.add_scalar('losses/entropy', torch_ext.mean_list(train_info['entropy']).item(), frame) - self.writer.add_scalar('info/last_lr', train_info['last_lr'][-1] * train_info['lr_mul'][-1], frame) - self.writer.add_scalar('info/lr_mul', train_info['lr_mul'][-1], frame) - self.writer.add_scalar('info/e_clip', self.e_clip * train_info['lr_mul'][-1], frame) - self.writer.add_scalar('info/clip_frac', torch_ext.mean_list(train_info['actor_clip_frac']).item(), frame) - self.writer.add_scalar('info/kl', torch_ext.mean_list(train_info['kl']).item(), frame) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py deleted file mode 100644 index ac4e626a..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/common_player.py +++ /dev/null @@ -1,216 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
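discount_values in the common_agent.py removed above is a standard GAE(lambda) backward pass over the rollout, with tau playing the role of lambda and next_values already zeroed for terminated episodes. A self-contained version of the same recursion, using illustrative tensor shapes of [horizon, num_envs, 1]:

import torch

def gae_advantages(rewards, values, next_values, dones, gamma=0.99, tau=0.95):
    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    # A_t     = delta_t + gamma * tau * (1 - done_t) * A_{t+1}
    advs = torch.zeros_like(rewards)
    lastgaelam = torch.zeros_like(rewards[0])
    for t in reversed(range(rewards.shape[0])):
        not_done = 1.0 - dones[t]
        delta = rewards[t] + gamma * next_values[t] - values[t]
        lastgaelam = delta + gamma * tau * not_done * lastgaelam
        advs[t] = lastgaelam
    return advs

T, N = 32, 4
rewards = torch.rand(T, N, 1)
values = torch.rand(T, N, 1)
next_values = torch.rand(T, N, 1)
dones = torch.zeros(T, N, 1)
returns = gae_advantages(rewards, values, next_values, dones) + values  # returns = advs + values, as in play_steps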
- -import torch - -from rl_games.algos_torch import players -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common.player import BasePlayer - -import numpy as np - -class CommonPlayer(players.PpoPlayerContinuous): - def __init__(self, config): - BasePlayer.__init__(self, config) - self.network = config['network'] - - self._setup_action_space() - self.mask = [False] - - self.normalize_input = self.config['normalize_input'] - - net_config = self._build_net_config() - self._build_net(net_config) - - return - - def run(self): - n_games = self.games_num - render = self.render_env - n_game_life = self.n_game_life - is_determenistic = self.is_determenistic - sum_rewards = 0 - sum_steps = 0 - sum_game_res = 0 - n_games = n_games * n_game_life - games_played = 0 - has_masks = False - has_masks_func = getattr(self.env, "has_action_mask", None) is not None - - op_agent = getattr(self.env, "create_agent", None) - if op_agent: - agent_inited = True - - if has_masks_func: - has_masks = self.env.has_action_mask() - - need_init_rnn = self.is_rnn - for _ in range(n_games): - if games_played >= n_games: - break - - obs_dict = self.env_reset() - batch_size = 1 - batch_size = self.get_batch_size(obs_dict['obs'], batch_size) - - if need_init_rnn: - self.init_rnn() - need_init_rnn = False - - cr = torch.zeros(batch_size, dtype=torch.float32, device=self.device) - steps = torch.zeros(batch_size, dtype=torch.float32, device=self.device) - - print_game_res = False - - done_indices = [] - - for n in range(self.max_steps): - obs_dict = self.env_reset(done_indices) - - if has_masks: - masks = self.env.get_action_mask() - action = self.get_masked_action(obs_dict, masks, is_determenistic) - else: - action = self.get_action(obs_dict, is_determenistic) - obs_dict, r, done, info = self.env_step(self.env, action) - cr += r - steps += 1 - - self._post_step(info) - - if render: - self.env.render(mode = 'human') - time.sleep(self.render_sleep) - - all_done_indices = done.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - done_count = len(done_indices) - games_played += done_count - - if done_count > 0: - if self.is_rnn: - for s in self.states: - s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0 - - cur_rewards = cr[done_indices].sum().item() - cur_steps = steps[done_indices].sum().item() - - cr = cr * (1.0 - done.float()) - steps = steps * (1.0 - done.float()) - sum_rewards += cur_rewards - sum_steps += cur_steps - - game_res = 0.0 - if isinstance(info, dict): - if 'battle_won' in info: - print_game_res = True - game_res = info.get('battle_won', 0.5) - if 'scores' in info: - print_game_res = True - game_res = info.get('scores', 0.5) - if self.print_stats: - if print_game_res: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res) - else: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count) - - sum_game_res += game_res - if batch_size//self.num_agents == 1 or games_played >= n_games: - break - - done_indices = done_indices[:, 0] - - print(sum_rewards) - if print_game_res: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life) - else: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life) - - return - - def obs_to_torch(self, obs): - obs = super().obs_to_torch(obs) - obs_dict = { - 
'obs': obs - } - return obs_dict - - def get_action(self, obs_dict, is_determenistic = False): - output = super().get_action(obs_dict['obs'], is_determenistic) - return output - - def env_step(self, env, actions): - if not self.is_tensor_obses: - actions = actions.cpu().numpy() - obs, rewards, dones, infos = env.step(actions) - - if hasattr(obs, 'dtype') and obs.dtype == np.float64: - obs = np.float32(obs) - if self.value_size > 1: - rewards = rewards[0] - if self.is_tensor_obses: - return obs, rewards.to(self.device), dones.to(self.device), infos - else: - if np.isscalar(dones): - rewards = np.expand_dims(np.asarray(rewards), 0) - dones = np.expand_dims(np.asarray(dones), 0) - return self.obs_to_torch(obs), torch.from_numpy(rewards), torch.from_numpy(dones), infos - - def _build_net(self, config): - self.model = self.network.build(config) - self.model.to(self.device) - self.model.eval() - self.is_rnn = self.model.is_rnn() - if self.normalize_input: - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - self.running_mean_std = RunningMeanStd(obs_shape).to(self.device) - self.running_mean_std.eval() - return - - def env_reset(self, env_ids=None): - obs = self.env.reset(env_ids) - return self.obs_to_torch(obs) - - def _post_step(self, info): - return - - def _build_net_config(self): - obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape) - config = { - 'actions_num' : self.actions_num, - 'input_shape' : obs_shape, - 'num_seqs' : self.num_agents - } - return config - - def _setup_action_space(self): - self.actions_num = self.action_space.shape[0] - self.actions_low = torch.from_numpy(self.action_space.low.copy()).float().to(self.device) - self.actions_high = torch.from_numpy(self.action_space.high.copy()).float().to(self.device) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/config.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/config.py deleted file mode 100644 index b77c3a64..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/config.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import sys -import yaml - -from isaacgym import gymapi -from isaacgym import gymutil - -import numpy as np -import random -import torch - -SIM_TIMESTEP = 1.0 / 60.0 - -def set_np_formatting(): - np.set_printoptions(edgeitems=30, infstr='inf', - linewidth=4000, nanstr='nan', precision=2, - suppress=False, threshold=10000, formatter=None) - - -def warn_task_name(): - raise Exception( - "Unrecognized task!\nTask should be one of: [BallBalance, Cartpole, CartpoleYUp, Ant, Humanoid, Anymal, FrankaCabinet, Quadcopter, ShadowHand, ShadowHandLSTM, ShadowHandFFOpenAI, ShadowHandFFOpenAITest, ShadowHandOpenAI, ShadowHandOpenAITest, Ingenuity]") - - -def set_seed(seed, torch_deterministic=False): - if seed == -1 and torch_deterministic: - seed = 42 - elif seed == -1: - seed = np.random.randint(0, 10000) - print("Setting seed: {}".format(seed)) - - random.seed(seed) - np.random.seed(seed) - torch.manual_seed(seed) - os.environ['PYTHONHASHSEED'] = str(seed) - torch.cuda.manual_seed(seed) - torch.cuda.manual_seed_all(seed) - - if torch_deterministic: - # refer to https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility - os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8' - torch.backends.cudnn.benchmark = False - torch.backends.cudnn.deterministic = True - torch.set_deterministic(True) - else: - torch.backends.cudnn.benchmark = True - torch.backends.cudnn.deterministic = False - - return seed - - -def load_cfg(args): - with open(args.cfg_train, 'r') as f: - cfg_train = yaml.load(f, Loader=yaml.SafeLoader) - - with open(os.path.join(os.getcwd(), args.cfg_env), 'r') as f: - cfg = yaml.load(f, Loader=yaml.SafeLoader) - - # Override number of environments if passed on the command line - if args.num_envs > 0: - cfg["env"]["numEnvs"] = args.num_envs - - if args.episode_length > 0: - cfg["env"]["episodeLength"] = args.episode_length - - cfg["name"] = args.task - cfg["headless"] = args.headless - - # Set physics domain randomization - if "task" in cfg: - if "randomize" not in cfg["task"]: - cfg["task"]["randomize"] = args.randomize - else: - cfg["task"]["randomize"] = args.randomize or cfg["task"]["randomize"] - else: - cfg["task"] = {"randomize": False} - - logdir = args.logdir - # Set deterministic mode - if args.torch_deterministic: - cfg_train["params"]["torch_deterministic"] = True - - exp_name = cfg_train["params"]["config"]['name'] - - if args.experiment != 'Base': - if args.metadata: - exp_name = "{}_{}_{}_{}".format(args.experiment, args.task_type, args.device, str(args.physics_engine).split("_")[-1]) - - if cfg["task"]["randomize"]: - exp_name += "_DR" - else: - exp_name = args.experiment - - # Override config name - cfg_train["params"]["config"]['name'] = exp_name - - if args.resume > 0: - cfg_train["params"]["load_checkpoint"] = True - - if args.checkpoint != "Base": - cfg_train["params"]["load_path"] = args.checkpoint - - if args.llc_checkpoint != "": - cfg_train["params"]["config"]["llc_checkpoint"] = args.llc_checkpoint - - # Set maximum number 
of training iterations (epochs) - if args.max_iterations > 0: - cfg_train["params"]["config"]['max_epochs'] = args.max_iterations - - cfg_train["params"]["config"]["num_actors"] = cfg["env"]["numEnvs"] - - seed = cfg_train["params"].get("seed", -1) - if args.seed is not None: - seed = args.seed - cfg["seed"] = seed - cfg_train["params"]["seed"] = seed - - cfg["args"] = args - - return cfg, cfg_train, logdir - - -def parse_sim_params(args, cfg, cfg_train): - # initialize sim - sim_params = gymapi.SimParams() - sim_params.dt = SIM_TIMESTEP - sim_params.num_client_threads = args.slices - - if args.physics_engine == gymapi.SIM_FLEX: - if args.device != "cpu": - print("WARNING: Using Flex with GPU instead of PHYSX!") - sim_params.flex.shape_collision_margin = 0.01 - sim_params.flex.num_outer_iterations = 4 - sim_params.flex.num_inner_iterations = 10 - elif args.physics_engine == gymapi.SIM_PHYSX: - sim_params.physx.solver_type = 1 - sim_params.physx.num_position_iterations = 4 - sim_params.physx.num_velocity_iterations = 0 - sim_params.physx.num_threads = 4 - sim_params.physx.use_gpu = args.use_gpu - sim_params.physx.num_subscenes = args.subscenes - sim_params.physx.max_gpu_contact_pairs = 8 * 1024 * 1024 - - sim_params.use_gpu_pipeline = args.use_gpu_pipeline - sim_params.physx.use_gpu = args.use_gpu - - # if sim options are provided in cfg, parse them and update/override above: - if "sim" in cfg: - gymutil.parse_sim_config(cfg["sim"], sim_params) - - # Override num_threads if passed on the command line - if args.physics_engine == gymapi.SIM_PHYSX and args.num_threads > 0: - sim_params.physx.num_threads = args.num_threads - - return sim_params - - -def get_args(benchmark=False): - custom_parameters = [ - {"name": "--test", "action": "store_true", "default": False, - "help": "Run trained policy, no training"}, - {"name": "--play", "action": "store_true", "default": False, - "help": "Run trained policy, the same as test, can be used only by rl_games RL library"}, - {"name": "--resume", "type": int, "default": 0, - "help": "Resume training or start testing from a checkpoint"}, - {"name": "--checkpoint", "type": str, "default": "Base", - "help": "Path to the saved weights, only for rl_games RL library"}, - {"name": "--headless", "action": "store_false", "default": True, - "help": "Force display off at all times"}, - {"name": "--horovod", "action": "store_true", "default": False, - "help": "Use horovod for multi-gpu training, have effect only with rl_games RL library"}, - {"name": "--task", "type": str, "default": "HumanoidStrike", - "help": "Can be BallBalance, Cartpole, CartpoleYUp, Ant, Humanoid, Anymal, FrankaCabinet, Quadcopter, ShadowHand, Ingenuity"}, - {"name": "--task_type", "type": str, - "default": "Python", "help": "Choose Python or C++"}, - {"name": "--rl_device", "type": str, "default": "cuda:0", - "help": "Choose CPU or GPU device for inferencing policy network"}, - {"name": "--logdir", "type": str, "default": "logs/"}, - {"name": "--experiment", "type": str, "default": "Base", - "help": "Experiment name. 
If used with --metadata flag an additional information about physics engine, sim device, pipeline and domain randomization will be added to the name"}, - {"name": "--metadata", "action": "store_true", "default": False, - "help": "Requires --experiment flag, adds physics engine, sim device, pipeline info and if domain randomization is used to the experiment name provided by user"}, - {"name": "--cfg_env", "type": str, "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/learning/RofuncRL/agents/mixline/utils/humanoid_sword_shield_strike.yaml", "help": "Environment configuration file (.yaml)"}, - {"name": "--cfg_train", "type": str, "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/learning/RofuncRL/agents/mixline/utils/hrl_humanoid.yaml", "help": "Training configuration file (.yaml)"}, - {"name": "--motion_file", "type": str, - "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/data/amp/reallusion_sword_shield/RL_Avatar_Idle_Ready_Motion.npy", "help": "Specify reference motion file"}, - {"name": "--num_envs", "type": int, "default": 0, - "help": "Number of environments to create - override config file"}, - {"name": "--episode_length", "type": int, "default": 0, - "help": "Episode length, by default is read from yaml config"}, - {"name": "--seed", "type": int, "help": "Random seed"}, - {"name": "--max_iterations", "type": int, "default": 0, - "help": "Set a maximum number of training iterations"}, - {"name": "--horizon_length", "type": int, "default": -1, - "help": "Set number of simulation steps per 1 PPO iteration. Supported only by rl_games. If not -1 overrides the config settings."}, - {"name": "--minibatch_size", "type": int, "default": -1, - "help": "Set batch size for PPO optimization step. Supported only by rl_games. 
If not -1 overrides the config settings."}, - {"name": "--randomize", "action": "store_true", "default": False, - "help": "Apply physics domain randomization"}, - {"name": "--torch_deterministic", "action": "store_true", "default": False, - "help": "Apply additional PyTorch settings for more deterministic behaviour"}, - {"name": "--output_path", "type": str, "default": "output/", "help": "Specify output directory"}, - {"name": "--llc_checkpoint", "type": str, "default": "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/learning_rl/runs/RofuncRL_ASETrainer_HumanoidASEGetupSwordShield_23-06-26_12-49-35-111331/checkpoints/ckpt_87000.pth", - "help": "Path to the saved weights for the low-level controller of an HRL agent."}] - - if benchmark: - custom_parameters += [{"name": "--num_proc", "type": int, "default": 1, "help": "Number of child processes to launch"}, - {"name": "--random_actions", "action": "store_true", - "help": "Run benchmark with random actions instead of inferencing"}, - {"name": "--bench_len", "type": int, "default": 10, - "help": "Number of timing reports"}, - {"name": "--bench_file", "action": "store", "help": "Filename to store benchmark results"}] - - # parse arguments - args = gymutil.parse_arguments( - description="RL Policy", - custom_parameters=custom_parameters) - - # allignment with examples - args.device_id = args.compute_device_id - args.device = args.sim_device_type if args.use_gpu_pipeline else 'cpu' - - if args.test: - args.play = args.test - args.train = False - elif args.play: - args.train = False - else: - args.train = True - - return args diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py deleted file mode 100644 index aca92850..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_agent.py +++ /dev/null @@ -1,356 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import copy -from datetime import datetime -from gym import spaces -import numpy as np -import os -import time -import yaml - -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch import central_value -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common import a2c_common -from rl_games.common import datasets -from rl_games.common import schedulers -from rl_games.common import vecenv - -import torch -from torch import optim - -import common_agent -import ase_agent -import ase_models -import ase_network_builder - -from tensorboardX import SummaryWriter - - -class HRLAgent(common_agent.CommonAgent): - def __init__(self, base_name, config): - with open(config['llc_config'], 'r') as f: - llc_config = yaml.load(f, Loader=yaml.SafeLoader) - llc_config_params = llc_config['params'] - self._latent_dim = llc_config_params['config']['latent_dim'] - - super().__init__(base_name, config) - - self._task_size = self.vec_env.env.task.get_task_obs_size() - - self._llc_steps = config['llc_steps'] - llc_checkpoint = config['llc_checkpoint'] - assert (llc_checkpoint != "") - self._build_llc(llc_config_params, llc_checkpoint) - - return - - def env_step(self, actions): - actions = self.preprocess_actions(actions) - obs = self.obs['obs'] - - rewards = 0.0 - disc_rewards = 0.0 - done_count = 0.0 - terminate_count = 0.0 - for t in range(self._llc_steps): - llc_actions = self._compute_llc_action(obs, actions) - obs, curr_rewards, curr_dones, infos = self.vec_env.step(llc_actions) - - rewards += curr_rewards - done_count += curr_dones - terminate_count += infos['terminate'] - - amp_obs = infos['amp_obs'] - curr_disc_reward = self._calc_disc_reward(amp_obs) - disc_rewards += curr_disc_reward - - rewards /= self._llc_steps - disc_rewards /= self._llc_steps - - dones = torch.zeros_like(done_count) - dones[done_count > 0] = 1.0 - terminate = torch.zeros_like(terminate_count) - terminate[terminate_count > 0] = 1.0 - infos['terminate'] = terminate - infos['disc_rewards'] = disc_rewards - - if self.is_tensor_obses: - if self.value_size == 1: - rewards = rewards.unsqueeze(1) - return self.obs_to_tensors(obs), rewards.to(self.ppo_device), dones.to(self.ppo_device), infos - else: - if self.value_size == 1: - rewards = np.expand_dims(rewards, axis=1) - return self.obs_to_tensors(obs), torch.from_numpy(rewards).to(self.ppo_device).float(), torch.from_numpy( - dones).to(self.ppo_device), infos - - def cast_obs(self, obs): - obs = super().cast_obs(obs) - self._llc_agent.is_tensor_obses = self.is_tensor_obses - return obs - - def preprocess_actions(self, actions): - clamped_actions = torch.clamp(actions, -1.0, 1.0) - if not self.is_tensor_obses: - clamped_actions = clamped_actions.cpu().numpy() - return clamped_actions - - def play_steps(self): - self.set_eval() - - epinfos = [] - done_indices = [] - update_list = self.update_list - - for n in range(self.horizon_length): - self.obs = self.env_reset(done_indices) - self.experience_buffer.update_data('obses', n, self.obs['obs']) - - if self.use_action_masks: - masks = self.vec_env.get_action_masks() - res_dict = self.get_masked_action_values(self.obs, masks) - else: - res_dict = self.get_action_values(self.obs) - - for k in update_list: - self.experience_buffer.update_data(k, n, res_dict[k]) - - if self.has_central_value: - self.experience_buffer.update_data('states', n, self.obs['states']) - - self.obs, rewards, self.dones, infos = self.env_step(res_dict['actions']) - shaped_rewards = self.rewards_shaper(rewards) - 
self.experience_buffer.update_data('rewards', n, shaped_rewards) - self.experience_buffer.update_data('next_obses', n, self.obs['obs']) - self.experience_buffer.update_data('dones', n, self.dones) - - self.experience_buffer.update_data('disc_rewards', n, infos['disc_rewards']) - - terminated = infos['terminate'].float() - terminated = terminated.unsqueeze(-1) - next_vals = self._eval_critic(self.obs) - next_vals *= (1.0 - terminated) - self.experience_buffer.update_data('next_values', n, next_vals) - - self.current_rewards += rewards - self.current_lengths += 1 - all_done_indices = self.dones.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - - self.game_rewards.update(self.current_rewards[done_indices]) - self.game_lengths.update(self.current_lengths[done_indices]) - self.algo_observer.process_infos(infos, done_indices) - - not_dones = 1.0 - self.dones.float() - - self.current_rewards = self.current_rewards * not_dones.unsqueeze(1) - self.current_lengths = self.current_lengths * not_dones - - done_indices = done_indices[:, 0] - - mb_fdones = self.experience_buffer.tensor_dict['dones'].float() - mb_values = self.experience_buffer.tensor_dict['values'] - mb_next_values = self.experience_buffer.tensor_dict['next_values'] - - mb_rewards = self.experience_buffer.tensor_dict['rewards'] - mb_disc_rewards = self.experience_buffer.tensor_dict['disc_rewards'] - mb_rewards = self._combine_rewards(mb_rewards, mb_disc_rewards) - - mb_advs = self.discount_values(mb_fdones, mb_values, mb_rewards, mb_next_values) - mb_returns = mb_advs + mb_values - - batch_dict = self.experience_buffer.get_transformed_list(a2c_common.swap_and_flatten01, self.tensor_list) - batch_dict['returns'] = a2c_common.swap_and_flatten01(mb_returns) - batch_dict['played_frames'] = self.batch_size - - return batch_dict - - def _load_config_params(self, config): - super()._load_config_params(config) - - self._task_reward_w = config['task_reward_w'] - self._disc_reward_w = config['disc_reward_w'] - return - - def _get_mean_rewards(self): - rewards = super()._get_mean_rewards() - rewards *= self._llc_steps - return rewards - - def _setup_action_space(self): - super()._setup_action_space() - self.actions_num = self._latent_dim - return - - def init_tensors(self): - super().init_tensors() - - del self.experience_buffer.tensor_dict['actions'] - del self.experience_buffer.tensor_dict['mus'] - del self.experience_buffer.tensor_dict['sigmas'] - - batch_shape = self.experience_buffer.obs_base_shape - self.experience_buffer.tensor_dict['actions'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - self.experience_buffer.tensor_dict['mus'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - self.experience_buffer.tensor_dict['sigmas'] = torch.zeros(batch_shape + (self._latent_dim,), - dtype=torch.float32, device=self.ppo_device) - - self.experience_buffer.tensor_dict['disc_rewards'] = torch.zeros_like( - self.experience_buffer.tensor_dict['rewards']) - self.tensor_list += ['disc_rewards'] - - return - - # def _build_llc(self, config_params, checkpoint_file): - # network_params = config_params['network'] - # network_builder = ase_network_builder.ASEBuilder() - # network_builder.load(network_params) - # - # network = ase_models.ModelASEContinuous(network_builder) - # llc_agent_config = self._build_llc_agent_config(config_params, network) - # - # self._llc_agent = ase_agent.ASEAgent('llc', llc_agent_config) - # 
self._llc_agent.restore(checkpoint_file) - # print("Loaded LLC checkpoint from {:s}".format(checkpoint_file)) - # self._llc_agent.set_eval() - # return - - def _build_llc(self, config_params, checkpoint_file): - from hydra.core.global_hydra import GlobalHydra - from rofunc.config.utils import get_config - from rofunc.learning.RofuncRL.utils.memory import RandomMemory - from rofunc.learning.RofuncRL.agents.mixline.ase_agent import ASEAgent - import rofunc as rf - from rofunc.utils.logger.beauty_logger import BeautyLogger - - GlobalHydra.instance().clear() - args_overrides = ["task=HumanoidASEGetupSwordShield", "train=HumanoidASEGetupSwordShieldASERofuncRL"] - self.llc_config = get_config('./learning/rl', 'config', args=args_overrides) - llc_ckpt_path = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/learning_rl/runs/RofuncRL_ASETrainer_HumanoidASEGetupSwordShield_23-06-26_12-49-35-111331/checkpoints/ckpt_87000.pth" - - llc_env_info = copy.deepcopy(self.env_info) - obs_space = llc_env_info['observation_space'] - obs_size = obs_space.shape[0] - obs_size -= self._task_size - llc_observation_space = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - llc_memory = RandomMemory(memory_size=32, num_envs=4096, device=self.ppo_device) - motion_dataset = RandomMemory(memory_size=200000, device=self.ppo_device) - replay_buffer = RandomMemory(memory_size=1000000, device=self.ppo_device) - collect_reference_motions = lambda num_samples: self.vec_env.env.task.fetch_amp_obs_demo(num_samples) - - directory = os.path.join(os.getcwd(), "runs") - exp_name = datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f") - exp_dir = os.path.join(directory, exp_name) - rf.utils.create_dir(exp_dir) - rofunc_logger = BeautyLogger(exp_dir, verbose=True) - self._llc_agent = ASEAgent(self.llc_config.train, llc_observation_space, llc_env_info['action_space'], - llc_memory, - self.ppo_device, exp_dir, rofunc_logger, llc_env_info['amp_observation_space'], - motion_dataset, replay_buffer, collect_reference_motions) - self._llc_agent.load_ckpt(llc_ckpt_path) - return - - # def _build_llc_agent_config(self, config_params, network): - # llc_env_info = copy.deepcopy(self.env_info) - # obs_space = llc_env_info['observation_space'] - # obs_size = obs_space.shape[0] - # obs_size -= self._task_size - # llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - # - # config = config_params['config'] - # config['network'] = network - # config['num_actors'] = self.num_actors - # config['features'] = {'observer' : self.algo_observer} - # config['env_info'] = llc_env_info - # - # return config - - # def _compute_llc_action(self, obs, actions): - # llc_obs = self._extract_llc_obs(obs) - # processed_obs = self._llc_agent._preproc_obs(llc_obs) - # - # z = torch.nn.functional.normalize(actions, dim=-1) - # mu, _ = self._llc_agent.model.a2c_network.eval_actor(obs=processed_obs, ase_latents=z) - # llc_action = mu - # llc_action = self._llc_agent.preprocess_actions(llc_action) - # - # return llc_action - - def _compute_llc_action(self, obs, actions): - llc_obs = self._extract_llc_obs(obs) - z = torch.nn.functional.normalize(actions, dim=-1) - mu, _ = self._llc_agent.act(llc_obs, ase_latents=z) - llc_action = mu - # llc_action = self._llc_agent.preprocess_actions(llc_action) - - return llc_action - - def _extract_llc_obs(self, obs): - obs_size = obs.shape[-1] - llc_obs = obs[..., :obs_size - self._task_size] - return llc_obs - - # def _calc_disc_reward(self, 
amp_obs): - # disc_reward = self._llc_agent._calc_disc_rewards(amp_obs) - # return disc_reward - - def _calc_disc_reward(self, amp_obs): - with torch.no_grad(): - amp_logits = self._llc_agent.discriminator(self._llc_agent._amp_state_preprocessor(amp_obs)) - if self._llc_agent._least_square_discriminator: - style_rewards = torch.maximum(torch.tensor(1 - 0.25 * torch.square(1 - amp_logits)), - torch.tensor(0.0001, device=self.device)) - else: - style_rewards = -torch.log(torch.maximum(torch.tensor(1 - 1 / (1 + torch.exp(-amp_logits))), - torch.tensor(0.0001, device=self.device))) - style_rewards *= self._llc_agent._discriminator_reward_scale - return style_rewards - - def _combine_rewards(self, task_rewards, disc_rewards): - combined_rewards = self._task_reward_w * task_rewards + \ - + self._disc_reward_w * disc_rewards - - # combined_rewards = task_rewards * disc_rewards - return combined_rewards - - def _record_train_batch_info(self, batch_dict, train_info): - super()._record_train_batch_info(batch_dict, train_info) - train_info['disc_rewards'] = batch_dict['disc_rewards'] - return - - def _log_train_info(self, train_info, frame): - super()._log_train_info(train_info, frame) - - disc_reward_std, disc_reward_mean = torch.std_mean(train_info['disc_rewards']) - self.writer.add_scalar('info/disc_reward_mean', disc_reward_mean.item(), frame) - self.writer.add_scalar('info/disc_reward_std', disc_reward_std.item(), frame) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml deleted file mode 100644 index 3818fb33..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_humanoid.yaml +++ /dev/null @@ -1,76 +0,0 @@ -params: - seed: 42 - - algo: - name: hrl - - model: - name: hrl - - network: - name: hrl - separate: True - - space: - continuous: - mu_activation: None - sigma_activation: None - mu_init: - name: default - sigma_init: - name: const_initializer - val: -2.3 - fixed_sigma: True - learn_sigma: False - - mlp: - units: [1024, 512] - activation: relu - d2rl: False - - initializer: - name: default - regularizer: - name: None - - load_checkpoint: False - - config: - name: Humanoid - env_name: rlgpu - multi_gpu: False - ppo: True - mixed_precision: False - normalize_input: True - normalize_value: True - reward_shaper: - scale_value: 1 - normalize_advantage: True - gamma: 0.99 - tau: 0.95 - learning_rate: 2e-5 - lr_schedule: constant - score_to_win: 20000 - max_epochs: 10000 - save_best_after: 10 - save_frequency: 50 - print_stats: True - grad_norm: 1.0 - entropy_coef: 0.0 - truncate_grads: False - ppo: True - e_clip: 0.2 - horizon_length: 32 - minibatch_size: 16384 - mini_epochs: 6 - critic_coef: 5 - clip_value: False - seq_len: 4 - bounds_loss_coef: 10 - - task_reward_w: 0.9 - disc_reward_w: 0.1 - - llc_steps: 5 - llc_config: /home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/learning/RofuncRL/agents/mixline/utils/ase_humanoid_hrl.yaml - diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py deleted file mode 100644 index e8191f46..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_models.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. 
Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch.nn as nn -from rl_games.algos_torch.models import ModelA2CContinuousLogStd - -class ModelHRLContinuous(ModelA2CContinuousLogStd): - def __init__(self, network): - super().__init__(network) - return - - def build(self, config): - net = self.network_builder.build('amp', **config) - for name, _ in net.named_parameters(): - print(name) - return ModelHRLContinuous.Network(net) - - class Network(ModelA2CContinuousLogStd.Network): - def __init__(self, a2c_network): - super().__init__(a2c_network) - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py deleted file mode 100644 index a9d1fe78..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_network_builder.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from rl_games.algos_torch import network_builder - -import torch -import torch.nn as nn - - -class HRLBuilder(network_builder.A2CBuilder): - def __init__(self, **kwargs): - super().__init__(**kwargs) - return - - class Network(network_builder.A2CBuilder.Network): - def __init__(self, params, **kwargs): - super().__init__(params, **kwargs) - - if self.is_continuous: - if (not self.space_config['learn_sigma']): - actions_num = kwargs.get('actions_num') - sigma_init = self.init_factory.create(**self.space_config['sigma_init']) - self.sigma = nn.Parameter(torch.zeros(actions_num, requires_grad=False, dtype=torch.float32), requires_grad=False) - sigma_init(self.sigma) - - return - - def forward(self, obs_dict): - mu, sigma, value, states = super().forward(obs_dict) - norm_mu = torch.tanh(mu) - return norm_mu, sigma, value, states - - def eval_critic(self, obs): - c_out = self.critic_cnn(obs) - c_out = c_out.contiguous().view(c_out.size(0), -1) - c_out = self.critic_mlp(c_out) - value = self.value_act(self.value(c_out)) - return value - - def build(self, name, **kwargs): - net = HRLBuilder.Network(self.params, **kwargs) - return net \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py deleted file mode 100644 index c493246a..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/hrl_players.py +++ /dev/null @@ -1,345 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import copy -from gym import spaces -import numpy as np -import os -import torch -import yaml - -from rl_games.algos_torch import players -from rl_games.algos_torch import torch_ext -from rl_games.algos_torch.running_mean_std import RunningMeanStd -from rl_games.common.player import BasePlayer - -import common_player as common_player -import ase_models as ase_models -import ase_network_builder as ase_network_builder -import ase_players as ase_players - -class HRLPlayer(common_player.CommonPlayer): - def __init__(self, config): - with open(os.path.join(os.getcwd(), config['llc_config']), 'r') as f: - llc_config = yaml.load(f, Loader=yaml.SafeLoader) - llc_config_params = llc_config['params'] - self._latent_dim = llc_config_params['config']['latent_dim'] - - super().__init__(config) - - self._task_size = self.env.task.get_task_obs_size() - - self._llc_steps = config['llc_steps'] - llc_checkpoint = config['llc_checkpoint'] - assert(llc_checkpoint != "") - self._build_llc(llc_config_params, llc_checkpoint) - - return - - def get_action(self, obs_dict, is_determenistic = False): - obs = obs_dict['obs'] - - if len(obs.size()) == len(self.obs_shape): - obs = obs.unsqueeze(0) - proc_obs = self._preproc_obs(obs) - input_dict = { - 'is_train': False, - 'prev_actions': None, - 'obs' : proc_obs, - 'rnn_states' : self.states - } - with torch.no_grad(): - res_dict = self.model(input_dict) - mu = res_dict['mus'] - action = res_dict['actions'] - self.states = res_dict['rnn_states'] - if is_determenistic: - current_action = mu - else: - current_action = action - current_action = torch.squeeze(current_action.detach()) - clamped_actions = torch.clamp(current_action, -1.0, 1.0) - - return clamped_actions - - def run(self): - n_games = self.games_num - render = self.render_env - n_game_life = self.n_game_life - is_determenistic = self.is_determenistic - sum_rewards = 0 - sum_steps = 0 - sum_game_res = 0 - n_games = n_games * n_game_life - games_played = 0 - has_masks = False - has_masks_func = getattr(self.env, "has_action_mask", None) is not None - - op_agent = getattr(self.env, "create_agent", None) - if op_agent: - agent_inited = True - - if has_masks_func: - has_masks = self.env.has_action_mask() - - need_init_rnn = self.is_rnn - for _ in range(n_games): - if games_played >= n_games: - break - - obs_dict = self.env_reset() - batch_size = 1 - if len(obs_dict['obs'].size()) > len(self.obs_shape): - batch_size = obs_dict['obs'].size()[0] - self.batch_size = batch_size - - if need_init_rnn: - self.init_rnn() - need_init_rnn = False - - cr = torch.zeros(batch_size, dtype=torch.float32) - steps = torch.zeros(batch_size, dtype=torch.float32) - - print_game_res = False - - done_indices = [] - - for n in range(self.max_steps): - obs_dict = self.env_reset(done_indices) - - if has_masks: - masks = self.env.get_action_mask() - action = self.get_masked_action(obs_dict, masks, is_determenistic) - else: - action = self.get_action(obs_dict, is_determenistic) - obs_dict, r, done, info = 
self.env_step(self.env, obs_dict, action) - cr += r - steps += 1 - - self._post_step(info) - - if render: - self.env.render(mode = 'human') - time.sleep(self.render_sleep) - - all_done_indices = done.nonzero(as_tuple=False) - done_indices = all_done_indices[::self.num_agents] - done_count = len(done_indices) - games_played += done_count - - if done_count > 0: - if self.is_rnn: - for s in self.states: - s[:,all_done_indices,:] = s[:,all_done_indices,:] * 0.0 - - cur_rewards = cr[done_indices].sum().item() - cur_steps = steps[done_indices].sum().item() - - cr = cr * (1.0 - done.float()) - steps = steps * (1.0 - done.float()) - sum_rewards += cur_rewards - sum_steps += cur_steps - - game_res = 0.0 - if isinstance(info, dict): - if 'battle_won' in info: - print_game_res = True - game_res = info.get('battle_won', 0.5) - if 'scores' in info: - print_game_res = True - game_res = info.get('scores', 0.5) - if self.print_stats: - if print_game_res: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count, 'w:', game_res) - else: - print('reward:', cur_rewards/done_count, 'steps:', cur_steps/done_count) - - sum_game_res += game_res - if batch_size//self.num_agents == 1 or games_played >= n_games: - break - - done_indices = done_indices[:, 0] - - print(sum_rewards) - if print_game_res: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life, 'winrate:', sum_game_res / games_played * n_game_life) - else: - print('av reward:', sum_rewards / games_played * n_game_life, 'av steps:', sum_steps / games_played * n_game_life) - - return - - def env_step(self, env, obs_dict, action): - if not self.is_tensor_obses: - actions = actions.cpu().numpy() - - obs = obs_dict['obs'] - rewards = 0.0 - done_count = 0.0 - disc_rewards = 0.0 - for t in range(self._llc_steps): - llc_actions = self._compute_llc_action(obs, action) - obs, curr_rewards, curr_dones, infos = env.step(llc_actions) - - rewards += curr_rewards - done_count += curr_dones - - amp_obs = infos['amp_obs'] - curr_disc_reward = self._calc_disc_reward(amp_obs) - curr_disc_reward = curr_disc_reward[0, 0].cpu().numpy() - disc_rewards += curr_disc_reward - - rewards /= self._llc_steps - dones = torch.zeros_like(done_count) - dones[done_count > 0] = 1.0 - - disc_rewards /= self._llc_steps - #print("disc_reward", disc_rewards) - - if isinstance(obs, dict): - obs = obs['obs'] - if obs.dtype == np.float64: - obs = np.float32(obs) - if self.value_size > 1: - rewards = rewards[0] - if self.is_tensor_obses: - return obs, rewards.cpu(), dones.cpu(), infos - else: - if np.isscalar(dones): - rewards = np.expand_dims(np.asarray(rewards), 0) - dones = np.expand_dims(np.asarray(dones), 0) - return torch.from_numpy(obs).to(self.device), torch.from_numpy(rewards), torch.from_numpy(dones), infos - - # def _build_llc(self, config_params, checkpoint_file): - # network_params = config_params['network'] - # network_builder = ase_network_builder.ASEBuilder() - # network_builder.load(network_params) - # - # network = ase_models.ModelASEContinuous(network_builder) - # llc_agent_config = self._build_llc_agent_config(config_params, network) - # - # self._llc_agent = ase_players.ASEPlayer(llc_agent_config) - # self._llc_agent.restore(checkpoint_file) - # print("Loaded LLC checkpoint from {:s}".format(checkpoint_file)) - # return - # - # def _build_llc_agent_config(self, config_params, network): - # llc_env_info = copy.deepcopy(self.env_info) - # obs_space = llc_env_info['observation_space'] - # obs_size = 
obs_space.shape[0] - # obs_size -= self._task_size - # llc_env_info['observation_space'] = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - # llc_env_info['amp_observation_space'] = self.env.amp_observation_space.shape - # llc_env_info['num_envs'] = self.env.task.num_envs - # - # config = config_params['config'] - # config['network'] = network - # config['env_info'] = llc_env_info - # - # return config - - def _build_llc(self, config_params, checkpoint_file): - from hydra.core.global_hydra import GlobalHydra - from rofunc.config.utils import get_config - from rofunc.learning.RofuncRL.utils.memory import RandomMemory - from rofunc.learning.RofuncRL.agents.mixline.ase_agent import ASEAgent - import rofunc as rf - from rofunc.utils.logger.beauty_logger import BeautyLogger - from datetime import datetime - - GlobalHydra.instance().clear() - args_overrides = ["task=HumanoidASEGetupSwordShield", "train=HumanoidASEGetupSwordShieldASERofuncRL"] - self.llc_config = get_config('./learning/rl', 'config', args=args_overrides) - llc_ckpt_path = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/examples/learning_rl/runs/RofuncRL_ASETrainer_HumanoidASEGetupSwordShield_23-06-26_12-49-35-111331/checkpoints/ckpt_87000.pth" - - llc_env_info = copy.deepcopy(self.env_info) - obs_space = llc_env_info['observation_space'] - obs_size = obs_space.shape[0] - obs_size -= self._task_size - llc_observation_space = spaces.Box(obs_space.low[:obs_size], obs_space.high[:obs_size]) - llc_memory = RandomMemory(memory_size=32, num_envs=4096) - motion_dataset = RandomMemory(memory_size=200000) - replay_buffer = RandomMemory(memory_size=1000000) - collect_reference_motions = lambda num_samples: self.env.task.fetch_amp_obs_demo(num_samples) - - directory = os.path.join(os.getcwd(), "runs") - exp_name = datetime.now().strftime("%y-%m-%d_%H-%M-%S-%f") - exp_dir = os.path.join(directory, exp_name) - rf.utils.create_dir(exp_dir) - rofunc_logger = BeautyLogger(exp_dir, verbose=True) - amp_observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(1400,)) - - self._llc_agent = ASEAgent(self.llc_config.train, llc_observation_space, llc_env_info['action_space'], - llc_memory, - 'cuda:0', exp_dir, rofunc_logger, amp_observation_space, - motion_dataset, replay_buffer, collect_reference_motions) - self._llc_agent.load_ckpt(llc_ckpt_path) - return - - - def _setup_action_space(self): - super()._setup_action_space() - self.actions_num = self._latent_dim - return - - # def _compute_llc_action(self, obs, actions): - # llc_obs = self._extract_llc_obs(obs) - # processed_obs = self._llc_agent._preproc_obs(llc_obs) - # - # z = torch.nn.functional.normalize(actions, dim=-1) - # mu, _ = self._llc_agent.model.a2c_network.eval_actor(obs=processed_obs, ase_latents=z) - # llc_action = players.rescale_actions(self.actions_low, self.actions_high, torch.clamp(mu, -1.0, 1.0)) - # - # return llc_action - - def _compute_llc_action(self, obs, actions): - llc_obs = self._extract_llc_obs(obs) - z = torch.nn.functional.normalize(actions, dim=-1) - mu, _ = self._llc_agent.act(llc_obs, ase_latents=z) - llc_action = mu - return llc_action - - def _extract_llc_obs(self, obs): - obs_size = obs.shape[-1] - llc_obs = obs[..., :obs_size - self._task_size] - return llc_obs - - # def _calc_disc_reward(self, amp_obs): - # disc_reward = self._llc_agent._calc_disc_rewards(amp_obs) - # return disc_reward - - def _calc_disc_reward(self, amp_obs): - with torch.no_grad(): - amp_logits = 
self._llc_agent.discriminator(self._llc_agent._amp_state_preprocessor(amp_obs)) - if self._llc_agent._least_square_discriminator: - style_rewards = torch.maximum(torch.tensor(1 - 0.25 * torch.square(1 - amp_logits)), - torch.tensor(0.0001, device=self.device)) - else: - style_rewards = -torch.log(torch.maximum(torch.tensor(1 - 1 / (1 + torch.exp(-amp_logits))), - torch.tensor(0.0001, device=self.device))) - style_rewards *= self._llc_agent._discriminator_reward_scale - return style_rewards \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml deleted file mode 100644 index 20c1b35e..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_heading.yaml +++ /dev/null @@ -1,53 +0,0 @@ -# if given, will override the device setting in gym. -env: - numEnvs: 4096 - envSpacing: 5 - episodeLength: 300 - isFlagrun: False - enableDebugVis: False - - pdControl: True - powerScale: 1.0 - controlFrequencyInv: 2 # 30 Hz - stateInit: "Default" - hybridInitProb: 0.5 - numAMPObsSteps: 10 - - localRootObs: True - keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"] - contactBodies: ["right_foot", "left_foot"] - terminationHeight: 0.15 - enableEarlyTermination: True - - tarSpeedMin: 1.5 - tarSpeedMax: 1.6 - headingChangeStepsMin: 100 - headingChangeStepsMax: 200 - enableRandHeading: True - enableTaskObs: True - - asset: - assetRoot: "ase/data/assets" - assetFileName: "mjcf/amp_humanoid_sword_shield.xml" - - plane: - staticFriction: 1.0 - dynamicFriction: 1.0 - restitution: 0.0 - -sim: - substeps: 2 - physx: - num_threads: 4 - solver_type: 1 # 0: pgs, 1: tgs - num_position_iterations: 4 - num_velocity_iterations: 0 - contact_offset: 0.02 - rest_offset: 0.0 - bounce_threshold_velocity: 0.2 - max_depenetration_velocity: 10.0 - default_buffer_size_multiplier: 10.0 - - flex: - num_inner_iterations: 10 - warm_start: 0.25 diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml b/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml deleted file mode 100644 index 9f0bbafc..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/humanoid_sword_shield_strike.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# if given, will override the device setting in gym. 
-env: - numEnvs: 4096 - envSpacing: 5 - episodeLength: 300 - isFlagrun: False - enableDebugVis: False - - pdControl: True - powerScale: 1.0 - controlFrequencyInv: 2 # 30 Hz - stateInit: "Default" - hybridInitProb: 0.5 - numAMPObsSteps: 10 - - localRootObs: True - keyBodies: ["right_hand", "left_hand", "right_foot", "left_foot", "sword", "shield"] - contactBodies: ["right_foot", "left_foot"] - terminationHeight: 0.15 - enableEarlyTermination: True - - strikeBodyNames: ["sword", "right_hand", "right_lower_arm"] - enableTaskObs: True - - asset: - assetRoot: "ase/data/assets" - assetFileName: "mjcf/amp_humanoid_sword_shield.xml" - - plane: - staticFriction: 1.0 - dynamicFriction: 1.0 - restitution: 0.0 - -sim: - substeps: 2 - physx: - num_threads: 4 - solver_type: 1 # 0: pgs, 1: tgs - num_position_iterations: 4 - num_velocity_iterations: 0 - contact_offset: 0.02 - rest_offset: 0.0 - bounce_threshold_velocity: 0.2 - max_depenetration_velocity: 10.0 - default_buffer_size_multiplier: 10.0 - - flex: - num_inner_iterations: 10 - warm_start: 0.25 diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py deleted file mode 100644 index 0f81a47d..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/observer.py +++ /dev/null @@ -1,36 +0,0 @@ -from rl_games.common.algo_observer import AlgoObserver -from rl_games.algos_torch import torch_ext - - -class RLGPUAlgoObserver(AlgoObserver): - def __init__(self, use_successes=True): - self.use_successes = use_successes - return - - def after_init(self, algo): - self.algo = algo - self.consecutive_successes = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) - self.writer = self.algo.writer - return - - def process_infos(self, infos, done_indices): - if isinstance(infos, dict): - if (self.use_successes == False) and 'consecutive_successes' in infos: - cons_successes = infos['consecutive_successes'].clone() - self.consecutive_successes.update(cons_successes.to(self.algo.ppo_device)) - if self.use_successes and 'successes' in infos: - successes = infos['successes'].clone() - self.consecutive_successes.update(successes[done_indices].to(self.algo.ppo_device)) - return - - def after_clear_stats(self): - self.mean_scores.clear() - return - - def after_print_stats(self, frame, epoch_num, total_time): - if self.consecutive_successes.current_size > 0: - mean_con_successes = self.consecutive_successes.get_mean() - self.writer.add_scalar('successes/consecutive_successes/mean', mean_con_successes, frame) - self.writer.add_scalar('successes/consecutive_successes/iter', mean_con_successes, epoch_num) - self.writer.add_scalar('successes/consecutive_successes/time', mean_con_successes, total_time) - return diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py deleted file mode 100644 index da72c040..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/parse_task.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from tasks.humanoid import Humanoid -from tasks.humanoid_amp import HumanoidAMP -from tasks.humanoid_amp_getup import HumanoidAMPGetup -from tasks.humanoid_heading import HumanoidHeading -from tasks.humanoid_location import HumanoidLocation -from tasks.humanoid_strike import HumanoidStrike -from tasks.humanoid_reach import HumanoidReach -from tasks.humanoid_perturb import HumanoidPerturb -from tasks.humanoid_view_motion import HumanoidViewMotion -from vec_task_wrappers import VecTaskPythonWrapper - -from isaacgym import rlgpu - -import json -import numpy as np - - -def warn_task_name(): - raise Exception( - "Unrecognized task!\nTask should be one of: [BallBalance, Cartpole, CartpoleYUp, Ant, Humanoid, Anymal, FrankaCabinet, Quadcopter, ShadowHand, ShadowHandLSTM, ShadowHandFFOpenAI, ShadowHandFFOpenAITest, ShadowHandOpenAI, ShadowHandOpenAITest, Ingenuity]") - -def parse_task(args, cfg, cfg_train, sim_params): - - # create native task and pass custom config - device_id = args.device_id - rl_device = args.rl_device - - cfg["seed"] = cfg_train.get("seed", -1) - cfg_task = cfg["env"] - cfg_task["seed"] = cfg["seed"] - - try: - task = eval(args.task)( - cfg=cfg, - sim_params=sim_params, - physics_engine=args.physics_engine, - device_type=args.device, - device_id=device_id, - headless=args.headless) - except NameError as e: - print(e) - warn_task_name() - env = VecTaskPythonWrapper(task, rl_device, cfg_train.get("clip_observations", np.inf), cfg_train.get("clip_actions", 1.0)) - - return task, env diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py deleted file mode 100644 index 5c5e7c77..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/replay_buffer.py +++ /dev/null @@ -1,113 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. 
Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch - -class ReplayBuffer(): - def __init__(self, buffer_size, device): - self._head = 0 - self._total_count = 0 - self._buffer_size = buffer_size - self._device = device - self._data_buf = None - self._sample_idx = torch.randperm(buffer_size) - self._sample_head = 0 - - return - - def reset(self): - self._head = 0 - self._total_count = 0 - self._reset_sample_idx() - return - - def get_buffer_size(self): - return self._buffer_size - - def get_total_count(self): - return self._total_count - - def store(self, data_dict): - if (self._data_buf is None): - self._init_data_buf(data_dict) - - n = next(iter(data_dict.values())).shape[0] - buffer_size = self.get_buffer_size() - assert(n <= buffer_size) - - for key, curr_buf in self._data_buf.items(): - curr_n = data_dict[key].shape[0] - assert(n == curr_n) - - store_n = min(curr_n, buffer_size - self._head) - curr_buf[self._head:(self._head + store_n)] = data_dict[key][:store_n] - - remainder = n - store_n - if (remainder > 0): - curr_buf[0:remainder] = data_dict[key][store_n:] - - self._head = (self._head + n) % buffer_size - self._total_count += n - - return - - def sample(self, n): - total_count = self.get_total_count() - buffer_size = self.get_buffer_size() - - idx = torch.arange(self._sample_head, self._sample_head + n) - idx = idx % buffer_size - rand_idx = self._sample_idx[idx] - if (total_count < buffer_size): - rand_idx = rand_idx % self._head - - samples = dict() - for k, v in self._data_buf.items(): - samples[k] = v[rand_idx] - - self._sample_head += n - if (self._sample_head >= buffer_size): - self._reset_sample_idx() - - return samples - - def _reset_sample_idx(self): - buffer_size = self.get_buffer_size() - self._sample_idx[:] = torch.randperm(buffer_size) - self._sample_head = 0 - return - - def _init_data_buf(self, data_dict): - buffer_size = self.get_buffer_size() - self._data_buf = dict() - - for k, v in data_dict.items(): - v_shape = v.shape[1:] - self._data_buf[k] = torch.zeros((buffer_size,) + v_shape, device=self._device) - - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/run.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/run.py deleted file mode 100644 index a4a88d60..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/run.py +++ /dev/null @@ -1,252 +0,0 @@ -# 
Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import os -import isaacgym -from config import set_np_formatting, set_seed, get_args, parse_sim_params, load_cfg -from parse_task import parse_task - -from rl_games.algos_torch import players -from rl_games.algos_torch import torch_ext -from rl_games.common import env_configurations, experiment, vecenv -from rl_games.common.algo_observer import AlgoObserver -from rl_games.torch_runner import Runner - -import numpy as np -import copy -import torch - -import amp_agent -import amp_players -import amp_models -import amp_network_builder - -import ase_agent -import ase_players -import ase_models -import ase_network_builder - -import hrl_agent -import hrl_players -import hrl_models -import hrl_network_builder - -args = None -cfg = None -cfg_train = None - - -def create_rlgpu_env(**kwargs): - use_horovod = cfg_train['params']['config'].get('multi_gpu', False) - if use_horovod: - import horovod.torch as hvd - - rank = hvd.rank() - print("Horovod rank: ", rank) - - cfg_train['params']['seed'] = cfg_train['params']['seed'] + rank - - args.device = 'cuda' - args.device_id = rank - args.rl_device = 'cuda:' + str(rank) - - cfg['rank'] = rank - cfg['rl_device'] = 'cuda:' + str(rank) - - sim_params = parse_sim_params(args, cfg, cfg_train) - task, env = parse_task(args, cfg, cfg_train, sim_params) - - print('num_envs: {:d}'.format(env.num_envs)) - print('num_actions: {:d}'.format(env.num_actions)) - print('num_obs: {:d}'.format(env.num_obs)) - print('num_states: {:d}'.format(env.num_states)) - - frames = kwargs.pop('frames', 1) - if frames > 1: - env = wrappers.FrameStack(env, frames, False) - return env - - -class RLGPUAlgoObserver(AlgoObserver): - def __init__(self, use_successes=True): - self.use_successes = use_successes - return - - def after_init(self, algo): - self.algo = algo - self.consecutive_successes = torch_ext.AverageMeter(1, self.algo.games_to_track).to(self.algo.ppo_device) - self.writer = self.algo.writer - return - - def process_infos(self, infos, 
done_indices): - if isinstance(infos, dict): - if (self.use_successes == False) and 'consecutive_successes' in infos: - cons_successes = infos['consecutive_successes'].clone() - self.consecutive_successes.update(cons_successes.to(self.algo.ppo_device)) - if self.use_successes and 'successes' in infos: - successes = infos['successes'].clone() - self.consecutive_successes.update(successes[done_indices].to(self.algo.ppo_device)) - return - - def after_clear_stats(self): - self.mean_scores.clear() - return - - def after_print_stats(self, frame, epoch_num, total_time): - if self.consecutive_successes.current_size > 0: - mean_con_successes = self.consecutive_successes.get_mean() - self.writer.add_scalar('successes/consecutive_successes/mean', mean_con_successes, frame) - self.writer.add_scalar('successes/consecutive_successes/iter', mean_con_successes, epoch_num) - self.writer.add_scalar('successes/consecutive_successes/time', mean_con_successes, total_time) - return - - -class RLGPUEnv(vecenv.IVecEnv): - def __init__(self, config_name, num_actors, **kwargs): - self.env = env_configurations.configurations[config_name]['env_creator'](**kwargs) - self.use_global_obs = (self.env.num_states > 0) - - self.full_state = {} - self.full_state["obs"] = self.reset() - if self.use_global_obs: - self.full_state["states"] = self.env.get_state() - return - - def step(self, action): - next_obs, reward, is_done, info = self.env.step(action) - - # todo: improve, return only dictinary - self.full_state["obs"] = next_obs - if self.use_global_obs: - self.full_state["states"] = self.env.get_state() - return self.full_state, reward, is_done, info - else: - return self.full_state["obs"], reward, is_done, info - - def reset(self, env_ids=None): - self.full_state["obs"] = self.env.reset(env_ids) - if self.use_global_obs: - self.full_state["states"] = self.env.get_state() - return self.full_state - else: - return self.full_state["obs"] - - def get_number_of_agents(self): - return self.env.get_number_of_agents() - - def get_env_info(self): - info = {} - info['action_space'] = self.env.action_space - info['observation_space'] = self.env.observation_space - info['amp_observation_space'] = self.env.amp_observation_space - - if self.use_global_obs: - info['state_space'] = self.env.state_space - print(info['action_space'], info['observation_space'], info['state_space']) - else: - print(info['action_space'], info['observation_space']) - - return info - - -vecenv.register('RLGPU', lambda config_name, num_actors, **kwargs: RLGPUEnv(config_name, num_actors, **kwargs)) -env_configurations.register('rlgpu', { - 'env_creator': lambda **kwargs: create_rlgpu_env(**kwargs), - 'vecenv_type': 'RLGPU'}) - - -def build_alg_runner(algo_observer): - runner = Runner(algo_observer) - runner.algo_factory.register_builder('amp', lambda **kwargs: amp_agent.AMPAgent(**kwargs)) - runner.player_factory.register_builder('amp', lambda **kwargs: amp_players.AMPPlayerContinuous(**kwargs)) - runner.model_builder.model_factory.register_builder('amp', lambda network, **kwargs: amp_models.ModelAMPContinuous( - network)) - runner.model_builder.network_factory.register_builder('amp', lambda **kwargs: amp_network_builder.AMPBuilder()) - - runner.algo_factory.register_builder('ase', lambda **kwargs: ase_agent.ASEAgent(**kwargs)) - runner.player_factory.register_builder('ase', lambda **kwargs: ase_players.ASEPlayer(**kwargs)) - runner.model_builder.model_factory.register_builder('ase', lambda network, **kwargs: ase_models.ModelASEContinuous( - network)) - 
runner.model_builder.network_factory.register_builder('ase', lambda **kwargs: ase_network_builder.ASEBuilder()) - - runner.algo_factory.register_builder('hrl', lambda **kwargs: hrl_agent.HRLAgent(**kwargs)) - runner.player_factory.register_builder('hrl', lambda **kwargs: hrl_players.HRLPlayer(**kwargs)) - runner.model_builder.model_factory.register_builder('hrl', lambda network, **kwargs: hrl_models.ModelHRLContinuous( - network)) - runner.model_builder.network_factory.register_builder('hrl', lambda **kwargs: hrl_network_builder.HRLBuilder()) - - return runner - - -def main(): - global args - global cfg - global cfg_train - - set_np_formatting() - args = get_args() - gpu_id = 0 - args.device_id = gpu_id - args.rl_device = 'cuda:' + str(gpu_id) - args.compute_device_id = gpu_id - args.graphics_device_id = gpu_id - args.sim_device = 'cuda:' + str(gpu_id) - cfg, cfg_train, logdir = load_cfg(args) - - cfg_train['params']['seed'] = set_seed(cfg_train['params'].get("seed", -1), - cfg_train['params'].get("torch_deterministic", False)) - - if args.horovod: - cfg_train['params']['config']['multi_gpu'] = args.horovod - - if args.horizon_length != -1: - cfg_train['params']['config']['horizon_length'] = args.horizon_length - - if args.minibatch_size != -1: - cfg_train['params']['config']['minibatch_size'] = args.minibatch_size - - if args.motion_file: - cfg['env']['motion_file'] = args.motion_file - - # Create default directories for weights and statistics - cfg_train['params']['config']['train_dir'] = args.output_path - - vargs = vars(args) - - algo_observer = RLGPUAlgoObserver() - - runner = build_alg_runner(algo_observer) - runner.load(cfg_train) - runner.reset() - runner.run(vargs) - - return - - -if __name__ == '__main__': - main() diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py deleted file mode 100644 index bc6ee169..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py deleted file mode 100644 index 9587440f..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/base_task.py +++ /dev/null @@ -1,428 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. 
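For orientation, the build_alg_runner() helper in the deleted run.py above extends the rl_games Runner by registering the AMP, ASE, and HRL agents, players, models, and network builders under string keys that cfg_train selects at load time. A rough, self-contained sketch of that keyed-registration pattern is given below; it uses plain Python with illustrative names (ObjectFactory, DummyAMPAgent) and is not the actual rl_games API.

class ObjectFactory:
    """Minimal stand-in for the keyed builder registries used by the Runner."""
    def __init__(self):
        self._builders = {}

    def register_builder(self, name, builder):
        # builder: any callable that returns a fresh object (agent, player, model, ...)
        self._builders[name] = builder

    def create(self, name, **kwargs):
        if name not in self._builders:
            raise KeyError(f"no builder registered under {name!r}")
        return self._builders[name](**kwargs)


class DummyAMPAgent:
    """Placeholder agent; in the deleted code the real AMPAgent comes from amp_agent.py."""
    def __init__(self, horizon_length=32, **kwargs):
        self.horizon_length = horizon_length


algo_factory = ObjectFactory()
algo_factory.register_builder('amp', lambda **kwargs: DummyAMPAgent(**kwargs))
agent = algo_factory.create('amp', horizon_length=16)  # key chosen by the training config
assert agent.horizon_length == 16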
- -import sys -import os -import operator -from copy import deepcopy -import random - -from isaacgym import gymapi -from isaacgym.gymutil import get_property_setter_map, get_property_getter_map, get_default_setter_args, apply_random_samples, check_buckets, generate_random_samples - -import numpy as np -import torch - - -# Base class for RL tasks -class BaseTask(): - - def __init__(self, cfg, enable_camera_sensors=False): - self.gym = gymapi.acquire_gym() - - self.device_type = cfg.get("device_type", "cuda") - self.device_id = cfg.get("device_id", 0) - - self.device = "cpu" - if self.device_type == "cuda" or self.device_type == "GPU": - self.device = "cuda" + ":" + str(self.device_id) - - self.headless = cfg["headless"] - - # double check! - self.graphics_device_id = self.device_id - if enable_camera_sensors == False and self.headless == True: - self.graphics_device_id = -1 - - self.num_envs = cfg["env"]["numEnvs"] - self.num_obs = cfg["env"]["numObservations"] - self.num_states = cfg["env"].get("numStates", 0) - self.num_actions = cfg["env"]["numActions"] - - self.control_freq_inv = cfg["env"].get("controlFrequencyInv", 1) - - # optimization flags for pytorch JIT - torch._C._jit_set_profiling_mode(False) - torch._C._jit_set_profiling_executor(False) - - # allocate buffers - self.obs_buf = torch.zeros( - (self.num_envs, self.num_obs), device=self.device, dtype=torch.float) - self.states_buf = torch.zeros( - (self.num_envs, self.num_states), device=self.device, dtype=torch.float) - self.rew_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.float) - self.reset_buf = torch.ones( - self.num_envs, device=self.device, dtype=torch.long) - self.progress_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.long) - self.randomize_buf = torch.zeros( - self.num_envs, device=self.device, dtype=torch.long) - self.extras = {} - - self.original_props = {} - self.dr_randomizations = {} - self.first_randomization = True - self.actor_params_generator = None - self.extern_actor_params = {} - for env_id in range(self.num_envs): - self.extern_actor_params[env_id] = None - - self.last_step = -1 - self.last_rand_step = -1 - - # create envs, sim and viewer - self.create_sim() - self.gym.prepare_sim(self.sim) - - # todo: read from config - self.enable_viewer_sync = True - self.viewer = None - - # if running with a viewer, set up keyboard shortcuts and camera - if self.headless == False: - # subscribe to keyboard shortcuts - self.viewer = self.gym.create_viewer( - self.sim, gymapi.CameraProperties()) - self.gym.subscribe_viewer_keyboard_event( - self.viewer, gymapi.KEY_ESCAPE, "QUIT") - self.gym.subscribe_viewer_keyboard_event( - self.viewer, gymapi.KEY_V, "toggle_viewer_sync") - - # set the camera position based on up axis - sim_params = self.gym.get_sim_params(self.sim) - if sim_params.up_axis == gymapi.UP_AXIS_Z: - cam_pos = gymapi.Vec3(20.0, 25.0, 3.0) - cam_target = gymapi.Vec3(10.0, 15.0, 0.0) - else: - cam_pos = gymapi.Vec3(20.0, 3.0, 25.0) - cam_target = gymapi.Vec3(10.0, 0.0, 15.0) - - self.gym.viewer_camera_look_at( - self.viewer, None, cam_pos, cam_target) - - # set gravity based on up axis and return axis index - def set_sim_params_up_axis(self, sim_params, axis): - if axis == 'z': - sim_params.up_axis = gymapi.UP_AXIS_Z - sim_params.gravity.x = 0 - sim_params.gravity.y = 0 - sim_params.gravity.z = -9.81 - return 2 - return 1 - - def create_sim(self, compute_device, graphics_device, physics_engine, sim_params): - sim = self.gym.create_sim(compute_device, graphics_device, 
physics_engine, sim_params) - if sim is None: - print("*** Failed to create sim") - quit() - - return sim - - def step(self, actions): - if self.dr_randomizations.get('actions', None): - actions = self.dr_randomizations['actions']['noise_lambda'](actions) - - # apply actions - self.pre_physics_step(actions) - - # step physics and render each frame - self._physics_step() - - # to fix! - if self.device == 'cpu': - self.gym.fetch_results(self.sim, True) - - # compute observations, rewards, resets, ... - self.post_physics_step() - - if self.dr_randomizations.get('observations', None): - self.obs_buf = self.dr_randomizations['observations']['noise_lambda'](self.obs_buf) - - def get_states(self): - return self.states_buf - - def render(self, sync_frame_time=False): - if self.viewer: - # check for window closed - if self.gym.query_viewer_has_closed(self.viewer): - sys.exit() - - # check for keyboard events - for evt in self.gym.query_viewer_action_events(self.viewer): - if evt.action == "QUIT" and evt.value > 0: - sys.exit() - elif evt.action == "toggle_viewer_sync" and evt.value > 0: - self.enable_viewer_sync = not self.enable_viewer_sync - - # fetch results - if self.device != 'cpu': - self.gym.fetch_results(self.sim, True) - - # step graphics - if self.enable_viewer_sync: - self.gym.step_graphics(self.sim) - self.gym.draw_viewer(self.viewer, self.sim, True) - else: - self.gym.poll_viewer_events(self.viewer) - - def get_actor_params_info(self, dr_params, env): - """Returns a flat array of actor params, their names and ranges.""" - if "actor_params" not in dr_params: - return None - params = [] - names = [] - lows = [] - highs = [] - param_getters_map = get_property_getter_map(self.gym) - for actor, actor_properties in dr_params["actor_params"].items(): - handle = self.gym.find_actor_handle(env, actor) - for prop_name, prop_attrs in actor_properties.items(): - if prop_name == 'color': - continue # this is set randomly - props = param_getters_map[prop_name](env, handle) - if not isinstance(props, list): - props = [props] - for prop_idx, prop in enumerate(props): - for attr, attr_randomization_params in prop_attrs.items(): - name = prop_name+'_'+str(prop_idx)+'_'+attr - lo_hi = attr_randomization_params['range'] - distr = attr_randomization_params['distribution'] - if 'uniform' not in distr: - lo_hi = (-1.0*float('Inf'), float('Inf')) - if isinstance(prop, np.ndarray): - for attr_idx in range(prop[attr].shape[0]): - params.append(prop[attr][attr_idx]) - names.append(name+'_'+str(attr_idx)) - lows.append(lo_hi[0]) - highs.append(lo_hi[1]) - else: - params.append(getattr(prop, attr)) - names.append(name) - lows.append(lo_hi[0]) - highs.append(lo_hi[1]) - return params, names, lows, highs - - # Apply randomizations only on resets, due to current PhysX limitations - def apply_randomizations(self, dr_params): - # If we don't have a randomization frequency, randomize every step - rand_freq = dr_params.get("frequency", 1) - - # First, determine what to randomize: - # - non-environment parameters when > frequency steps have passed since the last non-environment - # - physical environments in the reset buffer, which have exceeded the randomization frequency threshold - # - on the first call, randomize everything - self.last_step = self.gym.get_frame_count(self.sim) - if self.first_randomization: - do_nonenv_randomize = True - env_ids = list(range(self.num_envs)) - else: - do_nonenv_randomize = (self.last_step - self.last_rand_step) >= rand_freq - rand_envs = torch.where(self.randomize_buf >= rand_freq, 
torch.ones_like(self.randomize_buf), torch.zeros_like(self.randomize_buf)) - rand_envs = torch.logical_and(rand_envs, self.reset_buf) - env_ids = torch.nonzero(rand_envs, as_tuple=False).squeeze(-1).tolist() - self.randomize_buf[rand_envs] = 0 - - if do_nonenv_randomize: - self.last_rand_step = self.last_step - - param_setters_map = get_property_setter_map(self.gym) - param_setter_defaults_map = get_default_setter_args(self.gym) - param_getters_map = get_property_getter_map(self.gym) - - # On first iteration, check the number of buckets - if self.first_randomization: - check_buckets(self.gym, self.envs, dr_params) - - for nonphysical_param in ["observations", "actions"]: - if nonphysical_param in dr_params and do_nonenv_randomize: - dist = dr_params[nonphysical_param]["distribution"] - op_type = dr_params[nonphysical_param]["operation"] - sched_type = dr_params[nonphysical_param]["schedule"] if "schedule" in dr_params[nonphysical_param] else None - sched_step = dr_params[nonphysical_param]["schedule_steps"] if "schedule" in dr_params[nonphysical_param] else None - op = operator.add if op_type == 'additive' else operator.mul - - if sched_type == 'linear': - sched_scaling = 1.0 / sched_step * \ - min(self.last_step, sched_step) - elif sched_type == 'constant': - sched_scaling = 0 if self.last_step < sched_step else 1 - else: - sched_scaling = 1 - - if dist == 'gaussian': - mu, var = dr_params[nonphysical_param]["range"] - mu_corr, var_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.]) - - if op_type == 'additive': - mu *= sched_scaling - var *= sched_scaling - mu_corr *= sched_scaling - var_corr *= sched_scaling - elif op_type == 'scaling': - var = var * sched_scaling # scale up var over time - mu = mu * sched_scaling + 1.0 * \ - (1.0 - sched_scaling) # linearly interpolate - - var_corr = var_corr * sched_scaling # scale up var over time - mu_corr = mu_corr * sched_scaling + 1.0 * \ - (1.0 - sched_scaling) # linearly interpolate - - def noise_lambda(tensor, param_name=nonphysical_param): - params = self.dr_randomizations[param_name] - corr = params.get('corr', None) - if corr is None: - corr = torch.randn_like(tensor) - params['corr'] = corr - corr = corr * params['var_corr'] + params['mu_corr'] - return op( - tensor, corr + torch.randn_like(tensor) * params['var'] + params['mu']) - - self.dr_randomizations[nonphysical_param] = {'mu': mu, 'var': var, 'mu_corr': mu_corr, 'var_corr': var_corr, 'noise_lambda': noise_lambda} - - elif dist == 'uniform': - lo, hi = dr_params[nonphysical_param]["range"] - lo_corr, hi_corr = dr_params[nonphysical_param].get("range_correlated", [0., 0.]) - - if op_type == 'additive': - lo *= sched_scaling - hi *= sched_scaling - lo_corr *= sched_scaling - hi_corr *= sched_scaling - elif op_type == 'scaling': - lo = lo * sched_scaling + 1.0 * (1.0 - sched_scaling) - hi = hi * sched_scaling + 1.0 * (1.0 - sched_scaling) - lo_corr = lo_corr * sched_scaling + 1.0 * (1.0 - sched_scaling) - hi_corr = hi_corr * sched_scaling + 1.0 * (1.0 - sched_scaling) - - def noise_lambda(tensor, param_name=nonphysical_param): - params = self.dr_randomizations[param_name] - corr = params.get('corr', None) - if corr is None: - corr = torch.randn_like(tensor) - params['corr'] = corr - corr = corr * (params['hi_corr'] - params['lo_corr']) + params['lo_corr'] - return op(tensor, corr + torch.rand_like(tensor) * (params['hi'] - params['lo']) + params['lo']) - - self.dr_randomizations[nonphysical_param] = {'lo': lo, 'hi': hi, 'lo_corr': lo_corr, 'hi_corr': hi_corr, 
'noise_lambda': noise_lambda} - - if "sim_params" in dr_params and do_nonenv_randomize: - prop_attrs = dr_params["sim_params"] - prop = self.gym.get_sim_params(self.sim) - - if self.first_randomization: - self.original_props["sim_params"] = { - attr: getattr(prop, attr) for attr in dir(prop)} - - for attr, attr_randomization_params in prop_attrs.items(): - apply_random_samples( - prop, self.original_props["sim_params"], attr, attr_randomization_params, self.last_step) - - self.gym.set_sim_params(self.sim, prop) - - # If self.actor_params_generator is initialized: use it to - # sample actor simulation params. This gives users the - # freedom to generate samples from arbitrary distributions, - # e.g. use full-covariance distributions instead of the DR's - # default of treating each simulation parameter independently. - extern_offsets = {} - if self.actor_params_generator is not None: - for env_id in env_ids: - self.extern_actor_params[env_id] = \ - self.actor_params_generator.sample() - extern_offsets[env_id] = 0 - - for actor, actor_properties in dr_params["actor_params"].items(): - for env_id in env_ids: - env = self.envs[env_id] - handle = self.gym.find_actor_handle(env, actor) - extern_sample = self.extern_actor_params[env_id] - - for prop_name, prop_attrs in actor_properties.items(): - if prop_name == 'color': - num_bodies = self.gym.get_actor_rigid_body_count( - env, handle) - for n in range(num_bodies): - self.gym.set_rigid_body_color(env, handle, n, gymapi.MESH_VISUAL, - gymapi.Vec3(random.uniform(0, 1), random.uniform(0, 1), random.uniform(0, 1))) - continue - if prop_name == 'scale': - attr_randomization_params = prop_attrs - sample = generate_random_samples(attr_randomization_params, 1, - self.last_step, None) - og_scale = 1 - if attr_randomization_params['operation'] == 'scaling': - new_scale = og_scale * sample - elif attr_randomization_params['operation'] == 'additive': - new_scale = og_scale + sample - self.gym.set_actor_scale(env, handle, new_scale) - continue - - prop = param_getters_map[prop_name](env, handle) - if isinstance(prop, list): - if self.first_randomization: - self.original_props[prop_name] = [ - {attr: getattr(p, attr) for attr in dir(p)} for p in prop] - for p, og_p in zip(prop, self.original_props[prop_name]): - for attr, attr_randomization_params in prop_attrs.items(): - smpl = None - if self.actor_params_generator is not None: - smpl, extern_offsets[env_id] = get_attr_val_from_sample( - extern_sample, extern_offsets[env_id], p, attr) - apply_random_samples( - p, og_p, attr, attr_randomization_params, - self.last_step, smpl) - else: - if self.first_randomization: - self.original_props[prop_name] = deepcopy(prop) - for attr, attr_randomization_params in prop_attrs.items(): - smpl = None - if self.actor_params_generator is not None: - smpl, extern_offsets[env_id] = get_attr_val_from_sample( - extern_sample, extern_offsets[env_id], prop, attr) - apply_random_samples( - prop, self.original_props[prop_name], attr, - attr_randomization_params, self.last_step, smpl) - - setter = param_setters_map[prop_name] - default_args = param_setter_defaults_map[prop_name] - setter(env, handle, prop, *default_args) - - if self.actor_params_generator is not None: - for env_id in env_ids: # check that we used all dims in sample - if extern_offsets[env_id] > 0: - extern_sample = self.extern_actor_params[env_id] - if extern_offsets[env_id] != extern_sample.shape[0]: - print('env_id', env_id, - 'extern_offset', extern_offsets[env_id], - 'vs extern_sample.shape', extern_sample.shape) 
- raise Exception("Invalid extern_sample size") - - self.first_randomization = False - - def pre_physics_step(self, actions): - raise NotImplementedError - - def _physics_step(self): - for i in range(self.control_freq_inv): - self.render() - self.gym.simulate(self.sim) - return - - def post_physics_step(self): - raise NotImplementedError - - -def get_attr_val_from_sample(sample, offset, prop, attr): - """Retrieves param value for the given prop and attr from the sample.""" - if sample is None: - return None, 0 - if isinstance(prop, np.ndarray): - smpl = sample[offset:offset+prop[attr].shape[0]] - return smpl, offset+prop[attr].shape[0] - else: - return sample[offset], offset+1 diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py deleted file mode 100644 index 037fe95b..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid.py +++ /dev/null @@ -1,692 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
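The apply_randomizations() method in the deleted base_task.py above builds per-parameter noise_lambda closures for observation and action noise, with the noise magnitude ramped in by a schedule. A simplified, self-contained sketch of the additive-uniform case with a linear schedule follows (correlated noise omitted; torch assumed available, as elsewhere in these files).

import torch

def make_uniform_noise_lambda(lo, hi, sched_steps, get_step):
    """Additive uniform noise whose range scales linearly from 0 up to [lo, hi]."""
    def noise_lambda(tensor):
        scaling = min(get_step(), sched_steps) / sched_steps   # 0 -> 1 over sched_steps
        lo_s, hi_s = lo * scaling, hi * scaling
        return tensor + torch.rand_like(tensor) * (hi_s - lo_s) + lo_s
    return noise_lambda

step_counter = {'frame': 0}
obs_noise = make_uniform_noise_lambda(-0.05, 0.05, sched_steps=1000,
                                      get_step=lambda: step_counter['frame'])

obs = torch.zeros(4)
step_counter['frame'] = 500    # halfway through the schedule
noisy_obs = obs_noise(obs)     # values drawn uniformly from [-0.025, 0.025)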
- -import numpy as np -import os -import torch - -from isaacgym import gymtorch -from isaacgym import gymapi -from isaacgym.torch_utils import * - -from utils import torch_utils - -from tasks.base_task import BaseTask - -class Humanoid(BaseTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self.cfg = cfg - self.sim_params = sim_params - self.physics_engine = physics_engine - - self._pd_control = self.cfg["env"]["pdControl"] - self.power_scale = self.cfg["env"]["powerScale"] - - self.debug_viz = self.cfg["env"]["enableDebugVis"] - self.plane_static_friction = self.cfg["env"]["plane"]["staticFriction"] - self.plane_dynamic_friction = self.cfg["env"]["plane"]["dynamicFriction"] - self.plane_restitution = self.cfg["env"]["plane"]["restitution"] - - self.max_episode_length = self.cfg["env"]["episodeLength"] - self._local_root_obs = self.cfg["env"]["localRootObs"] - self._root_height_obs = self.cfg["env"].get("rootHeightObs", True) - self._enable_early_termination = self.cfg["env"]["enableEarlyTermination"] - - key_bodies = self.cfg["env"]["keyBodies"] - self._setup_character_props(key_bodies) - - self.cfg["env"]["numObservations"] = self.get_obs_size() - self.cfg["env"]["numActions"] = self.get_action_size() - - self.cfg["device_type"] = device_type - self.cfg["device_id"] = device_id - self.cfg["headless"] = headless - - super().__init__(cfg=self.cfg) - - self.dt = self.control_freq_inv * sim_params.dt - - # get gym GPU state tensors - actor_root_state = self.gym.acquire_actor_root_state_tensor(self.sim) - dof_state_tensor = self.gym.acquire_dof_state_tensor(self.sim) - sensor_tensor = self.gym.acquire_force_sensor_tensor(self.sim) - rigid_body_state = self.gym.acquire_rigid_body_state_tensor(self.sim) - contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) - - sensors_per_env = 2 - self.vec_sensor_tensor = gymtorch.wrap_tensor(sensor_tensor).view(self.num_envs, sensors_per_env * 6) - - dof_force_tensor = self.gym.acquire_dof_force_tensor(self.sim) - self.dof_force_tensor = gymtorch.wrap_tensor(dof_force_tensor).view(self.num_envs, self.num_dof) - - self.gym.refresh_dof_state_tensor(self.sim) - self.gym.refresh_actor_root_state_tensor(self.sim) - self.gym.refresh_rigid_body_state_tensor(self.sim) - self.gym.refresh_net_contact_force_tensor(self.sim) - - self._root_states = gymtorch.wrap_tensor(actor_root_state) - num_actors = self.get_num_actors_per_env() - - self._humanoid_root_states = self._root_states.view(self.num_envs, num_actors, actor_root_state.shape[-1])[..., 0, :] - self._initial_humanoid_root_states = self._humanoid_root_states.clone() - self._initial_humanoid_root_states[:, 7:13] = 0 - - self._humanoid_actor_ids = num_actors * torch.arange(self.num_envs, device=self.device, dtype=torch.int32) - - # create some wrapper tensors for different slices - self._dof_state = gymtorch.wrap_tensor(dof_state_tensor) - dofs_per_env = self._dof_state.shape[0] // self.num_envs - self._dof_pos = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 0] - self._dof_vel = self._dof_state.view(self.num_envs, dofs_per_env, 2)[..., :self.num_dof, 1] - - self._initial_dof_pos = torch.zeros_like(self._dof_pos, device=self.device, dtype=torch.float) - self._initial_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) - - self._rigid_body_state = gymtorch.wrap_tensor(rigid_body_state) - bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs - rigid_body_state_reshaped = 
self._rigid_body_state.view(self.num_envs, bodies_per_env, 13) - - self._rigid_body_pos = rigid_body_state_reshaped[..., :self.num_bodies, 0:3] - self._rigid_body_rot = rigid_body_state_reshaped[..., :self.num_bodies, 3:7] - self._rigid_body_vel = rigid_body_state_reshaped[..., :self.num_bodies, 7:10] - self._rigid_body_ang_vel = rigid_body_state_reshaped[..., :self.num_bodies, 10:13] - - contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor) - self._contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., :self.num_bodies, :] - - self._terminate_buf = torch.ones(self.num_envs, device=self.device, dtype=torch.long) - - self._build_termination_heights() - - contact_bodies = self.cfg["env"]["contactBodies"] - self._key_body_ids = self._build_key_body_ids_tensor(key_bodies) - self._contact_body_ids = self._build_contact_body_ids_tensor(contact_bodies) - - if self.viewer != None: - self._init_camera() - - return - - def get_obs_size(self): - return self._num_obs - - def get_action_size(self): - return self._num_actions - - def get_num_actors_per_env(self): - num_actors = self._root_states.shape[0] // self.num_envs - return num_actors - - def create_sim(self): - self.up_axis_idx = self.set_sim_params_up_axis(self.sim_params, 'z') - self.sim = super().create_sim(self.device_id, self.graphics_device_id, self.physics_engine, self.sim_params) - - self._create_ground_plane() - self._create_envs(self.num_envs, self.cfg["env"]['envSpacing'], int(np.sqrt(self.num_envs))) - return - - def reset(self, env_ids=None): - if (env_ids is None): - env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long) - self._reset_envs(env_ids) - return - - def set_char_color(self, col, env_ids): - for env_id in env_ids: - env_ptr = self.envs[env_id] - handle = self.humanoid_handles[env_id] - - for j in range(self.num_bodies): - self.gym.set_rigid_body_color(env_ptr, handle, j, gymapi.MESH_VISUAL, - gymapi.Vec3(col[0], col[1], col[2])) - - return - - def _reset_envs(self, env_ids): - if (len(env_ids) > 0): - self._reset_actors(env_ids) - self._reset_env_tensors(env_ids) - self._refresh_sim_tensors() - self._compute_observations(env_ids) - return - - def _reset_env_tensors(self, env_ids): - env_ids_int32 = self._humanoid_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - self.gym.set_dof_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - - self.progress_buf[env_ids] = 0 - self.reset_buf[env_ids] = 0 - self._terminate_buf[env_ids] = 0 - return - - def _create_ground_plane(self): - plane_params = gymapi.PlaneParams() - plane_params.normal = gymapi.Vec3(0.0, 0.0, 1.0) - plane_params.static_friction = self.plane_static_friction - plane_params.dynamic_friction = self.plane_dynamic_friction - plane_params.restitution = self.plane_restitution - self.gym.add_ground(self.sim, plane_params) - return - - def _setup_character_props(self, key_bodies): - asset_file = self.cfg["env"]["asset"]["assetFileName"] - num_key_bodies = len(key_bodies) - - if (asset_file == "mjcf/amp_humanoid.xml"): - self._dof_body_ids = [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14] - self._dof_offsets = [0, 3, 6, 9, 10, 13, 14, 17, 18, 21, 24, 25, 28] - self._dof_obs_size = 72 - self._num_actions = 28 - self._num_obs = 1 + 15 * (3 + 6 + 3 + 3) - 3 - - elif (asset_file == 
"mjcf/amp_humanoid_sword_shield.xml"): - self._dof_body_ids = [1, 2, 3, 4, 5, 7, 8, 11, 12, 13, 14, 15, 16] - self._dof_offsets = [0, 3, 6, 9, 10, 13, 16, 17, 20, 21, 24, 27, 28, 31] - self._dof_obs_size = 78 - self._num_actions = 31 - self._num_obs = 1 + 17 * (3 + 6 + 3 + 3) - 3 - - else: - print("Unsupported character config file: {s}".format(asset_file)) - assert(False) - - return - - def _build_termination_heights(self): - head_term_height = 0.3 - shield_term_height = 0.32 - - termination_height = self.cfg["env"]["terminationHeight"] - self._termination_heights = np.array([termination_height] * self.num_bodies) - - head_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "head") - self._termination_heights[head_id] = max(head_term_height, self._termination_heights[head_id]) - - asset_file = self.cfg["env"]["asset"]["assetFileName"] - if (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): - left_arm_id = self.gym.find_actor_rigid_body_handle(self.envs[0], self.humanoid_handles[0], "left_lower_arm") - self._termination_heights[left_arm_id] = max(shield_term_height, self._termination_heights[left_arm_id]) - - self._termination_heights = to_torch(self._termination_heights, device=self.device) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - lower = gymapi.Vec3(-spacing, -spacing, 0.0) - upper = gymapi.Vec3(spacing, spacing, spacing) - - asset_root = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/simulator/assets" - asset_file = self.cfg["env"]["asset"]["assetFileName"] - - asset_path = os.path.join(asset_root, asset_file) - asset_root = os.path.dirname(asset_path) - asset_file = os.path.basename(asset_path) - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - #asset_options.fix_base_link = True - humanoid_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - actuator_props = self.gym.get_asset_actuator_properties(humanoid_asset) - motor_efforts = [prop.motor_effort for prop in actuator_props] - - # create force sensors at the feet - right_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "right_foot") - left_foot_idx = self.gym.find_asset_rigid_body_index(humanoid_asset, "left_foot") - sensor_pose = gymapi.Transform() - - self.gym.create_asset_force_sensor(humanoid_asset, right_foot_idx, sensor_pose) - self.gym.create_asset_force_sensor(humanoid_asset, left_foot_idx, sensor_pose) - - self.max_motor_effort = max(motor_efforts) - self.motor_efforts = to_torch(motor_efforts, device=self.device) - - self.torso_index = 0 - self.num_bodies = self.gym.get_asset_rigid_body_count(humanoid_asset) - self.num_dof = self.gym.get_asset_dof_count(humanoid_asset) - self.num_joints = self.gym.get_asset_joint_count(humanoid_asset) - - self.humanoid_handles = [] - self.envs = [] - self.dof_limits_lower = [] - self.dof_limits_upper = [] - - for i in range(self.num_envs): - # create env instance - env_ptr = self.gym.create_env(self.sim, lower, upper, num_per_row) - self._build_env(i, env_ptr, humanoid_asset) - self.envs.append(env_ptr) - - dof_prop = self.gym.get_actor_dof_properties(self.envs[0], self.humanoid_handles[0]) - for j in range(self.num_dof): - if dof_prop['lower'][j] > dof_prop['upper'][j]: - self.dof_limits_lower.append(dof_prop['upper'][j]) - self.dof_limits_upper.append(dof_prop['lower'][j]) - else: - 
self.dof_limits_lower.append(dof_prop['lower'][j]) - self.dof_limits_upper.append(dof_prop['upper'][j]) - - self.dof_limits_lower = to_torch(self.dof_limits_lower, device=self.device) - self.dof_limits_upper = to_torch(self.dof_limits_upper, device=self.device) - - if (self._pd_control): - self._build_pd_action_offset_scale() - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - col_group = env_id - col_filter = self._get_humanoid_collision_filter() - segmentation_id = 0 - - start_pose = gymapi.Transform() - asset_file = self.cfg["env"]["asset"]["assetFileName"] - char_h = 0.89 - - start_pose.p = gymapi.Vec3(*get_axis_params(char_h, self.up_axis_idx)) - start_pose.r = gymapi.Quat(0.0, 0.0, 0.0, 1.0) - - humanoid_handle = self.gym.create_actor(env_ptr, humanoid_asset, start_pose, "humanoid", col_group, col_filter, segmentation_id) - - self.gym.enable_actor_dof_force_sensors(env_ptr, humanoid_handle) - - for j in range(self.num_bodies): - self.gym.set_rigid_body_color(env_ptr, humanoid_handle, j, gymapi.MESH_VISUAL, gymapi.Vec3(0.54, 0.85, 0.2)) - - if (self._pd_control): - dof_prop = self.gym.get_asset_dof_properties(humanoid_asset) - dof_prop["driveMode"] = gymapi.DOF_MODE_POS - self.gym.set_actor_dof_properties(env_ptr, humanoid_handle, dof_prop) - - self.humanoid_handles.append(humanoid_handle) - - return - - def _build_pd_action_offset_scale(self): - num_joints = len(self._dof_offsets) - 1 - - lim_low = self.dof_limits_lower.cpu().numpy() - lim_high = self.dof_limits_upper.cpu().numpy() - - for j in range(num_joints): - dof_offset = self._dof_offsets[j] - dof_size = self._dof_offsets[j + 1] - self._dof_offsets[j] - - if (dof_size == 3): - curr_low = lim_low[dof_offset:(dof_offset + dof_size)] - curr_high = lim_high[dof_offset:(dof_offset + dof_size)] - curr_low = np.max(np.abs(curr_low)) - curr_high = np.max(np.abs(curr_high)) - curr_scale = max([curr_low, curr_high]) - curr_scale = 1.2 * curr_scale - curr_scale = min([curr_scale, np.pi]) - - lim_low[dof_offset:(dof_offset + dof_size)] = -curr_scale - lim_high[dof_offset:(dof_offset + dof_size)] = curr_scale - - #lim_low[dof_offset:(dof_offset + dof_size)] = -np.pi - #lim_high[dof_offset:(dof_offset + dof_size)] = np.pi - - - elif (dof_size == 1): - curr_low = lim_low[dof_offset] - curr_high = lim_high[dof_offset] - curr_mid = 0.5 * (curr_high + curr_low) - - # extend the action range to be a bit beyond the joint limits so that the motors - # don't lose their strength as they approach the joint limits - curr_scale = 0.7 * (curr_high - curr_low) - curr_low = curr_mid - curr_scale - curr_high = curr_mid + curr_scale - - lim_low[dof_offset] = curr_low - lim_high[dof_offset] = curr_high - - self._pd_action_offset = 0.5 * (lim_high + lim_low) - self._pd_action_scale = 0.5 * (lim_high - lim_low) - self._pd_action_offset = to_torch(self._pd_action_offset, device=self.device) - self._pd_action_scale = to_torch(self._pd_action_scale, device=self.device) - - return - - def _get_humanoid_collision_filter(self): - return 0 - - def _compute_reward(self, actions): - self.rew_buf[:] = compute_humanoid_reward(self.obs_buf) - return - - def _compute_reset(self): - self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf, - self._contact_forces, self._contact_body_ids, - self._rigid_body_pos, self.max_episode_length, - self._enable_early_termination, self._termination_heights) - return - - def _refresh_sim_tensors(self): - self.gym.refresh_dof_state_tensor(self.sim) - 
self.gym.refresh_actor_root_state_tensor(self.sim) - self.gym.refresh_rigid_body_state_tensor(self.sim) - - self.gym.refresh_force_sensor_tensor(self.sim) - self.gym.refresh_dof_force_tensor(self.sim) - self.gym.refresh_net_contact_force_tensor(self.sim) - return - - def _compute_observations(self, env_ids=None): - obs = self._compute_humanoid_obs(env_ids) - - if (env_ids is None): - self.obs_buf[:] = obs - else: - self.obs_buf[env_ids] = obs - - return - - def _compute_humanoid_obs(self, env_ids=None): - if (env_ids is None): - body_pos = self._rigid_body_pos - body_rot = self._rigid_body_rot - body_vel = self._rigid_body_vel - body_ang_vel = self._rigid_body_ang_vel - else: - body_pos = self._rigid_body_pos[env_ids] - body_rot = self._rigid_body_rot[env_ids] - body_vel = self._rigid_body_vel[env_ids] - body_ang_vel = self._rigid_body_ang_vel[env_ids] - - obs = compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, self._local_root_obs, - self._root_height_obs) - return obs - - def _reset_actors(self, env_ids): - self._humanoid_root_states[env_ids] = self._initial_humanoid_root_states[env_ids] - self._dof_pos[env_ids] = self._initial_dof_pos[env_ids] - self._dof_vel[env_ids] = self._initial_dof_vel[env_ids] - return - - def pre_physics_step(self, actions): - self.actions = actions.to(self.device).clone() - if (self._pd_control): - pd_tar = self._action_to_pd_targets(self.actions) - pd_tar_tensor = gymtorch.unwrap_tensor(pd_tar) - self.gym.set_dof_position_target_tensor(self.sim, pd_tar_tensor) - else: - forces = self.actions * self.motor_efforts.unsqueeze(0) * self.power_scale - force_tensor = gymtorch.unwrap_tensor(forces) - self.gym.set_dof_actuation_force_tensor(self.sim, force_tensor) - - return - - def post_physics_step(self): - self.progress_buf += 1 - - self._refresh_sim_tensors() - self._compute_observations() - self._compute_reward(self.actions) - self._compute_reset() - - self.extras["terminate"] = self._terminate_buf - - # debug viz - if self.viewer and self.debug_viz: - self._update_debug_viz() - - return - - def render(self, sync_frame_time=False): - if self.viewer: - self._update_camera() - - super().render(sync_frame_time) - return - - def _build_key_body_ids_tensor(self, key_body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in key_body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _build_contact_body_ids_tensor(self, contact_body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in contact_body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _action_to_pd_targets(self, action): - pd_tar = self._pd_action_offset + self._pd_action_scale * action - return pd_tar - - def _init_camera(self): - self.gym.refresh_actor_root_state_tensor(self.sim) - self._cam_prev_char_pos = self._humanoid_root_states[0, 0:3].cpu().numpy() - - cam_pos = gymapi.Vec3(self._cam_prev_char_pos[0], - self._cam_prev_char_pos[1] - 3.0, - 1.0) - cam_target = gymapi.Vec3(self._cam_prev_char_pos[0], - self._cam_prev_char_pos[1], - 1.0) - self.gym.viewer_camera_look_at(self.viewer, None, 
cam_pos, cam_target) - return - - def _update_camera(self): - self.gym.refresh_actor_root_state_tensor(self.sim) - char_root_pos = self._humanoid_root_states[0, 0:3].cpu().numpy() - - cam_trans = self.gym.get_viewer_camera_transform(self.viewer, None) - cam_pos = np.array([cam_trans.p.x, cam_trans.p.y, cam_trans.p.z]) - cam_delta = cam_pos - self._cam_prev_char_pos - - new_cam_target = gymapi.Vec3(char_root_pos[0], char_root_pos[1], 1.0) - new_cam_pos = gymapi.Vec3(char_root_pos[0] + cam_delta[0], - char_root_pos[1] + cam_delta[1], - cam_pos[2]) - - self.gym.viewer_camera_look_at(self.viewer, None, new_cam_pos, new_cam_target) - - self._cam_prev_char_pos[:] = char_root_pos - return - - def _update_debug_viz(self): - self.gym.clear_lines(self.viewer) - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def dof_to_obs(pose, dof_obs_size, dof_offsets): - # type: (Tensor, int, List[int]) -> Tensor - joint_obs_size = 6 - num_joints = len(dof_offsets) - 1 - - dof_obs_shape = pose.shape[:-1] + (dof_obs_size,) - dof_obs = torch.zeros(dof_obs_shape, device=pose.device) - dof_obs_offset = 0 - - for j in range(num_joints): - dof_offset = dof_offsets[j] - dof_size = dof_offsets[j + 1] - dof_offsets[j] - joint_pose = pose[:, dof_offset:(dof_offset + dof_size)] - - # assume this is a spherical joint - if (dof_size == 3): - joint_pose_q = torch_utils.exp_map_to_quat(joint_pose) - elif (dof_size == 1): - axis = torch.tensor([0.0, 1.0, 0.0], dtype=joint_pose.dtype, device=pose.device) - joint_pose_q = quat_from_angle_axis(joint_pose[..., 0], axis) - else: - joint_pose_q = None - assert(False), "Unsupported joint type" - - joint_dof_obs = torch_utils.quat_to_tan_norm(joint_pose_q) - dof_obs[:, (j * joint_obs_size):((j + 1) * joint_obs_size)] = joint_dof_obs - - assert((num_joints * joint_obs_size) == dof_obs_size) - - return dof_obs - -@torch.jit.script -def compute_humanoid_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos, - local_root_obs, root_height_obs, dof_obs_size, dof_offsets): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor - root_h = root_pos[:, 2:3] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - if (local_root_obs): - root_rot_obs = quat_mul(heading_rot, root_rot) - else: - root_rot_obs = root_rot - root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs) - - if (not root_height_obs): - root_h_obs = torch.zeros_like(root_h) - else: - root_h_obs = root_h - - local_root_vel = quat_rotate(heading_rot, root_vel) - local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel) - - root_pos_expand = root_pos.unsqueeze(-2) - local_key_body_pos = key_body_pos - root_pos_expand - - heading_rot_expand = heading_rot.unsqueeze(-2) - heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1)) - flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1], local_key_body_pos.shape[2]) - flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1], - heading_rot_expand.shape[2]) - local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos) - flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0], local_key_body_pos.shape[1] * local_key_body_pos.shape[2]) - - dof_obs = dof_to_obs(dof_pos, dof_obs_size, 
dof_offsets) - - obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel, flat_local_key_pos), dim=-1) - return obs - -@torch.jit.script -def compute_humanoid_observations_max(body_pos, body_rot, body_vel, body_ang_vel, local_root_obs, root_height_obs): - # type: (Tensor, Tensor, Tensor, Tensor, bool, bool) -> Tensor - root_pos = body_pos[:, 0, :] - root_rot = body_rot[:, 0, :] - - root_h = root_pos[:, 2:3] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - if (not root_height_obs): - root_h_obs = torch.zeros_like(root_h) - else: - root_h_obs = root_h - - heading_rot_expand = heading_rot.unsqueeze(-2) - heading_rot_expand = heading_rot_expand.repeat((1, body_pos.shape[1], 1)) - flat_heading_rot = heading_rot_expand.reshape(heading_rot_expand.shape[0] * heading_rot_expand.shape[1], - heading_rot_expand.shape[2]) - - root_pos_expand = root_pos.unsqueeze(-2) - local_body_pos = body_pos - root_pos_expand - flat_local_body_pos = local_body_pos.reshape(local_body_pos.shape[0] * local_body_pos.shape[1], local_body_pos.shape[2]) - flat_local_body_pos = quat_rotate(flat_heading_rot, flat_local_body_pos) - local_body_pos = flat_local_body_pos.reshape(local_body_pos.shape[0], local_body_pos.shape[1] * local_body_pos.shape[2]) - local_body_pos = local_body_pos[..., 3:] # remove root pos - - flat_body_rot = body_rot.reshape(body_rot.shape[0] * body_rot.shape[1], body_rot.shape[2]) - flat_local_body_rot = quat_mul(flat_heading_rot, flat_body_rot) - flat_local_body_rot_obs = torch_utils.quat_to_tan_norm(flat_local_body_rot) - local_body_rot_obs = flat_local_body_rot_obs.reshape(body_rot.shape[0], body_rot.shape[1] * flat_local_body_rot_obs.shape[1]) - - if (local_root_obs): - root_rot_obs = torch_utils.quat_to_tan_norm(root_rot) - local_body_rot_obs[..., 0:6] = root_rot_obs - - flat_body_vel = body_vel.reshape(body_vel.shape[0] * body_vel.shape[1], body_vel.shape[2]) - flat_local_body_vel = quat_rotate(flat_heading_rot, flat_body_vel) - local_body_vel = flat_local_body_vel.reshape(body_vel.shape[0], body_vel.shape[1] * body_vel.shape[2]) - - flat_body_ang_vel = body_ang_vel.reshape(body_ang_vel.shape[0] * body_ang_vel.shape[1], body_ang_vel.shape[2]) - flat_local_body_ang_vel = quat_rotate(flat_heading_rot, flat_body_ang_vel) - local_body_ang_vel = flat_local_body_ang_vel.reshape(body_ang_vel.shape[0], body_ang_vel.shape[1] * body_ang_vel.shape[2]) - - obs = torch.cat((root_h_obs, local_body_pos, local_body_rot_obs, local_body_vel, local_body_ang_vel), dim=-1) - return obs - - -@torch.jit.script -def compute_humanoid_reward(obs_buf): - # type: (Tensor) -> Tensor - reward = torch.ones_like(obs_buf[:, 0]) - return reward - -@torch.jit.script -def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos, - max_episode_length, enable_early_termination, termination_heights): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor] - terminated = torch.zeros_like(reset_buf) - - if (enable_early_termination): - masked_contact_buf = contact_buf.clone() - masked_contact_buf[:, contact_body_ids, :] = 0 - fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1) - fall_contact = torch.any(fall_contact, dim=-1) - - body_height = rigid_body_pos[..., 2] - fall_height = body_height < termination_heights - fall_height[:, contact_body_ids] = False - fall_height = torch.any(fall_height, dim=-1) - - has_fallen = torch.logical_and(fall_contact, fall_height) - - # first timestep can 
sometimes still have nonzero contact forces - # so only check after first couple of steps - has_fallen *= (progress_buf > 1) - terminated = torch.where(has_fallen, torch.ones_like(reset_buf), terminated) - - reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated) - - return reset, terminated diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py deleted file mode 100644 index 7022bf76..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp.py +++ /dev/null @@ -1,344 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
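The compute_humanoid_reset() function at the end of the deleted humanoid.py above flags an environment as terminated when some body other than an allowed contact body both reports a contact force above 0.1 and has dropped below its termination height, skipping the first simulation step. A toy-sized walkthrough of that test, with an illustrative two-body layout and made-up numbers, is shown below.

import torch

# one env, two bodies: body 0 is a foot (allowed contact), body 1 is not
contact_buf = torch.tensor([[[0.0, 0.0, 0.0],
                             [0.3, 0.0, 0.0]]])   # body 1 reports a contact force
body_height = torch.tensor([[0.05, 0.40]])        # z-height of each body
termination_heights = torch.tensor([0.15, 0.90])  # body 1 is below its threshold
contact_body_ids = torch.tensor([0])              # feet may touch the ground
progress_buf = torch.tensor([10])                 # past the first step

masked_contact = contact_buf.clone()
masked_contact[:, contact_body_ids, :] = 0        # ignore allowed contacts
fall_contact = torch.any(torch.any(torch.abs(masked_contact) > 0.1, dim=-1), dim=-1)

fall_height = body_height < termination_heights
fall_height[:, contact_body_ids] = False          # allowed bodies never trigger the height test
fall_height = torch.any(fall_height, dim=-1)

has_fallen = torch.logical_and(fall_contact, fall_height) & (progress_buf > 1)
print(has_fallen)   # tensor([True]) -> this env is reset with its terminate flag set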
- -from enum import Enum -import numpy as np -import torch - -from isaacgym import gymapi -from isaacgym import gymtorch - -from tasks.humanoid import Humanoid, dof_to_obs -from utils import gym_util -from utils.motion_lib import MotionLib -from isaacgym.torch_utils import * - -from utils import torch_utils - -class HumanoidAMP(Humanoid): - class StateInit(Enum): - Default = 0 - Start = 1 - Random = 2 - Hybrid = 3 - - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - state_init = cfg["env"]["stateInit"] - self._state_init = HumanoidAMP.StateInit[state_init] - self._hybrid_init_prob = cfg["env"]["hybridInitProb"] - self._num_amp_obs_steps = cfg["env"]["numAMPObsSteps"] - assert(self._num_amp_obs_steps >= 2) - - self._reset_default_env_ids = [] - self._reset_ref_env_ids = [] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - motion_file = cfg['env']['motion_file'] - self._load_motion(motion_file) - - self._amp_obs_buf = torch.zeros((self.num_envs, self._num_amp_obs_steps, self._num_amp_obs_per_step), device=self.device, dtype=torch.float) - self._curr_amp_obs_buf = self._amp_obs_buf[:, 0] - self._hist_amp_obs_buf = self._amp_obs_buf[:, 1:] - - self._amp_obs_demo_buf = None - - return - - def post_physics_step(self): - super().post_physics_step() - - self._update_hist_amp_obs() - self._compute_amp_observations() - - amp_obs_flat = self._amp_obs_buf.view(-1, self.get_num_amp_obs()) - self.extras["amp_obs"] = amp_obs_flat - - return - - def get_num_amp_obs(self): - return self._num_amp_obs_steps * self._num_amp_obs_per_step - - def fetch_amp_obs_demo(self, num_samples): - - if (self._amp_obs_demo_buf is None): - self._build_amp_obs_demo_buf(num_samples) - else: - assert(self._amp_obs_demo_buf.shape[0] == num_samples) - - motion_ids = self._motion_lib.sample_motions(num_samples) - - # since negative times are added to these values in build_amp_obs_demo, - # we shift them into the range [0 + truncate_time, end of clip] - truncate_time = self.dt * (self._num_amp_obs_steps - 1) - motion_times0 = self._motion_lib.sample_time(motion_ids, truncate_time=truncate_time) - motion_times0 += truncate_time - - amp_obs_demo = self.build_amp_obs_demo(motion_ids, motion_times0) - self._amp_obs_demo_buf[:] = amp_obs_demo.view(self._amp_obs_demo_buf.shape) - amp_obs_demo_flat = self._amp_obs_demo_buf.view(-1, self.get_num_amp_obs()) - - return amp_obs_demo_flat - - def build_amp_obs_demo(self, motion_ids, motion_times0): - dt = self.dt - - motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps]) - motion_times = motion_times0.unsqueeze(-1) - time_steps = -dt * torch.arange(0, self._num_amp_obs_steps, device=self.device) - motion_times = motion_times + time_steps - - motion_ids = motion_ids.view(-1) - motion_times = motion_times.view(-1) - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, - dof_pos, dof_vel, key_pos, - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - return amp_obs_demo - - def _build_amp_obs_demo_buf(self, num_samples): - self._amp_obs_demo_buf = torch.zeros((num_samples, self._num_amp_obs_steps, self._num_amp_obs_per_step), device=self.device, dtype=torch.float32) - return - - def _setup_character_props(self, 
key_bodies): - super()._setup_character_props(key_bodies) - - asset_file = self.cfg["env"]["asset"]["assetFileName"] - num_key_bodies = len(key_bodies) - - if (asset_file == "mjcf/amp_humanoid.xml"): - self._num_amp_obs_per_step = 13 + self._dof_obs_size + 28 + 3 * num_key_bodies # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos] - elif (asset_file == "mjcf/amp_humanoid_sword_shield.xml"): - self._num_amp_obs_per_step = 13 + self._dof_obs_size + 31 + 3 * num_key_bodies # [root_h, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos] - else: - print("Unsupported character config file: {s}".format(asset_file)) - assert(False) - - return - - def _load_motion(self, motion_file): - assert(self._dof_offsets[-1] == self.num_dof) - self._motion_lib = MotionLib(motion_file=motion_file, - dof_body_ids=self._dof_body_ids, - dof_offsets=self._dof_offsets, - key_body_ids=self._key_body_ids.cpu().numpy(), - device=self.device) - return - - def _reset_envs(self, env_ids): - self._reset_default_env_ids = [] - self._reset_ref_env_ids = [] - - super()._reset_envs(env_ids) - self._init_amp_obs(env_ids) - - return - - def _reset_actors(self, env_ids): - if (self._state_init == HumanoidAMP.StateInit.Default): - self._reset_default(env_ids) - elif (self._state_init == HumanoidAMP.StateInit.Start - or self._state_init == HumanoidAMP.StateInit.Random): - self._reset_ref_state_init(env_ids) - elif (self._state_init == HumanoidAMP.StateInit.Hybrid): - self._reset_hybrid_state_init(env_ids) - else: - assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init)) - return - - def _reset_default(self, env_ids): - self._humanoid_root_states[env_ids] = self._initial_humanoid_root_states[env_ids] - self._dof_pos[env_ids] = self._initial_dof_pos[env_ids] - self._dof_vel[env_ids] = self._initial_dof_vel[env_ids] - self._reset_default_env_ids = env_ids - return - - def _reset_ref_state_init(self, env_ids): - num_envs = env_ids.shape[0] - motion_ids = self._motion_lib.sample_motions(num_envs) - - if (self._state_init == HumanoidAMP.StateInit.Random - or self._state_init == HumanoidAMP.StateInit.Hybrid): - motion_times = self._motion_lib.sample_time(motion_ids) - elif (self._state_init == HumanoidAMP.StateInit.Start): - motion_times = torch.zeros(num_envs, device=self.device) - else: - assert(False), "Unsupported state initialization strategy: {:s}".format(str(self._state_init)) - - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - - self._set_env_state(env_ids=env_ids, - root_pos=root_pos, - root_rot=root_rot, - dof_pos=dof_pos, - root_vel=root_vel, - root_ang_vel=root_ang_vel, - dof_vel=dof_vel) - - self._reset_ref_env_ids = env_ids - self._reset_ref_motion_ids = motion_ids - self._reset_ref_motion_times = motion_times - return - - def _reset_hybrid_state_init(self, env_ids): - num_envs = env_ids.shape[0] - ref_probs = to_torch(np.array([self._hybrid_init_prob] * num_envs), device=self.device) - ref_init_mask = torch.bernoulli(ref_probs) == 1.0 - - ref_reset_ids = env_ids[ref_init_mask] - if (len(ref_reset_ids) > 0): - self._reset_ref_state_init(ref_reset_ids) - - default_reset_ids = env_ids[torch.logical_not(ref_init_mask)] - if (len(default_reset_ids) > 0): - self._reset_default(default_reset_ids) - - return - - def _init_amp_obs(self, env_ids): - self._compute_amp_observations(env_ids) - - if (len(self._reset_default_env_ids) > 0): - 
self._init_amp_obs_default(self._reset_default_env_ids) - - if (len(self._reset_ref_env_ids) > 0): - self._init_amp_obs_ref(self._reset_ref_env_ids, self._reset_ref_motion_ids, - self._reset_ref_motion_times) - - return - - def _init_amp_obs_default(self, env_ids): - curr_amp_obs = self._curr_amp_obs_buf[env_ids].unsqueeze(-2) - self._hist_amp_obs_buf[env_ids] = curr_amp_obs - return - - def _init_amp_obs_ref(self, env_ids, motion_ids, motion_times): - dt = self.dt - motion_ids = torch.tile(motion_ids.unsqueeze(-1), [1, self._num_amp_obs_steps - 1]) - motion_times = motion_times.unsqueeze(-1) - time_steps = -dt * (torch.arange(0, self._num_amp_obs_steps - 1, device=self.device) + 1) - motion_times = motion_times + time_steps - - motion_ids = motion_ids.view(-1) - motion_times = motion_times.view(-1) - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - amp_obs_demo = build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, - dof_pos, dof_vel, key_pos, - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - self._hist_amp_obs_buf[env_ids] = amp_obs_demo.view(self._hist_amp_obs_buf[env_ids].shape) - return - - def _set_env_state(self, env_ids, root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel): - self._humanoid_root_states[env_ids, 0:3] = root_pos - self._humanoid_root_states[env_ids, 3:7] = root_rot - self._humanoid_root_states[env_ids, 7:10] = root_vel - self._humanoid_root_states[env_ids, 10:13] = root_ang_vel - - self._dof_pos[env_ids] = dof_pos - self._dof_vel[env_ids] = dof_vel - return - - def _update_hist_amp_obs(self, env_ids=None): - if (env_ids is None): - for i in reversed(range(self._amp_obs_buf.shape[1] - 1)): - self._amp_obs_buf[:, i + 1] = self._amp_obs_buf[:, i] - else: - for i in reversed(range(self._amp_obs_buf.shape[1] - 1)): - self._amp_obs_buf[env_ids, i + 1] = self._amp_obs_buf[env_ids, i] - return - - def _compute_amp_observations(self, env_ids=None): - key_body_pos = self._rigid_body_pos[:, self._key_body_ids, :] - if (env_ids is None): - self._curr_amp_obs_buf[:] = build_amp_observations(self._rigid_body_pos[:, 0, :], - self._rigid_body_rot[:, 0, :], - self._rigid_body_vel[:, 0, :], - self._rigid_body_ang_vel[:, 0, :], - self._dof_pos, self._dof_vel, key_body_pos, - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - else: - self._curr_amp_obs_buf[env_ids] = build_amp_observations(self._rigid_body_pos[env_ids][:, 0, :], - self._rigid_body_rot[env_ids][:, 0, :], - self._rigid_body_vel[env_ids][:, 0, :], - self._rigid_body_ang_vel[env_ids][:, 0, :], - self._dof_pos[env_ids], self._dof_vel[env_ids], key_body_pos[env_ids], - self._local_root_obs, self._root_height_obs, - self._dof_obs_size, self._dof_offsets) - return - - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def build_amp_observations(root_pos, root_rot, root_vel, root_ang_vel, dof_pos, dof_vel, key_body_pos, - local_root_obs, root_height_obs, dof_obs_size, dof_offsets): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, bool, bool, int, List[int]) -> Tensor - root_h = root_pos[:, 2:3] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - if (local_root_obs): - root_rot_obs = quat_mul(heading_rot, root_rot) - else: - 
root_rot_obs = root_rot - root_rot_obs = torch_utils.quat_to_tan_norm(root_rot_obs) - - if (not root_height_obs): - root_h_obs = torch.zeros_like(root_h) - else: - root_h_obs = root_h - - local_root_vel = quat_rotate(heading_rot, root_vel) - local_root_ang_vel = quat_rotate(heading_rot, root_ang_vel) - - root_pos_expand = root_pos.unsqueeze(-2) - local_key_body_pos = key_body_pos - root_pos_expand - - heading_rot_expand = heading_rot.unsqueeze(-2) - heading_rot_expand = heading_rot_expand.repeat((1, local_key_body_pos.shape[1], 1)) - flat_end_pos = local_key_body_pos.view(local_key_body_pos.shape[0] * local_key_body_pos.shape[1], local_key_body_pos.shape[2]) - flat_heading_rot = heading_rot_expand.view(heading_rot_expand.shape[0] * heading_rot_expand.shape[1], - heading_rot_expand.shape[2]) - local_end_pos = quat_rotate(flat_heading_rot, flat_end_pos) - flat_local_key_pos = local_end_pos.view(local_key_body_pos.shape[0], local_key_body_pos.shape[1] * local_key_body_pos.shape[2]) - - dof_obs = dof_to_obs(dof_pos, dof_obs_size, dof_offsets) - obs = torch.cat((root_h_obs, root_rot_obs, local_root_vel, local_root_ang_vel, dof_obs, dof_vel, flat_local_key_pos), dim=-1) - return obs diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py deleted file mode 100644 index 1091a801..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_getup.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
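For reference, the flattened AMP observation width implied by the deleted _setup_character_props and build_amp_observations above follows directly from the per-step layout [root_h, tan-norm root_rot, root_vel, root_ang_vel, dof_obs, dof_vel, key_body_pos]. A minimal sketch of that arithmetic, assuming the usual amp_humanoid.xml settings (dof_obs_size = 72 and 4 key bodies are assumptions, not values taken from this patch):

    # Illustrative sketch only: reproduces the size arithmetic of the deleted
    # _setup_character_props (amp_humanoid.xml branch). dof_obs_size=72 and
    # num_key_bodies=4 are assumed defaults, not read from this patch.
    def amp_obs_size(num_amp_obs_steps, dof_obs_size=72, num_dof_vel=28, num_key_bodies=4):
        root_terms = 1 + 6 + 3 + 3        # root_h, tan-norm root_rot, root_vel, root_ang_vel = 13
        per_step = root_terms + dof_obs_size + num_dof_vel + 3 * num_key_bodies
        return num_amp_obs_steps * per_step

    print(amp_obs_size(num_amp_obs_steps=2))   # 2 * 125 = 250 features per env

fetch_amp_obs_demo above returns demo tensors flattened to exactly this width via get_num_amp_obs().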
- -import torch - -from isaacgym import gymapi -from isaacgym import gymtorch - -from tasks.humanoid_amp import HumanoidAMP -from isaacgym.torch_utils import * - -from utils import torch_utils -from utils import gym_util - - -class HumanoidAMPGetup(HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - - self._recovery_episode_prob = cfg["env"]["recoveryEpisodeProb"] - self._recovery_steps = cfg["env"]["recoverySteps"] - self._fall_init_prob = cfg["env"]["fallInitProb"] - - self._reset_fall_env_ids = [] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._recovery_counter = torch.zeros(self.num_envs, device=self.device, dtype=torch.int) - - self._generate_fall_states() - - return - - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - - self._update_recovery_count() - return - - def _generate_fall_states(self): - max_steps = 150 - - env_ids = to_torch(np.arange(self.num_envs), device=self.device, dtype=torch.long) - root_states = self._initial_humanoid_root_states[env_ids].clone() - root_states[..., 3:7] = torch.randn_like(root_states[..., 3:7]) - root_states[..., 3:7] = torch.nn.functional.normalize(root_states[..., 3:7], dim=-1) - self._humanoid_root_states[env_ids] = root_states - - env_ids_int32 = self._humanoid_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - self.gym.set_dof_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - - - rand_actions = np.random.uniform(-0.5, 0.5, size=[self.num_envs, self.get_action_size()]) - rand_actions = to_torch(rand_actions, device=self.device) - self.pre_physics_step(rand_actions) - - # step physics and render each frame - for i in range(max_steps): - self.render() - self.gym.simulate(self.sim) - - self._refresh_sim_tensors() - - self._fall_root_states = self._humanoid_root_states.clone() - self._fall_root_states[:, 7:13] = 0 - self._fall_dof_pos = self._dof_pos.clone() - self._fall_dof_vel = torch.zeros_like(self._dof_vel, device=self.device, dtype=torch.float) - - return - - def _reset_actors(self, env_ids): - num_envs = env_ids.shape[0] - recovery_probs = to_torch(np.array([self._recovery_episode_prob] * num_envs), device=self.device) - recovery_mask = torch.bernoulli(recovery_probs) == 1.0 - terminated_mask = (self._terminate_buf[env_ids] == 1) - recovery_mask = torch.logical_and(recovery_mask, terminated_mask) - - recovery_ids = env_ids[recovery_mask] - if (len(recovery_ids) > 0): - self._reset_recovery_episode(recovery_ids) - - - nonrecovery_ids = env_ids[torch.logical_not(recovery_mask)] - fall_probs = to_torch(np.array([self._fall_init_prob] * nonrecovery_ids.shape[0]), device=self.device) - fall_mask = torch.bernoulli(fall_probs) == 1.0 - fall_ids = nonrecovery_ids[fall_mask] - if (len(fall_ids) > 0): - self._reset_fall_episode(fall_ids) - - - nonfall_ids = nonrecovery_ids[torch.logical_not(fall_mask)] - if (len(nonfall_ids) > 0): - super()._reset_actors(nonfall_ids) - self._recovery_counter[nonfall_ids] = 0 - - return - - def _reset_recovery_episode(self, env_ids): - self._recovery_counter[env_ids] = self._recovery_steps - return - - def _reset_fall_episode(self, env_ids): - fall_state_ids = torch.randint_like(env_ids, low=0, 
high=self._fall_root_states.shape[0]) - self._humanoid_root_states[env_ids] = self._fall_root_states[fall_state_ids] - self._dof_pos[env_ids] = self._fall_dof_pos[fall_state_ids] - self._dof_vel[env_ids] = self._fall_dof_vel[fall_state_ids] - self._recovery_counter[env_ids] = self._recovery_steps - self._reset_fall_env_ids = env_ids - return - - def _reset_envs(self, env_ids): - self._reset_fall_env_ids = [] - super()._reset_envs(env_ids) - return - - def _init_amp_obs(self, env_ids): - super()._init_amp_obs(env_ids) - - if (len(self._reset_fall_env_ids) > 0): - self._init_amp_obs_default(self._reset_fall_env_ids) - - return - - def _update_recovery_count(self): - self._recovery_counter -= 1 - self._recovery_counter = torch.clamp_min(self._recovery_counter, 0) - return - - def _compute_reset(self): - super()._compute_reset() - - is_recovery = self._recovery_counter > 0 - self.reset_buf[is_recovery] = 0 - self._terminate_buf[is_recovery] = 0 - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py deleted file mode 100644 index 6f266e4c..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_amp_task.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
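The reset flow removed with HumanoidAMPGetup above partitions the environments being reset into recovery, fall-state and regular resets via two Bernoulli draws. A standalone sketch of that partitioning, with arbitrary example probabilities (the helper name and sizes are illustrative, not from the deleted file):

    # Sketch of the partitioning in the deleted _reset_actors: terminated envs
    # may continue as "recovery" episodes; the remainder is split between
    # fall-state initialisation and the parent class's regular reset.
    import torch

    def partition_resets(env_ids, terminated, recovery_prob=0.2, fall_prob=0.1):
        recovery_mask = (torch.rand(env_ids.shape[0]) < recovery_prob) & terminated[env_ids].bool()
        recovery_ids = env_ids[recovery_mask]

        rest = env_ids[~recovery_mask]
        fall_mask = torch.rand(rest.shape[0]) < fall_prob
        return recovery_ids, rest[fall_mask], rest[~fall_mask]

    env_ids = torch.arange(8)
    terminated = torch.randint(0, 2, (8,))
    print(partition_resets(env_ids, terminated))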
- -import torch - -import tasks.humanoid_amp as humanoid_amp - -class HumanoidAMPTask(humanoid_amp.HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._enable_task_obs = cfg["env"]["enableTaskObs"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - return - - - def get_obs_size(self): - obs_size = super().get_obs_size() - if (self._enable_task_obs): - task_obs_size = self.get_task_obs_size() - obs_size += task_obs_size - return obs_size - - def get_task_obs_size(self): - return 0 - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._update_task() - return - - def render(self, sync_frame_time=False): - super().render(sync_frame_time) - - if self.viewer: - self._draw_task() - return - - def _update_task(self): - return - - def _reset_envs(self, env_ids): - super()._reset_envs(env_ids) - self._reset_task(env_ids) - return - - def _reset_task(self, env_ids): - return - - def _compute_observations(self, env_ids=None): - humanoid_obs = self._compute_humanoid_obs(env_ids) - - if (self._enable_task_obs): - task_obs = self._compute_task_obs(env_ids) - obs = torch.cat([humanoid_obs, task_obs], dim=-1) - else: - obs = humanoid_obs - - if (env_ids is None): - self.obs_buf[:] = obs - else: - self.obs_buf[env_ids] = obs - return - - def _compute_task_obs(self, env_ids=None): - return NotImplemented - - def _compute_reward(self, actions): - return NotImplemented - - def _draw_task(self): - return \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py deleted file mode 100644 index 0b9d420f..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_heading.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
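HumanoidAMPTask above only appends task observations to the humanoid observations when enableTaskObs is set, with subclasses supplying get_task_obs_size() and _compute_task_obs(). A shape-only sketch of that concatenation, using placeholder sizes (105 is an arbitrary humanoid observation width; 5 matches the heading task defined below):

    # Shape-only sketch of the deleted _compute_observations: task observations
    # are concatenated after the humanoid observations.
    import torch

    num_envs, humanoid_obs_size, task_obs_size = 4, 105, 5
    obs = torch.cat([torch.zeros(num_envs, humanoid_obs_size),
                     torch.zeros(num_envs, task_obs_size)], dim=-1)
    assert obs.shape == (num_envs, humanoid_obs_size + task_obs_size)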
- -import torch - -import tasks.humanoid as humanoid -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -from isaacgym import gymapi -from isaacgym import gymtorch -from isaacgym.torch_utils import * - -TAR_ACTOR_ID = 1 -TAR_FACING_ACTOR_ID = 2 - -class HumanoidHeading(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._tar_speed_min = cfg["env"]["tarSpeedMin"] - self._tar_speed_max = cfg["env"]["tarSpeedMax"] - self._heading_change_steps_min = cfg["env"]["headingChangeStepsMin"] - self._heading_change_steps_max = cfg["env"]["headingChangeStepsMax"] - self._enable_rand_heading = cfg["env"]["enableRandHeading"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._heading_change_steps = torch.zeros([self.num_envs], device=self.device, dtype=torch.int64) - self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - self._tar_speed = torch.ones([self.num_envs], device=self.device, dtype=torch.float) - self._tar_dir = torch.zeros([self.num_envs, 2], device=self.device, dtype=torch.float) - self._tar_dir[..., 0] = 1.0 - - self._tar_facing_dir = torch.zeros([self.num_envs, 2], device=self.device, dtype=torch.float) - self._tar_facing_dir[..., 0] = 1.0 - - if (not self.headless): - self._build_marker_state_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 5 - return obs_size - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._prev_root_pos[:] = self._humanoid_root_states[..., 0:3] - return - - def _update_marker(self): - humanoid_root_pos = self._humanoid_root_states[..., 0:3] - self._marker_pos[..., 0:2] = humanoid_root_pos[..., 0:2] + self._tar_dir - self._marker_pos[..., 2] = 0.0 - - heading_theta = torch.atan2(self._tar_dir[..., 1], self._tar_dir[..., 0]) - heading_axis = torch.zeros_like(self._marker_pos) - heading_axis[..., -1] = 1.0 - heading_q = quat_from_angle_axis(heading_theta, heading_axis) - self._marker_rot[:] = heading_q - - self._face_marker_pos[..., 0:2] = humanoid_root_pos[..., 0:2] + self._tar_facing_dir - self._face_marker_pos[..., 2] = 0.0 - - face_theta = torch.atan2(self._tar_facing_dir[..., 1], self._tar_facing_dir[..., 0]) - face_axis = torch.zeros_like(self._marker_pos) - face_axis[..., -1] = 1.0 - face_q = quat_from_angle_axis(face_theta, heading_axis) - self._face_marker_rot[:] = face_q - - marker_ids = torch.cat([self._marker_actor_ids, self._face_marker_actor_ids], dim=0) - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(marker_ids), len(marker_ids)) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - if (not self.headless): - self._marker_handles = [] - self._face_marker_handles = [] - self._load_marker_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _load_marker_asset(self): - asset_root = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/simulator/assets/mjcf" - asset_file = "heading_marker.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 1.0 - 
asset_options.fix_base_link = True - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._marker_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - - if (not self.headless): - self._build_marker(env_id, env_ptr) - - return - - def _build_marker(self, env_id, env_ptr): - col_group = env_id - col_filter = 2 - segmentation_id = 0 - - default_pose = gymapi.Transform() - default_pose.p.x = 1.0 - default_pose.p.z = 0.0 - - marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.8, 0.0, 0.0)) - self._marker_handles.append(marker_handle) - - face_marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "face_marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, face_marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.0, 0.0, 0.8)) - self._face_marker_handles.append(face_marker_handle) - - return - - def _build_marker_state_tensors(self): - num_actors = self._root_states.shape[0] // self.num_envs - - self._marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., TAR_ACTOR_ID, :] - self._marker_pos = self._marker_states[..., :3] - self._marker_rot = self._marker_states[..., 3:7] - self._marker_actor_ids = self._humanoid_actor_ids + TAR_ACTOR_ID - - self._face_marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., TAR_FACING_ACTOR_ID, :] - self._face_marker_pos = self._face_marker_states[..., :3] - self._face_marker_rot = self._face_marker_states[..., 3:7] - self._face_marker_actor_ids = self._humanoid_actor_ids + TAR_FACING_ACTOR_ID - - return - - def _update_task(self): - reset_task_mask = self.progress_buf >= self._heading_change_steps - rest_env_ids = reset_task_mask.nonzero(as_tuple=False).flatten() - if len(rest_env_ids) > 0: - self._reset_task(rest_env_ids) - return - - def _reset_task(self, env_ids): - n = len(env_ids) - if (self._enable_rand_heading): - rand_theta = 2 * np.pi * torch.rand(n, device=self.device) - np.pi - rand_face_theta = 2 * np.pi * torch.rand(n, device=self.device) - np.pi - else: - rand_theta = torch.zeros(n, device=self.device) - rand_face_theta = torch.zeros(n, device=self.device) - - tar_dir = torch.stack([torch.cos(rand_theta), torch.sin(rand_theta)], dim=-1) - tar_speed = (self._tar_speed_max - self._tar_speed_min) * torch.rand(n, device=self.device) + self._tar_speed_min - change_steps = torch.randint(low=self._heading_change_steps_min, high=self._heading_change_steps_max, - size=(n,), device=self.device, dtype=torch.int64) - - face_tar_dir = torch.stack([torch.cos(rand_face_theta), torch.sin(rand_face_theta)], dim=-1) - - self._tar_speed[env_ids] = tar_speed - self._tar_dir[env_ids] = tar_dir - self._tar_facing_dir[env_ids] = face_tar_dir - self._heading_change_steps[env_ids] = self.progress_buf[env_ids] + change_steps - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_dir = self._tar_dir - tar_speed = self._tar_speed - tar_face_dir = self._tar_facing_dir - else: - root_states = self._humanoid_root_states[env_ids] - tar_dir = self._tar_dir[env_ids] - tar_speed = self._tar_speed[env_ids] - tar_face_dir = 
self._tar_facing_dir[env_ids] - - obs = compute_heading_observations(root_states, tar_dir, tar_speed, tar_face_dir) - return obs - - def _compute_reward(self, actions): - root_pos = self._humanoid_root_states[..., 0:3] - root_rot = self._humanoid_root_states[..., 3:7] - self.rew_buf[:] = compute_heading_reward(root_pos, self._prev_root_pos, root_rot, - self._tar_dir, self._tar_speed, - self._tar_facing_dir, self.dt) - return - - def _draw_task(self): - self._update_marker() - - vel_scale = 0.2 - heading_cols = np.array([[0.0, 1.0, 0.0], - [1.0, 0.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - root_pos = self._humanoid_root_states[..., 0:3] - prev_root_pos = self._prev_root_pos - sim_vel = (root_pos - prev_root_pos) / self.dt - sim_vel[..., -1] = 0 - - starts = root_pos - tar_ends = torch.clone(starts) - tar_ends[..., 0:2] += vel_scale * self._tar_speed.unsqueeze(-1) * self._tar_dir - sim_ends = starts + vel_scale * sim_vel - - verts = torch.cat([starts, tar_ends, starts, sim_ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i:i+1] - curr_verts = curr_verts.reshape([2, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, heading_cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_heading_observations(root_states, tar_dir, tar_speed, tar_face_dir): - # type: (Tensor, Tensor, Tensor, Tensor) -> Tensor - root_rot = root_states[:, 3:7] - - tar_dir3d = torch.cat([tar_dir, torch.zeros_like(tar_dir[..., 0:1])], dim=-1) - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - local_tar_dir = quat_rotate(heading_rot, tar_dir3d) - local_tar_dir = local_tar_dir[..., 0:2] - tar_speed = tar_speed.unsqueeze(-1) - - tar_face_dir3d = torch.cat([tar_face_dir, torch.zeros_like(tar_face_dir[..., 0:1])], dim=-1) - local_tar_face_dir = quat_rotate(heading_rot, tar_face_dir3d) - local_tar_face_dir = local_tar_face_dir[..., 0:2] - - obs = torch.cat([local_tar_dir, tar_speed, local_tar_face_dir], dim=-1) - return obs - -@torch.jit.script -def compute_heading_reward(root_pos, prev_root_pos, root_rot, tar_dir, tar_speed, tar_face_dir, dt): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, float) -> Tensor - vel_err_scale = 0.25 - tangent_err_w = 0.1 - - dir_reward_w = 0.7 - facing_reward_w = 0.3 - - delta_root_pos = root_pos - prev_root_pos - root_vel = delta_root_pos / dt - tar_dir_speed = torch.sum(tar_dir * root_vel[..., :2], dim=-1) - - tar_dir_vel = tar_dir_speed.unsqueeze(-1) * tar_dir - tangent_vel = root_vel[..., :2] - tar_dir_vel - - tangent_speed = torch.sum(tangent_vel, dim=-1) - - tar_vel_err = tar_speed - tar_dir_speed - tangent_vel_err = tangent_speed - dir_reward = torch.exp(-vel_err_scale * (tar_vel_err * tar_vel_err + - tangent_err_w * tangent_vel_err * tangent_vel_err)) - - speed_mask = tar_dir_speed <= 0 - dir_reward[speed_mask] = 0 - - heading_rot = torch_utils.calc_heading_quat(root_rot) - facing_dir = torch.zeros_like(root_pos) - facing_dir[..., 0] = 1.0 - facing_dir = quat_rotate(heading_rot, facing_dir) - facing_err = torch.sum(tar_face_dir * facing_dir[..., 0:2], dim=-1) - facing_reward = torch.clamp_min(facing_err, 0.0) - - reward = dir_reward_w * dir_reward + facing_reward_w * facing_reward - - return reward diff --git 
a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py deleted file mode 100644 index c203074e..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_location.py +++ /dev/null @@ -1,256 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
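The compute_heading_reward jit function deleted above (humanoid_heading.py) combines a velocity term and a facing term as reward = 0.7 * exp(-0.25 * (vel_err**2 + 0.1 * tangent_err**2)) + 0.3 * max(facing_alignment, 0), with the velocity term zeroed when the root moves against the target direction. A worked example with made-up numbers:

    # Worked example only; the speeds and alignment below are invented.
    import math

    tar_speed, tar_dir_speed, tangent_speed = 1.5, 1.2, 0.1
    vel_err = tar_speed - tar_dir_speed              # 0.3 m/s short of the target speed
    dir_reward = math.exp(-0.25 * (vel_err ** 2 + 0.1 * tangent_speed ** 2))
    if tar_dir_speed <= 0:                           # moving against the target direction
        dir_reward = 0.0

    facing_alignment = 0.9                           # dot(tar_face_dir, root facing direction)
    reward = 0.7 * dir_reward + 0.3 * max(facing_alignment, 0.0)
    print(round(reward, 3))                          # ~0.954

The 5-dimensional task observation for that task is simply [local target direction (2), target speed (1), local facing direction (2)], matching get_task_obs_size above.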
- -import torch - -import tasks.humanoid as humanoid -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -from isaacgym import gymapi -from isaacgym import gymtorch -from isaacgym.torch_utils import * - -class HumanoidLocation(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._tar_speed = cfg["env"]["tarSpeed"] - self._tar_change_steps_min = cfg["env"]["tarChangeStepsMin"] - self._tar_change_steps_max = cfg["env"]["tarChangeStepsMax"] - self._tar_dist_max = cfg["env"]["tarDistMax"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._tar_change_steps = torch.zeros([self.num_envs], device=self.device, dtype=torch.int64) - self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - self._tar_pos = torch.zeros([self.num_envs, 2], device=self.device, dtype=torch.float) - - if (not self.headless): - self._build_marker_state_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 2 - return obs_size - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._prev_root_pos[:] = self._humanoid_root_states[..., 0:3] - return - - def _update_marker(self): - self._marker_pos[..., 0:2] = self._tar_pos - self._marker_pos[..., 2] = 0.0 - - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(self._marker_actor_ids), len(self._marker_actor_ids)) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - if (not self.headless): - self._marker_handles = [] - self._load_marker_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _load_marker_asset(self): - asset_root = "ase/data/assets/mjcf/" - asset_file = "location_marker.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 1.0 - asset_options.fix_base_link = True - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._marker_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - - if (not self.headless): - self._build_marker(env_id, env_ptr) - - return - - def _build_marker(self, env_id, env_ptr): - col_group = env_id - col_filter = 2 - segmentation_id = 0 - default_pose = gymapi.Transform() - - marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.8, 0.0, 0.0)) - self._marker_handles.append(marker_handle) - - return - - def _build_marker_state_tensors(self): - num_actors = self._root_states.shape[0] // self.num_envs - self._marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., 1, :] - self._marker_pos = self._marker_states[..., :3] - - self._marker_actor_ids = self._humanoid_actor_ids + 1 - - return - - def _update_task(self): - reset_task_mask = self.progress_buf >= self._tar_change_steps - rest_env_ids = 
reset_task_mask.nonzero(as_tuple=False).flatten() - if len(rest_env_ids) > 0: - self._reset_task(rest_env_ids) - return - - def _reset_task(self, env_ids): - n = len(env_ids) - - char_root_pos = self._humanoid_root_states[env_ids, 0:2] - rand_pos = self._tar_dist_max * (2.0 * torch.rand([n, 2], device=self.device) - 1.0) - - change_steps = torch.randint(low=self._tar_change_steps_min, high=self._tar_change_steps_max, - size=(n,), device=self.device, dtype=torch.int64) - - self._tar_pos[env_ids] = char_root_pos + rand_pos - self._tar_change_steps[env_ids] = self.progress_buf[env_ids] + change_steps - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_pos = self._tar_pos - else: - root_states = self._humanoid_root_states[env_ids] - tar_pos = self._tar_pos[env_ids] - - obs = compute_location_observations(root_states, tar_pos) - return obs - - def _compute_reward(self, actions): - root_pos = self._humanoid_root_states[..., 0:3] - root_rot = self._humanoid_root_states[..., 3:7] - self.rew_buf[:] = compute_location_reward(root_pos, self._prev_root_pos, root_rot, - self._tar_pos, self._tar_speed, - self.dt) - return - - def _draw_task(self): - self._update_marker() - - cols = np.array([[0.0, 1.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._humanoid_root_states[..., 0:3] - ends = self._marker_pos - - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_location_observations(root_states, tar_pos): - # type: (Tensor, Tensor) -> Tensor - root_pos = root_states[:, 0:3] - root_rot = root_states[:, 3:7] - - tar_pos3d = torch.cat([tar_pos, torch.zeros_like(tar_pos[..., 0:1])], dim=-1) - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - local_tar_pos = quat_rotate(heading_rot, tar_pos3d - root_pos) - local_tar_pos = local_tar_pos[..., 0:2] - - obs = local_tar_pos - return obs - -@torch.jit.script -def compute_location_reward(root_pos, prev_root_pos, root_rot, tar_pos, tar_speed, dt): - # type: (Tensor, Tensor, Tensor, Tensor, float, float) -> Tensor - dist_threshold = 0.5 - - pos_err_scale = 0.5 - vel_err_scale = 4.0 - - pos_reward_w = 0.5 - vel_reward_w = 0.4 - face_reward_w = 0.1 - - pos_diff = tar_pos - root_pos[..., 0:2] - pos_err = torch.sum(pos_diff * pos_diff, dim=-1) - pos_reward = torch.exp(-pos_err_scale * pos_err) - - tar_dir = tar_pos - root_pos[..., 0:2] - tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1) - - - delta_root_pos = root_pos - prev_root_pos - root_vel = delta_root_pos / dt - tar_dir_speed = torch.sum(tar_dir * root_vel[..., :2], dim=-1) - tar_vel_err = tar_speed - tar_dir_speed - tar_vel_err = torch.clamp_min(tar_vel_err, 0.0) - vel_reward = torch.exp(-vel_err_scale * (tar_vel_err * tar_vel_err)) - speed_mask = tar_dir_speed <= 0 - vel_reward[speed_mask] = 0 - - - heading_rot = torch_utils.calc_heading_quat(root_rot) - facing_dir = torch.zeros_like(root_pos) - facing_dir[..., 0] = 1.0 - facing_dir = quat_rotate(heading_rot, facing_dir) - facing_err = torch.sum(tar_dir * facing_dir[..., 0:2], dim=-1) - 
facing_reward = torch.clamp_min(facing_err, 0.0) - - - dist_mask = pos_err < dist_threshold - facing_reward[dist_mask] = 1.0 - vel_reward[dist_mask] = 1.0 - - reward = pos_reward_w * pos_reward + vel_reward_w * vel_reward + face_reward_w * facing_reward - - return reward \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py deleted file mode 100644 index 40934988..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_perturb.py +++ /dev/null @@ -1,273 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
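Similarly, the compute_location_reward function deleted above (humanoid_location.py) weights position, velocity-towards-target and facing terms 0.5 / 0.4 / 0.1, and saturates the latter two to 1 once the squared XY distance to the target drops below 0.5. A worked example with made-up numbers:

    # Worked example only; distances, speeds and alignment below are invented.
    import math

    pos_err = 1.0 ** 2 + 0.5 ** 2                    # squared XY distance to the target
    pos_reward = math.exp(-0.5 * pos_err)

    tar_speed, tar_dir_speed = 1.5, 1.0
    vel_err = max(tar_speed - tar_dir_speed, 0.0)
    vel_reward = math.exp(-4.0 * vel_err ** 2) if tar_dir_speed > 0 else 0.0

    facing_reward = max(0.8, 0.0)                    # alignment of root facing with the target direction

    if pos_err < 0.5:                                # close enough: remaining terms saturate
        vel_reward = facing_reward = 1.0

    reward = 0.5 * pos_reward + 0.4 * vel_reward + 0.1 * facing_reward
    print(round(reward, 3))                          # ~0.495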
- -import torch - -from isaacgym import gymapi, gymtorch -from isaacgym.torch_utils import * - -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_getup as humanoid_amp_getup -import tasks.humanoid_strike as humanoid_strike -import tasks.humanoid_location as humanoid_location -from utils import torch_utils - -PERTURB_OBJS = [ - ["small", 60], - ["small", 7], - ["small", 10], - ["small", 35], - ["small", 2], - ["small", 2], - ["small", 3], - ["small", 2], - ["small", 2], - ["small", 3], - ["small", 2], - ["large", 60], - ["small", 300], -] - -class HumanoidPerturb(humanoid_amp.HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._proj_dist_min = 4 - self._proj_dist_max = 5 - self._proj_h_min = 0.25 - self._proj_h_max = 2 - self._proj_steps = 150 - self._proj_warmup_steps = 1 - self._proj_speed_min = 30 - self._proj_speed_max = 40 - assert(self._proj_warmup_steps < self._proj_steps) - - self._build_proj_tensors() - self._calc_perturb_times() - - return - - def _create_envs(self, num_envs, spacing, num_per_row): - self._proj_handles = [] - self._load_proj_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - self._build_proj(env_id, env_ptr) - return - - def _load_proj_asset(self): - asset_root = "ase/data/assets/mjcf/" - - small_asset_file = "block_projectile.urdf" - small_asset_options = gymapi.AssetOptions() - small_asset_options.angular_damping = 0.01 - small_asset_options.linear_damping = 0.01 - small_asset_options.max_angular_velocity = 100.0 - small_asset_options.density = 200.0 - small_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - self._small_proj_asset = self.gym.load_asset(self.sim, asset_root, small_asset_file, small_asset_options) - - large_asset_file = "block_projectile_large.urdf" - large_asset_options = gymapi.AssetOptions() - large_asset_options.angular_damping = 0.01 - large_asset_options.linear_damping = 0.01 - large_asset_options.max_angular_velocity = 100.0 - large_asset_options.density = 100.0 - large_asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - self._large_proj_asset = self.gym.load_asset(self.sim, asset_root, large_asset_file, large_asset_options) - return - - def _build_proj(self, env_id, env_ptr): - col_group = env_id - col_filter = 0 - segmentation_id = 0 - - for i, obj in enumerate(PERTURB_OBJS): - default_pose = gymapi.Transform() - default_pose.p.x = 200 + i - default_pose.p.z = 1 - obj_type = obj[0] - if (obj_type == "small"): - proj_asset = self._small_proj_asset - elif (obj_type == "large"): - proj_asset = self._large_proj_asset - - proj_handle = self.gym.create_actor(env_ptr, proj_asset, default_pose, "proj{:d}".format(i), col_group, col_filter, segmentation_id) - self._proj_handles.append(proj_handle) - - return - - def _build_body_ids_tensor(self, env_ptr, actor_handle, body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _build_proj_tensors(self): - num_actors 
= self.get_num_actors_per_env() - num_objs = self._get_num_objs() - self._proj_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., (num_actors - num_objs):, :] - - self._proj_actor_ids = num_actors * np.arange(self.num_envs) - self._proj_actor_ids = np.expand_dims(self._proj_actor_ids, axis=-1) - self._proj_actor_ids = self._proj_actor_ids + np.reshape(np.array(self._proj_handles), [self.num_envs, num_objs]) - self._proj_actor_ids = self._proj_actor_ids.flatten() - self._proj_actor_ids = to_torch(self._proj_actor_ids, device=self.device, dtype=torch.int32) - - bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs - contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) - contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor) - self._proj_contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., (num_actors - num_objs):, :] - - return - - def _calc_perturb_times(self): - self._perturb_timesteps = [] - total_steps = 0 - for i, obj in enumerate(PERTURB_OBJS): - curr_time = obj[1] - total_steps += curr_time - self._perturb_timesteps.append(total_steps) - - self._perturb_timesteps = np.array(self._perturb_timesteps) - - return - - def _reset_env_tensors(self, env_ids): - super()._reset_env_tensors(env_ids) - - env_ids_int32 = self._proj_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - return - - def _compute_reset(self): - self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf, - self._contact_forces, self._contact_body_ids, - self._rigid_body_pos, self.max_episode_length, - self._enable_early_termination, self._termination_heights) - return - - def post_physics_step(self): - self._update_proj() - super().post_physics_step() - return - - def _get_num_objs(self): - return len(PERTURB_OBJS) - - def _update_proj(self): - - curr_timestep = self.progress_buf.cpu().numpy()[0] - curr_timestep = curr_timestep % (self._perturb_timesteps[-1] + 1) - perturb_step = np.where(self._perturb_timesteps == curr_timestep)[0] - - if (len(perturb_step) > 0): - perturb_id = perturb_step[0] - n = self.num_envs - humanoid_root_pos = self._humanoid_root_states[..., 0:3] - - rand_theta = torch.rand([n], dtype=self._proj_states.dtype, device=self._proj_states.device) - rand_theta *= 2 * np.pi - rand_dist = (self._proj_dist_max - self._proj_dist_min) * torch.rand([n], dtype=self._proj_states.dtype, device=self._proj_states.device) + self._proj_dist_min - pos_x = rand_dist * torch.cos(rand_theta) - pos_y = -rand_dist * torch.sin(rand_theta) - pos_z = (self._proj_h_max - self._proj_h_min) * torch.rand([n], dtype=self._proj_states.dtype, device=self._proj_states.device) + self._proj_h_min - - self._proj_states[..., perturb_id, 0] = humanoid_root_pos[..., 0] + pos_x - self._proj_states[..., perturb_id, 1] = humanoid_root_pos[..., 1] + pos_y - self._proj_states[..., perturb_id, 2] = pos_z - self._proj_states[..., perturb_id, 3:6] = 0.0 - self._proj_states[..., perturb_id, 6] = 1.0 - - tar_body_idx = np.random.randint(self.num_bodies) - tar_body_idx = 1 - - launch_tar_pos = self._rigid_body_pos[..., tar_body_idx, :] - launch_dir = launch_tar_pos - self._proj_states[..., perturb_id, 0:3] - launch_dir += 0.1 * torch.randn_like(launch_dir) - launch_dir = torch.nn.functional.normalize(launch_dir, dim=-1) - launch_speed = (self._proj_speed_max - 
self._proj_speed_min) * torch.rand_like(launch_dir[:, 0:1]) + self._proj_speed_min - launch_vel = launch_speed * launch_dir - launch_vel[..., 0:2] += self._rigid_body_vel[..., tar_body_idx, 0:2] - self._proj_states[..., perturb_id, 7:10] = launch_vel - self._proj_states[..., perturb_id, 10:13] = 0.0 - - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(self._proj_actor_ids), - len(self._proj_actor_ids)) - - return - - def _draw_task(self): - super()._draw_task() - - cols = np.array([[1.0, 0.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._humanoid_root_states[..., 0:3] - ends = self._proj_states[..., 0:3] - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos, - max_episode_length, enable_early_termination, termination_heights): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor] - - terminated = torch.zeros_like(reset_buf) - reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated) - - return reset, terminated diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py deleted file mode 100644 index cf578aac..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_reach.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
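The projectile logic removed with HumanoidPerturb above spawns a block on a ring around the character and launches it at a target body with aim noise and a random speed, compensating for the body's planar velocity. A self-contained sketch of just the launch-velocity step (the function name and example tensors are illustrative, with ranges mirroring the constants above):

    # Sketch of the launch computation in the deleted _update_proj.
    import torch

    def launch_velocity(proj_pos, target_pos, target_vel, speed_min=30.0, speed_max=40.0):
        direction = target_pos - proj_pos
        direction = direction + 0.1 * torch.randn_like(direction)       # aim noise
        direction = torch.nn.functional.normalize(direction, dim=-1)
        speed = (speed_max - speed_min) * torch.rand(direction.shape[:-1] + (1,)) + speed_min
        velocity = speed * direction
        velocity[..., 0:2] += target_vel[..., 0:2]                       # lead the moving body
        return velocity

    proj_pos = torch.tensor([[4.0, 0.0, 1.0]])
    target_pos = torch.tensor([[0.0, 0.0, 1.0]])
    target_vel = torch.tensor([[0.5, 0.0, 0.0]])
    print(launch_velocity(proj_pos, target_pos, target_vel))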
- -import torch - -import tasks.humanoid as humanoid -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -from isaacgym import gymapi -from isaacgym import gymtorch -from isaacgym.torch_utils import * - -class HumanoidReach(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - self._tar_speed = cfg["env"]["tarSpeed"] - self._tar_change_steps_min = cfg["env"]["tarChangeStepsMin"] - self._tar_change_steps_max = cfg["env"]["tarChangeStepsMax"] - self._tar_dist_max = cfg["env"]["tarDistMax"] - self._tar_height_min = cfg["env"]["tarHeightMin"] - self._tar_height_max = cfg["env"]["tarHeightMax"] - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._tar_change_steps = torch.zeros([self.num_envs], device=self.device, dtype=torch.int64) - self._tar_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - - reach_body_name = cfg["env"]["reachBodyName"] - self._reach_body_id = self._build_reach_body_id_tensor(self.envs[0], self.humanoid_handles[0], reach_body_name) - - if (not self.headless): - self._build_marker_state_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 3 - return obs_size - - def _update_marker(self): - self._marker_pos[..., :] = self._tar_pos - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(self._marker_actor_ids), len(self._marker_actor_ids)) - return - - def _create_envs(self, num_envs, spacing, num_per_row): - if (not self.headless): - self._marker_handles = [] - self._load_marker_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _load_marker_asset(self): - asset_root = "ase/data/assets/mjcf/" - asset_file = "location_marker.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 1.0 - asset_options.fix_base_link = True - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._marker_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - - if (not self.headless): - self._build_marker(env_id, env_ptr) - - return - - def _build_marker(self, env_id, env_ptr): - col_group = env_id - col_filter = 2 - segmentation_id = 0 - - default_pose = gymapi.Transform() - - marker_handle = self.gym.create_actor(env_ptr, self._marker_asset, default_pose, "marker", col_group, col_filter, segmentation_id) - self.gym.set_rigid_body_color(env_ptr, marker_handle, 0, gymapi.MESH_VISUAL, gymapi.Vec3(0.8, 0.0, 0.0)) - self._marker_handles.append(marker_handle) - - return - - def _build_marker_state_tensors(self): - num_actors = self._root_states.shape[0] // self.num_envs - self._marker_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., 1, :] - self._marker_pos = self._marker_states[..., :3] - - self._marker_actor_ids = self._humanoid_actor_ids + 1 - - return - - def _build_reach_body_id_tensor(self, env_ptr, actor_handle, body_name): - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - 
assert(body_id != -1) - body_id = to_torch(body_id, device=self.device, dtype=torch.long) - return body_id - - def _update_task(self): - reset_task_mask = self.progress_buf >= self._tar_change_steps - rest_env_ids = reset_task_mask.nonzero(as_tuple=False).flatten() - if len(rest_env_ids) > 0: - self._reset_task(rest_env_ids) - return - - def _reset_task(self, env_ids): - n = len(env_ids) - - rand_pos = torch.rand([n, 3], device=self.device) - rand_pos[..., 0:2] = self._tar_dist_max * (2.0 * rand_pos[..., 0:2] - 1.0) - rand_pos[..., 2] = (self._tar_height_max - self._tar_height_min) * rand_pos[..., 2] + self._tar_height_min - - change_steps = torch.randint(low=self._tar_change_steps_min, high=self._tar_change_steps_max, - size=(n,), device=self.device, dtype=torch.int64) - - self._tar_pos[env_ids, :] = rand_pos - self._tar_change_steps[env_ids] = self.progress_buf[env_ids] + change_steps - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_pos = self._tar_pos - else: - root_states = self._humanoid_root_states[env_ids] - tar_pos = self._tar_pos[env_ids] - - obs = compute_location_observations(root_states, tar_pos) - return obs - - def _compute_reward(self, actions): - reach_body_pos = self._rigid_body_pos[:, self._reach_body_id, :] - root_rot = self._humanoid_root_states[..., 3:7] - self.rew_buf[:] = compute_reach_reward(reach_body_pos, root_rot, - self._tar_pos, self._tar_speed, - self.dt) - return - - def _draw_task(self): - self._update_marker() - - cols = np.array([[0.0, 1.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._rigid_body_pos[:, self._reach_body_id, :] - ends = self._tar_pos - - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_location_observations(root_states, tar_pos): - # type: (Tensor, Tensor) -> Tensor - root_rot = root_states[:, 3:7] - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - local_tar_pos = quat_rotate(heading_rot, tar_pos) - - obs = local_tar_pos - return obs - -@torch.jit.script -def compute_reach_reward(reach_body_pos, root_rot, tar_pos, tar_speed, dt): - # type: (Tensor, Tensor, Tensor, float, float) -> Tensor - pos_err_scale = 4.0 - - pos_diff = tar_pos - reach_body_pos - pos_err = torch.sum(pos_diff * pos_diff, dim=-1) - pos_reward = torch.exp(-pos_err_scale * pos_err) - - reward = pos_reward - - return reward \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py deleted file mode 100644 index 6c3a31be..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_strike.py +++ /dev/null @@ -1,323 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. 
Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch - -from isaacgym import gymapi, gymtorch -from isaacgym.torch_utils import * - -import tasks.humanoid_amp as humanoid_amp -import tasks.humanoid_amp_task as humanoid_amp_task -from utils import torch_utils - -class HumanoidStrike(humanoid_amp_task.HumanoidAMPTask): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - self._tar_dist_min = 0.5 - self._tar_dist_max = 10.0 - self._near_dist = 1.5 - self._near_prob = 0.5 - - self._prev_root_pos = torch.zeros([self.num_envs, 3], device=self.device, dtype=torch.float) - - strike_body_names = cfg["env"]["strikeBodyNames"] - self._strike_body_ids = self._build_strike_body_ids_tensor(self.envs[0], self.humanoid_handles[0], strike_body_names) - self._build_target_tensors() - - return - - def get_task_obs_size(self): - obs_size = 0 - if (self._enable_task_obs): - obs_size = 15 - return obs_size - - def _create_envs(self, num_envs, spacing, num_per_row): - self._target_handles = [] - self._load_target_asset() - - super()._create_envs(num_envs, spacing, num_per_row) - return - - def _build_env(self, env_id, env_ptr, humanoid_asset): - super()._build_env(env_id, env_ptr, humanoid_asset) - self._build_target(env_id, env_ptr) - return - - def _load_target_asset(self): - asset_root = "/home/ubuntu/Github/Knowledge-Universe/Robotics/Roadmap-for-robot-science/rofunc/simulator/assets/mjcf" - asset_file = "strike_target.urdf" - - asset_options = gymapi.AssetOptions() - asset_options.angular_damping = 0.01 - asset_options.linear_damping = 0.01 - asset_options.max_angular_velocity = 100.0 - asset_options.density = 30.0 - asset_options.default_dof_drive_mode = gymapi.DOF_MODE_NONE - - self._target_asset = self.gym.load_asset(self.sim, asset_root, asset_file, asset_options) - return - - def _build_target(self, env_id, env_ptr): - col_group = env_id - col_filter = 0 - segmentation_id = 0 - - default_pose = gymapi.Transform() - default_pose.p.x = 1.0 - - target_handle = self.gym.create_actor(env_ptr, self._target_asset, default_pose, "target", 
col_group, col_filter, segmentation_id) - self._target_handles.append(target_handle) - - return - - def _build_strike_body_ids_tensor(self, env_ptr, actor_handle, body_names): - env_ptr = self.envs[0] - actor_handle = self.humanoid_handles[0] - body_ids = [] - - for body_name in body_names: - body_id = self.gym.find_actor_rigid_body_handle(env_ptr, actor_handle, body_name) - assert(body_id != -1) - body_ids.append(body_id) - - body_ids = to_torch(body_ids, device=self.device, dtype=torch.long) - return body_ids - - def _build_target_tensors(self): - num_actors = self.get_num_actors_per_env() - self._target_states = self._root_states.view(self.num_envs, num_actors, self._root_states.shape[-1])[..., 1, :] - - self._tar_actor_ids = to_torch(num_actors * np.arange(self.num_envs), device=self.device, dtype=torch.int32) + 1 - - bodies_per_env = self._rigid_body_state.shape[0] // self.num_envs - contact_force_tensor = self.gym.acquire_net_contact_force_tensor(self.sim) - contact_force_tensor = gymtorch.wrap_tensor(contact_force_tensor) - self._tar_contact_forces = contact_force_tensor.view(self.num_envs, bodies_per_env, 3)[..., self.num_bodies, :] - - return - - def _reset_actors(self, env_ids): - super()._reset_actors(env_ids) - self._reset_target(env_ids) - return - - def _reset_target(self, env_ids): - n = len(env_ids) - - init_near = torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) < self._near_prob - dist_max = self._tar_dist_max * torch.ones([n], dtype=self._target_states.dtype, device=self._target_states.device) - dist_max[init_near] = self._near_dist - rand_dist = (dist_max - self._tar_dist_min) * torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) + self._tar_dist_min - - rand_theta = 2 * np.pi * torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) - self._target_states[env_ids, 0] = rand_dist * torch.cos(rand_theta) + self._humanoid_root_states[env_ids, 0] - self._target_states[env_ids, 1] = rand_dist * torch.sin(rand_theta) + self._humanoid_root_states[env_ids, 1] - self._target_states[env_ids, 2] = 0.9 - - rand_rot_theta = 2 * np.pi * torch.rand([n], dtype=self._target_states.dtype, device=self._target_states.device) - axis = torch.tensor([0.0, 0.0, 1.0], dtype=self._target_states.dtype, device=self._target_states.device) - rand_rot = quat_from_angle_axis(rand_rot_theta, axis) - - self._target_states[env_ids, 3:7] = rand_rot - self._target_states[env_ids, 7:10] = 0.0 - self._target_states[env_ids, 10:13] = 0.0 - return - - def _reset_env_tensors(self, env_ids): - super()._reset_env_tensors(env_ids) - - env_ids_int32 = self._tar_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - return - - def pre_physics_step(self, actions): - super().pre_physics_step(actions) - self._prev_root_pos[:] = self._humanoid_root_states[..., 0:3] - return - - def _compute_task_obs(self, env_ids=None): - if (env_ids is None): - root_states = self._humanoid_root_states - tar_states = self._target_states - else: - root_states = self._humanoid_root_states[env_ids] - tar_states = self._target_states[env_ids] - - obs = compute_strike_observations(root_states, tar_states) - return obs - - def _compute_reward(self, actions): - tar_pos = self._target_states[..., 0:3] - tar_rot = self._target_states[..., 3:7] - char_root_state = self._humanoid_root_states - strike_body_vel = 
self._rigid_body_vel[..., self._strike_body_ids[0], :] - - self.rew_buf[:] = compute_strike_reward(tar_pos, tar_rot, char_root_state, - self._prev_root_pos, strike_body_vel, - self.dt, self._near_dist) - return - - def _compute_reset(self): - self.reset_buf[:], self._terminate_buf[:] = compute_humanoid_reset(self.reset_buf, self.progress_buf, - self._contact_forces, self._contact_body_ids, - self._rigid_body_pos, self._tar_contact_forces, - self._strike_body_ids, self.max_episode_length, - self._enable_early_termination, self._termination_heights) - return - - def _draw_task(self): - cols = np.array([[0.0, 1.0, 0.0]], dtype=np.float32) - - self.gym.clear_lines(self.viewer) - - starts = self._humanoid_root_states[..., 0:3] - ends = self._target_states[..., 0:3] - verts = torch.cat([starts, ends], dim=-1).cpu().numpy() - - for i, env_ptr in enumerate(self.envs): - curr_verts = verts[i] - curr_verts = curr_verts.reshape([1, 6]) - self.gym.add_lines(self.viewer, env_ptr, curr_verts.shape[0], curr_verts, cols) - - return - -##################################################################### -###=========================jit functions=========================### -##################################################################### - -@torch.jit.script -def compute_strike_observations(root_states, tar_states): - # type: (Tensor, Tensor) -> Tensor - root_pos = root_states[:, 0:3] - root_rot = root_states[:, 3:7] - - tar_pos = tar_states[:, 0:3] - tar_rot = tar_states[:, 3:7] - tar_vel = tar_states[:, 7:10] - tar_ang_vel = tar_states[:, 10:13] - - heading_rot = torch_utils.calc_heading_quat_inv(root_rot) - - local_tar_pos = tar_pos - root_pos - local_tar_pos[..., -1] = tar_pos[..., -1] - local_tar_pos = quat_rotate(heading_rot, local_tar_pos) - local_tar_vel = quat_rotate(heading_rot, tar_vel) - local_tar_ang_vel = quat_rotate(heading_rot, tar_ang_vel) - - local_tar_rot = quat_mul(heading_rot, tar_rot) - local_tar_rot_obs = torch_utils.quat_to_tan_norm(local_tar_rot) - - obs = torch.cat([local_tar_pos, local_tar_rot_obs, local_tar_vel, local_tar_ang_vel], dim=-1) - return obs - -@torch.jit.script -def compute_strike_reward(tar_pos, tar_rot, root_state, prev_root_pos, strike_body_vel, dt, near_dist): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, float, float) -> Tensor - tar_speed = 1.0 - vel_err_scale = 4.0 - - tar_rot_w = 0.6 - vel_reward_w = 0.4 - - up = torch.zeros_like(tar_pos) - up[..., -1] = 1 - tar_up = quat_rotate(tar_rot, up) - tar_rot_err = torch.sum(up * tar_up, dim=-1) - tar_rot_r = torch.clamp_min(1.0 - tar_rot_err, 0.0) - - root_pos = root_state[..., 0:3] - tar_dir = tar_pos[..., 0:2] - root_pos[..., 0:2] - tar_dir = torch.nn.functional.normalize(tar_dir, dim=-1) - delta_root_pos = root_pos - prev_root_pos - root_vel = delta_root_pos / dt - tar_dir_speed = torch.sum(tar_dir * root_vel[..., :2], dim=-1) - tar_vel_err = tar_speed - tar_dir_speed - tar_vel_err = torch.clamp_min(tar_vel_err, 0.0) - vel_reward = torch.exp(-vel_err_scale * (tar_vel_err * tar_vel_err)) - speed_mask = tar_dir_speed <= 0 - vel_reward[speed_mask] = 0 - - - reward = tar_rot_w * tar_rot_r + vel_reward_w * vel_reward - - succ = tar_rot_err < 0.2 - reward = torch.where(succ, torch.ones_like(reward), reward) - - return reward - - -@torch.jit.script -def compute_humanoid_reset(reset_buf, progress_buf, contact_buf, contact_body_ids, rigid_body_pos, - tar_contact_forces, strike_body_ids, max_episode_length, - enable_early_termination, termination_heights): - # type: (Tensor, Tensor, Tensor, Tensor, Tensor, 
Tensor, Tensor, float, bool, Tensor) -> Tuple[Tensor, Tensor] - contact_force_threshold = 1.0 - - terminated = torch.zeros_like(reset_buf) - - if (enable_early_termination): - masked_contact_buf = contact_buf.clone() - masked_contact_buf[:, contact_body_ids, :] = 0 - fall_contact = torch.any(torch.abs(masked_contact_buf) > 0.1, dim=-1) - fall_contact = torch.any(fall_contact, dim=-1) - - body_height = rigid_body_pos[..., 2] - fall_height = body_height < termination_heights - fall_height[:, contact_body_ids] = False - fall_height = torch.any(fall_height, dim=-1) - - has_fallen = torch.logical_and(fall_contact, fall_height) - - tar_has_contact = torch.any(torch.abs(tar_contact_forces[..., 0:2]) > contact_force_threshold, dim=-1) - #strike_body_force = contact_buf[:, strike_body_id, :] - #strike_body_has_contact = torch.any(torch.abs(strike_body_force) > contact_force_threshold, dim=-1) - nonstrike_body_force = masked_contact_buf - nonstrike_body_force[:, strike_body_ids, :] = 0 - nonstrike_body_has_contact = torch.any(torch.abs(nonstrike_body_force) > contact_force_threshold, dim=-1) - nonstrike_body_has_contact = torch.any(nonstrike_body_has_contact, dim=-1) - - tar_fail = torch.logical_and(tar_has_contact, nonstrike_body_has_contact) - - has_failed = torch.logical_or(has_fallen, tar_fail) - - # first timestep can sometimes still have nonzero contact forces - # so only check after first couple of steps - has_failed *= (progress_buf > 1) - terminated = torch.where(has_failed, torch.ones_like(reset_buf), terminated) - - reset = torch.where(progress_buf >= max_episode_length - 1, torch.ones_like(reset_buf), terminated) - - return reset, terminated \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py deleted file mode 100644 index 0cae80ed..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/humanoid_view_motion.py +++ /dev/null @@ -1,125 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import torch - -from isaacgym import gymtorch - -from tasks.humanoid_amp import HumanoidAMP - - -class HumanoidViewMotion(HumanoidAMP): - def __init__(self, cfg, sim_params, physics_engine, device_type, device_id, headless): - control_freq_inv = cfg["env"]["controlFrequencyInv"] - self._motion_dt = control_freq_inv * sim_params.dt - - cfg["env"]["controlFrequencyInv"] = 1 - cfg["env"]["pdControl"] = False - - super().__init__(cfg=cfg, - sim_params=sim_params, - physics_engine=physics_engine, - device_type=device_type, - device_id=device_id, - headless=headless) - - num_motions = self._motion_lib.num_motions() - self._motion_ids = torch.arange(self.num_envs, device=self.device, dtype=torch.long) - self._motion_ids = torch.remainder(self._motion_ids, num_motions) - - return - - def pre_physics_step(self, actions): - self.actions = actions.to(self.device).clone() - forces = torch.zeros_like(self.actions) - force_tensor = gymtorch.unwrap_tensor(forces) - self.gym.set_dof_actuation_force_tensor(self.sim, force_tensor) - return - - def post_physics_step(self): - super().post_physics_step() - self._motion_sync() - return - - def _get_humanoid_collision_filter(self): - return 1 # disable self collisions - - def _motion_sync(self): - num_motions = self._motion_lib.num_motions() - motion_ids = self._motion_ids - motion_times = self.progress_buf * self._motion_dt - - root_pos, root_rot, dof_pos, root_vel, root_ang_vel, dof_vel, key_pos \ - = self._motion_lib.get_motion_state(motion_ids, motion_times) - - root_vel = torch.zeros_like(root_vel) - root_ang_vel = torch.zeros_like(root_ang_vel) - dof_vel = torch.zeros_like(dof_vel) - - env_ids = torch.arange(self.num_envs, dtype=torch.long, device=self.device) - self._set_env_state(env_ids=env_ids, - root_pos=root_pos, - root_rot=root_rot, - dof_pos=dof_pos, - root_vel=root_vel, - root_ang_vel=root_ang_vel, - dof_vel=dof_vel) - - env_ids_int32 = self._humanoid_actor_ids[env_ids] - self.gym.set_actor_root_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._root_states), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - self.gym.set_dof_state_tensor_indexed(self.sim, - gymtorch.unwrap_tensor(self._dof_state), - gymtorch.unwrap_tensor(env_ids_int32), len(env_ids_int32)) - return - - def _compute_reset(self): - motion_lengths = self._motion_lib.get_motion_length(self._motion_ids) - self.reset_buf[:], self._terminate_buf[:] = compute_view_motion_reset(self.reset_buf, motion_lengths, self.progress_buf, self._motion_dt) - return - - def _reset_actors(self, env_ids): - return - - def _reset_env_tensors(self, env_ids): - num_motions = self._motion_lib.num_motions() - self._motion_ids[env_ids] = torch.remainder(self._motion_ids[env_ids] + self.num_envs, num_motions) - - self.progress_buf[env_ids] = 0 - self.reset_buf[env_ids] = 0 - self._terminate_buf[env_ids] = 0 - return - -@torch.jit.script -def compute_view_motion_reset(reset_buf, motion_lengths, progress_buf, dt): - # type: (Tensor, Tensor, Tensor, float) -> 
Tuple[Tensor, Tensor] - terminated = torch.zeros_like(reset_buf) - motion_times = progress_buf * dt - reset = torch.where(motion_times > motion_lengths, torch.ones_like(reset_buf), terminated) - return reset, terminated \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py deleted file mode 100644 index 356e9e47..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. - -from gym import spaces - -from isaacgym import gymtorch -from isaacgym.torch_utils import to_torch -import torch -import numpy as np - - -# VecEnv Wrapper for RL training -class VecTask(): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - self.task = task - - self.num_environments = task.num_envs - self.num_agents = 1 # used for multi-agent environments - self.num_observations = task.num_obs - self.num_states = task.num_states - self.num_actions = task.num_actions - - self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf) - self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf) - self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.) - - self.clip_obs = clip_observations - self.clip_actions = clip_actions - self.rl_device = rl_device - - print("RL device: ", rl_device) - - def step(self, actions): - raise NotImplementedError - - def reset(self): - raise NotImplementedError - - def get_number_of_agents(self): - return self.num_agents - - @property - def observation_space(self): - return self.obs_space - - @property - def action_space(self): - return self.act_space - - @property - def num_envs(self): - return self.num_environments - - @property - def num_acts(self): - return self.num_actions - - @property - def num_obs(self): - return self.num_observations - - -# C++ CPU Class -class VecTaskCPU(VecTask): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - self.sync_frame_time = sync_frame_time - - def step(self, actions): - actions = actions.cpu().numpy() - self.task.render(self.sync_frame_time) - - obs, rewards, resets, extras = self.task.step(np.clip(actions, -self.clip_actions, self.clip_actions)) - - return (to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device), - to_torch(rewards, dtype=torch.float, device=self.rl_device), - to_torch(resets, dtype=torch.uint8, device=self.rl_device), []) - - def reset(self): - actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f') - - # step the simulator - obs, rewards, resets, extras = self.task.step(actions) - - return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device) - - -# C++ GPU Class -class VecTaskGPU(VecTask): - def __init__(self, task, rl_device, 
clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - - self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(self.task.num_envs, self.task.num_obs)) - self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(self.task.num_envs,)) - self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(self.task.num_envs,)) - - def step(self, actions): - self.task.render(False) - actions_clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions) - actions_tensor = gymtorch.unwrap_tensor(actions_clipped) - - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, [] - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - actions_tensor = gymtorch.unwrap_tensor(actions) - - # step the simulator - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs) - - -# Python CPU/GPU Class -class VecTaskPython(VecTask): - - def get_state(self): - return torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - def step(self, actions): - actions_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions) - - self.task.step(actions_tensor) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device), self.task.rew_buf.to(self.rl_device), self.task.reset_buf.to(self.rl_device), self.task.extras - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - - # step the simulator - self.task.step(actions) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py deleted file mode 100644 index a5c8160d..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/tasks/vec_task_wrappers.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from gym import spaces -import numpy as np -import torch -from tasks.vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython - -class VecTaskCPUWrapper(VecTaskCPU): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions) - return - -class VecTaskGPUWrapper(VecTaskGPU): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - return - - -class VecTaskPythonWrapper(VecTaskPython): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - - self._amp_obs_space = spaces.Box(np.ones(task.get_num_amp_obs()) * -np.Inf, np.ones(task.get_num_amp_obs()) * np.Inf) - return - - def reset(self, env_ids=None): - self.task.reset(env_ids) - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - @property - def amp_observation_space(self): - return self._amp_obs_space - - def fetch_amp_obs_demo(self, num_samples): - return self.task.fetch_amp_obs_demo(num_samples) \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py deleted file mode 100644 index bc6ee169..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py deleted file mode 100644 index 204344c1..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/gym_util.py +++ /dev/null @@ -1,240 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. 
Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from utils import logger -from isaacgym import gymapi -import numpy as np -import torch -from isaacgym.torch_utils import * -from isaacgym import gymtorch - -def setup_gym_viewer(config): - gym = initialize_gym(config) - sim, viewer = configure_gym(gym, config) - return gym, sim, viewer - - -def initialize_gym(config): - gym = gymapi.acquire_gym() - if not gym.initialize(): - logger.warn("*** Failed to initialize gym") - quit() - - return gym - - -def configure_gym(gym, config): - engine, render = config['engine'], config['render'] - - # physics engine settings - if(engine == 'FLEX'): - sim_engine = gymapi.SIM_FLEX - elif(engine == 'PHYSX'): - sim_engine = gymapi.SIM_PHYSX - else: - logger.warn("Uknown physics engine. defaulting to FLEX") - sim_engine = gymapi.SIM_FLEX - - # gym viewer - if render: - # create viewer - sim = gym.create_sim(0, 0, sim_type=sim_engine) - viewer = gym.create_viewer( - sim, int(gymapi.DEFAULT_VIEWER_WIDTH / 1.25), - int(gymapi.DEFAULT_VIEWER_HEIGHT / 1.25) - ) - - if viewer is None: - logger.warn("*** Failed to create viewer") - quit() - - # enable left mouse click or space bar for throwing projectiles - if config['add_projectiles']: - gym.subscribe_viewer_mouse_event(viewer, gymapi.MOUSE_LEFT_BUTTON, "shoot") - gym.subscribe_viewer_keyboard_event(viewer, gymapi.KEY_SPACE, "shoot") - - else: - sim = gym.create_sim(0, -1) - viewer = None - - # simulation params - scene_config = config['env']['scene'] - sim_params = gymapi.SimParams() - sim_params.solver_type = scene_config['SolverType'] - sim_params.num_outer_iterations = scene_config['NumIterations'] - sim_params.num_inner_iterations = scene_config['NumInnerIterations'] - sim_params.relaxation = scene_config.get('Relaxation', 0.75) - sim_params.warm_start = scene_config.get('WarmStart', 0.25) - sim_params.geometric_stiffness = scene_config.get('GeometricStiffness', 1.0) - sim_params.shape_collision_margin = 0.01 - - sim_params.gravity = gymapi.Vec3(0.0, -9.8, 0.0) - gym.set_sim_params(sim, sim_params) - - return sim, viewer - - -def parse_states_from_reference_states(reference_states, progress): - # parse reference states from DeepMimicState - global_quats_ref = torch.tensor( - reference_states._global_rotation[(progress,)].numpy(), - dtype=torch.double - ).cuda() - ts_ref = torch.tensor( - reference_states._translation[(progress,)].numpy(), - dtype=torch.double - ).cuda() - vels_ref = torch.tensor( - reference_states._velocity[(progress,)].numpy(), - dtype=torch.double - ).cuda() - avels_ref = torch.tensor( - 
reference_states._angular_velocity[(progress,)].numpy(), - dtype=torch.double - ).cuda() - return global_quats_ref, ts_ref, vels_ref, avels_ref - - -def parse_states_from_reference_states_with_motion_id(precomputed_state, - progress, motion_id): - assert len(progress) == len(motion_id) - # get the global id - global_id = precomputed_state['motion_offset'][motion_id] + progress - global_id = np.minimum(global_id, - precomputed_state['global_quats_ref'].shape[0] - 1) - - # parse reference states from DeepMimicState - global_quats_ref = precomputed_state['global_quats_ref'][global_id] - ts_ref = precomputed_state['ts_ref'][global_id] - vels_ref = precomputed_state['vels_ref'][global_id] - avels_ref = precomputed_state['avels_ref'][global_id] - return global_quats_ref, ts_ref, vels_ref, avels_ref - - -def parse_dof_state_with_motion_id(precomputed_state, dof_state, - progress, motion_id): - assert len(progress) == len(motion_id) - # get the global id - global_id = precomputed_state['motion_offset'][motion_id] + progress - # NOTE: it should never reach the dof_state.shape, cause the episode is - # terminated 2 steps before - global_id = np.minimum(global_id, dof_state.shape[0] - 1) - - # parse reference states from DeepMimicState - return dof_state[global_id] - - -def get_flatten_ids(precomputed_state): - motion_offsets = precomputed_state['motion_offset'] - init_state_id, init_motion_id, global_id = [], [], [] - for i_motion in range(len(motion_offsets) - 1): - i_length = motion_offsets[i_motion + 1] - motion_offsets[i_motion] - init_state_id.extend(range(i_length)) - init_motion_id.extend([i_motion] * i_length) - if len(global_id) == 0: - global_id.extend(range(0, i_length)) - else: - global_id.extend(range(global_id[-1] + 1, - global_id[-1] + i_length + 1)) - return np.array(init_state_id), np.array(init_motion_id), \ - np.array(global_id) - - -def parse_states_from_reference_states_with_global_id(precomputed_state, - global_id): - # get the global id - global_id = global_id % precomputed_state['global_quats_ref'].shape[0] - - # parse reference states from DeepMimicState - global_quats_ref = precomputed_state['global_quats_ref'][global_id] - ts_ref = precomputed_state['ts_ref'][global_id] - vels_ref = precomputed_state['vels_ref'][global_id] - avels_ref = precomputed_state['avels_ref'][global_id] - return global_quats_ref, ts_ref, vels_ref, avels_ref - - -def get_robot_states_from_torch_tensor(config, ts, global_quats, vels, avels, - init_rot, progress, motion_length=-1, - actions=None, relative_rot=None, - motion_id=None, num_motion=None, - motion_onehot_matrix=None): - info = {} - # the observation with quaternion-based representation - torso_height = ts[..., 0, 1].cpu().numpy() - gttrny, gqny, vny, avny, info['root_yaw_inv'] = \ - quaternion_math.compute_observation_return_info(global_quats, ts, - vels, avels) - joint_obs = np.concatenate([gttrny.cpu().numpy(), gqny.cpu().numpy(), - vny.cpu().numpy(), avny.cpu().numpy()], axis=-1) - joint_obs = joint_obs.reshape(joint_obs.shape[0], -1) - num_envs = joint_obs.shape[0] - obs = np.concatenate([torso_height[:, np.newaxis], joint_obs], -1) - - # the previous action - if config['env_action_ob']: - obs = np.concatenate([obs, actions], axis=-1) - - # the orientation - if config['env_orientation_ob']: - if relative_rot is not None: - obs = np.concatenate([obs, relative_rot], axis=-1) - else: - curr_rot = global_quats[np.arange(num_envs)][:, 0] - curr_rot = curr_rot.reshape(num_envs, -1, 4) - relative_rot = 
quaternion_math.compute_orientation_drift( - init_rot, curr_rot - ).cpu().numpy() - obs = np.concatenate([obs, relative_rot], axis=-1) - - if config['env_frame_ob']: - if type(motion_length) == np.ndarray: - motion_length = motion_length.astype(np.float) - progress_ob = np.expand_dims(progress.astype(np.float) / - motion_length, axis=-1) - else: - progress_ob = np.expand_dims(progress.astype(np.float) / - float(motion_length), axis=-1) - obs = np.concatenate([obs, progress_ob], axis=-1) - - if config['env_motion_ob'] and not config['env_motion_ob_onehot']: - motion_id_ob = np.expand_dims(motion_id.astype(np.float) / - float(num_motion), axis=-1) - obs = np.concatenate([obs, motion_id_ob], axis=-1) - elif config['env_motion_ob'] and config['env_motion_ob_onehot']: - motion_id_ob = motion_onehot_matrix[motion_id] - obs = np.concatenate([obs, motion_id_ob], axis=-1) - - return obs, info - - -def get_xyzoffset(start_ts, end_ts, root_yaw_inv): - xyoffset = (end_ts - start_ts)[:, [0], :].reshape(1, -1, 1, 3) - ryinv = root_yaw_inv.reshape(1, -1, 1, 4) - - calibrated_xyz_offset = quaternion_math.quat_apply(ryinv, xyoffset)[0, :, 0, :] - return calibrated_xyz_offset diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py deleted file mode 100644 index bbe273ab..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/utils/torch_utils.py +++ /dev/null @@ -1,182 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -import torch -import numpy as np - -from isaacgym.torch_utils import * - -@torch.jit.script -def quat_to_angle_axis(q): - # type: (Tensor) -> Tuple[Tensor, Tensor] - # computes axis-angle representation from quaternion q - # q must be normalized - min_theta = 1e-5 - qx, qy, qz, qw = 0, 1, 2, 3 - - sin_theta = torch.sqrt(1 - q[..., qw] * q[..., qw]) - angle = 2 * torch.acos(q[..., qw]) - angle = normalize_angle(angle) - sin_theta_expand = sin_theta.unsqueeze(-1) - axis = q[..., qx:qw] / sin_theta_expand - - mask = torch.abs(sin_theta) > min_theta - default_axis = torch.zeros_like(axis) - default_axis[..., -1] = 1 - - angle = torch.where(mask, angle, torch.zeros_like(angle)) - mask_expand = mask.unsqueeze(-1) - axis = torch.where(mask_expand, axis, default_axis) - return angle, axis - -@torch.jit.script -def angle_axis_to_exp_map(angle, axis): - # type: (Tensor, Tensor) -> Tensor - # compute exponential map from axis-angle - angle_expand = angle.unsqueeze(-1) - exp_map = angle_expand * axis - return exp_map - -@torch.jit.script -def quat_to_exp_map(q): - # type: (Tensor) -> Tensor - # compute exponential map from quaternion - # q must be normalized - angle, axis = quat_to_angle_axis(q) - exp_map = angle_axis_to_exp_map(angle, axis) - return exp_map - -@torch.jit.script -def quat_to_tan_norm(q): - # type: (Tensor) -> Tensor - # represents a rotation using the tangent and normal vectors - ref_tan = torch.zeros_like(q[..., 0:3]) - ref_tan[..., 0] = 1 - tan = quat_rotate(q, ref_tan) - - ref_norm = torch.zeros_like(q[..., 0:3]) - ref_norm[..., -1] = 1 - norm = quat_rotate(q, ref_norm) - - norm_tan = torch.cat([tan, norm], dim=len(tan.shape) - 1) - return norm_tan - -@torch.jit.script -def euler_xyz_to_exp_map(roll, pitch, yaw): - # type: (Tensor, Tensor, Tensor) -> Tensor - q = quat_from_euler_xyz(roll, pitch, yaw) - exp_map = quat_to_exp_map(q) - return exp_map - -@torch.jit.script -def exp_map_to_angle_axis(exp_map): - min_theta = 1e-5 - - angle = torch.norm(exp_map, dim=-1) - angle_exp = torch.unsqueeze(angle, dim=-1) - axis = exp_map / angle_exp - angle = normalize_angle(angle) - - default_axis = torch.zeros_like(exp_map) - default_axis[..., -1] = 1 - - mask = torch.abs(angle) > min_theta - angle = torch.where(mask, angle, torch.zeros_like(angle)) - mask_expand = mask.unsqueeze(-1) - axis = torch.where(mask_expand, axis, default_axis) - - return angle, axis - -@torch.jit.script -def exp_map_to_quat(exp_map): - angle, axis = exp_map_to_angle_axis(exp_map) - q = quat_from_angle_axis(angle, axis) - return q - -@torch.jit.script -def slerp(q0, q1, t): - # type: (Tensor, Tensor, Tensor) -> Tensor - cos_half_theta = torch.sum(q0 * q1, dim=-1) - - neg_mask = cos_half_theta < 0 - q1 = q1.clone() - q1[neg_mask] = -q1[neg_mask] - cos_half_theta = torch.abs(cos_half_theta) - cos_half_theta = torch.unsqueeze(cos_half_theta, dim=-1) - - half_theta = torch.acos(cos_half_theta); - sin_half_theta = torch.sqrt(1.0 - cos_half_theta * cos_half_theta); - - ratioA = torch.sin((1 - t) * half_theta) / sin_half_theta; - ratioB = torch.sin(t * half_theta) / sin_half_theta; - - new_q = ratioA * q0 + ratioB * q1 - - new_q = torch.where(torch.abs(sin_half_theta) < 0.001, 0.5 * q0 + 0.5 * q1, new_q) - new_q = torch.where(torch.abs(cos_half_theta) >= 1, q0, new_q) - - return new_q - -@torch.jit.script -def calc_heading(q): - # type: (Tensor) -> Tensor - # calculate heading direction from quaternion - # the heading is the direction on the xy plane - # q must be normalized - ref_dir = torch.zeros_like(q[..., 0:3]) - 
ref_dir[..., 0] = 1 - rot_dir = quat_rotate(q, ref_dir) - - heading = torch.atan2(rot_dir[..., 1], rot_dir[..., 0]) - return heading - -@torch.jit.script -def calc_heading_quat(q): - # type: (Tensor) -> Tensor - # calculate heading rotation from quaternion - # the heading is the direction on the xy plane - # q must be normalized - heading = calc_heading(q) - axis = torch.zeros_like(q[..., 0:3]) - axis[..., 2] = 1 - - heading_q = quat_from_angle_axis(heading, axis) - return heading_q - -@torch.jit.script -def calc_heading_quat_inv(q): - # type: (Tensor) -> Tensor - # calculate heading rotation from quaternion - # the heading is the direction on the xy plane - # q must be normalized - heading = calc_heading(q) - axis = torch.zeros_like(q[..., 0:3]) - axis[..., 2] = 1 - - heading_q = quat_from_angle_axis(-heading, axis) - return heading_q \ No newline at end of file diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py deleted file mode 100644 index 356e9e47..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. - -from gym import spaces - -from isaacgym import gymtorch -from isaacgym.torch_utils import to_torch -import torch -import numpy as np - - -# VecEnv Wrapper for RL training -class VecTask(): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - self.task = task - - self.num_environments = task.num_envs - self.num_agents = 1 # used for multi-agent environments - self.num_observations = task.num_obs - self.num_states = task.num_states - self.num_actions = task.num_actions - - self.obs_space = spaces.Box(np.ones(self.num_obs) * -np.Inf, np.ones(self.num_obs) * np.Inf) - self.state_space = spaces.Box(np.ones(self.num_states) * -np.Inf, np.ones(self.num_states) * np.Inf) - self.act_space = spaces.Box(np.ones(self.num_actions) * -1., np.ones(self.num_actions) * 1.) 
- - self.clip_obs = clip_observations - self.clip_actions = clip_actions - self.rl_device = rl_device - - print("RL device: ", rl_device) - - def step(self, actions): - raise NotImplementedError - - def reset(self): - raise NotImplementedError - - def get_number_of_agents(self): - return self.num_agents - - @property - def observation_space(self): - return self.obs_space - - @property - def action_space(self): - return self.act_space - - @property - def num_envs(self): - return self.num_environments - - @property - def num_acts(self): - return self.num_actions - - @property - def num_obs(self): - return self.num_observations - - -# C++ CPU Class -class VecTaskCPU(VecTask): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - self.sync_frame_time = sync_frame_time - - def step(self, actions): - actions = actions.cpu().numpy() - self.task.render(self.sync_frame_time) - - obs, rewards, resets, extras = self.task.step(np.clip(actions, -self.clip_actions, self.clip_actions)) - - return (to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device), - to_torch(rewards, dtype=torch.float, device=self.rl_device), - to_torch(resets, dtype=torch.uint8, device=self.rl_device), []) - - def reset(self): - actions = 0.01 * (1 - 2 * np.random.rand(self.num_envs, self.num_actions)).astype('f') - - # step the simulator - obs, rewards, resets, extras = self.task.step(actions) - - return to_torch(np.clip(obs, -self.clip_obs, self.clip_obs), dtype=torch.float, device=self.rl_device) - - -# C++ GPU Class -class VecTaskGPU(VecTask): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations=clip_observations, clip_actions=clip_actions) - - self.obs_tensor = gymtorch.wrap_tensor(self.task.obs_tensor, counts=(self.task.num_envs, self.task.num_obs)) - self.rewards_tensor = gymtorch.wrap_tensor(self.task.rewards_tensor, counts=(self.task.num_envs,)) - self.resets_tensor = gymtorch.wrap_tensor(self.task.resets_tensor, counts=(self.task.num_envs,)) - - def step(self, actions): - self.task.render(False) - actions_clipped = torch.clamp(actions, -self.clip_actions, self.clip_actions) - actions_tensor = gymtorch.unwrap_tensor(actions_clipped) - - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs), self.rewards_tensor, self.resets_tensor, [] - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - actions_tensor = gymtorch.unwrap_tensor(actions) - - # step the simulator - self.task.step(actions_tensor) - - return torch.clamp(self.obs_tensor, -self.clip_obs, self.clip_obs) - - -# Python CPU/GPU Class -class VecTaskPython(VecTask): - - def get_state(self): - return torch.clamp(self.task.states_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - def step(self, actions): - actions_tensor = torch.clamp(actions, -self.clip_actions, self.clip_actions) - - self.task.step(actions_tensor) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device), self.task.rew_buf.to(self.rl_device), self.task.reset_buf.to(self.rl_device), self.task.extras - - def reset(self): - actions = 0.01 * (1 - 2 * torch.rand([self.task.num_envs, self.task.num_actions], dtype=torch.float32, device=self.rl_device)) - - # 
step the simulator - self.task.step(actions) - - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) diff --git a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py b/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py deleted file mode 100644 index 00af127c..00000000 --- a/rofunc/learning/RofuncRL/agents/mixline/for_test/vec_task_wrappers.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright (c) 2018-2022, NVIDIA Corporation -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# 3. Neither the name of the copyright holder nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -from gym import spaces -import numpy as np -import torch -from vec_task import VecTaskCPU, VecTaskGPU, VecTaskPython - -class VecTaskCPUWrapper(VecTaskCPU): - def __init__(self, task, rl_device, sync_frame_time=False, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, sync_frame_time, clip_observations, clip_actions) - return - -class VecTaskGPUWrapper(VecTaskGPU): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - return - - -class VecTaskPythonWrapper(VecTaskPython): - def __init__(self, task, rl_device, clip_observations=5.0, clip_actions=1.0): - super().__init__(task, rl_device, clip_observations, clip_actions) - - self._amp_obs_space = spaces.Box(np.ones(task.get_num_amp_obs()) * -np.Inf, np.ones(task.get_num_amp_obs()) * np.Inf) - return - - def reset(self, env_ids=None): - self.task.reset(env_ids) - return torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device) - - @property - def amp_observation_space(self): - return self._amp_obs_space - - def fetch_amp_obs_demo(self, num_samples): - return self.task.fetch_amp_obs_demo(num_samples) \ No newline at end of file
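
For reference, the step() path shared by the two removed VecTaskPython/VecTaskPythonWrapper copies reduces to: clamp incoming actions, step the wrapped task, clamp the observation buffer, and move the results to the RL device. The sketch below illustrates that pattern in plain PyTorch; it is an illustrative reduction only, and DummyTask together with its buffer names are hypothetical stand-ins rather than code recovered from the deleted files.

import torch


class DummyTask:
    """Hypothetical stand-in exposing the buffers the removed wrapper expects."""

    def __init__(self, num_envs=4, num_obs=8, num_actions=3, device="cpu"):
        self.num_envs, self.num_obs, self.num_actions = num_envs, num_obs, num_actions
        self.obs_buf = torch.zeros(num_envs, num_obs, device=device)
        self.rew_buf = torch.zeros(num_envs, device=device)
        self.reset_buf = torch.zeros(num_envs, dtype=torch.uint8, device=device)
        self.extras = {}

    def step(self, actions):
        # Fake dynamics: observations drift with the applied actions.
        self.obs_buf[:, : self.num_actions] += actions
        self.rew_buf = -actions.abs().sum(dim=-1)


class ClampedVecEnv:
    """Clamp actions on the way in, clamp observations on the way out,
    then relocate results to the RL device (the core of VecTaskPython.step)."""

    def __init__(self, task, rl_device="cpu", clip_observations=5.0, clip_actions=1.0):
        self.task = task
        self.rl_device = rl_device
        self.clip_obs = clip_observations
        self.clip_actions = clip_actions

    def step(self, actions):
        actions = torch.clamp(actions, -self.clip_actions, self.clip_actions)
        self.task.step(actions)
        obs = torch.clamp(self.task.obs_buf, -self.clip_obs, self.clip_obs).to(self.rl_device)
        return (obs,
                self.task.rew_buf.to(self.rl_device),
                self.task.reset_buf.to(self.rl_device),
                self.task.extras)


if __name__ == "__main__":
    env = ClampedVecEnv(DummyTask())
    obs, rew, reset, extras = env.step(torch.randn(4, 3))
    print(obs.shape, rew.shape, reset.shape)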