Commit

max reward BPD + toml dependency
rradules committed Aug 21, 2023
1 parent 6f6fb4e; commit 28508d9
Showing 2 changed files with 3 additions and 8 deletions.
.pre-commit-config.yaml: 2 changes (1 addition & 1 deletion)
@@ -56,7 +56,7 @@ repos:
- --ignore-decorators=override
- --explain
- --convention=google
-additional_dependencies: ["toml"]
+additional_dependencies: ["tomli"]
- repo: local
hooks:
- id: pyright
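For context on the dependency swap: the pydocstyle hook configured above reads per-project settings such as the google convention from pyproject.toml, and recent pydocstyle releases appear to parse that file with tomli rather than the older toml package, so the hook's extra dependency has to match. A minimal sketch of that style of TOML config loading, assuming a pyproject.toml in the working directory (illustrative, not code from this repository):

# Illustration only: read pyproject.toml the way a TOML-configured tool would.
import tomli  # on Python 3.11+ the built-in tomllib plays the same role

with open("pyproject.toml", "rb") as f:  # tomli/tomllib require binary mode
    pyproject = tomli.load(f)

# A [tool.pydocstyle] table, if present, would hold options like convention = "google".
print(pyproject.get("tool", {}).get("pydocstyle", {}))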
momadm_benchmarks/envs/beach_domain/beach_domain.py: 9 changes (2 additions & 7 deletions)
@@ -27,7 +27,6 @@

def parallel_env(**kwargs):
"""Env factory function for the beach domain."""
-
return MOBeachDomain(**kwargs)


@@ -40,7 +39,6 @@ def env(**kwargs):
Returns:
A fully wrapped env
"""
-
env = raw_env(**kwargs)
# this wrapper helps error handling for discrete action spaces
env = wrappers.AssertOutOfBoundsWrapper(env)
@@ -52,7 +50,6 @@ def env(**kwargs):

def raw_env(**kwargs):
"""To support the AEC API, the raw_env function just uses the from_parallel function to convert from a ParallelEnv to an AEC env."""
-
env = parallel_env(**kwargs)
env = mo_parallel_to_aec(env)
return env
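For context, the factory functions above compose as follows; this is an assumed usage sketch based on the module path and the defaults visible in this diff, not code taken from the repository:

# Assumed usage sketch; the import path follows the file location shown above.
from momadm_benchmarks.envs.beach_domain import beach_domain

# Parallel API: every agent acts simultaneously in MOBeachDomain.
parallel = beach_domain.parallel_env(num_timesteps=10)

# AEC API: raw_env() converts the ParallelEnv via mo_parallel_to_aec, and env()
# additionally applies error-handling wrappers such as AssertOutOfBoundsWrapper.
turn_based = beach_domain.env(num_timesteps=10)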
@@ -71,7 +68,6 @@ class MOBeachDomain(MOParallelEnv):
metadata = {"render_modes": ["human"], "name": "mobeach_v0"}

# TODO does this environment require max_cycle?
-
def __init__(
self,
num_timesteps=10,
@@ -95,7 +91,6 @@ def __init__(
render_mode: render mode
reward_scheme: the reward scheme to use ('local', or 'global'). Default: local
"""
-
self.reward_scheme = reward_scheme
self.sections = sections
# TODO Extend to distinct capacities per section?
@@ -135,10 +130,10 @@ def __init__(
* num_agents,
)
)
-# TODO check reward spaces

# maximum capacity reward can be calculated by calling the _global_capacity_reward()
optimal_consumption = [capacity for _ in range(sections)]
-optimal_consumption[-1] = max(self.num_agents - ((sections-1) * capacity), 0)
+optimal_consumption[-1] = max(self.num_agents - ((sections - 1) * capacity), 0)
max_r = _global_capacity_reward(self.resource_capacities, optimal_consumption)
self.reward_spaces = dict(zip(self.agents, [Box(low=0, high=max_r, shape=(NUM_OBJECTIVES,))] * num_agents))
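The new upper bound works by constructing the best possible distribution of agents over sections: fill the first sections - 1 sections to capacity, place whatever agents remain (clamped at zero) in the last section, evaluate _global_capacity_reward on that distribution, and use the result as the high of every agent's reward Box. A worked sketch with assumed numbers (the helper is presumably defined elsewhere in this module and is not shown in the diff):

# Illustration with assumed values, mirroring the lines added above.
num_agents, sections, capacity = 10, 3, 5

optimal_consumption = [capacity for _ in range(sections)]
optimal_consumption[-1] = max(num_agents - ((sections - 1) * capacity), 0)
print(optimal_consumption)  # [5, 5, 0] -> two sections at capacity already hold all 10 agents

# max_r would then be _global_capacity_reward(resource_capacities, optimal_consumption),
# i.e. the largest capacity reward any assignment of agents to sections can achieve.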

