# Source code for xuance.environment.multi_agent_env.mpe

import importlib
import numpy as np
from xuance.environment import RawMultiAgentEnv

# Maps each supported MPE scenario ("<env_name>.<env_id>") to the list of
# team-name prefixes used by the agents of that scenario.
TEAM_NAME_DICT = {
    "mpe.simple_adversary_v3": ["adversary", "agent"],
    "mpe.simple_crypto_v3": ["eve", "alice", "bob"],
    "mpe.simple_push_v3": ["adversary", "agent"],
    "mpe.simple_reference_v3": ["agent"],
    "mpe.simple_speaker_listener_v4": ["speaker", "listener"],
    "mpe.simple_spread_v3": ["agent"],
    "mpe.simple_tag_v3": ["adversary", "agent"],
    "mpe.simple_v3": ["agent"],
    "mpe.simple_world_comm_v3": ["adversary", "agent"],
}


class MPE_Env(RawMultiAgentEnv):
    """
    The implementation of MPE environments, provides a standardized interface for interacting
    with the environments in the context of multi-agent reinforcement learning.

    Parameters:
        config: The configurations of the environment. The attributes read here are
            ``env_name``, ``env_id``, ``render_mode``, ``continuous_action`` and ``env_seed``.
    """

    def __init__(self, config):
        super().__init__()
        # Prepare raw environment
        env_name, env_id = config.env_name, config.env_id
        self.render_mode = config.render_mode
        self.continuous_actions = config.continuous_action
        self.scenario_name = env_name + "." + env_id
        scenario = importlib.import_module(f'pettingzoo.{env_name}.{env_id}')  # create scenario
        self.env = scenario.parallel_env(continuous_actions=self.continuous_actions,
                                         render_mode=self.render_mode)
        # Pass the seed by keyword so it cannot be bound to another positional parameter.
        self.env.reset(seed=config.env_seed)

        # Set basic attributes
        self.metadata = self.env.metadata
        self.agents = self.env.agents
        self.state_space = self.env.state_space
        self.observation_space = {agent: self.env.observation_space(agent) for agent in self.agents}
        self.action_space = {agent: self.env.action_space(agent) for agent in self.agents}
        self.num_agents = self.env.num_agents
        # Only these two scenarios override the agent grouping; for every other
        # scenario ``agent_groups`` is left untouched by this constructor.
        if "simple_push" in env_id:
            self.agent_groups = [['agent_0'], ['adversary_0']]
        elif "simple_adversary" in env_id:
            self.agent_groups = [['adversary_0'], ['agent_0', 'agent_1']]
        self.max_episode_steps = self.env.unwrapped.max_cycles
        self.individual_episode_reward = {k: 0.0 for k in self.agents}
        self._episode_step = 0

    def close(self):
        """Close the environment."""
        self.env.close()

    def render(self, *args):
        """Get the rendered images of the environment.

        Positional arguments are accepted for interface compatibility and ignored.
        """
        return self.env.render()

    def reset(self):
        """Reset the environment to its initial state.

        Returns:
            A tuple ``(observations, reset_info)``. ``reset_info`` holds the raw infos of the
            underlying environment under ``"infos"`` and the (zeroed) per-agent episode
            rewards under ``"individual_episode_rewards"``.
        """
        observations, infos = self.env.reset()
        for agent_key in self.agents:
            self.individual_episode_reward[agent_key] = 0.0
        self._episode_step = 0
        # Hand out a snapshot: later steps must not alter an info dict already returned.
        reset_info = {"infos": infos,
                      "individual_episode_rewards": dict(self.individual_episode_reward)}
        return observations, reset_info

    def step(self, actions):
        """Take an action as input, perform a step in the underlying pettingzoo environment.

        Parameters:
            actions: A dict mapping each agent key to its action. With continuous actions,
                every action is clipped to the bounds of the agent's action space. The
                caller's dict is not modified.

        Returns:
            A tuple ``(observations, rewards, terminated, truncated, step_info)``.
            ``truncated`` is a single bool that is True once the number of steps taken in the
            episode reaches ``max_episode_steps``; it replaces the value returned by the
            underlying environment.
        """
        if self.continuous_actions:
            # Build a new dict instead of writing the clipped values back into the argument.
            actions = {agent: np.clip(action, self.action_space[agent].low, self.action_space[agent].high)
                       for agent, action in actions.items()}
        observations, rewards, terminated, _, info = self.env.step(actions)
        for agent, reward in rewards.items():
            self.individual_episode_reward[agent] += reward
        self._episode_step += 1
        truncated = bool(self._episode_step >= self.max_episode_steps)
        # Snapshot of the running totals, so that a subsequent step() or reset() cannot
        # retroactively change (or zero) the rewards reported for this step.
        step_info = {"infos": info,
                     "individual_episode_rewards": dict(self.individual_episode_reward)}
        return observations, rewards, terminated, truncated, step_info

    def state(self):
        """Returns the global state of the environment."""
        return self.env.state()

    def agent_mask(self):
        """
        Create a boolean mask indicating which agents are currently alive.
        Note: For MPE environment, all agents are alive before the episode is terminated.
        """
        return {agent: True for agent in self.agents}

    def avail_actions(self):
        """Returns a boolean mask indicating which actions are available for each agent.

        Returns None for continuous actions; otherwise a dict mapping each agent to an
        all-True boolean array with one entry per discrete action.
        """
        if self.continuous_actions:
            return None
        return {agent: np.ones(self.action_space[agent].n, np.bool_) for agent in self.agents}