# Source code for xuance.environment.multi_agent_env.mpe

import importlib
import numpy as np
from xuance.environment import RawMultiAgentEnv

# Team-name table for the supported MPE scenarios.
# Keys have the same "<env_name>.<env_id>" form that MPE_Env builds as
# ``scenario_name``; values list the name prefixes found in that scenario's
# agent ids (e.g. 'adversary' for 'adversary_0').
# NOTE(review): nothing in this module reads the table; presumably it is
# consumed elsewhere in the package -- confirm before changing the order.
TEAM_NAME_DICT = {
    scenario: list(teams)  # a fresh list per entry, never shared
    for scenario, teams in (
        ("mpe.simple_adversary_v3", ("adversary", "agent")),
        ("mpe.simple_crypto_v3", ("eve", "alice", "bob")),
        ("mpe.simple_push_v3", ("adversary", "agent")),
        ("mpe.simple_reference_v3", ("agent",)),
        ("mpe.simple_speaker_listener_v4", ("speaker", "listener")),
        ("mpe.simple_spread_v3", ("agent",)),
        ("mpe.simple_tag_v3", ("adversary", "agent")),
        ("mpe.simple_v3", ("agent",)),
        ("mpe.simple_world_comm_v3", ("adversary", "agent")),
    )
}


class MPE_Env(RawMultiAgentEnv):
    """
    The implementation of MPE environments, provides a standardized interface for interacting
    with the environments in the context of multi-agent reinforcement learning.

    The wrapper imports ``pettingzoo.<env_name>.<env_id>``, builds its
    ``parallel_env`` and keeps a running per-agent reward total and a step
    counter for the current episode.

    Parameters:
        config: The configurations of the environment. The constructor reads
            ``env_name``, ``env_id``, ``render_mode``, ``continuous_action``
            and ``env_seed`` from it.
    """

    def __init__(self, config):
        super().__init__()
        # Prepare raw environment
        env_name, env_id = config.env_name, config.env_id
        self.render_mode = config.render_mode
        self.continuous_actions = config.continuous_action
        self.scenario_name = env_name + "." + env_id
        scenario = importlib.import_module(f'pettingzoo.{env_name}.{env_id}')  # create scenario
        self.env = scenario.parallel_env(continuous_actions=self.continuous_actions,
                                         render_mode=self.render_mode)
        # Reset once with the configured seed so the attributes read below
        # (e.g. ``agents``) are populated.
        self.env.reset(config.env_seed)

        # Set basic attributes
        self.metadata = self.env.metadata
        self.agents = self.env.agents
        self.state_space = self.env.state_space
        self.observation_space = {agent: self.env.observation_space(agent) for agent in self.agents}
        self.action_space = {agent: self.env.action_space(agent) for agent in self.agents}
        self.num_agents = self.env.num_agents
        # Team partition is hard-coded for the two scenarios below only.
        # NOTE(review): for every other scenario this constructor does not
        # assign ``agent_groups``; confirm the base class provides a default.
        if "simple_push" in env_id:
            self.agent_groups = [['agent_0'], ['adversary_0']]
        elif "simple_adversary" in env_id:
            self.agent_groups = [['adversary_0'], ['agent_0', 'agent_1']]
        self.max_episode_steps = self.env.unwrapped.max_cycles
        self.individual_episode_reward = {k: 0.0 for k in self.agents}
        self._episode_step = 0

    def close(self):
        """Close the environment."""
        self.env.close()

    def render(self, *args):
        """
        Get the rendered images of the environment.

        Any positional arguments are accepted for interface compatibility and
        ignored; the call is forwarded to the underlying environment as is.
        """
        return self.env.render()

    def reset(self):
        """
        Reset the environment to its initial state.

        Returns:
            A tuple ``(observations, reset_info)``. ``reset_info`` holds the
            underlying environment's infos under ``"infos"`` and the zeroed
            per-agent episode rewards under ``"individual_episode_rewards"``.
        """
        observations, infos = self.env.reset()
        for agent_key in self.agents:
            self.individual_episode_reward[agent_key] = 0.0
        # Hand out a snapshot: the internal dict is updated in place on every
        # step/reset and must not rewrite info the caller already holds.
        reset_info = {"infos": infos,
                      "individual_episode_rewards": dict(self.individual_episode_reward)}
        self._episode_step = 0
        return observations, reset_info

    def step(self, actions):
        """
        Take an action as input, perform a step in the underlying pettingzoo environment.

        Parameters:
            actions: A dict mapping each agent key to its action. With
                continuous actions the values are clipped to the bounds of the
                agent's action space; the dict passed in is left unmodified.

        Returns:
            A tuple ``(observations, rewards, terminated, truncated, step_info)``.
            ``truncated`` is a single bool that is True once the episode step
            counter reaches ``max_episode_steps``; it replaces the truncation
            value returned by the underlying environment.
        """
        if self.continuous_actions:
            # Build a new dict instead of writing the clipped values back into
            # the caller's ``actions``.
            actions = {k: np.clip(v, self.action_space[k].low, self.action_space[k].high)
                       for k, v in actions.items()}
        observations, rewards, terminated, _, info = self.env.step(actions)
        for k, v in rewards.items():
            self.individual_episode_reward[k] += v
        # Snapshot for the same reason as in reset(): a later reset() zeroes
        # the internal dict in place.
        step_info = {"infos": info,
                     "individual_episode_rewards": dict(self.individual_episode_reward)}
        self._episode_step += 1
        truncated = self._episode_step >= self.max_episode_steps
        return observations, rewards, terminated, truncated, step_info

    def state(self):
        """Returns the global state of the environment."""
        return self.env.state()

    def agent_mask(self):
        """
        Create a boolean mask indicating which agents are currently alive.
        Note: For MPE environment, all agents are alive before the episode is terminated.

        Returns:
            A dict mapping every agent key to True.
        """
        return {agent: True for agent in self.agents}

    def avail_actions(self):
        """
        Returns a boolean mask indicating which actions are available for each agent.

        Returns:
            None when the environment uses continuous actions; otherwise a dict
            mapping each agent key to an all-True boolean array whose length is
            the size ``n`` of that agent's action space.
        """
        if self.continuous_actions:
            return None
        return {agent: np.ones(self.action_space[agent].n, np.bool_) for agent in self.agents}