class StarCraft2_Env(RawMultiAgentEnv):
    """Wrap ``StarCraft2Env`` behind a dict-per-agent multi-agent interface.

    Agents are named ``"agent_0"`` ... ``"agent_{n-1}"``; observations,
    rewards, termination flags and action masks are exposed as dictionaries
    keyed by those names, in the same order as the underlying environment's
    per-agent lists.

    Parameters:
        config: The configurations of the environment. ``config.env_id`` is
            passed as ``map_name`` to ``StarCraft2Env`` and
            ``config.env_seed`` is offered to the first ``reset`` call.
    """

    def __init__(self, config):
        super().__init__()
        self.env = StarCraft2Env(map_name=config.env_id)
        self.env_info = self.env.get_env_info()

        self.num_agents = self.env_info['n_agents']
        self.agents = [f"agent_{i}" for i in range(self.num_agents)]

        # All spaces are sized from the sizes reported by the environment.
        self.state_space = Box(low=-np.inf, high=np.inf,
                               shape=(self.env_info['state_shape'],))
        self.observation_space = {
            k: Box(low=-np.inf, high=np.inf,
                   shape=(self.env_info['obs_shape'],))
            for k in self.agents
        }
        self.action_space = {
            k: Discrete(n=self.env_info['n_actions']) for k in self.agents
        }

        # Best effort: try a seeded reset first and fall back to a plain
        # reset if that fails (e.g. the backend's reset() takes no ``seed``
        # argument). ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        try:
            self.env.reset(seed=config.env_seed)
        except Exception:
            self.env.reset()

        self.max_episode_steps = self.env_info['episode_limit']
        self._episode_step = 0

    def reset(self):
        """Reset the environment and the episode step counter.

        Returns:
            A tuple ``(obs_dict, info)`` where ``obs_dict`` maps each agent
            name to its observation and ``info`` is an empty dict.
        """
        # The second value returned by the underlying reset is discarded
        # (presumably the global state -- use ``state()`` to obtain it).
        obs, _ = self.env.reset()
        obs_dict = {key: obs[index] for index, key in enumerate(self.agents)}
        self._episode_step = 0
        info = {}
        return obs_dict, info

    def step(self, actions):
        """Apply one joint action to the underlying environment.

        Parameters:
            actions: Mapping from agent name to that agent's action. Every
                name in ``self.agents`` must be present.

        Returns:
            A tuple ``(obs_dict, reward_dict, terminated_dict, truncated,
            step_info)``. The environment's single reward and single
            termination flag are replicated for every agent; ``truncated``
            is one bool that becomes True once the number of steps taken
            since the last ``reset`` reaches ``max_episode_steps``.
        """
        # The underlying environment expects a list ordered like self.agents.
        actions_list = [actions[key] for key in self.agents]
        reward, terminated, info = self.env.step(actions_list)

        # Guarantee the statistics keys exist even when the environment
        # reports nothing for this step.
        if info == {}:
            info = {'battle_won': 0, 'dead_allies': 0, 'dead_enemies': 0}

        reward_dict = {k: reward for k in self.agents}
        terminated_dict = {k: terminated for k in self.agents}

        obs = self.env.get_obs()
        obs_dict = {key: obs[index] for index, key in enumerate(self.agents)}

        step_info = info
        self._episode_step += 1
        truncated = self._episode_step >= self.max_episode_steps
        return obs_dict, reward_dict, terminated_dict, truncated, step_info

    def render(self, mode):
        """Render the environment by delegating to the underlying env.

        Parameters:
            mode: Render mode, forwarded unchanged to ``self.env.render``.

        Return:
            rgb_images (np.ndarray or list): The images used to visualize
            the environment, as returned by the underlying environment.
        """
        return self.env.render(mode)

    def close(self):
        """Closes the environment."""
        self.env.close()

    def state(self):
        """Returns the global state of the environment."""
        return self.env.get_state()

    def agent_mask(self):
        """Return a per-agent boolean mask.

        Every agent is always reported as ``True``; this wrapper does not
        query the underlying environment for which units are still alive.
        """
        return {agent: True for agent in self.agents}

    def avail_actions(self):
        """Return, for each agent, the mask of currently available actions.

        The per-agent list from ``self.env.get_avail_actions()`` is re-keyed
        by agent name.
        """
        actions_mask_list = self.env.get_avail_actions()
        return {key: actions_mask_list[index]
                for index, key in enumerate(self.agents)}