Source code for prt_sim.jhu.gold_explorer

import numpy as np
import random
import os
from typing import Tuple, Optional
from prt_sim.jhu.base import BaseEnvironment
from prt_sim.common.grid_rendering import GridworldRender


def get_state_index(x, y, z):
    """
    Flatten an (x, y, z) explorer position into a single state index.

    The column ``x`` is the least significant octal digit, the row ``y`` the
    next one, and the level ``z`` is weighted by 64.

    Args:
        x: column of the explorer
        y: row of the explorer
        z: level of the explorer

    Returns:
        The index ``x + 8 * y + 64 * z``; this spans 0 to 127 when x and y
        are in 0..7 and z is 0 or 1.
    """
    return x + 8 * y + 64 * z
class GoldExplorer(BaseEnvironment):
    """
    The Gold Explorer puzzle

    .. image:: /_static/gold-explorer.png
        :alt: Gold Explorer puzzle
        :width: 100%
        :align: center

    **Action space**: integer representing a discrete action described in the table below

    +-----+--------+
    | Num | Action |
    +=====+========+
    | 0   | North  |
    +-----+--------+
    | 1   | East   |
    +-----+--------+
    | 2   | South  |
    +-----+--------+
    | 3   | West   |
    +-----+--------+

    **Observation space**: integer between 0 and 127 representing the state as an octal number,
    <gold bit><row><column>

    .. image:: /_static/gold-explorer-state.png
        :alt: Gold Explorer State
        :width: 100%
        :align: center

    **Reward**: +15 for obtaining gold coins, +30 for obtaining the motherlode, -30 for entering a mine field,
    -1 for every other location
    """
    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 5
    }

    # Grid cell (x, y) of the coins icon. Kept as a constant so render() can
    # put the icon back after it has been hidden in a previous episode.
    _COINS_POSITION = (2, 6)

    def __init__(self,
                 render_mode: Optional[str] = "rgb_array"
                 ) -> None:
        self.render_mode = render_mode
        self.num_states = 128
        self.num_actions = 4
        self.expl_x = 0  # explorer's x position from 0 to 7
        self.expl_y = 0  # explorer's y position from 0 to 7
        self.expl_z = 0  # explorer's z position from 0 to 1

        # State indices (see get_state_index) grouped by cell type. Gold,
        # mine and mountain cells are listed once per level (z = 0 and z = 1).
        self.win = {15, 62, 79, 126}
        self.loss = {13, 17, 28, 32, 51, 77, 81, 92, 96, 115}
        self.coins = {50}
        self.mount = {1, 9, 49, 65, 73, 113}

        here = os.path.dirname(__file__)
        self.gridworld_render = GridworldRender(
            grid_width=8,
            grid_height=8,
            window_size=(800, 800),
            render_mode=self.render_mode,
            render_fps=self.metadata['render_fps'],
            agent_icons={
                'explorer': os.path.join(here, 'icons/explorer.png'),
                'mountain1': os.path.join(here, 'icons/mountain.png'),
                'mountain2': os.path.join(here, 'icons/mountain.png'),
                'mountain3': os.path.join(here, 'icons/mountain.png'),
                'mine1': os.path.join(here, 'icons/mine.png'),
                'mine2': os.path.join(here, 'icons/mine.png'),
                'mine3': os.path.join(here, 'icons/mine.png'),
                'mine4': os.path.join(here, 'icons/mine.png'),
                'mine5': os.path.join(here, 'icons/mine.png'),
                'coins': os.path.join(here, 'icons/coins.png'),
                'gold1': os.path.join(here, 'icons/gold.png'),
                'gold2': os.path.join(here, 'icons/gold.png'),
            },
            window_title='Gold Explorer',
            background_color=(198, 236, 254)
        )

        # (x, y) grid cell of every icon handed to the renderer.
        self.agent_positions = {
            'mountain1': np.array([1, 0]),
            'mountain2': np.array([1, 1]),
            'mountain3': np.array([1, 6]),
            'mine1': np.array([1, 2]),
            'mine2': np.array([0, 4]),
            'mine3': np.array([5, 1]),
            'mine4': np.array([4, 3]),
            'mine5': np.array([3, 6]),
            'coins': np.array(self._COINS_POSITION),
            'gold1': np.array([7, 1]),
            'gold2': np.array([6, 7]),
            'explorer': np.array([0, 0]),
        }

    # Get the key environment parameters
    def get_number_of_states(self) -> int:
        """
        Returns the number of states in the puzzle

        Returns:
            int: total number of states in the puzzle
        """
        return self.num_states

    def get_number_of_actions(self) -> int:
        """
        Returns the number of discrete actions in the puzzle

        Returns:
            int: total number of actions in the puzzle
        """
        return self.num_actions

    # Get the state IDs that should not be set optimistically
    def get_terminal_states(self):
        """
        Returns the state IDs that should not be set optimistically

        Returns:
            set: union of the win, loss and mountain state indices
        """
        return self.win.union(self.loss, self.mount)

    def get_state(self):
        """
        Returns the state index of the explorer's current position

        Returns:
            int: ``get_state_index`` of the current (x, y, z) position
        """
        return get_state_index(self.expl_x, self.expl_y, self.expl_z)

    # Set the current state to the initial state
    def reset(self,
              seed: Optional[int] = None,
              randomize_start: Optional[bool] = False
              ) -> int:
        """
        Places the explorer at its starting position.

        Args:
            seed (int): optional seed for the random start. When given, the start is drawn from a
                private generator seeded with this value, so the same seed always yields the same
                start and the module-level ``random`` state is left untouched. When None, the
                module-level ``random`` generator is used.
            randomize_start (bool): if True, start from a uniformly sampled (x, y, z) that is not a
                win, loss, mountain or coin state; otherwise start from (0, 0, 0).

        Returns:
            int: state index of the starting position
        """
        x = y = z = 0

        if randomize_start:
            rng = random if seed is None else random.Random(seed)
            forbidden = self.win | self.loss | self.mount | self.coins
            # Rejection sampling: redraw until the cell is a legal start.
            while True:
                x = rng.randint(0, 7)
                y = rng.randint(0, 7)
                z = rng.randint(0, 1)
                if get_state_index(x, y, z) not in forbidden:
                    break

        self.expl_x, self.expl_y, self.expl_z = x, y, z
        return get_state_index(x, y, z)

    def execute_action(self,
                       action: int
                       ) -> Tuple[int, float, bool]:
        """
        Executes an action for the explorer.

        Args:
            action (int): the action to execute. 0 = North, 1 = East, 2 = South; any other value
                is treated as West.

        Returns:
            Tuple[int, float, bool]: ``(new_state, reward, game_end)``. Moving off the grid or into
            a mountain leaves the explorer in place with reward -1. Picking up the coins moves the
            explorer to level z = 1 with reward +15. Entering a mine gives -30 and entering a gold
            cell gives +30, both ending the game. Acting from a win or loss state returns reward 0
            with the game ended; acting from a mountain or uncollected-coins state (not reachable
            through normal moves) returns reward -1000 with the game ended.
        """
        # Note: 'N' = 0, 'E' = 1, 'S' = 2, 'W' = 3
        current_state = get_state_index(self.expl_x, self.expl_y, self.expl_z)

        # if in terminal states, stay in terminal states
        if current_state in self.win or current_state in self.loss:
            return current_state, 0, True

        # The explorer should never be standing on these cells.
        if current_state in self.mount or current_state in self.coins:
            return current_state, -1000, True

        next_x, next_y, next_z = self.expl_x, self.expl_y, self.expl_z

        # determine a potential next state, clamped to the 8x8 grid
        if action == 0:    # action is 'N'
            next_y = max(next_y - 1, 0)
        elif action == 1:  # action is 'E'
            next_x = min(next_x + 1, 7)
        elif action == 2:  # action is 'S'
            next_y = min(next_y + 1, 7)
        else:              # action is 'W'
            next_x = max(next_x - 1, 0)

        new_state = get_state_index(next_x, next_y, next_z)

        if new_state in self.coins:
            # coins picked up: shift to second level grid space
            next_z = 1
            new_state = get_state_index(next_x, next_y, next_z)
            reward, game_end = 15, False
        elif new_state in self.mount:
            # mountains block the move; stay where we were
            next_x, next_y, next_z = self.expl_x, self.expl_y, self.expl_z
            new_state = current_state
            reward, game_end = -1, False
        elif new_state in self.loss:
            # you lose
            reward, game_end = -30, True
        elif new_state in self.win:
            # you won
            reward, game_end = 30, True
        else:
            reward, game_end = -1, False

        self.expl_x, self.expl_y, self.expl_z = next_x, next_y, next_z
        return new_state, reward, game_end

    def render(self):
        """
        Draws the current state of the puzzle.

        The coins icon is hidden (position set to None) while the explorer is on level z = 1 and
        shown again at its grid cell otherwise, so it reappears after a reset.

        Returns:
            The value returned by the renderer when ``render_mode`` is 'rgb_array'; None otherwise.
        """
        self.agent_positions['explorer'] = np.array([self.expl_x, self.expl_y])
        # Recompute the coins position every frame instead of reusing the stored value, which may
        # still be None from an earlier episode in which the coins were collected.
        if self.expl_z == 1:
            self.agent_positions['coins'] = None
        else:
            self.agent_positions['coins'] = np.array(self._COINS_POSITION)

        if self.render_mode == 'human':
            self.gridworld_render.render(self.agent_positions)
        elif self.render_mode == 'rgb_array':
            return self.gridworld_render.render(self.agent_positions)
if __name__ == '__main__':
    # Demo: run up to 20 uniformly random actions from the default start,
    # rendering after every step and stopping once the game ends.
    explorer_env = GoldExplorer()
    explorer_env.reset(randomize_start=False)
    explorer_env.render()

    max_steps = 20
    for _step in range(max_steps):
        random_action = np.random.randint(explorer_env.num_actions)
        _state, _reward, finished = explorer_env.execute_action(random_action)
        explorer_env.render()
        if finished:
            break