# Source code for prt_sim.jhu.gold_explorer

import numpy as np
import random
import os
from typing import Tuple, Optional
from prt_sim.jhu.base import BaseEnvironment
from prt_sim.common.grid_rendering import GridworldRender



# [docs]
def get_state_index(x, y, z):
    """
    Encodes the explorer's coordinates as a single integer state index.

    The index is ``x + 8 * y + 64 * z``, i.e. the coordinates packed as
    octal-style digits ``<z><y><x>``.

    Args:
        x: column of the explorer
        y: row of the explorer
        z: grid level of the explorer

    Returns:
        The combined state index. It lies between 0 and 127 when ``x`` and
        ``y`` are in 0..7 and ``z`` is 0 or 1.
    """
    return x + 8 * y + 64 * z




# [docs]
class GoldExplorer(BaseEnvironment):
    """
    The Gold Explorer puzzle

    .. image:: /_static/gold-explorer.png
       :alt: Gold Explorer puzzle
       :width: 100%
       :align: center

    **Action space**: integer representing a discrete action described in the table below

    +-----+--------+
    | Num | Action |
    +=====+========+
    | 0   | North  |
    +-----+--------+
    | 1   | East   |
    +-----+--------+
    | 2   | South  |
    +-----+--------+
    | 3   | West   |
    +-----+--------+

    **Observation space**: integer between 0 and 127 representing the state as an octal number, <gold bit><row><column>

    .. image:: /_static/gold-explorer-state.png
        :alt: Gold Explorer State
        :width: 100%
        :align: center

    **Reward**: +15 for obtaining gold coins, +30 for obtaining the motherlode, -30 for entering a mine field, -1 for every other location
    """
    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 5
    }

    def __init__(self,
                 render_mode: Optional[str] = "rgb_array"
                 ) -> None:
        """
        Sets up the puzzle bookkeeping, the renderer and the icon positions.

        Args:
            render_mode (Optional[str]): rendering mode handed to the
                GridworldRender helper; ``render`` distinguishes "human"
                and "rgb_array"
        """
        self.render_mode = render_mode
        self.num_states = 128
        self.num_actions = 4

        # Explorer coordinates: x and y run from 0 to 7, z from 0 to 1.
        self.expl_x = 0
        self.expl_y = 0
        self.expl_z = 0

        # State indices (see get_state_index) grouped by what happens there.
        self.win = {15, 62, 79, 126}  # entering ends the game with +30
        self.loss = {13, 17, 28, 32, 51, 77, 81, 92, 96, 115}  # entering ends the game with -30
        self.coins = {50}  # entering gives +15 and moves the explorer to z = 1
        self.mount = {1, 9, 49, 65, 73, 113}  # moves into these are bounced back

        # Resolve each distinct icon file once, relative to this module.
        module_dir = os.path.dirname(__file__)
        explorer_icon = os.path.join(module_dir, 'icons/explorer.png')
        mountain_icon = os.path.join(module_dir, 'icons/mountain.png')
        mine_icon = os.path.join(module_dir, 'icons/mine.png')
        coins_icon = os.path.join(module_dir, 'icons/coins.png')
        gold_icon = os.path.join(module_dir, 'icons/gold.png')

        icon_files = {
            'explorer': explorer_icon,
            'mountain1': mountain_icon,
            'mountain2': mountain_icon,
            'mountain3': mountain_icon,
            'mine1': mine_icon,
            'mine2': mine_icon,
            'mine3': mine_icon,
            'mine4': mine_icon,
            'mine5': mine_icon,
            'coins': coins_icon,
            'gold1': gold_icon,
            'gold2': gold_icon,
        }

        self.gridworld_render = GridworldRender(
            grid_width=8,
            grid_height=8,
            window_size=(800, 800),
            render_mode=self.render_mode,
            render_fps=self.metadata['render_fps'],
            agent_icons=icon_files,
            window_title='Gold Explorer',
            background_color=(198, 236, 254)
        )

        # Grid position of every icon as [x, y]; the explorer entry is
        # refreshed on every call to render().
        self.agent_positions = {
            'mountain1': np.array([1, 0]),
            'mountain2': np.array([1, 1]),
            'mountain3': np.array([1, 6]),
            'mine1': np.array([1, 2]),
            'mine2': np.array([0, 4]),
            'mine3': np.array([5, 1]),
            'mine4': np.array([4, 3]),
            'mine5': np.array([3, 6]),
            'coins': np.array([2, 6]),
            'gold1': np.array([7, 1]),
            'gold2': np.array([6, 7]),
            'explorer': np.array([0, 0]),
        }

    # Get the key environment parameters

# [docs]
    def get_number_of_states(self) -> int:
        """
        Returns the number of states in the puzzle

        Returns:
            int: total number of states in the puzzle
        """
        return self.num_states



# [docs]
    def get_number_of_actions(self) -> int:
        """
        Returns the number of discrete actions in the puzzle

        Returns:
            int: total number of actions in the puzzle
        """
        return self.num_actions


    # Get the state IDs that should not be set optimistically
    def get_terminal_states(self):
        term = self.win.union(self.loss, self.mount)
        return term

    def get_state(self):
        """
        Returns the state index of the explorer's current position.

        Returns:
            The result of ``get_state_index`` for the stored x, y and z
            coordinates.
        """
        return get_state_index(x=self.expl_x, y=self.expl_y, z=self.expl_z)

    # Set the current state to the initial state

# [docs]
    def reset(self,
              seed: Optional[int] = None,
              randomize_start: Optional[bool] = False
              ) -> int:
        """
        Puts the explorer back at a starting position and returns its state.

        Args:
            seed (Optional[int]): when given, a dedicated random generator
                seeded with this value is installed on the environment, so
                randomized starts become reproducible. It stays in use for
                later resets until another seed is supplied. When no seed
                has ever been given, the module-level ``random`` generator
                is used.
            randomize_start (Optional[bool]): if true, draw a random start
                that is not a win, loss, mountain or coin state; otherwise
                start at x = 0, y = 0, z = 0

        Returns:
            int: state index of the starting position
        """
        # Previously the seed was accepted but never used. A per-instance
        # generator avoids reseeding the global ``random`` state.
        if seed is not None:
            self._rng = random.Random(seed)
        rng = getattr(self, '_rng', random)

        x, y, z = 0, 0, 0
        if randomize_start:
            # States the explorer is not allowed to start in.
            forbidden = self.win.union(self.loss, self.mount, self.coins)
            while True:
                x = rng.randint(0, 7)
                y = rng.randint(0, 7)
                z = rng.randint(0, 1)
                if get_state_index(x, y, z) not in forbidden:
                    break

        self.expl_x = x
        self.expl_y = y
        self.expl_z = z
        return get_state_index(self.expl_x, self.expl_y, self.expl_z)



# [docs]
    def execute_action(self,
                       action: int
                       ) -> Tuple[int, float, bool]:
        """
        Moves the explorer one cell in the requested direction and scores the move.

        Args:
            action (int): 0 = North, 1 = East, 2 = South; any other value is
                handled as West

        Returns:
            Tuple[int, float, bool]: the resulting state index, the reward
            for the move, and whether the game has ended
        """
        here = get_state_index(self.expl_x, self.expl_y, self.expl_z)

        # Already on a win or loss state: stay put, no further reward.
        if (here in self.win) or (here in self.loss):
            return here, 0, True

        # Standing on a mountain or on the coin state is penalised heavily;
        # normal moves never leave the explorer there (mountains bounce it
        # back, coins shift it to z = 1).
        if (here in self.mount) or (here in self.coins):
            return here, -1000, True

        col, row, level = self.expl_x, self.expl_y, self.expl_z

        # Propose the next cell; moves off the edge of the grid are ignored.
        if action == 0:  # North
            if row != 0:
                row = row - 1
        elif action == 1:  # East
            if col != 7:
                col = col + 1
        elif action == 2:  # South
            if row != 7:
                row = row + 1
        else:  # West
            if col != 0:
                col = col - 1

        target = get_state_index(col, row, level)

        if target in self.coins:
            # Picking up the coins moves the explorer to the second grid level.
            level = 1
            target = get_state_index(col, row, level)
            reward, finished = 15, False
        elif target in self.mount:
            # Mountains block the move: fall back to the current position.
            col, row, level = self.expl_x, self.expl_y, self.expl_z
            target = get_state_index(col, row, level)
            reward, finished = -1, False
        elif target in self.loss:
            reward, finished = -30, True
        elif target in self.win:
            reward, finished = 30, True
        else:
            reward, finished = -1, False

        self.expl_x, self.expl_y, self.expl_z = col, row, level
        return target, reward, finished



# [docs]
    def render(self):
        self.agent_positions['explorer'] = np.array([self.expl_x, self.expl_y])
        self.agent_positions['coins'] = None if self.expl_z == 1 else self.agent_positions['coins']
        
        if self.render_mode == 'human':
            self.gridworld_render.render(self.agent_positions)
        elif self.render_mode == 'rgb_array':
            return self.gridworld_render.render(self.agent_positions)




if __name__ == '__main__':
    # Demo run: start at the default position and take up to 20 random
    # actions, rendering after each one, stopping early if the game ends.
    env = GoldExplorer()
    env.reset(randomize_start=False)
    env.render()

    steps_left = 20
    done = False
    while steps_left > 0 and not done:
        random_action = np.random.randint(env.num_actions)
        _, _, done = env.execute_action(random_action)
        env.render()
        steps_left -= 1