Source code for prt_sim.jhu.blackjack

import random
from typing import Tuple, Optional
from prt_sim.jhu.base import BaseEnvironment


[docs]
class CardDeck:
    """Deck of blackjack point values that can be sampled and dealt in order.

    Suits are ignored: an ace is stored as ``1`` and every ten or face card
    as ``10``.
    """

    def __init__(self):
        # Ranks ace through nine occur four times each; the remaining sixteen
        # cards (tens and face cards) are all worth ten points.
        pip_cards = [rank for rank in range(1, 10) for _ in range(4)]
        ten_cards = [10] * 16
        self.cards = pip_cards + ten_cards
        # Cards still to be dealt in the current hand, front of the list first.
        self.deal_seq = []

    def shuffle_cards(self):
        """Replace the deal sequence with 24 cards sampled without replacement."""
        cards_per_hand = 24
        self.deal_seq = random.sample(self.cards, k=cards_per_hand)

    def deal_card(self):
        """Remove the card at the front of the deal sequence and return it."""
        next_card = self.deal_seq.pop(0)
        return next_card




[docs]
class Blackjack(BaseEnvironment):
    r"""
    Blackjack simulation class

    The agent plays one hand against a dealer with cards drawn from a
    :class:`CardDeck`. Aces are dealt as ``1`` and may be counted as ``11``
    (a "usable" ace) while that does not bust the hand.

    Actions:
        * ``0`` -- stick: the dealer draws until reaching 17 or more and the
          hand is scored.
        * ``1`` -- hit: the agent draws one more card.

    States:
        Non-terminal states are encoded by :meth:`get_state_index` as
        ``(agent_total - 12) + 10 * (dealer_card - 1) + 100 * usable_ace``.
        Terminal states are ``201`` (agent loses), ``202`` (tie) and ``203``
        (agent wins).
    """

    # Terminal state indices.
    _STATE_LOSE = 201
    _STATE_TIE = 202
    _STATE_WIN = 203

    # Action codes accepted by execute_action.
    _ACTION_STICK = 0
    _ACTION_HIT = 1

    def __init__(self):

        self.deck = CardDeck()
        self.agent_total = 0
        self.usable_ace = 0
        self.dealer_card = 0
        self.dealer_total = 0
        self.dealer_ace = 0
        self.current_state = 0
        # NOTE(review): the largest state index this class emits is 203, so a
        # zero-based table would need 204 rows -- confirm the intended
        # convention before changing this value.
        self.num_states = 203
        self.num_actions = 2

    def get_number_of_states(self) -> int:
        """Return the number of states reported by this environment."""
        return self.num_states

    def get_number_of_actions(self) -> int:
        """Return the number of actions (stick and hit)."""
        return self.num_actions

    def reset(self,
              seed: Optional[int] = None,
              randomize_start: Optional[bool] = False
              ) -> int:
        """Shuffle the deck, deal the opening hands and return the first state.

        Args:
            seed: If given, seeds the global ``random`` generator before the
                deck is shuffled.
            randomize_start: Not supported; must be falsy.

        Returns:
            The initial state index. If the agent is dealt a natural 21 this
            is already a terminal state: ``202`` when the dealer also holds
            21, otherwise ``203``.
        """
        assert not randomize_start, "Randomizing the start is not supported"
        if seed is not None:
            random.seed(seed)

        self.deck.shuffle_cards()
        self.agent_total = 0
        self.usable_ace = 0
        self.dealer_card = 0
        self.dealer_total = 0
        self.dealer_ace = 0
        self.current_state = 0

        # deal a face up card and a second card to the dealer
        self.dealer_card = self.deck.deal_card()
        d_card_2 = self.deck.deal_card()
        self.dealer_total = self.dealer_card + d_card_2
        if self.dealer_card == 1 or d_card_2 == 1:
            # count one ace as 11
            self.dealer_ace = 1
            self.dealer_total += 10

        # deal two cards to the agent
        card_1 = self.deck.deal_card()
        card_2 = self.deck.deal_card()
        self.agent_total = card_1 + card_2
        if card_1 == 1 or card_2 == 1:
            # count one ace as 11
            self.usable_ace = 1
            self.agent_total += 10

        # check to see if the agent has a natural (ace + ten-valued card)
        if self.agent_total == 21:
            if self.dealer_total == 21:
                self.current_state = self._STATE_TIE
            else:
                self.current_state = self._STATE_WIN

        # otherwise, deal enough cards to the agent so that the total is >11
        else:
            while self.agent_total < 12:
                new_card = self.deck.deal_card()
                self.agent_total += new_card
                # an ace drawn here is counted as 11 only while the hand is
                # still below 12 after adding it as 1
                if new_card == 1 and self.usable_ace == 0 and self.agent_total < 12:
                    self.usable_ace = 1
                    self.agent_total += 10
            # now determine the initial state
            self.current_state = self.get_state_index()

        # reset complete; return the initial state
        return self.current_state

    def execute_action(self,
                       action: int
                       ) -> Tuple[int, float, bool]:
        """Apply the agent's action and return the resulting transition.

        Args:
            action: ``0`` to stick or ``1`` to hit.

        Returns:
            A tuple ``(new_state, reward, game_end)``. The reward is ``1.0``
            for a win, ``-1.0`` for a loss and ``0.0`` for a tie or a
            non-terminal hit.

        Raises:
            ValueError: If ``action`` is neither ``0`` nor ``1``.
        """
        if action == self._ACTION_STICK:
            self._play_dealer_hand()
            if self.dealer_total > 21 or self.dealer_total < self.agent_total:
                # dealer busted or finished lower; agent wins
                new_state = self._STATE_WIN
                reward = 1
            elif self.dealer_total > self.agent_total:
                # dealer wins
                new_state = self._STATE_LOSE
                reward = -1
            else:
                # tie
                new_state = self._STATE_TIE
                reward = 0
            game_end = True

        elif action == self._ACTION_HIT:
            new_state = self.get_next_state()
            # the only terminal outcome of a hit is the agent going bust
            game_end = new_state == self._STATE_LOSE
            reward = -1 if game_end else 0

        else:
            raise ValueError(
                f"Invalid action {action!r}; expected 0 (stick) or 1 (hit)"
            )

        self.current_state = new_state
        return new_state, float(reward), game_end

    def _play_dealer_hand(self) -> None:
        """Draw cards for the dealer until the dealer's total is at least 17."""
        while self.dealer_total < 17:
            new_card = self.deck.deal_card()
            self.dealer_total += new_card
            # The soft-ace adjustments belong to the dealer's own total, not
            # the agent's.
            if new_card == 1 and self.dealer_ace == 0 and self.dealer_total < 12:
                self.dealer_ace = 1
                self.dealer_total += 10
            if self.dealer_total > 21 and self.dealer_ace == 1:
                # demote the ace from 11 to 1 instead of busting
                self.dealer_ace = 0
                self.dealer_total -= 10

    def get_state(self) -> int:
        """Return the current state index."""
        return self.current_state

    def get_state_index(self) -> int:
        """Encode the agent total, dealer up-card and usable-ace flag."""
        a_idx = self.agent_total - 12
        d_idx = 10 * (self.dealer_card - 1)
        u_idx = 100 * self.usable_ace
        return a_idx + d_idx + u_idx

    def get_next_state(self) -> int:
        """Deal one card to the agent and return the resulting state index.

        Returns:
            ``201`` if the agent busts, otherwise the encoded state from
            :meth:`get_state_index`.
        """
        new_card = self.deck.deal_card()
        self.agent_total += new_card
        if self.agent_total > 21 and self.usable_ace == 1:
            # demote the usable ace from 11 to 1 instead of busting
            self.usable_ace = 0
            self.agent_total -= 10
        if self.agent_total > 21:
            return self._STATE_LOSE
        return self.get_state_index()