Source code for prt_sim.jhu.blackjack

import random
from typing import Tuple, Optional
from prt_sim.jhu.base import BaseEnvironment

class CardDeck:
    """Supplies a short, randomly ordered run of cards for one game.

    ``cards`` is the full 52-entry pool of card values: each of the values
    1 through 9 appears four times and the value 10 appears sixteen times.
    ``deal_seq`` holds the cards that are still waiting to be dealt.
    """

    def __init__(self):
        # Values 1..9 four times each, in ascending order, then sixteen 10s.
        low_values = [value for value in range(1, 10) for _ in range(4)]
        self.cards = low_values + [10] * 16
        self.deal_seq = []

    def shuffle_cards(self):
        """Replace ``deal_seq`` with 24 cards drawn without replacement from ``cards``."""
        self.deal_seq = random.sample(self.cards, k=24)

    def deal_card(self):
        """Remove and return the card at the front of ``deal_seq``.

        Raises:
            IndexError: if ``deal_seq`` is empty.
        """
        top_card = self.deal_seq[0]
        del self.deal_seq[0]
        return top_card
class Blackjack(BaseEnvironment):
    r"""
    Blackjack simulation class.

    One agent plays a single hand against a dealer. Cards are integer values
    1-10 drawn from a :class:`CardDeck`; a card of value 1 is an ace and may be
    counted as 11 ("usable") while that does not push the hand over 21.

    Actions:
        * ``0`` - stick: the dealer draws until reaching 17 or more and the
          game is scored.
        * ``1`` - hit: the agent draws one more card.

    States:
        Non-terminal states are encoded by :meth:`get_state_index` as
        ``(agent_total - 12) + 10 * (dealer_card - 1) + 100 * usable_ace``,
        which yields ids 0-199 for agent totals 12-21, dealer up-cards 1-10
        and ``usable_ace`` in {0, 1}. Terminal states are 201 (agent loses),
        202 (tie) and 203 (agent wins).
    """

    # Terminal state ids; kept private so they cannot shadow base-class names.
    _LOSE_STATE = 201
    _TIE_STATE = 202
    _WIN_STATE = 203

    # The dealer keeps drawing while below this total.
    _DEALER_STAND_TOTAL = 17

    def __init__(self):
        self.deck = CardDeck()
        self.agent_total = 0
        self.usable_ace = 0
        self.dealer_card = 0
        self.dealer_total = 0
        self.dealer_ace = 0
        self.current_state = 0
        # NOTE(review): the largest state id emitted is 203, so a table indexed
        # directly by state id needs 204 rows - confirm how callers use this.
        self.num_states = 203
        self.num_actions = 2

    def get_number_of_states(self) -> int:
        """Return the value of ``num_states``."""
        return self.num_states

    def get_number_of_actions(self) -> int:
        """Return the number of available actions (stick and hit)."""
        return self.num_actions

    def reset(self,
              seed: Optional[int] = None,
              randomize_start: Optional[bool] = False
              ) -> int:
        """Shuffle the deck, deal the opening hands and return the initial state.

        The dealer receives two cards (the first is the face-up card used in
        the state encoding), then the agent receives two. If the agent's
        opening total is below 12, further cards are dealt to the agent until
        the total is at least 12.

        Args:
            seed: if given, seeds the global ``random`` module before shuffling.
            randomize_start: must be falsy; random starts are not supported.

        Returns:
            The initial state id. If the agent is dealt 21 on the first two
            cards this is already a terminal id: 202 when the dealer also
            holds 21, otherwise 203.
        """
        assert not randomize_start, "Randomizing the start is not supported"

        if seed is not None:
            random.seed(seed)

        self.deck.shuffle_cards()
        self.agent_total = 0
        self.usable_ace = 0
        self.dealer_card = 0
        self.dealer_total = 0
        self.dealer_ace = 0
        self.current_state = 0

        # Deal a face-up card and a second card to the dealer.
        self.dealer_card = self.deck.deal_card()
        d_card_2 = self.deck.deal_card()
        self.dealer_total = self.dealer_card + d_card_2
        if self.dealer_card == 1 or d_card_2 == 1:
            # Count one ace as 11.
            self.dealer_ace = 1
            self.dealer_total += 10

        # Deal two cards to the agent.
        card_1 = self.deck.deal_card()
        card_2 = self.deck.deal_card()
        self.agent_total = card_1 + card_2
        if card_1 == 1 or card_2 == 1:
            # Count one ace as 11.
            self.usable_ace = 1
            self.agent_total += 10

        if self.agent_total == 21:
            # The agent has a natural: the game is decided immediately.
            if self.dealer_total == 21:
                self.current_state = self._TIE_STATE
            else:
                self.current_state = self._WIN_STATE
        else:
            # Otherwise deal enough cards to the agent so that the total is > 11.
            while self.agent_total < 12:
                new_card = self.deck.deal_card()
                self.agent_total += new_card
                # An ace becomes usable only if none is held yet; the "< 12"
                # check keeps the total at 21 or less after adding 10.
                if new_card == 1 and self.usable_ace == 0 and self.agent_total < 12:
                    self.usable_ace = 1
                    self.agent_total += 10

            # Now determine the initial state.
            self.current_state = self.get_state_index()

        # Reset complete; return the initial state.
        return self.current_state

    def execute_action(self,
                       action: int
                       ) -> Tuple[int, float, bool]:
        """Use the agent's action to determine the next state and reward.

        Args:
            action: ``0`` to stick, ``1`` to hit.

        Returns:
            A tuple ``(new_state, reward, game_end)``. The reward is ``1.0``
            for an agent win, ``-1.0`` for a loss and ``0.0`` for a tie or a
            non-terminal hit.

        Raises:
            ValueError: if ``action`` is neither ``0`` nor ``1``.
        """
        if action == 0:
            # Action is 'stick': the dealer plays out its hand.
            while self.dealer_total < self._DEALER_STAND_TOTAL:
                new_card = self.deck.deal_card()
                self.dealer_total += new_card
                # A newly drawn ace counts as 11 while that keeps the dealer
                # at 21 or less. The adjustment belongs to the dealer's total.
                if new_card == 1 and self.dealer_ace == 0 and self.dealer_total < 12:
                    self.dealer_ace = 1
                    self.dealer_total += 10
                # On a bust with an ace counted as 11, recount it as 1 so the
                # dealer may keep drawing.
                if self.dealer_total > 21 and self.dealer_ace == 1:
                    self.dealer_ace = 0
                    self.dealer_total -= 10

            if self.dealer_total > 21 or self.dealer_total < self.agent_total:
                # Dealer busted or finished lower; agent wins.
                new_state, reward = self._WIN_STATE, 1
            elif self.dealer_total > self.agent_total:
                # Dealer wins.
                new_state, reward = self._LOSE_STATE, -1
            else:
                # Tie.
                new_state, reward = self._TIE_STATE, 0
            game_end = True
        elif action == 1:
            # Action is 'hit': the game only ends if the agent busts.
            new_state = self.get_next_state()
            game_end = new_state == self._LOSE_STATE
            reward = -1 if game_end else 0
        else:
            raise ValueError(
                f"Invalid action {action!r}; expected 0 (stick) or 1 (hit)"
            )

        self.current_state = new_state
        return new_state, float(reward), game_end

    def get_state(self):
        """Return the current state id."""
        return self.current_state

    def get_state_index(self):
        """Encode the agent total, dealer up-card and usable-ace flag as a state id."""
        a_idx = self.agent_total - 12
        d_idx = 10 * (self.dealer_card - 1)
        u_idx = 100 * self.usable_ace
        return a_idx + d_idx + u_idx

    def get_next_state(self):
        """Deal one card to the agent and return the resulting state id.

        Returns 201 (the losing state) if the agent busts, otherwise the
        encoded non-terminal state.
        """
        new_card = self.deck.deal_card()
        self.agent_total += new_card
        # On a bust with a usable ace, recount the ace as 1.
        if self.agent_total > 21 and self.usable_ace == 1:
            self.usable_ace = 0
            self.agent_total -= 10

        if self.agent_total > 21:
            return self._LOSE_STATE
        return self.get_state_index()