# Source code for emdp.examples.tricky_gridworlds

"""
Environments with Tricky Rewards
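Three kinds of worlds are available:

1. Symmetric Grid World (make_symmetric_epsilon_reward_env)
    (0, size-1): best_reward
    (size-1, 0): epsilon * best_reward
2. Multi-minima Grid World (make_multi_minima_reward_env)
   Rewards lie on the anti-diagonal; for example, in a 3x3 grid:
    (2,0): best_reward
    (1,1): best_reward*2/3
    (0,2): best_reward*1/3
3. Four-minima Grid World (make_four_minima_env)
   size must be odd; with middle = size//2:
    (0, middle): (2 - epsilon) * best_reward
    (middle, 0): epsilon * best_reward
    (middle, size-1): epsilon * best_reward
    (size-1, middle): best_reward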
"""
import numpy as np
from emdp.gridworld.builder_tools import build_simple_grid_world_with_terminal_states

GRID_SIZE = 10

def make_symmetric_epsilon_reward_env(epsilon, best_reward=+5, size=GRID_SIZE, p_success=1, gamma=0.99, seed=2017):
    """
    Build a symmetric grid world with a reward in two opposite corners.

    The rewards are placed at [(x,y): reward]:
        (0, size-1): best_reward
        (size-1, 0): epsilon * best_reward

    :param epsilon: the proportion of ``best_reward`` to place in the
        second corner
    :param best_reward: the full reward placed in the first corner
    :param size: size of the grid world
    :param p_success: the probability an action is successful
    :param gamma: the discount factor for the MDP
    :param seed: the seed for the MDP
    :return: tuple ``(world, reward_spec)`` where ``world`` is whatever
        ``build_simple_grid_world_with_terminal_states`` returns and
        ``reward_spec`` is the dict ``{(x, y): reward}`` used to build it
    """
    far_edge = size - 1

    # Insert the full reward first and the scaled one second, so that the
    # dict ordering (and the winner when size == 1 makes both keys equal)
    # is the same as a literal written in that order.
    reward_spec = {}
    reward_spec[(0, far_edge)] = best_reward
    reward_spec[(far_edge, 0)] = epsilon * best_reward

    world = build_simple_grid_world_with_terminal_states(
        reward_spec,
        size=size,
        gamma=gamma,
        p_success=p_success,
        seed=seed,
    )
    return world, reward_spec
def make_multi_minima_reward_env(best_reward=5, size=GRID_SIZE, p_success=1, gamma=0.99, shuffle_rewards=False, seed=2017):
    """
    Build a grid world with one reward on every cell of the anti-diagonal.

    The rewards grow linearly along the diagonal. For example, in a 3x3
    grid world we have:
        (2,0): best_reward
        (1,1): best_reward*2/3
        (0,2): best_reward*1/3

    If ``shuffle_rewards`` is True, the same reward values are jumbled
    along the diagonal.

    :param best_reward: the largest reward on the diagonal
    :param size: size of the grid world (also the number of rewards)
    :param p_success: the probability an action is successful
    :param gamma: the discount factor for the MDP
    :param shuffle_rewards: shuffle the rewards on the diagonal.
    :param seed: the seed for the MDP; also seeds the shuffle so that the
        jumbled layout is reproducible
    :return: tuple ``(world, reward_spec)`` where ``world`` is whatever
        ``build_simple_grid_world_with_terminal_states`` returns and
        ``reward_spec`` is the dict ``{(x, y): reward}`` used to build it
    """
    # Reward k of `size` is best_reward * k / size, for k = 1..size.
    diagonal_rewards = []
    for rank in range(1, size + 1):
        diagonal_rewards.append(best_reward * rank / size)

    if shuffle_rewards:
        # A dedicated RandomState keeps the shuffle reproducible and leaves
        # numpy's global random state untouched.
        np.random.RandomState(seed).shuffle(diagonal_rewards)

    reward_spec = {
        (x, size - x - 1): reward
        for x, reward in enumerate(diagonal_rewards)
    }

    world = build_simple_grid_world_with_terminal_states(
        reward_spec,
        size=size,
        gamma=gamma,
        p_success=p_success,
        seed=seed,
    )
    return world, reward_spec
def make_four_minima_env(epsilon, best_reward=5, size=GRID_SIZE, p_success=1, gamma=0.99, seed=2017):
    """
    Build a grid world with a reward in the middle of each of its four edges.

    With ``middle = size//2`` the rewards are:
        (0, middle): (2 - epsilon) * best_reward
        (middle, 0): epsilon * best_reward
        (middle, size-1): epsilon * best_reward
        (size-1, middle): best_reward

    The start state passed to the builder is ``middle * size + middle``,
    intended to put the agent in the middle of the grid. Note that size
    must be odd.

    :param epsilon: scales the two ``epsilon * best_reward`` rewards and
        sets the ``(2 - epsilon) * best_reward`` one
    :param best_reward: the base reward value
    :param size: size of the grid world; must be odd
    :param p_success: forwarded to the builder as ``p_success``
    :param gamma: forwarded to the builder as ``gamma``
    :param seed: forwarded to the builder as ``seed``
    :return: tuple ``(world, reward_spec)`` where ``world`` is whatever
        ``build_simple_grid_world_with_terminal_states`` returns and
        ``reward_spec`` is the dict ``{(x, y): reward}`` used to build it
    :raises ValueError: if ``size`` is even
    """
    if not size % 2:
        raise ValueError('grid must have an odd size.')

    centre = size // 2
    last = size - 1
    scaled_reward = epsilon * best_reward

    # Insertion order is kept the same as the documented listing above.
    reward_spec = dict([
        ((0, centre), (2 - epsilon) * best_reward),
        ((centre, 0), scaled_reward),
        ((centre, last), scaled_reward),
        ((last, centre), best_reward),
    ])

    world = build_simple_grid_world_with_terminal_states(
        reward_spec,
        size=size,
        gamma=gamma,
        p_success=p_success,
        seed=seed,
        start_state=centre * size + centre,
    )
    return world, reward_spec