# Source code for emdp.utils

from collections.abc import Iterable

import numpy as np

def convert_int_rep_to_onehot(state, vector_size: int) -> np.ndarray:
    """Convert the int representation of a state (or states) to onehot representation.

    Examples:

        >>> convert_int_rep_to_onehot(1, 5)
        array([0., 1., 0., 0., 0.])

        >>> convert_int_rep_to_onehot(np.array([1, 2]), 5)
        array([[0., 1., 0., 0., 0.],
               [0., 0., 1., 0., 0.]])

    Args:
        state: int representation of a single state, or a sized sequence
            (list, tuple, 1-D array) of such ints.
        vector_size (int): size of onehot representation.

    Returns:
        np.ndarray: float array of shape ``(vector_size,)`` for a single
        state, or ``(len(state), vector_size)`` for a sequence of states.

    Raises:
        IndexError: if a state index is out of bounds for ``vector_size``.
    """
    try:
        n_states = len(state)
    except TypeError:
        # Plain ints, NumPy integer scalars and 0-d arrays have no length,
        # so they are treated as one single state.
        onehot = np.zeros(vector_size)
        onehot[state] = 1
        return onehot

    onehot = np.zeros((n_states, vector_size))
    # Iterating a 2-D array yields row views, so each write lands in `onehot`.
    for row, index in zip(onehot, state):
        row[index] = 1
    return onehot

def convert_onehot_to_int(state: np.ndarray):
    """Convert the onehot representation of a state (or states) to index (or indices).

    Examples:

        >>> convert_onehot_to_int(np.array([0,0,0,1,0]))
        3

        >>> convert_onehot_to_int(np.array([[0,0,0,1,0],[0,1,0,0,0]]))
        array([3, 1])

    Args:
        state (np.ndarray): onehot representation of state (or states).
            Other array-likes (e.g. nested lists) are converted to an
            array first.

    Returns:
        Position of the largest entry along the last axis: a single index
        for one onehot vector, an array of indices for a stack of vectors.
    """
    # asanyarray passes ndarrays (and subclasses) through untouched and
    # converts any other array-like, so no explicit isinstance check is needed.
    return np.asanyarray(state).argmax(axis=-1)

#
# def xy_to_flatten_state(state, size):
#     """Flatten state (x,y) into a one hot vector of size"""
#     idx = self.size * state[0] + state[1]
#     one_hot = np.zeros(self.size * self.size)
#     one_hot[idx] = 1
#     return one_hot
#
# def unflatten_state(self, onehot):
#     onehot = onehot.reshape(self.size, self.size)
#     x = onehot.argmax(0).max()
#     y = onehot.argmax(1).max()
#     return (x, y)

# def step(self, action):
#     """action must be the index of an action"""
#     # get the vector representing the next state probabilities:
#     current_state_idx = np.argmax(self.current_state)
#     next_state_probs = self.P[current_state_idx, action]
#     # sample the next state
#     sampled_next_state = np.random.choice(np.arange(self.P.shape[0]), p=next_state_probs)
#     # observe the reward
#     reward = self.r[current_state_idx, action]
#     self.current_state = self.convert_int_rep_to_onehot(sampled_next_state)
#     #         if reward > 0 :print(reward, current_state_idx, action)
#     return self.current_state, reward, sampled_next_state == self.P.shape[0] - 1, {}