Skip to content

viberl.utils.mock_env

Mock environment for testing RL algorithms.

Provides a gymnasium-compatible environment that returns random valid values for all methods, useful for testing agents without complex environment setup.

Classes:

Name Description
MockEnv

A mock environment that returns random valid values for testing.

MockEnv

MockEnv(state_size: int = 4, action_size: int = 2, max_episode_steps: int = 100)

Bases: Env

A mock environment that returns random valid values for testing.

This environment provides:

- Random observations within the observation space
- Random rewards within the reward range
- Random terminal states
- Random info dictionaries

Parameters:

Name Type Description Default
state_size int

Size of the observation space

4
action_size int

Number of discrete actions

2
max_episode_steps int

Maximum steps before truncation

100

Methods:

Name Description
reset

Reset the environment with random initial state.

step

Take a step with random outcomes.

render

Mock render - does nothing.

close

Mock close - does nothing.

seed

Set random seed for reproducibility.

Attributes:

Name Type Description
state_size
action_size
max_episode_steps
observation_space
action_space
current_step
Source code in viberl/utils/mock_env.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self,
    state_size: int = 4,
    action_size: int = 2,
    max_episode_steps: int = 100,
) -> None:
    """Set up the mock environment's configuration, spaces and RNG.

    Args:
        state_size: Length of the 1-D observation vector.
        action_size: Number of discrete actions.
        max_episode_steps: Step limit stored on the instance.
    """
    super().__init__()

    # Keep the constructor arguments available as plain attributes.
    self.state_size = state_size
    self.action_size = action_size
    self.max_episode_steps = max_episode_steps

    # Observations are float32 vectors bounded to [-1, 1]; actions are
    # integers drawn from a discrete set of size ``action_size``.
    obs_shape = (state_size,)
    self.observation_space = spaces.Box(
        low=-1.0,
        high=1.0,
        shape=obs_shape,
        dtype=np.float32,
    )
    self.action_space = spaces.Discrete(action_size)

    # Episode bookkeeping and an unseeded private random source.
    self.current_step = 0
    self._np_random = np.random.RandomState()

state_size instance-attribute

state_size = state_size

action_size instance-attribute

action_size = action_size

max_episode_steps instance-attribute

max_episode_steps = max_episode_steps

observation_space instance-attribute

observation_space = Box(low=-1.0, high=1.0, shape=(state_size,), dtype=float32)

action_space instance-attribute

action_space = Discrete(action_size)

current_step instance-attribute

current_step = 0

reset

reset(seed: int | None = None, options: dict | None = None) -> tuple[ndarray, dict]

Reset the environment with random initial state.

Source code in viberl/utils/mock_env.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def reset(
    self,
    seed: int | None = None,
    options: dict | None = None,
) -> tuple[np.ndarray, dict]:
    """Begin a new episode and return a randomly drawn first observation.

    Args:
        seed: When not ``None``, the private ``RandomState`` is replaced by
            one seeded with this value and numpy's global random state is
            seeded with it as well.
        options: Accepted but not read by this method.

    Returns:
        ``(observation, info)`` where ``observation`` is a float32 array drawn
        uniformly between the observation space's bounds and ``info`` holds
        the keys ``'episode'``, ``'step'`` and ``'random_metric'``.
    """
    super().reset(seed=seed)

    if seed is not None:
        self._np_random = np.random.RandomState(seed)
        # Seeds numpy's global generator too, not just the private one.
        np.random.seed(seed)

    self.current_step = 0

    rng = self._np_random
    space = self.observation_space

    # Draw the observation first, then the info metric, so the order of
    # values consumed from the random stream stays fixed.
    sample = rng.uniform(low=space.low, high=space.high, size=space.shape)
    obs = sample.astype(np.float32)
    info = {'episode': 0, 'step': 0, 'random_metric': rng.random()}

    return obs, info

step

step(action: int) -> tuple[ndarray, float, bool, bool, dict]

Take a step with random outcomes.

Parameters:

Name Type Description Default
action int

The action to take

required

Returns:

Type Description
tuple[ndarray, float, bool, bool, dict]

observation, reward, terminated, truncated, info

Source code in viberl/utils/mock_env.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, dict]:
    """
    Advance the mock episode by one step and return random outcomes.

    If the step counter has already reached ``max_episode_steps`` the call
    does not advance the counter and returns an all-zero observation, a
    reward of ``0.0`` and both ``terminated`` and ``truncated`` set to True.

    Args:
        action: The action to take; must be accepted by ``action_space``.

    Returns:
        observation, reward, terminated, truncated, info
    """
    assert self.action_space.contains(action), f'Invalid action: {action}'

    limit = self.max_episode_steps
    space = self.observation_space

    # Step budget already used up: hand back a fixed, non-random result.
    if self.current_step >= limit:
        exhausted_info = {'step': self.current_step, 'truncated': True}
        zeros = np.zeros(space.shape, dtype=np.float32)
        return zeros, 0.0, True, True, exhausted_info

    self.current_step += 1
    rng = self._np_random

    # The draws below happen in a fixed order (observation, reward,
    # termination, info) so a seeded run always consumes the stream the
    # same way.
    obs = rng.uniform(
        low=space.low,
        high=space.high,
        size=space.shape,
    ).astype(np.float32)

    # Reward is uniform between -1 and 1.
    reward = float(rng.uniform(-1.0, 1.0))

    # Each step terminates the episode with probability 0.05.
    terminated = bool(rng.random() < 0.05)

    # Truncate once the counter hits the configured limit.
    truncated = self.current_step >= limit

    done = terminated or truncated
    info = {
        'step': self.current_step,
        'action_taken': action,
        'random_info': rng.random(),
        'episode_complete': done,
    }

    return obs, reward, terminated, truncated, info

render

render() -> None

Mock render - does nothing.

Source code in viberl/utils/mock_env.py
128
129
def render(self) -> None:
    """No-op render hook: draws nothing and returns ``None``."""
    return None

close

close() -> None

Mock close - does nothing.

Source code in viberl/utils/mock_env.py
131
132
def close(self) -> None:
    """No-op close hook: releases nothing and returns ``None``."""
    return None

seed

seed(seed: int | None = None) -> None

Set random seed for reproducibility.

Source code in viberl/utils/mock_env.py
134
135
136
def seed(self, seed: int | None = None) -> None:
    """Set random seed for reproducibility."""
    self._np_random = np.random.RandomState(seed)