`viberl.utils.mock_env`

Mock environment for testing RL algorithms.

Provides a gymnasium-compatible environment whose methods return random but valid values, which is useful for testing agents without any complex environment setup.

Classes:

Name	Description
`MockEnv`	A mock environment that returns random valid values for testing.

MockEnv

MockEnv(state_size: int = 4, action_size: int = 2, max_episode_steps: int = 100)

Bases: Env

A mock environment that returns random valid values for testing.

This environment provides random observations within the observation space, random rewards within the reward range, random terminal states, and random info dictionaries.

Parameters:

Name	Type	Description	Default
`state_size`	`int`	Size of the observation space	`4`
`action_size`	`int`	Number of discrete actions	`2`
`max_episode_steps`	`int`	Maximum steps before truncation	`100`

Methods:

Name	Description
`reset`	Reset the environment with random initial state.
`step`	Take a step with random outcomes.
`render`	Mock render - does nothing.
`close`	Mock close - does nothing.
`seed`	Set random seed for reproducibility.

Attributes:

Name	Type	Description
`state_size`
`action_size`
`max_episode_steps`
`observation_space`
`action_space`
`current_step`

Source code in viberl/utils/mock_env.py

def __init__(
    self,
    state_size: int = 4,
    action_size: int = 2,
    max_episode_steps: int = 100,
) -> None:
    """
    Set up the mock environment's sizes, spaces and internal state.

    Args:
        state_size: Length of the observation vector
        action_size: Number of discrete actions
        max_episode_steps: Step count at which an episode is truncated
    """
    super().__init__()

    # Keep the constructor arguments available as public attributes
    self.state_size, self.action_size = state_size, action_size
    self.max_episode_steps = max_episode_steps

    # Observations are float32 vectors bounded to [-1, 1]; actions are discrete
    obs_shape = (state_size,)
    self.observation_space = spaces.Box(
        low=-1.0,
        high=1.0,
        shape=obs_shape,
        dtype=np.float32,
    )
    self.action_space = spaces.Discrete(action_size)

    # Per-episode step counter and a private random state created without a seed
    self.current_step = 0
    self._np_random = np.random.RandomState()

state_size `instance-attribute`

state_size = state_size

action_size `instance-attribute`

action_size = action_size

max_episode_steps `instance-attribute`

max_episode_steps = max_episode_steps

observation_space `instance-attribute`

observation_space = Box(low=-1.0, high=1.0, shape=(state_size,), dtype=float32)

action_space `instance-attribute`

action_space = Discrete(action_size)

current_step `instance-attribute`

current_step = 0

reset

reset(seed: int | None = None, options: dict | None = None) -> tuple[ndarray, dict]

Reset the environment with random initial state.

Source code in viberl/utils/mock_env.py

def reset(
    self,
    seed: int | None = None,
    options: dict | None = None,
) -> tuple[np.ndarray, dict]:
    """
    Begin a new episode and return a random initial observation.

    Args:
        seed: When not None, re-creates the private random state from this
            seed and also seeds numpy's global random state
        options: Not read by this method

    Returns:
        observation, info
    """
    super().reset(seed=seed)

    if seed is not None:
        # Re-create the private random state, then seed numpy's global state too
        self._np_random = np.random.RandomState(seed)
        np.random.seed(seed)

    # A new episode always starts counting from zero
    self.current_step = 0

    rng = self._np_random
    space = self.observation_space

    # Draw the initial observation uniformly between the space bounds
    raw_obs = rng.uniform(low=space.low, high=space.high, size=space.shape)
    initial_obs = raw_obs.astype(np.float32)

    # The info dict carries one extra random draw taken after the observation
    info = {'episode': 0, 'step': 0, 'random_metric': rng.random()}

    return initial_obs, info

step

step(action: int) -> tuple[ndarray, float, bool, bool, dict]

Take a step with random outcomes.

Parameters:

Name	Type	Description	Default
`action`	`int`	The action to take	required

Returns:

Type	Description
`tuple[ndarray, float, bool, bool, dict]`	observation, reward, terminated, truncated, info

Source code in viberl/utils/mock_env.py

def step(self, action: int) -> tuple[np.ndarray, float, bool, bool, dict]:
    """
    Advance the mock environment by one step with randomly drawn outcomes.

    Once the step counter has already reached ``max_episode_steps``, further
    calls return a zero observation, zero reward and both end flags set,
    without advancing the counter or drawing random numbers.

    Args:
        action: The action to take; must be contained in the action space

    Returns:
        observation, reward, terminated, truncated, info
    """
    assert self.action_space.contains(action), f'Invalid action: {action}'

    step_limit = self.max_episode_steps
    space = self.observation_space

    # Guard clause: the episode was already exhausted before this call
    if self.current_step >= step_limit:
        blank_obs = np.zeros(space.shape, dtype=np.float32)
        exhausted_info = {'step': self.current_step, 'truncated': True}
        return blank_obs, 0.0, True, True, exhausted_info

    self.current_step += 1
    rng = self._np_random

    # The draws below happen in a fixed order so seeded runs are repeatable:
    # observation, reward, termination flag, then the info value.
    raw_obs = rng.uniform(low=space.low, high=space.high, size=space.shape)
    obs = raw_obs.astype(np.float32)

    # Reward is uniform in [-1, 1)
    reward = float(rng.uniform(-1.0, 1.0))

    # Each step terminates with probability 0.05
    terminated = bool(rng.random() < 0.05)

    # Truncate as soon as the counter reaches the step limit
    truncated = self.current_step >= step_limit

    episode_over = terminated or truncated
    info = {
        'step': self.current_step,
        'action_taken': action,
        'random_info': rng.random(),
        'episode_complete': episode_over,
    }

    return obs, reward, terminated, truncated, info

render

render() -> None

Mock render - does nothing.

Source code in viberl/utils/mock_env.py

def render(self) -> None:
    """Rendering placeholder: performs no work and returns None."""
    return None

close

close() -> None

Mock close - does nothing.

Source code in viberl/utils/mock_env.py

def close(self) -> None:
    """Closing placeholder: performs no work and returns None."""
    return None

seed

seed(seed: int | None = None) -> None

Set random seed for reproducibility.

Source code in viberl/utils/mock_env.py

def seed(self, seed: int | None = None) -> None:
    """Set random seed for reproducibility."""
    self._np_random = np.random.RandomState(seed)

viberl.utils.mock_env

MockEnv

state_size instance-attribute

action_size instance-attribute

max_episode_steps instance-attribute

observation_space instance-attribute

action_space instance-attribute

current_step instance-attribute

reset

step

render

close

seed

`viberl.utils.mock_env`

state_size `instance-attribute`

action_size `instance-attribute`

max_episode_steps `instance-attribute`

observation_space `instance-attribute`

action_space `instance-attribute`

current_step `instance-attribute`