
viberl.typing

Custom typing classes for reinforcement learning using Pydantic.

Classes:

Name        Description
Action      An action taken by an agent, optionally with log probabilities.
Transition  A single transition in an episode.
Trajectory  A complete trajectory (episode) consisting of multiple transitions.

Action

Action(**data: Any)

Bases: BaseModel

An action taken by an agent, optionally with log probabilities.

Attributes:

Name          Type
model_config
action        int
logprobs      Tensor | None
Source code in .venv/lib/python3.12/site-packages/pydantic/main.py
def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    __tracebackhide__ = True
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if self is not validated_self:
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            stacklevel=2,
        )

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

action instance-attribute

action: int

logprobs class-attribute instance-attribute

logprobs: Tensor | None = None
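
A minimal usage sketch, assuming PyTorch is available; the action index and probability below are illustrative:

import torch

from viberl.typing import Action

# Discrete action index only; logprobs defaults to None.
greedy = Action(action=2)

# Attach the log probability produced by a policy; arbitrary_types_allowed
# lets the model store a torch.Tensor directly.
sampled = Action(action=2, logprobs=torch.log(torch.tensor(0.25)))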

Transition

Transition(**data: Any)

Bases: BaseModel

A single transition in an episode.

Attributes:

Name          Type
model_config
state         ndarray
action        Action
reward        float
next_state    ndarray
done          bool
info          dict[str, Any]

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

state instance-attribute

state: ndarray

action instance-attribute

action: Action

reward instance-attribute

reward: float

next_state instance-attribute

next_state: ndarray

done instance-attribute

done: bool

info class-attribute instance-attribute

info: dict[str, Any] = {}
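
A sketch of recording a single environment step as a Transition; the observation arrays and reward are illustrative:

import numpy as np

from viberl.typing import Action, Transition

step = Transition(
    state=np.zeros(4),        # observation before the step
    action=Action(action=1),  # the agent's chosen action
    reward=1.0,
    next_state=np.ones(4),    # observation after the step
    done=False,
    info={},                  # optional extras returned by the environment
)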

Trajectory

Trajectory(**data: Any)

Bases: BaseModel

A complete trajectory (episode) consisting of multiple transitions.

Methods:

Name              Description
from_transitions  Create a trajectory from a list of transitions.
to_dict           Convert trajectory to dictionary format for agent learning.

Attributes:

Name          Type
model_config
transitions   list[Transition]
total_reward  float
length        int

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

transitions instance-attribute

transitions: list[Transition]

total_reward instance-attribute

total_reward: float

length instance-attribute

length: int

from_transitions classmethod

from_transitions(transitions: list[Transition]) -> Trajectory

Create a trajectory from a list of transitions.

Source code in viberl/typing.py
@classmethod
def from_transitions(cls, transitions: list[Transition]) -> Trajectory:
    """Create a trajectory from a list of transitions."""
    total_reward = sum(t.reward for t in transitions)
    return cls(transitions=transitions, total_reward=total_reward, length=len(transitions))
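
A short sketch of building a trajectory from collected transitions; total_reward and length are computed by the classmethod rather than passed in (the two transitions are illustrative):

import numpy as np

from viberl.typing import Action, Trajectory, Transition

transitions = [
    Transition(state=np.zeros(4), action=Action(action=0), reward=1.0,
               next_state=np.ones(4), done=False),
    Transition(state=np.ones(4), action=Action(action=1), reward=0.5,
               next_state=np.zeros(4), done=True),
]
traj = Trajectory.from_transitions(transitions)
assert traj.total_reward == 1.5
assert traj.length == 2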

to_dict

to_dict() -> dict

Convert trajectory to dictionary format for agent learning.

Source code in viberl/typing.py
def to_dict(self) -> dict:
    """Convert trajectory to dictionary format for agent learning."""
    return {
        'states': [t.state for t in self.transitions],
        'actions': [t.action.action for t in self.transitions],
        'rewards': [t.reward for t in self.transitions],
        'next_states': [t.next_state for t in self.transitions],
        'dones': [t.done for t in self.transitions],
        'logprobs': [
            t.action.logprobs for t in self.transitions if t.action.logprobs is not None
        ],
        'infos': [t.info for t in self.transitions],
    }
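
A self-contained sketch of converting a trajectory for a learning update. Note that the 'logprobs' list only includes transitions whose action carries log probabilities, so it may be shorter than the other lists (here it is empty):

import numpy as np

from viberl.typing import Action, Trajectory, Transition

traj = Trajectory.from_transitions([
    Transition(state=np.zeros(2), action=Action(action=0), reward=1.0,
               next_state=np.ones(2), done=True),
])
batch = traj.to_dict()
print(batch['actions'])   # [0]
print(batch['rewards'])   # [1.0]
print(batch['logprobs'])  # [] -- no log probabilities were recorded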