trinity.common.experience module

trinity.common.experience module#

Experience Class.

class trinity.common.experience.EID(batch: int | str = '', task: int | str = '', run: int = 0, step: int = 0, suffix: str = <factory>)[source]#

Bases: object

Experience ID class to uniquely identify an experience.

To enable the full functionality of experience grouping, users should manually set the run and step fields in custom workflows.

batch: int | str = ''#

task: int | str = ''#

run: int = 0#

step: int = 0#

suffix: str#

property uid: str#: A unique identifier for the experience.

property sid: str#

Step ID of the experience.

For example, experiences generated by all runs of the same task at the same step will have the same sid.

property rid: str#

Run ID of the experience.

For example, experiences generated by one run of a task across all steps will have the same rid.

property tid: str#

Task ID for the experience.

For example, experiences generated by all runs of the same task in GRPO-like algorithms will have the same tid.

to_dict() → dict[source]#: Convert the EID to a dictionary.

__init__(batch: int | str = '', task: int | str = '', run: int = 0, step: int = 0, suffix: str = <factory>) → None#

class trinity.common.experience.CustomField(source_field: str, destination_field: str, data_type: dtype)[source]#

Bases: object

Custom field for Experiences.

This is used to store additional information in the Experiences class.

source_field: str#

destination_field: str#

data_type: dtype#

__init__(source_field: str, destination_field: str, data_type: dtype) → None#

class trinity.common.experience.Experience(*, eid=None, tokens, logprobs=None, reward=None, token_level_reward=None, advantages=None, returns=None, truncate_status=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, tools=None, chosen=None, rejected=None, chosen_messages=None, rejected_messages=None, multi_modal_inputs=None, teacher_logprobs=None, routed_experts=None, custom_fields=None)[source]#

Bases: object

__init__(*, eid=None, tokens, logprobs=None, reward=None, token_level_reward=None, advantages=None, returns=None, truncate_status=None, info=None, metrics=None, prompt_length=1, response_text=None, prompt_text=None, action_mask=None, messages=None, tools=None, chosen=None, rejected=None, chosen_messages=None, rejected_messages=None, multi_modal_inputs=None, teacher_logprobs=None, routed_experts=None, custom_fields=None)[source]#

eid: EID#

reward: float | None = None#

token_level_reward: Tensor | None = None#

advantages: Tensor | None = None#

returns: Tensor | None = None#

info: dict#

metrics: dict[str, float]#

truncate_status: str | None = None#

prompt_length: int = 1#

response_text: str | None = None#

prompt_text: str | None = None#

messages: List[dict] | None = None#

tools: List[dict] | None = None#

chosen_messages: List[dict] | None = None#

rejected_messages: List[dict] | None = None#

multi_modal_inputs: Dict[str, Tensor] | None = None#

tokens: Tensor | None = None#

logprobs: Tensor | None = None#

action_mask: Tensor | None = None#

chosen: Tensor | None = None#

rejected: Tensor | None = None#

teacher_logprobs: Tensor | None = None#

routed_experts: Tensor | None = None#

custom_fields: List[CustomField]#

serialize() → bytes[source]#: Serialize the experience to bytes.

classmethod deserialize(data: bytes) → Experience[source]#

classmethod serialize_many(experiences: List[Experience]) → bytes[source]#

Serialize a list of experiences into a compact bytes payload.

Tensor fields are packed with safetensors while non-tensor fields are packed as metadata via pickle.

classmethod deserialize_many(data: bytes) → List[Experience][source]#

Deserialize bytes into a list of experiences.

Supports both new batched payloads and legacy single-experience pickle payloads.

to_dict() → dict[source]#: Convert the experience to a dictionary.

trinity.common.experience.split_dpo_experience_to_single_turn(experiences: List[Experience]) → List[Experience][source]#

trinity.common.experience.gather_token_ids(experiences, max_prompt_length: int, max_response_length: int, pad_token_id: int) → Tensor[source]#

trinity.common.experience.gather_action_masks(experiences, max_response_length: int) → Tensor[source]#

trinity.common.experience.gather_attention_masks(experiences, max_prompt_length: int, max_response_length: int) → Tensor[source]#

trinity.common.experience.gather_response_attrs(experiences, attr_name: str, max_response_length: int, pad_value: int = 0) → Tensor[source]#

trinity.common.experience.gather_multi_modal_inputs(experiences) → Dict[str, Tensor][source]#

trinity.common.experience.group_by(experiences: List[Experience], id_type: Literal['task', 'run', 'step']) → Dict[str, List[Experience]][source]#: Group experiences by ID.

trinity.common.experience.to_hf_datasets(experiences: list[Experience]) → Dataset[source]#: Convert a list of Experience objects to a HuggingFace Dataset, preserving all fields.

trinity.common.experience.from_hf_datasets(dataset: Dataset) → List[Experience][source]#: Convert a HuggingFace Dataset back to a list of Experience objects.

trinity.common.experience module

Contents

trinity.common.experience module#