# trinity.trainer.verl package
## Submodules
- trinity.trainer.verl.dp_actor module
- trinity.trainer.verl.fsdp_checkpoint_manager module
- trinity.trainer.verl.fsdp_workers module
- trinity.trainer.verl.megatron_actor module
- trinity.trainer.verl.megatron_checkpoint_manager module
- trinity.trainer.verl.megatron_workers module
- trinity.trainer.verl.monkey_patch module
- trinity.trainer.verl.utils module
- trinity.trainer.verl.verl_config module
  - Data
  - FusedKernelOptions
  - ActorModel
    - ActorModel.path
    - ActorModel.external_lib
    - ActorModel.override_config
    - ActorModel.enable_gradient_checkpointing
    - ActorModel.use_remove_padding
    - ActorModel.use_fused_kernels
    - ActorModel.fused_kernel_options
    - ActorModel.custom_chat_template
    - ActorModel.enable_activation_offload
    - ActorModel.use_shm
    - ActorModel.trust_remote_code
    - ActorModel.lora_rank
    - ActorModel.lora_alpha
    - ActorModel.target_modules
    - ActorModel.exclude_modules
    - ActorModel.lora_adapter_path
    - ActorModel.rope_scaling
    - ActorModel.rope_theta
    - ActorModel.__init__()
  - Optim
    - Optim.optimizer
    - Optim.optimizer_impl
    - Optim.lr
    - Optim.lr_warmup_steps
    - Optim.lr_warmup_steps_ratio
    - Optim.min_lr_ratio
    - Optim.warmup_style
    - Optim.lr_scheduler_type
    - Optim.total_training_steps
    - Optim.betas
    - Optim.clip_grad
    - Optim.lr_warmup_init
    - Optim.lr_decay_steps
    - Optim.lr_decay_style
    - Optim.min_lr
    - Optim.weight_decay
    - Optim.weight_decay_incr_style
    - Optim.lr_wsd_decay_style
    - Optim.lr_wsd_decay_steps
    - Optim.use_checkpoint_opt_param_scheduler
    - Optim.override_optimizer_config
    - Optim.__init__()
  - WrapPolicy
  - FSDPConfig
  - Checkpoint
  - OverrideTransformerConfig
  - MegatronConfig
    - MegatronConfig.param_offload
    - MegatronConfig.grad_offload
    - MegatronConfig.optimizer_offload
    - MegatronConfig.tensor_model_parallel_size
    - MegatronConfig.expert_model_parallel_size
    - MegatronConfig.expert_tensor_parallel_size
    - MegatronConfig.pipeline_model_parallel_size
    - MegatronConfig.virtual_pipeline_model_parallel_size
    - MegatronConfig.context_parallel_size
    - MegatronConfig.sequence_parallel
    - MegatronConfig.use_distributed_optimizer
    - MegatronConfig.use_dist_checkpointing
    - MegatronConfig.dist_checkpointing_path
    - MegatronConfig.seed
    - MegatronConfig.override_ddp_config
    - MegatronConfig.override_transformer_config
    - MegatronConfig.use_mbridge
    - MegatronConfig.dtype
    - MegatronConfig.use_remove_padding
    - MegatronConfig.__init__()
  - ProfileConfig
  - Actor
    - Actor.strategy
    - Actor.ppo_mini_batch_size
    - Actor.ppo_micro_batch_size
    - Actor.ppo_micro_batch_size_per_gpu
    - Actor.use_dynamic_bsz
    - Actor.ppo_max_token_len_per_gpu
    - Actor.fix_actor_microbatch_loss_scale
    - Actor.grad_clip
    - Actor.ppo_epochs
    - Actor.shuffle
    - Actor.ulysses_sequence_parallel_size
    - Actor.entropy_from_logits_with_chunking
    - Actor.entropy_checkpointing
    - Actor.checkpoint
    - Actor.optim
    - Actor.fsdp_config
    - Actor.megatron
    - Actor.profile
    - Actor.data_loader_seed
    - Actor.load_weight
    - Actor.policy_loss
    - Actor.profiler
    - Actor.router_replay
    - Actor.loss_agg_mode
    - Actor.loss_scale_factor
    - Actor.clip_ratio
    - Actor.clip_ratio_low
    - Actor.clip_ratio_high
    - Actor.entropy_coeff
    - Actor.use_kl_loss
    - Actor.__init__()
  - Ref
    - Ref.strategy
    - Ref.fsdp_config
    - Ref.log_prob_micro_batch_size
    - Ref.log_prob_micro_batch_size_per_gpu
    - Ref.log_prob_use_dynamic_bsz
    - Ref.log_prob_max_token_len_per_gpu
    - Ref.ulysses_sequence_parallel_size
    - Ref.entropy_from_logits_with_chunking
    - Ref.entropy_checkpointing
    - Ref.checkpoint
    - Ref.megatron
    - Ref.profile
    - Ref.load_weight
    - Ref.profiler
    - Ref.router_replay
    - Ref.__init__()
  - Rollout
  - ActorRolloutRef
  - CriticModel
  - Critic
    - Critic.enable
    - Critic.strategy
    - Critic.optim
    - Critic.model
    - Critic.ppo_mini_batch_size
    - Critic.ppo_micro_batch_size
    - Critic.ppo_micro_batch_size_per_gpu
    - Critic.forward_micro_batch_size
    - Critic.forward_micro_batch_size_per_gpu
    - Critic.use_dynamic_bsz
    - Critic.ppo_max_token_len_per_gpu
    - Critic.forward_max_token_len_per_gpu
    - Critic.ulysses_sequence_parallel_size
    - Critic.ppo_epochs
    - Critic.shuffle
    - Critic.grad_clip
    - Critic.cliprange_value
    - Critic.checkpoint
    - Critic.rollout_n
    - Critic.loss_agg_mode
    - Critic.megatron
    - Critic.profile
    - Critic.data_loader_seed
    - Critic.load_weight
    - Critic.nccl_timeout
    - Critic.ray_namespace
    - Critic.profiler
    - Critic.__init__()
  - RewardModel
    - RewardModel.enable
    - RewardModel.strategy
    - RewardModel.model
    - RewardModel.micro_batch_size_per_gpu
    - RewardModel.max_length
    - RewardModel.ulysses_sequence_parallel_size
    - RewardModel.use_dynamic_bsz
    - RewardModel.forward_max_token_len_per_gpu
    - RewardModel.reward_manager
    - RewardModel.use_reward_loop
    - RewardModel.__init__()
  - CustomRewardFunction
  - KL_Ctrl
  - RolloutCorrection
    - RolloutCorrection.rollout_is
    - RolloutCorrection.rollout_is_threshold
    - RolloutCorrection.rollout_rs
    - RolloutCorrection.rollout_rs_threshold
    - RolloutCorrection.rollout_rs_threshold_lower
    - RolloutCorrection.rollout_token_veto_threshold
    - RolloutCorrection.bypass_mode
    - RolloutCorrection.loss_type
    - RolloutCorrection.rollout_is_batch_normalize
    - RolloutCorrection.__init__()
  - Algorithm
  - Trainer
    - Trainer.balance_batch
    - Trainer.total_epochs
    - Trainer.total_training_steps
    - Trainer.project_name
    - Trainer.group_name
    - Trainer.experiment_name
    - Trainer.logger
    - Trainer.val_generations_to_log_to_wandb
    - Trainer.nnodes
    - Trainer.n_gpus_per_node
    - Trainer.save_freq
    - Trainer.resume_mode
    - Trainer.resume_from_path
    - Trainer.test_freq
    - Trainer.critic_warmup
    - Trainer.default_hdfs_dir
    - Trainer.remove_previous_ckpt_in_save
    - Trainer.del_local_ckpt_after_load
    - Trainer.default_local_dir
    - Trainer.val_before_train
    - Trainer.training_rollout_mode
    - Trainer.enable_exp_buffer
    - Trainer.sync_freq
    - Trainer.max_actor_ckpt_to_keep
    - Trainer.max_critic_ckpt_to_keep
    - Trainer.device
    - Trainer.__init__()
  - veRLConfig
  - load_config()
- trinity.trainer.verl.verl_trainer module
  - CheckpointMonitor
    - CheckpointMonitor.__init__()
    - CheckpointMonitor.update_latest_checkpoint_step()
    - CheckpointMonitor.update_latest_state_dict_step()
    - CheckpointMonitor.register_thread_count()
    - CheckpointMonitor.monitor_step()
    - CheckpointMonitor.notify_started()
    - CheckpointMonitor.notify_finished()
    - CheckpointMonitor.get_actor()
  - VerlPPOTrainerWrapper
    - VerlPPOTrainerWrapper.__init__()
    - VerlPPOTrainerWrapper.init_workers()
    - VerlPPOTrainerWrapper.train_step_num
    - VerlPPOTrainerWrapper.prepare()
    - VerlPPOTrainerWrapper.save_state_dict()
    - VerlPPOTrainerWrapper.upload_state_dict()
    - VerlPPOTrainerWrapper.train_step()
    - VerlPPOTrainerWrapper.save_checkpoint()
    - VerlPPOTrainerWrapper.sync_weight()