trlX
latest
Contents:
Installation
API
Examples
Configs
Trainers
Pipelines
Data Classes
trlX
Index
Edit on GitHub
Index
A
|
B
|
C
|
D
|
E
|
F
|
G
|
I
|
L
|
M
|
O
|
P
|
S
|
T
A
AccelerateILQLTrainer (class in trlx.trainer.accelerate_ilql_trainer)
AcceleratePPOTrainer (class in trlx.trainer.accelerate_ppo_trainer)
AccelerateRLTrainer (class in trlx.trainer.accelerate_base_trainer)
AccelerateSFTTrainer (class in trlx.trainer.accelerate_sft_trainer)
add_eval_pipeline() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
add_prompt_pipeline() (trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
B
BasePipeline (class in trlx.pipeline)
BaseRLTrainer (class in trlx.trainer)
BaseRolloutStore (class in trlx.pipeline)
C
CausalILQLOutput (class in trlx.models.modeling_ilql)
create_loader() (trlx.pipeline.BasePipeline method)
(trlx.pipeline.BaseRolloutStore method)
(trlx.pipeline.offline_pipeline.DialogStore method)
(trlx.pipeline.offline_pipeline.ILQLRolloutStorage method)
(trlx.pipeline.offline_pipeline.ILQLSeq2SeqRolloutStorage method)
(trlx.pipeline.offline_pipeline.PromptPipeline method)
(trlx.pipeline.ppo_pipeline.PPORolloutStorage method)
create_train_dataloader() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
(trlx.trainer.accelerate_sft_trainer.AccelerateSFTTrainer method)
D
decode() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
DialogMessage (class in trlx.pipeline.offline_pipeline)
DialogStore (class in trlx.pipeline.offline_pipeline)
E
evaluate() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
evolve() (trlx.data.configs.TRLConfig method)
F
from_dict() (trlx.data.configs.TRLConfig class method)
G
generate() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
generate_eval() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
get_advantages_and_returns() (trlx.models.modeling_ppo.PPOConfig method)
get_arch() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
(trlx.trainer.accelerate_sft_trainer.AccelerateSFTTrainer method)
I
ILQLBatch (class in trlx.data.ilql_types)
ILQLConfig (class in trlx.models.modeling_ilql)
ILQLElement (class in trlx.data.ilql_types)
ILQLRolloutStorage (class in trlx.pipeline.offline_pipeline)
ILQLSeq2SeqBatch (class in trlx.data.ilql_types)
ILQLSeq2SeqElement (class in trlx.data.ilql_types)
ILQLSeq2SeqRolloutStorage (class in trlx.pipeline.offline_pipeline)
L
learn() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.BaseRLTrainer method)
load() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
load_yaml() (trlx.data.configs.TRLConfig class method)
loss() (trlx.models.modeling_ppo.PPOConfig method)
(trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
(trlx.trainer.accelerate_sft_trainer.AccelerateSFTTrainer method)
M
make_experience() (trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
make_experience_seq2seq() (trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
MethodConfig (class in trlx.data.method_configs)
ModelConfig (class in trlx.data.configs)
O
OptimizerConfig (class in trlx.data.configs)
P
post_backward_callback() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
post_epoch_callback() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
PPOConfig (class in trlx.models.modeling_ppo)
PPORLBatch (class in trlx.data.ppo_types)
PPORLElement (class in trlx.data.ppo_types)
PPORolloutStorage (class in trlx.pipeline.ppo_pipeline)
prepare_learning() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ilql_trainer.AccelerateILQLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
(trlx.trainer.accelerate_sft_trainer.AccelerateSFTTrainer method)
PromptPipeline (class in trlx.pipeline.offline_pipeline)
push() (trlx.pipeline.BaseRolloutStore method)
(trlx.pipeline.ppo_pipeline.PPORolloutStorage method)
push_to_store() (trlx.trainer.BaseRLTrainer method)
S
save() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
save_pretrained() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
(trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
SchedulerConfig (class in trlx.data.configs)
Seq2SeqILQLOutput (class in trlx.models.modeling_ilql)
setup_model() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
setup_optimizer() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
setup_rollout_logging() (trlx.trainer.accelerate_ppo_trainer.AcceleratePPOTrainer method)
setup_scheduler() (trlx.trainer.accelerate_base_trainer.AccelerateRLTrainer method)
T
to_dict() (trlx.data.configs.TRLConfig method)
tokenize_dialogue() (in module trlx.pipeline.offline_pipeline)
TokenizerConfig (class in trlx.data.configs)
train() (in module trlx)
TrainConfig (class in trlx.data.configs)
TRLConfig (class in trlx.data.configs)
Read the Docs
v: latest
Versions
latest
stable
main
docs
Downloads
On Read the Docs
Project Home
Builds