trlX
stable
Contents:
Data Elements
RL Models
Orchestrators
Configs
Pipelines
Examples
trlX
»
Index
Edit on GitHub
Index
A
|
B
|
C
|
E
|
F
|
G
|
H
|
I
|
L
|
M
|
N
|
O
|
P
|
S
|
T
|
V
A
AccelerateILQLModel (class in trlx.model.accelerate_ilql_model)
AcceleratePPOModel (class in trlx.model.accelerate_ppo_model)
AccelerateRLBatchElement (class in trlx.data.accelerate_base_datatypes)
AccelerateRLElement (class in trlx.data.accelerate_base_datatypes)
AccelerateRLModel (class in trlx.model.accelerate_base_model)
act() (trlx.model.BaseRLModel method)
add_eval_pipeline() (trlx.model.accelerate_base_model.AccelerateRLModel method)
alpha (trlx.data.method_configs.ILQLConfig attribute)
awac_scale (trlx.data.method_configs.ILQLConfig attribute)
B
BasePipeline (class in trlx.pipeline)
BaseRLModel (class in trlx.model)
BaseRolloutStore (class in trlx.pipeline)
betas (trlx.data.method_configs.ILQLConfig attribute)
C
CausalLMWithValueHeads (class in trlx.model.nn.ilql_models)
chunk_size (trlx.data.method_configs.PPOConfig attribute)
cliprange (trlx.data.method_configs.PPOConfig attribute)
cliprange_value (trlx.data.method_configs.PPOConfig attribute)
cql_scale (trlx.data.method_configs.ILQLConfig attribute)
create_loader() (trlx.pipeline.BasePipeline method)
create_loader() (trlx.pipeline.BaseRolloutStore method)
create_loader() (trlx.pipeline.offline_pipeline.ILQLRolloutStorage method)
create_loader() (trlx.pipeline.offline_pipeline.PromptPipeline method)
create_loader() (trlx.pipeline.ppo_pipeline.PPORolloutStorage method)
E
evaluate() (trlx.model.accelerate_base_model.AccelerateRLModel method)
F
forward() (trlx.model.nn.ilql_models.CausalLMWithValueHeads method)
G
gamma (trlx.data.method_configs.ILQLConfig attribute)
gamma (trlx.data.method_configs.PPOConfig attribute)
gen_kwargs (trlx.data.method_configs.PPOConfig attribute)
generate() (trlx.model.accelerate_base_model.AccelerateRLModel method)
generate() (trlx.model.nn.ilql_models.CausalLMWithValueHeads method)
get_arch() (trlx.model.accelerate_base_model.AccelerateRLModel method)
get_arch() (trlx.model.accelerate_ilql_model.AccelerateILQLModel method)
get_arch() (trlx.model.accelerate_ppo_model.AcceleratePPOModel method)
get_components() (trlx.model.accelerate_base_model.AccelerateRLModel method)
get_components() (trlx.model.BaseRLModel method)
H
horizon (trlx.data.method_configs.PPOConfig attribute)
I
ILQLBatch (class in trlx.data.ilql_types)
ILQLConfig (class in trlx.data.method_configs)
ILQLElement (class in trlx.data.ilql_types)
ILQLRolloutStorage (class in trlx.pipeline.offline_pipeline)
init_kl_coef (trlx.data.method_configs.PPOConfig attribute)
intervals() (trlx.model.BaseRLModel method)
L
lam (trlx.data.method_configs.PPOConfig attribute)
learn() (trlx.model.accelerate_base_model.AccelerateRLModel method)
learn() (trlx.model.BaseRLModel method)
load() (trlx.model.BaseRLModel method)
load_yaml() (trlx.data.configs.TRLConfig class method)
loss() (trlx.model.accelerate_base_model.AccelerateRLModel method)
loss() (trlx.model.accelerate_ilql_model.AccelerateILQLModel method)
loss() (trlx.model.accelerate_ppo_model.AcceleratePPOModel method)
M
make_experience() (trlx.orchestrator.offline_orchestrator.OfflineOrchestrator method)
make_experience() (trlx.orchestrator.Orchestrator method)
make_experience() (trlx.orchestrator.ppo_orchestrator.PPOOrchestrator method)
MethodConfig (class in trlx.data.method_configs)
ModelConfig (class in trlx.data.configs)
N
num_rollouts (trlx.data.method_configs.PPOConfig attribute)
O
OfflineOrchestrator (class in trlx.orchestrator.offline_orchestrator)
Orchestrator (class in trlx.orchestrator)
P
post_backward_callback() (trlx.model.accelerate_base_model.AccelerateRLModel method)
post_backward_callback() (trlx.model.accelerate_ilql_model.AccelerateILQLModel method)
post_backward_callback() (trlx.model.accelerate_ppo_model.AcceleratePPOModel method)
post_epoch_callback() (trlx.model.accelerate_base_model.AccelerateRLModel method)
post_epoch_callback() (trlx.model.accelerate_ppo_model.AcceleratePPOModel method)
ppo_epochs (trlx.data.method_configs.PPOConfig attribute)
PPOConfig (class in trlx.data.method_configs)
PPOOrchestrator (class in trlx.orchestrator.ppo_orchestrator)
PPORLBatch (class in trlx.data.ppo_types)
PPORLElement (class in trlx.data.ppo_types)
PPORolloutStorage (class in trlx.pipeline.ppo_pipeline)
PromptBatch (class in trlx.data.accelerate_base_datatypes)
PromptElement (class in trlx.data.accelerate_base_datatypes)
PromptPipeline (class in trlx.pipeline.offline_pipeline)
push() (trlx.pipeline.BaseRolloutStore method)
push() (trlx.pipeline.ppo_pipeline.PPORolloutStorage method)
S
sample() (trlx.model.BaseRLModel method)
save() (trlx.model.accelerate_base_model.AccelerateRLModel method)
save() (trlx.model.BaseRLModel method)
score() (trlx.orchestrator.ppo_orchestrator.PPOOrchestrator method)
steps_for_target_q_sync (trlx.data.method_configs.ILQLConfig attribute)
T
target (trlx.data.method_configs.PPOConfig attribute)
tau (trlx.data.method_configs.ILQLConfig attribute)
to_dict() (trlx.data.configs.TRLConfig method)
tokenize() (trlx.model.accelerate_base_model.AccelerateRLModel method)
tokenize() (trlx.model.accelerate_ilql_model.AccelerateILQLModel method)
TrainConfig (class in trlx.data.configs)
TRLConfig (class in trlx.data.configs)
two_qs (trlx.data.method_configs.ILQLConfig attribute)
V
vf_coef (trlx.data.method_configs.PPOConfig attribute)
Read the Docs
v: stable
Versions
latest
stable
Downloads
On Read the Docs
Project Home
Builds