# Source code for trlx.data.configs

from dataclasses import dataclass
from typing import Any, Dict, Tuple

import yaml

from trlx.data.method_configs import MethodConfig, get_method


@dataclass
class ModelConfig:
    """
    Config for a model.

    :param model_path: Path to the model (local or on huggingface hub)
    :type model_path: str

    :param tokenizer_path: Path to the tokenizer (local or on huggingface hub)
    :type tokenizer_path: str

    :param model_type: One of the registered RL models present in trlx.model
    :type model_type: str

    :param num_layers_unfrozen: Layer-unfreezing setting, defaults to -1.
        NOTE(review): the meaning of -1 is defined by the consuming model code,
        not by this file -- confirm there.
    :type num_layers_unfrozen: int
    """

    model_path: str
    tokenizer_path: str
    model_type: str  # One of the architectures present in trlx.model
    num_layers_unfrozen: int = -1

    @classmethod
    def from_dict(cls, config: Dict[str, Any]) -> "ModelConfig":
        """
        Build a ModelConfig from a dictionary of field values.

        :param config: Mapping of field name to value; it is unpacked as
            keyword arguments to the dataclass constructor.
        :type config: Dict[str, Any]

        :raises TypeError: If a required field is missing or a key does not
            correspond to a field of this dataclass.
        """
        return cls(**config)


@dataclass
class TrainConfig:
    """
    Config for train job on model.

    :param total_steps: Total number of training steps
    :type total_steps: int

    :param seq_length: Number of tokens to use as context (max length for tokenizer)
    :type seq_length: int

    :param epochs: Total number of passes through data
    :type epochs: int

    :param batch_size: Batch size for training
    :type batch_size: int

    :param lr_ramp_steps: Number of steps before learning rate reaches learning_rate_init
    :type lr_ramp_steps: int

    :param lr_decay_steps: Number of steps after ramp up before learning rate decays to learning_rate_target
    :type lr_decay_steps: int

    :param weight_decay: Weight decay for optimizer
    :type weight_decay: float

    :param learning_rate_init: Initial learning rate after ramp up
    :type learning_rate_init: float

    :param learning_rate_target: Target learning rate after decay
    :type learning_rate_target: float

    :param opt_betas: Beta coefficients for the optimizer.
        NOTE(review): presumably an Adam-style (beta1, beta2) pair -- confirm
        against the optimizer construction code.
    :type opt_betas: Tuple[float, ...]

    :param checkpoint_interval: Save model every checkpoint_interval steps
    :type checkpoint_interval: int

    :param eval_interval: Evaluate model every eval_interval steps
    :type eval_interval: int

    :param pipeline: Pipeline to use for training. One of the registered pipelines present in trlx.pipeline
    :type pipeline: str

    :param orchestrator: Orchestrator to use for training. One of the registered orchestrators present in trlx.orchestrator
    :type orchestrator: str

    :param checkpoint_dir: Checkpoint directory, defaults to "ckpts"
    :type checkpoint_dir: str

    :param project_name: Project name for wandb, defaults to "trlx"
    :type project_name: str

    :param seed: Seed value, defaults to 1000.
        NOTE(review): which random generators consume it is decided outside this file.
    :type seed: int
    """

    total_steps: int
    seq_length: int
    epochs: int
    batch_size: int

    lr_ramp_steps: int
    lr_decay_steps: int
    weight_decay: float
    learning_rate_init: float
    learning_rate_target: float
    # ``Tuple[float]`` would denote a tuple of exactly one float; the ellipsis
    # form accepts any number of coefficients.
    opt_betas: Tuple[float, ...]

    checkpoint_interval: int
    eval_interval: int

    pipeline: str  # One of the pipelines in trlx.pipeline
    orchestrator: str  # One of the orchestrators in trlx.orchestrator

    checkpoint_dir: str = "ckpts"
    project_name: str = "trlx"
    seed: int = 1000

    @classmethod
    def from_dict(cls, config: Dict[str, Any]) -> "TrainConfig":
        """
        Build a TrainConfig from a dictionary of field values.

        :param config: Mapping of field name to value; it is unpacked as
            keyword arguments to the dataclass constructor.
        :type config: Dict[str, Any]

        :raises TypeError: If a required field is missing or a key does not
            correspond to a field of this dataclass.
        """
        return cls(**config)


@dataclass
class TRLConfig:
    """
    Top level config for trlX. Loads configs and can be converted to dictionary.

    :param model: Model section of the config
    :type model: ModelConfig

    :param train: Training section of the config
    :type train: TrainConfig

    :param method: Method section of the config
    :type method: MethodConfig
    """

    model: ModelConfig
    train: TrainConfig
    method: MethodConfig

    @classmethod
    def load_yaml(cls, yml_fp: str) -> "TRLConfig":
        """
        Load yaml file as TRLConfig.

        The file must provide top-level ``model``, ``train`` and ``method``
        mappings. ``method`` must contain a ``name`` key; it is passed to
        ``get_method`` and the returned object's ``from_dict`` is called with
        the whole ``method`` mapping.

        :param yml_fp: Path to yaml file
        :type yml_fp: str

        :raises KeyError: If one of the required sections (or ``method.name``)
            is missing from the file.
        """
        # Explicit encoding so parsing does not depend on the platform locale.
        with open(yml_fp, mode="r", encoding="utf-8") as file:
            config = yaml.safe_load(file)
        return cls(
            ModelConfig.from_dict(config["model"]),
            TrainConfig.from_dict(config["train"]),
            get_method(config["method"]["name"]).from_dict(config["method"]),
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert TRLConfig to dictionary.

        The result is a single flat dictionary holding the attributes of the
        model, train and method configs, merged in that order. If two sections
        share an attribute name, the later section's value overwrites the
        earlier one.
        """
        # Copy first so the model config's own __dict__ is never mutated.
        data = self.model.__dict__.copy()
        data.update(self.train.__dict__)
        data.update(self.method.__dict__)
        return data