# Source code for novelai_api.BiasGroup

from typing import Any, Dict, Iterable, List, Union

from novelai_api.Preset import Model
from novelai_api.utils import tokenize_if_not


class BiasGroup:
    """
    Group of sequences that share the same bias settings

    Sequences are stored exactly as they were added (text or lists of token ids).
    They are only passed through ``tokenize_if_not`` when
    :meth:`get_tokenized_entries` is iterated.
    """

    # Stored sequences, each one being either a text or a list of token ids
    _sequences: List[Union[List[int], str]]

    bias: float
    ensure_sequence_finish: bool
    generate_once: bool
    enabled: bool

    def __init__(
        self,
        bias: float,
        ensure_sequence_finish: bool = False,
        generate_once: bool = False,
        enabled: bool = True,
    ):
        """
        Create a bias group

        :param bias: Bias value of the bias group. Negative is a downbias, positive is an upbias
        :param ensure_sequence_finish: Ensures the bias completes
        :param generate_once: Only biases for the first occurrence
        :param enabled: Is the bias group enabled
        """

        self._sequences = []

        self.bias = bias
        self.ensure_sequence_finish = ensure_sequence_finish
        self.generate_once = generate_once
        self.enabled = enabled

    @classmethod
    def from_data(cls, data: Dict[str, Any]) -> "BiasGroup":
        """
        Create a bias group from bias group data

        The flags are read from the camelCase keys ("ensureSequenceFinish",
        "generateOnce") when present, else from the snake_case keys
        ("ensure_sequence_finish", "generate_once"), else they default to False.
        A missing "enabled" key defaults to True, as in the constructor.

        :param data: Mapping holding a mandatory "bias" key and, optionally,
            the flags described above and a "phrases" list forwarded to :meth:`add`
        :return: The newly created bias group
        :raises KeyError: If "bias" is missing from ``data``
        :raises ValueError: If one of the phrases is not a valid sequence
        """

        # FIXME: the meaning of "whenInactive" in bias data is unknown, it is ignored here

        # dict.get tests for key presence (not truthiness), so a camelCase key
        # explicitly set to a falsy value still wins over the snake_case one
        ensure_sequence_finish = data.get("ensureSequenceFinish", data.get("ensure_sequence_finish", False))
        generate_once = data.get("generateOnce", data.get("generate_once", False))

        group = cls(data["bias"], ensure_sequence_finish, generate_once, data.get("enabled", True))

        if "phrases" in data:
            group.add(*data["phrases"])

        return group

    def add(
        self,
        *sequences: Union[Dict[str, List[List[int]]], Dict[str, List[int]], List[int], str],
    ) -> "BiasGroup":
        """
        Add elements to the bias group. Elements can be string or tokenized strings
        Using tokenized strings is not recommended, for flexibility between tokenizers

        An element can also be a dict, holding the actual sequence under the key
        "sequence", or a list of sequences under the key "sequences" (only the
        first item of that list is used).

        Elements are validated and stored one by one: when an element is rejected,
        the elements before it have already been added to the group.

        :param sequences: Elements to add
        :return: The bias group itself, to allow chaining
        :raises ValueError: If an element is a dict without "sequence" nor "sequences" key,
            is neither a string nor a list, or is a list containing a non-int item
        :raises IndexError: If the "sequences" list of a dict element is empty
        """

        for i, sequence in enumerate(sequences):
            if isinstance(sequence, dict):
                if "sequence" in sequence:
                    sequence = sequence["sequence"]
                elif "sequences" in sequence:
                    sequence = sequence["sequences"][0]
                else:
                    # Without this branch, the dict would be reported below as a bad 'List[int]'
                    raise ValueError(
                        f"Expected key 'sequence' or 'sequences' in sequence #{i} of 'sequences', "
                        f"but got keys {list(sequence)}"
                    )

            if not isinstance(sequence, str):
                if not isinstance(sequence, list):
                    raise ValueError(
                        f"Expected type 'List[int]' for sequence #{i} of 'sequences', but got '{type(sequence)}'"
                    )

                for j, s in enumerate(sequence):
                    if not isinstance(s, int):
                        raise ValueError(
                            f"Expected type 'int' for item #{j} of sequence #{i} of 'sequences', "
                            f"but got '{type(s)}': {sequence}"
                        )

            self._sequences.append(sequence)

        return self

    def __iadd__(
        self, sequences: Union[Dict[str, List[List[int]]], Dict[str, List[int]], List[int], str]
    ) -> "BiasGroup":
        """
        Add a single element to the bias group. The element can be string or tokenized string
        Using tokenized strings is not recommended, for flexibility between tokenizers

        The right operand is forwarded to :meth:`add` as one element.
        """

        self.add(sequences)

        return self

    def __iter__(self):
        """
        Return an iterator on the stored sequences

        Each item is a dict holding the settings of the group ("bias",
        "ensure_sequence_finish", "generate_once", "enabled") and one stored
        sequence, untouched, under the key "sequence".
        """

        return (
            {
                "bias": self.bias,
                "ensure_sequence_finish": self.ensure_sequence_finish,
                "generate_once": self.generate_once,
                "enabled": self.enabled,
                "sequence": s,
            }
            for s in self._sequences
        )

    def get_tokenized_entries(self, model: "Model") -> Iterable[Dict[str, Any]]:
        """
        Return the tokenized sequences for the bias group, if it is enabled

        The result is lazy: ``enabled`` is checked and the sequences are passed
        through ``tokenize_if_not`` while iterating, not when calling this method.
        Nothing is yielded while the group is disabled.

        :param model: Model to use for tokenization
        :return: Iterable of dicts with the keys "bias", "ensure_sequence_finish",
            "generate_once" and "sequence"
        """

        return (
            {
                "bias": self.bias,
                "ensure_sequence_finish": self.ensure_sequence_finish,
                "generate_once": self.generate_once,
                "sequence": tokenize_if_not(model, s),
            }
            for s in self._sequences
            if self.enabled
        )

    def __str__(self) -> str:
        """
        Return a readable representation of the settings and stored sequences
        """

        return (
            "{ "
            f"bias: {self.bias}, "
            f"ensure_sequence_finish: {self.ensure_sequence_finish}, "
            f"generate_once: {self.generate_once}, "
            f"enabled: {self.enabled}, "
            f"sequences: {self._sequences}"
            "}"
        )