Create, in Colab, soft prompts compatible with KoboldAI and mkultra for your favourite GPT-J-6B-based or GPT-Neo-2.7B-based model!
For more information about what a soft prompt is, see the paper "The Power of Scale for Parameter-Efficient Prompt Tuning": https://arxiv.org/pdf/2104.08691.pdf
If you're not a programmer or you want a demo of how to use the API, click here to open the demo notebook.
To install mtj-softtuner in a TPU Colab notebook, run these commands:
git clone https://github.com/ve-forbryderne/mtj-softtuner
bash mtj-softtuner/install.sh
Here's an extremely basic example of how to use the API:
from mtj_softtuner import BasicTrainer
# Set to True to hide the traceback of certain error messages.
quiet = False

# Leave as None, or set to an integer (e.g. 1) if trainer.data should persist
# after the Colab runtime is restarted.
universe = None

trainer = BasicTrainer(universe, quiet=quiet)
# Model to load: either the path to a Mesh Transformer JAX model, or the model
# ID of a Hugging Face model (for example "KoboldAI/fairseq-dense-13B").
ckpt_path = "/content/step_383500"
trainer.data.ckpt_path = ckpt_path
trainer.get_hf_checkpoint_metadata()

# The next two lines are only needed when loading from a Mesh Transformer JAX
# model; the demo notebook has the full list of permitted model types.
model_type = "GPT-J-6B"
trainer.set_params(model_type)
# Location of the save file (if the file does not exist it will be created).
# You can specify the path to an existing save file created by mtj-softtuner
# to continue from an earlier point in the training.
trainer.data.save_file = "/content/my_softprompt.mtjsp"

# Set the initial soft prompt string; this will be ignored if we are
# continuing from an existing save file.
initial_softprompt = (
    "Le Jeu du Prochain Train itself is simplicity in motion. The object: "
    "Be the last of your round's six to jump from one side of the tracks to "
    "the other - that is, across the tracks - before the train passes.\n\n"
)

# Initialize the soft prompt from the token IDs of the string above.
trainer.data.prompt_method = "tokens"
tokenizer = trainer.get_tokenizer()
if trainer.data.newlinemode == "s":  # Handle fairseq-style newlines if required
    # The body of this `if` must be indented, otherwise Python rejects the
    # script with an IndentationError.
    initial_softprompt = initial_softprompt.replace("\n", "</s>")

# max_length is set to a huge value so that, even with truncation enabled,
# the limit is effectively never reached for a prompt of this size.
trainer.data.initial_softprompt = tokenizer.encode(
    initial_softprompt, max_length=int(2e9), truncation=True
)
# Alternatively, you can uncomment the below two lines to instead use a randomly
# initialized prompt with 20 tokens in it:
#trainer.data.prompt_method = "kaiming"
#trainer.data.soft_in_dim = 20
# Or these two lines to use a prompt with 20 randomly sampled (without
# replacement) tokens from your model's vocabulary:
#trainer.data.prompt_method = "vocab_sample"
#trainer.data.soft_in_dim = 20
# Generate an NPY file for your dataset; only needed if you haven't already
# done so.
batch_size = 2048
epochs = 1
dataset_path = "/content/dataset.txt"  # Can be a single file or a folder
output_file = "/content/dataset.npy"
trainer.tokenize_dataset(dataset_path, output_file, batch_size, epochs)

# Point the trainer at the NPY file written by the tokenization step.
dataset_file = trainer.data.dataset_file = output_file
trainer.data.gradient_accumulation_steps = 16

# Training hyperparameters; the demo notebook explains what each of these
# means.
trainer.data.stparams = dict(
    lr=3e-5,
    max_grad_norm=10.0,
    weight_decay=0.1,
    warmup=0.1,
    end_lr_multiplier=0.1,
    save_every=50,
)

# Everything is configured: start training.
trainer.train()
# Export the trained soft prompt in KoboldAI format.
name = "Untitled"
author = ""
supported = "Generic 6B"
description = "Baby shoes"
output_file = "/content/my_softprompt.zip"
trainer.export_to_kobold(output_file, name, author, supported, description)

# Export the trained soft prompt in mkultra format.
soft_prompt_name = "Untitled"
soft_prompt_description = "Baby shoes"
output_file = "/content/my_softprompt.json"
trainer.export_to_mkultra(
    output_file, soft_prompt_name, soft_prompt_description
)