Customizing LLMs and Their Output: Prompt Techniques¶

In [ ]:
from dotenv import load_dotenv
load_dotenv()
Out[ ]:
True

This chapter covers techniques and best practices for improving the reliability and performance of LLMs in specific scenarios, such as complex reasoning and problem-solving tasks. The process of adapting a model to a specific task, or of making sure its output matches expectations, is called conditioning. This chapter discusses fine-tuning and prompting as methods of conditioning.

Fine-tuning involves training a pre-trained base model on a specific task or dataset relevant to the target application. Through this process the model adapts, becoming more accurate and more contextually appropriate for the intended use case.

  • Conditioning LLMs
  • Fine-tuning
  • Prompt engineering

Conditioning LLMs¶

  • Conditioning: the collection of methods used to steer the model's output generation
  • Alignment: the process, and the goal, of training and modifying an LLM so that its general behavior, decision-making, and outputs conform to broader human values, ethical principles, and safety considerations.

Conditioning, which may include fine-tuning, focuses on influencing the model through a variety of techniques at different layers of interaction, whereas alignment focuses on fundamentally and holistically adjusting the model's behavior to human ethical and safety standards.

  • LoRA (Low-Rank Adaptation): a compromise between high performance and computational efficiency that reduces the training burden by introducing sparsity or by partially freezing parameters. It is a type of PEFT in which the pre-trained model weights are kept frozen (see the sketch after this list).
  • QLoRA: an extension of LoRA that fine-tunes large models efficiently by backpropagating gradients through a frozen, 4-bit-quantized base model into trainable low-rank adapters.
  • RLHF (Reinforcement Learning from Human Feedback): a fine-tuning process in which human feedback serves as a key guide for the model's learning trajectory.
  • PEFT (Parameter-Efficient Fine-Tuning): produces a small checkpoint per task; this small set of trained weights can be added on top of the LLM, so the same base model can serve multiple tasks without replacing the entire model.
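
The core idea behind LoRA can be sketched in a few lines of plain PyTorch. This is only a conceptual illustration with made-up layer sizes, not the PEFT implementation used later in this chapter:

import torch

# Frozen pre-trained weight of a single linear layer (sizes are illustrative)
d_out, d_in, r = 3200, 3200, 64               # r is the LoRA rank
W = torch.randn(d_out, d_in)                  # stays frozen (no gradients)

# Trainable low-rank adapter: only lora_A and lora_B receive gradients
lora_A = (torch.randn(r, d_in) * 0.01).requires_grad_(True)
lora_B = torch.zeros(d_out, r, requires_grad=True)
alpha = 16                                    # scaling factor (lora_alpha)

def lora_linear(x):
    # Output of the adapted layer: x @ (W + (alpha / r) * lora_B @ lora_A)^T
    return x @ W.T + (alpha / r) * (x @ lora_A.T) @ lora_B.T

x = torch.randn(4, d_in)                      # a batch of 4 input vectors
print(lora_linear(x).shape)                   # torch.Size([4, 3200])

Because lora_B starts at zero, the adapted layer initially behaves exactly like the frozen layer, and only the small matrices lora_A and lora_B (a few million parameters instead of billions) are updated during fine-tuning.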

Fine-Tuning¶

  • Steerability: the model's ability to follow instructions (instruction tuning)
  • Reliable output formatting: important, for example, for API calls/function calling
  • Custom tone: lets the output style be adjusted to suit the task and audience
  • Alignment: the model's output should be consistent with core values such as safety, security, and privacy considerations

Install the required libraries: pip install -U accelerate bitsandbytes datasets transformers peft trl sentencepiece wandb langchain huggingface_hub

In [ ]:
from huggingface_hub import notebook_login
In [ ]:
notebook_login()
In [ ]:
import os
In [ ]:
os.environ["WANDB_PROJECT"] = "finetuning"
In [ ]:
import wandb
In [ ]:
wandb.login()
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
wandb: Currently logged in as: mrchaos88. Use `wandb login --relogin` to force relogin
Out[ ]:
True
In [ ]:
import wandb
if wandb.run is not None:
    wandb.finish()
In [ ]:
from datasets import load_dataset

Open-source models¶

In [ ]:
dataset_name = "squad_v2"
dataset = load_dataset(dataset_name, split="train")
eval_dataset = load_dataset(dataset_name, split="validation")
Downloading readme:   0%|          | 0.00/8.18k [00:00<?, ?B/s]
Downloading data:   0%|          | 0.00/16.4M [00:00<?, ?B/s]
Downloading data:   0%|          | 0.00/1.35M [00:00<?, ?B/s]
Generating train split:   0%|          | 0/130319 [00:00<?, ? examples/s]
Generating validation split:   0%|          | 0/11873 [00:00<?, ? examples/s]
In [ ]:
load_dataset(dataset_name)
Out[ ]:
DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 130319
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 11873
    })
})
In [ ]:
dataset.features
Out[ ]:
{'id': Value(dtype='string', id=None),
 'title': Value(dtype='string', id=None),
 'context': Value(dtype='string', id=None),
 'question': Value(dtype='string', id=None),
 'answers': Sequence(feature={'text': Value(dtype='string', id=None), 'answer_start': Value(dtype='int32', id=None)}, length=-1, id=None)}
In [ ]:
model_id = "openlm-research/open_llama_3b_v2"
In [ ]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
In [ ]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

device_map = "auto"

base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code= True,
    )
In [ ]:
# base_model.bnb_config.use_cache = False
base_model
Out[ ]:
LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 3200, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=3200, out_features=3200, bias=False)
          (k_proj): Linear4bit(in_features=3200, out_features=3200, bias=False)
          (v_proj): Linear4bit(in_features=3200, out_features=3200, bias=False)
          (o_proj): Linear4bit(in_features=3200, out_features=3200, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=3200, out_features=8640, bias=False)
          (up_proj): Linear4bit(in_features=3200, out_features=8640, bias=False)
          (down_proj): Linear4bit(in_features=8640, out_features=3200, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head): Linear(in_features=3200, out_features=32000, bias=False)
)
In [ ]:
base_model.config
Out[ ]:
LlamaConfig {
  "_name_or_path": "openlm-research/open_llama_3b_v2",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 3200,
  "initializer_range": 0.02,
  "intermediate_size": 8640,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 26,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "quantization_config": {
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": false,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.36.2",
  "use_cache": false,
  "vocab_size": 32000
}
In [ ]:
base_model.config.use_cache = False
In [ ]:
from  transformers import AutoTokenizer, TrainingArguments, EarlyStoppingCallback
from peft import LoraConfig
from trl import SFTTrainer
In [ ]:
# more info : https://github.com/huggingface/transformers/pull/24906
base_model.config.pretraining_tp = 1
In [ ]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)
In [ ]:
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
tokenizer_config.json:   0%|          | 0.00/593 [00:00<?, ?B/s]
tokenizer.model:   0%|          | 0.00/512k [00:00<?, ?B/s]
special_tokens_map.json:   0%|          | 0.00/330 [00:00<?, ?B/s]
You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
In [ ]:
output_dir = "data/finetuning"
new_model_name = f"openllama-3b-peft-{dataset_name}"
new_model_name
Out[ ]:
'openllama-3b-peft-squad_v2'
In [ ]:
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_steps=10,
    max_steps=2, #2000, # training can still improve after many steps!
    num_train_epochs=100,
    evaluation_strategy="steps",
    eval_steps=5, # update steps between two evaluations
    save_total_limit=5, # only last 5 models are saved.
    push_to_hub=False, # you can set this to True if you want to upload your model to huggingface
    load_best_model_at_end=True, # to use in combination with early stopping
    report_to="wandb",
)
PyTorch: setting up devices
In [ ]:
max_seq_length = 512
In [ ]:
trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    dataset_text_field="question", # this depends on dataset!
    tokenizer=tokenizer,
    args=training_args,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=200)],
)
PyTorch: setting up devices
/home/shpark/anaconda3/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:222: UserWarning: You didn't pass a `max_seq_length` argument to the SFTTrainer, this will default to 1024
  warnings.warn(
Map:   0%|          | 0/130319 [00:00<?, ? examples/s]
Map:   0%|          | 0/11873 [00:00<?, ? examples/s]
/home/shpark/anaconda3/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:284: UserWarning: You passed a tokenizer with `padding_side` not equal to `right` to the SFTTrainer. This might lead to some unexpected behaviour due to overflow issues when training a model in half-precision. You might consider adding `tokenizer.padding_side = 'right'` to your code.
  warnings.warn(
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
max_steps is given, it will override any value given in num_train_epochs
In [ ]:
trainer.train()
***** Running training *****
  Num examples = 130,319
  Num Epochs = 1
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 2
  Number of trainable parameters = 21,299,200
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
[2/2 00:02, Epoch 0/1]
Step Training Loss Validation Loss


Training completed. Do not forget to share your model on huggingface.co/models =)


Out[ ]:
TrainOutput(global_step=2, training_loss=4.824883460998535, metrics={'train_runtime': 3.9028, 'train_samples_per_second': 8.199, 'train_steps_per_second': 0.512, 'total_flos': 10919296819200.0, 'train_loss': 4.824883460998535, 'epoch': 0.0})
In [ ]:
trainer.model.save_pretrained(
    os.path.join(output_dir, "final_checkpoint"),
)
In [ ]:
trainer.model.push_to_hub(
    repo_id = new_model_name
)
Uploading the following files to mrchaos88/openllama-3b-peft-squad_v2: adapter_config.json,adapter_model.safetensors,README.md
adapter_model.safetensors:   0%|          | 0.00/85.2M [00:00<?, ?B/s]
Out[ ]:
CommitInfo(commit_url='https://huggingface.co/mrchaos88/openllama-3b-peft-squad_v2/commit/b42a80ac5302f803ed1bac3accee5856bb930fee', commit_message='Upload model', commit_description='', oid='b42a80ac5302f803ed1bac3accee5856bb930fee', pr_url=None, pr_revision=None, pr_num=None)

ref : https://gist.github.com/ahoho/ba41c42984faf64bf4302b2b1cd7e0ce

In [ ]:
# usually, the peft model is stored as an adapter, not as a full model, therefore the loading is a bit different
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
In [ ]:
task = "text-generation"
base_model_name = 'openlm-research/open_llama_3b_v2'
lora_model_name = "mrchaos88/openllama-3b-peft-squad_v2"
config = PeftConfig.from_pretrained(lora_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)
model = PeftModel.from_pretrained(model,lora_model_name,config=config)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code= True)
tokenizer.pad_token = tokenizer.eos_token
config = GenerationConfig(
    do_sample=True,
    temperature=0.7,
    max_new_tokens=256,
    top_p=0.95  # nucleus sampling threshold; must be in (0, 1]
)
pipe = pipeline(
    task,
    model=model,
    tokenizer=tokenizer,
    max_length=256,
    framework="pt",
    generation_config=config,
)
llm = HuggingFacePipeline(pipeline=pipe)
loading configuration file config.json from cache at /home/shpark/.cache/huggingface/hub/models--openlm-research--open_llama_3b_v2/snapshots/bce5d60d3b0c68318862270ec4e794d83308d80a/config.json
Model config LlamaConfig {
  "_name_or_path": "openlm-research/open_llama_3b_v2",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 3200,
  "initializer_range": 0.02,
  "intermediate_size": 8640,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 26,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.36.2",
  "use_cache": true,
  "vocab_size": 32000
}

loading weights file pytorch_model.bin from cache at /home/shpark/.cache/huggingface/hub/models--openlm-research--open_llama_3b_v2/snapshots/bce5d60d3b0c68318862270ec4e794d83308d80a/pytorch_model.bin
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0
}

All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at openlm-research/open_llama_3b_v2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/shpark/.cache/huggingface/hub/models--openlm-research--open_llama_3b_v2/snapshots/bce5d60d3b0c68318862270ec4e794d83308d80a/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 0
}

adapter_model.safetensors:   0%|          | 0.00/85.2M [00:00<?, ?B/s]
loading file tokenizer.model from cache at /home/shpark/.cache/huggingface/hub/models--openlm-research--open_llama_3b_v2/snapshots/bce5d60d3b0c68318862270ec4e794d83308d80a/tokenizer.model
loading file tokenizer.json from cache at None
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /home/shpark/.cache/huggingface/hub/models--openlm-research--open_llama_3b_v2/snapshots/bce5d60d3b0c68318862270ec4e794d83308d80a/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/shpark/.cache/huggingface/hub/models--openlm-research--open_llama_3b_v2/snapshots/bce5d60d3b0c68318862270ec4e794d83308d80a/tokenizer_config.json
The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PersimmonForCausalLM', 'PhiForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormForCausalLM', 'RoCBertForCausalLM', 'RoFormerForCausalLM', 'RwkvForCausalLM', 'Speech2Text2ForCausalLM', 'TransfoXLLMHeadModel', 'TrOCRForCausalLM', 'WhisperForCausalLM', 'XGLMForCausalLM', 'XLMWithLMHeadModel', 'XLMProphetNetForCausalLM', 'XLMRobertaForCausalLM', 'XLMRobertaXLForCausalLM', 'XLNetLMHeadModel', 'XmodForCausalLM'].
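
The warning above appears because the pipeline receives a PeftModelForCausalLM wrapper rather than a plain LlamaForCausalLM. As a hedged sketch (reusing the model, tokenizer, task, and config objects defined above), one way to avoid it is to merge the LoRA weights back into the base model before building the pipeline:

# Fold the adapter weights into the base model; merge_and_unload() returns a
# plain LlamaForCausalLM, which the text-generation pipeline supports directly.
merged_model = model.merge_and_unload()

pipe = pipeline(
    task,
    model=merged_model,
    tokenizer=tokenizer,
    framework="pt",
    generation_config=config,
)
llm = HuggingFacePipeline(pipeline=pipe)

The merge only changes how the adapter weights are stored, not the model's outputs, so generation works the same way as before.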
In [ ]:
response = llm("what is love?")
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
In [ ]:
print(response)
.
Praise Him for giving me a good, healthy, beautiful child. Praise Him for the opportunity to parent. Praise Him for the privilege to be a stay-at-home mom. Praise Him for the opportunity to do what I love to do.
Praise Him for a husband who loves our children and loves me. Praise Him for a marriage that is growing and changing and developing and maturing. Praise Him that we are becoming a family. Praise Him for the blessing of being the parents of a child with Down syndrome. Praise Him for the opportunity to parent this child. Praise Him that we are learning and growing and maturing through this journey.
Praise Him for the gift of our children. Praise Him for the life we get to live. Praise Him for the opportunity to live the life we get to live.
Praise Him for the opportunity to do what I love to do. Praise Him for the opportunity to make a difference. Praise Him for the opportunity to live life as I want to live it.
Praise Him for all things, big and small, and everything in between. Praise Him in all things.
In [ ]:
response = llm("top 5 philosophers")
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
In [ ]:
print(response)
 of all time.
- #1 - Aristotle
- #2 - Plato
- #3 - Kant
- #4 - Descartes
- #5 - Locke
Top 10 Philosophers
- #1 - Aristotle
- #2 - Kant
- #3 - Plato
- #4 - Descartes
- #5 - Locke
- #6 - Aquinas
- #7 - Spinoza
- #8 - Schopenhauer
- #9 - Nietzsche
- #10 - Heidegger
Top 10 Philosophers Of All Time
- #1 - Aristotle
- #2 - Plato
- #3 - Kant
- #4 - Descartes
- #5 - Locke
- #6 - Aquinas
- #7 - Schopenhauer
- #8 - Nietzsche
- #9 - Heidegger
- #10 - Spinoza
What Do Philosophers Do?
Philosophers are theologians, poets, and literary critics, yet they are not the same. They are like theolog

Commercial models¶

So far we have seen how to fine-tune and deploy open-source LLMs; some commercial models can also be fine-tuned on custom data.
For example, both OpenAI's GPT-3.5 and Google's PaLM models offer this capability.
The functionality is integrated with a few Python libraries.
With the Scikit-LLM library it only takes a few lines of code.
You have to build the training dataset yourself.

Important: the model itself is not retrained.
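
In other words, the Scikit-LLM classifiers used below expose a scikit-learn-style fit/predict interface, but fit() does not update any model weights: it only stores the candidate labels, and predict() asks the LLM to pick one of them for each text. The following is a simplified conceptual sketch of that idea, not Scikit-LLM's actual implementation (the class name and prompt wording are made up for illustration):

class PromptZeroShotClassifier:
    # A minimal prompt-based zero-shot classifier; `llm` is any callable
    # that maps a prompt string to a completion string.
    def __init__(self, llm):
        self.llm = llm

    def fit(self, X, y):
        # Nothing is trained; only the set of allowed labels is stored.
        self.labels_ = sorted(set(y))
        return self

    def predict(self, X):
        preds = []
        for text in X:
            prompt = (
                f"Classify the following text into one of {self.labels_}.\n"
                f"Text: {text}\nLabel:"
            )
            preds.append(self.llm(prompt).strip())
        return preds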

Install scikit-llm:
pip install scikit-llm

If scikit-llm does not install properly, clone it directly from GitHub:
git clone https://github.com/iryna-kondr/scikit-llm.git

Then add the path to your .env file:
PYTHONPATH=/home/shpark/scikit-llm/

and create an empty __init__.py file under the scikit-llm folder.

from dotenv import load_dotenv
load_dotenv()

from skllm.datasets import get_classification_dataset

Running the code above loads that path into the environment, after which scikit-llm can be imported and used.

References:

  • https://dodonam.tistory.com/446
  • https://towardsai.net/p/machine-learning/making-models-smart-gpt-4-and-scikit-learn
In [ ]:
from skllm.datasets import get_classification_dataset # test dataset
from skllm.config import SKLLMConfig
from skllm.models.gpt.classification.zero_shot import ZeroShotGPTClassifier
import os
In [ ]:
SKLLMConfig.set_openai_key(os.getenv("OPENAI_API_KEY"))
SKLLMConfig.set_openai_org(os.getenv("OPENAI_ORGANIZATION"))

Zero-Shot Text Classification

In [ ]:
X,y = get_classification_dataset()
In [ ]:
clf = ZeroShotGPTClassifier(model="gpt-4-1106-preview")
clf.fit(X,y)
clf.predict(X)
100%|██████████| 30/30 [00:43<00:00,  1.44s/it]
Out[ ]:
['positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral']
In [ ]:
clf.predict(["I love you"])
100%|██████████| 1/1 [00:01<00:00,  1.06s/it]
Out[ ]:
['positive']
In [ ]:
clf.predict([
    "The successor to Saga will have new hardware and a cheaper price point, according to a person familiar with the plans.",
    "The newly created ETFs could attract inflows of up to $36 billion from other crypto products like Grayscale Bitcoin Trust (GBTC), a report said.",
    "Bitcoin's RSI divergence signals correction, 10x Research said."
    ])
100%|██████████| 3/3 [00:04<00:00,  1.63s/it]
Out[ ]:
['positive', 'neutral', 'neutral']

Saving and loading the classifier

In [ ]:
import pickle
pickle.dump(clf, open("data/zero_shot_gpt.pkl", "wb"))
In [ ]:
from skllm.config import SKLLMConfig
import os
SKLLMConfig.set_openai_key(os.getenv("OPENAI_API_KEY"))
clf_loaded = pickle.load(open("data/zero_shot_gpt.pkl", "rb"))
In [ ]:
clf_loaded.predict([
    "The successor to Saga will have new hardware and a cheaper price point, according to a person familiar with the plans.",
    "The newly created ETFs could attract inflows of up to $36 billion from other crypto products like Grayscale Bitcoin Trust (GBTC), a report said.",
    "Bitcoin's RSI divergence signals correction, 10x Research said."
    ])
100%|██████████| 3/3 [00:04<00:00,  1.38s/it]
Out[ ]:
['positive', 'neutral', 'neutral']
In [ ]:
# No labeled data: fit with only the list of candidate labels
clf.fit(None,["positive","negative","neutral"])
clf.predict(X)
100%|██████████| 30/30 [00:38<00:00,  1.28s/it]
Out[ ]:
['positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'negative',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'neutral']

Multi-Label Zero-Shot Text Classification

Classification for samples that can carry multiple labels.
If the code below does not run properly in the notebook, put it in a .py file and run it from there.

In [ ]:
from skllm.models.gpt.classification.zero_shot import MultiLabelZeroShotGPTClassifier
from skllm.datasets import get_multilabel_classification_dataset
from skllm.config import SKLLMConfig
import os
SKLLMConfig.set_openai_key(os.getenv("OPENAI_API_KEY"))
SKLLMConfig.set_openai_org(os.getenv("OPENAI_ORGANIZATION"))

X, y = get_multilabel_classification_dataset()
clf = MultiLabelZeroShotGPTClassifier(max_labels=3)
clf.fit(X,y)

Few-Shot Text Classification

In [ ]:
from skllm.models.gpt.classification.few_shot import FewShotGPTClassifier
from skllm.datasets import get_classification_dataset

X, y = get_classification_dataset()
clf = FewShotGPTClassifier(model="gpt-3.5-turbo")
clf.fit(X,y)
print(clf.predict(X))
100%|██████████| 30/30 [00:24<00:00,  1.24it/s]
['positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'negative', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral', 'neutral']

Text Vectorization

In [ ]:
from skllm.models.gpt.vectorization import GPTVectorizer
from skllm.datasets import get_classification_dataset

X, y = get_classification_dataset()
model = GPTVectorizer()
vectors = model.fit_transform(X)
vectors
Batch size: 1
100%|██████████| 30/30 [00:09<00:00,  3.30it/s]
Out[ ]:
array([[-1.08496211e-02,  8.19084700e-04,  6.26989827e-03, ...,
        -1.48865385e-02, -7.56211011e-05, -4.84170057e-02],
       [-7.52027147e-03, -1.57349240e-02, -1.33176930e-02, ...,
        -1.56432129e-02, -1.84076335e-02, -3.60553786e-02],
       [ 7.94564933e-03, -1.18701421e-02,  1.94484740e-02, ...,
         4.25314531e-03, -1.71028003e-02, -3.00555620e-02],
       ...,
       [-6.85371272e-03, -1.22346738e-02,  1.35289095e-02, ...,
        -3.79982567e-03, -9.90122370e-03, -2.87664663e-02],
       [-1.87684819e-02, -2.07024184e-03,  1.10712238e-02, ...,
        -1.68698262e-02, -1.12893125e-02, -1.93714350e-02],
       [-2.29031481e-02, -6.95864251e-03,  1.82640534e-02, ...,
        -3.40369754e-02, -2.81904452e-02, -1.62940267e-02]])
In [ ]:
vectors.shape
Out[ ]:
(30, 1536)
  • Combining the vectorizer with an XGBoost classifier in a scikit-learn Pipeline
In [ ]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
In [ ]:
le = LabelEncoder()
y_encoded = le.fit_transform(y)

steps = [("GPT",GPTVectorizer()),("XGB",XGBClassifier())]
clf = Pipeline(steps)
clf.fit(X,y_encoded)
In [ ]:
set(zip(y,y_encoded))
Out[ ]:
{('negative', 0), ('neutral', 1), ('positive', 2)}
In [ ]:
clf.predict(["I love you"])
Batch size: 1
100%|██████████| 1/1 [00:00<00:00,  3.78it/s]
Out[ ]:
array([2])
In [ ]:
clf.predict([
    "The successor to Saga will have new hardware and a cheaper price point, according to a person familiar with the plans.",
    "The newly created ETFs could attract inflows of up to $36 billion from other crypto products like Grayscale Bitcoin Trust (GBTC), a report said.",
    "Bitcoin's RSI divergence signals correction, 10x Research said."
    ])
Batch size: 1
100%|██████████| 3/3 [00:02<00:00,  1.39it/s]
Out[ ]:
array([0, 1, 0])

Prompt techniques¶

| Technique | Description | Key idea | Performance considerations |
| --- | --- | --- | --- |
| Zero-Shot Prompting | No examples are provided, so the model must rely on its training. | Leverages the model's pre-training. | Effective for simple tasks, but struggles with complex reasoning. |
| Few-Shot Prompting | Provides a few demonstrations of inputs and desired outputs. | Shows the desired reasoning format. | Tripled accuracy on grade-school math. |
| Chain-of-Thought (CoT) | Prefixes the response with intermediate reasoning steps. | Gives the model room to reason before answering. | Quadrupled accuracy on math datasets. |
| Least-to-Most Prompting | Prompts the model to solve simpler subtasks first. | Decomposes the problem into smaller pieces. | Accuracy improved from 16% to 99.7% on some tasks. |
| Self-Consistency | Selects the most frequent answer from multiple samples. | Increases redundancy. | Gains of 1-24 percentage points across benchmarks. |
| Chain-of-Density | Iteratively generates denser summaries by adding entities. | Produces rich yet concise summaries. | Improves the information density of summaries. |
| Chain-of-Verification (CoVe) | Verifies the initial response by generating and answering follow-up questions. | Mimics human verification. | Improves robustness and confidence. |
| Active Prompting | Selects uncertain samples as examples for human labeling. | Finds effective few-shot examples. | Improves few-shot performance. |
| Tree-of-Thought | Generates multiple responses and evaluates them automatically. | Allows backtracking along reasoning paths. | Finds optimal reasoning paths. |
| Verifiers | Trains a separate model to evaluate responses. | Filters out incorrect responses. | Up to 20 percentage points higher accuracy on grade-school math. |
| Fine-Tuning | Fine-tunes on an explanation dataset generated via prompting. | Improves the model's reasoning ability. | 73% accuracy on a commonsense QA dataset. |
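
Most of these techniques are demonstrated in the remainder of this chapter. Least-to-most prompting is not, so here is a minimal, hedged sketch of the idea in the same LangChain style as the other examples (the prompt wording and the example problem are illustrative):

from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

model = ChatOpenAI()

# Step 1: decompose the problem into simpler subproblems.
decompose_prompt = PromptTemplate(
    template="Break the following problem into a numbered list of simpler subproblems:\n{problem}",
    input_variables=["problem"],
)

# Step 2: solve the subproblems in order, then answer the original problem.
solve_prompt = PromptTemplate(
    template=(
        "Problem: {problem}\n"
        "Subproblems:\n{subproblems}\n"
        "Solve each subproblem in order, then give the final answer."
    ),
    input_variables=["problem", "subproblems"],
)

problem = "A notebook costs 3 dollars and a pen costs 2 dollars. How much do 4 notebooks and 3 pens cost?"
subproblems = (decompose_prompt | model).invoke({"problem": problem}).content
answer = (solve_prompt | model).invoke({"problem": problem, "subproblems": subproblems}).content
print(answer)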

Zero-shot prompting¶

In [ ]:
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
In [ ]:
model = ChatOpenAI()
prompt = PromptTemplate(
   template="Classify the sentiment of this text: {text}",
   input_variables=["text"],
)

chain = prompt | model
In [ ]:
print(chain.invoke({"text": "I hated that movie, it was terrible!"}))
content='The sentiment of the text is negative.'

Few-shot learning¶

In [ ]:
from langchain.prompts import FewShotPromptTemplate, PromptTemplate
from langchain.chat_models import ChatOpenAI
In [ ]:
examples = [
    {
        "input": "I absolutely love the new update! Everything works seamlessly.",
        "output": "positive",
    },
    {
        "input": "It's okay, but I think it could use more features.",
        "output": "neutral",
    },
    {
        "input": "I'm disappointed with the service, I expected much better performance.",
        "output": "negative",
    }
]

example_prompt = PromptTemplate(template="{input} -> {output}", input_variables=["input", "output"])
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    suffix="Question: {input}",
    input_variables=["input"],
)

print(prompt.format(input="I love this new update!"))
I absolutely love the new update! Everything works seamlessly. -> positive

It's okay, but I think it could use more features. -> neutral

I'm disappointed with the service, I expected much better performance. -> negative

Question: I love this new update!
In [ ]:
chain = prompt | ChatOpenAI(verbose=True)
In [ ]:
print(chain.invoke({"input": "This is an excellent book with high quality explanations."}))
content='positive'

CoT (Chain-of-Thought) prompting¶

  • Zero-shot CoT: simply append "Let's think step by step!" to the prompt
  • Few-shot CoT: a few-shot prompt in which the reasoning is explained as part of the example solutions, the idea being to encourage the LLM to explain its own reasoning before it decides.
In [ ]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate

Zero-shot CoT

In [ ]:
reasoning_prompt = "{question}\nLet's think step by step!"
prompt = PromptTemplate(
    template=reasoning_prompt,
    input_variables=["question"],
)

model = ChatOpenAI()
chain = prompt | model

print(
    chain.invoke(
        {
            "question": "There were 5 apples originally. I ate 2 apples. My friend gave me 3 apples. How many apples do I have now?",
        }
    ).content
)
Step 1: You started with 5 apples.
Step 2: You ate 2 apples, so you have 5 - 2 = 3 apples left.
Step 3: Your friend gave you 3 apples, so you now have 3 + 3 = 6 apples.
In [ ]:
examples = [
    {
        "input": "I absolutely love the new update! Everything works seamlessly.",
        "output": "'Absolutely love' and 'works seamlessly' are examples of positive sentiment. Therefore, the sentiment is positive.",
    },
    {
        "input": "It's okay, but I think it could use more features.",
        "output": "It's okay is not an endorsement. The customer further thinks it should be extended. Therefore, the sentiment is neutral.",
    },
    {
        "input": "I'm disappointed with the service, I expected much better performance.",
        "output": "The customer is disappointed and expected more. Therefore, the sentiment is negative.",
    }
]

example_prompt = PromptTemplate(template="{input} -> {output}", input_variables=["input", "output"])
prompt = FewShotPromptTemplate(
    examples=examples,
    example_prompt=example_prompt,
    suffix="Question: {input}\n\nLet's think step by step!",
    input_variables=["input"],
)

model = ChatOpenAI()
chain = prompt | model

print(
    chain.invoke(
        {
            "input": "There were 5 apples originally. I ate 2 apples. My friend gave me 3 apples. How many apples do I have now?",
        }
    ).content
)
Step 1: Start with 5 apples.
Step 2: Subtract 2 apples because you ate them. So you have 5 - 2 = 3 apples.
Step 3: Add 3 apples because your friend gave them to you. So you have 3 + 3 = 6 apples.
Therefore, you have 6 apples now.
In [ ]:
print(chain.invoke({"input": "This is an excellent book with high quality explanations."}).content)
1. "Excellent" and "high quality" are positive descriptors, indicating a positive sentiment.
2. The statement in the question is an endorsement of the book, expressing satisfaction with its explanations.
3. Therefore, the sentiment is positive.

Self-consistency

With self-consistency prompting, the model generates multiple candidate answers to a question.
These answers are then compared with each other, and the most consistent or most frequent one is selected as the final output.
A good example of self-consistency prompting with LLMs is fact-checking or information synthesis, where accuracy is paramount.
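
The chains below delegate the final tally to a second LLM call. As a hedged alternative sketch, once the candidate answers have been collected, the majority vote can also be done programmatically (assuming one answer per non-empty line; in practice the answers would need light normalization before counting):

from collections import Counter

def majority_vote(solutions_text: str) -> str:
    # One candidate answer per non-empty line
    candidates = [line.strip() for line in solutions_text.splitlines() if line.strip()]
    # Return the answer that occurs most often
    return Counter(candidates).most_common(1)[0][0]

print(majority_vote("1776\n1776\n1783\n1776\n1781"))  # -> 1776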

In [ ]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain.chat_models import ChatOpenAI
In [ ]:
solutions_template = """
Generate {num_solutions} distinct answers to the question.
The question is delimited with triple backticks.

question: ```{question}```

Solutions:

"""

consistency_template = """
For each answer in {solutions}, count the number of times it occurs.
Finally, choose the answer that occurs the most times.

Most frequent solution:

"""

# solution chain
solution_prompt = PromptTemplate(
    template=solutions_template,
    input_variables=["question", "num_solutions"],
)
solution_chain = LLMChain(
    prompt=solution_prompt,
    llm=ChatOpenAI(),
    output_key="solutions",
    verbose=True,
)

# consistency chain
consistency_prompt = PromptTemplate(
    template=consistency_template,
    input_variables=["solutions"],
)
consistency_chain = LLMChain(
    prompt=consistency_prompt,
    llm = ChatOpenAI(),
    output_key="best_solution",
    verbose=True,
)

# answer chain
answer_chain = SequentialChain(
    chains=[solution_chain, consistency_chain],
    input_variables=["question", "num_solutions"],
    output_variables=["best_solution"],
    verbose=True,
)
In [ ]:
response = answer_chain.run(question="Which year was the Declaration of Independence of the United States signed?", num_solutions=5)
response

> Entering new SequentialChain chain...

> Finished chain.
Out[ ]:
'The answer "The signing of the Declaration of Independence of the United States took place in 1776." occurs the most times, as it appears three times.'
In [ ]:
response = answer_chain.run(question="한국이 일제점령기로 부터 독립한 연도는 연제 인가?", num_solutions=5)
response

> Entering new SequentialChain chain...


> Entering new LLMChain chain...
Prompt after formatting:

Generate 5 distinct answers to the question.
The question is delimited with triple backticks.

question: ```한국이 일제점령기로 부터 독립한 연도는 연제 인가?```

Solutions:



> Finished chain.


> Entering new LLMChain chain...
Prompt after formatting:

For each answer in 1. 아니요, 한국은 1945년에 일제점령기로부터 독립했습니다.
2. 아니요, 한국은 1948년에 독립했습니다.
3. 아니요, 한국은 1953년에 독립했습니다.
4. 아니요, 한국은 1895년에 독립했습니다.
5. 아니요, 한국은 1910년에 일제에 의해 병합되기 전에 독립했습니다., count the number of times it occurs.
Finally, choose the answer that occurs the most times.

Most frequent solution:



> Finished chain.

> Finished chain.
Out[ ]:
'아니요, 한국은 1945년에 일제점령기로부터 독립했습니다. (occurs 1 time)'

Tree-of-thought(ToT)

In tree-of-thoughts (ToT) prompting, multiple problem-solving steps or approaches are generated for a given prompt, and the AI model is then used to evaluate them.
The evaluation is based on the model's judgment of how well each solution fits the problem.

ToT requires four prompt templates:

    1. Solutions template
    2. Evaluation template
    3. Reasoning template
    4. Ranking template
In [ ]:
# Solutions template
solutions_template = """
Generate {num_solutions} distinct solutions for {problem}.
Consider factors like {factors}.

Solutions:
"""
solutions_prompt = PromptTemplate(
    template=solutions_template,
    input_variables=["num_solutions", "problem", "factors"],
)

# Evaluation template
evaluation_template = """
Evaluate each solution in {solutions}  by analyzing pros, cons, feasibility, and probability of success.

Evaluations:
"""
evaluation_prompt = PromptTemplate(
    template=evaluation_template,
    input_variables=["solutions"],
)

# Reasoning template
reasoning_template = """
For the most promising solutions in {evaluations}, explain scenarios,
implementation strategies, partnerships needed, and handling potential obstacles.

Enhanced Reasoning:
"""
reasoning_prompt = PromptTemplate(
    template=reasoning_template,
    input_variables=["evaluations"],
)

# Ranking template
ranking_template = """
Based on the evaluations and reasoning, rank the solutions in {enhanced_reasoning} from most to least promising.

Ranked Solutions:
"""
ranking_prompt = PromptTemplate(
    template=ranking_template,
    input_variables=["enhanced_reasoning"],
)

solution_chain = LLMChain(
    prompt=solutions_prompt,
    llm=ChatOpenAI(),
    output_key="solutions",
    verbose=True,
)
evaluation_chain = LLMChain(
    prompt=evaluation_prompt,
    llm=ChatOpenAI(),
    output_key="evaluations",
    verbose=True,
)
reasoning_chain = LLMChain(
    prompt=reasoning_prompt,
    llm=ChatOpenAI(),
    output_key="enhanced_reasoning",
    verbose=True,
)
ranking_chain = LLMChain(
    prompt=ranking_prompt,
    llm=ChatOpenAI(),
    output_key="ranked_solutions",
    verbose=True,
)

tot_chain = SequentialChain(
    chains=[solution_chain, evaluation_chain, reasoning_chain, ranking_chain],
    input_variables=["num_solutions", "problem", "factors"],
    output_variables=["ranked_solutions"],
    verbose=True,
)
In [ ]:
response = tot_chain.run(
    problem="Prompt Engineering",
    factors="Requirements for high task performance, low token use, and few calls to the LLM",
    num_solutions=3,
)
response

> Entering new SequentialChain chain...


> Entering new LLMChain chain...
Prompt after formatting:

Generate 3 distinct solutions for Prompt Engineering.
Consider factors like Requirements for high task performance, low token use, and few calls to the LLM.

Solutions:


> Finished chain.


> Entering new LLMChain chain...
Prompt after formatting:

Evaluate each solution in 1. Implement an optimized task scheduling algorithm: By developing a task scheduling algorithm that takes into account factors such as task dependencies, resource availability, and task deadlines, we can ensure high task performance. This algorithm can prioritize tasks based on their importance and allocate resources efficiently, reducing the overall time required to complete tasks. This would result in fewer calls to the LLM and optimize token use, as tasks can be completed more efficiently.

2. Utilize caching mechanisms: By implementing caching mechanisms, we can reduce the number of calls to the LLM. Caching frequently accessed data or results of previous computations can help improve task performance as well as reduce token usage. This can be achieved by storing data in a cache that is easily accessible and updated when needed, reducing the need for repeated calls to the LLM for the same data.

3. Enable parallel processing capabilities: By enabling parallel processing capabilities, we can enhance task performance and reduce token use. This can be achieved by designing the engineering system to distribute tasks across multiple processors or machines, allowing for simultaneous execution of multiple tasks. This parallel processing approach can significantly improve task performance by decreasing the overall processing time required for a set of tasks, leading to fewer calls to the LLM and reducing token usage.  by analyzing pros, cons, feasibility, and probability of success.

Evaluations:


> Finished chain.


> Entering new LLMChain chain...
Prompt after formatting:

For the most promising solutions in 1. Implement an optimized task scheduling algorithm:
Pros:
- Prioritizes tasks based on their importance, which can improve overall task performance.
- Efficiently allocates resources, reducing the overall time required to complete tasks.
- Reduces the need for calls to the LLM and optimizes token use.

Cons:
- Developing a task scheduling algorithm that takes into account various factors can be complex and time-consuming.
- Implementing and maintaining such an algorithm may require significant resources and expertise.
- The algorithm may not always accurately prioritize tasks or allocate resources efficiently, leading to potential performance issues.

Feasibility:
- The feasibility of implementing an optimized task scheduling algorithm depends on the complexity of the engineering system and the availability of resources and expertise.
- It may be more feasible for large-scale systems with complex task dependencies and resource constraints.

Probability of success:
- The probability of success depends on the accuracy and effectiveness of the developed algorithm.
- Extensive testing and refinement may be required to ensure the algorithm performs as expected.
- The success of the algorithm also depends on the system's ability to accurately track task dependencies, resource availability, and deadlines.

2. Utilize caching mechanisms:
Pros:
- Reduces the number of calls to the LLM, improving task performance.
- Can reduce token usage by storing frequently accessed data or results of previous computations.
- Easy accessibility and updating of cached data.

Cons:
- Implementing caching mechanisms may require modifications to the existing system architecture.
- The effectiveness of caching depends on the frequency of data access and the size of the cache.
- There is a risk of stale or outdated data if the cache is not properly managed and updated.

Feasibility:
- The feasibility of utilizing caching mechanisms depends on the system architecture and the availability of suitable caching solutions.
- Implementing caching may require changes to the codebase and potential integration with third-party caching systems.

Probability of success:
- The probability of success depends on the accuracy of identifying frequently accessed data and the effectiveness of the caching mechanism.
- Proper cache management and regular updates are essential for maintaining data integrity and preventing stale data issues.

3. Enable parallel processing capabilities:
Pros:
- Enhances task performance by allowing simultaneous execution of multiple tasks.
- Reduces token usage by decreasing the overall processing time required for a set of tasks.
- Can be particularly effective for computationally intensive tasks or tasks with no interdependencies.

Cons:
- Parallel processing may introduce additional complexity and overhead in task execution.
- Task dependencies and resource constraints need to be carefully managed to ensure correct execution.
- Not all tasks may benefit from parallel processing, and the effort required to parallelize certain tasks may outweigh the performance gains.

Feasibility:
- The feasibility of enabling parallel processing capabilities depends on the nature of the tasks and the availability of resources.
- Systems with a high number of independent tasks or tasks that can be easily parallelized may benefit more from this approach.

Probability of success:
- The probability of success depends on the proper identification and parallelization of tasks that can benefit from parallel processing.
- Effective resource management and load balancing are crucial for achieving optimal performance.
- The success of parallel processing also depends on the system's ability to handle task dependencies and potential synchronization issues., explain scenarios,
implementation strategies, partnerships needed, and handling potential obstacles.

Enhanced Reasoning:


> Finished chain.


> Entering new LLMChain chain...
Prompt after formatting:

Based on the evaluations and reasoning, rank the solutions in 1. Implement an optimized task scheduling algorithm:
Scenarios:
- This solution could be beneficial in scenarios where there are complex task dependencies and limited resources.
- It can be particularly useful in large-scale systems where efficient allocation of resources is crucial.

Implementation Strategies:
- Conduct a thorough analysis of the system's task dependencies and resource constraints.
- Develop an algorithm that takes into account factors such as task importance, deadlines, and resource availability.
- Implement the algorithm within the existing system infrastructure, ensuring compatibility and minimal disruption.
- Conduct extensive testing and performance evaluation to validate the effectiveness of the algorithm.
- Continuously monitor and refine the algorithm based on real-time data and feedback.

Partnerships Needed:
- Collaboration with software engineers and developers to implement the algorithm.
- Coordination with system administrators and IT professionals to ensure compatibility and resource allocation.

Potential Obstacles:
- Developing a complex task scheduling algorithm can be time-consuming and require significant expertise.
- Integration with existing systems may pose challenges and require modifications to the codebase.
- Ensuring accurate tracking of task dependencies and resource availability can be challenging.

2. Utilize caching mechanisms:
Scenarios:
- This solution could be beneficial in scenarios where there are frequent calls to the LLM and a need to optimize token usage.
- It can be particularly effective in systems with high data access frequency or repetitive computations.

Implementation Strategies:
- Identify frequently accessed data or computations that can be cached.
- Implement a caching mechanism that stores and retrieves cached data efficiently.
- Integrate the caching mechanism with the existing system architecture, ensuring compatibility.
- Define cache management strategies to handle data updates, expiration, and eviction.
- Monitor cache performance and make adjustments as needed.

Partnerships Needed:
- Collaboration with software engineers and developers to implement the caching mechanism.
- Coordination with system administrators and IT professionals to ensure compatibility and integration.

Potential Obstacles:
- Modifying the existing system architecture to accommodate caching mechanisms may require significant changes to the codebase.
- Managing cache performance and preventing data staleness can be challenging.
- Identifying the most suitable data or computations for caching can be complex.

3. Enable parallel processing capabilities:
Scenarios:
- This solution could be beneficial in scenarios where there are computationally intensive tasks or tasks with no interdependencies.
- It can be particularly effective in systems with a high number of independent tasks.

Implementation Strategies:
- Identify tasks that can be parallelized and are suitable for simultaneous execution.
- Implement parallel processing techniques such as multi-threading or distributed computing.
- Develop resource management and load balancing strategies to ensure optimal task execution.
- Monitor and optimize the parallel processing capabilities based on performance metrics.

Partnerships Needed:
- Collaboration with software engineers and developers to implement parallel processing techniques.
- Coordination with system administrators and IT professionals to ensure resource availability and compatibility.

Potential Obstacles:
- Parallel processing may introduce additional complexity, requiring careful management of task dependencies and synchronization.
- Certain tasks may not be suitable for parallelization, and the effort required to parallelize them may outweigh the performance gains.
- Ensuring proper resource allocation and load balancing can be challenging. from most to least promising.

Ranked Solutions:


> Finished chain.

> Finished chain.
Out[ ]:
'1. Implement an optimized task scheduling algorithm\n2. Utilize caching mechanisms\n3. Enable parallel processing capabilities'
In [ ]:
print(response)
1. Implement an optimized task scheduling algorithm
2. Utilize caching mechanisms
3. Enable parallel processing capabilities
In [ ]: