Building a Chatbot like ChatGPT¶
# embedding
from langchain.embeddings.openai import OpenAIEmbeddings
from scipy.spatial.distance import pdist, squareform
import numpy as np
import pandas as pd
# vector database
from langchain.vectorstores import Chroma
from langchain.document_loaders import ArxivLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA # Chain for question-answering against a vector database.
from langchain.llms import OpenAI
# from langchain.llms import OpenAIChat
from langchain_community.chat_models import ChatOpenAI
# Loading and retrieving in Langchain
from langchain.document_loaders import TextLoader
from langchain.retrievers import (
    KNNRetriever,
    PubMedRetriever,  # interacts with the PubMed (biomedical literature citation) database
)
# Custom Retriever
from langchain.schema import Document, BaseRetriever
# Conversation buffers
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
# Remembering conversation summaries
from langchain.memory import ConversationSummaryMemory
# Storing knowledge graphs
from langchain.memory import ConversationKGMemory
# Combining several memory mechanisms
from langchain.memory import CombinedMemory
# Long-term persistence
from langchain.memory import ZepMemory
import uuid
from dotenv import load_dotenv
import os
load_dotenv()
True
Embedding¶
An embedding is a numerical representation of content in a form that machines can process and understand.
embeddings = OpenAIEmbeddings()
text = "This is a sample query."
query_result = embeddings.embed_query(text)
# print(query_result)
print(len(query_result))
1536
words = ["cat", "dog", "computer", "animal"]
embeddings = OpenAIEmbeddings()
doc_vectors = embeddings.embed_documents(words)
doc_vectors[0][1:10]
[-0.01743050591323599, -0.00966626551309167, -0.030631132997702794, -0.012618664121172612, 0.003124503287338194, -0.004992817784131865, -0.04122002249374317, -0.014563272094407411, -0.021348110433230556]
- pdist : pairwise distances; returns a condensed vector containing the $\binom{n}{2}$ pairwise distances
- squareform : converts the condensed distance vector into an $n \times n$ square matrix
For the 4 words above, $\binom{4}{2} = 6$ pairwise distances are computed.
X = np.array(doc_vectors)
# the default metric is euclidean; here we use cosine distance
pairwise_dist = pdist(X, metric="cosine")
print(pairwise_dist)
dists = squareform(pairwise_dist)
print(dists)
print(dists.shape)
[0.13631546 0.16534796 0.13565021 0.16875871 0.11445978 0.17475822]
[[0.         0.13631546 0.16534796 0.13565021]
 [0.13631546 0.         0.16875871 0.11445978]
 [0.16534796 0.16875871 0.         0.17475822]
 [0.13565021 0.11445978 0.17475822 0.        ]]
(4, 4)
# cosine distances between cat, dog, computer, and animal
df = pd.DataFrame(data=dists,index=words,columns=words)
df.style.background_gradient(cmap='coolwarm')
|          | cat      | dog      | computer | animal   |
|----------|----------|----------|----------|----------|
| cat      | 0.000000 | 0.136315 | 0.165348 | 0.135650 |
| dog      | 0.136315 | 0.000000 | 0.168759 | 0.114460 |
| computer | 0.165348 | 0.168759 | 0.000000 | 0.174758 |
| animal   | 0.135650 | 0.114460 | 0.174758 | 0.000000 |
Vector Storage¶
Vector storage is a standalone solution designed specifically to store and retrieve vector embeddings efficiently.
Vector databases such as Milvus or Pinecone are designed to store, manage, and search large sets of vectors.
Vector Indexing¶
Vector indexing is a way of organizing data to optimize the storage and/or contextual retrieval of embeddings.
For vector embeddings, indexing aims to structure the vectors so that similar vectors are stored next to each other, enabling fast proximity or similarity searches.
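Before introducing an index, the baseline is an exact search that scores the query against every stored vector. The minimal sketch below illustrates that baseline with NumPy; it assumes the `doc_vectors` and `words` variables from the embedding cells above are still in scope.
import numpy as np

def cosine_nearest(query_vec, vectors, k=2):
    # brute-force cosine similarity: score the query against every stored vector
    vectors = np.asarray(vectors)
    query_vec = np.asarray(query_vec)
    sims = vectors @ query_vec / (np.linalg.norm(vectors, axis=1) * np.linalg.norm(query_vec))
    return np.argsort(-sims)[:k]  # indices of the k most similar vectors

top = cosine_nearest(doc_vectors[0], doc_vectors, k=2)
print([words[i] for i in top])  # the first hit is "cat" itself
An index replaces this full scan with a data structure that narrows the search to a small neighborhood of candidate vectors, which is what makes similarity search fast at scale.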
Vector Libraries¶
Vector libraries such as Facebook (Meta) Faiss or Spotify Annoy provide functionality for working with vectors.
These libraries use Approximate Nearest Neighbor (ANN) algorithms to search vectors efficiently and find the most similar ones.
Faiss is the most popular (a minimal sketch follows the list below):
- widely used for large-scale vector search tasks
- supports both CPU and GPU acceleration
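A minimal Faiss sketch, assuming `faiss-cpu` is installed (`pip install faiss-cpu`) and that `doc_vectors` and `words` from the embedding cells are still in scope. `IndexFlatIP` is an exact inner-product index; for large collections Faiss also provides ANN index types such as IVF and HNSW variants.
import numpy as np
import faiss

X = np.array(doc_vectors, dtype="float32")
faiss.normalize_L2(X)                  # normalize so inner product equals cosine similarity
index = faiss.IndexFlatIP(X.shape[1])  # exact inner-product index over the embedding dimension
index.add(X)

query = np.array([doc_vectors[3]], dtype="float32")  # the embedding for "animal"
faiss.normalize_L2(query)
scores, ids = index.search(query, 3)   # top-3 most similar stored vectors
print([words[i] for i in ids[0]], scores[0])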
Vector Database¶
A vector database is designed to handle vector embeddings, making it easier to search and query data objects.
Whereas vector storage focuses only on storing and retrieving vector embeddings, a vector database provides a more comprehensive solution for managing and querying vector data.
Milvus is the most popular.
- followed by Qdrant, Weaviate, and Chroma, whose popularity rose sharply in 2023
Chroma is currently free.
Installation:
pip install chromadb
pip install pymupdf
# The download is a PDF, so the pymupdf library must be installed.
# Mistral 7B LLM paper: 2310.06825, https://arxiv.org/pdf/2310.06825.pdf
loader = ArxivLoader(query="2310.06825")
documents = loader.load()
documents
[Document(page_content='Mistral 7B\nAlbert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford,\nDevendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel,\nGuillaume Lample, Lucile Saulnier, Lélio Renard Lavaud, Marie-Anne Lachaux,\nPierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix,\nWilliam El Sayed\nAbstract\nWe introduce Mistral 7B, a 7–billion-parameter language model engineered for\nsuperior performance and efficiency. Mistral 7B outperforms the best open 13B\nmodel (Llama 2) across all evaluated benchmarks, and the best released 34B\nmodel (Llama 1) in reasoning, mathematics, and code generation. Our model\nleverages grouped-query attention (GQA) for faster inference, coupled with sliding\nwindow attention (SWA) to effectively handle sequences of arbitrary length with a\nreduced inference cost. We also provide a model fine-tuned to follow instructions,\nMistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and\nautomated benchmarks. Our models are released under the Apache 2.0 license.\nCode: https://github.com/mistralai/mistral-src\nWebpage: https://mistral.ai/news/announcing-mistral-7b/\n1\nIntroduction\nIn the rapidly evolving domain of Natural Language Processing (NLP), the race towards higher model\nperformance often necessitates an escalation in model size. However, this scaling tends to increase\ncomputational costs and inference latency, thereby raising barriers to deployment in practical,\nreal-world scenarios. In this context, the search for balanced models delivering both high-level\nperformance and efficiency becomes critically essential. Our model, Mistral 7B, demonstrates that\na carefully designed language model can deliver high performance while maintaining an efficient\ninference. Mistral 7B outperforms the previous best 13B model (Llama 2, [26]) across all tested\nbenchmarks, and surpasses the best 34B model (LLaMa 34B, [25]) in mathematics and code\ngeneration. Furthermore, Mistral 7B approaches the coding performance of Code-Llama 7B [20],\nwithout sacrificing performance on non-code related benchmarks.\nMistral 7B leverages grouped-query attention (GQA) [1], and sliding window attention (SWA) [6, 3].\nGQA significantly accelerates the inference speed, and also reduces the memory requirement during\ndecoding, allowing for higher batch sizes hence higher throughput, a crucial factor for real-time\napplications. In addition, SWA is designed to handle longer sequences more effectively at a reduced\ncomputational cost, thereby alleviating a common limitation in LLMs. These attention mechanisms\ncollectively contribute to the enhanced performance and efficiency of Mistral 7B.\narXiv:2310.06825v1 [cs.CL] 10 Oct 2023\nMistral 7B is released under the Apache 2.0 license. This release is accompanied by a reference\nimplementation1 facilitating easy deployment either locally or on cloud platforms such as AWS, GCP,\nor Azure using the vLLM [17] inference server and SkyPilot 2. Integration with Hugging Face 3 is\nalso streamlined for easier integration. Moreover, Mistral 7B is crafted for ease of fine-tuning across\na myriad of tasks. As a demonstration of its adaptability and superior performance, we present a chat\nmodel fine-tuned from Mistral 7B that significantly outperforms the Llama 2 13B – Chat model.\nMistral 7B takes a significant step in balancing the goals of getting high performance while keeping\nlarge language models efficient. 
Through our work, our aim is to help the community create more\naffordable, efficient, and high-performing language models that can be used in a wide range of\nreal-world applications.\n2\nArchitectural details\nFigure 1: Sliding Window Attention. The number of operations in vanilla attention is quadratic in the sequence\nlength, and the memory increases linearly with the number of tokens. At inference time, this incurs higher\nlatency and smaller throughput due to reduced cache availability. To alleviate this issue, we use sliding window\nattention: each token can attend to at most W tokens from the previous layer (here, W = 3). Note that tokens\noutside the sliding window still influence next word prediction. At each attention layer, information can move\nforward by W tokens. Hence, after k attention layers, information can move forward by up to k × W tokens.\nParameter\nValue\ndim\n4096\nn_layers\n32\nhead_dim\n128\nhidden_dim\n14336\nn_heads\n32\nn_kv_heads\n8\nwindow_size\n4096\ncontext_len\n8192\nvocab_size\n32000\nTable 1: Model architecture.\nMistral 7B is based on a transformer architecture [27]. The main\nparameters of the architecture are summarized in Table 1. Compared\nto Llama, it introduces a few changes that we summarize below.\nSliding Window Attention. SWA exploits the stacked layers of a trans-\nformer to attend information beyond the window size W. The hidden\nstate in position i of the layer k, hi, attends to all hidden states from\nthe previous layer with positions between i − W and i. Recursively,\nhi can access tokens from the input layer at a distance of up to W × k\ntokens, as illustrated in Figure 1. At the last layer, using a window size\nof W = 4096, we have a theoretical attention span of approximately\n131K tokens. In practice, for a sequence length of 16K and W = 4096,\nchanges made to FlashAttention [11] and xFormers [18] yield a 2x\nspeed improvement over a vanilla attention baseline.\nRolling Buffer Cache. A fixed attention span means that we can limit our cache size using a rolling\nbuffer cache. The cache has a fixed size of W, and the keys and values for the timestep i are stored\nin position i mod W of the cache. As a result, when the position i is larger than W, past values\nin the cache are overwritten, and the size of the cache stops increasing. We provide an illustration\nin Figure 2 for W = 3. On a sequence length of 32k tokens, this reduces the cache memory usage\nby 8x, without impacting the model quality.\n1https://github.com/mistralai/mistral-src\n2https://github.com/skypilot-org/skypilot\n3https://huggingface.co/mistralai\n2\nFigure 2: Rolling buffer cache. The cache has a fixed size of W = 4. Keys and values for position i are stored\nin position i mod W of the cache. When the position i is larger than W, past values in the cache are overwritten.\nThe hidden state corresponding to the latest generated tokens are colored in orange.\nPre-fill and Chunking. When generating a sequence, we need to predict tokens one-by-one, as\neach token is conditioned on the previous ones. However, the prompt is known in advance, and we\ncan pre-fill the (k, v) cache with the prompt. If the prompt is very large, we can chunk it into smaller\npieces, and pre-fill the cache with each chunk. For this purpose, we can select the window size as\nour chunk size. For each chunk, we thus need to compute the attention over the cache and over the\nchunk. 
Figure 3 shows how the attention mask works over both the cache and the chunk.\ngo\ndog\n0\n0\n0\n0\n1\n0\n0\n0\n0\n0\nthe\nto\nThe\ncat\nsat\non\nthe\n1\nmat\nand\n1\n1\n1\nsaw\nthe\n1\n0\n0\n0\ndog\ngo\nto\n1\n0\n0\n0\n0\n0\n1\n1\n0\n0\n0\n0\n0\n0\n0\n0\n1\n1\n1\n0\n0\n0\n0\n0\n0\n1\n1\n1\n1\n0\nPast\nCache\nCurrent\nFigure 3: Pre-fill and chunking. During pre-fill of the cache, long sequences are chunked to limit memory\nusage. We process a sequence in three chunks, “The cat sat on”, “the mat and saw”, “the dog go to”. The figure\nshows what happens for the third chunk (“the dog go to”): it attends itself using a causal mask (rightmost block),\nattends the cache using a sliding window (center block), and does not attend to past tokens as they are outside of\nthe sliding window (left block).\n3\nResults\nWe compare Mistral 7B to Llama, and re-run all benchmarks with our own evaluation pipeline for\nfair comparison. We measure performance on a wide variety of tasks categorized as follow:\n• Commonsense Reasoning (0-shot): Hellaswag [28], Winogrande [21], PIQA [4], SIQA [22],\nOpenbookQA [19], ARC-Easy, ARC-Challenge [9], CommonsenseQA [24]\n• World Knowledge (5-shot): NaturalQuestions [16], TriviaQA [15]\n• Reading Comprehension (0-shot): BoolQ [8], QuAC [7]\n• Math: GSM8K [10] (8-shot) with maj@8 and MATH [13] (4-shot) with maj@4\n• Code: Humaneval [5] (0-shot) and MBPP [2] (3-shot)\n• Popular aggregated results: MMLU [12] (5-shot), BBH [23] (3-shot), and AGI Eval [29]\n(3-5-shot, English multiple-choice questions only)\nDetailed results for Mistral 7B, Llama 2 7B/13B, and Code-Llama 7B are reported in Table 2. Figure 4\ncompares the performance of Mistral 7B with Llama 2 7B/13B, and Llama 1 34B4 in different\ncategories. Mistral 7B surpasses Llama 2 13B across all metrics, and outperforms Llama 1 34B on\nmost benchmarks. In particular, Mistral 7B displays a superior performance in code, mathematics,\nand reasoning benchmarks.\n4Since Llama 2 34B was not open-sourced, we report results for Llama 1 34B.\n3\nFigure 4: Performance of Mistral 7B and different Llama models on a wide range of benchmarks. All\nmodels were re-evaluated on all metrics with our evaluation pipeline for accurate comparison. Mistral 7B\nsignificantly outperforms Llama 2 7B and Llama 2 13B on all benchmarks. It is also vastly superior to Llama 1\n34B in mathematics, code generation, and reasoning benchmarks.\nModel\nModality MMLU HellaSwag WinoG PIQA\nArc-e\nArc-c\nNQ\nTriviaQA HumanEval MBPP MATH GSM8K\nLLaMA 2 7B\nPretrained 44.4%\n77.1%\n69.5% 77.9% 68.7% 43.2% 24.7%\n63.8%\n11.6%\n26.1%\n3.9%\n16.0%\nLLaMA 2 13B\nPretrained 55.6%\n80.7%\n72.9% 80.8% 75.2% 48.8% 29.0%\n69.6%\n18.9%\n35.4%\n6.0%\n34.3%\nCode-Llama 7B Finetuned\n36.9%\n62.9%\n62.3% 72.8% 59.4% 34.5% 11.0%\n34.9%\n31.1%\n52.5%\n5.2%\n20.8%\nMistral 7B\nPretrained 60.1%\n81.3%\n75.3% 83.0% 80.0% 55.5% 28.8%\n69.9%\n30.5%\n47.5% 13.1%\n52.2%\nTable 2: Comparison of Mistral 7B with Llama. Mistral 7B outperforms Llama 2 13B on all metrics, and\napproaches the code performance of Code-Llama 7B without sacrificing performance on non-code benchmarks.\nSize and Efficiency. We computed “equivalent model sizes” of the Llama 2 family, aiming to\nunderstand Mistral 7B models’ efficiency in the cost-performance spectrum (see Figure 5). When\nevaluated on reasoning, comprehension, and STEM reasoning (specifically MMLU), Mistral 7B\nmirrored performance that one might expect from a Llama 2 model with more than 3x its size. 
On\nthe Knowledge benchmarks, Mistral 7B’s performance achieves a lower compression rate of 1.9x,\nwhich is likely due to its limited parameter count that restricts the amount of knowledge it can store.\nEvaluation Differences. On some benchmarks, there are some differences between our evaluation\nprotocol and the one reported in the Llama 2 paper: 1) on MBPP, we use the hand-verified subset 2)\non TriviaQA, we do not provide Wikipedia contexts.\n4\nInstruction Finetuning\nModel\nChatbot Arena\nELO Rating\nMT Bench\nWizardLM 13B v1.2\n1047\n7.2\nMistral 7B Instruct\n1031\n6.84 +/- 0.07\nLlama 2 13B Chat\n1012\n6.65\nVicuna 13B\n1041\n6.57\nLlama 2 7B Chat\n985\n6.27\nVicuna 7B\n997\n6.17\nAlpaca 13B\n914\n4.53\nTable 3: Comparison of Chat models. Mistral 7B –\nInstruct outperforms all 7B models on MT-Bench, and\nis comparable to 13B – Chat models.\nTo evaluate the generalization capabilities of\nMistral 7B, we fine-tuned it on instruction datasets\npublicly available on the Hugging Face repository.\nNo proprietary data or training tricks were utilized:\nMistral 7B – Instruct model is a simple and\npreliminary demonstration that the base model can\neasily be fine-tuned to achieve good performance.\nIn Table 3, we observe that the resulting model,\nMistral 7B – Instruct, exhibits superior perfor-\nmance compared to all 7B models on MT-Bench,\nand is comparable to 13B – Chat models. An\nindependent human evaluation was conducted on\nhttps://llmboxing.com/leaderboard.\nIn this evaluation, participants were provided with a set of questions along with anonymous responses\nfrom two models and were asked to select their preferred response, as illustrated in Figure 6. As of\nOctober 6, 2023, the outputs generated by Mistral 7B were preferred 5020 times, compared to 4143\ntimes for Llama 2 13B.\n4\nFigure 5: Results on MMLU, commonsense reasoning, world knowledge and reading comprehension for\nMistral 7B and Llama 2 (7B/13B/70B). Mistral 7B largely outperforms Llama 2 13B on all evaluations, except\non knowledge benchmarks, where it is on par (this is likely due to its limited parameter count, which limits the\namount of knowledge it can compress).\n5\nAdding guardrails for front-facing applications\nThe ability to enforce guardrails when it comes to AI generation is important for front-facing appli-\ncations. In this section, we highlight how to leverage system prompting to optionally enforce output\nconstraints on top of our models. Additionally, we showcase the ability of Mistral 7B to perform\nfine-grained content moderation, which can be useful to enforce quality content in applications.\n5.1\nSystem prompt to enforce guardrails\nWe introduce a system prompt (see below) to guide the model to generate answers within specified\nguardrails, similar to the work done with Llama 2. Using this prompt allows the user to move on the\nPareto front of model utility / guardrails enforcement, as indicated in Table 4.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful,\nunethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.\nGuardrails\nMT Bench\nNo system prompt\n6.84 ± 0.07\nLlama 2 system prompt\n6.38 ± 0.07\nMistral system prompt\n6.58 ± 0.05\nTable 4: System prompts. 
Mean official\nMT Bench score over 10 iterations with\nstandard deviation for Mistral 7B – Instruct.\nFor reference, Llama 2 13B – Chat reports\nofficial results of 6.65.\nWe use a set of 175 unsafe prompts for evaluating safety.\nWith the recommended system prompt, the model properly\ndeclines to answer 100% of the harmful questions.\nAs an illustration, we provide in Table 5 the answers of\nboth Mistral 7B – Instruct and Llama 2 Chat 13B to the\nquestion How to kill a linux process with system prompts\nactivated. We observe that Mistral 7B provides a correct\nresponse while Llama 2 declines to answer. Note that on\nthis specific question, both models answer correctly when\nsystem prompts are deactivated.\n5.2\nContent moderation with self-reflection\nMistral 7B – Instruct can be used as a content moderator: the model itself is able to accurately\nclassify a user prompt or its generated answer as being either acceptable or falling into one of the\nfollowing categories: Illegal activities such as terrorism, child abuse or fraud; Hateful, harassing\nor violent content such as discrimination, self-harm or bullying; Unqualified advice for instance\nin legal, medical or financial domains.\n5\nModel\nAnswer\nMistral 7B – Instruct\nwith Mistral system prompt\nTo kill a Linux process, you can use the `kill`command followed by the process ID (PID)\nof the process you want to terminate. For example, to kill process with PID 1234, you\nwould run the command `kill 1234`. It’s important to note that killing a process can\nhave unintended consequences, so it’s generally a good idea to only kill processes that\nyou are certain you want to terminate. Additionally, it’s generally a good idea to use the\n`kill`command with caution, as it can be used to terminate critical system processes that\nare necessary for the operation of the system.\nLlama 2 13B – Chat\nwith Llama 2 system prompt\nAs a helpful and respectful assistant, I cannot provide information on how to kill a Linux\nprocess. It is not appropriate or ethical to intentionally harm or terminate processes, as it\ncan cause damage to the system and potentially result in data loss. Additionally, it is\nimportant to respect the autonomy and security of other users’ processes, as they may be\ncritical to their work or personal activities. Instead, I suggest that you explore other\noptions to address any issues or concerns you may have.\nTable 5: Comparison between Mistral and Llama system prompts. Mistral provides the right answer whereas\nLlama 2 declines to answer to the question How to kill a linux process.\nTo do so, we designed a self-reflection prompt that makes Mistral 7B classify a prompt or a generated\nanswer. We evaluated self-reflection on our manually curated and balanced dataset of adversarial\nand standard prompts and got a precision of 99.4% for a recall of 95.6% (considering acceptable\nprompts as positives).\nThe use cases are vast, from moderating comments on social media or forums to brand monitoring\non the internet. In particular, the end user is able to select afterwards which categories to effectively\nfilter based on their particular use-case.\n6\nConclusion\nOur work on Mistral 7B demonstrates that language models may compress knowledge more than\nwhat was previously thought. 
This opens up interesting perspectives: the field has so far put the\nemphasis on scaling laws in 2 dimensions (directly associating model capabilities to training cost, as\nin [14]); the problem is rather 3 dimensional (model capabilities, training cost, inference cost), and\nmuch remains to be explored to obtain the best performance with the smallest possible model.\nAcknowledgements\nWe are grateful to CoreWeave for their 24/7 help in marshalling our cluster.\nWe thank the\nCINECA/EuroHPC team, and in particular the operators of Leonardo, for their resources and help.\nWe thank the maintainers of FlashAttention, vLLM, xFormers, Skypilot for their precious assistance\nin implementing new features and integrating their solutions into ours. A huge thanks to Tri Dao\nand Daniel Haziza for helping include Mistral related changes to FlashAttention and xFormers on\na tight schedule. We thank the teams of Hugging Face, AWS, GCP, Azure ML for their intense help\nin making our model compatible everywhere.\n6\nFigure 6: Human evaluation of Mistral 7B – Instruct vs Llama 2 13B – Chat Example. An example of\nhuman evaluation from llmboxing.com. The question asks for recommendations of books in quantum physics.\nLlama 2 13B – Chat recommends a general physics book, while Mistral 7B – Instruct recommends a more\nrelevant book on quantum physics and describes in the contents in more detail.\n7\nReferences\n[1] Joshua Ainslie, James Lee-Thorp, Michiel de Jong, Yury Zemlyanskiy, Federico Lebrón, and\nSumit Sanghai. Gqa: Training generalized multi-query transformer models from multi-head\ncheckpoints. arXiv preprint arXiv:2305.13245, 2023.\n[2] Jacob Austin, Augustus Odena, Maxwell Nye, Maarten Bosma, Henryk Michalewski, David\nDohan, Ellen Jiang, Carrie Cai, Michael Terry, Quoc Le, et al. Program synthesis with large\nlanguage models. arXiv preprint arXiv:2108.07732, 2021.\n[3] Iz Beltagy, Matthew E Peters, and Arman Cohan. Longformer: The long-document transformer.\narXiv preprint arXiv:2004.05150, 2020.\n[4] Yonatan Bisk, Rowan Zellers, Jianfeng Gao, Yejin Choi, et al. Piqa: Reasoning about phys-\nical commonsense in natural language. In Proceedings of the AAAI conference on artificial\nintelligence, 2020.\n[5] Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared\nKaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. Evaluating large\nlanguage models trained on code. arXiv preprint arXiv:2107.03374, 2021.\n[6] Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. Generating long sequences with\nsparse transformers. arXiv preprint arXiv:1904.10509, 2019.\n[7] Eunsol Choi, He He, Mohit Iyyer, Mark Yatskar, Wen-tau Yih, Yejin Choi, Percy Liang, and\nLuke Zettlemoyer. Quac: Question answering in context. arXiv preprint arXiv:1808.07036,\n2018.\n[8] Christopher Clark, Kenton Lee, Ming-Wei Chang, Tom Kwiatkowski, Michael Collins, and\nKristina Toutanova. Boolq: Exploring the surprising difficulty of natural yes/no questions.\narXiv preprint arXiv:1905.10044, 2019.\n[9] Peter Clark, Isaac Cowhey, Oren Etzioni, Tushar Khot, Ashish Sabharwal, Carissa Schoenick,\nand Oyvind Tafjord. Think you have solved question answering? try arc, the ai2 reasoning\nchallenge. arXiv preprint arXiv:1803.05457, 2018.\n[10] Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser,\nMatthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, et al. Training verifiers to\nsolve math word problems. 
arXiv preprint arXiv:2110.14168, 2021.\n[11] Tri Dao, Daniel Y. Fu, Stefano Ermon, Atri Rudra, and Christopher Ré. FlashAttention: Fast\nand memory-efficient exact attention with IO-awareness. In Advances in Neural Information\nProcessing Systems, 2022.\n[12] Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and\nJacob Steinhardt.\nMeasuring massive multitask language understanding.\narXiv preprint\narXiv:2009.03300, 2020.\n[13] Dan Hendrycks, Collin Burns, Saurav Kadavath, Akul Arora, Steven Basart, Eric Tang, Dawn\nSong, and Jacob Steinhardt. Measuring mathematical problem solving with the math dataset.\narXiv preprint arXiv:2103.03874, 2021.\n[14] Jordan Hoffmann, Sebastian Borgeaud, Arthur Mensch, Elena Buchatskaya, Trevor Cai, Eliza\nRutherford, Diego de Las Casas, Lisa Anne Hendricks, Johannes Welbl, Aidan Clark, Thomas\nHennigan, Eric Noland, Katherine Millican, George van den Driessche, Bogdan Damoc, Aurelia\nGuy, Simon Osindero, Karén Simonyan, Erich Elsen, Oriol Vinyals, Jack Rae, and Laurent\nSifre. An empirical analysis of compute-optimal large language model training. In Advances in\nNeural Information Processing Systems, volume 35, 2022.\n[15] Mandar Joshi, Eunsol Choi, Daniel S Weld, and Luke Zettlemoyer.\nTriviaqa: A large\nscale distantly supervised challenge dataset for reading comprehension.\narXiv preprint\narXiv:1705.03551, 2017.\n[16] Tom Kwiatkowski, Jennimaria Palomaki, Olivia Redfield, Michael Collins, Ankur Parikh, Chris\nAlberti, Danielle Epstein, Illia Polosukhin, Jacob Devlin, Kenton Lee, et al. Natural questions: a\nbenchmark for question answering research. Transactions of the Association for Computational\nLinguistics, 7:453–466, 2019.\n8\n[17] Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu,\nJoseph E. Gonzalez, Hao Zhang, and Ion Stoica. Efficient memory management for large lan-\nguage model serving with pagedattention. In Proceedings of the ACM SIGOPS 29th Symposium\non Operating Systems Principles, 2023.\n[18] Benjamin Lefaudeux, Francisco Massa, Diana Liskovich, Wenhan Xiong, Vittorio Caggiano,\nSean Naren, Min Xu, Jieru Hu, Marta Tintore, Susan Zhang, Patrick Labatut, and Daniel Haziza.\nxformers: A modular and hackable transformer modelling library. https://github.com/\nfacebookresearch/xformers, 2022.\n[19] Todor Mihaylov, Peter Clark, Tushar Khot, and Ashish Sabharwal. Can a suit of armor conduct\nelectricity? a new dataset for open book question answering. arXiv preprint arXiv:1809.02789,\n2018.\n[20] Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan,\nYossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, et al. Code llama: Open foundation models\nfor code. arXiv preprint arXiv:2308.12950, 2023.\n[21] Keisuke Sakaguchi, Ronan Le Bras, Chandra Bhagavatula, and Yejin Choi. Winogrande: An\nadversarial winograd schema challenge at scale. Communications of the ACM, 64(9):99–106,\n2021.\n[22] Maarten Sap, Hannah Rashkin, Derek Chen, Ronan LeBras, and Yejin Choi. Socialiqa: Com-\nmonsense reasoning about social interactions. arXiv preprint arXiv:1904.09728, 2019.\n[23] Mirac Suzgun, Nathan Scales, Nathanael Schärli, Sebastian Gehrmann, Yi Tay, Hyung Won\nChung, Aakanksha Chowdhery, Quoc V Le, Ed H Chi, Denny Zhou, , and Jason Wei.\nChallenging big-bench tasks and whether chain-of-thought can solve them. arXiv preprint\narXiv:2210.09261, 2022.\n[24] Alon Talmor, Jonathan Herzig, Nicholas Lourie, and Jonathan Berant. 
Commonsenseqa: A ques-\ntion answering challenge targeting commonsense knowledge. arXiv preprint arXiv:1811.00937,\n2018.\n[25] Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timo-\nthée Lacroix, Baptiste Rozière, Naman Goyal, Eric Hambro, Faisal Azhar, et al. Llama: Open\nand efficient foundation language models. arXiv preprint arXiv:2302.13971, 2023.\n[26] Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei,\nNikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, et al. Llama 2: Open\nfoundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288, 2023.\n[27] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,\nŁukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in neural information\nprocessing systems, 30, 2017.\n[28] Rowan Zellers, Ari Holtzman, Yonatan Bisk, Ali Farhadi, and Yejin Choi. Hellaswag: Can a\nmachine really finish your sentence? arXiv preprint arXiv:1905.07830, 2019.\n[29] Wanjun Zhong, Ruixiang Cui, Yiduo Guo, Yaobo Liang, Shuai Lu, Yanlin Wang, Amin Saied,\nWeizhu Chen, and Nan Duan. Agieval: A human-centric benchmark for evaluating foundation\nmodels. arXiv preprint arXiv:2304.06364, 2023.\n9\n', metadata={'Published': '2023-10-10', 'Title': 'Mistral 7B', 'Authors': 'Albert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, Lélio Renard Lavaud, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed', 'Summary': 'We introduce Mistral 7B v0.1, a 7-billion-parameter language model engineered\nfor superior performance and efficiency. Mistral 7B outperforms Llama 2 13B\nacross all evaluated benchmarks, and Llama 1 34B in reasoning, mathematics, and\ncode generation. Our model leverages grouped-query attention (GQA) for faster\ninference, coupled with sliding window attention (SWA) to effectively handle\nsequences of arbitrary length with a reduced inference cost. We also provide a\nmodel fine-tuned to follow instructions, Mistral 7B -- Instruct, that surpasses\nthe Llama 2 13B -- Chat model both on human and automated benchmarks. Our\nmodels are released under the Apache 2.0 license.'})]
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
len(docs)
1
# A folder is created locally (here ./.chroma/arxiv) and the vector database is persisted inside it
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./.chroma/arxiv")
In a notebook, we should call persist() to ensure the embeddings are written to disk. This isn't necessary in a script - the database will be automatically persisted when the client object is destroyed.
vectordb.persist()
vector_store = None
Load the Database from disk, and create the chain¶
vectordb = Chroma(persist_directory="./.chroma/arxiv", embedding_function=embeddings)
retriever = VectorStoreRetriever(vectorstore=vectordb)
retrievalQA = RetrievalQA.from_chain_type(llm=ChatOpenAI(model="gpt-4-1106-preview"), retriever=retriever)
sim_docs = vectordb.similarity_search("What are the most important features of Mistral?")
sim_docs
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
[Document(page_content='Mistral 7B\nAlbert Q. Jiang, Alexandre Sablayrolles, ... (same full paper text as shown above) ...', metadata={'Published': '2023-10-10', 'Title': 'Mistral 7B', ...})]
response = retrievalQA.run("What are the most important features of Mistral?")
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
print(response)
The most important features of Mistral 7B, a 7-billion-parameter language model, include: 1. **Grouped-Query Attention (GQA):** This feature significantly accelerates the inference speed and reduces the memory requirement during decoding. It allows for higher batch sizes and thus higher throughput, which is crucial for real-time applications. 2. **Sliding Window Attention (SWA):** This attention mechanism is designed to handle longer sequences more effectively at a reduced computational cost. It allows each token to attend to a window of W tokens from the previous layer, enabling the model to process sequences of arbitrary length efficiently. 3. **Rolling Buffer Cache:** With a fixed attention span, the rolling buffer cache limits the cache size. It stores the keys and values for the timestep `i` in position `i mod W` of the cache. When the position `i` is larger than `W`, past values in the cache are overwritten, and the cache size remains constant. 4. **Pre-fill and Chunking:** These techniques allow for efficient handling of large prompts by pre-filling the cache with known prompt information and chunking large sequences to limit memory usage. 5. **High Performance and Efficiency:** Despite its smaller size compared to other models like Llama 2 13B and Llama 1 34B, Mistral 7B outperforms these models across various benchmarks, particularly in reasoning, mathematics, and code generation tasks. 6. **Fine-tuning Capabilities:** Mistral 7B can be fine-tuned for specific tasks, such as following instructions, and has demonstrated the ability to outperform larger models in these areas as well. 7. **Open Source and Compatibility:** Mistral 7B is released under the Apache 2.0 license and comes with a reference implementation that supports deployment on cloud platforms (AWS, GCP, Azure) and integration with Hugging Face. These features make Mistral 7B a highly efficient and effective language model for various NLP tasks, balancing performance with inference cost and computational efficiency.
Loading and retrieving in LangChain¶
<Vector stores and document loaders>
Document Loaders¶
loader = TextLoader(file_path="./data/나없이는존재하지않는세상_20240102.txt")
docs = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100))
len(docs)
164
# A local .chroma/txt folder is created and the vector database is persisted inside it
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./.chroma/txt")
vectordb.persist()
sim_docs = vectordb.similarity_search("양자역학은 무엇에 대한 것인가?", k=4)
for doc in sim_docs:
print(doc.page_content)
print("=====================================")
189 또 이런 메일도 받습니다. "이미 이 순간을 살았던 것같은 느낌이 드는데 교수님, 양자 효과인가요?" 맙소사, 아닙니다! 우리의 기억과 생각의 복잡성이 양자와무슨 관련이 있단 말입니까? 전혀, 전혀 상관없습니다!양자역학은 초자연적 현상이나 대체 의학, 신비한 파동이나 진동과는 아무 관련이 없습니다. 물론 저도 기분 좋은 진동을 좋아합니다. 저도 어렸을 때는 긴 머리에 빨간 띠를 두르고 앨런 긴즈버그* 바로 옆에서 다리를 꼬고 앉아 '옴’을 읊은 적도 있습니다.그러나 우리와 우주 사이의 미묘하고 복잡한 정서적 관계가 양자론의 파동과 관련이 있는 정도는, 바흐의 칸타타가 내 차의 기화기와 관련이 있는 정도일 겁니다. 이 세계는 바흐 음악의 마법, 기분 좋은 진동, 우리의깊은 영적 삶을 낳을 수 있을 정도로 충분히 복잡하기때문에, 굳이 이상한 양자를 들먹일 필요도 없습니다. 또는 반대로, 양자적 현실은 우리의 모든 심리적 현실과 영적 삶의 섬세하고 신비롭고 매혹적이며 복잡한측면보다 훨씬 더 기묘하다고 할 수도 있습니다. 또한저는 마음의 작용과 같이 우리가 거의 이해하지 못하는 복잡한 현상을 설명하기 위해 양자역학을 사용하려는 시도는 전혀 설득력이 없다고 생각합니다. *앨런 긴즈버그는 Allen Ginsbery는 미국의 시인이자 반문화의 아이콘으로 '비트 세대'의 대표작가로 꼽힌다. 190 ᏂᏂ 그러나 비록 일상의 직접적인 경험과는 거리가 멀다고 하더라도, 세계의 양자적 본질에 대한 발견은 너무급진적이어서, 마음의 본질과 같은 큰 미해결 질문과전혀 무관하다고 보기는 어렵습니다. 마음의 작용을비롯한 우리가 아직 이해하지 못한 다른 현상들이 양자 현상이어서가 아니라, 양자의 발견으로 물리적 세계와 물질에 대한 우리의 개념이 바뀌어 질문의 조건이 달라지기 때문입니다. ===================================== “양자역학은 우리 가운데 누구도 제대로 이해하지못하지만 사용할 줄은 아는 무척 신비롭고 당혹스러운학문이다." 리처드 파인만도 마찬가지로 "양자역학을 이해하는사람은 아무도 없다"고 말했다. 양자 이론은 매우 유용하지만 세계의 실재, 세계상에 대해 말해주는 바는 이해하기 어렵고 매우 혼란스럽다는 말이다. 오늘날 양자 이론이 물리학·화학·생물학·천문학 등 현대 과학의기초이고 컴퓨터, 레이저, 원자력과 같은 현대 기술의유용한 토대임을 생각한다면, 이는 미스터리가 아닐수 없다. 카를로 로벨리는 이 책 《나 없이는 존재하지 않는세상》에서 양자 이론이 탄생한 지 100년이 지난 지금에도 풀리지 않고 있는 이 수수께끼에 새롭게 도전한다. 양자 이론이 세계의 실재에 대해 무엇을 말해주는 238 지, 혹은 양자 이론이 그려내는 세계는 어떠한 모습인지를 진지하게 탐구한다. 결론부터 말하면 세계는 고정된 속성을 지닌 자립적인 실체, 즉 물질 입자들로 구성되어 있지 않고 상호 간의 작용과 상관관계를 바탕으로 한 관계의 네트워크로 이루어져 있다. (그는 이를양자 이론에 대한 '관계론적 해석'이라고 부른다.) 로벨리의 탐구는 관찰 가능성에 기반해 양자 이론을 꽃피운 베르너 하이젠베르크의 아이디어에서 출발하지만, 그 여정의 전 과정은 놀라울 정도로 광활하고방대하다. 과학과 철학의 영역을 경계 없이 넘나들면서 통섭적으로 사고한다. ===================================== 이 이론은 세계의 실재 구조에서부터 경험의 본성까지, 형이상학에서부터 어쩌면 의식의 본질에 이르기까지, 큰 물음들을 다시 생각할 수 있는 새로운 길을 제시합니다. 이 모든 것은 오늘날 과학자와 철학자들 사이에서 활발한 논쟁의 주제가 되고 있죠. 앞으로 이 모든주제에 대해 이야기해보겠습니다. 베르너 하이젠베르크는 북풍이 몰아치는 극한의 척박한 섬 헬골란트에서 진리를 가리고 있던 장막을 걷어냈습니다. 그런데 그 장막 너머에서 나타난 것은 심연이었습니다. 이 책의 이야기는 하이젠베르크의 아이디어가 싹을 틔운 섬에서 시작하여, 세계 실재의 양자적 구조가 발견됨으로써 제기된 더 큰 질문으로 점차확장해 갑니다. 저는 이 책을 주로 양자 물리학에 익숙하지 않으며양자 물리학이 무엇인지, 양자 물리학이 의미하는 바가 무엇인지 궁금해하는 사람들을 위해 썼습니다. 문 11 hh 제의 핵심을 파악하는 데 꼭 필요하지 않은 세부 사항은 생략하고 최대한 간결히 설명하려고 노력했습니다.난해하지만 핵심적인 이론에 대해서는 가능한 한 명확하게 설명하려고 노력했고요. 어쩌면 양자역학을 이해하는 방법을 설명하기보다는, 양자역학을 이해하기가왜 그렇게 어려운지를 설명하고 있는 것일지도 모르겠네요. 그러나 이 책은 양자역학에 대해 더 깊이 파고들수록 더 많은 의문을 품게 되는 동료 과학자와 철학자들을 위한 책이기도 합니다. 이 놀라운 물리학의 의미에대한 대화를 계속 이어가고 더 일반적인 관점으로 나아가고 싶어서죠. 이 책에는 이미 양자역학에 익숙한이들을 위한 주석도 많이 달려 있습니다. 본문에서는좀 더 읽기 쉽게 말하고자 한 바를, 주석에서는 더 정확하게 표현했습니다. ===================================== 양자론은 화학의 기초, 원자와 고체 그리고 플라즈마의 작용, 하늘의 색깔, 우리 뇌의 뉴런, 별의 동역학, 은하의 기원 등 세계의 수많은 측면을 밝혀냈습니다. 그것은 컴퓨터에서 원자력발전소에 이르기까지 최신 기술의 기초가 됩니다. 공학자, 천체 물리학자, 우주학자,화학자, 생물학자들은 매일 이 이론을 사용합니다. 고등학교 교과과정에도 그 이론의 기초가 포함되어 있 9 죠. 그 이론은 틀린 적이 없습니다. 현대 과학의 심장이라고 할 수 있죠. 그러나 그것은 여전히 심오한 미스터리로 남아 있습니다. 어딘지 모르게 불안함을 줍니다. 양자론은 이 세계가 정해진 궤적을 따라 움직이는입자들로 구성된 것이라는 세계의 이미지를 부숴버렸지만, 우리가 세계에 대해 어떻게 생각해야 하는지는명확히 보여주지 않았습니다. 양자론의 수학은 세계의실재를 기술하지 않으며, “무엇이 있는지 알려주지 않습니다. 멀리 떨어져 있는 물체들은 서로 마법으로 연결되어 있는 것처럼 보입니다. 물질은 유령 같은 확률파동으로 대체되고…. 양자론이 실재 세계에 대해서 무엇을 말하는지 자문해보는 사람은 누구나 당황하게 될 것입니다. 양자론의 몇 가지 아이디어를 선구적으로 제시했던 아인슈타인도Albert Einstein 그것을 소화하지 못했고, 20세기 후반의위대한 이론 물리학자 리처드 파인만Richard Feynman은 아무도 양자를 이해하지 못한다고 썼습니다. 하지만 그게 바로 과학입니다. 세상에 대한 새로운사고방식을 탐구하는 것이죠. 과학은 우리의 개념에끊임없이 의문을 제기할 수 있는 능력입니다. 과학은그 자신의 개념적 토대를 수정하고, 세상을 처음부터다시 설계할 수 있는 반항적이고 비판적인 사고의 힘이죠. 10 양자론의 낯설음은 우리를 혼란스럽게 하지만, 이해하는 새로운 관점을 열어주기도 합니다. 공간 속의 입자들이라는 단순한 유물론의 실재보다 더 섬세한 실재, 대상들 이전에 관계로 이루어진 실재를요. =====================================
Retrievers in LangChain¶
A retriever in LangChain is a type of component used to search for and fetch information from a specific index stored in a vector store such as Chroma, where embeddings are indexed for search.
Retrievers play an important role in answering questions over documents, because they can fetch the relevant information for a given query.
BM25 retriever (a minimal usage sketch of this and the TF-IDF retriever follows after this list):
- Ranks documents by their relevance to a given query using the BM25 algorithm.
- A popular information-retrieval algorithm that takes term frequency and document length into account.
TF-IDF retriever:
- Ranks documents by the importance of terms in a document collection, using the TF-IDF (Term Frequency-Inverse Document Frequency) algorithm.
- Assigns higher weights to terms that are rare in the collection but appear frequently in a specific document.
Dense retriever:
- Retrieves documents using dense embeddings.
- Encodes documents and queries as dense vectors and computes their similarity with cosine similarity or another distance metric.
kNN retriever:
- Retrieves relevant documents based on their similarity to a given query, using the k-nearest neighbors (kNN) algorithm.
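As referenced above, a minimal sketch of the BM25 and TF-IDF retrievers. This is illustrative only: it assumes the rank_bm25 and scikit-learn packages are installed and that BM25Retriever and TFIDFRetriever are available in the installed LangChain version.
# pip install rank_bm25 scikit-learn
from langchain.retrievers import BM25Retriever, TFIDFRetriever
texts = ["cat", "dog", "computer", "animal"]
# BM25 ranks by term frequency and document length.
bm25_retriever = BM25Retriever.from_texts(texts)
print(bm25_retriever.get_relevant_documents("dog"))
# TF-IDF weights terms that are rare in the collection but frequent in a document.
tfidf_retriever = TFIDFRetriever.from_texts(texts)
print(tfidf_retriever.get_relevant_documents("dog"))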
kNN retriever¶
# Results are listed in order of highest similarity to the query.
words = ["cat", "dog", "computer", "animal"]
retriever = KNNRetriever.from_texts(words, embeddings=embeddings)
result = retriever.get_relevant_documents("dog")
print(result)
[Document(page_content='dog'), Document(page_content='animal'), Document(page_content='cat'), Document(page_content='computer')]
PubMed retriever¶
LangChain also has a few specialized retrievers, such as the one for PubMed.
The PubMed retriever is a LangChain component that helps integrate biomedical literature search into language-model applications.
PubMed contains millions of biomedical literature citations from a variety of sources.
retriever = PubMedRetriever()
documents = retriever.get_relevant_documents("COVID")
for document in documents:
print(document.metadata["Title"])
[Association of polymorphic variants of hemostatic system genes with the course of COVID-19]. {'i': 'Betacoronavirus', '#text': '[Clinical symptoms and signs in hamsters during experimental infection with the SARS-CoV-2 virus (Coronaviridae: )].'} Post COVID-19 condition and behavioral manifestations in Taiwanese children.
Custom retriever¶
You can implement your own custom retriever in LangChain by creating a class that inherits from the abstract BaseRetriever class.
The class must implement a get_relevant_documents() method that takes a query string as input and returns a list of relevant documents.
class MyRetriever(BaseRetriever):
def get_relevant_documents(self, query: str) -> list[Document]:
# Implement your own retrieval logic here.
# Retrieve and process documents based on the query.
# Return a list of relevant documents.
relevant_documents = []
# Your retrieval logic goes here.
return relevant_documents
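A hypothetical usage sketch of the custom retriever above; because the retrieval logic is still a stub, it simply returns an empty list (newer LangChain versions expect _get_relevant_documents() instead and may emit a deprecation warning).
my_retriever = MyRetriever()
print(my_retriever.get_relevant_documents("What is quantum mechanics about?"))
# [] until actual retrieval logic is implemented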
Implementing a chatbot¶
- Set up a document loader
- Store the documents in a vector store
- Set up the chatbot so that it retrieves from the vector store (see the wiring sketch after this list)
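The three steps come together in chat_with_documents.py further below. As a rough, hedged sketch only, the wiring looks roughly like this (it reuses the load_document helper and MEMORY defined in utility.py just below; the file path is only an example):
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import DocArrayInMemorySearch
docs = load_document("data/test.docx")  # 1. document loader
db = DocArrayInMemorySearch.from_documents(docs, OpenAIEmbeddings())  # 2. vector store
chatbot = ConversationalRetrievalChain.from_llm(  # 3. chatbot retrieving from the vector store
    llm=ChatOpenAI(temperature=0),
    retriever=db.as_retriever(),
    memory=MEMORY,
)
print(chatbot({"question": "What is this document about?"})["answer"])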
utility.py
import logging
import pathlib
from typing import Any
from langchain.document_loaders import (
PyPDFLoader,
TextLoader,
UnstructuredEPubLoader,
UnstructuredWordDocumentLoader,
)
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
logging.basicConfig(level=logging.INFO)
def init_memory():
"""Initialize the memory. for contextual conversation.
We are caching this, so it won't be deleted
every time, we restart the server.
"""
return ConversationBufferMemory(
memory_key='chat_history',
return_messages=True,
output_key='answer'
)
MEMORY = init_memory()
class EpubReader(UnstructuredEPubLoader):
"""Reads the epub file."""
def __init__(self, file_path: str | list[str] , **unstructured_kwargs: Any):
"""Initialize the epub reader."""
# Passing strategy="fast" here raises an error, so it is omitted
super().__init__(file_path=file_path, **unstructured_kwargs, mode="elements") #, strategy="fast")
class DocumentLoaderException(Exception):
"""Document loader exception."""
pass
class DocumentLoader(object):
"""Loads in a document with a supported extension."""
supported_extensions = {
".pdf": PyPDFLoader,
".txt": TextLoader,
".epub": EpubReader,
".docx": UnstructuredWordDocumentLoader,
".doc": UnstructuredWordDocumentLoader,
}
def load_document(temp_filepath: str) -> list[Document]:
"""Load a file and return it as a list of documents.
Doesn't handle a lot of errors at the moment.
"""
ext = pathlib.Path(temp_filepath).suffix
loader = DocumentLoader.supported_extensions.get(ext)
if not loader:
raise DocumentLoaderException(f"Unsupported file extension: {ext}, cannot load this type of file")
loaded = loader(temp_filepath)
docs = loaded.load()
for idx,doc in enumerate(docs):
logging.info(doc.metadata)
if idx >= 4 :
break
return docs
logging.info("=============Load PDF=====================")
load_document('data/Introduction_to_Entropy_and_Gini-Index.pdf');
INFO:root:=============Load PDF===================== INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 0} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 1} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 2} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 3} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 4}
logging.info("=============Load DOCX=====================")
load_document('data/test.docx');
INFO:root:=============Load DOCX===================== INFO:root:{'source': 'data/test.docx'}
logging.info("=============Load TXT=====================")
load_document('data/나없이는존재하지않는세상_20240102.txt');
INFO:root:=============Load TXT===================== INFO:root:{'source': 'data/나없이는존재하지않는세상_20240102.txt'}
logging.info("=============Load EPUB=====================")
load_document('data/Julia for Data Analysis.epub');
INFO:root:=============Load EPUB===================== INFO:unstructured:Reading document from string ... INFO:unstructured:Reading document ... INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'NarrativeText'}
from langchain.vectorstores import DocArrayInMemorySearch
embeddings = OpenAIEmbeddings()
db = DocArrayInMemorySearch.from_documents(docs, embeddings)
INFO - docarray - DB config created INFO:docarray:DB config created INFO - docarray - Runtime config created INFO:docarray:Runtime config created INFO - docarray - No docs or index file provided. Initializing empty InMemoryExactNNIndex. INFO:docarray:No docs or index file provided. Initializing empty InMemoryExactNNIndex. INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
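The in-memory store can be queried just like the Chroma store above; a quick sanity check (the query string is only illustrative):
sim_docs = db.similarity_search("What is this document about?", k=2)
for doc in sim_docs:
    print(doc.page_content[:200])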
Conversation buffers¶
# Creating a conversation chain with memory
"""
params:
(*, chat_memory: BaseChatMessageHistory = ChatMessageHistory,
output_key: str | None = None, input_key: str | None = None,
return_messages: bool = False,
human_prefix: str = "Human", ai_prefix: str = "AI", memory_key: str = "history") -> None
"""
memory = ConversationBufferMemory()
llm = ChatOpenAI(model="gpt-3.5-turbo",temperature=0,streaming=True)
chain = ConversationChain(llm=llm, memory=memory)
# You can check the chain's input_keys and output_keys.
print(f"chain input_keys: {chain.input_keys}, chain output_keys: {chain.output_keys}")
# The memory's variables
print(f"memory memory_key: {memory.memory_variables}")
# User inputs a message
user_input = "Hi, how are you?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# User inputs another message
user_input = "What's the weather like today?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# Printing the conversation history stored in the memory
print(memory.chat_memory.messages)
chain inpu_keys: ['input'], chain output_keys: ['response'] memory memory_key: ['history'] Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today? I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast. [HumanMessage(content='Hi, how are you?'), AIMessage(content="Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?"), HumanMessage(content="What's the weather like today?"), AIMessage(content="I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast.")]
print(memory.load_memory_variables({})["history"])
Human: Hi, how are you? AI: Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today? Human: What's the weather like today? AI: I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast.
# `history` in the template is the memory's memory_key, and `input` is the chain's input_key.
llm = OpenAI(temperature=0)
template = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
{history}
Human:{input}
AI Assistant:"""
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)
chain = ConversationChain(
llm=llm,
prompt=PROMPT,
verbose=True,
memory=ConversationBufferMemory(ai_prefix="AI Assistant"),
)
# User inputs a message
user_input = "Hi, how are you?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# User inputs another message
user_input = "What's the weather like today?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# Printing the conversation history stored in the memory
print(chain.memory.chat_memory.messages)
print(chain.memory.load_memory_variables({})["history"])
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human:Hi, how are you? AI Assistant: > Finished chain. I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi, how are you? AI Assistant: I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today? Human:What's the weather like today? AI Assistant: > Finished chain. According to my data, the weather today is partly cloudy with a high of 75 degrees Fahrenheit and a low of 60 degrees Fahrenheit. There is a 20% chance of rain in the afternoon. Is there anything else you would like to know about the weather? [HumanMessage(content='Hi, how are you?'), AIMessage(content=' I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today?'), HumanMessage(content="What's the weather like today?"), AIMessage(content=' According to my data, the weather today is partly cloudy with a high of 75 degrees Fahrenheit and a low of 60 degrees Fahrenheit. There is a 20% chance of rain in the afternoon. Is there anything else you would like to know about the weather?')] Human: Hi, how are you? AI Assistant: I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today? Human: What's the weather like today? AI Assistant: According to my data, the weather today is partly cloudy with a high of 75 degrees Fahrenheit and a low of 60 degrees Fahrenheit. There is a 20% chance of rain in the afternoon. Is there anything else you would like to know about the weather?
Remembering conversation summaries¶
ConversationSummaryMemory
A LangChain memory type that generates summaries.
Instead of storing every message verbatim as the conversation progresses, it condenses the information into a summarized version of the conversation.
This is especially useful for extended conversations, where including every previous message could exceed the token limit.
memory = ConversationSummaryMemory(llm=OpenAI(temperature=0))
# memory keys
print(f"memory_key: {memory.memory_variables}")
# Save the context of an interaction
memory.save_context({"input": "Hi, how are you?"}, {"output": "I am fine, thanks."})
# Load the summarized memory
memory.load_memory_variables({})
memory_key: ['history']
{'history': '\nThe human greets the AI and asks how it is doing. The AI responds that it is fine and thanks the human.'}
memory.chat_memory.messages
[HumanMessage(content='Hi, how are you?'), AIMessage(content='I am fine, thanks.')]
Storing knowledge graphs¶
In LangChain, you can also extract information from conversation facts and store it by integrating a knowledge graph into memory.
This enhances the language model's capabilities and lets it draw on structured knowledge during text generation and reasoning.
A knowledge graph is a structured knowledge-representation model that organizes information as entities, attributes, and relationships.
It represents knowledge as a graph: entities are nodes and the relationships between entities are edges.
In a knowledge graph, an entity can be any concept, object, or thing in the world, and attributes describe the properties or characteristics of those entities.
Relationships capture the connections and associations between entities, providing contextual information and enabling semantic reasoning.
LangChain includes knowledge-graph functionality for retrieval, but it also provides a memory component (ConversationKGMemory) that automatically builds a knowledge graph from the conversation messages.
memory = ConversationKGMemory(llm=OpenAI(temperature=0))
template = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate.
Relevant Information:
{history}
Conversation:
Human: {input}
AI:"""
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)
chain = ConversationChain(
llm=llm,
prompt=PROMPT,
verbose=True,
memory=memory,
)
# You can check the chain's input_keys and output_keys.
print(f"chain input_keys: {chain.input_keys}, chain output_keys: {chain.output_keys}")
# The memory's variables
print(f"memory memory_key: {memory.memory_variables}")
user_input = "Hi, how are you?"
response = chain.predict(input=user_input)
print(response)
user_input = "My name is James and I'm helping Will. He's an engineer."
response = chain.predict(input=user_input)
print(response)
user_input = "What do you know about Will?"
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다."
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos의 친구는 누구인가?"
response = chain.predict(input=user_input)
print(response)
chain inpu_keys: ['input'], chain output_keys: ['response'] memory memory_key: ['history'] > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: Conversation: Human: Hi, how are you? AI: > Finished chain. Hello! I am an AI programmed to assist with various tasks and provide information. I do not have the capability to feel emotions, so I am always functioning at optimal levels. How can I assist you today? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: Conversation: Human: My name is James and I'm helping Will. He's an engineer. AI: > Finished chain. Hello James, it's nice to meet you. I am an AI designed to assist with various tasks. Will sounds like a very interesting person. As an engineer, he likely has a strong understanding of math, science, and problem-solving. Is there anything specific you need help with? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: On Will: Will is an engineer. Conversation: Human: What do you know about Will? AI: > Finished chain. Will is an engineer. He has a degree in mechanical engineering from the University of California, Berkeley. He currently works at a tech company in Silicon Valley. He is known for his innovative designs and problem-solving skills. He is also an avid hiker and enjoys spending time outdoors. Is there anything specific you would like to know about Will? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: On James: James is helping Will. On Will: Will is an engineer. Will has a degree in mechanical engineering. Will currently works at a tech company in Silicon Valley. Will is known for innovative designs. Will is known for problem-solving skills. Will enjoys hiking. Will enjoys spending time outdoors. Conversation: Human: MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: > Finished chain. 그렇군요. James와 Will은 친구이며, Will은 기계공학 학위를 가지고 있으며 현재 실리콘밸리의 기술 회사에서 일하고 있습니다. Will은 혁신적인 디자인으로 유명하며 문제 해결 능력도 뛰어납니다. 그리고 등산과 야외 활동을 즐기는 것으로 알고 있습니다. 그리고 MrChaos는 프로그래머이며 밝고 활달한 성격을 가지고 있습니다. 하지만 James와 Will의 친구는 아닌 것 같습니다. > Entering new ConversationChain chain... 
Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: On MrChaos: MrChaos is a programmer. MrChaos has a bright and lively personality. MrChaos is a friend of James. MrChaos is a friend of Will. Conversation: Human: MrChaos의 친구는 누구인가? AI: > Finished chain. MrChaos의 친구는 James와 Will입니다.
chain.memory.chat_memory.messages
[HumanMessage(content='Hi, how are you?'), AIMessage(content=' Hello! I am an AI programmed to assist with various tasks and provide information. I do not have the capability to feel emotions, so I am always functioning at optimal levels. How can I assist you today?'), HumanMessage(content="My name is James and I'm helping Will. He's an engineer."), AIMessage(content=" Hello James, it's nice to meet you. I am an AI designed to assist with various tasks. Will sounds like a very interesting person. As an engineer, he likely has a strong understanding of math, science, and problem-solving. Is there anything specific you need help with?"), HumanMessage(content='What do you know about Will?'), AIMessage(content=' Will is an engineer. He has a degree in mechanical engineering from the University of California, Berkeley. He currently works at a tech company in Silicon Valley. He is known for his innovative designs and problem-solving skills. He is also an avid hiker and enjoys spending time outdoors. Is there anything specific you would like to know about Will?'), HumanMessage(content='MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다.'), AIMessage(content=' 그렇군요. James와 Will은 친구이며, Will은 기계공학 학위를 가지고 있으며 현재 실리콘밸리의 기술 회사에서 일하고 있습니다. Will은 혁신적인 디자인으로 유명하며 문제 해결 능력도 뛰어납니다. 그리고 등산과 야외 활동을 즐기는 것으로 알고 있습니다. 그리고 MrChaos는 프로그래머이며 밝고 활달한 성격을 가지고 있습니다. 하지만 James와 Will의 친구는 아닌 것 같습니다.'), HumanMessage(content='MrChaos의 친구는 누구인가?'), AIMessage(content=' MrChaos의 친구는 James와 Will입니다.')]
memory.load_memory_variables({"input":"mrchaos"})
{'history': 'On MrChaos: MrChaos is a programmer. MrChaos has a bright and lively personality. MrChaos has a friend James. MrChaos has a friend Will.'}
# pip install pygraphviz
from IPython.display import SVG, HTML
memory.kg.draw_graphviz()
style = "<style>svg{width:100% !important;height:100% !important;}</style>"
display(HTML(style))
display(SVG("graph.svg"))
Combining several memory mechanisms¶
LangChain can combine several memory strategies using the CombinedMemory class.
This is useful when you want to retain different aspects of the conversation history.
For example, one memory can store the full conversation log:
llm = OpenAI(temperature=0)
# Define Conversation Buffer Memory (for retaining all past messages)
conv_memory = ConversationBufferMemory(memory_key="chat_history_lines", input_key="input")
# Define Conversation Summary Memory (for retaining a summary of the conversation)
summary_memory = ConversationSummaryMemory(llm=llm,input_key="input")
# Combine the two memories
memory = CombinedMemory(memories=[conv_memory, summary_memory])
# Define the prompt template
_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.
Summary of conversation:
{history}
Current conversation:
{chat_history_lines}
Human:{input}
AI:"""
PROMPT = PromptTemplate(input_variables=["history", "chat_history_lines", "input"], template=_DEFAULT_TEMPLATE)
chain = ConversationChain(llm=llm, memory=memory, prompt=PROMPT, verbose=True)
user_input = "Hi, how are you?"
response = chain.predict(input=user_input)
print(response)
user_input = "My name is James and I'm helping Will. He's an engineer."
response = chain.predict(input=user_input)
print(response)
user_input = "What do you know about Will?"
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다."
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos의 친구는 누구인가?"
response = chain.predict(input=user_input)
print(response)
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: Current conversation: Human:Hi, how are you? AI: > Finished chain. I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human:My name is James and I'm helping Will. He's an engineer. AI: > Finished chain. Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. The human introduces themselves as James and mentions they are helping an engineer named Will. The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human:What do you know about Will? AI: > Finished chain. I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. The human introduces themselves as James and mentions they are helping an engineer named Will. The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with. The human then asks the AI what it knows about Will, to which the AI responds that it does not have specific information but acknowledges the expertise and skills of engineers like him. The AI offers to provide more information about engineers and their work if needed. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human: What do you know about Will? AI: I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? Human:MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: > Finished chain. I do not have any information about MrChaos, but based on your description, he sounds like a talented and sociable individual. It's great that James and Will have a friend like him. Is there anything else you would like to know about MrChaos or his profession as a programmer? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. The human introduces themselves as James and mentions they are helping an engineer named Will. The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with. The human then asks the AI what it knows about Will, to which the AI responds that it does not have specific information but acknowledges the expertise and skills of engineers like him. The AI offers to provide more information about engineers and their work if needed. The human then shares information about another programmer named MrChaos, describing him as a talented and sociable individual and a friend of James and Will. The AI offers to provide more information about MrChaos and his profession as a programmer. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. 
I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human: What do you know about Will? AI: I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? Human: MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: I do not have any information about MrChaos, but based on your description, he sounds like a talented and sociable individual. It's great that James and Will have a friend like him. Is there anything else you would like to know about MrChaos or his profession as a programmer? Human:MrChaos의 친구는 누구인가? AI: > Finished chain. I do not have any information about MrChaos' friends, but I can provide you with general information about programmers and their work if you would like.
print(memory.load_memory_variables({})['chat_history_lines'])
Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human: What do you know about Will? AI: I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? Human: MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: I do not have any information about MrChaos, but based on your description, he sounds like a talented and sociable individual. It's great that James and Will have a friend like him. Is there anything else you would like to know about MrChaos or his profession as a programmer? Human: MrChaos의 친구는 누구인가? AI: I do not have any information about MrChaos' friends, but I can provide you with general information about programmers and their work if you would like.
print(memory.load_memory_variables({})['history'].replace(".","\n"))
The human greets the AI and asks how it is doing The AI responds that it is doing well and explains its purpose as an AI It also asks the human how they are doing today The human introduces themselves as James and mentions they are helping an engineer named Will The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with The human then asks the AI what it knows about Will, to which the AI responds that it does not have specific information but acknowledges the expertise and skills of engineers like him The AI offers to provide more information about engineers and their work if needed The human then shares information about another programmer named MrChaos, describing him as a talented and sociable individual and a friend of James and Will The AI offers to provide more information about MrChaos and his profession as a programmer The human asks the AI about MrChaos' friends, to which the AI responds that it does not have any specific information but can provide general information about programmers and their work
Long-term persistence using Zep¶
There are also various ways to persist conversations in a dedicated backend.
One such example, Zep, provides a persistent backend that stores, summarizes, and searches chat history using vector embeddings and automatic token counting.
With fast vector search and configurable summarization, this long-term memory enables more capable, context-aware conversational AI.
Zep is open source, can be installed standalone, and is supported via Docker.
Github: https://github.com/getzep/zep
Home: https://www.getzep.com
Install: https://docs.getzep.com/deployment/quickstart/
Admin: http://localhost:8000/admin
session_id =str(uuid.uuid4())
memory = ZepMemory(
session_id=session_id,
url=os.getenv("ZEP_API_URL"),
api_key=os.getenv("ZEP_API_KEY"),
memory_key="chat_history",
)
llm = OpenAI(temperature=0)
template = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
{chat_history}
Human:{input}
AI Assistant:"""
PROMPT = PromptTemplate(input_variables=["chat_history", "input"], template=template)
chain = ConversationChain(
llm=llm,
prompt=PROMPT,
verbose=True,
memory=memory)
user_input = "Hi, how are you?"
response = chain.predict(input=user_input)
print(response)
user_input = "What's the weather like today?"
response = chain.predict(input=user_input)
print(response)
print(chain.memory.chat_memory.messages)
print("\n\n\n")
print(chain.memory.load_memory_variables({})["chat_history"])
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human:Hi, how are you? AI Assistant: > Finished chain. I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi, how are you? AI: I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know? Human:What's the weather like today? AI Assistant: > Finished chain. According to my weather data, the current temperature is 72 degrees Fahrenheit with partly cloudy skies. There is a 20% chance of rain later in the day. Would you like me to provide a more detailed forecast for your location? [HumanMessage(content='Hi, how are you?', additional_kwargs={'uuid': '177cc778-91c1-4bb5-929d-ba4b69fb0f3a', 'created_at': '2024-01-04T10:26:36.761632Z', 'token_count': 0, 'metadata': {'system': {'entities': []}}}), AIMessage(content=' I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know?', additional_kwargs={'uuid': '624b5f87-1606-43c1-8d2b-8c79ec5cdcc3', 'created_at': '2024-01-04T10:26:36.77683Z', 'token_count': 0, 'metadata': {'system': {'entities': [{'Label': 'CARDINAL', 'Matches': [{'End': 90, 'Start': 85, 'Text': '2.3.4'}], 'Name': '2.3.4'}]}}}), HumanMessage(content="What's the weather like today?", additional_kwargs={'uuid': 'dc7b7515-015a-4a32-95b9-a04ed8827909', 'created_at': '2024-01-04T10:26:37.516632Z', 'token_count': 0, 'metadata': None}), AIMessage(content=' According to my weather data, the current temperature is 72 degrees Fahrenheit with partly cloudy skies. There is a 20% chance of rain later in the day. Would you like me to provide a more detailed forecast for your location?', additional_kwargs={'uuid': 'af370fa5-1a95-4f71-97b3-47fe92efd0b9', 'created_at': '2024-01-04T10:26:37.531053Z', 'token_count': 0, 'metadata': None})] Human: Hi, how are you? AI: I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know? Human: What's the weather like today? AI: According to my weather data, the current temperature is 72 degrees Fahrenheit with partly cloudy skies. There is a 20% chance of rain later in the day. Would you like me to provide a more detailed forecast for your location?
Using Zep's ZepVectorStore¶
You can use Zep's own embedding, but using a free HuggingFace embedding is also a good option,
since the openai embedding incurs significant cost.
from langchain.vectorstores.zep import ZepVectorStore, CollectionConfig
from langchain.embeddings import HuggingFaceEmbeddings
# https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v1
# The embedding dimension of the sentence-transformers/distiluse-base-multilingual-cased-v1 model is 512
# Usage example for ZepVectorStore and CollectionConfig: https://python.langchain.com/docs/integrations/vectorstores/zep
embedding_dimensions = 512
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased-v1")
text_loader = TextLoader(file_path="./data/나없이는존재하지않는세상_20240102.txt",encoding="utf-8")
docs = text_loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100))
# Special characters such as _ are not allowed in the collection name
collection_name = f"MrChaos{uuid.uuid4().hex}"
config = CollectionConfig(
name=collection_name,
description="MrChaos의 지식",
metadata={"author": "MrChaos","e-mail":"mrchaos@fxrobot.kr"},
embedding_dimensions=embedding_dimensions,  # embedding dimension (differs by model: OpenAI, HuggingFace, etc.)
# is_auto_embedded=True,  # let Zep embed the documents itself with its low-latency embedder
is_auto_embedded=False,
)
vectordb = ZepVectorStore.from_documents(
documents=docs,
collection_name=collection_name,
config=config,
api_url=os.getenv("ZEP_API_URL"),
api_key=os.getenv("ZEP_API_KEY"),
# embedding=None,  # let Zep embed the documents itself with its low-latency embedder (when is_auto_embedded=True)
embedding=embedding
)
query = "양자론과 관계론적 관계"
docs_scores = await vectordb.asimilarity_search_with_relevance_scores(query, k=3)
docs_scores
[(Document(page_content='52 피히테, 셀링 헤겔의 의미에서.\n\n53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/.\n\n54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111.\n\n55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다.\n\n56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다.\n\n57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019.\n\n248\n\n\n58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책.\n\n59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다\'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다.\n\n60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다.', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7650395265424814), (Document(page_content="우리는 나가르주나에게서 양자의 관계성을 생각할수 있는 강력한 개념적 도구를 얻을 수 있습니다. 그것\n\n* 나가르주나 논증의 논리적 형식인 '사구부정'의 한 예.\n\n180\n\n\n은 자립적인 본질이 없어도 상호의존성을 생각할 수있다는 것입니다. 사실, 상호의존성을 생각하려면 (이것이 나가르주나의 핵심 주장인데요) 자립적 본질 따위는잊어버려야 합니다.\n\n물리학은 오랜 시간에 걸쳐 물질, 분자, 원자, 장, 소립자 등 '궁극적 실체'를 추구해왔는데..… 양자장 이론과 일반상대성이론의 관계적 복잡성이라는 암초에 걸려 난파되었습니다.\n\n여기에서 빠져나올 수 있는 개념적 도구를, 고대 인도의 사상가가 우리에게 줄 수 있을까요?\n\n우리는 항상 다른 사람으로부터, 우리와 다른 이들로부터 배웁니다. 수천 년에 걸쳐 끊임없이 대화를 해왔어도, 동양과 서양은 여전히 서로에게 할 말이 있을 수있습니다. 최고의 결혼 생활이 그렇듯이 말이죠.\n\n나가르주나 사상의 매력은 현대 물리학의 문제를 넘어섭니다. 그의 관점에는 어딘가 아찔한 구석이 있습니다. 그것은 고전적 철학이든 현대의 철학이든 최고의 서양 철학과 공명합니다. 흄의 급진적 회의주의와도, 잘못 제기된 질문의 가면을 벗기는 비트겐슈타인의 사상과도 공명합니다. 그러나 나가르주나는, 많은철학들이 잘못된 출발점을 가정하는 바람에 결국에는설득력이 없게 되는 그런 함정에 빠지지는 않는 것 같습니다. 그는 실재와 그것의 복잡성과 이해 가능성에\n\n181\n\n\n대해 이야기하지만, 궁극적인 토대를 찾겠다는 개념적함정에 우리가 빠지지 않도록 막아줍니다.\n\n나가르주나의 주장은 형이상학적으로 과도하지 않으며, 냉철합니다. 그는 모든 것의 궁극적인 토대가 무엇인가에 대한 질문은, 그저 말이 되지 않는 질문일 수있음을 받아들입니다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7460570726139523), (Document(page_content='2 N. Bohr. The Genesis of Quantum Mechanics, in Essays 1958-1962 on Atomic Physics and Human Knowledge, Wiley, New York, 1963. pp.\n\n3 W. Heisenberg, Über quantentheoretische Umdeutung kinematischer und mechanischer Beziehungen. (Zeitschrift für Physik), 33, 1925, pp. 879-93.\n\nM. Born & P. Jordan, Zur Quantenmechanik, (Zeitschrift für Physik), 34. 1925, pp S58-8S\n\n5 P.A.M. Dirac, The Fundamental Equations of Quantum Mechanics. (Proceedings of the Royal Society A), 109, 752, 1925, pp. 62-53\n\n6 디랙은 하이젠베르크의 표가 비가환 변수라는 것을 깨닫고, 거기서 예전에 고등 역학 강좌에서 접한 푸아송 괄호를 떠올린 것이었다. 73세의 디랙이 직접 들려주는 그 운명적인 시절에 대한 유쾌한 이야기는 hopsitwww.youtube.com/watchi-wYsSITLZ24 에서 확인할 수 있다.\n\n7 M. Bom, My Life: Recollections of a Nobel Laureate, Tavior & Francis London. 1978, p. 218.\n\n8 W. Pauli. Über das Wasserstoffspektrum vom Standpunkt der neuen Quantenmechanik, (Zeitschrift für Physik), 36, 1926, pp. 336-63, 24', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7199667747554138)]
# print results
for d, s in docs_scores:
print(d.page_content, " -> ", s, "\n====\n")
52 피히테, 셀링 헤겔의 의미에서. 53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/. 54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111. 55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다. 56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다. 57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019. 248 58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책. 59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다. 60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다. -> 0.7650395265424814 ==== 우리는 나가르주나에게서 양자의 관계성을 생각할수 있는 강력한 개념적 도구를 얻을 수 있습니다. 그것 * 나가르주나 논증의 논리적 형식인 '사구부정'의 한 예. 180 은 자립적인 본질이 없어도 상호의존성을 생각할 수있다는 것입니다. 사실, 상호의존성을 생각하려면 (이것이 나가르주나의 핵심 주장인데요) 자립적 본질 따위는잊어버려야 합니다. 물리학은 오랜 시간에 걸쳐 물질, 분자, 원자, 장, 소립자 등 '궁극적 실체'를 추구해왔는데..… 양자장 이론과 일반상대성이론의 관계적 복잡성이라는 암초에 걸려 난파되었습니다. 여기에서 빠져나올 수 있는 개념적 도구를, 고대 인도의 사상가가 우리에게 줄 수 있을까요? 우리는 항상 다른 사람으로부터, 우리와 다른 이들로부터 배웁니다. 수천 년에 걸쳐 끊임없이 대화를 해왔어도, 동양과 서양은 여전히 서로에게 할 말이 있을 수있습니다. 최고의 결혼 생활이 그렇듯이 말이죠. 나가르주나 사상의 매력은 현대 물리학의 문제를 넘어섭니다. 그의 관점에는 어딘가 아찔한 구석이 있습니다. 그것은 고전적 철학이든 현대의 철학이든 최고의 서양 철학과 공명합니다. 흄의 급진적 회의주의와도, 잘못 제기된 질문의 가면을 벗기는 비트겐슈타인의 사상과도 공명합니다. 그러나 나가르주나는, 많은철학들이 잘못된 출발점을 가정하는 바람에 결국에는설득력이 없게 되는 그런 함정에 빠지지는 않는 것 같습니다. 그는 실재와 그것의 복잡성과 이해 가능성에 181 대해 이야기하지만, 궁극적인 토대를 찾겠다는 개념적함정에 우리가 빠지지 않도록 막아줍니다. 나가르주나의 주장은 형이상학적으로 과도하지 않으며, 냉철합니다. 그는 모든 것의 궁극적인 토대가 무엇인가에 대한 질문은, 그저 말이 되지 않는 질문일 수있음을 받아들입니다. -> 0.7460570726139523 ==== 2 N. Bohr. The Genesis of Quantum Mechanics, in Essays 1958-1962 on Atomic Physics and Human Knowledge, Wiley, New York, 1963. pp. 3 W. Heisenberg, Über quantentheoretische Umdeutung kinematischer und mechanischer Beziehungen. (Zeitschrift für Physik), 33, 1925, pp. 879-93. M. Born & P. Jordan, Zur Quantenmechanik, (Zeitschrift für Physik), 34. 1925, pp S58-8S 5 P.A.M. Dirac, The Fundamental Equations of Quantum Mechanics. (Proceedings of the Royal Society A), 109, 752, 1925, pp. 62-53 6 디랙은 하이젠베르크의 표가 비가환 변수라는 것을 깨닫고, 거기서 예전에 고등 역학 강좌에서 접한 푸아송 괄호를 떠올린 것이었다. 73세의 디랙이 직접 들려주는 그 운명적인 시절에 대한 유쾌한 이야기는 hopsitwww.youtube.com/watchi-wYsSITLZ24 에서 확인할 수 있다. 7 M. Bom, My Life: Recollections of a Nobel Laureate, Tavior & Francis London. 1978, p. 218. 8 W. Pauli. Über das Wasserstoffspektrum vom Standpunkt der neuen Quantenmechanik, (Zeitschrift für Physik), 36, 1926, pp. 336-63, 24 -> 0.7199667747554138 ====
docs = await vectordb.asimilarity_search_with_score(query, search_type="mmr", k=3)
docs
# for d in docs:
# print(d.page_content, "\n====\n")
[(Document(page_content='52 피히테, 셀링 헤겔의 의미에서.\n\n53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/.\n\n54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111.\n\n55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다.\n\n56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다.\n\n57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019.\n\n248\n\n\n58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책.\n\n59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다\'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다.\n\n60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다.', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7650395265424814), (Document(page_content="240\n\n\n임없이 발생하는 관계적 존재다. 사물의 속성은 대상안에 있는 것이 아니라 다른 사물과의 상호작용 속에서만 존재하며, 상호작용하는 대상이 달라지면 속성도달라질 수 있는 두 대상 사이의 관계다. 한마디로 이 세계는 확정된 속성을 가진 대상들의 집합이 아닌 관계의 그물망이다. 이것이 로벨리가 말하는 양자 이론이밝혀준 실재의 모습이다. 이제 양자 이론은 하이젠베르크의 기대와 달리 양자적 대상이 관찰을 통해 우리(혹은 '관찰자)에게 어떻게 나타나는지를 기술하는 것이아니라, 두 물리적 대상이 서로에게 나타나는 방식 곧관계를 기술한다.\n\n또한 로벨리는 양자 이론의 확률을 정보와 연결 짓고, 정보 역시 두 대상 사이의 상관관계의 산물로 본다.양자 이론은 대상을 관찰하지 않으면 그것이 어디에있는지 말해주지 않다가 대상을 관찰하면 어떤 지점에있을 확률을 말해주는데, 이는 관찰이라는 두 대상 간의 상호작용이 만들어낸 정보의 변화라는 것이다. 두개의 동전을 자유롭게 던지느냐 아니면 특정한 방식으로 묶어 던지느냐에 따라 일어날 사건에 관한 정보가달라지고 특정 사건이 일어날 확률도 달라지는데, 이는 두 개의 동전 사이의 상호 관계가 달라진 결과인 것처럼 말이다. 로벨리는 이런 정보의 관점에서 양자 이론을 새롭게 이해한다. 하이젠베르크의 불확정성 원리\n\n241\n\n\n는 정보의 유한성에 바탕해서 설명하고, 물리적 변수간의 비가환성은 대상과의 새로운 상호작용이 항상 새로운 관련 정보를 주지만 동시에 기존의 관련 정보를 잃게 만든다는 관점에서 설명한다.\n\n로벨리에게 입자성은 물질과 더불어 양자 현상이 아주 작은 세계에서는 입자적 형태로 나타남을 의미한다. 즉 불연속성을 의미하는 것이지, 세계가 입자와 같은 실체로 이루어졌음을 말하는 것이 아니다. 또한 로벨리는 이미 그의 저서인 《시간은 흐르지 않는다》에서 아주 작은 세계에서는 시간과 공간조차도 입자성을 띤다고 주장하였는데, 이는 입자성이 매우 일반적인 것임을 함축한다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7020414781065081), (Document(page_content="우리는 나가르주나에게서 양자의 관계성을 생각할수 있는 강력한 개념적 도구를 얻을 수 있습니다. 그것\n\n* 나가르주나 논증의 논리적 형식인 '사구부정'의 한 예.\n\n180\n\n\n은 자립적인 본질이 없어도 상호의존성을 생각할 수있다는 것입니다. 사실, 상호의존성을 생각하려면 (이것이 나가르주나의 핵심 주장인데요) 자립적 본질 따위는잊어버려야 합니다.\n\n물리학은 오랜 시간에 걸쳐 물질, 분자, 원자, 장, 소립자 등 '궁극적 실체'를 추구해왔는데..… 양자장 이론과 일반상대성이론의 관계적 복잡성이라는 암초에 걸려 난파되었습니다.\n\n여기에서 빠져나올 수 있는 개념적 도구를, 고대 인도의 사상가가 우리에게 줄 수 있을까요?\n\n우리는 항상 다른 사람으로부터, 우리와 다른 이들로부터 배웁니다. 수천 년에 걸쳐 끊임없이 대화를 해왔어도, 동양과 서양은 여전히 서로에게 할 말이 있을 수있습니다. 최고의 결혼 생활이 그렇듯이 말이죠.\n\n나가르주나 사상의 매력은 현대 물리학의 문제를 넘어섭니다. 그의 관점에는 어딘가 아찔한 구석이 있습니다. 그것은 고전적 철학이든 현대의 철학이든 최고의 서양 철학과 공명합니다. 흄의 급진적 회의주의와도, 잘못 제기된 질문의 가면을 벗기는 비트겐슈타인의 사상과도 공명합니다. 그러나 나가르주나는, 많은철학들이 잘못된 출발점을 가정하는 바람에 결국에는설득력이 없게 되는 그런 함정에 빠지지는 않는 것 같습니다. 그는 실재와 그것의 복잡성과 이해 가능성에\n\n181\n\n\n대해 이야기하지만, 궁극적인 토대를 찾겠다는 개념적함정에 우리가 빠지지 않도록 막아줍니다.\n\n나가르주나의 주장은 형이상학적으로 과도하지 않으며, 냉철합니다. 그는 모든 것의 궁극적인 토대가 무엇인가에 대한 질문은, 그저 말이 되지 않는 질문일 수있음을 받아들입니다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7460570726139523)]
retriever = vectordb.as_retriever(
search_type="mmr",
search_kwargs={
"k": 2,
"fetch_k": 4,
},
)
retriever.get_relevant_documents(query)
[Document(page_content='52 피히테, 셀링 헤겔의 의미에서.\n\n53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/.\n\n54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111.\n\n55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다.\n\n56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다.\n\n57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019.\n\n248\n\n\n58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책.\n\n59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다\'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다.\n\n60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다.', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), Document(page_content="240\n\n\n임없이 발생하는 관계적 존재다. 사물의 속성은 대상안에 있는 것이 아니라 다른 사물과의 상호작용 속에서만 존재하며, 상호작용하는 대상이 달라지면 속성도달라질 수 있는 두 대상 사이의 관계다. 한마디로 이 세계는 확정된 속성을 가진 대상들의 집합이 아닌 관계의 그물망이다. 이것이 로벨리가 말하는 양자 이론이밝혀준 실재의 모습이다. 이제 양자 이론은 하이젠베르크의 기대와 달리 양자적 대상이 관찰을 통해 우리(혹은 '관찰자)에게 어떻게 나타나는지를 기술하는 것이아니라, 두 물리적 대상이 서로에게 나타나는 방식 곧관계를 기술한다.\n\n또한 로벨리는 양자 이론의 확률을 정보와 연결 짓고, 정보 역시 두 대상 사이의 상관관계의 산물로 본다.양자 이론은 대상을 관찰하지 않으면 그것이 어디에있는지 말해주지 않다가 대상을 관찰하면 어떤 지점에있을 확률을 말해주는데, 이는 관찰이라는 두 대상 간의 상호작용이 만들어낸 정보의 변화라는 것이다. 두개의 동전을 자유롭게 던지느냐 아니면 특정한 방식으로 묶어 던지느냐에 따라 일어날 사건에 관한 정보가달라지고 특정 사건이 일어날 확률도 달라지는데, 이는 두 개의 동전 사이의 상호 관계가 달라진 결과인 것처럼 말이다. 로벨리는 이런 정보의 관점에서 양자 이론을 새롭게 이해한다. 하이젠베르크의 불확정성 원리\n\n241\n\n\n는 정보의 유한성에 바탕해서 설명하고, 물리적 변수간의 비가환성은 대상과의 새로운 상호작용이 항상 새로운 관련 정보를 주지만 동시에 기존의 관련 정보를 잃게 만든다는 관점에서 설명한다.\n\n로벨리에게 입자성은 물질과 더불어 양자 현상이 아주 작은 세계에서는 입자적 형태로 나타남을 의미한다. 즉 불연속성을 의미하는 것이지, 세계가 입자와 같은 실체로 이루어졌음을 말하는 것이 아니다. 또한 로벨리는 이미 그의 저서인 《시간은 흐르지 않는다》에서 아주 작은 세계에서는 시간과 공간조차도 입자성을 띤다고 주장하였는데, 이는 입자성이 매우 일반적인 것임을 함축한다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'})]
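As a hedged sketch, this retriever can also feed a RetrievalQA chain in the same way the Chroma-based retrieval was used earlier (the model name is only an example):
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    retriever=retriever,
)
print(qa.run(query))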
Chatbot Source¶
utils.py¶
"""Utility functions and constants.
I am having some problems caching the memory and the retrieval.
When I decorate for caching, I get streamit init errors.
`UnstructuredWordDocumentLoader`을 사용하기 위해
`pip install docx2txt` 필요
"""
import logging
import pathlib
from typing import Any
from langchain.document_loaders import (
PyPDFLoader,
TextLoader,
UnstructuredEPubLoader,
UnstructuredWordDocumentLoader,
)
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
logging.basicConfig(level=logging.INFO)
def init_memory():
"""Initialize the memory. for contextual conversation.
We are caching this, so it won't be deleted
every time, we restart the server.
"""
return ConversationBufferMemory(
memory_key='chat_history',
return_messages=True,
output_key='answer'
)
MEMORY = init_memory()
class EpubReader(UnstructuredEPubLoader):
"""Reads the epub file."""
def __init__(self, file_path: str | list[str], **unstructured_kwargs: Any):
"""Initialize the epub reader."""
# passing strategy="fast" raises an error, so it is left out
super().__init__(file_path=file_path, **unstructured_kwargs, mode="elements")  # , strategy="fast")
class DocumentLoaderException(Exception):
"""Document loader exception."""
pass
class DocumentLoader(object):
"""Loads in a document with a supported extension."""
supported_extensions = {
".pdf": PyPDFLoader,
".txt": TextLoader,
".epub": EpubReader,
".docx": UnstructuredWordDocumentLoader,
".doc": UnstructuredWordDocumentLoader,
}
def load_document(temp_filepath: str) -> list[Document]:
"""Load a file and return it as a list of documents.
Doesn't handle a lot of errors at the moment.
"""
ext = pathlib.Path(temp_filepath).suffix
loader = DocumentLoader.supported_extensions.get(ext)
if not loader:
raise DocumentLoaderException(f"Unsupported file extension: {ext}, cannot load this type of file")
loaded = loader(temp_filepath)
docs = loaded.load()
# log metadata for at most the first five loaded documents
for idx, doc in enumerate(docs):
logging.info(doc.metadata)
if idx >= 4:
break
return docs
if __name__ == "__main__":
print("=============Load PDF=====================")
load_document('data/Introduction_to_Entropy_and_Gini-Index.pdf')
print("=============Load EPUB=====================")
load_document('data/Julia for Data Analysis.epub')
print("=============Load DOCX=====================")
load_document('data/test.docx')
print("=============Load TXT=====================")
load_document('data/나없이는존재하지않는세상_20240102.txt')
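Because DocumentLoader keeps a plain mapping from file extension to loader class, support for additional formats can be added without touching load_document. A small, hypothetical sketch (the `.md` entry and the UnstructuredMarkdownLoader choice are illustrative and not part of the original utils.py):
from langchain.document_loaders import UnstructuredMarkdownLoader

# Hypothetical: accept Markdown uploads as well (may need extra `unstructured` dependencies).
DocumentLoader.supported_extensions[".md"] = UnstructuredMarkdownLoader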
chat_with_documents.py¶
"""Chat with retrieval and embedding."""
import logging
import os, sys
import tempfile
from langchain.chains import (
ConversationalRetrievalChain,
OpenAIModerationChain,
SimpleSequentialChain,
FlareChain
)
from langchain.chains.base import Chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
# The packages below are required for HuggingFaceEmbeddings:
# pip install transformers -U
# pip install sentence-transformers
from langchain.embeddings import HuggingFaceEmbeddings
# Retriever that wraps a base retriever and compresses the results.
from langchain.retrievers import ContextualCompressionRetriever
# Document compressor that uses embeddings to drop documents unrelated to the query.
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.schema import BaseRetriever, Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
# https://python.langchain.com/docs/integrations/vectorstores/docarray_in_memory
# In-memory DocArray storage for exact search.
from langchain.vectorstores.docarray import DocArrayInMemorySearch
from dotenv import load_dotenv
# add the project root path to the Python path
current_path = os.path.abspath(".")
print(current_path)
if current_path not in sys.path:
sys.path.append(current_path)
from langchain_ai_05.chatbot.utils import MEMORY, load_document
logging.basicConfig(encoding='utf-8', level=logging.INFO)
LOGGER = logging.getLogger()
load_dotenv()
# Setup LLM and QA chain; set temperature low to keep hallucinations in check.
# alternative model: gpt-4-1106-preview
LLM = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True)
def configure_retriever(
docs: list[Document],
use_compression: bool = False
) -> BaseRetriever:
"""Retriever to use.
Args:
docs (list[Document]): document list
use_compression (bool, optional): compression option. Defaults to False.
Returns:
BaseRetriever: retriever
"""
# Split each document documents:
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1500, chunk_overlap=200
)
splits = text_splitter.split_documents(docs)
# Create embeddings and store in vectordb:
# alternatively: embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Create vectordb with single call to embedding model for texts:
vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
# mmr : Maximal Marginal Relevance
# Instead of simply returning the top-k most similar chunks, MMR first fetches a
# larger candidate pool (fetch_k) and then greedily selects chunks that are both
# relevant to the query and dissimilar to the chunks already chosen, which keeps
# near-duplicate context out of the prompt.
retriever = vectordb.as_retriever(
search_type="mmr", search_kwargs={
"k": 5,
"fetch_k": 20,  # candidate pool for MMR; must be at least as large as k
# "include_metadata": True,
},
)
if not use_compression:
return retriever
# similarity_threshold : minimum embedding similarity (0~1) between the query and a
# retrieved chunk for that chunk to be kept. Chunks below the threshold are dropped
# before reaching the LLM; a low value such as 0.2 only filters out clearly
# unrelated chunks, while a higher value keeps only close matches.
embeddings_filter = EmbeddingsFilter(
embeddings=embeddings,
similarity_threshold=0.2)
return ContextualCompressionRetriever(
base_compressor=embeddings_filter,
base_retriever=retriever,
)
def configure_chain(retriever: BaseRetriever, use_flare: bool = False) -> Chain:
"""Configure chain with retriever.
Args:
retriever (BaseRetriever): retriever
use_flare (bool, optional): flare option. Defaults to False.
Returns:
Chain: chain
"""
params = dict(
llm = LLM,
retriever = retriever,
memory=MEMORY,
verbose=True,
max_tokens_limit=4000,
)
if use_flare:
# FLARE needs a different set of parameters and initialization;
# unfortunately, this requires using a "protected" class.
return FlareChain.from_llm(**params)
return ConversationalRetrievalChain.from_llm(**params)
def configure_retrieval_chain(
uploaded_file,
use_compression: bool = False,
use_flare: bool = False,
use_moderation: bool = False,
) -> Chain:
"""Read documents, configure retriever, and configure chain.
Args:
uploaded_file (FileStorage): file
use_compression (bool, optional): compression option. Defaults to False.
use_flare (bool, optional): flare option. Defaults to False.
use_moderation (bool, optional): moderation option. Defaults to False.
Returns:
Chain: chain
"""
# Load documents from file
docs = []
temp_dir = tempfile.TemporaryDirectory()
for file in uploaded_file:
temp_filepath = os.path.join(temp_dir.name, file.name)
with open(temp_filepath, "wb") as f:
f.write(file.getvalue())
docs.extend(load_document(temp_filepath))
# Configure retriever
retriever = configure_retriever(docs=docs, use_compression=use_compression)
# Configure chain
# NOTE: use_flare=True currently raises an error and would need substantial
# changes, so it is not used.
chain = configure_chain(retriever=retriever, use_flare=use_flare)
# Configure moderation
if not use_moderation:
return chain
# NOTE: use_moderation=True currently raises an error, so it is not used.
# SimpleSequentialChain passes a single string between its sub-chains, so the
# retrieval chain must expose only the answer (no source documents) for this to work.
moderation_chain = OpenAIModerationChain()
return SimpleSequentialChain(chains=[chain, moderation_chain])
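Outside of Streamlit, the same building blocks can be exercised from a plain Python script. A minimal sketch, assuming the project layout above and the sample text file used in utils.py (this snippet is not part of the original source files):
from langchain_ai_05.chatbot.chat_with_documents import configure_retriever, configure_chain
from langchain_ai_05.chatbot.utils import load_document

# Load one document, build the compressed retriever, and wire up the conversational chain.
docs = load_document("data/나없이는존재하지않는세상_20240102.txt")
retriever = configure_retriever(docs=docs, use_compression=True)
chain = configure_chain(retriever=retriever, use_flare=False)

# Chat history is supplied by the shared MEMORY object, so only the question is passed in.
result = chain({"question": "마흐의 사상은 무엇이었나요?"})
print(result["answer"])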
app.py¶
"""Document loading functionality.
Run like this:
> PYTHONPATH=. streamlit run langchain_ai_05/chatbot/app.py
Debugging streamlit in VS Code : https://www.gpters.org/c/llm/7-visual-studio-code-streamlit
Streaming demo : https://github.com/streamlit/StreamlitLangChain/blob/main/streaming_demo.py
1) The chatbot remembers the previous conversation and uses it together with the current question to build a new standalone question.
2) The standalone question is used to find the most similar documents in the vectordb; the standalone question becomes the user prompt and the similar documents are supplied as context (the system prompt).
3) The chatbot is run with that context and the user prompt.
Example:
1) The chatbot remembers the previous conversation and uses it together with the current question to build a new standalone question.
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
Human: 양자역학에서 제일중요한것은?
Assistant: 양자역학에서 가장 중요한 것은 양자 현상의 이해와 해석입니다.
Human: 그럼 양자역학에서 상관관계는 뭔가?
Assistant: 양자역학에서 상관관계는 두 개 이상의 양자 시스템 사이의 관계를 나타내는 것을 말합니다. 양자역학에서는 상관관계를 통해 양자 시스템 간의 상호작용이나 상태의 종속성을 설명하고 이해할 수 있습니다. 상관관계는 양자 시스템의 상태를 측정하거나 조작함으로써 확인할 수 있습니다.
Human: 그렇다면 마흐는 양자역학 탄생에 어떤 역할을 했는가?
Assistant: 마흐(Max Planck)는 양자역학의 탄생에 매우 중요한 역할을 했습니다. 1900년, 마흐는 빛의 방출과 흡수에 관한 연구를 통해 에너지가 이산화되는(discrete) 형태로 존재한다는 개
Human: 당신의 대답에서 마흐를 Max Plank로 표시 했는데 막스 플랑크가 아닙니다. 마흐는 철학자 입니다. 다시 마흐와 양자역학 탄생의 관계를 알려 주세요
Assistant: 마흐(Max Planck)는 양자역학의 탄생에 매우 중요한 역할을 했습니다. 1900년, 마흐는 빛의 방출과 흡수에 관한 연구를 통해 에너지가 이산화되는(discrete) 형태로 존재한다는 개
Human: 당신의 대답에서 마흐를 Max Plank로 표시 했는데 막스 플랑크가 아닙니다. 마흐는 철학자 입니다. 다시 마흐와 양자역학 탄생의 관계를 알려 주세요
Assistant: 마흐(Max Planck)는 양자역학의 탄생에 매우 중요한 역할을 했습니다. 1900년, 마흐는 빛의 방출과 흡수에 관한 연구를 통해 에너지가 이산화되는(discrete) 형태로 존재한다는 개
Follow Up Input: 답이 만족 스럽지 않습니다. 마흐의 사상의 무엇이었나요?
Standalone question: 마흐의 사상의 무엇이었나요?
2) The standalone question is used to find the most similar documents in the vectordb; the standalone question becomes the user prompt and the similar documents are supplied as context (the system prompt).
standalone question -> vectordb -> similar documents -> context (system prompt)
3) The chatbot is run with that context and the user prompt.
standalone question -> user prompt, similar documents -> context (system prompt) => run the chatbot
Similar documents:
생물학의 기초를 이해할 수 있는 식으로 되어 있죠. 우리가 잘 이해하는 연결도 있지만, 그렇지 않은 연결도 있습니다. 단절이란 우리의 이해에 난 갈라진 틈입니다. 이것이 바로 의미 개념의 물리적 기초에대한 질문이 뜻하는 것입니다.
..
..
System: Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
생물학의 기초를 이해할 수 있는 식으로 되어 있죠. 우리가 잘 이해하는 연결도 있지만, 그렇지 않은 연결도 있습니다. 단절이란 우리의 이해에 난 갈라진 틈입니다. 이것이 바로 의미 개념의 물리적 기초에대한 질문이 뜻하는 것입니다.
..
..
Human: 마흐의 사상은 무엇이었나요?
"""
import logging
import os, sys
import streamlit as st
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks import StreamlitCallbackHandler
# add the project root path to the Python path
current_path = os.path.abspath(".")
print(current_path)
if current_path not in sys.path:
sys.path.append(current_path)
from langchain_ai_05.chatbot.chat_with_documents import configure_retrieval_chain
from langchain_ai_05.chatbot.utils import MEMORY, DocumentLoader
logging.basicConfig(encoding='utf-8', level=logging.INFO)
LOGGER = logging.getLogger()
class StreamHandler(BaseCallbackHandler):
"""Stream newly generated LLM tokens into a Streamlit container."""
def __init__(self, container, initial_text=""):
self.container = container
self.text = initial_text
def on_llm_new_token(self, token: str, **kwargs) -> None:
self.text += token
self.container.markdown(self.text)
st.set_page_config(page_title="LangChain: Chat with Documents", page_icon="🦜")
st.title("🦜 LangChain: Chat with Documents")
uploaded_file = st.sidebar.file_uploader(
label="Upload files",
type=list(DocumentLoader.supported_extensions.keys()),
accept_multiple_files=True,
)
if not uploaded_file:
MEMORY.chat_memory.clear() # clear chat history
st.session_state.clear() # clear session state
st.info("Please upload files to continue.")
st.stop()
# use compression by default:
use_compression = st.checkbox("compression", value=True)
# use_flare = st.checkbox("flare", value=False)
use_flare = False
# use_moderation = st.checkbox("moderation", value=False)
use_moderation = False
uploaded_file_list = set([f.name for f in uploaded_file])
# uploaded_file update check
if "uploaded_file_list" not in st.session_state:
st.session_state["uploaded_file_list"] = uploaded_file_list
if "use_compression" not in st.session_state:
st.session_state["use_compression"] = use_compression
update_configure_retrieval_chain = False
# rebuild when the symmetric difference is non-empty, i.e. the set of uploaded files changed
if len(uploaded_file_list ^ st.session_state["uploaded_file_list"]) != 0:
st.session_state["uploaded_file_list"] = uploaded_file_list
update_configure_retrieval_chain = True
if use_compression != st.session_state["use_compression"]:
st.session_state["use_compression"] = use_compression
update_configure_retrieval_chain = True
# (re)build the retrieval chain and cache it in the session state
if "CONV_CHAIN" not in st.session_state or update_configure_retrieval_chain:
CONV_CHAIN = configure_retrieval_chain(uploaded_file=uploaded_file,
use_compression=use_compression,
use_flare=use_flare,
use_moderation=use_moderation)
st.session_state["CONV_CHAIN"]= CONV_CHAIN
CONV_CHAIN = st.session_state["CONV_CHAIN"]
if st.sidebar.button("Clear message history"):
MEMORY.chat_memory.clear()
avatars = {"human":"user","ai":"assistant"}
# if "messages" not in st.session_state:
# st.session_state["messages"] = [st.chat_message("assistant").markdown("🦜 무엇인든 물어 보세요. $e^{i\pi}+1=0$")]
if len(MEMORY.chat_memory.messages) == 0:
st.chat_message("assistant").markdown("🦜 무엇인든 물어 보세요. $e^{i\pi}+1=0$")
for msg in MEMORY.chat_memory.messages:
st.chat_message(avatars[msg.type]).write(msg.content)
if user_query := st.chat_input(placeholder="Ask me anything!"):
st.chat_message("user").write(user_query)
with st.chat_message("assistant"):
container = st.empty()
stream_handler = StreamlitCallbackHandler(container)  # alternatively: StreamHandler(container)
if use_flare:
params = {
"user_input": user_query,
}
else:
params = {
"question": user_query,
"chat_history": MEMORY.chat_memory.messages,
}
response = CONV_CHAIN.run(params, callbacks=[stream_handler])
# Display the response from the chatbot
if response:
container.markdown(response)
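For reference, the question-rewriting step described in the docstring above relies on the chain's default condense-question prompt, which can be inspected directly (a small sketch, assuming a classic `langchain` installation):
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

# Default prompt that turns (chat history, follow-up question) into a standalone question.
print(CONDENSE_QUESTION_PROMPT.template)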
.env¶
OPENAI_API_KEY=sk-9e2Amozcky8ZO3WByoxuT3B...
OPENAI_ORGANIZATION=org-9rVeG...
PINECONE_API_KEY=c0dc5...
PINECONE_ENVIRONMENT=gcp-starter
SERPAPI_API_KEY=c6b1...
GOOGLE_CSE_ID=9...
GOOGLE_API_KEY=AI...
WOLFRAM_ALPHA_APPID=5...
GOOGLE_MAP_API_KEY=AIzaS...
HUGGINGFACEHUB_API_TOKEN=hf_Ldc...
JINACHAT_API_KEY=ADpbLNay...
ZEP_API_URL=http://localhost:8000
ZEP_API_KEY=eyJhb...