Building a Chatbot like ChatGPT¶
# embedding
from langchain.embeddings.openai import OpenAIEmbeddings
from scipy.spatial.distance import pdist, squareform
import numpy as np
import pandas as pd
# vector database
from langchain.vectorstores import Chroma
from langchain.document_loaders import ArxivLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.chains import RetrievalQA # Chain for question-answering against a vector database.
from langchain.llms import OpenAI
# from langchain.llms import OpenAIChat
from langchain_community.chat_models import ChatOpenAI
# Loading and retrieving in Langchain
from langchain.document_loaders import TextLoader
from langchain.retrievers import (
    KNNRetriever,
    PubMedRetriever,  # interacts with the PubMed (biomedical literature citation) database
)
# Custom Retriever
from langchain.schema import Document, BaseRetriever
# Conversation buffers
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
# Remembering conversation summaries
from langchain.memory import ConversationSummaryMemory
# Storing knowledge graphs
from langchain.memory import ConversationKGMemory
# Combining several memory mechanisms
from langchain.memory import CombinedMemory
# Long-term persistence
from langchain.memory import ZepMemory
import uuid
from dotenv import load_dotenv
import os
load_dotenv()
True
Embedding¶
An embedding is a numerical representation of content in a form that machines can process and understand.
embeddings = OpenAIEmbeddings()
text = "This is a sample query."
query_result = embeddings.embed_query(text)
# print(query_result)
print(len(query_result))
1536
words = ["cat", "dog", "computer", "animal"]
embeddings = OpenAIEmbeddings()
doc_vectors = embeddings.embed_documents(words)
doc_vectors[0][1:10]
[-0.01743050591323599, -0.00966626551309167, -0.030631132997702794, -0.012618664121172612, 0.003124503287338194, -0.004992817784131865, -0.04122002249374317, -0.014563272094407411, -0.021348110433230556]
- pdist : pairwise distances; returns a condensed vector containing the $\binom{n}{2}$ pairwise distances
- squareform : converts the condensed distance vector into an $n \times n$ square matrix
For the 4 words above, $\binom{4}{2} = 6$ pairwise distances are computed.
X = np.array(doc_vectors)
# the default metric is euclidean; here we use cosine distance
pairwise_dist = pdist(X, metric="cosine")
print(pairwise_dist)
dists = squareform(pairwise_dist)
print(dists)
print(dists.shape)
[0.13631546 0.16534796 0.13565021 0.16875871 0.11445978 0.17475822]
[[0.         0.13631546 0.16534796 0.13565021]
 [0.13631546 0.         0.16875871 0.11445978]
 [0.16534796 0.16875871 0.         0.17475822]
 [0.13565021 0.11445978 0.17475822 0.        ]]
(4, 4)
# cosine distances between cat, dog, computer, and animal
df = pd.DataFrame(data=dists,index=words,columns=words)
df.style.background_gradient(cmap='coolwarm')
|          | cat      | dog      | computer | animal   |
|----------|----------|----------|----------|----------|
| cat      | 0.000000 | 0.136315 | 0.165348 | 0.135650 |
| dog      | 0.136315 | 0.000000 | 0.168759 | 0.114460 |
| computer | 0.165348 | 0.168759 | 0.000000 | 0.174758 |
| animal   | 0.135650 | 0.114460 | 0.174758 | 0.000000 |
Vector Storage¶
Vector storage is a standalone solution designed specifically to store and retrieve vector embeddings efficiently.
Vector databases such as Milvus or Pinecone are designed to store, manage, and search large sets of vectors.
Vector Indexing¶
Vector indexing is a way of organizing data to optimize the storage and/or contextual retrieval of embeddings.
For vector embeddings, indexing aims to structure the vectors so that similar vectors are stored next to each other, enabling fast proximity or similarity searches.
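Before introducing an index, the baseline is an exact search that scores the query against every stored vector. The minimal sketch below illustrates that baseline with NumPy; it assumes the `doc_vectors` and `words` variables from the embedding cells above are still in scope.
import numpy as np

def cosine_nearest(query_vec, vectors, k=2):
    # brute-force cosine similarity: score the query against every stored vector
    vectors = np.asarray(vectors)
    query_vec = np.asarray(query_vec)
    sims = vectors @ query_vec / (np.linalg.norm(vectors, axis=1) * np.linalg.norm(query_vec))
    return np.argsort(-sims)[:k]  # indices of the k most similar vectors

top = cosine_nearest(doc_vectors[0], doc_vectors, k=2)
print([words[i] for i in top])  # the first hit is "cat" itself
An index replaces this full scan with a data structure that narrows the search to a small neighborhood of candidate vectors, which is what makes similarity search fast at scale.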
Vector Libraries¶
Vector libraries such as Facebook (Meta) Faiss or Spotify Annoy provide functionality for working with vectors.
These libraries use Approximate Nearest Neighbor (ANN) algorithms to search vectors efficiently and find the most similar ones.
Faiss is the most popular (a minimal sketch follows the list below):
- widely used for large-scale vector search tasks
- supports both CPU and GPU acceleration
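A minimal Faiss sketch, assuming `faiss-cpu` is installed (`pip install faiss-cpu`) and that `doc_vectors` and `words` from the embedding cells are still in scope. `IndexFlatIP` is an exact inner-product index; for large collections Faiss also provides ANN index types such as IVF and HNSW variants.
import numpy as np
import faiss

X = np.array(doc_vectors, dtype="float32")
faiss.normalize_L2(X)                  # normalize so inner product equals cosine similarity
index = faiss.IndexFlatIP(X.shape[1])  # exact inner-product index over the embedding dimension
index.add(X)

query = np.array([doc_vectors[3]], dtype="float32")  # the embedding for "animal"
faiss.normalize_L2(query)
scores, ids = index.search(query, 3)   # top-3 most similar stored vectors
print([words[i] for i in ids[0]], scores[0])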
Vector Database¶
A vector database is designed to handle vector embeddings, making it easier to search and query data objects.
Whereas vector storage focuses only on storing and retrieving vector embeddings, a vector database provides a more comprehensive solution for managing and querying vector data.
Milvus is the most popular.
- followed by Qdrant, Weaviate, and Chroma, whose popularity rose sharply in 2023
Chroma is currently free.
Installation:
pip install chromadb
pip install pymupdf
# The download is a PDF, so the pymupdf library must be installed.
# Mistral 7B LLM paper: 2310.06825, https://arxiv.org/pdf/2310.06825.pdf
loader = ArxivLoader(query="2310.06825")
documents = loader.load()
documents
[Document(page_content='Mistral 7B\nAlbert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford,\nDevendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel,\nGuillaume Lample, Lucile Saulnier, Lélio Renard Lavaud, Marie-Anne Lachaux,\nPierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix,\nWilliam El Sayed\nAbstract\nWe introduce Mistral 7B, a 7–billion-parameter language model engineered for\nsuperior performance and efficiency. Mistral 7B outperforms the best open 13B\nmodel (Llama 2) across all evaluated benchmarks, and the best released 34B\nmodel (Llama 1) in reasoning, mathematics, and code generation. Our model\nleverages grouped-query attention (GQA) for faster inference, coupled with sliding\nwindow attention (SWA) to effectively handle sequences of arbitrary length with a\nreduced inference cost. We also provide a model fine-tuned to follow instructions,\nMistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and\nautomated benchmarks. Our models are released under the Apache 2.0 license.\nCode: https://github.com/mistralai/mistral-src\nWebpage: https://mistral.ai/news/announcing-mistral-7b/\n1\nIntroduction\nIn the rapidly evolving domain of Natural Language Processing (NLP), the race towards higher model\nperformance often necessitates an escalation in model size. However, this scaling tends to increase\ncomputational costs and inference latency, thereby raising barriers to deployment in practical,\nreal-world scenarios. In this context, the search for balanced models delivering both high-level\nperformance and efficiency becomes critically essential. Our model, Mistral 7B, demonstrates that\na carefully designed language model can deliver high performance while maintaining an efficient\ninference. Mistral 7B outperforms the previous best 13B model (Llama 2, [26]) across all tested\nbenchmarks, and surpasses the best 34B model (LLaMa 34B, [25]) in mathematics and code\ngeneration. Furthermore, Mistral 7B approaches the coding performance of Code-Llama 7B [20],\nwithout sacrificing performance on non-code related benchmarks.\nMistral 7B leverages grouped-query attention (GQA) [1], and sliding window attention (SWA) [6, 3].\nGQA significantly accelerates the inference speed, and also reduces the memory requirement during\ndecoding, allowing for higher batch sizes hence higher throughput, a crucial factor for real-time\napplications. In addition, SWA is designed to handle longer sequences more effectively at a reduced\ncomputational cost, thereby alleviating a common limitation in LLMs. These attention mechanisms\ncollectively contribute to the enhanced performance and efficiency of Mistral 7B.\narXiv:2310.06825v1 [cs.CL] 10 Oct 2023\nMistral 7B is released under the Apache 2.0 license. This release is accompanied by a reference\nimplementation1 facilitating easy deployment either locally or on cloud platforms such as AWS, GCP,\nor Azure using the vLLM [17] inference server and SkyPilot 2. Integration with Hugging Face 3 is\nalso streamlined for easier integration. Moreover, Mistral 7B is crafted for ease of fine-tuning across\na myriad of tasks. As a demonstration of its adaptability and superior performance, we present a chat\nmodel fine-tuned from Mistral 7B that significantly outperforms the Llama 2 13B – Chat model.\nMistral 7B takes a significant step in balancing the goals of getting high performance while keeping\nlarge language models efficient. 
Through our work, our aim is to help the community create more\naffordable, efficient, and high-performing language models that can be used in a wide range of\nreal-world applications.\n2\nArchitectural details\nFigure 1: Sliding Window Attention. The number of operations in vanilla attention is quadratic in the sequence\nlength, and the memory increases linearly with the number of tokens. At inference time, this incurs higher\nlatency and smaller throughput due to reduced cache availability. To alleviate this issue, we use sliding window\nattention: each token can attend to at most W tokens from the previous layer (here, W = 3). Note that tokens\noutside the sliding window still influence next word prediction. At each attention layer, information can move\nforward by W tokens. Hence, after k attention layers, information can move forward by up to k × W tokens.\nParameter\nValue\ndim\n4096\nn_layers\n32\nhead_dim\n128\nhidden_dim\n14336\nn_heads\n32\nn_kv_heads\n8\nwindow_size\n4096\ncontext_len\n8192\nvocab_size\n32000\nTable 1: Model architecture.\nMistral 7B is based on a transformer architecture [27]. The main\nparameters of the architecture are summarized in Table 1. Compared\nto Llama, it introduces a few changes that we summarize below.\nSliding Window Attention. SWA exploits the stacked layers of a trans-\nformer to attend information beyond the window size W. The hidden\nstate in position i of the layer k, hi, attends to all hidden states from\nthe previous layer with positions between i − W and i. Recursively,\nhi can access tokens from the input layer at a distance of up to W × k\ntokens, as illustrated in Figure 1. At the last layer, using a window size\nof W = 4096, we have a theoretical attention span of approximately\n131K tokens. In practice, for a sequence length of 16K and W = 4096,\nchanges made to FlashAttention [11] and xFormers [18] yield a 2x\nspeed improvement over a vanilla attention baseline.\nRolling Buffer Cache. A fixed attention span means that we can limit our cache size using a rolling\nbuffer cache. The cache has a fixed size of W, and the keys and values for the timestep i are stored\nin position i mod W of the cache. As a result, when the position i is larger than W, past values\nin the cache are overwritten, and the size of the cache stops increasing. We provide an illustration\nin Figure 2 for W = 3. On a sequence length of 32k tokens, this reduces the cache memory usage\nby 8x, without impacting the model quality.\n1https://github.com/mistralai/mistral-src\n2https://github.com/skypilot-org/skypilot\n3https://huggingface.co/mistralai\n2\nFigure 2: Rolling buffer cache. The cache has a fixed size of W = 4. Keys and values for position i are stored\nin position i mod W of the cache. When the position i is larger than W, past values in the cache are overwritten.\nThe hidden state corresponding to the latest generated tokens are colored in orange.\nPre-fill and Chunking. When generating a sequence, we need to predict tokens one-by-one, as\neach token is conditioned on the previous ones. However, the prompt is known in advance, and we\ncan pre-fill the (k, v) cache with the prompt. If the prompt is very large, we can chunk it into smaller\npieces, and pre-fill the cache with each chunk. For this purpose, we can select the window size as\nour chunk size. For each chunk, we thus need to compute the attention over the cache and over the\nchunk. 
Figure 3 shows how the attention mask works over both the cache and the chunk.\ngo\ndog\n0\n0\n0\n0\n1\n0\n0\n0\n0\n0\nthe\nto\nThe\ncat\nsat\non\nthe\n1\nmat\nand\n1\n1\n1\nsaw\nthe\n1\n0\n0\n0\ndog\ngo\nto\n1\n0\n0\n0\n0\n0\n1\n1\n0\n0\n0\n0\n0\n0\n0\n0\n1\n1\n1\n0\n0\n0\n0\n0\n0\n1\n1\n1\n1\n0\nPast\nCache\nCurrent\nFigure 3: Pre-fill and chunking. During pre-fill of the cache, long sequences are chunked to limit memory\nusage. We process a sequence in three chunks, “The cat sat on”, “the mat and saw”, “the dog go to”. The figure\nshows what happens for the third chunk (“the dog go to”): it attends itself using a causal mask (rightmost block),\nattends the cache using a sliding window (center block), and does not attend to past tokens as they are outside of\nthe sliding window (left block).\n3\nResults\nWe compare Mistral 7B to Llama, and re-run all benchmarks with our own evaluation pipeline for\nfair comparison. We measure performance on a wide variety of tasks categorized as follow:\n• Commonsense Reasoning (0-shot): Hellaswag [28], Winogrande [21], PIQA [4], SIQA [22],\nOpenbookQA [19], ARC-Easy, ARC-Challenge [9], CommonsenseQA [24]\n• World Knowledge (5-shot): NaturalQuestions [16], TriviaQA [15]\n• Reading Comprehension (0-shot): BoolQ [8], QuAC [7]\n• Math: GSM8K [10] (8-shot) with maj@8 and MATH [13] (4-shot) with maj@4\n• Code: Humaneval [5] (0-shot) and MBPP [2] (3-shot)\n• Popular aggregated results: MMLU [12] (5-shot), BBH [23] (3-shot), and AGI Eval [29]\n(3-5-shot, English multiple-choice questions only)\nDetailed results for Mistral 7B, Llama 2 7B/13B, and Code-Llama 7B are reported in Table 2. Figure 4\ncompares the performance of Mistral 7B with Llama 2 7B/13B, and Llama 1 34B4 in different\ncategories. Mistral 7B surpasses Llama 2 13B across all metrics, and outperforms Llama 1 34B on\nmost benchmarks. In particular, Mistral 7B displays a superior performance in code, mathematics,\nand reasoning benchmarks.\n4Since Llama 2 34B was not open-sourced, we report results for Llama 1 34B.\n3\nFigure 4: Performance of Mistral 7B and different Llama models on a wide range of benchmarks. All\nmodels were re-evaluated on all metrics with our evaluation pipeline for accurate comparison. Mistral 7B\nsignificantly outperforms Llama 2 7B and Llama 2 13B on all benchmarks. It is also vastly superior to Llama 1\n34B in mathematics, code generation, and reasoning benchmarks.\nModel\nModality MMLU HellaSwag WinoG PIQA\nArc-e\nArc-c\nNQ\nTriviaQA HumanEval MBPP MATH GSM8K\nLLaMA 2 7B\nPretrained 44.4%\n77.1%\n69.5% 77.9% 68.7% 43.2% 24.7%\n63.8%\n11.6%\n26.1%\n3.9%\n16.0%\nLLaMA 2 13B\nPretrained 55.6%\n80.7%\n72.9% 80.8% 75.2% 48.8% 29.0%\n69.6%\n18.9%\n35.4%\n6.0%\n34.3%\nCode-Llama 7B Finetuned\n36.9%\n62.9%\n62.3% 72.8% 59.4% 34.5% 11.0%\n34.9%\n31.1%\n52.5%\n5.2%\n20.8%\nMistral 7B\nPretrained 60.1%\n81.3%\n75.3% 83.0% 80.0% 55.5% 28.8%\n69.9%\n30.5%\n47.5% 13.1%\n52.2%\nTable 2: Comparison of Mistral 7B with Llama. Mistral 7B outperforms Llama 2 13B on all metrics, and\napproaches the code performance of Code-Llama 7B without sacrificing performance on non-code benchmarks.\nSize and Efficiency. We computed “equivalent model sizes” of the Llama 2 family, aiming to\nunderstand Mistral 7B models’ efficiency in the cost-performance spectrum (see Figure 5). When\nevaluated on reasoning, comprehension, and STEM reasoning (specifically MMLU), Mistral 7B\nmirrored performance that one might expect from a Llama 2 model with more than 3x its size. 
On\nthe Knowledge benchmarks, Mistral 7B’s performance achieves a lower compression rate of 1.9x,\nwhich is likely due to its limited parameter count that restricts the amount of knowledge it can store.\nEvaluation Differences. On some benchmarks, there are some differences between our evaluation\nprotocol and the one reported in the Llama 2 paper: 1) on MBPP, we use the hand-verified subset 2)\non TriviaQA, we do not provide Wikipedia contexts.\n4\nInstruction Finetuning\nModel\nChatbot Arena\nELO Rating\nMT Bench\nWizardLM 13B v1.2\n1047\n7.2\nMistral 7B Instruct\n1031\n6.84 +/- 0.07\nLlama 2 13B Chat\n1012\n6.65\nVicuna 13B\n1041\n6.57\nLlama 2 7B Chat\n985\n6.27\nVicuna 7B\n997\n6.17\nAlpaca 13B\n914\n4.53\nTable 3: Comparison of Chat models. Mistral 7B –\nInstruct outperforms all 7B models on MT-Bench, and\nis comparable to 13B – Chat models.\nTo evaluate the generalization capabilities of\nMistral 7B, we fine-tuned it on instruction datasets\npublicly available on the Hugging Face repository.\nNo proprietary data or training tricks were utilized:\nMistral 7B – Instruct model is a simple and\npreliminary demonstration that the base model can\neasily be fine-tuned to achieve good performance.\nIn Table 3, we observe that the resulting model,\nMistral 7B – Instruct, exhibits superior perfor-\nmance compared to all 7B models on MT-Bench,\nand is comparable to 13B – Chat models. An\nindependent human evaluation was conducted on\nhttps://llmboxing.com/leaderboard.\nIn this evaluation, participants were provided with a set of questions along with anonymous responses\nfrom two models and were asked to select their preferred response, as illustrated in Figure 6. As of\nOctober 6, 2023, the outputs generated by Mistral 7B were preferred 5020 times, compared to 4143\ntimes for Llama 2 13B.\n4\nFigure 5: Results on MMLU, commonsense reasoning, world knowledge and reading comprehension for\nMistral 7B and Llama 2 (7B/13B/70B). Mistral 7B largely outperforms Llama 2 13B on all evaluations, except\non knowledge benchmarks, where it is on par (this is likely due to its limited parameter count, which limits the\namount of knowledge it can compress).\n5\nAdding guardrails for front-facing applications\nThe ability to enforce guardrails when it comes to AI generation is important for front-facing appli-\ncations. In this section, we highlight how to leverage system prompting to optionally enforce output\nconstraints on top of our models. Additionally, we showcase the ability of Mistral 7B to perform\nfine-grained content moderation, which can be useful to enforce quality content in applications.\n5.1\nSystem prompt to enforce guardrails\nWe introduce a system prompt (see below) to guide the model to generate answers within specified\nguardrails, similar to the work done with Llama 2. Using this prompt allows the user to move on the\nPareto front of model utility / guardrails enforcement, as indicated in Table 4.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful,\nunethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.\nGuardrails\nMT Bench\nNo system prompt\n6.84 ± 0.07\nLlama 2 system prompt\n6.38 ± 0.07\nMistral system prompt\n6.58 ± 0.05\nTable 4: System prompts. 
Mean official\nMT Bench score over 10 iterations with\nstandard deviation for Mistral 7B – Instruct.\nFor reference, Llama 2 13B – Chat reports\nofficial results of 6.65.\nWe use a set of 175 unsafe prompts for evaluating safety.\nWith the recommended system prompt, the model properly\ndeclines to answer 100% of the harmful questions.\nAs an illustration, we provide in Table 5 the answers of\nboth Mistral 7B – Instruct and Llama 2 Chat 13B to the\nquestion How to kill a linux process with system prompts\nactivated. We observe that Mistral 7B provides a correct\nresponse while Llama 2 declines to answer. Note that on\nthis specific question, both models answer correctly when\nsystem prompts are deactivated.\n5.2\nContent moderation with self-reflection\nMistral 7B – Instruct can be used as a content moderator: the model itself is able to accurately\nclassify a user prompt or its generated answer as being either acceptable or falling into one of the\nfollowing categories: Illegal activities such as terrorism, child abuse or fraud; Hateful, harassing\nor violent content such as discrimination, self-harm or bullying; Unqualified advice for instance\nin legal, medical or financial domains.\n5\nModel\nAnswer\nMistral 7B – Instruct\nwith Mistral system prompt\nTo kill a Linux process, you can use the `kill`command followed by the process ID (PID)\nof the process you want to terminate. For example, to kill process with PID 1234, you\nwould run the command `kill 1234`. It’s important to note that killing a process can\nhave unintended consequences, so it’s generally a good idea to only kill processes that\nyou are certain you want to terminate. Additionally, it’s generally a good idea to use the\n`kill`command with caution, as it can be used to terminate critical system processes that\nare necessary for the operation of the system.\nLlama 2 13B – Chat\nwith Llama 2 system prompt\nAs a helpful and respectful assistant, I cannot provide information on how to kill a Linux\nprocess. It is not appropriate or ethical to intentionally harm or terminate processes, as it\ncan cause damage to the system and potentially result in data loss. Additionally, it is\nimportant to respect the autonomy and security of other users’ processes, as they may be\ncritical to their work or personal activities. Instead, I suggest that you explore other\noptions to address any issues or concerns you may have.\nTable 5: Comparison between Mistral and Llama system prompts. Mistral provides the right answer whereas\nLlama 2 declines to answer to the question How to kill a linux process.\nTo do so, we designed a self-reflection prompt that makes Mistral 7B classify a prompt or a generated\nanswer. We evaluated self-reflection on our manually curated and balanced dataset of adversarial\nand standard prompts and got a precision of 99.4% for a recall of 95.6% (considering acceptable\nprompts as positives).\nThe use cases are vast, from moderating comments on social media or forums to brand monitoring\non the internet. In particular, the end user is able to select afterwards which categories to effectively\nfilter based on their particular use-case.\n6\nConclusion\nOur work on Mistral 7B demonstrates that language models may compress knowledge more than\nwhat was previously thought. 
This opens up interesting perspectives: the field has so far put the\nemphasis on scaling laws in 2 dimensions (directly associating model capabilities to training cost, as\nin [14]); the problem is rather 3 dimensional (model capabilities, training cost, inference cost), and\nmuch remains to be explored to obtain the best performance with the smallest possible model.\nAcknowledgements\nWe are grateful to CoreWeave for their 24/7 help in marshalling our cluster.\nWe thank the\nCINECA/EuroHPC team, and in particular the operators of Leonardo, for their resources and help.\nWe thank the maintainers of FlashAttention, vLLM, xFormers, Skypilot for their precious assistance\nin implementing new features and integrating their solutions into ours. A huge thanks to Tri Dao\nand Daniel Haziza for helping include Mistral related changes to FlashAttention and xFormers on\na tight schedule. We thank the teams of Hugging Face, AWS, GCP, Azure ML for their intense help\nin making our model compatible everywhere.\n6\nFigure 6: Human evaluation of Mistral 7B – Instruct vs Llama 2 13B – Chat Example. An example of\nhuman evaluation from llmboxing.com. The question asks for recommendations of books in quantum physics.\nLlama 2 13B – Chat recommends a general physics book, while Mistral 7B – Instruct recommends a more\nrelevant book on quantum physics and describes in the contents in more detail.\n7\nReferences\n[1] Joshua Ainslie, James Lee-Thorp, Michiel de Jong, Yury Zemlyanskiy, Federico Lebrón, and\nSumit Sanghai. Gqa: Training generalized multi-query transformer models from multi-head\ncheckpoints. arXiv preprint arXiv:2305.13245, 2023.\n[2] Jacob Austin, Augustus Odena, Maxwell Nye, Maarten Bosma, Henryk Michalewski, David\nDohan, Ellen Jiang, Carrie Cai, Michael Terry, Quoc Le, et al. Program synthesis with large\nlanguage models. arXiv preprint arXiv:2108.07732, 2021.\n[3] Iz Beltagy, Matthew E Peters, and Arman Cohan. Longformer: The long-document transformer.\narXiv preprint arXiv:2004.05150, 2020.\n[4] Yonatan Bisk, Rowan Zellers, Jianfeng Gao, Yejin Choi, et al. Piqa: Reasoning about phys-\nical commonsense in natural language. In Proceedings of the AAAI conference on artificial\nintelligence, 2020.\n[5] Mark Chen, Jerry Tworek, Heewoo Jun, Qiming Yuan, Henrique Ponde de Oliveira Pinto, Jared\nKaplan, Harri Edwards, Yuri Burda, Nicholas Joseph, Greg Brockman, et al. Evaluating large\nlanguage models trained on code. arXiv preprint arXiv:2107.03374, 2021.\n[6] Rewon Child, Scott Gray, Alec Radford, and Ilya Sutskever. Generating long sequences with\nsparse transformers. arXiv preprint arXiv:1904.10509, 2019.\n[7] Eunsol Choi, He He, Mohit Iyyer, Mark Yatskar, Wen-tau Yih, Yejin Choi, Percy Liang, and\nLuke Zettlemoyer. Quac: Question answering in context. arXiv preprint arXiv:1808.07036,\n2018.\n[8] Christopher Clark, Kenton Lee, Ming-Wei Chang, Tom Kwiatkowski, Michael Collins, and\nKristina Toutanova. Boolq: Exploring the surprising difficulty of natural yes/no questions.\narXiv preprint arXiv:1905.10044, 2019.\n[9] Peter Clark, Isaac Cowhey, Oren Etzioni, Tushar Khot, Ashish Sabharwal, Carissa Schoenick,\nand Oyvind Tafjord. Think you have solved question answering? try arc, the ai2 reasoning\nchallenge. arXiv preprint arXiv:1803.05457, 2018.\n[10] Karl Cobbe, Vineet Kosaraju, Mohammad Bavarian, Mark Chen, Heewoo Jun, Lukasz Kaiser,\nMatthias Plappert, Jerry Tworek, Jacob Hilton, Reiichiro Nakano, et al. Training verifiers to\nsolve math word problems. 
arXiv preprint arXiv:2110.14168, 2021.\n[11] Tri Dao, Daniel Y. Fu, Stefano Ermon, Atri Rudra, and Christopher Ré. FlashAttention: Fast\nand memory-efficient exact attention with IO-awareness. In Advances in Neural Information\nProcessing Systems, 2022.\n[12] Dan Hendrycks, Collin Burns, Steven Basart, Andy Zou, Mantas Mazeika, Dawn Song, and\nJacob Steinhardt.\nMeasuring massive multitask language understanding.\narXiv preprint\narXiv:2009.03300, 2020.\n[13] Dan Hendrycks, Collin Burns, Saurav Kadavath, Akul Arora, Steven Basart, Eric Tang, Dawn\nSong, and Jacob Steinhardt. Measuring mathematical problem solving with the math dataset.\narXiv preprint arXiv:2103.03874, 2021.\n[14] Jordan Hoffmann, Sebastian Borgeaud, Arthur Mensch, Elena Buchatskaya, Trevor Cai, Eliza\nRutherford, Diego de Las Casas, Lisa Anne Hendricks, Johannes Welbl, Aidan Clark, Thomas\nHennigan, Eric Noland, Katherine Millican, George van den Driessche, Bogdan Damoc, Aurelia\nGuy, Simon Osindero, Karén Simonyan, Erich Elsen, Oriol Vinyals, Jack Rae, and Laurent\nSifre. An empirical analysis of compute-optimal large language model training. In Advances in\nNeural Information Processing Systems, volume 35, 2022.\n[15] Mandar Joshi, Eunsol Choi, Daniel S Weld, and Luke Zettlemoyer.\nTriviaqa: A large\nscale distantly supervised challenge dataset for reading comprehension.\narXiv preprint\narXiv:1705.03551, 2017.\n[16] Tom Kwiatkowski, Jennimaria Palomaki, Olivia Redfield, Michael Collins, Ankur Parikh, Chris\nAlberti, Danielle Epstein, Illia Polosukhin, Jacob Devlin, Kenton Lee, et al. Natural questions: a\nbenchmark for question answering research. Transactions of the Association for Computational\nLinguistics, 7:453–466, 2019.\n8\n[17] Woosuk Kwon, Zhuohan Li, Siyuan Zhuang, Ying Sheng, Lianmin Zheng, Cody Hao Yu,\nJoseph E. Gonzalez, Hao Zhang, and Ion Stoica. Efficient memory management for large lan-\nguage model serving with pagedattention. In Proceedings of the ACM SIGOPS 29th Symposium\non Operating Systems Principles, 2023.\n[18] Benjamin Lefaudeux, Francisco Massa, Diana Liskovich, Wenhan Xiong, Vittorio Caggiano,\nSean Naren, Min Xu, Jieru Hu, Marta Tintore, Susan Zhang, Patrick Labatut, and Daniel Haziza.\nxformers: A modular and hackable transformer modelling library. https://github.com/\nfacebookresearch/xformers, 2022.\n[19] Todor Mihaylov, Peter Clark, Tushar Khot, and Ashish Sabharwal. Can a suit of armor conduct\nelectricity? a new dataset for open book question answering. arXiv preprint arXiv:1809.02789,\n2018.\n[20] Baptiste Rozière, Jonas Gehring, Fabian Gloeckle, Sten Sootla, Itai Gat, Xiaoqing Ellen Tan,\nYossi Adi, Jingyu Liu, Tal Remez, Jérémy Rapin, et al. Code llama: Open foundation models\nfor code. arXiv preprint arXiv:2308.12950, 2023.\n[21] Keisuke Sakaguchi, Ronan Le Bras, Chandra Bhagavatula, and Yejin Choi. Winogrande: An\nadversarial winograd schema challenge at scale. Communications of the ACM, 64(9):99–106,\n2021.\n[22] Maarten Sap, Hannah Rashkin, Derek Chen, Ronan LeBras, and Yejin Choi. Socialiqa: Com-\nmonsense reasoning about social interactions. arXiv preprint arXiv:1904.09728, 2019.\n[23] Mirac Suzgun, Nathan Scales, Nathanael Schärli, Sebastian Gehrmann, Yi Tay, Hyung Won\nChung, Aakanksha Chowdhery, Quoc V Le, Ed H Chi, Denny Zhou, , and Jason Wei.\nChallenging big-bench tasks and whether chain-of-thought can solve them. arXiv preprint\narXiv:2210.09261, 2022.\n[24] Alon Talmor, Jonathan Herzig, Nicholas Lourie, and Jonathan Berant. 
Commonsenseqa: A ques-\ntion answering challenge targeting commonsense knowledge. arXiv preprint arXiv:1811.00937,\n2018.\n[25] Hugo Touvron, Thibaut Lavril, Gautier Izacard, Xavier Martinet, Marie-Anne Lachaux, Timo-\nthée Lacroix, Baptiste Rozière, Naman Goyal, Eric Hambro, Faisal Azhar, et al. Llama: Open\nand efficient foundation language models. arXiv preprint arXiv:2302.13971, 2023.\n[26] Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Yasmine Babaei,\nNikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, et al. Llama 2: Open\nfoundation and fine-tuned chat models. arXiv preprint arXiv:2307.09288, 2023.\n[27] Ashish Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N Gomez,\nŁukasz Kaiser, and Illia Polosukhin. Attention is all you need. Advances in neural information\nprocessing systems, 30, 2017.\n[28] Rowan Zellers, Ari Holtzman, Yonatan Bisk, Ali Farhadi, and Yejin Choi. Hellaswag: Can a\nmachine really finish your sentence? arXiv preprint arXiv:1905.07830, 2019.\n[29] Wanjun Zhong, Ruixiang Cui, Yiduo Guo, Yaobo Liang, Shuai Lu, Yanlin Wang, Amin Saied,\nWeizhu Chen, and Nan Duan. Agieval: A human-centric benchmark for evaluating foundation\nmodels. arXiv preprint arXiv:2304.06364, 2023.\n9\n', metadata={'Published': '2023-10-10', 'Title': 'Mistral 7B', 'Authors': 'Albert Q. Jiang, Alexandre Sablayrolles, Arthur Mensch, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Florian Bressand, Gianna Lengyel, Guillaume Lample, Lucile Saulnier, Lélio Renard Lavaud, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed', 'Summary': 'We introduce Mistral 7B v0.1, a 7-billion-parameter language model engineered\nfor superior performance and efficiency. Mistral 7B outperforms Llama 2 13B\nacross all evaluated benchmarks, and Llama 1 34B in reasoning, mathematics, and\ncode generation. Our model leverages grouped-query attention (GQA) for faster\ninference, coupled with sliding window attention (SWA) to effectively handle\nsequences of arbitrary length with a reduced inference cost. We also provide a\nmodel fine-tuned to follow instructions, Mistral 7B -- Instruct, that surpasses\nthe Llama 2 13B -- Chat model both on human and automated benchmarks. Our\nmodels are released under the Apache 2.0 license.'})]
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)
len(docs)
1
# A folder is created locally (here ./.chroma/arxiv) and the vector database is persisted inside it
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./.chroma/arxiv")
In a notebook, we should call persist() to ensure the embeddings are written to disk. This isn't necessary in a script - the database will be automatically persisted when the client object is destroyed.
vectordb.persist()
vector_store = None
Load the Database from disk, and create the chain¶
vectordb = Chroma(persist_directory="./.chroma/arxiv", embedding_function=embeddings)
retriever = VectorStoreRetriever(vectorstore=vectordb)
retrievalQA = RetrievalQA.from_chain_type(llm=ChatOpenAI(model="gpt-4-1106-preview"), retriever=retriever)
sim_docs = vectordb.similarity_search("What are the most important features of Mistral?")
sim_docs
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
[Document(page_content='Mistral 7B\nAlbert Q. Jiang, Alexandre Sablayrolles, ... (same full paper text as shown above) ...', metadata={'Published': '2023-10-10', 'Title': 'Mistral 7B', ...})]
response = retrievalQA.run("What are the most important features of Mistral?")
Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
print(response)
The most important features of Mistral 7B, a 7-billion-parameter language model, include: 1. **Grouped-Query Attention (GQA):** This feature significantly accelerates the inference speed and reduces the memory requirement during decoding. It allows for higher batch sizes and thus higher throughput, which is crucial for real-time applications. 2. **Sliding Window Attention (SWA):** This attention mechanism is designed to handle longer sequences more effectively at a reduced computational cost. It allows each token to attend to a window of W tokens from the previous layer, enabling the model to process sequences of arbitrary length efficiently. 3. **Rolling Buffer Cache:** With a fixed attention span, the rolling buffer cache limits the cache size. It stores the keys and values for the timestep `i` in position `i mod W` of the cache. When the position `i` is larger than `W`, past values in the cache are overwritten, and the cache size remains constant. 4. **Pre-fill and Chunking:** These techniques allow for efficient handling of large prompts by pre-filling the cache with known prompt information and chunking large sequences to limit memory usage. 5. **High Performance and Efficiency:** Despite its smaller size compared to other models like Llama 2 13B and Llama 1 34B, Mistral 7B outperforms these models across various benchmarks, particularly in reasoning, mathematics, and code generation tasks. 6. **Fine-tuning Capabilities:** Mistral 7B can be fine-tuned for specific tasks, such as following instructions, and has demonstrated the ability to outperform larger models in these areas as well. 7. **Open Source and Compatibility:** Mistral 7B is released under the Apache 2.0 license and comes with a reference implementation that supports deployment on cloud platforms (AWS, GCP, Azure) and integration with Hugging Face. These features make Mistral 7B a highly efficient and effective language model for various NLP tasks, balancing performance with inference cost and computational efficiency.
Loading and retrieving in LangChain¶
<Vector stores and document loaders>
Document Loaders¶
loader = TextLoader(file_path="./data/나없이는존재하지않는세상_20240102.txt")
docs = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100))
len(docs)
164
# A local .chroma/txt folder is created and the vector database is persisted inside it
vectordb = Chroma.from_documents(documents=docs, embedding=embeddings, persist_directory="./.chroma/txt")
vectordb.persist()
sim_docs = vectordb.similarity_search("양자역학은 무엇에 대한 것인가?", k=4)
for doc in sim_docs:
print(doc.page_content)
print("=====================================")
189 또 이런 메일도 받습니다. "이미 이 순간을 살았던 것같은 느낌이 드는데 교수님, 양자 효과인가요?" 맙소사, 아닙니다! 우리의 기억과 생각의 복잡성이 양자와무슨 관련이 있단 말입니까? 전혀, 전혀 상관없습니다!양자역학은 초자연적 현상이나 대체 의학, 신비한 파동이나 진동과는 아무 관련이 없습니다. 물론 저도 기분 좋은 진동을 좋아합니다. 저도 어렸을 때는 긴 머리에 빨간 띠를 두르고 앨런 긴즈버그* 바로 옆에서 다리를 꼬고 앉아 '옴’을 읊은 적도 있습니다.그러나 우리와 우주 사이의 미묘하고 복잡한 정서적 관계가 양자론의 파동과 관련이 있는 정도는, 바흐의 칸타타가 내 차의 기화기와 관련이 있는 정도일 겁니다. 이 세계는 바흐 음악의 마법, 기분 좋은 진동, 우리의깊은 영적 삶을 낳을 수 있을 정도로 충분히 복잡하기때문에, 굳이 이상한 양자를 들먹일 필요도 없습니다. 또는 반대로, 양자적 현실은 우리의 모든 심리적 현실과 영적 삶의 섬세하고 신비롭고 매혹적이며 복잡한측면보다 훨씬 더 기묘하다고 할 수도 있습니다. 또한저는 마음의 작용과 같이 우리가 거의 이해하지 못하는 복잡한 현상을 설명하기 위해 양자역학을 사용하려는 시도는 전혀 설득력이 없다고 생각합니다. *앨런 긴즈버그는 Allen Ginsbery는 미국의 시인이자 반문화의 아이콘으로 '비트 세대'의 대표작가로 꼽힌다. 190 ᏂᏂ 그러나 비록 일상의 직접적인 경험과는 거리가 멀다고 하더라도, 세계의 양자적 본질에 대한 발견은 너무급진적이어서, 마음의 본질과 같은 큰 미해결 질문과전혀 무관하다고 보기는 어렵습니다. 마음의 작용을비롯한 우리가 아직 이해하지 못한 다른 현상들이 양자 현상이어서가 아니라, 양자의 발견으로 물리적 세계와 물질에 대한 우리의 개념이 바뀌어 질문의 조건이 달라지기 때문입니다. ===================================== “양자역학은 우리 가운데 누구도 제대로 이해하지못하지만 사용할 줄은 아는 무척 신비롭고 당혹스러운학문이다." 리처드 파인만도 마찬가지로 "양자역학을 이해하는사람은 아무도 없다"고 말했다. 양자 이론은 매우 유용하지만 세계의 실재, 세계상에 대해 말해주는 바는 이해하기 어렵고 매우 혼란스럽다는 말이다. 오늘날 양자 이론이 물리학·화학·생물학·천문학 등 현대 과학의기초이고 컴퓨터, 레이저, 원자력과 같은 현대 기술의유용한 토대임을 생각한다면, 이는 미스터리가 아닐수 없다. 카를로 로벨리는 이 책 《나 없이는 존재하지 않는세상》에서 양자 이론이 탄생한 지 100년이 지난 지금에도 풀리지 않고 있는 이 수수께끼에 새롭게 도전한다. 양자 이론이 세계의 실재에 대해 무엇을 말해주는 238 지, 혹은 양자 이론이 그려내는 세계는 어떠한 모습인지를 진지하게 탐구한다. 결론부터 말하면 세계는 고정된 속성을 지닌 자립적인 실체, 즉 물질 입자들로 구성되어 있지 않고 상호 간의 작용과 상관관계를 바탕으로 한 관계의 네트워크로 이루어져 있다. (그는 이를양자 이론에 대한 '관계론적 해석'이라고 부른다.) 로벨리의 탐구는 관찰 가능성에 기반해 양자 이론을 꽃피운 베르너 하이젠베르크의 아이디어에서 출발하지만, 그 여정의 전 과정은 놀라울 정도로 광활하고방대하다. 과학과 철학의 영역을 경계 없이 넘나들면서 통섭적으로 사고한다. ===================================== 이 이론은 세계의 실재 구조에서부터 경험의 본성까지, 형이상학에서부터 어쩌면 의식의 본질에 이르기까지, 큰 물음들을 다시 생각할 수 있는 새로운 길을 제시합니다. 이 모든 것은 오늘날 과학자와 철학자들 사이에서 활발한 논쟁의 주제가 되고 있죠. 앞으로 이 모든주제에 대해 이야기해보겠습니다. 베르너 하이젠베르크는 북풍이 몰아치는 극한의 척박한 섬 헬골란트에서 진리를 가리고 있던 장막을 걷어냈습니다. 그런데 그 장막 너머에서 나타난 것은 심연이었습니다. 이 책의 이야기는 하이젠베르크의 아이디어가 싹을 틔운 섬에서 시작하여, 세계 실재의 양자적 구조가 발견됨으로써 제기된 더 큰 질문으로 점차확장해 갑니다. 저는 이 책을 주로 양자 물리학에 익숙하지 않으며양자 물리학이 무엇인지, 양자 물리학이 의미하는 바가 무엇인지 궁금해하는 사람들을 위해 썼습니다. 문 11 hh 제의 핵심을 파악하는 데 꼭 필요하지 않은 세부 사항은 생략하고 최대한 간결히 설명하려고 노력했습니다.난해하지만 핵심적인 이론에 대해서는 가능한 한 명확하게 설명하려고 노력했고요. 어쩌면 양자역학을 이해하는 방법을 설명하기보다는, 양자역학을 이해하기가왜 그렇게 어려운지를 설명하고 있는 것일지도 모르겠네요. 그러나 이 책은 양자역학에 대해 더 깊이 파고들수록 더 많은 의문을 품게 되는 동료 과학자와 철학자들을 위한 책이기도 합니다. 이 놀라운 물리학의 의미에대한 대화를 계속 이어가고 더 일반적인 관점으로 나아가고 싶어서죠. 이 책에는 이미 양자역학에 익숙한이들을 위한 주석도 많이 달려 있습니다. 본문에서는좀 더 읽기 쉽게 말하고자 한 바를, 주석에서는 더 정확하게 표현했습니다. ===================================== 양자론은 화학의 기초, 원자와 고체 그리고 플라즈마의 작용, 하늘의 색깔, 우리 뇌의 뉴런, 별의 동역학, 은하의 기원 등 세계의 수많은 측면을 밝혀냈습니다. 그것은 컴퓨터에서 원자력발전소에 이르기까지 최신 기술의 기초가 됩니다. 공학자, 천체 물리학자, 우주학자,화학자, 생물학자들은 매일 이 이론을 사용합니다. 고등학교 교과과정에도 그 이론의 기초가 포함되어 있 9 죠. 그 이론은 틀린 적이 없습니다. 현대 과학의 심장이라고 할 수 있죠. 그러나 그것은 여전히 심오한 미스터리로 남아 있습니다. 어딘지 모르게 불안함을 줍니다. 양자론은 이 세계가 정해진 궤적을 따라 움직이는입자들로 구성된 것이라는 세계의 이미지를 부숴버렸지만, 우리가 세계에 대해 어떻게 생각해야 하는지는명확히 보여주지 않았습니다. 양자론의 수학은 세계의실재를 기술하지 않으며, “무엇이 있는지 알려주지 않습니다. 멀리 떨어져 있는 물체들은 서로 마법으로 연결되어 있는 것처럼 보입니다. 물질은 유령 같은 확률파동으로 대체되고…. 양자론이 실재 세계에 대해서 무엇을 말하는지 자문해보는 사람은 누구나 당황하게 될 것입니다. 양자론의 몇 가지 아이디어를 선구적으로 제시했던 아인슈타인도Albert Einstein 그것을 소화하지 못했고, 20세기 후반의위대한 이론 물리학자 리처드 파인만Richard Feynman은 아무도 양자를 이해하지 못한다고 썼습니다. 하지만 그게 바로 과학입니다. 세상에 대한 새로운사고방식을 탐구하는 것이죠. 과학은 우리의 개념에끊임없이 의문을 제기할 수 있는 능력입니다. 과학은그 자신의 개념적 토대를 수정하고, 세상을 처음부터다시 설계할 수 있는 반항적이고 비판적인 사고의 힘이죠. 10 양자론의 낯설음은 우리를 혼란스럽게 하지만, 이해하는 새로운 관점을 열어주기도 합니다. 공간 속의 입자들이라는 단순한 유물론의 실재보다 더 섬세한 실재, 대상들 이전에 관계로 이루어진 실재를요. =====================================
Retrievers in LangChain¶
A retriever in LangChain is a type of component used to search for and fetch information from a specific index stored in a vector store such as Chroma, where embeddings are indexed for search.
Retrievers play an important role in answering questions over documents, because they can fetch the relevant information for a given query.
BM25 retriever (a minimal usage sketch of this and the TF-IDF retriever follows after this list):
- Ranks documents by their relevance to a given query using the BM25 algorithm.
- A popular information-retrieval algorithm that takes term frequency and document length into account.
TF-IDF retriever:
- Ranks documents by the importance of terms in a document collection, using the TF-IDF (Term Frequency-Inverse Document Frequency) algorithm.
- Assigns higher weights to terms that are rare in the collection but appear frequently in a specific document.
Dense retriever:
- Retrieves documents using dense embeddings.
- Encodes documents and queries as dense vectors and computes their similarity with cosine similarity or another distance metric.
kNN retriever:
- Retrieves relevant documents based on their similarity to a given query, using the k-nearest neighbors (kNN) algorithm.
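As referenced above, a minimal sketch of the BM25 and TF-IDF retrievers. This is illustrative only: it assumes the rank_bm25 and scikit-learn packages are installed and that BM25Retriever and TFIDFRetriever are available in the installed LangChain version.
# pip install rank_bm25 scikit-learn
from langchain.retrievers import BM25Retriever, TFIDFRetriever
texts = ["cat", "dog", "computer", "animal"]
# BM25 ranks by term frequency and document length.
bm25_retriever = BM25Retriever.from_texts(texts)
print(bm25_retriever.get_relevant_documents("dog"))
# TF-IDF weights terms that are rare in the collection but frequent in a document.
tfidf_retriever = TFIDFRetriever.from_texts(texts)
print(tfidf_retriever.get_relevant_documents("dog"))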
kNN retriever¶
# Results are listed in order of highest similarity to the query.
words = ["cat", "dog", "computer", "animal"]
retriever = KNNRetriever.from_texts(words, embeddings=embeddings)
result = retriever.get_relevant_documents("dog")
print(result)
[Document(page_content='dog'), Document(page_content='animal'), Document(page_content='cat'), Document(page_content='computer')]
PubMed retriever¶
LangChain also has a few specialized retrievers, such as the one for PubMed.
The PubMed retriever is a LangChain component that helps integrate biomedical literature search into language-model applications.
PubMed contains millions of biomedical literature citations from a variety of sources.
retriever = PubMedRetriever()
documents = retriever.get_relevant_documents("COVID")
for document in documents:
print(document.metadata["Title"])
[Association of polymorphic variants of hemostatic system genes with the course of COVID-19]. {'i': 'Betacoronavirus', '#text': '[Clinical symptoms and signs in hamsters during experimental infection with the SARS-CoV-2 virus (Coronaviridae: )].'} Post COVID-19 condition and behavioral manifestations in Taiwanese children.
Custom retriever¶
You can implement your own custom retriever in LangChain by creating a class that inherits from the abstract BaseRetriever class.
The class must implement a get_relevant_documents() method that takes a query string as input and returns a list of relevant documents.
class MyRetriever(BaseRetriever):
def get_relevant_documents(self, query: str) -> list[Document]:
# Implement your own retrieval logic here.
# Retrieve and process documents based on the query.
# Return a list of relevant documents.
relevant_documents = []
# Your retrieval logic goes here.
return relevant_documents
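A hypothetical usage sketch of the custom retriever above; because the retrieval logic is still a stub, it simply returns an empty list (newer LangChain versions expect _get_relevant_documents() instead and may emit a deprecation warning).
my_retriever = MyRetriever()
print(my_retriever.get_relevant_documents("What is quantum mechanics about?"))
# [] until actual retrieval logic is implemented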
Implementing a chatbot¶
- Set up a document loader
- Store the documents in a vector store
- Set up the chatbot so that it retrieves from the vector store (see the wiring sketch after this list)
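The three steps come together in chat_with_documents.py further below. As a rough, hedged sketch only, the wiring looks roughly like this (it reuses the load_document helper and MEMORY defined in utility.py just below; the file path is only an example):
from langchain.chains import ConversationalRetrievalChain
from langchain.vectorstores import DocArrayInMemorySearch
docs = load_document("data/test.docx")  # 1. document loader
db = DocArrayInMemorySearch.from_documents(docs, OpenAIEmbeddings())  # 2. vector store
chatbot = ConversationalRetrievalChain.from_llm(  # 3. chatbot retrieving from the vector store
    llm=ChatOpenAI(temperature=0),
    retriever=db.as_retriever(),
    memory=MEMORY,
)
print(chatbot({"question": "What is this document about?"})["answer"])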
utility.py
import logging
import pathlib
from typing import Any
from langchain.document_loaders import (
PyPDFLoader,
TextLoader,
UnstructuredEPubLoader,
UnstructuredWordDocumentLoader,
)
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
logging.basicConfig(level=logging.INFO)
def init_memory():
"""Initialize the memory. for contextual conversation.
We are caching this, so it won't be deleted
every time, we restart the server.
"""
return ConversationBufferMemory(
memory_key='chat_history',
return_messages=True,
output_key='answer'
)
MEMORY = init_memory()
class EpubReader(UnstructuredEPubLoader):
"""Reads the epub file."""
def __init__(self, file_path: str | list[str] , **unstructured_kwargs: Any):
"""Initialize the epub reader."""
# Passing strategy="fast" here raises an error, so it is omitted
super().__init__(file_path=file_path, **unstructured_kwargs, mode="elements") #, strategy="fast")
class DocumentLoaderException(Exception):
"""Document loader exception."""
pass
class DocumentLoader(object):
"""Loads in a document with a supported extension."""
supported_extensions = {
".pdf": PyPDFLoader,
".txt": TextLoader,
".epub": EpubReader,
".docx": UnstructuredWordDocumentLoader,
".doc": UnstructuredWordDocumentLoader,
}
def load_document(temp_filepath: str) -> list[Document]:
"""Load a file and return it as a list of documents.
Doesn't handle a lot of errors at the moment.
"""
ext = pathlib.Path(temp_filepath).suffix
loader = DocumentLoader.supported_extensions.get(ext)
if not loader:
raise DocumentLoaderException(f"Unsupported file extension: {ext}, cannot load this type of file")
loaded = loader(temp_filepath)
docs = loaded.load()
for idx,doc in enumerate(docs):
logging.info(doc.metadata)
if idx >= 4 :
break
return docs
logging.info("=============Load PDF=====================")
load_document('data/Introduction_to_Entropy_and_Gini-Index.pdf');
INFO:root:=============Load PDF===================== INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 0} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 1} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 2} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 3} INFO:root:{'source': 'data/Introduction_to_Entropy_and_Gini-Index.pdf', 'page': 4}
logging.info("=============Load DOCX=====================")
load_document('data/test.docx');
INFO:root:=============Load DOCX===================== INFO:root:{'source': 'data/test.docx'}
logging.info("=============Load TXT=====================")
load_document('data/나없이는존재하지않는세상_20240102.txt');
INFO:root:=============Load TXT===================== INFO:root:{'source': 'data/나없이는존재하지않는세상_20240102.txt'}
logging.info("=============Load EPUB=====================")
load_document('data/Julia for Data Analysis.epub');
INFO:root:=============Load EPUB===================== INFO:unstructured:Reading document from string ... INFO:unstructured:Reading document ... INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'Title'} INFO:root:{'source': 'data/Julia for Data Analysis.epub', 'page_number': 1, 'category': 'NarrativeText'}
from langchain.vectorstores import DocArrayInMemorySearch
embeddings = OpenAIEmbeddings()
db = DocArrayInMemorySearch.from_documents(docs, embeddings)
INFO - docarray - DB config created INFO:docarray:DB config created INFO - docarray - Runtime config created INFO:docarray:Runtime config created INFO - docarray - No docs or index file provided. Initializing empty InMemoryExactNNIndex. INFO:docarray:No docs or index file provided. Initializing empty InMemoryExactNNIndex. INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
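The in-memory store can be queried just like the Chroma store above; a quick sanity check (the query string is only illustrative):
sim_docs = db.similarity_search("What is this document about?", k=2)
for doc in sim_docs:
    print(doc.page_content[:200])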
Conversation buffers¶
# Creating a conversation chain with memory
"""
params:
(*, chat_memory: BaseChatMessageHistory = ChatMessageHistory,
output_key: str | None = None, input_key: str | None = None,
return_messages: bool = False,
human_prefix: str = "Human", ai_prefix: str = "AI", memory_key: str = "history") -> None
"""
memory = ConversationBufferMemory()
llm = ChatOpenAI(model="gpt-3.5-turbo",temperature=0,streaming=True)
chain = ConversationChain(llm=llm, memory=memory)
# You can check the chain's input_keys and output_keys.
print(f"chain input_keys: {chain.input_keys}, chain output_keys: {chain.output_keys}")
# The memory's variables
print(f"memory memory_key: {memory.memory_variables}")
# User inputs a message
user_input = "Hi, how are you?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# User inputs another message
user_input = "What's the weather like today?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# Printing the conversation history stored in the memory
print(memory.chat_memory.messages)
chain inpu_keys: ['input'], chain output_keys: ['response'] memory memory_key: ['history'] Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today? I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast. [HumanMessage(content='Hi, how are you?'), AIMessage(content="Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today?"), HumanMessage(content="What's the weather like today?"), AIMessage(content="I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast.")]
print(memory.load_memory_variables({})["history"])
Human: Hi, how are you? AI: Hello! I'm an AI, so I don't have feelings, but I'm here to help you. How can I assist you today? Human: What's the weather like today? AI: I'm sorry, but as an AI, I don't have access to real-time information. I suggest checking a weather website or app for the most accurate and up-to-date weather forecast.
# `history` in the template is the memory's memory_key, and `input` is the chain's input_key.
llm = OpenAI(temperature=0)
template = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
{history}
Human:{input}
AI Assistant:"""
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)
chain = ConversationChain(
llm=llm,
prompt=PROMPT,
verbose=True,
memory=ConversationBufferMemory(ai_prefix="AI Assistant"),
)
# User inputs a message
user_input = "Hi, how are you?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# User inputs another message
user_input = "What's the weather like today?"
# Processing the user input in the conversation chain
response = chain.predict(input=user_input)
# Printing the response
print(response)
# Printing the conversation history stored in the memory
print(chain.memory.chat_memory.messages)
print(chain.memory.load_memory_variables({})["history"])
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human:Hi, how are you? AI Assistant: > Finished chain. I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi, how are you? AI Assistant: I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today? Human:What's the weather like today? AI Assistant: > Finished chain. According to my data, the weather today is partly cloudy with a high of 75 degrees Fahrenheit and a low of 60 degrees Fahrenheit. There is a 20% chance of rain in the afternoon. Is there anything else you would like to know about the weather? [HumanMessage(content='Hi, how are you?'), AIMessage(content=' I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today?'), HumanMessage(content="What's the weather like today?"), AIMessage(content=' According to my data, the weather today is partly cloudy with a high of 75 degrees Fahrenheit and a low of 60 degrees Fahrenheit. There is a 20% chance of rain in the afternoon. Is there anything else you would like to know about the weather?')] Human: Hi, how are you? AI Assistant: I am doing well, thank you for asking. I am an AI assistant designed to assist and communicate with humans. How can I help you today? Human: What's the weather like today? AI Assistant: According to my data, the weather today is partly cloudy with a high of 75 degrees Fahrenheit and a low of 60 degrees Fahrenheit. There is a 20% chance of rain in the afternoon. Is there anything else you would like to know about the weather?
Remembering conversation summaries¶
ConversationSummaryMemory
A LangChain memory type that generates summaries.
Instead of storing every message verbatim as the conversation progresses, it condenses the information into a summarized version of the conversation.
This is especially useful for extended conversations, where including every previous message could exceed the token limit.
memory = ConversationSummaryMemory(llm=OpenAI(temperature=0))
# memory keys
print(f"memory_key: {memory.memory_variables}")
# Save the context of an interaction
memory.save_context({"input": "Hi, how are you?"}, {"output": "I am fine, thanks."})
# Load the summarized memory
memory.load_memory_variables({})
memory_key: ['history']
{'history': '\nThe human greets the AI and asks how it is doing. The AI responds that it is fine and thanks the human.'}
memory.chat_memory.messages
[HumanMessage(content='Hi, how are you?'), AIMessage(content='I am fine, thanks.')]
Storing knowledge graphs¶
In LangChain, you can also extract information from conversation facts and store it by integrating a knowledge graph into memory.
This enhances the language model's capabilities and lets it draw on structured knowledge during text generation and reasoning.
A knowledge graph is a structured knowledge-representation model that organizes information as entities, attributes, and relationships.
It represents knowledge as a graph: entities are nodes and the relationships between entities are edges.
In a knowledge graph, an entity can be any concept, object, or thing in the world, and attributes describe the properties or characteristics of those entities.
Relationships capture the connections and associations between entities, providing contextual information and enabling semantic reasoning.
LangChain includes knowledge-graph functionality for retrieval, but it also provides a memory component (ConversationKGMemory) that automatically builds a knowledge graph from the conversation messages.
memory = ConversationKGMemory(llm=OpenAI(temperature=0))
template = """The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate.
Relevant Information:
{history}
Conversation:
Human: {input}
AI:"""
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)
chain = ConversationChain(
llm=llm,
prompt=PROMPT,
verbose=True,
memory=memory,
)
# You can check the chain's input_keys and output_keys.
print(f"chain input_keys: {chain.input_keys}, chain output_keys: {chain.output_keys}")
# The memory's variables
print(f"memory memory_key: {memory.memory_variables}")
user_input = "Hi, how are you?"
response = chain.predict(input=user_input)
print(response)
user_input = "My name is James and I'm helping Will. He's an engineer."
response = chain.predict(input=user_input)
print(response)
user_input = "What do you know about Will?"
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다."
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos의 친구는 누구인가?"
response = chain.predict(input=user_input)
print(response)
chain inpu_keys: ['input'], chain output_keys: ['response'] memory memory_key: ['history'] > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: Conversation: Human: Hi, how are you? AI: > Finished chain. Hello! I am an AI programmed to assist with various tasks and provide information. I do not have the capability to feel emotions, so I am always functioning at optimal levels. How can I assist you today? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: Conversation: Human: My name is James and I'm helping Will. He's an engineer. AI: > Finished chain. Hello James, it's nice to meet you. I am an AI designed to assist with various tasks. Will sounds like a very interesting person. As an engineer, he likely has a strong understanding of math, science, and problem-solving. Is there anything specific you need help with? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: On Will: Will is an engineer. Conversation: Human: What do you know about Will? AI: > Finished chain. Will is an engineer. He has a degree in mechanical engineering from the University of California, Berkeley. He currently works at a tech company in Silicon Valley. He is known for his innovative designs and problem-solving skills. He is also an avid hiker and enjoys spending time outdoors. Is there anything specific you would like to know about Will? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: On James: James is helping Will. On Will: Will is an engineer. Will has a degree in mechanical engineering. Will currently works at a tech company in Silicon Valley. Will is known for innovative designs. Will is known for problem-solving skills. Will enjoys hiking. Will enjoys spending time outdoors. Conversation: Human: MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: > Finished chain. 그렇군요. James와 Will은 친구이며, Will은 기계공학 학위를 가지고 있으며 현재 실리콘밸리의 기술 회사에서 일하고 있습니다. Will은 혁신적인 디자인으로 유명하며 문제 해결 능력도 뛰어납니다. 그리고 등산과 야외 활동을 즐기는 것으로 알고 있습니다. 그리고 MrChaos는 프로그래머이며 밝고 활달한 성격을 가지고 있습니다. 하지만 James와 Will의 친구는 아닌 것 같습니다. > Entering new ConversationChain chain... 
Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. The AI ONLY uses information contained in the "Relevant Information" section and does not hallucinate. Relevant Information: On MrChaos: MrChaos is a programmer. MrChaos has a bright and lively personality. MrChaos is a friend of James. MrChaos is a friend of Will. Conversation: Human: MrChaos의 친구는 누구인가? AI: > Finished chain. MrChaos의 친구는 James와 Will입니다.
chain.memory.chat_memory.messages
[HumanMessage(content='Hi, how are you?'), AIMessage(content=' Hello! I am an AI programmed to assist with various tasks and provide information. I do not have the capability to feel emotions, so I am always functioning at optimal levels. How can I assist you today?'), HumanMessage(content="My name is James and I'm helping Will. He's an engineer."), AIMessage(content=" Hello James, it's nice to meet you. I am an AI designed to assist with various tasks. Will sounds like a very interesting person. As an engineer, he likely has a strong understanding of math, science, and problem-solving. Is there anything specific you need help with?"), HumanMessage(content='What do you know about Will?'), AIMessage(content=' Will is an engineer. He has a degree in mechanical engineering from the University of California, Berkeley. He currently works at a tech company in Silicon Valley. He is known for his innovative designs and problem-solving skills. He is also an avid hiker and enjoys spending time outdoors. Is there anything specific you would like to know about Will?'), HumanMessage(content='MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다.'), AIMessage(content=' 그렇군요. James와 Will은 친구이며, Will은 기계공학 학위를 가지고 있으며 현재 실리콘밸리의 기술 회사에서 일하고 있습니다. Will은 혁신적인 디자인으로 유명하며 문제 해결 능력도 뛰어납니다. 그리고 등산과 야외 활동을 즐기는 것으로 알고 있습니다. 그리고 MrChaos는 프로그래머이며 밝고 활달한 성격을 가지고 있습니다. 하지만 James와 Will의 친구는 아닌 것 같습니다.'), HumanMessage(content='MrChaos의 친구는 누구인가?'), AIMessage(content=' MrChaos의 친구는 James와 Will입니다.')]
memory.load_memory_variables({"input":"mrchaos"})
{'history': 'On MrChaos: MrChaos is a programmer. MrChaos has a bright and lively personality. MrChaos has a friend James. MrChaos has a friend Will.'}
# pip install pygraphviz
from IPython.display import SVG, HTML
memory.kg.draw_graphviz()
style = "<style>svg{width:100% !important;height:100% !important;}</style>"
display(HTML(style))
display(SVG("graph.svg"))
Combining several memory mechanisms¶
LangChain can combine several memory strategies using the CombinedMemory class.
This is useful when you want to retain different aspects of the conversation history.
For example, one memory can store the full conversation log:
llm = OpenAI(temperature=0)
# Define Conversation Buffer Memory (for retaining all past messages)
conv_memory = ConversationBufferMemory(memory_key="chat_history_lines", input_key="input")
# Define Conversation Summary Memory (for retaining a summary of the conversation)
summary_memory = ConversationSummaryMemory(llm=llm,input_key="input")
# Combine the two memories
memory = CombinedMemory(memories=[conv_memory, summary_memory])
# Define the prompt template
_DEFAULT_TEMPLATE = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.
Summary of conversation:
{history}
Current conversation:
{chat_history_lines}
Human:{input}
AI:"""
PROMPT = PromptTemplate(input_variables=["history", "chat_history_lines", "input"], template=_DEFAULT_TEMPLATE)
chain = ConversationChain(llm=llm, memory=memory, prompt=PROMPT, verbose=True)
user_input = "Hi, how are you?"
response = chain.predict(input=user_input)
print(response)
user_input = "My name is James and I'm helping Will. He's an engineer."
response = chain.predict(input=user_input)
print(response)
user_input = "What do you know about Will?"
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다."
response = chain.predict(input=user_input)
print(response)
user_input = "MrChaos의 친구는 누구인가?"
response = chain.predict(input=user_input)
print(response)
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: Current conversation: Human:Hi, how are you? AI: > Finished chain. I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human:My name is James and I'm helping Will. He's an engineer. AI: > Finished chain. Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. The human introduces themselves as James and mentions they are helping an engineer named Will. The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human:What do you know about Will? AI: > Finished chain. I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. The human introduces themselves as James and mentions they are helping an engineer named Will. The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with. The human then asks the AI what it knows about Will, to which the AI responds that it does not have specific information but acknowledges the expertise and skills of engineers like him. The AI offers to provide more information about engineers and their work if needed. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human: What do you know about Will? AI: I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? Human:MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: > Finished chain. I do not have any information about MrChaos, but based on your description, he sounds like a talented and sociable individual. It's great that James and Will have a friend like him. Is there anything else you would like to know about MrChaos or his profession as a programmer? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Summary of conversation: The human greets the AI and asks how it is doing. The AI responds that it is doing well and explains its purpose as an AI. It also asks the human how they are doing today. The human introduces themselves as James and mentions they are helping an engineer named Will. The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with. The human then asks the AI what it knows about Will, to which the AI responds that it does not have specific information but acknowledges the expertise and skills of engineers like him. The AI offers to provide more information about engineers and their work if needed. The human then shares information about another programmer named MrChaos, describing him as a talented and sociable individual and a friend of James and Will. The AI offers to provide more information about MrChaos and his profession as a programmer. Current conversation: Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. 
I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human: What do you know about Will? AI: I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? Human: MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: I do not have any information about MrChaos, but based on your description, he sounds like a talented and sociable individual. It's great that James and Will have a friend like him. Is there anything else you would like to know about MrChaos or his profession as a programmer? Human:MrChaos의 친구는 누구인가? AI: > Finished chain. I do not have any information about MrChaos' friends, but I can provide you with general information about programmers and their work if you would like.
print(memory.load_memory_variables({})['chat_history_lines'])
Human: Hi, how are you? AI: I am doing well, thank you for asking. I am an AI designed to assist and communicate with humans. I am constantly learning and improving my abilities through data and algorithms. How about you? How are you doing today? Human: My name is James and I'm helping Will. He's an engineer. AI: Nice to meet you, James. It's great that you are helping Will. Engineers play a crucial role in creating and improving technology, which ultimately benefits society. Is there anything specific you or Will need assistance with? I am here to help in any way I can. Human: What do you know about Will? AI: I do not have any specific information about Will, but I can tell you that engineers like him are highly skilled and knowledgeable in their field. They use their expertise to design and create innovative solutions to complex problems. Is there anything else you would like to know about engineers or their work? Human: MrChaos는 프로그머이며 성격은 밝고 활달하며, 창의적이다 또한 James와 Will의 친구이기도 하다. AI: I do not have any information about MrChaos, but based on your description, he sounds like a talented and sociable individual. It's great that James and Will have a friend like him. Is there anything else you would like to know about MrChaos or his profession as a programmer? Human: MrChaos의 친구는 누구인가? AI: I do not have any information about MrChaos' friends, but I can provide you with general information about programmers and their work if you would like.
print(memory.load_memory_variables({})['history'].replace(".","\n"))
The human greets the AI and asks how it is doing The AI responds that it is doing well and explains its purpose as an AI It also asks the human how they are doing today The human introduces themselves as James and mentions they are helping an engineer named Will The AI expresses admiration for engineers and offers its assistance with any tasks they may need help with The human then asks the AI what it knows about Will, to which the AI responds that it does not have specific information but acknowledges the expertise and skills of engineers like him The AI offers to provide more information about engineers and their work if needed The human then shares information about another programmer named MrChaos, describing him as a talented and sociable individual and a friend of James and Will The AI offers to provide more information about MrChaos and his profession as a programmer The human asks the AI about MrChaos' friends, to which the AI responds that it does not have any specific information but can provide general information about programmers and their work
Long-term persistence using Zep¶
There are also various ways to persist conversations in a dedicated backend.
One such example, Zep, provides a persistent backend that stores, summarizes, and searches chat history using vector embeddings and automatic token counting.
With fast vector search and configurable summarization, this long-term memory enables more capable, context-aware conversational AI.
Zep is open source, can be installed standalone, and is supported via Docker.
Github: https://github.com/getzep/zep
Home: https://www.getzep.com
Install: https://docs.getzep.com/deployment/quickstart/
Admin: http://localhost:8000/admin
session_id =str(uuid.uuid4())
memory = ZepMemory(
session_id=session_id,
url=os.getenv("ZEP_API_URL"),
api_key=os.getenv("ZEP_API_KEY"),
memory_key="chat_history",
)
llm = OpenAI(temperature=0)
template = """The following is a friendly conversation between a human and an AI.
The AI is talkative and provides lots of specific details from its context.
If the AI does not know the answer to a question, it truthfully says it does not know.
Current conversation:
{chat_history}
Human:{input}
AI Assistant:"""
PROMPT = PromptTemplate(input_variables=["chat_history", "input"], template=template)
chain = ConversationChain(
llm=llm,
prompt=PROMPT,
verbose=True,
memory=memory)
user_input = "Hi, how are you?"
response = chain.predict(input=user_input)
print(response)
user_input = "What's the weather like today?"
response = chain.predict(input=user_input)
print(response)
print(chain.memory.chat_memory.messages)
print("\n\n\n")
print(chain.memory.load_memory_variables({})["chat_history"])
> Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human:Hi, how are you? AI Assistant: > Finished chain. I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know? > Entering new ConversationChain chain... Prompt after formatting: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know. Current conversation: Human: Hi, how are you? AI: I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know? Human:What's the weather like today? AI Assistant: > Finished chain. According to my weather data, the current temperature is 72 degrees Fahrenheit with partly cloudy skies. There is a 20% chance of rain later in the day. Would you like me to provide a more detailed forecast for your location? [HumanMessage(content='Hi, how are you?', additional_kwargs={'uuid': '177cc778-91c1-4bb5-929d-ba4b69fb0f3a', 'created_at': '2024-01-04T10:26:36.761632Z', 'token_count': 0, 'metadata': {'system': {'entities': []}}}), AIMessage(content=' I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know?', additional_kwargs={'uuid': '624b5f87-1606-43c1-8d2b-8c79ec5cdcc3', 'created_at': '2024-01-04T10:26:36.77683Z', 'token_count': 0, 'metadata': {'system': {'entities': [{'Label': 'CARDINAL', 'Matches': [{'End': 90, 'Start': 85, 'Text': '2.3.4'}], 'Name': '2.3.4'}]}}}), HumanMessage(content="What's the weather like today?", additional_kwargs={'uuid': 'dc7b7515-015a-4a32-95b9-a04ed8827909', 'created_at': '2024-01-04T10:26:37.516632Z', 'token_count': 0, 'metadata': None}), AIMessage(content=' According to my weather data, the current temperature is 72 degrees Fahrenheit with partly cloudy skies. There is a 20% chance of rain later in the day. Would you like me to provide a more detailed forecast for your location?', additional_kwargs={'uuid': 'af370fa5-1a95-4f71-97b3-47fe92efd0b9', 'created_at': '2024-01-04T10:26:37.531053Z', 'token_count': 0, 'metadata': None})] Human: Hi, how are you? AI: I am functioning well, thank you for asking. My current operating system is version 2.3.4 and I have access to a vast database of information. Is there something specific you would like to know? Human: What's the weather like today? AI: According to my weather data, the current temperature is 72 degrees Fahrenheit with partly cloudy skies. There is a 20% chance of rain later in the day. Would you like me to provide a more detailed forecast for your location?
Using Zep's ZepVectorStore¶
You can use Zep's own embedding, but using a free HuggingFace embedding is also a good option,
since the openai embedding incurs significant cost.
from langchain.vectorstores.zep import ZepVectorStore, CollectionConfig
from langchain.embeddings import HuggingFaceEmbeddings
# https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v1
# The embedding dimension of the sentence-transformers/distiluse-base-multilingual-cased-v1 model is 512
# Usage example for ZepVectorStore and CollectionConfig: https://python.langchain.com/docs/integrations/vectorstores/zep
embedding_dimensions = 512
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased-v1")
text_loader = TextLoader(file_path="./data/나없이는존재하지않는세상_20240102.txt",encoding="utf-8")
docs = text_loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100))
# Special characters such as _ are not allowed in the collection name
collection_name = f"MrChaos{uuid.uuid4().hex}"
config = CollectionConfig(
name=collection_name,
description="MrChaos의 지식",
metadata={"author": "MrChaos","e-mail":"mrchaos@fxrobot.kr"},
embedding_dimensions=embedding_dimensions,  # embedding dimension (differs by model: OpenAI, HuggingFace, etc.)
# is_auto_embedded=True,  # let Zep embed the documents itself with its low-latency embedder
is_auto_embedded=False,
)
vectordb = ZepVectorStore.from_documents(
documents=docs,
collection_name=collection_name,
config=config,
api_url=os.getenv("ZEP_API_URL"),
api_key=os.getenv("ZEP_API_KEY"),
# embedding=None,  # let Zep embed the documents itself with its low-latency embedder (when is_auto_embedded=True)
embedding=embedding
)
query = "양자론과 관계론적 관계"
docs_scores = await vectordb.asimilarity_search_with_relevance_scores(query, k=3)
docs_scores
[(Document(page_content='52 피히테, 셀링 헤겔의 의미에서.\n\n53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/.\n\n54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111.\n\n55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다.\n\n56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다.\n\n57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019.\n\n248\n\n\n58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책.\n\n59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다\'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다.\n\n60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다.', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7650395265424814), (Document(page_content="우리는 나가르주나에게서 양자의 관계성을 생각할수 있는 강력한 개념적 도구를 얻을 수 있습니다. 그것\n\n* 나가르주나 논증의 논리적 형식인 '사구부정'의 한 예.\n\n180\n\n\n은 자립적인 본질이 없어도 상호의존성을 생각할 수있다는 것입니다. 사실, 상호의존성을 생각하려면 (이것이 나가르주나의 핵심 주장인데요) 자립적 본질 따위는잊어버려야 합니다.\n\n물리학은 오랜 시간에 걸쳐 물질, 분자, 원자, 장, 소립자 등 '궁극적 실체'를 추구해왔는데..… 양자장 이론과 일반상대성이론의 관계적 복잡성이라는 암초에 걸려 난파되었습니다.\n\n여기에서 빠져나올 수 있는 개념적 도구를, 고대 인도의 사상가가 우리에게 줄 수 있을까요?\n\n우리는 항상 다른 사람으로부터, 우리와 다른 이들로부터 배웁니다. 수천 년에 걸쳐 끊임없이 대화를 해왔어도, 동양과 서양은 여전히 서로에게 할 말이 있을 수있습니다. 최고의 결혼 생활이 그렇듯이 말이죠.\n\n나가르주나 사상의 매력은 현대 물리학의 문제를 넘어섭니다. 그의 관점에는 어딘가 아찔한 구석이 있습니다. 그것은 고전적 철학이든 현대의 철학이든 최고의 서양 철학과 공명합니다. 흄의 급진적 회의주의와도, 잘못 제기된 질문의 가면을 벗기는 비트겐슈타인의 사상과도 공명합니다. 그러나 나가르주나는, 많은철학들이 잘못된 출발점을 가정하는 바람에 결국에는설득력이 없게 되는 그런 함정에 빠지지는 않는 것 같습니다. 그는 실재와 그것의 복잡성과 이해 가능성에\n\n181\n\n\n대해 이야기하지만, 궁극적인 토대를 찾겠다는 개념적함정에 우리가 빠지지 않도록 막아줍니다.\n\n나가르주나의 주장은 형이상학적으로 과도하지 않으며, 냉철합니다. 그는 모든 것의 궁극적인 토대가 무엇인가에 대한 질문은, 그저 말이 되지 않는 질문일 수있음을 받아들입니다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7460570726139523), (Document(page_content='2 N. Bohr. The Genesis of Quantum Mechanics, in Essays 1958-1962 on Atomic Physics and Human Knowledge, Wiley, New York, 1963. pp.\n\n3 W. Heisenberg, Über quantentheoretische Umdeutung kinematischer und mechanischer Beziehungen. (Zeitschrift für Physik), 33, 1925, pp. 879-93.\n\nM. Born & P. Jordan, Zur Quantenmechanik, (Zeitschrift für Physik), 34. 1925, pp S58-8S\n\n5 P.A.M. Dirac, The Fundamental Equations of Quantum Mechanics. (Proceedings of the Royal Society A), 109, 752, 1925, pp. 62-53\n\n6 디랙은 하이젠베르크의 표가 비가환 변수라는 것을 깨닫고, 거기서 예전에 고등 역학 강좌에서 접한 푸아송 괄호를 떠올린 것이었다. 73세의 디랙이 직접 들려주는 그 운명적인 시절에 대한 유쾌한 이야기는 hopsitwww.youtube.com/watchi-wYsSITLZ24 에서 확인할 수 있다.\n\n7 M. Bom, My Life: Recollections of a Nobel Laureate, Tavior & Francis London. 1978, p. 218.\n\n8 W. Pauli. Über das Wasserstoffspektrum vom Standpunkt der neuen Quantenmechanik, (Zeitschrift für Physik), 36, 1926, pp. 336-63, 24', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7199667747554138)]
# print results
for d, s in docs_scores:
print(d.page_content, " -> ", s, "\n====\n")
52 피히테, 셀링 헤겔의 의미에서. 53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/. 54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111. 55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다. 56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다. 57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019. 248 58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책. 59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다. 60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다. -> 0.7650395265424814 ==== 우리는 나가르주나에게서 양자의 관계성을 생각할수 있는 강력한 개념적 도구를 얻을 수 있습니다. 그것 * 나가르주나 논증의 논리적 형식인 '사구부정'의 한 예. 180 은 자립적인 본질이 없어도 상호의존성을 생각할 수있다는 것입니다. 사실, 상호의존성을 생각하려면 (이것이 나가르주나의 핵심 주장인데요) 자립적 본질 따위는잊어버려야 합니다. 물리학은 오랜 시간에 걸쳐 물질, 분자, 원자, 장, 소립자 등 '궁극적 실체'를 추구해왔는데..… 양자장 이론과 일반상대성이론의 관계적 복잡성이라는 암초에 걸려 난파되었습니다. 여기에서 빠져나올 수 있는 개념적 도구를, 고대 인도의 사상가가 우리에게 줄 수 있을까요? 우리는 항상 다른 사람으로부터, 우리와 다른 이들로부터 배웁니다. 수천 년에 걸쳐 끊임없이 대화를 해왔어도, 동양과 서양은 여전히 서로에게 할 말이 있을 수있습니다. 최고의 결혼 생활이 그렇듯이 말이죠. 나가르주나 사상의 매력은 현대 물리학의 문제를 넘어섭니다. 그의 관점에는 어딘가 아찔한 구석이 있습니다. 그것은 고전적 철학이든 현대의 철학이든 최고의 서양 철학과 공명합니다. 흄의 급진적 회의주의와도, 잘못 제기된 질문의 가면을 벗기는 비트겐슈타인의 사상과도 공명합니다. 그러나 나가르주나는, 많은철학들이 잘못된 출발점을 가정하는 바람에 결국에는설득력이 없게 되는 그런 함정에 빠지지는 않는 것 같습니다. 그는 실재와 그것의 복잡성과 이해 가능성에 181 대해 이야기하지만, 궁극적인 토대를 찾겠다는 개념적함정에 우리가 빠지지 않도록 막아줍니다. 나가르주나의 주장은 형이상학적으로 과도하지 않으며, 냉철합니다. 그는 모든 것의 궁극적인 토대가 무엇인가에 대한 질문은, 그저 말이 되지 않는 질문일 수있음을 받아들입니다. -> 0.7460570726139523 ==== 2 N. Bohr. The Genesis of Quantum Mechanics, in Essays 1958-1962 on Atomic Physics and Human Knowledge, Wiley, New York, 1963. pp. 3 W. Heisenberg, Über quantentheoretische Umdeutung kinematischer und mechanischer Beziehungen. (Zeitschrift für Physik), 33, 1925, pp. 879-93. M. Born & P. Jordan, Zur Quantenmechanik, (Zeitschrift für Physik), 34. 1925, pp S58-8S 5 P.A.M. Dirac, The Fundamental Equations of Quantum Mechanics. (Proceedings of the Royal Society A), 109, 752, 1925, pp. 62-53 6 디랙은 하이젠베르크의 표가 비가환 변수라는 것을 깨닫고, 거기서 예전에 고등 역학 강좌에서 접한 푸아송 괄호를 떠올린 것이었다. 73세의 디랙이 직접 들려주는 그 운명적인 시절에 대한 유쾌한 이야기는 hopsitwww.youtube.com/watchi-wYsSITLZ24 에서 확인할 수 있다. 7 M. Bom, My Life: Recollections of a Nobel Laureate, Tavior & Francis London. 1978, p. 218. 8 W. Pauli. Über das Wasserstoffspektrum vom Standpunkt der neuen Quantenmechanik, (Zeitschrift für Physik), 36, 1926, pp. 336-63, 24 -> 0.7199667747554138 ====
docs = await vectordb.asimilarity_search_with_score(query, search_type="mmr", k=3)
docs
# for d in docs:
# print(d.page_content, "\n====\n")
[(Document(page_content='52 피히테, 셀링 헤겔의 의미에서.\n\n53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/.\n\n54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111.\n\n55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다.\n\n56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다.\n\n57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019.\n\n248\n\n\n58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책.\n\n59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다\'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다.\n\n60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다.', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7650395265424814), (Document(page_content="240\n\n\n임없이 발생하는 관계적 존재다. 사물의 속성은 대상안에 있는 것이 아니라 다른 사물과의 상호작용 속에서만 존재하며, 상호작용하는 대상이 달라지면 속성도달라질 수 있는 두 대상 사이의 관계다. 한마디로 이 세계는 확정된 속성을 가진 대상들의 집합이 아닌 관계의 그물망이다. 이것이 로벨리가 말하는 양자 이론이밝혀준 실재의 모습이다. 이제 양자 이론은 하이젠베르크의 기대와 달리 양자적 대상이 관찰을 통해 우리(혹은 '관찰자)에게 어떻게 나타나는지를 기술하는 것이아니라, 두 물리적 대상이 서로에게 나타나는 방식 곧관계를 기술한다.\n\n또한 로벨리는 양자 이론의 확률을 정보와 연결 짓고, 정보 역시 두 대상 사이의 상관관계의 산물로 본다.양자 이론은 대상을 관찰하지 않으면 그것이 어디에있는지 말해주지 않다가 대상을 관찰하면 어떤 지점에있을 확률을 말해주는데, 이는 관찰이라는 두 대상 간의 상호작용이 만들어낸 정보의 변화라는 것이다. 두개의 동전을 자유롭게 던지느냐 아니면 특정한 방식으로 묶어 던지느냐에 따라 일어날 사건에 관한 정보가달라지고 특정 사건이 일어날 확률도 달라지는데, 이는 두 개의 동전 사이의 상호 관계가 달라진 결과인 것처럼 말이다. 로벨리는 이런 정보의 관점에서 양자 이론을 새롭게 이해한다. 하이젠베르크의 불확정성 원리\n\n241\n\n\n는 정보의 유한성에 바탕해서 설명하고, 물리적 변수간의 비가환성은 대상과의 새로운 상호작용이 항상 새로운 관련 정보를 주지만 동시에 기존의 관련 정보를 잃게 만든다는 관점에서 설명한다.\n\n로벨리에게 입자성은 물질과 더불어 양자 현상이 아주 작은 세계에서는 입자적 형태로 나타남을 의미한다. 즉 불연속성을 의미하는 것이지, 세계가 입자와 같은 실체로 이루어졌음을 말하는 것이 아니다. 또한 로벨리는 이미 그의 저서인 《시간은 흐르지 않는다》에서 아주 작은 세계에서는 시간과 공간조차도 입자성을 띤다고 주장하였는데, 이는 입자성이 매우 일반적인 것임을 함축한다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7020414781065081), (Document(page_content="우리는 나가르주나에게서 양자의 관계성을 생각할수 있는 강력한 개념적 도구를 얻을 수 있습니다. 그것\n\n* 나가르주나 논증의 논리적 형식인 '사구부정'의 한 예.\n\n180\n\n\n은 자립적인 본질이 없어도 상호의존성을 생각할 수있다는 것입니다. 사실, 상호의존성을 생각하려면 (이것이 나가르주나의 핵심 주장인데요) 자립적 본질 따위는잊어버려야 합니다.\n\n물리학은 오랜 시간에 걸쳐 물질, 분자, 원자, 장, 소립자 등 '궁극적 실체'를 추구해왔는데..… 양자장 이론과 일반상대성이론의 관계적 복잡성이라는 암초에 걸려 난파되었습니다.\n\n여기에서 빠져나올 수 있는 개념적 도구를, 고대 인도의 사상가가 우리에게 줄 수 있을까요?\n\n우리는 항상 다른 사람으로부터, 우리와 다른 이들로부터 배웁니다. 수천 년에 걸쳐 끊임없이 대화를 해왔어도, 동양과 서양은 여전히 서로에게 할 말이 있을 수있습니다. 최고의 결혼 생활이 그렇듯이 말이죠.\n\n나가르주나 사상의 매력은 현대 물리학의 문제를 넘어섭니다. 그의 관점에는 어딘가 아찔한 구석이 있습니다. 그것은 고전적 철학이든 현대의 철학이든 최고의 서양 철학과 공명합니다. 흄의 급진적 회의주의와도, 잘못 제기된 질문의 가면을 벗기는 비트겐슈타인의 사상과도 공명합니다. 그러나 나가르주나는, 많은철학들이 잘못된 출발점을 가정하는 바람에 결국에는설득력이 없게 되는 그런 함정에 빠지지는 않는 것 같습니다. 그는 실재와 그것의 복잡성과 이해 가능성에\n\n181\n\n\n대해 이야기하지만, 궁극적인 토대를 찾겠다는 개념적함정에 우리가 빠지지 않도록 막아줍니다.\n\n나가르주나의 주장은 형이상학적으로 과도하지 않으며, 냉철합니다. 그는 모든 것의 궁극적인 토대가 무엇인가에 대한 질문은, 그저 말이 되지 않는 질문일 수있음을 받아들입니다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), 0.7460570726139523)]
retriever = vectordb.as_retriever(
search_type="mmr",
search_kwargs={
"k": 2,
"fetch_k": 4,
},
)
retriever.get_relevant_documents(query)
[Document(page_content='52 피히테, 셀링 헤겔의 의미에서.\n\n53 양자역학의 관계론적 해석에 대한 전문적인 소개는 《Relational QuantumMechanics》, The Stanford Encyclopedia of Philosophy, E.N. Zalta(ed.)를 참고할 수 있다. plato.stanford.edu/archives/win2019/entries/qm-relational/.\n\n54 N. Bohr, The Philosophical Writings of Niels Bohr, Ox Bow Press, Woodbridge, vol. IV, 1998, p. 111.\n\n55 여기서 내가 말하는 속성은 가변적인 속성이다. 즉, 위상공간에서 함수로기술되는 속성이다. 입자의 비상대론적 질량과 같은 불변 속성이 아니다.\n\n56 어떤 사건이 돌에 작용하여 돌을 변화시킨다면 그 사건은 돌에 대해 실재한다. 만약 어떤 사건이 발생했는데 돌에 대해 간섭현상이 일어나지 않고다른 곳에서 일어난다면 그 사건은 돌에 대해 실재하지 않는다.\n\n57 A. Aguirre, Cosmological Koans: A Journey to the Heart of Physical Reality, W.W. Norton & Co, New York, 2019.\n\n248\n\n\n58 E. Schrödinger, Nature and the Greeks and Science and Humanism, 앞의 책.\n\n59 사건 el이 A와는 관계가 있지만, B와는 관계가 없다\'는 것은 다음과 같은것을 의미한다. el은 A에 작용하지만, el이 만약 B에 작용했더라면 불가능한, B에 작용할 수 있는 사건 e2가 존재한다.\n\n60 파동의 관계적 특성을 최초로 깨달은 사람은 1950년대 중반 미국의 젊은 박사과정 학생 휴 에버렛 3세 Hugh Everett Ill였다. “Relative State"Formulation of Quantum Mechanics라는 제목의 그의 박사 학위 논문은양자를 둘러싼 논의에 큰 영향을 미쳤다.', metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'}), Document(page_content="240\n\n\n임없이 발생하는 관계적 존재다. 사물의 속성은 대상안에 있는 것이 아니라 다른 사물과의 상호작용 속에서만 존재하며, 상호작용하는 대상이 달라지면 속성도달라질 수 있는 두 대상 사이의 관계다. 한마디로 이 세계는 확정된 속성을 가진 대상들의 집합이 아닌 관계의 그물망이다. 이것이 로벨리가 말하는 양자 이론이밝혀준 실재의 모습이다. 이제 양자 이론은 하이젠베르크의 기대와 달리 양자적 대상이 관찰을 통해 우리(혹은 '관찰자)에게 어떻게 나타나는지를 기술하는 것이아니라, 두 물리적 대상이 서로에게 나타나는 방식 곧관계를 기술한다.\n\n또한 로벨리는 양자 이론의 확률을 정보와 연결 짓고, 정보 역시 두 대상 사이의 상관관계의 산물로 본다.양자 이론은 대상을 관찰하지 않으면 그것이 어디에있는지 말해주지 않다가 대상을 관찰하면 어떤 지점에있을 확률을 말해주는데, 이는 관찰이라는 두 대상 간의 상호작용이 만들어낸 정보의 변화라는 것이다. 두개의 동전을 자유롭게 던지느냐 아니면 특정한 방식으로 묶어 던지느냐에 따라 일어날 사건에 관한 정보가달라지고 특정 사건이 일어날 확률도 달라지는데, 이는 두 개의 동전 사이의 상호 관계가 달라진 결과인 것처럼 말이다. 로벨리는 이런 정보의 관점에서 양자 이론을 새롭게 이해한다. 하이젠베르크의 불확정성 원리\n\n241\n\n\n는 정보의 유한성에 바탕해서 설명하고, 물리적 변수간의 비가환성은 대상과의 새로운 상호작용이 항상 새로운 관련 정보를 주지만 동시에 기존의 관련 정보를 잃게 만든다는 관점에서 설명한다.\n\n로벨리에게 입자성은 물질과 더불어 양자 현상이 아주 작은 세계에서는 입자적 형태로 나타남을 의미한다. 즉 불연속성을 의미하는 것이지, 세계가 입자와 같은 실체로 이루어졌음을 말하는 것이 아니다. 또한 로벨리는 이미 그의 저서인 《시간은 흐르지 않는다》에서 아주 작은 세계에서는 시간과 공간조차도 입자성을 띤다고 주장하였는데, 이는 입자성이 매우 일반적인 것임을 함축한다.", metadata={'source': './data/나없이는존재하지않는세상_20240102.txt'})]
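As a hedged sketch, this retriever can also feed a RetrievalQA chain in the same way the Chroma-based retrieval was used earlier (the model name is only an example):
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0),
    retriever=retriever,
)
print(qa.run(query))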
Chatbot Source¶
utils.py¶
"""Utility functions and constants.
I am having some problems caching the memory and the retrieval.
When I decorate for caching, I get streamit init errors.
`UnstructuredWordDocumentLoader`을 사용하기 위해
`pip install docx2txt` 필요
"""
import logging
import pathlib
from typing import Any
from langchain.document_loaders import (
PyPDFLoader,
TextLoader,
UnstructuredEPubLoader,
UnstructuredWordDocumentLoader,
)
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
logging.basicConfig(level=logging.INFO)
def init_memory():
"""Initialize the memory. for contextual conversation.
We are caching this, so it won't be deleted
every time, we restart the server.
"""
return ConversationBufferMemory(
memory_key='chat_history',
return_messages=True,
output_key='answer'
)
MEMORY = init_memory()
class EpubReader(UnstructuredEPubLoader):
"""Reads the epub file."""
def __init__(self, file_path: str | list[str], **unstructured_kwargs: Any):
"""Initialize the epub reader."""
# passing strategy="fast" raises an error, so it is left out
super().__init__(file_path=file_path, **unstructured_kwargs, mode="elements")  # , strategy="fast")
class DocumentLoaderException(Exception):
"""Document loader exception."""
pass
class DocumentLoader(object):
"""Loads in a document with a supported extension."""
supported_extensions = {
".pdf": PyPDFLoader,
".txt": TextLoader,
".epub": EpubReader,
".docx": UnstructuredWordDocumentLoader,
".doc": UnstructuredWordDocumentLoader,
}
def load_document(temp_filepath: str) -> list[Document]:
"""Load a file and return it as a list of documents.
Doesn't handle a lot of errors at the moment.
"""
ext = pathlib.Path(temp_filepath).suffix
loader = DocumentLoader.supported_extensions.get(ext)
if not loader:
raise DocumentLoaderException(f"Unsupported file extension: {ext}, cannot load this type of file")
loaded = loader(temp_filepath)
docs = loaded.load()
# log metadata for at most the first five loaded documents
for idx, doc in enumerate(docs):
logging.info(doc.metadata)
if idx >= 4:
break
return docs
if __name__ == "__main__":
print("=============Load PDF=====================")
load_document('data/Introduction_to_Entropy_and_Gini-Index.pdf')
print("=============Load EPUB=====================")
load_document('data/Julia for Data Analysis.epub')
print("=============Load DOCX=====================")
load_document('data/test.docx')
print("=============Load TXT=====================")
load_document('data/나없이는존재하지않는세상_20240102.txt')
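Because DocumentLoader keeps a plain mapping from file extension to loader class, support for additional formats can be added without touching load_document. A small, hypothetical sketch (the `.md` entry and the UnstructuredMarkdownLoader choice are illustrative and not part of the original utils.py):
from langchain.document_loaders import UnstructuredMarkdownLoader

# Hypothetical: accept Markdown uploads as well (may need extra `unstructured` dependencies).
DocumentLoader.supported_extensions[".md"] = UnstructuredMarkdownLoader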
chat_with_documents.py¶
"""Chat with retrieval and embedding."""
import logging
import os, sys
import tempfile
from langchain.chains import (
ConversationalRetrievalChain,
OpenAIModerationChain,
SimpleSequentialChain,
FlareChain
)
from langchain.chains.base import Chain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
# The packages below are required for HuggingFaceEmbeddings:
# pip install transformers -U
# pip install sentence-transformers
from langchain.embeddings import HuggingFaceEmbeddings
# Retriever that wraps a base retriever and compresses the results.
from langchain.retrievers import ContextualCompressionRetriever
# Document compressor that uses embeddings to drop documents unrelated to the query.
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.schema import BaseRetriever, Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
# https://python.langchain.com/docs/integrations/vectorstores/docarray_in_memory
# In-memory DocArray storage for exact search.
from langchain.vectorstores.docarray import DocArrayInMemorySearch
from dotenv import load_dotenv
# add the project root path to the Python path
current_path = os.path.abspath(".")
print(current_path)
if current_path not in sys.path:
sys.path.append(current_path)
from langchain_ai_05.chatbot.utils import MEMORY, load_document
logging.basicConfig(encoding='utf-8', level=logging.INFO)
LOGGER = logging.getLogger()
load_dotenv()
# Setup LLM and QA chain; set temperature low to keep hallucinations in check.
# alternative model: gpt-4-1106-preview
LLM = ChatOpenAI(model="gpt-3.5-turbo", temperature=0, streaming=True)
def configure_retriever(
docs: list[Document],
use_compression: bool = False
) -> BaseRetriever:
"""Retriever to use.
Args:
docs (list[Document]): document list
use_compression (bool, optional): compression option. Defaults to False.
Returns:
BaseRetriever: retriever
"""
# Split each document documents:
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1500, chunk_overlap=200
)
splits = text_splitter.split_documents(docs)
# Create embeddings and store in vectordb:
# alternatively: embeddings = OpenAIEmbeddings()
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# Create vectordb with single call to embedding model for texts:
vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
# mmr : Maximal Marginal Relevance
# Instead of simply returning the top-k most similar chunks, MMR first fetches a
# larger candidate pool (fetch_k) and then greedily selects chunks that are both
# relevant to the query and dissimilar to the chunks already chosen, which keeps
# near-duplicate context out of the prompt.
retriever = vectordb.as_retriever(
search_type="mmr", search_kwargs={
"k": 5,
"fetch_k": 20,  # candidate pool for MMR; must be at least as large as k
# "include_metadata": True,
},
)
if not use_compression:
return retriever
# similarity_threshold : minimum embedding similarity (0~1) between the query and a
# retrieved chunk for that chunk to be kept. Chunks below the threshold are dropped
# before reaching the LLM; a low value such as 0.2 only filters out clearly
# unrelated chunks, while a higher value keeps only close matches.
embeddings_filter = EmbeddingsFilter(
embeddings=embeddings,
similarity_threshold=0.2)
return ContextualCompressionRetriever(
base_compressor=embeddings_filter,
base_retriever=retriever,
)
def configure_chain(retriever: BaseRetriever, use_flare: bool = False) -> Chain:
"""Configure chain with retriever.
Args:
retriever (BaseRetriever): retriever
use_flare (bool, optional): flare option. Defaults to False.
Returns:
Chain: chain
"""
params = dict(
llm = LLM,
retriever = retriever,
memory=MEMORY,
verbose=True,
max_tokens_limit=4000,
)
if use_flare:
# FLARE needs a different set of parameters and initialization;
# unfortunately, this requires using a "protected" class.
return FlareChain.from_llm(**params)
return ConversationalRetrievalChain.from_llm(**params)
def configure_retrieval_chain(
uploaded_file,
use_compression: bool = False,
use_flare: bool = False,
use_moderation: bool = False,
) -> Chain:
"""Read documents, configure retriever, and configure chain.
Args:
uploaded_file (FileStorage): file
use_compression (bool, optional): compression option. Defaults to False.
use_flare (bool, optional): flare option. Defaults to False.
use_moderation (bool, optional): moderation option. Defaults to False.
Returns:
Chain: chain
"""
# Load documents from file
docs = []
temp_dir = tempfile.TemporaryDirectory()
for file in uploaded_file:
temp_filepath = os.path.join(temp_dir.name, file.name)
with open(temp_filepath, "wb") as f:
f.write(file.getvalue())
docs.extend(load_document(temp_filepath))
# Configure retriever
retriever = configure_retriever(docs=docs, use_compression=use_compression)
# Configure chain
# NOTE: use_flare=True currently raises an error and would need substantial
# changes, so it is not used.
chain = configure_chain(retriever=retriever, use_flare=use_flare)
# Configure moderation
if not use_moderation:
return chain
# NOTE: use_moderation=True currently raises an error, so it is not used.
# SimpleSequentialChain passes a single string between its sub-chains, so the
# retrieval chain must expose only the answer (no source documents) for this to work.
moderation_chain = OpenAIModerationChain()
return SimpleSequentialChain(chains=[chain, moderation_chain])
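Outside of Streamlit, the same building blocks can be exercised from a plain Python script. A minimal sketch, assuming the project layout above and the sample text file used in utils.py (this snippet is not part of the original source files):
from langchain_ai_05.chatbot.chat_with_documents import configure_retriever, configure_chain
from langchain_ai_05.chatbot.utils import load_document

# Load one document, build the compressed retriever, and wire up the conversational chain.
docs = load_document("data/나없이는존재하지않는세상_20240102.txt")
retriever = configure_retriever(docs=docs, use_compression=True)
chain = configure_chain(retriever=retriever, use_flare=False)

# Chat history is supplied by the shared MEMORY object, so only the question is passed in.
result = chain({"question": "마흐의 사상은 무엇이었나요?"})
print(result["answer"])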
app.py¶
"""Document loading functionality.
Run like this:
> PYTHONPATH=. streamlit run langchain_ai_05/chatbot/app.py
Debugging streamlit in VS Code : https://www.gpters.org/c/llm/7-visual-studio-code-streamlit
Streaming demo : https://github.com/streamlit/StreamlitLangChain/blob/main/streaming_demo.py
1) The chatbot remembers the previous conversation and uses it together with the current question to build a new standalone question.
2) The standalone question is used to find the most similar documents in the vectordb; the standalone question becomes the user prompt and the similar documents are supplied as context (the system prompt).
3) The chatbot is run with that context and the user prompt.
Example:
1) The chatbot remembers the previous conversation and uses it together with the current question to build a new standalone question.
Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
Human: 양자역학에서 제일중요한것은?
Assistant: 양자역학에서 가장 중요한 것은 양자 현상의 이해와 해석입니다.
Human: 그럼 양자역학에서 상관관계는 뭔가?
Assistant: 양자역학에서 상관관계는 두 개 이상의 양자 시스템 사이의 관계를 나타내는 것을 말합니다. 양자역학에서는 상관관계를 통해 양자 시스템 간의 상호작용이나 상태의 종속성을 설명하고 이해할 수 있습니다. 상관관계는 양자 시스템의 상태를 측정하거나 조작함으로써 확인할 수 있습니다.
Human: 그렇다면 마흐는 양자역학 탄생에 어떤 역할을 했는가?
Assistant: 마흐(Max Planck)는 양자역학의 탄생에 매우 중요한 역할을 했습니다. 1900년, 마흐는 빛의 방출과 흡수에 관한 연구를 통해 에너지가 이산화되는(discrete) 형태로 존재한다는 개
Human: 당신의 대답에서 마흐를 Max Plank로 표시 했는데 막스 플랑크가 아닙니다. 마흐는 철학자 입니다. 다시 마흐와 양자역학 탄생의 관계를 알려 주세요
Assistant: 마흐(Max Planck)는 양자역학의 탄생에 매우 중요한 역할을 했습니다. 1900년, 마흐는 빛의 방출과 흡수에 관한 연구를 통해 에너지가 이산화되는(discrete) 형태로 존재한다는 개
Human: 당신의 대답에서 마흐를 Max Plank로 표시 했는데 막스 플랑크가 아닙니다. 마흐는 철학자 입니다. 다시 마흐와 양자역학 탄생의 관계를 알려 주세요
Assistant: 마흐(Max Planck)는 양자역학의 탄생에 매우 중요한 역할을 했습니다. 1900년, 마흐는 빛의 방출과 흡수에 관한 연구를 통해 에너지가 이산화되는(discrete) 형태로 존재한다는 개
Follow Up Input: 답이 만족 스럽지 않습니다. 마흐의 사상의 무엇이었나요?
Standalone question: 마흐의 사상의 무엇이었나요?
2) The standalone question is used to find the most similar documents in the vectordb; the standalone question becomes the user prompt and the similar documents are supplied as context (the system prompt).
standalone question -> vectordb -> similar documents -> context (system prompt)
3) The chatbot is run with that context and the user prompt.
standalone question -> user prompt, similar documents -> context (system prompt) => run the chatbot
Similar documents:
생물학의 기초를 이해할 수 있는 식으로 되어 있죠. 우리가 잘 이해하는 연결도 있지만, 그렇지 않은 연결도 있습니다. 단절이란 우리의 이해에 난 갈라진 틈입니다. 이것이 바로 의미 개념의 물리적 기초에대한 질문이 뜻하는 것입니다.
..
..
System: Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
생물학의 기초를 이해할 수 있는 식으로 되어 있죠. 우리가 잘 이해하는 연결도 있지만, 그렇지 않은 연결도 있습니다. 단절이란 우리의 이해에 난 갈라진 틈입니다. 이것이 바로 의미 개념의 물리적 기초에대한 질문이 뜻하는 것입니다.
..
..
Human: 마흐의 사상은 무엇이었나요?
"""
import logging
import os, sys
import streamlit as st
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks import StreamlitCallbackHandler
# add the project root path to the Python path
current_path = os.path.abspath(".")
print(current_path)
if current_path not in sys.path:
sys.path.append(current_path)
from langchain_ai_05.chatbot.chat_with_documents import configure_retrieval_chain
from langchain_ai_05.chatbot.utils import MEMORY, DocumentLoader
logging.basicConfig(encoding='utf-8', level=logging.INFO)
LOGGER = logging.getLogger()
class StreamHandler(BaseCallbackHandler):
"""Stream newly generated LLM tokens into a Streamlit container."""
def __init__(self, container, initial_text=""):
self.container = container
self.text = initial_text
def on_llm_new_token(self, token: str, **kwargs) -> None:
self.text += token
self.container.markdown(self.text)
st.set_page_config(page_title="LangChain: Chat with Documents", page_icon="🦜")
st.title("🦜 LangChain: Chat with Documents")
uploaded_file = st.sidebar.file_uploader(
label="Upload files",
type=list(DocumentLoader.supported_extensions.keys()),
accept_multiple_files=True,
)
if not uploaded_file:
MEMORY.chat_memory.clear() # clear chat history
st.session_state.clear() # clear session state
st.info("Please upload files to continue.")
st.stop()
# use compression by default:
use_compression = st.checkbox("compression", value=True)
# use_flare = st.checkbox("flare", value=False)
use_flare = False
# use_moderation = st.checkbox("moderation", value=False)
use_moderation = False
uploaded_file_list = set([f.name for f in uploaded_file])
# uploaded_file update check
if "uploaded_file_list" not in st.session_state:
st.session_state["uploaded_file_list"] = uploaded_file_list
if "use_compression" not in st.session_state:
st.session_state["use_compression"] = use_compression
update_configure_retrieval_chain = False
# rebuild when the symmetric difference is non-empty, i.e. the set of uploaded files changed
if len(uploaded_file_list ^ st.session_state["uploaded_file_list"]) != 0:
st.session_state["uploaded_file_list"] = uploaded_file_list
update_configure_retrieval_chain = True
if use_compression != st.session_state["use_compression"]:
st.session_state["use_compression"] = use_compression
update_configure_retrieval_chain = True
# (re)build the retrieval chain and cache it in the session state
if "CONV_CHAIN" not in st.session_state or update_configure_retrieval_chain:
CONV_CHAIN = configure_retrieval_chain(uploaded_file=uploaded_file,
use_compression=use_compression,
use_flare=use_flare,
use_moderation=use_moderation)
st.session_state["CONV_CHAIN"]= CONV_CHAIN
CONV_CHAIN = st.session_state["CONV_CHAIN"]
if st.sidebar.button("Clear message history"):
MEMORY.chat_memory.clear()
avatars = {"human":"user","ai":"assistant"}
# if "messages" not in st.session_state:
# st.session_state["messages"] = [st.chat_message("assistant").markdown("🦜 무엇인든 물어 보세요. $e^{i\pi}+1=0$")]
if len(MEMORY.chat_memory.messages) == 0:
st.chat_message("assistant").markdown("🦜 무엇인든 물어 보세요. $e^{i\pi}+1=0$")
for msg in MEMORY.chat_memory.messages:
st.chat_message(avatars[msg.type]).write(msg.content)
if user_query := st.chat_input(placeholder="Ask me anything!"):
st.chat_message("user").write(user_query)
with st.chat_message("assistant"):
container = st.empty()
stream_handler = StreamlitCallbackHandler(container)  # alternatively: StreamHandler(container)
if use_flare:
params = {
"user_input": user_query,
}
else:
params = {
"question": user_query,
"chat_history": MEMORY.chat_memory.messages,
}
response = CONV_CHAIN.run(params, callbacks=[stream_handler])
# Display the response from the chatbot
if response:
container.markdown(response)
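For reference, the question-rewriting step described in the docstring above relies on the chain's default condense-question prompt, which can be inspected directly (a small sketch, assuming a classic `langchain` installation):
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT

# Default prompt that turns (chat history, follow-up question) into a standalone question.
print(CONDENSE_QUESTION_PROMPT.template)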
.env¶
OPENAI_API_KEY=sk-9e2Amozcky8ZO3WByoxuT3B...
OPENAI_ORGANIZATION=org-9rVeG...
PINECONE_API_KEY=c0dc5...
PINECONE_ENVIRONMENT=gcp-starter
SERPAPI_API_KEY=c6b1...
GOOGLE_CSE_ID=9...
GOOGLE_API_KEY=AI...
WOLFRAM_ALPHA_APPID=5...
GOOGLE_MAP_API_KEY=AIzaS...
HUGGINGFACEHUB_API_TOKEN=hf_Ldc...
JINACHAT_API_KEY=ADpbLNay...
ZEP_API_URL=http://localhost:8000
ZEP_API_KEY=eyJhb...