💬 Chat on PDF

Python Implementation
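
This walkthrough composes GenAI Stack components into a chat-on-PDF pipeline: an ETL step ingests the PDF, an embedding model and vector store index it for retrieval, and a prompt engine, model, and conversation memory turn retrieved chunks into conversational answers.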

Importing Components

from genai_stack.stack.stack import Stack
from genai_stack.etl.langchain import LangchainETL
from genai_stack.embedding.langchain import LangchainEmbedding
from genai_stack.vectordb.chromadb import ChromaDB
from genai_stack.prompt_engine.engine import PromptEngine
from genai_stack.model.gpt3_5 import OpenAIGpt35Model
from genai_stack.retriever.langchain import LangChainRetriever
from genai_stack.memory.langchain import ConversationBufferMemory

Initializing Stack Components

ETL

etl.json

{
    "name": "PyPDFLoader",
    "fields": {
        "file_path": "/path/to/sample.pdf"
    }
}

etl = LangchainETL.from_config_file(config_file_path="/path/to/etl.json")
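
If you would rather configure the loader in code than in JSON, the component classes also expose a from_kwargs constructor (used below for ChromaDB, the retriever, and memory). A minimal sketch, assuming LangchainETL accepts the same name and fields arguments as the config file:

etl = LangchainETL.from_kwargs(
    name="PyPDFLoader",
    fields={"file_path": "/path/to/sample.pdf"},  # same fields as etl.json
)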

Embeddings

embeddings.json

{
    "name": "HuggingFaceEmbeddings",
    "fields": {
        "model_name": "sentence-transformers/all-mpnet-base-v2",
        "model_kwargs": { "device": "cpu" },
        "encode_kwargs": { "normalize_embeddings": false }
    }
}

embedding = LangchainEmbedding.from_config_file(config_file_path="/path/to/embeddings.json")
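
Any embedding integration supported by LangChain can be configured here; this example runs the all-mpnet-base-v2 sentence-transformers model locally on CPU.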

VectorDB

chromadb = ChromaDB.from_kwargs()
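
Called with no arguments, from_kwargs() falls back to the component's default configuration.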

Model

llm = OpenAIGpt35Model.from_kwargs(parameters={"openai_api_key": "your-api-key"})
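
Hardcoding the key is fine for a quick test; for anything real, read it from the environment instead. A minimal sketch (the OPENAI_API_KEY variable name is an assumption, not part of GenAI Stack):

import os

# Assumes the key is exported as OPENAI_API_KEY in your shell.
llm = OpenAIGpt35Model.from_kwargs(
    parameters={"openai_api_key": os.environ["OPENAI_API_KEY"]}
)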

Prompt Engine

prompt_engine.json

{
    "should_validate": false
}

prompt_engine = PromptEngine.from_config_file(config_file_path="/path/to/prompt_engine.json")
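
Setting should_validate to false turns off the engine's validation pass, so queries go straight to prompt construction without being checked first.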

Retriever

retriever = LangChainRetriever.from_kwargs()
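
Once the stack is assembled, this retriever is what you will query: it fetches the most relevant chunks from the vector store and hands them to the model to generate an answer.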

Memory

memory = ConversationBufferMemory.from_kwargs()
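
ConversationBufferMemory keeps the running chat history, which is what lets follow-up questions refer back to earlier answers.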

Initializing Stack

Stack

Instantiating Stack wires all of the components together:

stack = Stack(
    etl=etl,
    embedding=embedding,
    vectordb=chromadb,
    model=llm,
    prompt_engine=prompt_engine,
    retriever=retriever,
    memory=memory
)

Performing ETL Operations

Calling run() executes the Extract, Transform, and Load steps, embedding the PDF's contents and loading them into the vector store.

etl.run()

Now you can start querying the document.

response = retriever.retrieve("your query")
print(response)
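
Because the stack includes conversation memory, follow-up questions can build on earlier turns. A minimal interactive loop, as a sketch:

while True:
    query = input("Ask a question (or type 'quit' to exit): ")
    if query.strip().lower() == "quit":
        break
    # Each call retrieves relevant chunks and runs them through the
    # stack's prompt engine, model, and memory to produce an answer.
    print(retriever.retrieve(query))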
