Skip to content

Tutorial on creating a vector database with OpenML objects

  • How would you use the API to create a vector database with OpenML objects (datasets, flows, etc.)?
1
2
3
4
5
6
from __future__ import annotations
from langchain.globals import set_llm_cache
from langchain_community.cache import SQLiteCache
import os
import sys
import chromadb
1
2
from backend.modules.utils import *
from backend.modules.rag_llm import *
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
# Start from the backend's base configuration, then apply the overrides this
# documentation example needs.
config = load_config_and_device("../../../backend/config.json")
demo_overrides = {
    "persist_dir": "../../data/doc_examples/chroma_db/",
    "data_dir": "../../data/doc_examples/",
    "type_of_data": "dataset",
    "training": False,
    # The two flags below are enabled for the demo only; set them to False
    # while training.
    "testing_flag": True,
    "test_subset": True,
}
for option, setting in demo_overrides.items():
    config[option] = setting

# Open the persistent ChromaDB database located at the configured directory.
client = chromadb.PersistentClient(path=config["persist_dir"])
print(config)
1
2
3
4
5
# Build the QA setup helper for the "dataset" data type, reusing the
# configuration and ChromaDB client created earlier in this script.
qa_dataset_handler = QASetup(config=config, data_type="dataset", client=client)
1
# Run the handler's setup step. The call's result is unpacked into exactly two
# values: the first is kept as `qa_dataset` (presumably the QA object for
# datasets -- TODO confirm against QASetup), the second is bound to `_` and
# not used in the visible code.
qa_dataset, _ = qa_dataset_handler.setup_vector_db_and_qa()