Example Databases

Chroma

nest_asyncio.apply()

source

async_openai_client

 async_openai_client ()

source

openai_client

 openai_client ()

source

check_openai_key

 check_openai_key ()

source

CachedEmbeddingFunction

 CachedEmbeddingFunction (model='text-embedding-3-small')

*A protocol for embedding functions. To implement a new embedding function, you need to implement the following methods at minimum: - `__call__`

For future compatibility, it is strongly recommended to also implement: - `__init__` - `name` - `build_from_config` - `get_config`*

source

OpenAIEmbed

 OpenAIEmbed (model='text-embedding-3-small')

Initialize self. See help(type(self)) for accurate signature.

source

openai_embed

 openai_embed (text, model='text-embedding-3-small')

c = CachedEmbeddingFunction()
x = c(['hello world'])
x

[array([-0.00676333, -0.03919632,  0.03417581, ..., -0.01964353,
        -0.01937133, -0.02247135])]

source

ChromaClient

 ChromaClient (persist_path=None, embed_model='text-embedding-3-small')

*Initialize ChromaDB client with a collection name.

Args: `persist_path`: path to the directory to persist the database to; `embed_model`: model to use for embedding.*

Tests

# Test ChromaClient
# No persist_path is passed, so the signature default (None) is used.
client = ChromaClient()

client.reset()  # Start with a clean state

# Test collection management: a newly added collection must show up in the listing.
client.add_collection("test_collection")
assert "test_collection" in client.list_collections(), f"Collection creation failed, {client.list_collections()}"

# Test document operations
# Fixture documents: each has an 'id', a 'text' body and a 'metadata' dict.
# doc1 and doc2 both mention "fox"; doc3 does not. The later query tests rely on this.
test_docs = [
    {
        'id': 'doc1',
        'text': 'The quick brown fox jumps over the lazy dog',
        'metadata': {'type': 'pangram'}
    },
    {
        'id': 'doc2',
        'text': 'A quick brown fox jumped over the lazy dogs',
        'metadata': {'type': 'variant'}
    },
    {
        'id': 'doc3',
        'text': 'The weather is sunny today',
        'metadata': {'type': 'weather'}
    }
]

# Test upsert: store all three fixture documents in the collection.
client.upsert("test_collection", test_docs)

[{'id': 'doc1', 'text': 'The quick brown fox jumps over the lazy dog'},
 {'id': 'doc2', 'text': 'A quick brown fox jumped over the lazy dogs'},
 {'id': 'doc3', 'text': 'The weather is sunny today'}]

# Test query: plain similarity search, asking for the 2 nearest documents.
results = client.query("test_collection", "fox jumping", k=2)

assert len(results) == 2, "Query should return 2 results"
# Only doc1 and doc2 mention "fox", so both hits are expected to contain it.
assert all('fox' in doc['text'] for doc in results), "Query results should contain relevant documents"

# Query with metadata filtering: `where` restricts candidates by metadata.
# Only doc1 has type 'pangram', so a single hit comes back even though k=2.
results = client.query("test_collection", "fox jumping",where={'type':'pangram'},k=2)
assert len(results) == 1, results
assert results[0]['text'] == 'The quick brown fox jumps over the lazy dog'

# Query with full text search: `where_document` filters on the document text.
# The query text "sunny" is closest to doc3, but doc3 does not contain "fox",
# so only the two fox documents are expected.
results = client.query("test_collection", "sunny",k=2,where_document={"$contains":"fox"})
results
assert len(results) == 2, results
assert all('fox' in doc['text'] for doc in results), "Query results should contain relevant documents"

# Query with both filters: text must contain "fox" AND metadata type must be
# 'weather' or 'variant'. Among the fixtures only doc2 satisfies both.
results = client.query("test_collection", "sunny",k=2,where_document={"$contains":"fox"},where={'type':{'$in':['weather','variant']}})
results

[{'id': 'doc2',
  'text': 'A quick brown fox jumped over the lazy dogs',
  'metadata': {'type': 'variant'},
  'distance': 1.513525366783142}]

client.get("test_collection",["doc2","doc1"])

[{'id': 'doc1',
  'text': 'The quick brown fox jumps over the lazy dog',
  'metadata': {'type': 'pangram'}},
 {'id': 'doc2',
  'text': 'A quick brown fox jumped over the lazy dogs',
  'metadata': {'type': 'variant'}}]

client.list("test_collection",k=3)

[{'id': 'doc1',
  'text': 'The quick brown fox jumps over the lazy dog',
  'metadata': {'type': 'pangram'},
  'embedding': array([-0.02083762, -0.01689642, -0.00453628, ...,  0.01019769,
         -0.01523149,  0.02468777])},
 {'id': 'doc2',
  'text': 'A quick brown fox jumped over the lazy dogs',
  'metadata': {'type': 'variant'},
  'embedding': array([-1.61350556e-02,  1.02180371e-03, -6.04663728e-05, ...,
          8.89423583e-03, -2.04253849e-02,  1.07899625e-02])},
 {'id': 'doc3',
  'text': 'The weather is sunny today',
  'metadata': {'type': 'weather'},
  'embedding': array([ 0.01581731, -0.03885713,  0.00716233, ..., -0.02583253,
          0.01166436,  0.0264344 ])}]

# Test get: fetch a single document by id and compare it with the fixture.
doc_get = client.get("test_collection", ["doc1"])

assert doc_get[0]['id'] == 'doc1', "Get should return correct document"
assert doc_get[0]['text'] == test_docs[0]['text'], "Document text should match"

# Test list: k caps the number of documents returned.
collection_peek = client.list("test_collection", k=2)
assert len(collection_peek) == 2, "List should return 2 documents"

# Test query: besides relevance, check the shape of each hit
# (a float 'distance' and a dict 'metadata').
results = client.query("test_collection", "fox jumping", k=2)
assert len(results) == 2, "Query should return 2 results"
assert all('fox' in result['text'] for result in results), "Query results should contain relevant documents"
assert all(isinstance(result['distance'], float) for result in results), "Each result should have a distance score"
assert all(isinstance(result['metadata'], dict) for result in results), "Each result should have metadata"

# Test delete: after removing doc1 by id it must no longer be listed.
client.delete("test_collection", ["doc1"])
remaining_docs = client.list("test_collection")
assert "doc1" not in [doc['id'] for doc in remaining_docs], "Document should be deleted"

# Test collection deletion
client.delete_collection("test_collection")
assert "test_collection" not in client.list_collections(), "Collection deletion failed"

# Test error cases: adding an existing collection a second time raises
# ValueError unless exists_ok=True is passed.
client.add_collection("test_collection")
with pytest.raises(ValueError,match="Collection test_collection already exists"):
    client.add_collection("test_collection")

client.add_collection("test_collection", exists_ok=True)
# Clean up so the test leaves no collection behind.
client.delete_collection("test_collection")

SQL

We show here how to create and use an in-memory SQL database and configure tables using SQLModel objects.

source

temp_sql_db

 temp_sql_db (**kwargs)

Creates a SQLAlchemy engine connected to a shared-memory SQLite DB. Kwargs are passed to SQLAlchemy’s create_engine function.

engine = temp_sql_db(echo=False)

SQLModel.metadata.clear()

# NOTE(review): `extend_existing=True` is passed as a class keyword here; presumably it is
# meant to allow re-running this cell without a "table already defined" error - confirm it
# has that effect (the preceding `SQLModel.metadata.clear()` already resets the metadata).
class Hero(SQLModel,table=True,extend_existing=True):
    """Example SQLModel table model used to demonstrate the temporary SQL database."""
    # Primary key; defaults to None when not supplied.
    id: Optional[int] = Field(default=None,primary_key=True)
    # Required name; `get_hero` looks rows up by this field.
    name: str
    # Required secret name.
    secret_name: str
    # Optional age; defaults to None.
    age: Optional[int] = None

SQLModel.metadata.create_all(engine)

def merge_heros(heros:List[Hero]):
    """Merge every given Hero into the database and commit once at the end.

    Args:
        heros: The Hero objects to merge via `Session.merge`.
    """
    with Session(engine) as db:
        for record in heros:
            db.merge(record)
        # Single commit after all merges, so the batch is persisted together.
        db.commit()

merge_heros(
    [Hero(id=1,name="Deadpond", secret_name="Dive"),
    Hero(id=2,name="Spider-Boy", secret_name="Pedro"),
    Hero(id=3,name="Rusty-Man", secret_name="Tommy")])

def get_hero(name:str):
    """Return the Hero row whose `name` column equals `name`.

    The lookup uses `.one()`, so exactly one matching row is expected.
    """
    query = select(Hero).where(Hero.name == name)
    with Session(engine) as db:
        return db.exec(query).one()

get_hero("Deadpond")

Hero(name='Deadpond', id=1, age=None, secret_name='Dive')

Chroma

async_openai_client

openai_client

check_openai_key

CachedEmbeddingFunction

OpenAIEmbed

openai_embed

ChromaClient

Tests

SQL

temp_sql_db

Export