# Patch the running event loop so nested asyncio calls work (e.g. in notebooks).
nest_asyncio.apply()
Example Databases
Chroma
async_openai_client
async_openai_client ()
openai_client
openai_client ()
check_openai_key
check_openai_key ()
CachedEmbeddingFunction
CachedEmbeddingFunction (model='text-embedding-3-small')
*A protocol for embedding functions. To implement a new embedding function, you need to implement the following methods at minimum: - `__call__`
For future compatibility, it is strongly recommended to also implement: - `__init__` - `name` - `build_from_config` - `get_config`*
OpenAIEmbed
OpenAIEmbed (model='text-embedding-3-small')
Initialize self. See help(type(self)) for accurate signature.
openai_embed
openai_embed (text, model='text-embedding-3-small')
# Build the embedding function and embed a single example string.
c = CachedEmbeddingFunction()
x = c(['hello world'])
x
[array([-0.00676333, -0.03919632, 0.03417581, ..., -0.01964353,
-0.01937133, -0.02247135])]
ChromaClient
ChromaClient (persist_path=None, embed_model='text-embedding-3-small')
*Initialize ChromaDB client with a collection name.
Args: persist_path: Path to the directory to persist the database to embed_model: Model to use for embedding*
Tests
# Test ChromaClient
client = ChromaClient()
# Start with a clean state
client.reset()
# Test collection management
client.add_collection("test_collection")
assert "test_collection" in client.list_collections(), f"Collection creation failed, {client.list_collections()}"
# Test document operations
test_docs = [
    {'id': 'doc1',
     'text': 'The quick brown fox jumps over the lazy dog',
     'metadata': {'type': 'pangram'}
    },
    {'id': 'doc2',
     'text': 'A quick brown fox jumped over the lazy dogs',
     'metadata': {'type': 'variant'}
    },
    {'id': 'doc3',
     'text': 'The weather is sunny today',
     'metadata': {'type': 'weather'}
    }
]
# Test upsert
client.upsert("test_collection", test_docs)
[{'id': 'doc1', 'text': 'The quick brown fox jumps over the lazy dog'},
{'id': 'doc2', 'text': 'A quick brown fox jumped over the lazy dogs'},
{'id': 'doc3', 'text': 'The weather is sunny today'}]
# Test query
results = client.query("test_collection", "fox jumping", k=2)
assert len(results) == 2, "Query should return 2 results"
assert all('fox' in doc['text'] for doc in results), "Query results should contain relevant documents"
# query with metadata filtering
results = client.query("test_collection", "fox jumping",where={'type':'pangram'},k=2)
assert len(results) == 1, results
assert results[0]['text'] == 'The quick brown fox jumps over the lazy dog'
# query with full text search
results = client.query("test_collection", "sunny",k=2,where_document={"$contains":"fox"})
results
assert len(results) == 2, results
assert all('fox' in doc['text'] for doc in results), "Query results should contain relevant documents"
# query with both filters
results = client.query("test_collection", "sunny",k=2,where_document={"$contains":"fox"},where={'type':{'$in':['weather','variant']}})
results
[{'id': 'doc2',
'text': 'A quick brown fox jumped over the lazy dogs',
'metadata': {'type': 'variant'},
'distance': 1.513525366783142}]
"test_collection",["doc2","doc1"]) client.get(
[{'id': 'doc1',
'text': 'The quick brown fox jumps over the lazy dog',
'metadata': {'type': 'pangram'}},
{'id': 'doc2',
'text': 'A quick brown fox jumped over the lazy dogs',
'metadata': {'type': 'variant'}}]
# List up to three stored documents from the collection.
client.list("test_collection",k=3)
[{'id': 'doc1',
'text': 'The quick brown fox jumps over the lazy dog',
'metadata': {'type': 'pangram'},
'embedding': array([-0.02083762, -0.01689642, -0.00453628, ..., 0.01019769,
-0.01523149, 0.02468777])},
{'id': 'doc2',
'text': 'A quick brown fox jumped over the lazy dogs',
'metadata': {'type': 'variant'},
'embedding': array([-1.61350556e-02, 1.02180371e-03, -6.04663728e-05, ...,
8.89423583e-03, -2.04253849e-02, 1.07899625e-02])},
{'id': 'doc3',
'text': 'The weather is sunny today',
'metadata': {'type': 'weather'},
'embedding': array([ 0.01581731, -0.03885713, 0.00716233, ..., -0.02583253,
0.01166436, 0.0264344 ])}]
# Test get
doc_get = client.get("test_collection", ["doc1"])
assert doc_get[0]['id'] == 'doc1', "Get should return correct document"
assert doc_get[0]['text'] == test_docs[0]['text'], "Document text should match"
# Test list
collection_peek = client.list("test_collection", k=2)
assert len(collection_peek) == 2, "List should return 2 documents"
# Test query
results = client.query("test_collection", "fox jumping", k=2)
assert len(results) == 2, "Query should return 2 results"
assert all('fox' in result['text'] for result in results), "Query results should contain relevant documents"
assert all(isinstance(result['distance'], float) for result in results), "Each result should have a distance score"
assert all(isinstance(result['metadata'], dict) for result in results), "Each result should have metadata"
# Test delete
client.delete("test_collection", ["doc1"])
remaining_docs = client.list("test_collection")
assert "doc1" not in [doc['id'] for doc in remaining_docs], "Document should be deleted"
# Test collection deletion
client.delete_collection("test_collection")
assert "test_collection" not in client.list_collections(), "Collection deletion failed"
# Test error cases
client.add_collection("test_collection")
# Adding the same collection twice must raise unless exists_ok=True is passed.
with pytest.raises(ValueError,match="Collection test_collection already exists"):
    client.add_collection("test_collection")

client.add_collection("test_collection", exists_ok=True)
client.delete_collection("test_collection")
SQL
We show here how to create and use an in-memory SQL database and configure its tables using SQLModel objects.
temp_sql_db
temp_sql_db (**kwargs)
Creates a SQLAlchemy engine connected to a shared-memory SQLite DB. Kwargs are passed to SQLAlchemy’s create_engine function.
# Shared in-memory SQLite engine; echo=False is forwarded to create_engine.
engine = temp_sql_db(echo=False)
SQLModel.metadata.clear()
class Hero(SQLModel,table=True,extend_existing=True):
    """Example SQLModel table holding a hero's public and secret names."""
    # Primary key; may be left as None when constructing an instance.
    id: Optional[int] = Field(default=None,primary_key=True)
    name: str
    secret_name: str
    # Optional attribute; defaults to None when not supplied.
    age: Optional[int] = None
SQLModel.metadata.create_all(engine)
def merge_heros(heros:List[Hero]):
    """Merge every hero in `heros` into the database bound to `engine`.

    Args:
        heros: Hero instances to merge via `Session.merge`.
    """
    with Session(engine) as session:
        for hero in heros:
            session.merge(hero)
        # Single commit after all heroes have been merged.
        session.commit()
# Seed the table with three example heroes.
merge_heros([
    Hero(id=1,name="Deadpond", secret_name="Dive"),
    Hero(id=2,name="Spider-Boy", secret_name="Pedro"),
    Hero(id=3,name="Rusty-Man", secret_name="Tommy")])
def get_hero(name:str):
    """Return the Hero row whose `name` column equals `name`.

    Args:
        name: Value matched against `Hero.name`.

    Returns:
        The single matching Hero; `.one()` requires exactly one matching row.
    """
    with Session(engine) as session:
        stmt = select(Hero).where(Hero.name == name)
        result = session.exec(stmt).one()
        return result

get_hero("Deadpond")
Hero(name='Deadpond', id=1, age=None, secret_name='Dive')