from stringdale.db import ChromaClient
DBs
Here are examples of wrapping Vector and Relational DBs for use in agents.
Chroma DB
ChromaClient
Signature:
ChromaClient(persist_path=None, embed_model='text-embedding-3-small')
None
Parameter | Type | Default | Description |
---|---|---|---|
add_collection
Add a collection to the database
Signature:
ChromaClient.add_collection(self, name, distance: Literal['l2', 'ip', 'cosine'] = 'l2', metadata=None, exists_ok=False)
Parameter | Type | Default | Description |
---|---|---|---|
name | None | None | Name of the collection to add |
distance | typing.Literal[‘l2’, ‘ip’, ‘cosine’] | l2 | Distance metric to use, one of ‘l2’,‘ip’,‘cosine’. Default is ‘l2’ |
metadata | None | None | Metadata to add to the collection |
exists_ok | None | False | If True, do not raise an error if the collection already exists |
delete
Delete a list of documents from a collection
Signature:
ChromaClient.delete(self, collection_name: str, ids: List[str])
Parameter | Type | Default | Description |
---|---|---|---|
collection_name | <class ‘str’> | None | Name of the collection to delete from |
ids | typing.List[str] | None | List of ids to delete |
delete_collection
Delete a collection from the database
Signature:
ChromaClient.delete_collection(self, name)
Parameter | Type | Default | Description |
---|---|---|---|
name | None | None | Name of the collection to delete |
embed_texts
Embed a list of texts
Signature:
ChromaClient.embed_texts(self, texts: List[str])
Parameter | Type | Default | Description |
---|---|---|---|
texts | typing.List[str] | None | List of texts to embed |
get
Get a list of documents from a collection
Signature:
ChromaClient.get(self, collection_name: str, ids: List[str])
Parameter | Type | Default | Description |
---|---|---|---|
collection_name | <class ‘str’> | None | Name of the collection to get from |
ids | typing.List[str] | None | List of ids to get |
list
Get a list of documents from a collection
Signature:
ChromaClient.list(self, collection_name: str, k: int = None)
Parameter | Type | Default | Description |
---|---|---|---|
collection_name | <class ‘str’> | None | Name of the collection to list |
k | <class ‘int’> | None | Number of results to return |
list_collections
List all collections in the database
Signature:
ChromaClient.list_collections(self)
Parameter | Type | Default | Description |
---|---|---|---|
query
Query a collection for documents similar to a query
Signature:
ChromaClient.query(self, collection_name: str, query: str, k: int = 10, threshold: float = None, where: Dict[str, Any] = None, where_document: Dict[str, Any] = None)
Parameter | Type | Default | Description |
---|---|---|---|
collection_name | <class ‘str’> | None | Name of the collection to query |
query | <class ‘str’> | None | Query to search for |
k | <class ‘int’> | 10 | Number of results to return |
threshold | <class ‘float’> | None | Threshold for filtering results |
where | typing.Dict[str, typing.Any] | None | Filter results by metadata |
where_document | typing.Dict[str, typing.Any] | None | Filter results by document text |
reset
Reset the database
Signature:
ChromaClient.reset(self)
Parameter | Type | Default | Description |
---|---|---|---|
upsert
Upsert a list of documents into a collection
Signature:
ChromaClient.upsert(self, collection_name: str, docs)
Parameter | Type | Default | Description |
---|---|---|---|
collection_name | <class ‘str’> | None | Name of the collection to upsert into |
docs | None | None | List of documents to upsert. docs should be a list of dictionaries with a ‘text’ key, with optional ‘id’ and ‘metadata’ keys |
Examples
# Create a client with default settings, reset the database, and add a
# collection to hold the example documents.
client = ChromaClient()
client.reset()
client.add_collection('test_collection')

# Each document is a dict with a 'text' key plus optional 'id' and
# 'metadata' keys.
test_docs = [
    {'id': 'doc1',
     'text': 'The quick brown fox jumps over the lazy dog',
     'metadata': {'type': 'pangram'}
     },
    {'id': 'doc2',
     'text': 'A quick brown fox jumped over the lazy dogs',
     'metadata': {'type': 'variant'}
     },
    {'id': 'doc3',
     'text': 'The weather is sunny today',
     'metadata': {'type': 'weather'}
     }
]
client.upsert("test_collection", test_docs)
[{'id': 'doc1',
'text': 'The quick brown fox jumps over the lazy dog',
'metadata': {'type': 'pangram'}},
{'id': 'doc2',
'text': 'A quick brown fox jumped over the lazy dogs',
'metadata': {'type': 'variant'}},
{'id': 'doc3',
'text': 'The weather is sunny today',
'metadata': {'type': 'weather'}}]
"test_collection", "fox jumping", k=2) client.query(
[{'id': 'doc1',
'text': 'The quick brown fox jumps over the lazy dog',
'metadata': {'type': 'pangram'},
'distance': 0.9130460619926453},
{'id': 'doc2',
'text': 'A quick brown fox jumped over the lazy dogs',
'metadata': {'type': 'variant'},
'distance': 0.9181089997291565}]
SQLite
from stringdale.db import temp_sql_db
temp_sql_db
Signature:
temp_sql_db(**kwargs)
Creates a SQLAlchemy engine connected to a shared-memory SQLite DB. Kwargs are passed to SQLAlchemy’s create_engine function.
Parameter | Type | Default | Description |
---|---|---|---|
from sqlmodel import SQLModel, Session, select, Field
from typing import Optional,List
# Keyword arguments such as echo are passed on to SQLAlchemy's create_engine.
engine = temp_sql_db(echo=False)
# NOTE(review): presumably clears previously registered tables so this
# example can be re-run in the same process - confirm.
SQLModel.metadata.clear()
class Hero(SQLModel, table=True, extend_existing=True):
    """Example table model with an id, a name, a secret name and an optional age."""

    # Primary key; defaults to None so a Hero can be built without an id.
    id: Optional[int] = Field(default=None, primary_key=True)
    name: str
    secret_name: str
    age: Optional[int] = None
# Create the tables for the models registered on SQLModel.metadata.
SQLModel.metadata.create_all(engine)
def merge_heros(heros: List[Hero]):
    """Merge every given hero into the database, then commit.

    Args:
        heros: Hero instances to pass to ``Session.merge``.
    """
    with Session(engine) as session:
        for hero in heros:
            session.merge(hero)
        # One commit after all heroes have been merged.
        session.commit()
# Merge three example heroes with explicit ids.
merge_heros([
    Hero(id=1, name="Deadpond", secret_name="Dive"),
    Hero(id=2, name="Spider-Boy", secret_name="Pedro"),
    Hero(id=3, name="Rusty-Man", secret_name="Tommy"),
])
def get_hero(name: str):
    """Return the Hero whose ``name`` column equals *name*.

    Args:
        name: Hero name to look up.

    Returns:
        The single matching ``Hero`` row.

    Raises:
        An SQLAlchemy error from ``.one()`` when the query does not match
        exactly one row.
    """
    with Session(engine) as session:
        stmt = select(Hero).where(Hero.name == name)
        result = session.exec(stmt).one()
        return result
"Deadpond") get_hero(
Hero(secret_name='Dive', name='Deadpond', id=1, age=None)