Getting Started

RAG example

Working with stringdale involves three steps:

  • Bringing your functions
  • Defining a diagram schema
  • Running a diagram instance

Let's look at an example step by step.

Bring your functions

First, let us import some basic LLM functions, for which we built some thin wrappers:

These functions are for instructional purposes only; you should build your own around the SDKs of the tools suited to your specific LLM needs.

from stringdale.chat import Chat
from stringdale.db import ChromaClient

Let's make an example vector db:

# Client for the example vector db used throughout this tutorial
chroma_client = ChromaClient()

# Toy corpus: every document carries an 'id', a free-form 'text' and a
# 'metadata' dict holding the breed. dog1/dog3 and dog4/dog5 are deliberately
# near-duplicate descriptions of the same breed.
dog_docs = [
    {
        'id': 'dog1',
        'text': 'The Golden Retriever is a friendly, intelligent breed known for its golden coat. They make excellent family pets and are great with children.',
        'metadata': {'breed': 'Golden Retriever'}
    },
    {
        'id': 'dog2', 
        'text': 'German Shepherds are loyal, protective dogs often used in police work. They are highly trainable and good at various tasks.',
        'metadata': {'breed': 'German Shepherd'}
    },
    {
        'id': 'dog3',
        'text': 'The Golden Retriever is a friendly, intelligent breed with a beautiful golden coat. They are wonderful family pets that get along well with kids.',
        'metadata': {'breed': 'Golden Retriever'}  
    },
    {
        'id': 'dog4',
        'text': 'Huskies are energetic working dogs bred for cold climates. They have thick fur and often blue eyes.',
        'metadata': {'breed': 'Husky'}
    },
    {
        'id': 'dog5',
        'text': 'Siberian Huskies are active working dogs that thrive in cold weather. They are known for their thick coats and striking blue eyes.',
        'metadata': {'breed': 'Husky'} 
    }
]

# Create the collection and load the documents into it.
# NOTE(review): exists_ok=True presumably tolerates an already existing collection — confirm
chroma_client.add_collection("dog_docs",exists_ok=True)
chroma_client.upsert(collection_name="dog_docs",docs=dog_docs)
# Free-text query against the collection; returns k=3 documents, each with an added 'distance' field
chroma_client.query(query='the name of the dog is german something...',collection_name='dog_docs',k=3)
[{'id': 'dog2',
  'text': 'German Shepherds are loyal, protective dogs often used in police work. They are highly trainable and good at various tasks.',
  'metadata': {'breed': 'German Shepherd'},
  'distance': 1.1108803749084473},
 {'id': 'dog4',
  'text': 'Huskies are energetic working dogs bred for cold climates. They have thick fur and often blue eyes.',
  'metadata': {'breed': 'Husky'},
  'distance': 1.40901517868042},
 {'id': 'dog1',
  'text': 'The Golden Retriever is a friendly, intelligent breed known for its golden coat. They make excellent family pets and are great with children.',
  'metadata': {'breed': 'Golden Retriever'},
  'distance': 1.4540519714355469}]
# Chat wrapper around the 'gpt-4o-mini' model with a templated message list.
# The {{...}} and {% ... %} placeholders are filled from the keyword arguments
# passed when the chat is called; this template needs `docs` (each doc is
# indexed by 'text') and `question`.
rag_chat = Chat(
    model='gpt-4o-mini',
    messages=[
        {'role':'system','content':'''
            You are a helpful assistant that answers questions about dogs.
            I found the following documents that may be relevant to the user's question:
            {% for doc in docs %}
            {{doc['text']}}
            {% endfor %}
            '''},
        {'role':'user','content':'{{question}}'},
    ]
)
# displaying the object shows the model and the keys the template requires
rag_chat
Chat(model='gpt-4o-mini', required_keys={'docs', 'question'}, seed=42)
await rag_chat(question='the name of the dog is german something...',docs=[
    {'text': 'German Shepherds are loyal, protective dogs often used in police work. They are highly trainable and good at various tasks.'}
])
{'role': 'assistant',
 'content': 'The dog you are referring to is likely a German Shepherd.',
 'meta': {'input_tokens': 171, 'output_tokens': 19}}

Define your diagram

from stringdale import V,E,Define

We define a diagram as follows:

# Build a 'flow' diagram named 'RAG Agent'; the resulting schema is bound to RAG
with Define('RAG Agent',type='flow') as RAG:

    # each node has a name and an optional function
    V('get_docs',chroma_client.query)
    V('chat',rag_chat)
    # nodes that have no function are called "pass through" nodes
    # they simply collect outputs passed to them into a dict
    # Start and End are created automatically as pass through nodes if not defined otherwise
    V('Start')
    V('End')
    
    # we can define edges using the E function
    # edges are defined via a 'source->target(port_mapping)' syntax
    # we cover the port mapping syntax in depth in the next tutorial
    # (the effect of each mapping below can be seen in the run trace later in this tutorial)
    # Start's whole output dict is handed to get_docs as its keyword arguments
    E('Start->get_docs(**)')   
    # the output of get_docs arrives at chat under the key 'docs'
    E('get_docs->chat(docs=.)')
    # Start's 'query' value arrives at chat under the key 'question'
    E('Start->chat(question=query)')
    # no port mapping given: End receives chat's output under the key 0
    E('chat->End')

The RAG object we got from the Define context manager is a Diagram Schema

RAG
<stringdale.base.DiagramSchema>

You can draw the diagram using the draw method

RAG.draw()

You can also define incoming and outgoing edges within the node definition:

# Same 'RAG Agent' diagram, with the edges declared on the nodes themselves
with Define('RAG Agent',type='flow') as RAG:
    V('get_docs',chroma_client.query,
        #inputs are defined using the 'source(port_mapping)' syntax
        #outputs can be likewise defined using the 'target(port_mapping)' syntax
        inputs=['Start(**)'],
    )
    # NOTE(review): 'get_docs(docs)' is presumably shorthand for the
    # 'get_docs->chat(docs=.)' edge of the explicit version — confirm
    V('chat',rag_chat,
        inputs=['get_docs(docs)','Start(question=query)'],
        outputs=['End']
    )
    
RAG.draw()

Running a diagram

We can get an instance of a diagram that can be run by calling the schema object.

d = RAG()
d
<stringdale.base.Diagram>

The most common way to run a diagram is using the run or arun methods, which allow us to handle the trace of each node however we want within the for loop, avoiding the need for complex callback mechanisms.

# Iterate over the run: each iteration yields the trace of one executed node.
# The input dict provides the keys consumed by the Start node's outgoing edges.
for trace in d.run({'query':'tell me some stuff about golden retrievers','collection_name':'dog_docs','k':2}):
    # we can use pprint for easy debugging: it prints the node's name, input and output
    trace.pprint()

# after running a diagram, we can access the output using the output attribute
d.output
Node Start:
{ 'input': { 'collection_name': 'dog_docs',
             'k': 2,
             'query': 'tell me some stuff about golden retrievers'},
  'output': { 'collection_name': 'dog_docs',
              'k': 2,
              'query': 'tell me some stuff about golden retrievers'}}
================================================================================
Node get_docs:
{ 'input': { 'collection_name': 'dog_docs',
             'k': 2,
             'query': 'tell me some stuff about golden retrievers'},
  'output': [ { 'distance': 0.6993070840835571,
                'id': 'dog3',
                'metadata': {'breed': 'Golden Retriever'},
                'text': 'The Golden Retriever is a friendly, intelligent breed '
                        'with a beautiful golden coat. They are wonderful '
                        'family pets that get along well with kids.'},
              { 'distance': 0.7122190594673157,
                'id': 'dog1',
                'metadata': {'breed': 'Golden Retriever'},
                'text': 'The Golden Retriever is a friendly, intelligent breed '
                        'known for its golden coat. They make excellent family '
                        'pets and are great with children.'}]}
================================================================================
Node chat:
{ 'input': { 'docs': [ { 'distance': 0.6993070840835571,
                         'id': 'dog3',
                         'metadata': {'breed': 'Golden Retriever'},
                         'text': 'The Golden Retriever is a friendly, '
                                 'intelligent breed with a beautiful golden '
                                 'coat. They are wonderful family pets that '
                                 'get along well with kids.'},
                       { 'distance': 0.7122190594673157,
                         'id': 'dog1',
                         'metadata': {'breed': 'Golden Retriever'},
                         'text': 'The Golden Retriever is a friendly, '
                                 'intelligent breed known for its golden coat. '
                                 'They make excellent family pets and are '
                                 'great with children.'}],
             'question': 'tell me some stuff about golden retrievers'},
  'output': { 'content': 'The Golden Retriever is a friendly and intelligent '
                         'breed known for its beautiful golden coat. They make '
                         'excellent family pets and are particularly good with '
                         'children, making them wonderful companions for '
                         'families.',
              'meta': {'input_tokens': 198, 'output_tokens': 42},
              'role': 'assistant'}}
================================================================================
Node End:
{ 'input': { 0: { 'content': 'The Golden Retriever is a friendly and '
                             'intelligent breed known for its beautiful golden '
                             'coat. They make excellent family pets and are '
                             'particularly good with children, making them '
                             'wonderful companions for families.',
                  'meta': {'input_tokens': 198, 'output_tokens': 42},
                  'role': 'assistant'}},
  'output': { 'content': 'The Golden Retriever is a friendly and intelligent '
                         'breed known for its beautiful golden coat. They make '
                         'excellent family pets and are particularly good with '
                         'children, making them wonderful companions for '
                         'families.',
              'meta': {'input_tokens': 198, 'output_tokens': 42},
              'role': 'assistant'}}
================================================================================
{'role': 'assistant',
 'content': 'The Golden Retriever is a friendly and intelligent breed known for its beautiful golden coat. They make excellent family pets and are particularly good with children, making them wonderful companions for families.',
 'meta': {'input_tokens': 198, 'output_tokens': 42}}

The Details

Nodes can be any callable, including function objects and class methods. Nodes should be pure functions, meaning that they do not remember how they were called in the past.

For rare use-cases requiring stateful nodes, see the stateful nodes tutorial.

def add(a,b):
    """Return the result of ``a + b``."""
    result = a + b
    return result

class Pow:
    """Callable that raises its argument to a fixed exponent.

    The exponent is supplied at construction time and kept in ``power``;
    calling the instance with ``a`` returns ``a**power``.
    """

    def __init__(self, power):
        self.power = power

    def __call__(self, a):
        exponent = self.power
        # two-argument pow() is the function form of the ** operator
        return pow(a, exponent)

    def __str__(self):
        return 'Pow({})'.format(self.power)
# Diagram with two nodes that are both fed by Start and both write into End
# (the mappings below can be checked against the run trace later in this tutorial)
with Define('hello world') as Schema:
    
    # add receives Start's whole dict as keyword arguments; its result lands in End under 'add_result'
    V('add',add,inputs=['Start(**=.)'],outputs=['End(add_result=.)'])
    # square receives Start's 'b' value as its argument 'a'; its result lands in End under 'square_result'
    V('square',Pow(2),inputs=['Start(a=b)'],outputs=['End(square_result=.)'])
Schema.draw()

Different ways of Running Diagrams

We can run diagrams end-to-end like so:

Running without side effects

d=Schema()
d.run_all({'a':1,'b':2})
{'add_result': 3, 'square_result': 4}

Running with synchronous side effects

But in most cases, we would like to monitor the trace of each node, which is why we usually run it like so:

# fresh diagram instance created from the schema
d=Schema()
# each iteration yields the trace of one node, so it can be handled in the loop body
for trace in d.run({'a':1,'b':2}):
    # put here any side effect you want
    trace.pprint()
Node Start:
{'input': {'a': 1, 'b': 2}, 'output': {'a': 1, 'b': 2}}
================================================================================
Node square:
{'input': {'a': 2}, 'output': 4}
================================================================================
Node add:
{'input': {'a': 1, 'b': 2}, 'output': 3}
================================================================================
Node End:
{ 'input': {'add_result': 3, 'square_result': 4},
  'output': {'add_result': 3, 'square_result': 4}}
================================================================================

Accessing the output via the output attribute of our diagram

d.output
{'square_result': 4, 'add_result': 3}

Running async functions is transparent

stringdale works transparently with both sync and async functions. For example:

import time
import timeit
import asyncio
async def async_add(a,b):
    """Return ``a + b`` after a 0.1 second non-blocking pause."""
    delay_seconds = 0.1
    await asyncio.sleep(delay_seconds)
    total = a + b
    return total

class AsyncPow:
    """Async callable that raises its argument to a fixed exponent.

    The exponent is supplied at construction time and kept in ``power``;
    awaiting a call with ``a`` pauses 0.1 seconds without blocking and then
    returns ``a**power``.
    """

    def __init__(self, power):
        self.power = power

    async def __call__(self, a):
        delay_seconds = 0.1
        await asyncio.sleep(delay_seconds)
        # two-argument pow() is the function form of the ** operator
        return pow(a, self.power)

    def __str__(self):
        return 'AsyncPow({})'.format(self.power)

These can be used exactly the same way as the sync versions:

with Define('hello world') as Schema:
    V('add',async_add,inputs=['Start(**=.)'],outputs=['End(add_result=.)'])
    V('square',AsyncPow(2),inputs=['Start(a=b)'],outputs=['End(square_result=.)'])
Schema.draw()

# fresh diagram instance created from the async schema
d=Schema()
# wall-clock timestamps taken around the whole run; the difference is printed further below
start_time = timeit.default_timer()
for trace in d.run({'a':1,'b':2}):
    trace.pprint()
end_time = timeit.default_timer()
Node Start:
{'input': {'a': 1, 'b': 2}, 'output': {'a': 1, 'b': 2}}
================================================================================
Node add:
{'input': {'a': 1, 'b': 2}, 'output': 3}
================================================================================
Node square:
{'input': {'a': 2}, 'output': 4}
================================================================================
Node End:
{ 'input': {'add_result': 3, 'square_result': 4},
  'output': {'add_result': 3, 'square_result': 4}}
================================================================================

Note that these functions were run asynchronously

print(f'Time taken: {end_time-start_time:.2f} seconds')
Time taken: 0.10 seconds

Async side effects

Suppose we want some asynchronous side effects, like logging our traces to our server. For example, consider this mock class:

class Logserver:
    """Mock log server that keeps every received trace in memory.

    ``logs`` holds the traces passed to ``log`` in the order they were appended.
    """

    def __init__(self):
        self.logs = list()

    async def log(self, trace):
        """Store ``trace``, pause 0.1 seconds, and return ``trace.pretty_name()``."""
        self.logs.append(trace)
        # simulated delay: asyncio.sleep yields to the event loop,
        # whereas a blocking time.sleep would stall it
        await asyncio.sleep(0.1)
        name = trace.pretty_name()
        return name

We can use the asynchronous arun method.

logserver = Logserver()
# coroutines returned by logserver.log; collected here and awaited together after the run
logs = []

start_time = timeit.default_timer()

async for trace in d.arun({'a':1,'b':2}):
    # log() is an async method, so calling it only creates a coroutine object;
    # it is not awaited here, so the loop does not wait for the log call to finish
    log_task = logserver.log(trace)
    logs.append(log_task)

# run all collected log coroutines concurrently; results come back in the order of `logs`
res = await asyncio.gather(*logs)

end_time = timeit.default_timer()
res
['Start', 'add', 'square', 'End']
print(f'Time taken: {end_time-start_time:.2f} seconds')
Time taken: 0.20 seconds