Full Agent
Architecture
User: "I forgot my password, help!"
│
▼
┌─ ValkeyStore.search() ─── semantic cache lookup
│ score ≥ 0.90 → ✅ return cached answer (1ms)
│ score < 0.90 → ❌ continue to LLM
│
├─ ChatBedrockConverse.invoke() ─── LLM call (3-5s)
│
├─ ValkeyStore.put() ─── cache the response
│
└─ ValkeySaver ─── checkpoint conversation state
Step 1: Shared Valkey Connection
from valkey import Valkey
from langgraph_checkpoint_aws import ValkeySaver, ValkeyStore, ValkeyCache
from langchain_aws import ChatBedrockConverse, BedrockEmbeddings

# Single Valkey connection shared across all three components
# (checkpointer, semantic store, exact cache).
# decode_responses=False keeps replies as raw bytes — the checkpointer
# stores serialized (binary) payloads, so text decoding must stay off.
valkey_client = Valkey.from_url("valkey://localhost:6379", decode_responses=False)

# Embedding model used by the semantic store for similarity search.
embeddings = BedrockEmbeddings(
    model_id="amazon.titan-embed-text-v2:0",
    region_name="us-west-2",
)

# LLM invoked on cache misses, via Amazon Bedrock's Converse API.
model = ChatBedrockConverse(
    model="us.anthropic.claude-sonnet-4-20250514-v1:0",
    region_name="us-west-2",
)
Step 2: Initialize All Three Components
# 1. Checkpointer - persists conversation state per thread_id (TTL in seconds).
saver = ValkeySaver(client=valkey_client, ttl=3600)

# 2. Semantic store - vector index used for similarity-based cache lookups.
store = ValkeyStore(
    client=valkey_client,
    index={
        "collection_name": "helpdesk_cache",
        # amazon.titan-embed-text-v2:0 produces 1024-dim vectors by default
        # (256/512/1024 are the supported sizes). The original value of 1536
        # is the OpenAI ada-002 dimension and would not match the Titan
        # embedding output, breaking the KNN search.
        "dims": 1024,
        "embed": embeddings,
        "fields": ["query"],   # only the "query" field is embedded
        "index_type": "hnsw",  # approximate-NN index for fast lookups
    },
    # NOTE(review): default_ttl appears to be in minutes here — 60 matches
    # the 3600s EXPIRE seen on stored keys. Confirm against the library docs.
    ttl={"default_ttl": 60.0},
)
store.setup()  # create the vector index if it does not already exist

# 3. Exact cache - fast key-value cache for byte-identical repeated prompts.
cache = ValkeyCache(client=valkey_client, prefix="llm_cache:", ttl=3600)
Step 3: Build the Agent Graph
from langgraph.graph import StateGraph, MessagesState
# AIMessage is required to wrap cached answers on a semantic-cache hit;
# the original snippet imported only HumanMessage, so the first cache hit
# would raise NameError inside helpdesk_agent.
from langchain_core.messages import HumanMessage, AIMessage
import hashlib

# Minimum similarity score for a cached answer to be reused.
SIMILARITY_THRESHOLD = 0.90
def helpdesk_agent(state: MessagesState):
    """Answer the latest user message, serving from the semantic cache when a
    sufficiently similar past question exists; otherwise call the LLM and
    store the fresh answer so future paraphrases become cache hits.
    """
    question = state["messages"][-1].content

    # 1. Semantic-cache lookup: nearest stored question to this one.
    matches = store.search(("helpdesk",), query=question, limit=1)
    best = matches[0] if matches else None
    if best is not None and best.score >= SIMILARITY_THRESHOLD:
        # Close enough — reuse the stored answer without touching the LLM.
        return {"messages": [AIMessage(content=best.value["answer"])]}

    # 2. Cache miss: ask the model with the full conversation history.
    reply = model.invoke(state["messages"])

    # 3. Remember this Q/A pair; a short content hash keeps keys compact.
    cache_key = hashlib.md5(question.encode()).hexdigest()[:12]
    store.put(
        ("helpdesk",),
        cache_key,
        {"query": question, "answer": reply.content},
    )
    return {"messages": [reply]}
# Assemble a single-node graph and attach the Valkey checkpointer so
# conversation state persists across invocations of the same thread_id.
workflow = StateGraph(MessagesState)
workflow.add_node("agent", helpdesk_agent)
workflow.set_entry_point("agent")
workflow.set_finish_point("agent")
graph = workflow.compile(checkpointer=saver)
Step 4: Run the Complete Flow
import time

# All turns share one thread_id so the checkpointer threads them together.
config = {"configurable": {"thread_id": "user-42"}}

# First question: nothing cached yet, so the agent calls Bedrock.
start = time.time()
r1 = graph.invoke(
    {"messages": [HumanMessage(content="How do I reset my password?")]},
    config,
)
miss_ms = (time.time() - start) * 1000
print(f"❌ MISS: {miss_ms:.0f}ms")

# Paraphrased question: semantically similar, so the cache answers instead.
start = time.time()
r2 = graph.invoke(
    {"messages": [HumanMessage(content="I forgot my password, help!")]},
    config,
)
hit_ms = (time.time() - start) * 1000
print(f"✅ HIT: {hit_ms:.1f}ms")

# Output:
# ❌ MISS: 4200ms
# ✅ HIT: 3.1ms
Complete Valkey Command Sequence (cache miss):
# 1. Semantic cache lookup
FT.SEARCH helpdesk_cache_idx "(*)==>[KNN 1 @embedding $vec]" ...
# 2. (No match - call LLM via Bedrock)
# 3. Store response in semantic cache
JSON.SET store:helpdesk_cache:helpdesk:a1b2c3d4e5f6 $ '{...}'
EXPIRE store:helpdesk_cache:helpdesk:a1b2c3d4e5f6 3600
# 4. Checkpoint conversation state
JSON.SET checkpoint:user-42:__empty__:cp_001 $ '{...}'
EXPIRE checkpoint:user-42:__empty__:cp_001 3600
Next Steps
You now have a complete LangGraph agent with Valkey-backed checkpointing, semantic caching, and vector search. To deploy to production, see the ElastiCache for Valkey Getting Started guide — simply swap valkey://localhost:6379 for your cluster's TLS endpoint, valkeys://your-cluster.amazonaws.com:6379.