Documentation Index
Fetch the complete documentation index at: https://docs.moss.dev/llms.txt
Use this file to discover all available pages before exploring further.
Integrate Moss Semantic Search SDK directly into a LiveKit Voice Agent. This setup allows your voice AI to perform ultra-low latency searches over your custom data to answer user questions in real-time.
Why Use Moss with LiveKit?
Moss delivers sub-10ms semantic retrieval, ensuring your voice agents respond naturally without noticeable delays.
Integration Guide
Installation
Install the Moss SDK:

pip install moss \
  python-dotenv
Environment Setup
Create a .env file in your project root directory with your API keys.

File: .env

# LiveKit Credentials — keep these defaults as-is for local deployment; don't change them.
LIVEKIT_URL=ws://localhost:7880
LIVEKIT_API_KEY=devkey
LIVEKIT_API_SECRET=secret
# Moss Credentials
MOSS_PROJECT_ID=your-moss-id
MOSS_PROJECT_KEY=your-moss-key
# AI Provider Keys
OPENAI_API_KEY=sk-...
DEEPGRAM_API_KEY=your-deepgram-key
Creating the Knowledge Base with Moss
Before the agent can answer questions, you must index your data. Run this script once to upload documents to Moss.

File: build_index.py

import asyncio
import os
from dotenv import load_dotenv
from moss import MossClient, DocumentInfo
load_dotenv()
async def main():
    """Build the Moss knowledge-base index from a small, fixed document set."""
    # Authenticate against Moss using credentials from the environment.
    client = MossClient(
        project_id=os.environ["MOSS_PROJECT_ID"],
        project_key=os.environ["MOSS_PROJECT_KEY"],
    )
    index_name = os.getenv("MOSS_INDEX_NAME", "product-knowledge")

    # Each (id, text) pair below becomes one searchable document.
    entries = [
        ("1", "Our return policy allows returns within 30 days of purchase with a receipt."),
        ("2", "Standard shipping takes 3-5 business days. Express shipping takes 1-2 days."),
        ("3", "Technical support is available 24/7 via email at support@example.com."),
    ]
    docs = [DocumentInfo(id=doc_id, text=text) for doc_id, text in entries]

    print(f"Creating index '{index_name}'...")
    await client.create_index(index_name, docs, model_id="moss-minilm")
    print("Index created successfully.")


if __name__ == "__main__":
    asyncio.run(main())
Run the builder:

python build_index.py

Building the Agent
This implementation uses a Context Injection pattern where Moss search results are automatically injected into the conversation context on every user turn. This is faster and more consistent than tool calling, as the LLM always has relevant context without needing to decide when to search.

Key Architecture:
- Automatic semantic retrieval: Moss is queried automatically on every user message
- Context Injection: Search results are injected into the chat history before the LLM generates a response
- Lower Latency: No LLM “thinking” step to decide whether to search—it just happens
File: agent.py

import asyncio
import logging
import os
from dotenv import load_dotenv
from livekit.plugins import openai, deepgram, silero
from livekit.plugins.turn_detector.english import EnglishModel
from livekit.agents import (
JobContext,
WorkerOptions,
cli,
ChatContext,
ChatMessage,
Agent,
AgentSession,
)
# Moss Import
from moss import MossClient, QueryOptions
load_dotenv()
# Configuration
MOSS_PROJECT_ID = os.getenv("MOSS_PROJECT_ID")
MOSS_PROJECT_KEY = os.getenv("MOSS_PROJECT_KEY")
INDEX_NAME = os.getenv("MOSS_INDEX_NAME", "product-knowledge")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("moss-agent")
class MossSemanticRetrievalAgent(Agent):
    """Voice agent that answers from a Moss knowledge base via context injection.

    On every completed user turn the agent queries the Moss index and
    injects the top results into the chat history as a system message,
    so the LLM always generates with relevant context and never has to
    decide whether to call a search tool.
    """

    def __init__(self, moss_client: MossClient):
        super().__init__(
            instructions="""
You are a helpful customer support voice assistant.
You have access to a knowledge base which will be provided to you as context.
Always answer the user's question based on the provided context.
If the context doesn't contain the answer, politely say you don't know.
"""
        )
        # Shared Moss client; the index is pre-loaded in entrypoint().
        self.moss = moss_client

    async def on_user_turn_completed(self, turn_ctx: ChatContext, new_message: ChatMessage) -> None:
        """Intercept user message -> Search Moss -> Inject Context -> Continue.

        Search failures are logged and swallowed so the agent still responds
        (just without extra context) instead of crashing the session.
        """
        user_query = new_message.text_content
        # Skip the search entirely for empty turns (e.g. no text content):
        # querying Moss with an empty string would only waste a round trip.
        if user_query:
            logger.info("User asked: %s", user_query)
            try:
                # 1. Automatic search against the knowledge-base index.
                results = await self.moss.query(
                    INDEX_NAME,
                    user_query,
                    QueryOptions(top_k=5, alpha=0.8),
                )
                # 2. Context injection: add hits to the chat history as a
                # system message before the LLM generates its response.
                if results.docs:
                    context_str = "\n".join(f"- {d.text}" for d in results.docs)
                    injection = f"Relevant context from knowledge base:\n{context_str}\n\nUse this to answer the user."
                    turn_ctx.add_message(role="system", content=injection)
                    logger.info("Injected context: %s...", context_str[:100])  # Log first 100 chars
                else:
                    logger.info("No relevant context found in Moss index")
            except Exception as e:
                logger.error(f"Moss search failed: {e}", exc_info=True)
        # 3. Proceed with standard generation regardless of search outcome.
        await super().on_user_turn_completed(turn_ctx, new_message)
async def entrypoint(ctx: JobContext):
    """LiveKit worker entrypoint: wire up Moss, the media pipeline, and the agent.

    Connects to the room, pre-loads the Moss index so the first query does
    not pay a cold-start cost, then starts an AgentSession running the
    Moss-backed retrieval agent.
    """
    await ctx.connect()

    # Initialize the Moss client from environment credentials.
    moss_client = MossClient(project_id=MOSS_PROJECT_ID, project_key=MOSS_PROJECT_KEY)

    # Pre-load the index. A missing index is non-fatal: the worker still
    # starts, but queries will fail until the index has been built.
    try:
        await moss_client.load_index(INDEX_NAME)
        logger.info(f"Successfully loaded index: {INDEX_NAME}")
    except Exception as e:
        logger.warning(f"Index not found or failed to load: {e}")
        # Fixed: the builder script in this guide is build_index.py, not upload.py.
        logger.warning("Moss queries will fail until the index is created. Run build_index.py first.")

    # Assemble the voice pipeline: Deepgram STT -> GPT-4o LLM -> OpenAI TTS,
    # with Silero VAD and an English turn-detection model.
    session = AgentSession(
        stt=deepgram.STT(),
        llm=openai.LLM(model="gpt-4o"),
        tts=openai.TTS(),
        turn_detection=EnglishModel(),
        vad=silero.VAD.load(),
    )

    # Start the session with our custom MossSemanticRetrievalAgent.
    await session.start(
        agent=MossSemanticRetrievalAgent(moss_client),
        room=ctx.room,
    )


if __name__ == "__main__":
    cli.run_app(WorkerOptions(entrypoint_fnc=entrypoint))
Running the Agent
First, start the LiveKit server in development mode. Then, in a separate terminal, download any model files the plugins need (e.g. VAD and turn-detector models) and start your worker:

python agent.py download-files
python agent.py console