Integration guide
Haystack + SourceScore VERITAS
Two Haystack 2.x components: a retriever that pulls signed VERITAS claims, and a verifier that drops any document not backed by a high-confidence claim. Connect them in a normal Pipeline.
Install
pip install haystack-ai requests
A VERITAS retriever component
Haystack 2.x components are plain classes decorated with @component. The run() method declares its outputs via @component.output_types. This one turns VERITAS /search hits into Haystack Documents, carrying the claim id, confidence, and canonical URL in meta.
import requests
from typing import List
from haystack import component, Document
VERITAS = "https://sourcescore.org/api/v1"
@component
class VeritasRetriever:
    """Haystack component that turns VERITAS ``/search`` hits into Documents.

    Each hit becomes one ``Document`` whose content is the claim statement and
    whose ``meta`` carries the claim id, its confidence, a link to the claim
    page and the claim's tags.
    """

    def __init__(self, top_k: int = 5):
        # Sent to the API as the ``limit`` query parameter.
        self.top_k = top_k

    @component.output_types(documents=List[Document])
    def run(self, query: str):
        """Search VERITAS for ``query`` and return the hits as Documents.

        Args:
            query: Text sent as the ``q`` parameter of the search request.

        Returns:
            A dict with a single key, ``documents``, holding one ``Document``
            per entry in the response's ``results`` list (empty if absent).

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        response = requests.get(
            f"{VERITAS}/search",
            params={"q": query, "limit": self.top_k},
            timeout=8,
        )
        response.raise_for_status()
        hits = response.json().get("results", [])

        documents = [
            Document(
                content=hit["statement"],
                # Use the search match score when present, else the claim's
                # own confidence.
                score=hit.get("matchScore", hit["confidence"]),
                meta={
                    "claim_id": hit["id"],
                    "confidence": hit["confidence"],
                    "url": f"https://sourcescore.org/claims/{hit['id']}/",
                    "tags": hit.get("tags", []),
                },
            )
            for hit in hits
        ]
        return {"documents": documents}
A verify component (for any existing retriever)
If your pipeline already has a primary retriever (a vector store, say), drop this verifier in after it. It POSTs each document to /verify and keeps only those that match a signed claim at or above min_confidence, stamping the claim id + confidence onto the document.
import requests
from typing import List
from haystack import component, Document
# Base URL of the VERITAS API. Defined next to the verifier so this snippet
# also works on its own, in a pipeline that does not include the retriever.
VERITAS = "https://sourcescore.org/api/v1"


@component
class VeritasVerifier:
    """Haystack component that keeps only documents backed by a VERITAS claim.

    Each document's content is POSTed to the VERITAS ``/verify`` endpoint
    together with ``min_confidence``. Documents whose response carries a
    ``bestMatch`` are kept and stamped with that claim's id, confidence and
    detail URL; every other document is returned separately as ``dropped``.
    """

    def __init__(self, min_confidence: float = 0.85):
        # Sent to the API as ``minConfidence`` on every verify request.
        self.min_confidence = min_confidence

    @component.output_types(documents=List[Document], dropped=List[Document])
    def run(self, documents: List[Document]):
        """Split ``documents`` into verified and unverified ones.

        Args:
            documents: Documents produced by an upstream component.

        Returns:
            A dict with ``documents`` (those with a matching claim, with
            ``veritas_claim_id``, ``veritas_confidence`` and ``veritas_url``
            written into their ``meta`` in place) and ``dropped`` (the rest).

        Raises:
            requests.HTTPError: If the API responds with an error status.
        """
        kept, dropped = [], []
        for doc in documents:
            # A document without text has nothing to verify; drop it without
            # spending an API call on it.
            if not doc.content:
                dropped.append(doc)
                continue

            response = requests.post(
                f"{VERITAS}/verify",
                json={"claim": doc.content, "minConfidence": self.min_confidence},
                timeout=8,
            )
            # Surface HTTP errors instead of reading an error payload as
            # "no match", which would silently drop every document.
            response.raise_for_status()

            best = response.json().get("bestMatch")
            if not best:
                dropped.append(doc)
                continue

            doc.meta["veritas_claim_id"] = best["id"]
            doc.meta["veritas_confidence"] = best["confidence"]
            doc.meta["veritas_url"] = best.get("detailUrl")
            kept.append(doc)
        return {"documents": kept, "dropped": dropped}
Wire the pipeline
Connect the retriever to a PromptBuilder and an OpenAIGenerator. The prompt instructs the model to cite every fact by claim_id.
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
# Jinja prompt: lists every retrieved claim with its id and confidence, then
# asks for an answer that cites those ids.
template = """Answer using ONLY the verified claims below. Cite every fact with [claim_id].
If the claims do not cover the question, say so — do not improvise.
{% for doc in documents %}
[{{ doc.meta.claim_id }}] {{ doc.content }} (confidence {{ doc.meta.confidence }})
{% endfor %}
Question: {{ query }}
Answer (every fact ends with [claim_id]):"""

pipe = Pipeline()

# Register the three stages under the names used by the connections below.
stages = {
    "retriever": VeritasRetriever(top_k=5),
    "prompt": PromptBuilder(template=template, required_variables=["query"]),
    "llm": OpenAIGenerator(model="gpt-4o-mini"),
}
for stage_name, stage in stages.items():
    pipe.add_component(stage_name, stage)

# retriever -> prompt -> llm
for sender, receiver in (
    ("retriever.documents", "prompt.documents"),
    ("prompt.prompt", "llm.prompt"),
):
    pipe.connect(sender, receiver)

question = "Who introduced the Transformer architecture?"

# The same question feeds both the search and the prompt's query variable.
inputs = {
    "retriever": {"query": question},
    "prompt": {"query": question},
}
result = pipe.run(inputs)

first_reply = result["llm"]["replies"][0]
print(first_reply)
Why a verify step
A retriever returns the closest documents; it does not confirm that those documents, or an answer generated from them, are backed by verified facts. The verifier closes that gap: every kept document is backed by a signed claim with ≥2 primary sources, so the citations the model produces are checkable, not merely asserted. The free tier is 1,000 calls/month with no signup; the API is the same one the rest of these guides use.