The Evaluators API provides powerful tools for automated evaluation and quality assessment of AI responses. Create custom evaluators, run evaluations on datasets, and get detailed scoring and feedback to improve your AI applications.
```python
from keywordsai import KeywordsAI

# Initialize client
client = KeywordsAI(api_key="your-api-key")

# Create a simple evaluator
evaluator = client.evaluators.create(
    name="Response Quality",
    description="Evaluates response quality and relevance",
    criteria=[
        {
            "name": "relevance",
            "description": "How relevant is the response to the question?",
            "scale": {"min": 1, "max": 10}
        },
        {
            "name": "accuracy",
            "description": "How accurate is the information provided?",
            "scale": {"min": 1, "max": 10}
        }
    ]
)

# Run evaluation
result = client.evaluators.evaluate(
    evaluator_id=evaluator['id'],
    input_text="What is the capital of France?",
    response_text="The capital of France is Paris.",
    context="Geography question about European capitals"
)

print(f"Overall score: {result['overall_score']}")
print(f"Relevance: {result['criteria_scores']['relevance']}")
print(f"Accuracy: {result['criteria_scores']['accuracy']}")
```
```python
# Evaluator Management
client.evaluators.create(...)            # Create new evaluator
client.evaluators.list(...)              # List evaluators
client.evaluators.get(evaluator_id)      # Get evaluator details
client.evaluators.update(...)            # Update evaluator
client.evaluators.delete(evaluator_id)   # Delete evaluator

# Evaluation Operations
client.evaluators.evaluate(...)          # Run single evaluation
client.evaluators.batch_evaluate(...)    # Run batch evaluation
client.evaluators.get_evaluation(...)    # Get evaluation results
client.evaluators.list_evaluations(...)  # List evaluation history
```
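The batch and history methods follow the same pattern as the single-evaluation call. The sketch below is illustrative only: the parameter names (`items`, `limit`) and the shape of the returned objects are assumptions, since only the method names are documented above.

```python
# Hypothetical sketch of batch evaluation and result history.
# `items`, `limit`, and the response fields are assumptions; only the
# method names appear in the reference above.

batch = client.evaluators.batch_evaluate(
    evaluator_id=evaluator['id'],
    items=[
        {
            "input_text": "What is the capital of France?",
            "response_text": "The capital of France is Paris."
        },
        {
            "input_text": "What is 2 + 2?",
            "response_text": "2 + 2 equals 4."
        }
    ]
)

# Fetch a single evaluation result, or page through recent history
evaluation = client.evaluators.get_evaluation(batch['evaluation_ids'][0])
history = client.evaluators.list_evaluations(evaluator_id=evaluator['id'], limit=20)
```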
```python
# All methods have async equivalents
await client.evaluators.create(...)
await client.evaluators.evaluate(...)
await client.evaluators.batch_evaluate(...)
# ... and so on
```
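The awaitable calls compose with `asyncio` as you would expect. The sketch below assumes the same client object exposes the awaitable methods shown above; if the SDK instead ships a dedicated async client class, swap it in here.

```python
import asyncio

async def evaluate_async(client, evaluator_id: str) -> dict:
    # Sketch only: assumes the awaitable form shown above. Substitute a
    # dedicated async client class if the SDK provides one instead.
    result = await client.evaluators.evaluate(
        evaluator_id=evaluator_id,
        input_text="What is the capital of France?",
        response_text="The capital of France is Paris."
    )
    return result

# asyncio.run(evaluate_async(client, evaluator['id']))
```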
```python
try:
    result = client.evaluators.evaluate(
        evaluator_id="eval_123",
        input_text="Question",
        response_text="Answer"
    )
except Exception as e:
    if "evaluator not found" in str(e).lower():
        print("Evaluator does not exist")
    elif "quota exceeded" in str(e).lower():
        print("Evaluation quota exceeded")
    elif "invalid input" in str(e).lower():
        print("Invalid evaluation input")
    else:
        print(f"Evaluation error: {e}")
```
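For transient failures such as quota errors, a simple retry wrapper on top of the same `evaluate` call can help. This is a minimal sketch, not SDK functionality: it matches errors on message text exactly as in the example above, and the attempt count and backoff delays are arbitrary.

```python
import time

def evaluate_with_retry(client, max_attempts: int = 3, **kwargs) -> dict:
    """Retry client.evaluators.evaluate on quota errors with exponential backoff.

    Illustrative only: retry counts and delays are arbitrary, and errors
    are matched on message text as in the error-handling example above.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            return client.evaluators.evaluate(**kwargs)
        except Exception as e:
            retryable = "quota exceeded" in str(e).lower()
            if not retryable or attempt == max_attempts:
                raise
            time.sleep(2 ** attempt)  # back off: 2s, 4s, 8s, ...

# result = evaluate_with_retry(
#     client,
#     evaluator_id="eval_123",
#     input_text="Question",
#     response_text="Answer",
# )
```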
1. Define Your Evaluation Needs: Identify which aspects of responses you want to evaluate.
2. Create Evaluators: Set up evaluators with appropriate criteria and scoring.
3. Test and Validate: Run evaluations on sample data to validate performance.
4. Integrate: Incorporate evaluations into your workflow (see the sketch after this list).
5. Monitor and Improve: Track evaluation results and refine criteria as needed.
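One concrete integration pattern is to gate or log responses based on the overall score. The sketch below is hypothetical: the threshold value and the `generate_response` stub are placeholders for your own application logic, while the `evaluate` call and result fields mirror the quick-start example above.

```python
QUALITY_THRESHOLD = 7.0  # placeholder threshold on the 1-10 scale used above

def generate_response(question: str) -> str:
    # Placeholder for your own generation logic (e.g. an LLM call);
    # not part of this SDK.
    return "The capital of France is Paris."

def generate_and_check(client, evaluator_id: str, question: str) -> dict:
    answer = generate_response(question)

    # Evaluate the freshly generated answer with the evaluator created earlier
    result = client.evaluators.evaluate(
        evaluator_id=evaluator_id,
        input_text=question,
        response_text=answer,
    )

    if result['overall_score'] < QUALITY_THRESHOLD:
        # Route low-scoring answers for review, regeneration, or logging
        print(f"Low quality ({result['overall_score']}): flagging for review")

    return {"answer": answer, "evaluation": result}

# output = generate_and_check(client, evaluator['id'], "What is the capital of France?")
```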
The Evaluators API provides the foundation for building robust, automated quality assurance into your AI applications, ensuring consistent, high-quality outputs that meet your specific requirements.