Running a suite of evals

On this page

Demo Video
Example Code
Response Format

Demo Video

Example Code

import os
from athina.evals import DoesResponseAnswerQuery, ContextContainsEnoughInformation, Faithfulness
from athina.loaders import Loader
from athina.keys import AthinaApiKey, OpenAiApiKey
from athina.runner.run import EvalRunner
from athina.datasets import yc_query_mini
import pandas as pd

from dotenv import load_dotenv
load_dotenv()

# Configure an API key.
OpenAiApiKey.set_key(os.getenv('OPENAI_API_KEY'))

# Load the dataset
dataset = [
    {
    "query": "query_string",
    "context": ["chunk_1", "chunk_2"],
    "response": "llm_generated_response_string",
    "expected_response": "ground truth (optional)",
    },
    { ... },
    { ... },
    { ... },
]

# Evaluate a dataset across a suite of eval criteria

EvalRunner.run_suite(
    evals=[
        RagasAnswerCorrectness(),
        RagasContextPrecision(),
        RagasContextRelevancy(),
        RagasContextRecall(),
        RagasFaithfulness(),
        ResponseFaithfulness(),
        Groundedness(),
        ContextSufficiency(),
    ],
    data=dataset,
    max_parallel_evals=10
)

Response Format

Python: Run a single eval Loading data for Evals

Logging

Datasets

Evals

GraphQL API

Deprecated

Running a suite of evals

Demo Video

Example Code

Response Format

Logging

Datasets

Evals

GraphQL API

Deprecated

​Demo Video

​Example Code

​Response Format

Demo Video

Example Code

Response Format