Sample GraphQL Queries
Currently, you can query the inference logs and the evaluations for those inferences via the GraphQL API. The GraphQL API expects a JSON body with two essential things: “query” and “variables”.
Here are some sample queries, along with their corresponding variables and responses:
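For reference, here is a minimal Python sketch of how such a request could be sent with the requests library. The endpoint URL and API-key header name below are placeholders (assumptions, not the actual values); substitute the ones for your account.

import requests

# Placeholder endpoint and auth header; these names are illustrative
# assumptions, so replace them with your actual GraphQL URL and API key.
GRAPHQL_URL = "https://your-graphql-endpoint/graphql"
HEADERS = {"api-key": "YOUR_API_KEY"}

def run_query(query: str, variables: dict) -> dict:
    """POST a GraphQL query and its variables as a JSON body and return the parsed response."""
    response = requests.post(
        GRAPHQL_URL,
        json={"query": query, "variables": variables},
        headers=HEADERS,
        timeout=30,
    )
    response.raise_for_status()
    return response.json()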
Inference Log Queries
1. Get paginated inference logs:
query GetPromptRuns($limit: Int!, $page: Int!) {
getPromptRunsByFilters(limit: $limit, page: $page) {
id
org_id
prompt_slug
language_model_id
prompt_response
prompt_tokens
}
}
Variables for the above query:
{
"limit": 2,
"page": 0
}
Response:
{
"data": {
"getPromptRunsByFilters": [
{
"id": "647dcf49-9f15-492c-af8d-858d43762e18",
"org_id": "vivek_local",
"prompt_slug": "greetings/v1",
"language_model_id": "gpt-4",
"prompt_response": "Bonjour, monde",
"prompt_tokens": 50
},
{
"id": "6d4ea5a4-ca75-45da-af62-a9556811c604",
"org_id": "vivek_local",
"prompt_slug": "yc-query/v4",
"language_model_id": "gpt-3.5-turbo",
"prompt_response": "YC invests $125,000 in every startup in exchange for 7% equity. This is the expected response. Contact us at contact@example.com.",
"prompt_tokens": 149
}
]
}
}
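To fetch more than one page, you can keep incrementing page until an empty list comes back. The sketch below reuses the hypothetical run_query helper from above and assumes that an exhausted result set is returned as an empty list.

GET_PROMPT_RUNS = """
query GetPromptRuns($limit: Int!, $page: Int!) {
  getPromptRunsByFilters(limit: $limit, page: $page) {
    id
    prompt_slug
    prompt_tokens
  }
}
"""

def iter_prompt_runs(limit: int = 50):
    """Yield inference logs page by page until the API returns an empty page."""
    page = 0
    while True:
        data = run_query(GET_PROMPT_RUNS, {"limit": limit, "page": page})
        runs = data["data"]["getPromptRunsByFilters"]
        if not runs:
            break
        yield from runs
        page += 1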
2. Get evaluations of these inference logs:
query GetPromptRuns($limit: Int!, $page: Int!) {
getPromptRunsByFilters(limit: $limit, page: $page) {
id
prompt_slug
language_model_id
environment
prompt_run_topic {
topic {
label
}
}
eval_result {
id
results
eval_name
eval_description
eval_result_metric {
value
eval_result_id
eval_metric {
type
label
description
}
}
}
}
}
Variables for the above query:
{
"limit": 1,
"page": 0
}
Response:
{
"data": {
"getPromptRunsByFilters": [
{
"id": "aa0e7a55-eb39-4155-b3ef-e6ac7f5b672c",
"prompt_slug": "yc-query/v1",
"language_model_id": "gpt-3.5-turbo",
"environment": "production",
"eval_result": [
{
"id": "c7318999-b127-4be7-839a-584402772cf8",
"results": [
{
"failed": false,
"reason": "The response provides a clear answer to the user's query by stating that YC invests $125,000 in every startup in exchange for 7% equity. It covers all aspects of the user's query by specifying the amount of investment and the percentage of equity taken. Additionally, it invites further contact for more information.",
"runtime": 2861
}
],
"eval_name": "Answer Completeness",
"eval_description": "Checks if the response is relevant to the user's query",
"eval_result_metric": [
{
"value": 1,
"eval_result_id": "c7318999-b127-4be7-839a-584402772cf8",
"eval_metric": {
"type": "boolean",
"label": "Passed",
"description": "Did the evaluation pass"
}
}
]
},
{
"id": "cb7216bc-1b60-45f0-b124-b54ca5cf766a",
"results": [
{
"failed": true,
"reason": "regex pattern test1 not found in output",
"runtime": 0
}
],
"eval_name": "Regex check",
"eval_description": "Checks if the response matches the provided regex",
"eval_result_metric": [
{
"value": 0,
"eval_result_id": "cb7216bc-1b60-45f0-b124-b54ca5cf766a",
"eval_metric": {
"type": "boolean",
"label": "Passed",
"description": "Did the evaluation pass"
}
}
]
}
]
}
]
}
}
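If you only care about whether each eval passed, the nested eval_result_metric entries can be flattened into a simple name-to-boolean map. The sketch below assumes every eval exposes the boolean "Passed" metric seen in the sample response.

def summarize_evals(prompt_run: dict) -> dict:
    """Map eval_name -> True/False using the boolean "Passed" metric
    shown in the sample response (assumed to be present for each eval)."""
    summary = {}
    for eval_result in prompt_run.get("eval_result", []):
        for metric in eval_result.get("eval_result_metric", []):
            if metric["eval_metric"]["label"] == "Passed":
                summary[eval_result["eval_name"]] = bool(metric["value"])
    return summary

# For the sample log above this yields:
# {"Answer Completeness": True, "Regex check": False}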
3. Get inference logs filtered by created_at:
query GetPromptRunsFilteredByCreatedAt($limit: Int!, $page: Int!, $start_date: String, $end_date: String) {
getPromptRunsByFilters(limit: $limit, page: $page, start_date: $start_date, end_date: $end_date) {
id
org_id
workspace_slug
prompt_slug
language_model_id
prompt_response
prompt_tokens
eval_result {
id
results
eval_name
eval_description
eval_result_metric {
value
eval_result_id
eval_metric {
type
label
description
}
}
}
}
}
Variables for the above query:
{
"limit": 1,
"page": 0,
"start_date": "2024-09-04",
"end_date": "2024-09-06"
}
Response:
{
"data": {
"getPromptRunsByFilters": [
{
"id": "aa0e7a55-eb39-4155-b3ef-e6ac7f5b672c",
"prompt_slug": "yc-query/v1",
"language_model_id": "gpt-3.5-turbo",
"environment": "production",
"eval_result": [
{
"id": "c7318999-b127-4be7-839a-584402772cf8",
"results": [
{
"failed": false,
"reason": "The response provides a clear answer to the user's query by stating that YC invests $125,000 in every startup in exchange for 7% equity. It covers all aspects of the user's query by specifying the amount of investment and the percentage of equity taken. Additionally, it invites further contact for more information.",
"runtime": 2861
}
],
"eval_name": "Answer Completeness",
"eval_description": "Checks if the response is relevant to the user's query",
"eval_result_metric": [
{
"value": 1,
"eval_result_id": "c7318999-b127-4be7-839a-584402772cf8",
"eval_metric": {
"type": "boolean",
"label": "Passed",
"description": "Did the evaluation pass"
}
}
]
},
{
"id": "cb7216bc-1b60-45f0-b124-b54ca5cf766a",
"results": [
{
"failed": true,
"reason": "regex pattern test1 not found in output",
"runtime": 0
}
],
"eval_name": "Regex check",
"eval_description": "Checks if the response matches the provided regex",
"eval_result_metric": [
{
"value": 0,
"eval_result_id": "cb7216bc-1b60-45f0-b124-b54ca5cf766a",
"eval_metric": {
"type": "boolean",
"label": "Passed",
"description": "Did the evaluation pass"
}
}
]
}
]
}
]
}
}
Note that the start_date and end_date should be in the format YYYY-MM-DD, and the end_date should be greater than the start_date. Both dates are inclusive.
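As a convenience, the variables can be built programmatically. The sketch below formats an inclusive date window (the last seven days, as an arbitrary example) in the required YYYY-MM-DD format.

from datetime import date, timedelta

# Build an inclusive date window covering the last seven days (an arbitrary
# example range) in the required YYYY-MM-DD format.
end_date = date.today()
start_date = end_date - timedelta(days=7)
variables = {
    "limit": 10,
    "page": 0,
    "start_date": start_date.strftime("%Y-%m-%d"),
    "end_date": end_date.strftime("%Y-%m-%d"),
}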
4. Get paginated inference logs with custom attribute filters:
query GetPromptRuns($limit: Int!, $page: Int!, $customAttributes: [CustomAttributeFilter]) {
getPromptRunsByFilters(limit: $limit, page: $page, customAttributes: $customAttributes) {
id
org_id
prompt_slug
language_model_id
prompt_response
prompt_tokens
prompt_run_custom_attribute {
key
value
}
}
}
Variables for the above query:
{
"limit": 2,
"page": 0,
"customAttributes": [
{"key": "example_key", "value": "example_value"}
]
}
Response:
{
"data": {
"getPromptRunsByFilters": [
{
"id": "647dcf49-9f15-492c-af8d-858d43762e18",
"org_id": "vivek_local",
"prompt_slug": "greetings/v1",
"language_model_id": "gpt-4",
"prompt_response": "Bonjour, monde",
"prompt_tokens": 50,
"prompt_run_custom_attribute": [
{
"key": "example_key",
"value": "example_value"
}
]
},
{
"id": "6d4ea5a4-ca75-45da-af62-a9556811c604",
"org_id": "vivek_local",
"prompt_slug": "yc-query/v4",
"language_model_id": "gpt-3.5-turbo",
"prompt_response": "YC invests $125,000 in every startup in exchange for 7% equity. This is the expected response. Contact us at contact@example.com.",
"prompt_tokens": 149,
"prompt_run_custom_attribute": [
{
"key": "example_key",
"value": "example_value"
}
]
}
]
}
}
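If your application tracks custom attributes as a plain dictionary, they can be converted into the list of key/value objects that the customAttributes variable expects. This is a small illustrative sketch; the to_custom_attribute_filters helper is not part of any SDK.

def to_custom_attribute_filters(attributes: dict) -> list:
    """Convert {"example_key": "example_value"} into the
    [{"key": ..., "value": ...}] list shape expected by customAttributes."""
    return [{"key": k, "value": v} for k, v in attributes.items()]

variables = {
    "limit": 2,
    "page": 0,
    "customAttributes": to_custom_attribute_filters({"example_key": "example_value"}),
}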
Dataset Queries
1. Get all Datasets:
query GetDatasets {
getDatasets {
id
name
}
}
Response:
{
"data": {
"getDatasets": [
{
"id": "fb292fef-cf9e-49c7-a167-63f39034e693",
"name": "RECENT_50_LOGS_08/05/2024"
}
]
}
}
2. Get a dataset with rows:
query GetDataset($datasetId: String!, $limit: Int) {
getDataset(datasetId: $datasetId, limit: $limit) {
id
name
rows {
query
response
eval_results {
id
metric_id
metric_value
explanation
eval_name
eval_type
eval_config
}
}
}
}
Variables for the above query:
{
"datasetId": "fb292fef-cf9e-49c7-a167-63f39034e693",
"limit": 2
}
Response:
{
"data": {
"getDataset": {
"id": "fb292fef-cf9e-49c7-a167-63f39034e693",
"name": "RECENT_50_LOGS_08/05/2024",
"rows": [
{
"query": "Hi, Who are you?",
"response": "Hello! I am a language model AI trained to provide information and assist with a variety of tasks. How can I help you today?",
"eval_results": [
{
"id": "c3ccaab4-2678-4ebb-8faa-bb64bdd147c6",
"metric_id": "passed",
"metric_value": "1",
"explanation": "The response can be inferred from the context provided. The response clearly states that the AI is a language model trained to provide information and assist with tasks, which aligns with the context of the AI introducing itself and offering help.",
"eval_name": "Response Faithfulness",
"eval_type": "Irftc",
"eval_config": {}
}
]
},
{
"query": "I would like to get a refund on a pair of shoes I purchased online",
"response": "The approaches to Task Decomposition include using LLM with simple prompting, task-specific instructions, and human inputs. Task decomposition involves breaking down large tasks into smaller subgoals for efficient handling of complex tasks and reflecting on past actions for refinement. Challenges in long-term planning and task decomposition include planning over a lengthy history and adjusting plans when faced with unexpected errors.",
"eval_results": [
{
"id": "4a5bb2ed-b9bd-42c3-8059-a7d3f1ec58a0",
"metric_id": "passed",
"metric_value": "1",
"explanation": "The response can be inferred from the context provided. The response elaborates on the approaches to Task Decomposition, the process involved, and the challenges faced, all of which are directly related to the initial context.",
"eval_name": "Response Faithfulness",
"eval_type": "Irftc",
"eval_config": {}
}
]
}
]
}
}
}
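Building on the hypothetical run_query helper above, dataset rows and their eval results can be post-processed locally. The sketch below computes a pass rate by treating a metric_value of "1" as a pass, which is an assumption based on the sample response.

GET_DATASET = """
query GetDataset($datasetId: String!, $limit: Int) {
  getDataset(datasetId: $datasetId, limit: $limit) {
    id
    name
    rows {
      query
      response
      eval_results { eval_name metric_id metric_value }
    }
  }
}
"""

def dataset_pass_rate(dataset_id: str, limit: int = 100) -> float:
    """Fetch a dataset and return the fraction of eval results whose
    metric_value is "1" (treated as a pass, per the sample response)."""
    data = run_query(GET_DATASET, {"datasetId": dataset_id, "limit": limit})
    results = [
        result
        for row in data["data"]["getDataset"]["rows"]
        for result in row["eval_results"]
    ]
    passed = sum(1 for result in results if result["metric_value"] == "1")
    return passed / len(results) if results else 0.0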