# coding: utf-8
# Copyright 2023 IBM All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# TODO: Add parameters validation in every method
from ibm_cloud_sdk_core import BaseService
from ibm_watson_openscale.utils.utils import validate_type,get
from ibm_watson_openscale.supporting_classes.metrics.utils import is_entitled_on_cloud
from ibm_cloud_sdk_core.authenticators import BearerTokenAuthenticator
import pandas as pd
import json
import warnings
warnings.filterwarnings('ignore')
class LLMMetrics():
    """Compute and list LLM (Large Language Model) quality metrics.

    Metric computation is delegated to the optional ``ibm-metrics-plugin``
    package, which is imported lazily so this client can be used without it
    until an LLM metric is actually requested.
    """

    def __init__(self, ai_client: "WatsonOpenScaleV2Adapter") -> None:
        validate_type(ai_client, "ai_client", BaseService, True)
        self.ai_client = ai_client
        # A BearerTokenAuthenticator holds the token directly; every other
        # authenticator exposes it through its token manager.
        if isinstance(self.ai_client.authenticator, BearerTokenAuthenticator):
            self.token = self.ai_client.authenticator.bearer_token
        else:
            self.token = self.ai_client.authenticator.token_manager.get_token()

    def compute_metrics(self, configuration: dict, sources: pd.DataFrame, predictions: pd.DataFrame, references: pd.DataFrame, custom_evaluators=None, **kwargs) -> dict:
        """
        Compute LLM based metrics based on the configuration.

        :param dict configuration: metric group / metric specific configuration (see example below).
        :param DataFrame sources: data frame containing the input data (if required or else empty dataframe).
        :param DataFrame predictions: data frame containing the model predictions (if required or else empty dataframe).
        :param DataFrame references: data frame containing the reference data (if required or else empty dataframe).
        :param List custom_evaluators: List of custom evaluator functions that compute additional custom metrics.
        :return: Key/Value pair where key is the metric name and value is an object consisting of the metric results for all individual metrics.
        :rtype: dict

        This is how the configuration parameter dict will look like

        >>>
        from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup, LLMSummarizationMetrics, HAP_SCORE
        metric_config = {
            "configuration": {
                LLMTextMetricGroup.SUMMARIZATION.value: {  # This is metric group
                    LLMSummarizationMetrics.ROUGE_SCORE.value: {  # This is individual metric and contains its specific parameters if required
                        "use_aggregator": True,
                        "use_stemmer": True
                    },
                    LLMSummarizationMetrics.SARI.value: {  # This is individual metric and contains its specific parameters if required
                    },
                    LLMSummarizationMetrics.BLEURT_SCORE.value: {},
                    HAP_SCORE: {},
                    LLMSummarizationMetrics.SACREBLEU.value: {},
                    LLMSummarizationMetrics.WIKI_SPLIT.value: {},
                    LLMSummarizationMetrics.METEOR.value: {},
                    LLMSummarizationMetrics.NORMALIZED_RECALL.value: {},
                    LLMSummarizationMetrics.NORMALIZED_PRECISION.value: {},
                    LLMSummarizationMetrics.NORMALIZED_F1_SCORE.value: {},
                }
            }
        }

        A way you might use me is:

        >>> client.llm_metrics.compute_metrics(configuration, sources, predictions, references)

        User can pass custom_evaluators as argument to compute custom metrics.
        eg: def fun1(sources: pd.DataFrame, predictions: pd.DataFrame, references: pd.DataFrame):
                # compute custom metrics and returns it as a dict
            custom_evaluators = [fun1]

        >>> client.llm_metrics.compute_metrics(configuration, sources, predictions, references, custom_evaluators = custom_evaluators)
        """
        self.__validate_params(configuration, sources, predictions, references)
        try:
            # Lazy import: ibm-metrics-plugin is an optional dependency.
            metric_manager_module = __import__(
                "ibm_metrics_plugin.metrics.llm.core.llm_metrics_manager",
                fromlist=["LLMMetricManager"])
        except Exception as e:
            msg = "Unable to find metric-plugins library with LLM support to compute metrics. Please install it using `pip install ibm-metrics-plugin`"
            raise Exception(msg) from e
        # Allow user to compute metrics only if he has standard plan on cloud
        self.__check_entitlement()
        llm_metric_manager = getattr(metric_manager_module, "LLMMetricManager")(configuration, **kwargs)
        metrics = llm_metric_manager.compute(sources, predictions, references, **kwargs)
        # Merge results from user-supplied evaluators on top of built-in metrics.
        for evaluator in (custom_evaluators or []):
            custom_metric = evaluator(sources, predictions, references, **kwargs)
            metrics.update(custom_metric)
        return metrics

    def show_supported_metrics(self):
        """
        List all the supported LLM based metrics for different prompt types
        """
        self.__show_supported_metrics()

    def __show_supported_metrics(self):
        # Enum class name in ibm_metrics_plugin -> display name for the prompt type.
        metric_groups = (
            ("LLMSummarizationMetrics", "Text Summarization"),
            ("LLMGenerationMetrics", "Text Generation"),
            ("LLMExtractionMetrics", "Text Extraction"),
            ("LLMQAMetrics", "Question and Answer"),
        )
        try:
            metric_type_module = __import__(
                "ibm_metrics_plugin.metrics.llm.utils.constants",
                fromlist=[name for name, _ in metric_groups])
        except Exception as e:
            msg = "Unable to find metric-plugins library with LLM support to list metrics. Please install it using `pip install ibm-metrics-plugin`"
            raise Exception(msg) from e
        for enum_name, display_name in metric_groups:
            print("Following {} metrics are supported".format(display_name))
            for m in getattr(metric_type_module, enum_name):
                print("  {}".format(m))
            print(" ---------- ")

    def __validate_params(self, configuration, sources, predictions, references):
        # Only configuration and predictions are mandatory; sources and
        # references may be omitted depending on the metric group.
        validate_type(configuration, "configuration", dict, True)
        validate_type(sources, "sources", [pd.DataFrame], False)
        validate_type(predictions, "predictions", [pd.DataFrame], True)
        validate_type(references, "references", [pd.DataFrame], False)

    def __check_entitlement(self):
        # Allow user to compute metrics only if he has standard plan on cloud;
        # CP4D (on-prem) installations are not entitlement-checked.
        if self.ai_client.is_cp4d is not True:
            is_entitled_on_cloud(self.ai_client.service_url, self.ai_client.service_instance_id, self.token)