Source code for ibm_watson_openscale.supporting_classes.metrics.llm_metrics


# coding: utf-8

# Copyright 2023 IBM All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TODO: Add parameters validation in every method

from ibm_cloud_sdk_core import BaseService
from ibm_watson_openscale.utils.utils import validate_type, get
from ibm_watson_openscale.supporting_classes.metrics.utils import is_entitled_on_cloud
from ibm_cloud_sdk_core.authenticators import BearerTokenAuthenticator

import pandas as pd
import json

import warnings
warnings.filterwarnings('ignore')

class LLMMetrics():

    def __init__(self, ai_client: "WatsonOpenScaleV2Adapter") -> None:
        validate_type(ai_client, "ai_client", BaseService, True)
        self.ai_client = ai_client
        # Read the token directly from a bearer-token authenticator,
        # otherwise fetch it from the authenticator's token manager.
        if type(self.ai_client.authenticator) is BearerTokenAuthenticator:
            self.token = self.ai_client.authenticator.bearer_token
        else:
            self.token = self.ai_client.authenticator.token_manager.get_token()
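    # A minimal usage sketch (not part of this module): obtaining an LLMMetrics
    # instance through the OpenScale client. The API key and service URL below are
    # placeholders; `client.llm_metrics` is an instance of this class.
    #
    #   from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
    #   from ibm_watson_openscale import APIClient
    #
    #   authenticator = IAMAuthenticator(apikey="<your-api-key>")
    #   client = APIClient(authenticator=authenticator, service_url="<service-url>")
    #   llm_metrics = client.llm_metrics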
    def compute_metrics(self, configuration: dict, sources: pd.DataFrame, predictions: pd.DataFrame,
                        references: pd.DataFrame, custom_evaluators=[], **kwargs):
        """
        Compute LLM based metrics based on the configuration.

        :param dict configuration: the metrics configuration (see the example below).
        :param DataFrame sources: data frame containing the input data (empty data frame if not required).
        :param DataFrame predictions: data frame containing the prediction data (empty data frame if not required).
        :param DataFrame references: data frame containing the reference data (empty data frame if not required).
        :param list custom_evaluators: list of custom evaluator functions that compute additional custom metrics.
        :return: key/value pairs where each key is a metric name and each value holds the results for that metric.
        :rtype: dict

        This is how the configuration parameter dict will look:

        >>> from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup, LLMSummarizationMetrics, HAP_SCORE

        metric_config = {
            "configuration": {
                LLMTextMetricGroup.SUMMARIZATION.value: {  # this is the metric group
                    LLMSummarizationMetrics.ROUGE_SCORE.value: {  # an individual metric, with its specific parameters if required
                        "use_aggregator": True,
                        "use_stemmer": True
                    },
                    LLMSummarizationMetrics.SARI.value: {},
                    LLMSummarizationMetrics.BLEURT_SCORE.value: {},
                    HAP_SCORE: {},
                    LLMSummarizationMetrics.SACREBLEU.value: {},
                    LLMSummarizationMetrics.WIKI_SPLIT.value: {},
                    LLMSummarizationMetrics.METEOR.value: {},
                    LLMSummarizationMetrics.NORMALIZED_RECALL.value: {},
                    LLMSummarizationMetrics.NORMALIZED_PRECISION.value: {},
                    LLMSummarizationMetrics.NORMALIZED_F1_SCORE.value: {}
                }
            }
        }

        A way you might use me is:

        >>> client.llm_metrics.compute_metrics(configuration, sources, predictions, references)

        You can pass custom_evaluators as an argument to compute custom metrics, e.g.:

        def fun1(sources: pd.DataFrame, predictions: pd.DataFrame, references: pd.DataFrame):
            # compute custom metrics and return them as a dict

        custom_evaluators = [fun1]

        >>> client.llm_metrics.compute_metrics(configuration, sources, predictions, references, custom_evaluators=custom_evaluators)
        """
        metrics = {}
        self.__validate_params(configuration, sources, predictions, references)
        try:
            metric_manager_module = __import__(
                "ibm_metrics_plugin.metrics.llm.core.llm_metrics_manager",
                fromlist=["LLMMetricManager"])
        except Exception:
            msg = "Unable to find the ibm-metrics-plugin library with LLM support to compute metrics. " \
                  "Please install it using `pip install ibm-metrics-plugin`"
            raise Exception(msg)

        # Allow metrics computation only if the user has an entitled (standard) plan on cloud
        self.__check_entitlement()

        llm_metric_manager = getattr(metric_manager_module, "LLMMetricManager")(configuration, **kwargs)
        metrics = llm_metric_manager.compute(sources, predictions, references, **kwargs)
        for fun in custom_evaluators:
            custom_metric = fun(sources, predictions, references, **kwargs)
            metrics.update(custom_metric)
        return metrics
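    # A hedged end-to-end sketch of compute_metrics, following the docstring above.
    # It assumes ibm-metrics-plugin is installed; the "text" column name and the
    # sample rows are illustrative only, not required by this module.
    #
    #   import pandas as pd
    #   from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup, LLMSummarizationMetrics
    #
    #   config = {
    #       "configuration": {
    #           LLMTextMetricGroup.SUMMARIZATION.value: {
    #               LLMSummarizationMetrics.ROUGE_SCORE.value: {"use_aggregator": True, "use_stemmer": True}
    #           }
    #       }
    #   }
    #   sources = pd.DataFrame({"text": ["Long input document ..."]})
    #   predictions = pd.DataFrame({"text": ["Model generated summary"]})
    #   references = pd.DataFrame({"text": ["Human written summary"]})
    #   result = client.llm_metrics.compute_metrics(config, sources, predictions, references)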
    def show_supported_metrics(self):
        """
        List all the supported LLM based metrics for the different prompt types.
        """
        self.__show_supported_metrics()
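    # For example, to discover the metric names available for each prompt type
    # before building the configuration dict:
    #
    #   client.llm_metrics.show_supported_metrics()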
    def __show_supported_metrics(self):
        try:
            metric_type_module = __import__(
                "ibm_metrics_plugin.metrics.llm.utils.constants",
                fromlist=["LLMSummarizationMetrics", "LLMGenerationMetrics", "LLMExtractionMetrics", "LLMQAMetrics"])
        except Exception:
            msg = "Unable to find the ibm-metrics-plugin library with LLM support to list metrics. " \
                  "Please install it using `pip install ibm-metrics-plugin`"
            raise Exception(msg)

        # Print the supported metrics for each prompt type
        metric_groups = [
            ("LLMSummarizationMetrics", "text summarization"),
            ("LLMGenerationMetrics", "text generation"),
            ("LLMExtractionMetrics", "text extraction"),
            ("LLMQAMetrics", "question and answer"),
        ]
        for class_name, task_name in metric_groups:
            metric_type = getattr(metric_type_module, class_name)
            print("The following {} metrics are supported".format(task_name))
            for m in metric_type:
                print("  {}".format(m))
            print(" ---------- ")

    def __validate_params(self, configuration, sources, predictions, references):
        validate_type(configuration, "configuration", dict, True)
        validate_type(sources, "sources", [pd.DataFrame], False)
        validate_type(predictions, "predictions", [pd.DataFrame], True)
        validate_type(references, "references", [pd.DataFrame], False)

    def __check_entitlement(self):
        # Metrics computation is allowed on cloud only for entitled (standard) plans
        if self.ai_client.is_cp4d is not True:
            is_entitled_on_cloud(self.ai_client.service_url, self.ai_client.service_instance_id, self.token)
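# A hedged sketch of a custom evaluator, illustrating the contract described in
# compute_metrics: it receives the sources/predictions/references data frames
# (plus any extra kwargs) and returns a dict mapping metric name to value. The
# metric name and the use of the first column are illustrative assumptions.
#
#   def average_prediction_length(sources: pd.DataFrame, predictions: pd.DataFrame,
#                                 references: pd.DataFrame, **kwargs) -> dict:
#       # Average whitespace-token count of the generated text, as a simple custom metric
#       lengths = predictions.iloc[:, 0].astype(str).str.split().str.len()
#       return {"average_prediction_length": float(lengths.mean())}
#
#   metrics = client.llm_metrics.compute_metrics(
#       config, sources, predictions, references,
#       custom_evaluators=[average_prediction_length])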