Source code for bigdl.nano.pytorch.inference.optimizer

#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import torch
from torch import nn
import time
from copy import deepcopy
from typing import Dict, Callable, Tuple, Optional, List, Union, Sequence
from torch.utils.data import DataLoader
from torchmetrics.metric import Metric
from bigdl.nano.utils.inference.common.checker import available_acceleration_combination
from bigdl.nano.utils.inference.common.utils import AccelerationOption,\
    throughput_calculate_helper, format_optimize_result
from bigdl.nano.utils.inference.common.base_optimizer import BaseInferenceOptimizer
from bigdl.nano.utils.log4Error import invalidInputError
from bigdl.nano.pytorch.amp import BF16Model
from bigdl.nano.deps.openvino.openvino_api import PytorchOpenVINOModel
from bigdl.nano.deps.ipex.ipex_api import PytorchIPEXJITModel, PytorchIPEXJITBF16Model
from bigdl.nano.deps.onnxruntime.onnxruntime_api import PytorchONNXRuntimeModel
from bigdl.nano.deps.neural_compressor.inc_api import quantize as inc_quantize
from bigdl.nano.utils.inference.pytorch.model import AcceleratedLightningModule
from bigdl.nano.utils.inference.pytorch.model_utils import get_forward_args, get_input_example
from bigdl.nano.utils.inference.pytorch.metrics import NanoMetric
from bigdl.nano.utils.inference.pytorch.dataset import RepeatDataset, remove_batch_dim_fn
from bigdl.nano.utils.inference.pytorch.dataloader import\
    transform_multiple_input_dataloader_to_inc_mode
from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10, save_model, load_model
import traceback
import warnings
# Filter out useless Userwarnings
warnings.filterwarnings('ignore', category=UserWarning, module='pytorch_lightning')
warnings.filterwarnings('ignore', category=DeprecationWarning, module='pytorch_lightning')
warnings.filterwarnings('ignore', category=UserWarning, module='torch')
warnings.filterwarnings('ignore', category=DeprecationWarning, module='torch')

import os
os.environ['LOGLEVEL'] = 'ERROR'  # remove parital output of inc


class TorchAccelerationOption(AccelerationOption):
    def optimize(self, model, training_data=None, input_sample=None,
                 thread_num=None, logging=False, sample_size_for_pot=100):
        accelerator = self.get_accelerator()
        if self.get_precision() == "fp32":
            # trace
            if accelerator is None and self.ipex is False:
                return model
            if accelerator in ("jit", None):
                acce_model = \
                    InferenceOptimizer.trace(model=model,
                                             accelerator=accelerator,
                                             use_ipex=self.ipex,
                                             # channels_last is only for jit
                                             channels_last=self.channels_last,
                                             input_sample=input_sample)
            else:
                acce_model = \
                    InferenceOptimizer.trace(model=model,
                                             accelerator=accelerator,
                                             input_sample=input_sample,
                                             thread_num=thread_num,
                                             # remove output of openvino
                                             logging=logging)
        else:
            # quantize
            ort_method: str = self.method
            acce_model = \
                InferenceOptimizer.quantize(model=deepcopy(model),
                                            precision=self.get_precision(),
                                            accelerator=accelerator,
                                            use_ipex=self.ipex,
                                            calib_dataloader=training_data,
                                            input_sample=input_sample,
                                            method=ort_method,
                                            thread_num=thread_num,
                                            sample_size=sample_size_for_pot,
                                            # remove output of openvino
                                            logging=logging)
        return acce_model


[docs]class InferenceOptimizer(BaseInferenceOptimizer): # acceleration method combinations, developers may want to register some new # combinations here ALL_INFERENCE_ACCELERATION_METHOD = \ { "original": TorchAccelerationOption(), "fp32_ipex": TorchAccelerationOption(ipex=True), "bf16": TorchAccelerationOption(bf16=True), "bf16_ipex": TorchAccelerationOption(bf16=True, ipex=True), "int8": TorchAccelerationOption(inc=True), "jit_fp32": TorchAccelerationOption(jit=True), "jit_fp32_ipex": TorchAccelerationOption(jit=True, ipex=True), "jit_fp32_ipex_channels_last": TorchAccelerationOption(jit=True, ipex=True, channels_last=True), "openvino_fp32": TorchAccelerationOption(openvino=True), "openvino_int8": TorchAccelerationOption(openvino=True, pot=True), "onnxruntime_fp32": TorchAccelerationOption(onnxruntime=True), "onnxruntime_int8_qlinear": TorchAccelerationOption(onnxruntime=True, inc=True, method="qlinear"), "onnxruntime_int8_integer": TorchAccelerationOption(onnxruntime=True, inc=True, method="integer"), }
[docs] def optimize(self, model: nn.Module, training_data: Union[DataLoader, torch.Tensor, Tuple[torch.Tensor]], validation_data: Optional[Union[DataLoader, torch.Tensor, Tuple[torch.Tensor]]] = None, input_sample: Union[torch.Tensor, Dict, Tuple[torch.Tensor], None] = None, metric: Optional[Callable] = None, direction: str = "max", thread_num: Optional[int] = None, logging: bool = False, latency_sample_num: int = 100, includes: Optional[List[str]] = None, excludes: Optional[List[str]] = None) -> None: ''' This function will give all available inference acceleration methods a try and record the latency, accuracy and model instance inside the Optimizer for future usage. All model instance is setting to eval mode. The available methods are "original", "fp32_ipex", "bf16", "bf16_ipex","int8", "jit_fp32", "jit_fp32_ipex", "jit_fp32_ipex_channels_last", "openvino_fp32", "openvino_int8", "onnxruntime_fp32", "onnxruntime_int8_qlinear" and "onnxruntime_int8_integer". :param model: A torch.nn.Module to be optimized :param training_data: training_data support following formats: | 1. a torch.utils.data.dataloader.DataLoader object for training dataset. | Users should be careful with this parameter since this dataloader | might be exposed to the model, which causing data leak. The | batch_size of this dataloader is important as well, users may | want to set it to the same batch size you may want to use the model | in real deploy environment. E.g. batch size should be set to 1 | if you would like to use the accelerated model in an online service. | | 2. a single torch.Tensor which used for training, this case is used to | accept single sample input x. | | 3. a tuple of torch.Tensor which used for training, this case is used to | accept single sample input (x, y) or (x1, x2) et al. :param validation_data: (optional) validation_data is only needed when users care about the possible accuracy drop. It support following formats: | 1. a torch.utils.data.dataloader.DataLoader object for accuracy evaluation. | | 2. a single torch.Tensor which used for training, this case is used to | accept single sample input x. | | 3. a tuple of torch.Tensor which used for training, this case is used to | accept single sample input (x, y) or (x1, x2) et al. :param input_sample: (optional) A set of inputs for trace, defaults to None. In most cases, you don't need specify this parameter, it will be obtained from training_data. :param metric: (optional) A callable object which is used for calculating accuracy. It supports two kinds of callable object: | 1. A torchmetrics.Metric object or similar callable object which takes | prediction and target then returns an accuracy value in this calling | method `metric(pred, target)`. This requires data in validation_data | is composed of (input_data, target). | 2. A callable object that takes model and validation_data (if | validation_data is not None) as input, and returns an accuracy value in | this calling method metric(model, data_loader) (or metric(model) if | validation_data is None). :param direction: (optional) A string that indicates the higher/lower better for the metric, "min" for the lower the better and "max" for the higher the better. Default value is "max". :param thread_num: (optional) a int represents how many threads(cores) is needed for inference. :param logging: whether to log detailed information of model conversion. Default: False. :param latency_sample_num: (optional) a int represents the number of repetitions to calculate the average latency. The default value is 100. :param includes: (optional) a list of acceleration methods that will be included in the search. Default to None meaning including all available methods. "original" method will be automatically add to includes. :param excludes: (optional) a list of acceleration methods that will be excluded from the search. "original" will be ignored in the excludes. ''' # check if model is a nn.Module or inherited from a nn.Module invalidInputError(isinstance(model, nn.Module), "model should be a nn module.") invalidInputError(direction in ['min', 'max'], "Only support direction 'min', 'max'.") # get the available methods whose dep is met available_dict: Dict =\ available_acceleration_combination(excludes=excludes, includes=includes, full_methods=self.ALL_INFERENCE_ACCELERATION_METHOD) self._direction: str = direction # save direction as attr # record whether calculate accuracy in optimize by this attr if validation_data is None and metric is None: self._calculate_accuracy = False else: # test whether accuracy calculation works later self._calculate_accuracy = True default_threads: int = torch.get_num_threads() thread_num: int = default_threads if thread_num is None else int(thread_num) result_map: Dict[str, Dict] = {} model.eval() # change model to eval mode if input_sample is None: forward_args = get_forward_args(model) if isinstance(training_data, DataLoader): input_sample = get_input_example(model, training_data, forward_args) else: if isinstance(training_data, Sequence): input_sample = tuple(list(training_data)[:len(forward_args)]) else: input_sample = training_data # turn training_data into dataset dataset = RepeatDataset(sample=training_data, num=1) training_data = DataLoader(dataset, batch_size=1) training_data = remove_batch_dim_fn(training_data) if validation_data is not None and not isinstance(validation_data, DataLoader): # turn validation_data into dataset val_dataset = RepeatDataset(sample=validation_data, num=1) validation_data = DataLoader(val_dataset, batch_size=1) validation_data = remove_batch_dim_fn(validation_data) st = time.perf_counter() try: with torch.no_grad(): if isinstance(input_sample, (Dict, torch.Tensor)): model(input_sample) else: model(*input_sample) except Exception: invalidInputError(False, "training_data is incompatible with your model input.") baseline_time = time.perf_counter() - st if baseline_time > 0.1: # 100ms sample_size_for_pot = 15 else: sample_size_for_pot = 100 print("==========================Start Optimization==========================") start_time = time.perf_counter() for idx, (method, available) in enumerate(available_dict.items()): result_map[method] = {} if available is False: result_map[method]["status"] = "lack dependency" else: print(f"----------Start test {method} model " f"({idx+1}/{len(available_dict)})----------") option: AccelerationOption = self.ALL_INFERENCE_ACCELERATION_METHOD[method] precision = option.get_precision() try: acce_model = option.optimize(model, training_data=training_data, input_sample=input_sample, thread_num=thread_num, logging=logging, sample_size_for_pot=sample_size_for_pot) except Exception as e: traceback.print_exc() result_map[method]["status"] = "fail to convert" print(f"----------Failed to convert to {method}----------") continue result_map[method]["status"] = "successful" def func_test(model, input_sample): with torch.no_grad(): if isinstance(input_sample, (Dict, torch.Tensor)): model(input_sample) else: model(*input_sample) torch.set_num_threads(thread_num) try: result_map[method]["latency"], status =\ throughput_calculate_helper(latency_sample_num, baseline_time, func_test, acce_model, input_sample) if status is False and method != "original": result_map[method]["status"] = "early stopped" torch.set_num_threads(default_threads) continue except Exception as e: traceback.print_exc() result_map[method]["status"] = "fail to forward" print(f"----------{method} failed to forward----------") torch.set_num_threads(default_threads) continue torch.set_num_threads(default_threads) if self._calculate_accuracy: # here we suppose trace don't change accuracy, # so we jump it to reduce time cost of optimize if precision == "fp32" and method != "original": result_map[method]["accuracy"] = "not recomputed" else: if method == "original": # test whether metric works try: result_map[method]["accuracy"] =\ _accuracy_calculate_helper(acce_model, metric, validation_data) except Exception as e: traceback.print_exc() self._calculate_accuracy = False invalidInputError( False, "Your metric is incompatible with validation_data or don't " "follow our given pattern. Our expected metric pattern is " "as follows:\n1. a torchmetrics.Metric object\n2. a callable " "object which takes prediction and target then returns a value" " in this calling method `metric(pred, target)`\n3. a callable" " object that takes model and validation_data (if " "validation_data is not None) as input, and returns an accuracy" " value in this calling method metric(model, data_loader) " "(or metric(model) if validation_data is None).") else: result_map[method]["accuracy"] =\ _accuracy_calculate_helper(acce_model, metric, validation_data) else: result_map[method]["accuracy"] = None result_map[method]["model"] = acce_model print(f"----------Finish test {method} model " f"({idx+1}/{len(available_dict)})----------") self.optimized_model_dict: Dict = result_map print("\n\n==========================Optimization Results==========================") self._optimize_result = format_optimize_result(self.optimized_model_dict, self._calculate_accuracy) # save time cost to self._optimize_result time_cost = time.perf_counter() - start_time time_cost_str = f"Optimization cost {time_cost:.1f}s in total." self._optimize_result += time_cost_str print(self._optimize_result) print("===========================Stop Optimization===========================")
[docs] @staticmethod def quantize(model: nn.Module, precision: str = 'int8', accelerator: Optional[str] = None, use_ipex: bool = False, calib_dataloader: Optional[DataLoader] = None, metric: Optional[Metric] = None, accuracy_criterion: Optional[dict] = None, approach: str = 'static', method: Optional[str] = None, conf: Optional[str] = None, tuning_strategy: Optional[str] = None, timeout: Optional[int] = None, max_trials: Optional[int] = None, input_sample=None, thread_num: Optional[int] = None, onnxruntime_session_options=None, openvino_config=None, simplification: bool = True, sample_size: int = 100, logging: bool = True, **export_kwargs): """ Calibrate a torch.nn.Module for post-training quantization. :param model: A model to be quantized. Model type should be an instance of torch.nn.Module. :param precision: Global precision of quantized model, supported type: 'int8', 'bf16', 'fp16', defaults to 'int8'. :param accelerator: Use accelerator 'None', 'onnxruntime', 'openvino', defaults to None. None means staying in pytorch. :param calib_dataloader: A torch.utils.data.dataloader.DataLoader object for calibration. Required for static quantization. It's also used as validation dataloader. :param metric: A torchmetrics.metric.Metric object for evaluation. :param accuracy_criterion: Tolerable accuracy drop, defaults to None meaning no accuracy control. accuracy_criterion = {'relative': 0.1, 'higher_is_better': True} allows relative accuracy loss: 1%. accuracy_criterion = {'absolute': 0.99, 'higher_is_better':False} means accuracy must be smaller than 0.99. :param approach: 'static' or 'dynamic'. 'static': post_training_static_quant, 'dynamic': post_training_dynamic_quant. Default: 'static'. OpenVINO supports static mode only. :param method: Method to do quantization. When accelerator=None, supported methods: 'fx', 'eager', 'ipex', defaults to 'fx'. If you don't use ipex, suggest using 'fx' which executes automatic optimizations like fusion. For more information, please refer to https://pytorch.org/docs/stable/quantization.html#eager-mode-quantization. When accelerator='onnxruntime', supported methods: 'qlinear', 'integer', defaults to 'qlinear'. Suggest 'qlinear' for lower accuracy drop if using static quantization. More details in https://onnxruntime.ai/docs/performance/quantization.html. This argument doesn't take effect for OpenVINO, don't change it for OpenVINO. :param conf: A path to conf yaml file for quantization. Default: None, using default config. :param tuning_strategy: 'bayesian', 'basic', 'mse', 'sigopt'. Default: 'bayesian'. :param timeout: Tuning timeout (seconds). Default: None, which means early stop. Combine with max_trials field to decide when to exit. :param max_trials: Max tune times. Default: None, which means no tuning. Combine with timeout field to decide when to exit. "timeout=0, max_trials=1" means it will try quantization only once and return satisfying best model. :param input_sample: An input example to convert pytorch model into ONNX/OpenVINO. :param thread_num: (optional) a int represents how many threads(cores) is needed for inference, only valid for accelerator='onnxruntime' or accelerator='openvino'. :param onnxruntime_session_options: The session option for onnxruntime, only valid when accelerator='onnxruntime', otherwise will be ignored. :param openvino_config: The config to be inputted in core.compile_model. Only valid when accelerator='openvino', otherwise will be ignored. :param simplification: whether we use onnxsim to simplify the ONNX model, only valid when accelerator='onnxruntime', otherwise will be ignored. If this option is set to True, new dependency 'onnxsim' need to be installed. :param sample_size: (optional) a int represents how many samples will be used for Post-training Optimization Tools (POT) from OpenVINO toolkit, only valid for accelerator='openvino'. Default to 100. The larger the value, the more accurate the conversion, the lower the performance degradation, but the longer the time. :param logging: whether to log detailed information of model conversion, only valid when accelerator='openvino', otherwise will be ignored. Default: ``True``. :param **export_kwargs: will be passed to torch.onnx.export function. :return: A accelerated torch.nn.Module if quantization is sucessful. """ if precision == 'bf16': if accelerator is None: if use_ipex: invalidInputError(not TORCH_VERSION_LESS_1_10, "torch version should >=1.10 to use ipex") use_jit = (accelerator == "jit") channels_last = export_kwargs["channels_last"] \ if "channels_last" in export_kwargs else None return PytorchIPEXJITBF16Model(model, input_sample=input_sample, use_ipex=use_ipex, use_jit=use_jit, channels_last=channels_last) bf16_model = BF16Model(model) return bf16_model else: invalidInputError(False, "Accelerator {} is invalid for BF16.".format(accelerator)) if precision == 'int8': # transform the dataloader to inc mode inc_calib_dataloader =\ transform_multiple_input_dataloader_to_inc_mode(model, calib_dataloader) if not accelerator or accelerator == 'onnxruntime': method_map = { None: { 'fx': 'pytorch_fx', 'eager': 'pytorch', 'ipex': 'pytorch_ipex', None: 'pytorch_fx' # default }, 'onnxruntime': { 'qlinear': 'onnxrt_qlinearops', 'integer': 'onnxrt_integerops', None: 'onnxrt_qlinearops' # default } } framework = method_map[accelerator].get(method, None) if accelerator == "onnxruntime": if not type(model).__name__ == 'PytorchONNXRuntimeModel': # try to establish onnx model if input_sample is None: # input_sample can be a dataloader input_sample = calib_dataloader if onnxruntime_session_options is None: import onnxruntime onnxruntime_session_options = onnxruntime.SessionOptions() if thread_num is not None: onnxruntime_session_options.intra_op_num_threads = thread_num onnxruntime_session_options.inter_op_num_threads = thread_num model = InferenceOptimizer.trace( model, input_sample=input_sample, accelerator='onnxruntime', onnxruntime_session_options=onnxruntime_session_options, simplification=simplification, **export_kwargs) """ If accelerator==None, quantized model returned should be an object of PytorchModel which is defined by neural-compressor containing a `GraphModule` for inference. Otherwise accelerator=='onnxruntime', it returns an ONNXModel object. A supported model which is able to run on Pytorch or ONNXRuntime can be fetched by `quantized_model.model`. """ return inc_quantize(model, inc_calib_dataloader, metric, framework=framework, conf=conf, approach=approach, tuning_strategy=tuning_strategy, accuracy_criterion=accuracy_criterion, timeout=timeout, max_trials=max_trials, onnxruntime_session_options=onnxruntime_session_options) elif accelerator == 'openvino': model_type = type(model).__name__ if not model_type == 'PytorchOpenVINOModel': if input_sample is None: # input_sample can be a dataloader input_sample = calib_dataloader model = InferenceOptimizer.trace(model, input_sample=input_sample, accelerator='openvino', thread_num=thread_num, logging=logging, **export_kwargs) invalidInputError(type(model).__name__ == 'PytorchOpenVINOModel', "Invalid model to quantize. Please use a nn.Module or a model " "from trainer.trance(accelerator=='openvino')") drop_type = None higher_is_better = None maximal_drop = None if metric: if not isinstance(accuracy_criterion, dict): accuracy_criterion = {'relative': 0.99, 'higher_is_better': True} drop_type = 'relative' if 'relative' in accuracy_criterion else 'absolute' higher_is_better = accuracy_criterion.get('higher_is_better', None) maximal_drop = accuracy_criterion.get(drop_type, None) kwargs = { "metric": metric, "higher_better": higher_is_better, "drop_type": drop_type, "maximal_drop": maximal_drop, "max_iter_num": max_trials, # TODO following two keys are optional, if there is need, we can add them # "n_requests": None, "sample_size": sample_size } return model.pot(calib_dataloader, thread_num=thread_num, config=openvino_config, **kwargs) else: invalidInputError(False, "Accelerator {} is invalid.".format(accelerator)) invalidInputError(False, "Precision {} is invalid.".format(precision))
[docs] @staticmethod def trace(model: nn.Module, input_sample=None, accelerator: Optional[str] = None, use_ipex: bool = False, thread_num: Optional[int] = None, onnxruntime_session_options=None, openvino_config=None, simplification: bool = True, logging: bool = True, **export_kwargs): """ Trace a torch.nn.Module and convert it into an accelerated module for inference. For example, this function returns a PytorchOpenVINOModel when accelerator=='openvino'. :param model: An torch.nn.Module model, including pl.LightningModule. :param input_sample: A set of inputs for trace, defaults to None if you have trace before or model is a LightningModule with any dataloader attached. :param accelerator: The accelerator to use, defaults to None meaning staying in Pytorch backend. 'openvino', 'onnxruntime' and 'jit' are supported for now. :param use_ipex: whether we use ipex as accelerator for inferencing. default: False. :param thread_num: (optional) a int represents how many threads(cores) is needed for inference, only valid for accelerator='onnxruntime' or accelerator='openvino'. :param onnxruntime_session_options: The session option for onnxruntime, only valid when accelerator='onnxruntime', otherwise will be ignored. :param openvino_config: The config to be inputted in core.compile_model. Only valid when accelerator='openvino', otherwise will be ignored. :param simplification: whether we use onnxsim to simplify the ONNX model, only valid when accelerator='onnxruntime', otherwise will be ignored. If this option is set to True, new dependency 'onnxsim' need to be installed. :param logging: whether to log detailed information of model conversion, only valid when accelerator='openvino', otherwise will be ignored. Default: ``True``. :param **kwargs: other extra advanced settings include 1. those be passed to torch.onnx.export function, only valid when accelerator='onnxruntime'/'openvino', otherwise will be ignored. 2. if channels_last is set and `use_ipex=True`, we will transform the data to be channels last according to the setting. Defaultly, channels_last will be set to ``True`` if `use_ipex=True`. :return: Model with different acceleration. """ invalidInputError( isinstance(model, nn.Module) and not isinstance(model, AcceleratedLightningModule), "Expect a nn.Module instance that is not traced or quantized" "but got type {}".format(type(model)) ) if accelerator == 'openvino': # openvino backend will not care about ipex usage final_openvino_option = {"INFERENCE_PRECISION_HINT": "f32"} if openvino_config is not None: final_openvino_option.update(openvino_config) return PytorchOpenVINOModel(model, input_sample, thread_num, logging, final_openvino_option, **export_kwargs) if accelerator == 'onnxruntime': # onnxruntime backend will not care about ipex usage if onnxruntime_session_options is None: import onnxruntime onnxruntime_session_options = onnxruntime.SessionOptions() if thread_num is not None: onnxruntime_session_options.intra_op_num_threads = thread_num onnxruntime_session_options.inter_op_num_threads = thread_num return PytorchONNXRuntimeModel(model, input_sample, onnxruntime_session_options, simplification=simplification, **export_kwargs) if accelerator == 'jit' or use_ipex: if use_ipex: invalidInputError(not TORCH_VERSION_LESS_1_10, "torch version should >=1.10 to use ipex") use_jit = (accelerator == "jit") channels_last = export_kwargs["channels_last"]\ if "channels_last" in export_kwargs else None return PytorchIPEXJITModel(model, input_sample=input_sample, use_ipex=use_ipex, use_jit=use_jit, channels_last=channels_last) invalidInputError(False, "Accelerator {} is invalid.".format(accelerator))
[docs] @staticmethod def save(model: nn.Module, path): """ Save the model to local file. :param model: Any model of torch.nn.Module, including all models accelareted by Trainer.trace/Trainer.quantize. :param path: Path to saved model. Path should be a directory. """ save_model(model, path)
[docs] @staticmethod def load(path, model: Optional[nn.Module] = None): """ Load a model from local. :param path: Path to model to be loaded. Path should be a directory. :param model: Required FP32 model to load pytorch model, it is needed if you accelerated the model with accelerator=None by Trainer.trace/Trainer.quantize. model should be set to None if you choose accelerator="onnxruntime"/"openvino"/"jit". :return: Model with different acceleration(None/OpenVINO/ONNX Runtime/JIT) or precision(FP32/FP16/BF16/INT8). """ return load_model(path, model)
def _signature_check(function): ''' A quick helper to judge whether input function is following this calling method `metric(pred, target)`. ''' import inspect sig = inspect.signature(function) if len(sig.parameters.values()) < 2: return False param1_name = list(sig.parameters.values())[0].name param2_name = list(sig.parameters.values())[1].name if "pred" in param1_name and "target" in param2_name: return True return False def _accuracy_calculate_helper(model, metric, data): ''' A quick helper to calculate accuracy ''' if isinstance(metric, Metric) or _signature_check(metric) is True: invalidInputError(data is not None, "Validation data can't be None when you pass a " "torchmetrics.Metric object or similar callable " "object which takes prediction and target as input.") metric = NanoMetric(metric) return metric(model, data) else: if data is None: return metric(model) else: return metric(model, data)