Source code for bigdl.nano.pytorch.inference.optimizer

#
# Copyright 2016 The BigDL Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import torch
from torch import nn
import time
from copy import deepcopy
from typing import Dict, Callable, Tuple, Optional, List, Union, Sequence
from torch.utils.data import DataLoader
from torchmetrics.metric import Metric
from bigdl.nano.utils.inference.common.checker import available_acceleration_combination
from bigdl.nano.utils.inference.common.utils import AccelerationOption,\
    throughput_calculate_helper, format_optimize_result
from bigdl.nano.utils.inference.common.base_optimizer import BaseInferenceOptimizer
from bigdl.nano.utils.log4Error import invalidInputError
from bigdl.nano.pytorch.amp import BF16Model
from bigdl.nano.deps.openvino.openvino_api import PytorchOpenVINOModel
from bigdl.nano.deps.ipex.ipex_api import PytorchIPEXJITModel, PytorchIPEXJITBF16Model
from bigdl.nano.deps.onnxruntime.onnxruntime_api import PytorchONNXRuntimeModel
from bigdl.nano.deps.neural_compressor.inc_api import quantize as inc_quantize
from bigdl.nano.utils.inference.pytorch.model import AcceleratedLightningModule
from bigdl.nano.utils.inference.pytorch.model_utils import get_forward_args, get_input_example
from bigdl.nano.utils.inference.pytorch.metrics import NanoMetric
from bigdl.nano.utils.inference.pytorch.dataset import RepeatDataset, remove_batch_dim_fn
from bigdl.nano.utils.inference.pytorch.dataloader import\
    transform_multiple_input_dataloader_to_inc_mode
from bigdl.nano.pytorch.utils import TORCH_VERSION_LESS_1_10, save_model, load_model
import traceback
import warnings
# Filter out useless UserWarning / DeprecationWarning messages coming from the
# pytorch_lightning and torch modules.
warnings.filterwarnings('ignore', category=UserWarning, module='pytorch_lightning')
warnings.filterwarnings('ignore', category=DeprecationWarning, module='pytorch_lightning')
warnings.filterwarnings('ignore', category=UserWarning, module='torch')
warnings.filterwarnings('ignore', category=DeprecationWarning, module='torch')

import os
os.environ['LOGLEVEL'] = 'ERROR'  # remove partial output of inc


class TorchAccelerationOption(AccelerationOption):
    """
    Acceleration option that converts a PyTorch model through ``InferenceOptimizer``.
    """

    def optimize(self, model, training_data=None, input_sample=None,
                 thread_num=None, logging=False, sample_size_for_pot=100):
        """
        Build the accelerated variant of ``model`` described by this option.

        fp32 options go through ``InferenceOptimizer.trace`` (or return ``model``
        untouched when neither an accelerator nor ipex is requested); every other
        precision goes through ``InferenceOptimizer.quantize`` on a deep copy of
        ``model``.

        :param model: the model to accelerate.
        :param training_data: forwarded to ``quantize`` as ``calib_dataloader``.
        :param input_sample: sample input forwarded to ``trace`` / ``quantize``.
        :param thread_num: forwarded to ``quantize``, and to ``trace`` for
               accelerators other than jit / None.
        :param logging: forwarded to ``quantize``, and to ``trace`` for
               accelerators other than jit / None.
        :param sample_size_for_pot: forwarded to ``quantize`` as ``sample_size``.
        :return: the accelerated model, or ``model`` itself when nothing is to be done.
        """
        accelerator = self.get_accelerator()

        if self.get_precision() != "fp32":
            # quantize
            quant_method: str = self.method
            return InferenceOptimizer.quantize(
                model=deepcopy(model),
                precision=self.get_precision(),
                accelerator=accelerator,
                use_ipex=self.ipex,
                calib_dataloader=training_data,
                input_sample=input_sample,
                method=quant_method,
                thread_num=thread_num,
                sample_size=sample_size_for_pot,
                # remove output of openvino
                logging=logging)

        # trace
        if accelerator is None and self.ipex is False:
            # plain fp32 model without any acceleration: nothing to convert
            return model

        if accelerator is None or accelerator == "jit":
            # channels_last is only for jit
            extra_trace_args = {"use_ipex": self.ipex,
                                "channels_last": self.channels_last}
        else:
            # logging=False removes the output of openvino
            extra_trace_args = {"thread_num": thread_num,
                                "logging": logging}

        return InferenceOptimizer.trace(model=model,
                                        accelerator=accelerator,
                                        input_sample=input_sample,
                                        **extra_trace_args)


class InferenceOptimizer(BaseInferenceOptimizer):

    # Registry of acceleration method combinations, keyed by the method name that
    # `optimize` reports and that its `includes` / `excludes` arguments refer to.
    # `optimize` passes this dict as `full_methods` to
    # `available_acceleration_combination` and looks each method's option up here.
    # Developers may want to register some new combinations here.
    ALL_INFERENCE_ACCELERATION_METHOD = \
        {
            "original": TorchAccelerationOption(),
            "fp32_ipex": TorchAccelerationOption(ipex=True),
            "bf16": TorchAccelerationOption(bf16=True),
            "bf16_ipex": TorchAccelerationOption(bf16=True, ipex=True),
            "int8": TorchAccelerationOption(inc=True),
            "jit_fp32": TorchAccelerationOption(jit=True),
            "jit_fp32_ipex": TorchAccelerationOption(jit=True, ipex=True),
            "jit_fp32_ipex_channels_last": TorchAccelerationOption(jit=True, ipex=True,
                                                                   channels_last=True),
            "openvino_fp32": TorchAccelerationOption(openvino=True),
            "openvino_int8": TorchAccelerationOption(openvino=True, pot=True),
            "onnxruntime_fp32": TorchAccelerationOption(onnxruntime=True),
            "onnxruntime_int8_qlinear": TorchAccelerationOption(onnxruntime=True, inc=True,
                                                                method="qlinear"),
            "onnxruntime_int8_integer": TorchAccelerationOption(onnxruntime=True, inc=True,
                                                                method="integer"),
        }

[docs]    def optimize(self, model: nn.Module,
                 training_data: Union[DataLoader, torch.Tensor, Tuple[torch.Tensor]],
                 validation_data:
                     Optional[Union[DataLoader, torch.Tensor, Tuple[torch.Tensor]]] = None,
                 input_sample: Union[torch.Tensor, Dict, Tuple[torch.Tensor], None] = None,
                 metric: Optional[Callable] = None,
                 direction: str = "max",
                 thread_num: Optional[int] = None,
                 logging: bool = False,
                 latency_sample_num: int = 100,
                 includes: Optional[List[str]] = None,
                 excludes: Optional[List[str]] = None) -> None:
        '''
        This function will give all available inference acceleration methods a try
        and record the latency, accuracy and model instance inside the Optimizer for
        future usage. All model instance is setting to eval mode.

        The available methods are "original", "fp32_ipex", "bf16", "bf16_ipex","int8",
        "jit_fp32", "jit_fp32_ipex", "jit_fp32_ipex_channels_last", "openvino_fp32",
        "openvino_int8", "onnxruntime_fp32", "onnxruntime_int8_qlinear"
        and "onnxruntime_int8_integer".

        :param model: A torch.nn.Module to be optimized
        :param training_data: training_data support following formats:

                | 1. a torch.utils.data.dataloader.DataLoader object for training dataset.
                | Users should be careful with this parameter since this dataloader
                | might be exposed to the model, which causing data leak. The
                | batch_size of this dataloader is important as well, users may
                | want to set it to the same batch size you may want to use the model
                | in real deploy environment. E.g. batch size should be set to 1
                | if you would like to use the accelerated model in an online service.
                |
                | 2. a single torch.Tensor which used for training, this case is used to
                | accept single sample input x.
                |
                | 3. a tuple of torch.Tensor which used for training, this case is used to
                | accept single sample input (x, y) or (x1, x2) et al.

        :param validation_data: (optional) validation_data is only needed when users care
                                about the possible accuracy drop. It support following formats:

                | 1. a torch.utils.data.dataloader.DataLoader object for accuracy evaluation.
                |
                | 2. a single torch.Tensor which used for training, this case is used to
                | accept single sample input x.
                |
                | 3. a tuple of torch.Tensor which used for training, this case is used to
                | accept single sample input (x, y) or (x1, x2) et al.

        :param input_sample: (optional) A set of inputs for trace, defaults to None.
               In most cases, you don't need specify this parameter, it will be obtained from
               training_data.
        :param metric: (optional) A callable object which is used for calculating accuracy.
               It supports two kinds of callable object:

               | 1. A torchmetrics.Metric object or similar callable object which takes
               | prediction and target then returns an accuracy value in this calling
               | method `metric(pred, target)`. This requires data in validation_data
               | is composed of (input_data, target).
               | 2. A callable object that takes model and validation_data (if
               | validation_data is not None) as input, and returns an accuracy value in
               | this calling method metric(model, data_loader) (or metric(model) if
               | validation_data is None).

        :param direction: (optional) A string that indicates the higher/lower
               better for the metric, "min" for the lower the better and "max" for the
               higher the better. Default value is "max".
        :param thread_num: (optional) a int represents how many threads(cores) is needed for
               inference.
        :param logging: whether to log detailed information of model conversion.
               Default: False.
        :param latency_sample_num: (optional) a int represents the number of repetitions
               to calculate the average latency. The default value is 100.
        :param includes: (optional) a list of acceleration methods that will be included in the
               search. Default to None meaning including all available methods. "original" method
               will be automatically add to includes.
        :param excludes: (optional) a list of acceleration methods that will be excluded from the
               search. "original" will be ignored in the excludes.
        '''

        # check if model is a nn.Module or inherited from a nn.Module
        invalidInputError(isinstance(model, nn.Module), "model should be a nn module.")
        invalidInputError(direction in ['min', 'max'],
                          "Only support direction 'min', 'max'.")

        # get the available methods whose dep is met
        available_dict: Dict =\
            available_acceleration_combination(excludes=excludes,
                                               includes=includes,
                                               full_methods=self.ALL_INFERENCE_ACCELERATION_METHOD)

        self._direction: str = direction  # save direction as attr
        # record whether calculate accuracy in optimize by this attr
        if validation_data is None and metric is None:
            self._calculate_accuracy = False
        else:
            # test whether accuracy calculation works later
            self._calculate_accuracy = True

        default_threads: int = torch.get_num_threads()
        thread_num: int = default_threads if thread_num is None else int(thread_num)

        result_map: Dict[str, Dict] = {}

        model.eval()  # change model to eval mode

        if input_sample is None:
            forward_args = get_forward_args(model)
            if isinstance(training_data, DataLoader):
                input_sample = get_input_example(model, training_data, forward_args)
            else:
                if isinstance(training_data, Sequence):
                    input_sample = tuple(list(training_data)[:len(forward_args)])
                else:
                    input_sample = training_data
                # turn training_data into dataset
                dataset = RepeatDataset(sample=training_data, num=1)
                training_data = DataLoader(dataset, batch_size=1)
                training_data = remove_batch_dim_fn(training_data)
                if validation_data is not None and not isinstance(validation_data, DataLoader):
                    # turn validation_data into dataset
                    val_dataset = RepeatDataset(sample=validation_data, num=1)
                    validation_data = DataLoader(val_dataset, batch_size=1)
                    validation_data = remove_batch_dim_fn(validation_data)

        st = time.perf_counter()
        try:
            with torch.no_grad():
                if isinstance(input_sample, (Dict, torch.Tensor)):
                    model(input_sample)
                else:
                    model(*input_sample)
        except Exception:
            invalidInputError(False,
                              "training_data is incompatible with your model input.")
        baseline_time = time.perf_counter() - st
        if baseline_time > 0.1:  # 100ms
            sample_size_for_pot = 15
        else:
            sample_size_for_pot = 100

        print("==========================Start Optimization==========================")
        start_time = time.perf_counter()
        for idx, (method, available) in enumerate(available_dict.items()):
            result_map[method] = {}
            if available is False:
                result_map[method]["status"] = "lack dependency"
            else:
                print(f"----------Start test {method} model "
                      f"({idx+1}/{len(available_dict)})----------")
                option: AccelerationOption = self.ALL_INFERENCE_ACCELERATION_METHOD[method]
                precision = option.get_precision()
                try:
                    acce_model = option.optimize(model, training_data=training_data,
                                                 input_sample=input_sample,
                                                 thread_num=thread_num,
                                                 logging=logging,
                                                 sample_size_for_pot=sample_size_for_pot)
                except Exception as e:
                    traceback.print_exc()
                    result_map[method]["status"] = "fail to convert"
                    print(f"----------Failed to convert to {method}----------")
                    continue

                result_map[method]["status"] = "successful"

                def func_test(model, input_sample):
                    with torch.no_grad():
                        if isinstance(input_sample, (Dict, torch.Tensor)):
                            model(input_sample)
                        else:
                            model(*input_sample)

                torch.set_num_threads(thread_num)
                try:
                    result_map[method]["latency"], status =\
                        throughput_calculate_helper(latency_sample_num, baseline_time,
                                                    func_test, acce_model, input_sample)
                    if status is False and method != "original":
                        result_map[method]["status"] = "early stopped"
                        torch.set_num_threads(default_threads)
                        continue
                except Exception as e:
                    traceback.print_exc()
                    result_map[method]["status"] = "fail to forward"
                    print(f"----------{method} failed to forward----------")
                    torch.set_num_threads(default_threads)
                    continue

                torch.set_num_threads(default_threads)
                if self._calculate_accuracy:
                    # here we suppose trace don't change accuracy,
                    # so we jump it to reduce time cost of optimize
                    if precision == "fp32" and method != "original":
                        result_map[method]["accuracy"] = "not recomputed"
                    else:
                        if method == "original":
                            # test whether metric works
                            try:
                                result_map[method]["accuracy"] =\
                                    _accuracy_calculate_helper(acce_model, metric,
                                                               validation_data)
                            except Exception as e:
                                traceback.print_exc()
                                self._calculate_accuracy = False
                                invalidInputError(
                                    False,
                                    "Your metric is incompatible with validation_data or don't "
                                    "follow our given pattern. Our expected metric pattern is "
                                    "as follows:\n1. a torchmetrics.Metric object\n2. a callable "
                                    "object which takes prediction and target then returns a value"
                                    " in this calling method `metric(pred, target)`\n3. a callable"
                                    " object that takes model and validation_data (if "
                                    "validation_data is not None) as input, and returns an accuracy"
                                    " value in this calling method metric(model, data_loader) "
                                    "(or metric(model) if validation_data is None).")
                        else:
                            result_map[method]["accuracy"] =\
                                _accuracy_calculate_helper(acce_model, metric,
                                                           validation_data)
                else:
                    result_map[method]["accuracy"] = None

                result_map[method]["model"] = acce_model
                print(f"----------Finish test {method} model "
                      f"({idx+1}/{len(available_dict)})----------")

        self.optimized_model_dict: Dict = result_map
        print("\n\n==========================Optimization Results==========================")

        self._optimize_result = format_optimize_result(self.optimized_model_dict,
                                                       self._calculate_accuracy)
        # save time cost to self._optimize_result
        time_cost = time.perf_counter() - start_time
        time_cost_str = f"Optimization cost {time_cost:.1f}s in total."
        self._optimize_result += time_cost_str
        print(self._optimize_result)
        print("===========================Stop Optimization===========================")

[docs]    @staticmethod
    def quantize(model: nn.Module,
                 precision: str = 'int8',
                 accelerator: Optional[str] = None,
                 use_ipex: bool = False,
                 calib_dataloader: Optional[DataLoader] = None,
                 metric: Optional[Metric] = None,
                 accuracy_criterion: Optional[dict] = None,
                 approach: str = 'static',
                 method: Optional[str] = None,
                 conf: Optional[str] = None,
                 tuning_strategy: Optional[str] = None,
                 timeout: Optional[int] = None,
                 max_trials: Optional[int] = None,
                 input_sample=None,
                 thread_num: Optional[int] = None,
                 onnxruntime_session_options=None,
                 openvino_config=None,
                 simplification: bool = True,
                 sample_size: int = 100,
                 logging: bool = True,
                 **export_kwargs):
        """
        Calibrate a torch.nn.Module for post-training quantization.

        :param model:           A model to be quantized. Model type should be an instance of
                                torch.nn.Module.
        :param precision:       Global precision of quantized model,
                                supported type: 'int8', 'bf16', 'fp16', defaults to 'int8'.
        :param accelerator:     Use accelerator 'None', 'onnxruntime', 'openvino', defaults to None.
                                None means staying in pytorch.
        :param calib_dataloader:    A torch.utils.data.dataloader.DataLoader object for calibration.
                                    Required for static quantization.
                                    It's also used as validation dataloader.
        :param metric:              A torchmetrics.metric.Metric object for evaluation.
        :param accuracy_criterion:  Tolerable accuracy drop, defaults to None meaning no
                                    accuracy control.
                                    accuracy_criterion = {'relative': 0.1, 'higher_is_better': True}
                                    allows relative accuracy loss: 1%. accuracy_criterion =
                                    {'absolute': 0.99, 'higher_is_better':False} means accuracy
                                    must be smaller than 0.99.
        :param approach:    'static' or 'dynamic'.
                            'static': post_training_static_quant,
                            'dynamic': post_training_dynamic_quant.
                            Default: 'static'. OpenVINO supports static mode only.
        :param method:          Method to do quantization. When accelerator=None, supported
            methods: 'fx', 'eager', 'ipex', defaults to 'fx'. If you don't use ipex, suggest using
            'fx' which executes automatic optimizations like fusion. For more information, please
            refer to https://pytorch.org/docs/stable/quantization.html#eager-mode-quantization.
            When accelerator='onnxruntime', supported methods: 'qlinear', 'integer', defaults
            to 'qlinear'. Suggest 'qlinear' for lower accuracy drop if using static quantization.
            More details in https://onnxruntime.ai/docs/performance/quantization.html.
            This argument doesn't take effect for OpenVINO, don't change it for OpenVINO.
        :param conf:        A path to conf yaml file for quantization.
                            Default: None, using default config.
        :param tuning_strategy:    'bayesian', 'basic', 'mse', 'sigopt'. Default: 'bayesian'.
        :param timeout:     Tuning timeout (seconds). Default: None,  which means early stop.
                            Combine with max_trials field to decide when to exit.
        :param max_trials:  Max tune times. Default: None, which means no tuning.
                            Combine with timeout field to decide when to exit.
                            "timeout=0, max_trials=1" means it will try quantization only once and
                            return satisfying best model.
        :param input_sample:      An input example to convert pytorch model into ONNX/OpenVINO.
        :param thread_num: (optional) a int represents how many threads(cores) is needed for
                           inference, only valid for accelerator='onnxruntime'
                           or accelerator='openvino'.
        :param onnxruntime_session_options: The session option for onnxruntime, only valid when
                                            accelerator='onnxruntime', otherwise will be ignored.
        :param openvino_config: The config to be inputted in core.compile_model. Only valid when
                                accelerator='openvino', otherwise will be ignored.
        :param simplification: whether we use onnxsim to simplify the ONNX model, only valid when
                               accelerator='onnxruntime', otherwise will be ignored. If this option
                               is set to True, new dependency 'onnxsim' need to be installed.
        :param sample_size: (optional) a int represents how many samples will be used for
                            Post-training Optimization Tools (POT) from OpenVINO toolkit,
                            only valid for accelerator='openvino'. Default to 100.
                            The larger the value, the more accurate the conversion,
                            the lower the performance degradation, but the longer the time.
        :param logging: whether to log detailed information of model conversion, only valid when
                        accelerator='openvino', otherwise will be ignored. Default: ``True``.
        :param **export_kwargs: will be passed to torch.onnx.export function.
        :return:            A accelerated torch.nn.Module if quantization is sucessful.
        """
        if precision == 'bf16':
            if accelerator is None:
                if use_ipex:
                    invalidInputError(not TORCH_VERSION_LESS_1_10,
                                      "torch version should >=1.10 to use ipex")
                    use_jit = (accelerator == "jit")
                    channels_last = export_kwargs["channels_last"] \
                        if "channels_last" in export_kwargs else None
                    return PytorchIPEXJITBF16Model(model, input_sample=input_sample,
                                                   use_ipex=use_ipex, use_jit=use_jit,
                                                   channels_last=channels_last)
                bf16_model = BF16Model(model)
                return bf16_model
            else:
                invalidInputError(False,
                                  "Accelerator {} is invalid for BF16.".format(accelerator))
        if precision == 'int8':
            # transform the dataloader to inc mode
            inc_calib_dataloader =\
                transform_multiple_input_dataloader_to_inc_mode(model,
                                                                calib_dataloader)
            if not accelerator or accelerator == 'onnxruntime':
                method_map = {
                    None: {
                        'fx': 'pytorch_fx',
                        'eager': 'pytorch',
                        'ipex': 'pytorch_ipex',
                        None: 'pytorch_fx'  # default
                    },
                    'onnxruntime': {
                        'qlinear': 'onnxrt_qlinearops',
                        'integer': 'onnxrt_integerops',
                        None: 'onnxrt_qlinearops'  # default
                    }
                }
                framework = method_map[accelerator].get(method, None)
                if accelerator == "onnxruntime":
                    if not type(model).__name__ == 'PytorchONNXRuntimeModel':
                        # try to establish onnx model
                        if input_sample is None:
                            # input_sample can be a dataloader
                            input_sample = calib_dataloader
                        if onnxruntime_session_options is None:
                            import onnxruntime
                            onnxruntime_session_options = onnxruntime.SessionOptions()
                            if thread_num is not None:
                                onnxruntime_session_options.intra_op_num_threads = thread_num
                                onnxruntime_session_options.inter_op_num_threads = thread_num
                        model = InferenceOptimizer.trace(
                            model,
                            input_sample=input_sample,
                            accelerator='onnxruntime',
                            onnxruntime_session_options=onnxruntime_session_options,
                            simplification=simplification,
                            **export_kwargs)
                """
                If accelerator==None, quantized model returned should be an object of PytorchModel
                which is defined by neural-compressor containing a `GraphModule` for inference.
                Otherwise accelerator=='onnxruntime', it returns an ONNXModel object. A supported
                model which is able to run on Pytorch or ONNXRuntime can be fetched by
                `quantized_model.model`.
                """
                return inc_quantize(model, inc_calib_dataloader, metric,
                                    framework=framework,
                                    conf=conf,
                                    approach=approach,
                                    tuning_strategy=tuning_strategy,
                                    accuracy_criterion=accuracy_criterion,
                                    timeout=timeout,
                                    max_trials=max_trials,
                                    onnxruntime_session_options=onnxruntime_session_options)

            elif accelerator == 'openvino':
                model_type = type(model).__name__
                if not model_type == 'PytorchOpenVINOModel':
                    if input_sample is None:
                        # input_sample can be a dataloader
                        input_sample = calib_dataloader
                    model = InferenceOptimizer.trace(model,
                                                     input_sample=input_sample,
                                                     accelerator='openvino',
                                                     thread_num=thread_num,
                                                     logging=logging,
                                                     **export_kwargs)
                invalidInputError(type(model).__name__ == 'PytorchOpenVINOModel',
                                  "Invalid model to quantize. Please use a nn.Module or a model "
                                  "from trainer.trance(accelerator=='openvino')")
                drop_type = None
                higher_is_better = None
                maximal_drop = None
                if metric:
                    if not isinstance(accuracy_criterion, dict):
                        accuracy_criterion = {'relative': 0.99, 'higher_is_better': True}

                    drop_type = 'relative' if 'relative' in accuracy_criterion else 'absolute'
                    higher_is_better = accuracy_criterion.get('higher_is_better', None)
                    maximal_drop = accuracy_criterion.get(drop_type, None)

                kwargs = {
                    "metric": metric,
                    "higher_better": higher_is_better,
                    "drop_type": drop_type,
                    "maximal_drop": maximal_drop,
                    "max_iter_num": max_trials,
                    # TODO following two keys are optional, if there is need, we can add them
                    # "n_requests": None,
                    "sample_size": sample_size
                }
                return model.pot(calib_dataloader, thread_num=thread_num,
                                 config=openvino_config, **kwargs)
            else:
                invalidInputError(False,
                                  "Accelerator {} is invalid.".format(accelerator))
        invalidInputError(False,
                          "Precision {} is invalid.".format(precision))

    @staticmethod
    def trace(model: nn.Module,
              input_sample=None,
              accelerator: Optional[str] = None,
              use_ipex: bool = False,
              thread_num: Optional[int] = None,
              onnxruntime_session_options=None,
              openvino_config=None,
              simplification: bool = True,
              logging: bool = True,
              **export_kwargs):
        """
        Trace a torch.nn.Module and convert it into an accelerated module for inference.

        For example, this function returns a PytorchOpenVINOModel when accelerator=='openvino'.
        The backend is picked in this order: 'openvino', then 'onnxruntime', then
        'jit'/ipex. The first two ignore ``use_ipex``.

        :param model: A torch.nn.Module model, including pl.LightningModule. It must not
                      already be an AcceleratedLightningModule (i.e. traced or quantized).
        :param input_sample: A set of inputs for trace, defaults to None if you have trace before or
                             model is a LightningModule with any dataloader attached.
        :param accelerator: The accelerator to use. 'openvino', 'onnxruntime' and 'jit' are
                            supported for now. With None (the default) a model is only
                            returned when `use_ipex=True`; otherwise the accelerator is
                            reported as invalid.
        :param use_ipex: whether we use ipex as accelerator for inferencing. default: False.
        :param thread_num: (optional) a int represents how many threads(cores) is needed for
                           inference, only valid for accelerator='onnxruntime'
                           or accelerator='openvino'. For 'onnxruntime' it is only applied
                           when `onnxruntime_session_options` is None.
        :param onnxruntime_session_options: The session option for onnxruntime, only valid when
                                            accelerator='onnxruntime', otherwise will be ignored.
                                            When None, a default ``onnxruntime.SessionOptions()``
                                            is created.
        :param openvino_config: The config to be inputted in core.compile_model. Only valid when
                                accelerator='openvino', otherwise will be ignored. Its entries
                                override the default ``{"INFERENCE_PRECISION_HINT": "f32"}``.
        :param simplification: whether we use onnxsim to simplify the ONNX model, only valid when
                               accelerator='onnxruntime', otherwise will be ignored. If this option
                               is set to True, new dependency 'onnxsim' need to be installed.
        :param logging: whether to log detailed information of model conversion, only valid when
                        accelerator='openvino', otherwise will be ignored. Default: ``True``.
        :param **export_kwargs: other extra advanced settings include
                         1. those be passed to torch.onnx.export function, only valid when
                         accelerator='onnxruntime'/'openvino', otherwise will be ignored.
                         2. if channels_last is set and `use_ipex=True`, we will transform the
                         data to be channels last according to the setting. By default,
                         channels_last will be set to ``True`` if `use_ipex=True`.
        :return: Model with different acceleration.
        """
        invalidInputError(
            isinstance(model, nn.Module) and not isinstance(model, AcceleratedLightningModule),
            "Expect a nn.Module instance that is not traced or quantized "
            "but got type {}".format(type(model))
        )
        if accelerator == 'openvino':  # openvino backend will not care about ipex usage
            # Start from the default precision hint and let user settings override it.
            final_openvino_option = {"INFERENCE_PRECISION_HINT": "f32"}
            if openvino_config is not None:
                final_openvino_option.update(openvino_config)
            return PytorchOpenVINOModel(model, input_sample, thread_num, logging,
                                        final_openvino_option, **export_kwargs)
        if accelerator == 'onnxruntime':  # onnxruntime backend will not care about ipex usage
            if onnxruntime_session_options is None:
                # Imported lazily so onnxruntime is only needed when this backend is used.
                import onnxruntime
                onnxruntime_session_options = onnxruntime.SessionOptions()
                # thread_num is only applied to the options object created here;
                # user-supplied session options are passed through untouched.
                if thread_num is not None:
                    onnxruntime_session_options.intra_op_num_threads = thread_num
                    onnxruntime_session_options.inter_op_num_threads = thread_num
            return PytorchONNXRuntimeModel(model, input_sample, onnxruntime_session_options,
                                           simplification=simplification, **export_kwargs)
        if accelerator == 'jit' or use_ipex:
            if use_ipex:
                invalidInputError(not TORCH_VERSION_LESS_1_10,
                                  "torch version should >=1.10 to use ipex")
            use_jit = (accelerator == "jit")
            # None means "not specified by the caller".
            channels_last = export_kwargs.get("channels_last")
            return PytorchIPEXJITModel(model, input_sample=input_sample, use_ipex=use_ipex,
                                       use_jit=use_jit, channels_last=channels_last)
        # No supported backend matched the requested configuration.
        invalidInputError(False, "Accelerator {} is invalid.".format(accelerator))

[docs]    @staticmethod
    def save(model: nn.Module, path):
        """
        Save the model to local file.

        This is a thin wrapper that forwards both arguments unchanged to ``save_model``.

        :param model: Any model of torch.nn.Module, including all models accelerated by
               Trainer.trace/Trainer.quantize.
        :param path: Path to saved model. Path should be a directory.
        """
        save_model(model, path)

[docs]    @staticmethod
    def load(path, model: Optional[nn.Module] = None):
        """
        Load a model from local.

        This is a thin wrapper that forwards both arguments unchanged to ``load_model``
        and returns its result.

        :param path: Path to model to be loaded. Path should be a directory.
        :param model: Required FP32 model to load pytorch model, it is needed if you accelerated
               the model with accelerator=None by Trainer.trace/Trainer.quantize. model
               should be set to None if you choose accelerator="onnxruntime"/"openvino"/"jit".
        :return: Model with different acceleration(None/OpenVINO/ONNX Runtime/JIT) or
                 precision(FP32/FP16/BF16/INT8).
        """
        return load_model(path, model)


def _signature_check(function):
    '''
    A quick helper to judge whether input function is following this calling
    method `metric(pred, target)`.
    '''
    import inspect
    sig = inspect.signature(function)
    if len(sig.parameters.values()) < 2:
        return False
    param1_name = list(sig.parameters.values())[0].name
    param2_name = list(sig.parameters.values())[1].name
    if "pred" in param1_name and "target" in param2_name:
        return True
    return False


def _accuracy_calculate_helper(model, metric, data):
    '''
    A quick helper to calculate accuracy.

    Two kinds of ``metric`` are handled:

    1. a torchmetrics ``Metric`` instance, or a callable whose signature passes
       ``_signature_check``: it is wrapped in ``NanoMetric`` and the wrapper is
       called with ``(model, data)``; ``data`` must not be None in this case.
    2. any other callable: it is called directly, as ``metric(model)`` when
       ``data`` is None and as ``metric(model, data)`` otherwise.

    :param model: the model to be evaluated.
    :param metric: the metric object or callable described above.
    :param data: validation data, may be None only for the second kind of metric.
    :return: whatever the (wrapped) metric call returns.
    '''
    takes_pred_and_target = isinstance(metric, Metric) or _signature_check(metric) is True

    if not takes_pred_and_target:
        # The metric drives the model itself; only pass data when there is some.
        return metric(model) if data is None else metric(model, data)

    invalidInputError(data is not None,
                      "Validation data can't be None when you pass a "
                      "torchmetrics.Metric object or similar callable "
                      "object which takes prediction and target as input.")
    wrapped_metric = NanoMetric(metric)
    return wrapped_metric(model, data)