/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2022 Marvell.
*/
#ifndef RTE_MLDEV_H
#define RTE_MLDEV_H
/**
* @file rte_mldev.h
*
* @warning
* @b EXPERIMENTAL:
* All functions in this file may be changed or removed without prior notice.
*
* ML (Machine Learning) device API.
*
* The ML framework is built on the following model:
*
*
* +-----------------+ rte_ml_[en|de]queue_burst()
* | | |
* | Machine o------+ +--------+ |
* | Learning | | | queue | | +------+
* | Inference o------+-----o |<===o===>|Core 0|
* | Engine | | | pair 0 | +------+
* | o----+ | +--------+
* | | | |
* +-----------------+ | | +--------+
* ^ | | | queue | +------+
* | | +-----o |<=======>|Core 1|
* | | | pair 1 | +------+
* | | +--------+
* +--------+--------+ |
* | +-------------+ | | +--------+
* | | Model 0 | | | | queue | +------+
* | +-------------+ | +-------o |<=======>|Core N|
* | +-------------+ | | pair N | +------+
* | | Model 1 | | +--------+
* | +-------------+ |
* | +-------------+ |<------> rte_ml_model_load()
* | | Model .. | |-------> rte_ml_model_info_get()
* | +-------------+ |<------- rte_ml_model_start()
* | +-------------+ |<------- rte_ml_model_stop()
* | | Model N | |<------- rte_ml_model_params_update()
* | +-------------+ |<------- rte_ml_model_unload()
* +-----------------+
*
* ML Device: A hardware or software-based implementation of the ML device API for
* running inferences using a pre-trained ML model.
*
* ML Model: An ML model is an algorithm trained over a dataset. A model consists of the
* procedure/algorithm and the data/pattern required to make predictions on live data.
* Once the model has been created and trained outside of the DPDK scope, it can be loaded
* via rte_ml_model_load() and then started using the rte_ml_model_start() API.
* The rte_ml_model_params_update() API can be used to update model parameters such as
* weights and biases without unloading the model via rte_ml_model_unload().
*
* ML Inference: ML inference is the process of feeding data to the model via the
* rte_ml_enqueue_burst() API and using the rte_ml_dequeue_burst() API to get the calculated
* outputs/predictions from the started model.
*
* In all functions of the ML device API, the ML device is designated by an
* integer >= 0 named the device identifier *dev_id*.
*
* The functions exported by the ML device API to setup a device designated by
* its device identifier must be invoked in the following order:
*
* - rte_ml_dev_configure()
* - rte_ml_dev_queue_pair_setup()
* - rte_ml_dev_start()
*
* A model is required to run inference operations with the user-specified inputs.
* The application needs to invoke the ML model API in the following order before queueing
* inference jobs.
*
* - rte_ml_model_load()
* - rte_ml_model_start()
*
* A model can be loaded on a device only after the device has been configured and can be
* started or stopped only after a device has been started.
*
* The rte_ml_model_info_get() API is provided to retrieve information related to the model.
* The information includes the shape and type of the inputs and outputs required for inference.
*
* Data quantization and dequantization are key aspects of the ML domain. Quantization converts
* input data from a higher precision to a lower precision data type, and dequantization does the
* reverse for the output. APIs are provided to enable quantization through rte_ml_io_quantize()
* and dequantization through rte_ml_io_dequantize(). These APIs can handle input and output
* buffers holding data for multiple batches.
*
* Two utility APIs, rte_ml_io_input_size_get() and rte_ml_io_output_size_get(), can be used to
* get the size of quantized and de-quantized multi-batch input and output buffers.
*
* The user can optionally update the model parameters with rte_ml_model_params_update() after
* invoking the rte_ml_model_stop() API on a given model ID.
*
* The application can invoke, in any order, the functions exported by the ML API to enqueue
* inference jobs and dequeue inference responses.
*
* If the application wants to change the device configuration (i.e., call
* rte_ml_dev_configure() or rte_ml_dev_queue_pair_setup()), then the application must stop the
* device using rte_ml_dev_stop() API. Likewise, if model parameters need to be updated then
* the application must call rte_ml_model_stop() followed by rte_ml_model_params_update() API
* for the given model. The application does not need to call rte_ml_dev_stop() API for
* any model re-configuration such as rte_ml_model_params_update(), rte_ml_model_unload() etc.
*
* Once the device has been started with rte_ml_dev_start() and the model has been started with
* rte_ml_model_start(), the application can call rte_ml_enqueue_burst() and
* rte_ml_dequeue_burst() on the intended device and model ID.
*
* Finally, an application can close an ML device by invoking the rte_ml_dev_close() function.
*
* A typical application using the ML API follows the programming flow below. An illustrative
* device setup sequence is sketched in the comment that follows this block.
*
* - rte_ml_dev_configure()
* - rte_ml_dev_queue_pair_setup()
* - rte_ml_model_load()
* - rte_ml_dev_start()
* - rte_ml_model_start()
* - rte_ml_model_info_get()
* - rte_ml_enqueue_burst()
* - rte_ml_dequeue_burst()
* - rte_ml_model_stop()
* - rte_ml_model_unload()
* - rte_ml_dev_stop()
* - rte_ml_dev_close()
*
* Regarding multi-threading, by default, all the functions of the ML Device API exported by a PMD
* are lock-free functions which are assumed not to be invoked in parallel on different logical
* cores on the same target object. For instance, the dequeue function of a poll mode driver
* cannot be invoked in parallel on two logical cores to operate on the same queue pair. Of
* course, this function can be invoked in parallel by different logical cores on different
* queue pairs. It is the responsibility of the user application to enforce this rule.
*/
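/*
* Illustrative sketch (not part of the API) of the device bring-up sequence described above.
* The device identifier, single queue pair and descriptor count used here are assumptions
* chosen for the example; error handling is omitted for brevity.
*
*	struct rte_ml_dev_info dev_info;
*	struct rte_ml_dev_config dev_conf;
*	struct rte_ml_dev_qp_conf qp_conf;
*	int16_t dev_id = 0;
*
*	rte_ml_dev_info_get(dev_id, &dev_info);
*
*	dev_conf.socket_id = rte_ml_dev_socket_id(dev_id);
*	dev_conf.nb_models = 1;
*	dev_conf.nb_queue_pairs = 1;
*	rte_ml_dev_configure(dev_id, &dev_conf);
*
*	qp_conf.nb_desc = dev_info.max_desc;
*	qp_conf.cb = NULL;
*	rte_ml_dev_queue_pair_setup(dev_id, 0, &qp_conf, SOCKET_ID_ANY);
*
*	rte_ml_dev_start(dev_id);
*/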
#include <rte_common.h>
#include <rte_log.h>
#include <rte_mempool.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Logging Macro */
extern int rte_ml_dev_logtype;
#define RTE_MLDEV_LOG(level, fmt, args...) \
rte_log(RTE_LOG_##level, rte_ml_dev_logtype, "%s(): " fmt "\n", __func__, ##args)
#define RTE_ML_STR_MAX 128
/**< Maximum length of name string */
#define RTE_MLDEV_DEFAULT_MAX 32
/** Maximum number of devices if rte_ml_dev_init() is not called. */
/* Device operations */
/**
* Initialize the device array before probing devices. If not called, the first device probed would
* initialize the array to a size of RTE_MLDEV_DEFAULT_MAX.
*
* @param dev_max
* Maximum number of devices.
*
* @return
* 0 on success, -rte_errno otherwise:
* - ENOMEM if out of memory
* - EINVAL if 0 size
* - EBUSY if already initialized
*/
__rte_experimental
int
rte_ml_dev_init(size_t dev_max);
/**
* Get the total number of ML devices that have been successfully initialised.
*
* @return
* - The total number of usable ML devices.
*/
__rte_experimental
uint16_t
rte_ml_dev_count(void);
/**
* Check if the device is in the ready state.
*
* @param dev_id
* The identifier of the device.
*
* @return
* - 0 if the device is not in the ready state.
* - 1 if the device is in the ready state.
*/
__rte_experimental
int
rte_ml_dev_is_valid_dev(int16_t dev_id);
/**
* Return the NUMA socket to which a device is connected.
*
* @param dev_id
* The identifier of the device.
*
* @return
* - The NUMA socket id to which the device is connected
* - 0 if the socket could not be determined.
* - -EINVAL: if the dev_id value is not valid.
*/
__rte_experimental
int
rte_ml_dev_socket_id(int16_t dev_id);
/** ML device information */
struct rte_ml_dev_info {
const char *driver_name;
/**< Driver name */
uint16_t max_models;
/**< Maximum number of models supported by the device.
* @see struct rte_ml_dev_config::nb_models
*/
uint16_t max_queue_pairs;
/**< Maximum number of queue pairs supported by the device.
* @see struct rte_ml_dev_config::nb_queue_pairs
*/
uint16_t max_desc;
/**< Maximum number of descriptors per queue pair supported by the device.
* @see struct rte_ml_dev_qp_conf::nb_desc
*/
uint16_t max_segments;
/**< Maximum number of scatter-gather entries supported by the device.
* @see struct rte_ml_buff_seg, struct rte_ml_buff_seg::next
*/
uint16_t min_align_size;
/**< Minimum alignment size of IO buffers used by the device. */
};
/**
* Retrieve the information of the device.
*
* @param dev_id
* The identifier of the device.
* @param dev_info
* A pointer to a structure of type *rte_ml_dev_info* to be filled with the info of the device.
*
* @return
* - 0: Success, driver updates the information of the ML device
* - < 0: Error code returned by the driver info get function.
*/
__rte_experimental
int
rte_ml_dev_info_get(int16_t dev_id, struct rte_ml_dev_info *dev_info);
/** ML device configuration structure */
struct rte_ml_dev_config {
int socket_id;
/**< Socket to allocate resources on. */
uint16_t nb_models;
/**< Number of models to be loaded on the device.
* This value cannot exceed the max_models value previously provided in
* struct rte_ml_dev_info::max_models
*/
uint16_t nb_queue_pairs;
/**< Number of queue pairs to configure on this device.
* This value cannot exceed the max_queue_pairs value previously provided in
* struct rte_ml_dev_info::max_queue_pairs
*/
};
/**
* Configure an ML device.
*
* This function must be invoked first before any other function in the API.
*
* An ML device can be re-configured when in a stopped state. The device cannot be re-configured
* after rte_ml_dev_close() is called.
*
* The caller may use rte_ml_dev_info_get() to get the capabilities of the resources available for
* this ML device.
*
* @param dev_id
* The identifier of the device to configure.
* @param config
* The ML device configuration structure.
*
* @return
* - 0: Success, device configured.
* - < 0: Error code returned by the driver configuration function.
*/
__rte_experimental
int
rte_ml_dev_configure(int16_t dev_id, const struct rte_ml_dev_config *config);
/* Forward declaration */
struct rte_ml_op;
/** Callback function called during rte_ml_dev_stop(), invoked once per flushed ML op. */
typedef void (*rte_ml_dev_stop_flush_t)(int16_t dev_id, uint16_t qp_id, struct rte_ml_op *op);
/** ML device queue pair configuration structure. */
struct rte_ml_dev_qp_conf {
uint32_t nb_desc;
/**< Number of descriptors per queue pair.
* This value cannot exceed the max_desc value previously provided in
* struct rte_ml_dev_info::max_desc
*/
rte_ml_dev_stop_flush_t cb;
/**< Callback function called during rte_ml_dev_stop(), invoked once per active ML op.
* Value NULL is allowed, in which case callback will not be invoked.
* This function can be used to properly dispose of outstanding ML ops from all
* queue pairs, for example ops containing memory pointers.
* @see rte_ml_dev_stop()
*/
};
/**
* Set up a queue pair for a device. This should only be called when the device is stopped.
*
* @param dev_id
* The identifier of the device.
* @param queue_pair_id
* The index of the queue pair to set up. The value must be in the range [0, nb_queue_pairs - 1]
* previously supplied to rte_ml_dev_configure().
* @param qp_conf
* The pointer to the configuration data to be used for the queue pair.
* @param socket_id
* The *socket_id* argument is the socket identifier in case of NUMA.
* The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the memory allocated
* for the queue pair.
*
* @return
* - 0: Success, queue pair correctly set up.
* - < 0: Queue pair configuration failed.
*/
__rte_experimental
int
rte_ml_dev_queue_pair_setup(int16_t dev_id, uint16_t queue_pair_id,
const struct rte_ml_dev_qp_conf *qp_conf, int socket_id);
/**
* Start an ML device.
*
* The device start step consists of setting the configured features and enabling the ML device
* to accept inference jobs.
*
* @param dev_id
* The identifier of the device.
*
* @return
* - 0: Success, device started.
* - <0: Error code of the driver device start function.
*/
__rte_experimental
int
rte_ml_dev_start(int16_t dev_id);
/**
* Stop an ML device. A stopped device cannot accept inference jobs.
* The device can be restarted with a call to rte_ml_dev_start().
*
* @param dev_id
* The identifier of the device.
*
* @return
* - 0: Success, device stopped.
* - <0: Error code of the driver device stop function.
*/
__rte_experimental
int
rte_ml_dev_stop(int16_t dev_id);
/**
* Close an ML device. The device cannot be restarted!
*
* @param dev_id
* The identifier of the device.
*
* @return
* - 0 on successfully closing device.
* - <0 on failure to close device.
*/
__rte_experimental
int
rte_ml_dev_close(int16_t dev_id);
/** Status of ML operation */
enum rte_ml_op_status {
RTE_ML_OP_STATUS_SUCCESS = 0,
/**< Operation completed successfully */
RTE_ML_OP_STATUS_NOT_PROCESSED,
/**< Operation has not yet been processed by the device. */
RTE_ML_OP_STATUS_ERROR,
/**< Operation completed with error.
* Application can invoke rte_ml_op_error_get() to get PMD specific
* error code if needed.
*/
};
/** ML operation's input and output buffer representation as a scatter-gather list
*/
struct rte_ml_buff_seg {
rte_iova_t iova_addr;
/**< IOVA address of segment buffer. */
void *addr;
/**< Virtual address of segment buffer. */
uint32_t length;
/**< Segment length. */
uint32_t reserved;
/**< Reserved for future use. */
struct rte_ml_buff_seg *next;
/**< Points to next segment. Value NULL represents the last segment. */
};
/**
* ML Operation.
*
* This structure contains data related to performing an ML operation on the buffers using
* the model specified through model_id.
*/
struct rte_ml_op {
uint16_t model_id;
/**< Model ID to be used for the operation. */
uint16_t nb_batches;
/**< Number of batches. Minimum value must be one.
* The input buffer must hold inference data for each batch contiguously.
*/
uint32_t reserved;
/**< Reserved for future use. */
struct rte_mempool *mempool;
/**< Pool from which operation is allocated. */
struct rte_ml_buff_seg input;
/**< Input buffer to hold the inference data. */
struct rte_ml_buff_seg output;
/**< Output buffer to hold the inference output by the driver. */
RTE_STD_C11
union {
uint64_t user_u64;
/**< User data as uint64_t.*/
void *user_ptr;
/**< User data as void*.*/
};
enum rte_ml_op_status status;
/**< Operation status. */
uint64_t impl_opaque;
/**< Implementation specific opaque value.
* An implementation may use this field to hold
* implementation specific value to share between
* dequeue and enqueue operation.
* The application should not modify this field.
*/
} __rte_cache_aligned;
/* Enqueue/Dequeue operations */
/**
* Enqueue a burst of ML inferences for processing on an ML device.
*
* The rte_ml_enqueue_burst() function is invoked to place ML inference
* operations on the queue *qp_id* of the device designated by its *dev_id*.
*
* The *nb_ops* parameter is the number of inferences to process which are
* supplied in the *ops* array of *rte_ml_op* structures.
*
* The rte_ml_enqueue_burst() function returns the number of inferences it
* actually enqueued for processing. A return value equal to *nb_ops* means that
* all operations have been enqueued.
*
* @param dev_id
* The identifier of the device.
* @param qp_id
* The index of the queue pair to which inferences are to be enqueued for processing.
* The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
* *rte_ml_dev_configure*.
* @param ops
* The address of an array of *nb_ops* pointers to *rte_ml_op* structures which contain the
* ML inferences to be processed.
* @param nb_ops
* The number of operations to process.
*
* @return
* The number of inference operations actually enqueued to the ML device.
* The return value can be less than the value of the *nb_ops* parameter when the ML device queue
* is full or if invalid parameters are specified in a *rte_ml_op*.
*/
__rte_experimental
uint16_t
rte_ml_enqueue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
/**
* Dequeue a burst of processed ML inference operations from a queue on the ML device.
* The dequeued operations are stored in *rte_ml_op* structures whose pointers are supplied
* in the *ops* array.
*
* The rte_ml_dequeue_burst() function returns the number of inferences actually dequeued,
* which is the number of *rte_ml_op* data structures effectively supplied into the *ops* array.
*
* A return value equal to *nb_ops* indicates that the queue contained at least *nb_ops* operations,
* and this is likely to signify that other processed operations remain in the device's output queue.
* Applications implementing a "retrieve as many processed operations as possible" policy can check
* this specific case and keep invoking the rte_ml_dequeue_burst() function until a value less than
* *nb_ops* is returned.
*
* The rte_ml_dequeue_burst() function does not provide any error notification to avoid
* the corresponding overhead.
*
* @param dev_id
* The identifier of the device.
* @param qp_id
* The index of the queue pair from which to retrieve processed operations.
* The value must be in the range [0, nb_queue_pairs - 1] previously supplied to
* rte_ml_dev_configure().
* @param ops
* The address of an array of pointers to *rte_ml_op* structures that must be large enough to
* store *nb_ops* pointers in it.
* @param nb_ops
* The maximum number of inferences to dequeue.
*
* @return
* The number of operations actually dequeued, which is the number of pointers
* to *rte_ml_op* structures effectively supplied to the *ops* array.
*/
__rte_experimental
uint16_t
rte_ml_dequeue_burst(int16_t dev_id, uint16_t qp_id, struct rte_ml_op **ops, uint16_t nb_ops);
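/*
* Illustrative sketch of a simple enqueue/poll loop on queue pair 0, assuming BURST_SZ ops have
* already been populated in ops[] (model_id, input/output segments, mempool). BURST_SZ and the
* ops[] array are assumptions chosen for the example; the error check uses rte_ml_op_error_get(),
* declared below.
*
*	struct rte_ml_op *deq_ops[BURST_SZ];
*	struct rte_ml_op_error err;
*	uint16_t nb_enq, nb_deq = 0, i;
*
*	nb_enq = rte_ml_enqueue_burst(dev_id, 0, ops, BURST_SZ);
*	while (nb_deq < nb_enq)
*		nb_deq += rte_ml_dequeue_burst(dev_id, 0, &deq_ops[nb_deq], nb_enq - nb_deq);
*
*	for (i = 0; i < nb_deq; i++)
*		if (deq_ops[i]->status == RTE_ML_OP_STATUS_ERROR)
*			rte_ml_op_error_get(dev_id, deq_ops[i], &err);
*/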
/**
* Verbose error structure definition.
*/
struct rte_ml_op_error {
char message[RTE_ML_STR_MAX]; /**< Human-readable error message. */
uint64_t errcode; /**< Vendor specific error code. */
};
/**
* Get PMD specific error information for an ML op.
*
* When an ML operation completes with RTE_ML_OP_STATUS_ERROR as its status,
* this API allows retrieving the PMD specific error details.
*
* @param[in] dev_id
* Device identifier
* @param[in] op
* Handle of ML operation
* @param[out] error
* Address of structure rte_ml_op_error to be filled
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_op_error_get(int16_t dev_id, struct rte_ml_op *op, struct rte_ml_op_error *error);
/* Statistics operations */
/** Device statistics. */
struct rte_ml_dev_stats {
uint64_t enqueued_count;
/**< Count of all operations enqueued */
uint64_t dequeued_count;
/**< Count of all operations dequeued */
uint64_t enqueue_err_count;
/**< Total error count on operations enqueued */
uint64_t dequeue_err_count;
/**< Total error count on operations dequeued */
};
/**
* Retrieve the general I/O statistics of a device.
*
* @param dev_id
* The identifier of the device.
* @param stats
* Pointer to the structure where statistics will be copied.
* On error, this location may or may not have been modified.
* @return
* - 0 on success
* - -EINVAL: If invalid parameter pointer is provided.
*/
__rte_experimental
int
rte_ml_dev_stats_get(int16_t dev_id, struct rte_ml_dev_stats *stats);
/**
* Reset the statistics of a device.
*
* @param dev_id
* The identifier of the device.
*/
__rte_experimental
void
rte_ml_dev_stats_reset(int16_t dev_id);
/**
* Selects the component of the mldev to retrieve statistics from.
*/
enum rte_ml_dev_xstats_mode {
RTE_ML_DEV_XSTATS_DEVICE,
/**< Device xstats */
RTE_ML_DEV_XSTATS_MODEL,
/**< Model xstats */
};
/**
* A name-key lookup element for extended statistics.
*
* This structure is used to map between names and ID numbers for extended ML device statistics.
*/
struct rte_ml_dev_xstats_map {
uint16_t id;
/**< xstat identifier */
char name[RTE_ML_STR_MAX];
/**< xstat name */
};
/**
* Retrieve names of extended statistics of an ML device.
*
* @param dev_id
* The identifier of the device.
* @param mode
* Mode of statistics to retrieve. Choices include the device statistics and model statistics.
* @param model_id
* Used to specify the model number in model mode, and is ignored in device mode.
* @param[out] xstats_map
* Block of memory to insert names and ids into. Must have capacity for at least *size* entries.
* If set to NULL, the function returns the required capacity. The id values returned can be passed to
* *rte_ml_dev_xstats_get* to select statistics.
* @param size
* Capacity of xstats_map (number of entries).
* @return
* - Positive value lower or equal to size: success. The return value is the number of entries
* filled in the stats table.
* - Positive value higher than size: error, the given statistics table is too small. The return
* value corresponds to the size that should be given to succeed. The entries in the table are not
* valid and shall not be used by the caller.
* - Negative value on error:
* -ENODEV for invalid *dev_id*.
* -EINVAL for invalid mode, model parameters.
* -ENOTSUP if the device doesn't support this function.
*/
__rte_experimental
int
rte_ml_dev_xstats_names_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
struct rte_ml_dev_xstats_map *xstats_map, uint32_t size);
/**
* Retrieve the value of a single stat by requesting it by name.
*
* @param dev_id
* The identifier of the device.
* @param name
* Name of the stat to retrieve.
* @param[out] stat_id
* If non-NULL, the numerical id of the stat will be returned, so that further requests for the
* stat can be made using rte_ml_dev_xstats_get(), which is faster as it does not need to scan a
* list of names for the stat. If the stat cannot be found, the id returned will be (unsigned)-1.
* @param[out] value
* Value of the stat to be returned.
* @return
* - Zero: No error.
* - Negative value: -EINVAL if stat not found, -ENOTSUP if not supported.
*/
__rte_experimental
int
rte_ml_dev_xstats_by_name_get(int16_t dev_id, const char *name, uint16_t *stat_id, uint64_t *value);
/**
* Retrieve extended statistics of an ML device.
*
* @param dev_id
* The identifier of the device.
* @param mode
* Mode of statistics to retrieve. Choices include the device statistics and model statistics.
* @param model_id
* Used to specify the model id in model mode, and is ignored in device mode.
* @param stat_ids
* ID numbers of the stats to get. The ids can be obtained from the stat position in the list returned by
* rte_ml_dev_xstats_names_get(), or by using rte_ml_dev_xstats_by_name_get().
* @param[out] values
* Values for each stat requested by ID.
* @param nb_ids
* Number of stats requested.
* @return
* - Positive value: number of stat entries filled into the values array
* - Negative value on error:
* -ENODEV for invalid *dev_id*.
* -EINVAL for invalid mode, model id or stat id parameters.
* -ENOTSUP if the device doesn't support this function.
*/
__rte_experimental
int
rte_ml_dev_xstats_get(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
const uint16_t stat_ids[], uint64_t values[], uint16_t nb_ids);
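/*
* Illustrative sketch of reading all device-level xstats: query the required capacity first,
* then fetch the names and the corresponding values. The map, ids and values arrays are assumed
* to have been allocated with room for at least n entries; error handling is omitted.
*
*	int i, n;
*
*	n = rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, NULL, 0);
*	rte_ml_dev_xstats_names_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, map, n);
*	for (i = 0; i < n; i++)
*		ids[i] = map[i].id;
*	rte_ml_dev_xstats_get(dev_id, RTE_ML_DEV_XSTATS_DEVICE, -1, ids, values, n);
*/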
/**
* Reset the values of the xstats of the selected component in the device.
*
* @param dev_id
* The identifier of the device.
* @param mode
* Mode of the statistics to reset. Choose from device or model.
* @param model_id
* Model stats to reset. 0 and positive values select models, while -1 indicates all models.
* @param stat_ids
* Selects specific statistics to be reset. When NULL, all statistics selected by *mode* will be
* reset. If non-NULL, must point to an array of at least *nb_ids* entries.
* @param nb_ids
* The number of ids available in the *stat_ids* array. Ignored when *stat_ids* is NULL.
* @return
* - Zero: successfully reset the statistics.
* - Negative value: -EINVAL invalid parameters, -ENOTSUP if not supported.
*/
__rte_experimental
int
rte_ml_dev_xstats_reset(int16_t dev_id, enum rte_ml_dev_xstats_mode mode, int32_t model_id,
const uint16_t stat_ids[], uint16_t nb_ids);
/**
* Dump internal information about *dev_id* to the FILE* provided in *fd*.
*
* @param dev_id
* The identifier of the device.
* @param fd
* A pointer to a file for output.
* @return
* - 0: on success.
* - <0: on failure.
*/
__rte_experimental
int
rte_ml_dev_dump(int16_t dev_id, FILE *fd);
/**
* Trigger the ML device self test.
*
* @param dev_id
* The identifier of the device.
* @return
* - 0: Selftest successful.
* - -ENOTSUP: if the device doesn't support selftest.
* - other values < 0 on failure.
*/
__rte_experimental
int
rte_ml_dev_selftest(int16_t dev_id);
/* Model operations */
/** ML model load parameters
*
* Parameters required to load an ML model.
*/
struct rte_ml_model_params {
void *addr;
/**< Address of model buffer */
size_t size;
/**< Size of model buffer */
};
/**
* Load an ML model to the device.
*
* Load an ML model to the device with parameters requested in the structure rte_ml_model_params.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] params
* Parameters for the model to be loaded.
* @param[out] model_id
* Identifier of the model loaded.
*
* @return
* - 0: Success, Model loaded.
* - < 0: Failure, Error code of the model load driver function.
*/
__rte_experimental
int
rte_ml_model_load(int16_t dev_id, struct rte_ml_model_params *params, uint16_t *model_id);
/**
* Unload an ML model from the device.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier of the model to be unloaded.
*
* @return
* - 0: Success, Model unloaded.
* - < 0: Failure, Error code of the model unload driver function.
*/
__rte_experimental
int
rte_ml_model_unload(int16_t dev_id, uint16_t model_id);
/**
* Start an ML model for the given device ID.
*
* Start an ML model to accept inference requests.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier of the model to be started.
*
* @return
* - 0: Success, Model started.
* - < 0: Failure, Error code of the model start driver function.
*/
__rte_experimental
int
rte_ml_model_start(int16_t dev_id, uint16_t model_id);
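/*
* Illustrative sketch of loading and starting a model. The model_buf and model_len variables
* are assumptions and stand for a model binary obtained outside of DPDK.
*
*	struct rte_ml_model_params params = {
*		.addr = model_buf,
*		.size = model_len,
*	};
*	uint16_t model_id;
*
*	if (rte_ml_model_load(dev_id, &params, &model_id) == 0)
*		rte_ml_model_start(dev_id, model_id);
*/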
/**
* Stop an ML model for the given device ID.
*
* Stopping a model disables it from accepting further inference jobs.
* All inference jobs must have been completed before a model stop is attempted.
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier of the model to be stopped.
*
* @return
* - 0: Success, Model stopped.
* - < 0: Failure, Error code of the model stop driver function.
*/
__rte_experimental
int
rte_ml_model_stop(int16_t dev_id, uint16_t model_id);
/**
* Input and output data types. ML models can operate on reduced precision
* datatypes to achieve better power efficiency, lower network latency and lower memory footprint.
* This enum is used to represent the lower precision integer and floating point types used
* by ML models.
*/
enum rte_ml_io_type {
RTE_ML_IO_TYPE_UNKNOWN = 0,
/**< Invalid or unknown type */
RTE_ML_IO_TYPE_INT8,
/**< 8-bit integer */
RTE_ML_IO_TYPE_UINT8,
/**< 8-bit unsigned integer */
RTE_ML_IO_TYPE_INT16,
/**< 16-bit integer */
RTE_ML_IO_TYPE_UINT16,
/**< 16-bit unsigned integer */
RTE_ML_IO_TYPE_INT32,
/**< 32-bit integer */
RTE_ML_IO_TYPE_UINT32,
/**< 32-bit unsigned integer */
RTE_ML_IO_TYPE_FP8,
/**< 8-bit floating point number */
RTE_ML_IO_TYPE_FP16,
/**< IEEE 754 16-bit floating point number */
RTE_ML_IO_TYPE_FP32,
/**< IEEE 754 32-bit floating point number */
RTE_ML_IO_TYPE_BFLOAT16
/**< 16-bit brain floating point number. */
};
/**
* Input and output format. This is used to represent the encoding type of the multi-dimensional
* data used by ML models.
*/
enum rte_ml_io_format {
RTE_ML_IO_FORMAT_NCHW = 1,
/**< Batch size (N) x channels (C) x height (H) x width (W) */
RTE_ML_IO_FORMAT_NHWC,
/**< Batch size (N) x height (H) x width (W) x channels (C) */
RTE_ML_IO_FORMAT_CHWN,
/**< Channels (C) x height (H) x width (W) x batch size (N) */
RTE_ML_IO_FORMAT_3D,
/**< Format to represent 3-dimensional data */
RTE_ML_IO_FORMAT_2D,
/**< Format to represent matrix data */
RTE_ML_IO_FORMAT_1D,
/**< Format to represent vector data */
RTE_ML_IO_FORMAT_SCALAR,
/**< Format to represent scalar data */
};
/**
* Input and output shape. This structure represents the encoding format and dimensions
* of the tensor or vector.
*
* The data can be a 4D / 3D tensor, matrix, vector or a scalar. The number of dimensions used
* for the data depends on the format. Unused dimensions should be set to 1.
*/
struct rte_ml_io_shape {
enum rte_ml_io_format format;
/**< Format of the data */
uint32_t w;
/**< First dimension */
uint32_t x;
/**< Second dimension */
uint32_t y;
/**< Third dimension */
uint32_t z;
/**< Fourth dimension */
};
/** Input and output data information structure
*
* Specifies the type and shape of input and output data.
*/
struct rte_ml_io_info {
char name[RTE_ML_STR_MAX];
/**< Name of data */
struct rte_ml_io_shape shape;
/**< Shape of data */
enum rte_ml_io_type qtype;
/**< Type of quantized data */
enum rte_ml_io_type dtype;
/**< Type of de-quantized data */
};
/** Model information structure */
struct rte_ml_model_info {
char name[RTE_ML_STR_MAX];
/**< Model name. */
char version[RTE_ML_STR_MAX];
/**< Model version */
uint16_t model_id;
/**< Model ID */
uint16_t device_id;
/**< Device ID */
uint16_t batch_size;
/**< Maximum number of batches that the model can process simultaneously */
uint32_t nb_inputs;
/**< Number of inputs */
const struct rte_ml_io_info *input_info;
/**< Input info array. Array size is equal to nb_inputs */
uint32_t nb_outputs;
/**< Number of outputs */
const struct rte_ml_io_info *output_info;
/**< Output info array. Array size is equal to nb_outputs */
uint64_t wb_size;
/**< Size of model weights and bias */
};
/**
* Get ML model information.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier for the model created
* @param[out] model_info
* Pointer to a model info structure
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_model_info_get(int16_t dev_id, uint16_t model_id, struct rte_ml_model_info *model_info);
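/*
* Illustrative sketch of querying model information and walking the input descriptors, e.g. to
* learn the names and quantized/dequantized types an application must provide.
*
*	struct rte_ml_model_info info;
*	uint32_t i;
*
*	rte_ml_model_info_get(dev_id, model_id, &info);
*	for (i = 0; i < info.nb_inputs; i++)
*		printf("input %s: qtype %d dtype %d\n", info.input_info[i].name,
*		       info.input_info[i].qtype, info.input_info[i].dtype);
*/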
/**
* Update the model parameters without unloading the model.
*
* Update model parameters such as weights and bias without unloading the model.
* rte_ml_model_stop() must be called before invoking this API.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier for the model created
* @param[in] buffer
* Pointer to the model weights and bias buffer.
* Size of the buffer is equal to wb_size returned in *rte_ml_model_info*.
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_model_params_update(int16_t dev_id, uint16_t model_id, void *buffer);
/* IO operations */
/**
* Get size of quantized and dequantized input buffers.
*
* Calculate the size of buffers required for quantized and dequantized input data.
* This API would return the buffer sizes for the number of batches provided and would
* consider the alignment requirements as per the PMD. Input sizes computed by this API can
* be used by the application to allocate buffers.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier for the model created
* @param[in] nb_batches
* Number of batches of input to be processed in a single inference job
* @param[out] input_qsize
* Quantized input size pointer.
* NULL value is allowed, in which case input_qsize is not calculated by the driver.
* @param[out] input_dsize
* Dequantized input size pointer.
* NULL value is allowed, in which case input_dsize is not calculated by the driver.
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_io_input_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
uint64_t *input_qsize, uint64_t *input_dsize);
/**
* Get size of quantized and dequantized output buffers.
*
* Calculate the size of buffers required for quantized and dequantized output data.
* This API would return the buffer sizes for the number of batches provided and would consider
* the alignment requirements as per the PMD. Output sizes computed by this API can be used by the
* application to allocate buffers.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier for the model created
* @param[in] nb_batches
* Number of batches of input to be processed in a single inference job
* @param[out] output_qsize
* Quantized output size pointer.
* NULL value is allowed, in which case output_qsize is not calculated by the driver.
* @param[out] output_dsize
* Dequantized output size pointer.
* NULL value is allowed, in which case output_dsize is not calculated by the driver.
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_io_output_size_get(int16_t dev_id, uint16_t model_id, uint32_t nb_batches,
uint64_t *output_qsize, uint64_t *output_dsize);
/**
* Quantize input data.
*
* Quantization converts data from a higher precision type to a lower precision type to improve
* the throughput and efficiency of model execution with minimal loss of accuracy.
* Types of dequantized data and quantized data are specified by the model.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier for the model
* @param[in] nb_batches
* Number of batches in the dequantized input buffer
* @param[in] dbuffer
* Address of dequantized input data
* @param[in] qbuffer
* Address of quantized input data
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_io_quantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *dbuffer,
void *qbuffer);
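/*
* Illustrative sketch of preparing a quantized input for a single batch: query the required
* sizes, allocate a buffer, then quantize the application's dequantized data. The dbuffer
* pointer and the use of rte_malloc() with dev_info.min_align_size alignment are assumptions
* chosen for the example.
*
*	uint64_t qsize, dsize;
*	void *qbuffer;
*
*	rte_ml_io_input_size_get(dev_id, model_id, 1, &qsize, &dsize);
*	qbuffer = rte_malloc(NULL, qsize, dev_info.min_align_size);
*	rte_ml_io_quantize(dev_id, model_id, 1, dbuffer, qbuffer);
*/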
/**
* Dequantize output data.
*
* Dequantization converts data from a lower precision type to a higher precision type.
* Types of quantized and dequantized data are specified by the model.
*
* @param[in] dev_id
* The identifier of the device.
* @param[in] model_id
* Identifier for the model
* @param[in] nb_batches
* Number of batches in the dequantized output buffer
* @param[in] qbuffer
* Address of quantized output data
* @param[in] dbuffer
* Address of dequantized output data
*
* @return
* - Returns 0 on success
* - Returns negative value on failure
*/
__rte_experimental
int
rte_ml_io_dequantize(int16_t dev_id, uint16_t model_id, uint16_t nb_batches, void *qbuffer,
void *dbuffer);
/* ML op pool operations */
/**
* Create an ML operation pool
*
* @param name
* ML operations pool name
* @param nb_elts
* Number of elements in pool
* @param cache_size
* Number of elements to cache on lcore, see
* *rte_mempool_create* for further details about cache size
* @param user_size
* Size of private data to allocate for user with each operation
* @param socket_id
* Socket identifier to allocate memory on
* @return
* - On success pointer to mempool
* - On failure NULL
*/
__rte_experimental
struct rte_mempool *
rte_ml_op_pool_create(const char *name, unsigned int nb_elts, unsigned int cache_size,
uint16_t user_size, int socket_id);
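/*
* Illustrative sketch of creating an op pool and obtaining an op from it with the standard
* mempool API; the pool name and sizes are assumptions chosen for the example. Ops are returned
* to the pool with rte_mempool_put() once the application is done with them after dequeue.
*
*	struct rte_mempool *op_pool;
*	struct rte_ml_op *op;
*
*	op_pool = rte_ml_op_pool_create("ml_op_pool", 1024, 64, 0, rte_socket_id());
*	rte_mempool_get(op_pool, (void **)&op);
*/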
/**
* Free an ML operation pool
*
* @param mempool
* A pointer to the mempool structure.
* If NULL then, the function does nothing.
*/
__rte_experimental
void
rte_ml_op_pool_free(struct rte_mempool *mempool);
#ifdef __cplusplus
}
#endif
#endif /* RTE_MLDEV_H */