amlnn-model-playground/dependency/nnsdk/include/nn_sdk.h
2026-01-06 10:29:54 +08:00

1103 lines
No EOL
32 KiB
C
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (C) 2024-2025 Amlogic, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef _NN_SDK_H
#define _NN_SDK_H
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/*=============================================================
Macro Definitions
==============================================================*/
/* SDK version string. */
#define AML_NN_SDK_VERSION "2.8.5"
/* Size of the fixed name buffers (outBuf_t::name, info_t::name). */
#define MAX_NAME_LENGTH 64
/* Not referenced by any declaration visible in this header. */
#define INPUT_MAX_NUM 64
/* Capacity of nn_output::out[]. */
#define OUTPUT_MAX_NUM 64
/* Number of entries in input_info::mean[]. */
#define INPUT_CHANNEL 3
/* Not referenced by any declaration visible in this header. */
#define MAX_DETECT_NUM 230
/* Not referenced by any declaration visible in this header. */
#define SUPPORT_NET_NUM 60
/* Capacity of assign_user_address_t::inAddr[] and ::outAddr[]. */
#define ADDRESS_MAX_NUM 64
/* Capacity of info_t::sizes_of_dim[]. */
#define MAX_TENSOR_NUM_DIMS 6
/* Not referenced by any declaration visible in this header. */
#define INPUT_META_NUM 1
/* Not referenced by any declaration visible in this header. */
#define DELEGATE_MAX_INPUT_NUM 128
/* Not referenced by any declaration visible in this header. */
#define DELEGATE_MAX_OUTPUT_NUM 128
/*=====================================================
the common type for sdk api
======================================================*/
/** @brief Input mode selector (picture / video / camera, per the enumerator names). */
typedef enum {
    AML_IN_PICTURE = 0,
    AML_IN_VIDEO   = 1,
    AML_IN_CAMERA  = 2
} amlnn_input_mode_t;
/**
 * @brief Source framework / format of a model (used by aml_config::modelType).
 *
 * NOTE: the sentinel is spelled MEDEL_MAX (sic). It is part of the public API,
 * and MODEL_MAX is already an enumerator of aml_module_t, so it cannot simply
 * be renamed.
 */
typedef enum _amlnn_model_ {
    CAFFE          = 0,
    TENSORFLOW     = 1,
    TENSORFLOWLITE = 2,
    DARKNET        = 3,
    ONNX           = 4,
    KERAS          = 5,
    PYTORCH        = 6,
    ADLA_LOADABLE  = 7,
    MEDEL_MAX               /* sentinel: one past the last model type */
} amlnn_model_type;
/*=================================================================================
load adla type
==================================================================================*/
/**
 * @brief How the model binary is supplied (used by aml_config::nbgType).
 *
 * Each kind comes as a FILE / MEMORY pair; presumably FILE means "load from
 * aml_config::path" and MEMORY means "read from aml_config::pdata" -- confirm.
 */
typedef enum _amlnn_nbg_type_ {
    NN_NBG_FILE       = 0,
    NN_NBG_MEMORY     = 1,
    NN_RUNTIME_FILE   = 2,
    NN_RUNTIME_MEMORY = 3,
    NN_ADLA_FILE      = 4,
    NN_ADLA_MEMORY    = 5
} amlnn_nbg_type;
/** @brief Kind of data carried by nn_input::input (see nn_input::input_type). */
typedef enum _amlnn_input_ {
    RGB24_RAW_DATA        = 0,
    TENSOR_RAW_DATA       = 1,
    QTENSOR_RAW_DATA      = 2,
    BINARY_RAW_DATA       = 3,
    INPUT_DMA_DATA        = 4,
    INPUT_DMA_SECURE_DATA = 5,
    NV12_RAW_DATA         = 6,
    RAW_DATA_MAX                /* sentinel: one past the last input type */
} amlnn_input_type;
/**
 * @brief Element data format of a buffer.
 *
 * The numeric values are not contiguous (6, 7 and 9 are unused here).
 */
typedef enum _nn_buffer_format_e {
    NN_BUFFER_FORMAT_FP32   = 0,  /*!< \brief 32 bit float */
    NN_BUFFER_FORMAT_FP16   = 1,  /*!< \brief half float */
    NN_BUFFER_FORMAT_UINT8  = 2,  /*!< \brief 8 bit unsigned integer */
    NN_BUFFER_FORMAT_INT8   = 3,  /*!< \brief 8 bit signed integer */
    NN_BUFFER_FORMAT_UINT16 = 4,  /*!< \brief 16 bit unsigned integer */
    NN_BUFFER_FORMAT_INT16  = 5,  /*!< \brief 16 bit signed integer */
    NN_BUFFER_FORMAT_INT32  = 8,  /*!< \brief 32 bit signed integer */
    NN_BUFFER_FORMAT_INT64  = 10, /*!< \brief 64 bit signed integer */
    NN_BUFFER_FORMAT_BOOL   = 11, /*!< \brief bool */
} nn_buffer_format_e;
/** @brief Quantization scheme applied to a buffer's elements. */
typedef enum _nn_buffer_quantize_format_e {
    NN_BUFFER_QUANTIZE_NONE                = 0, /*!< \brief Not quantized */
    NN_BUFFER_QUANTIZE_DYNAMIC_FIXED_POINT = 1, /*!< \brief Dynamic fixed point */
    NN_BUFFER_QUANTIZE_TF_ASYMM            = 2  /*!< \brief TF asymmetric format */
} nn_buffer_quantize_format_e;
/**
 * @brief Identifier of the network / task a module implements
 *        (see aml_output_config_t::mdType).
 *
 * Values 27-29 and 40-98 are unused here. The lowercase enumerators are part
 * of the public API and are kept as spelled.
 */
typedef enum {
    IMAGE_CLASSIFY     = 0,
    OBJECT_DETECT      = 1,   ///< object detection
    FACE_DETECTION     = 2,
    FACE_LANDMARK_5    = 3,   ///< landmark, 5 points
    FACE_LANDMARK_68   = 4,   ///< face landmark (68 key points)
    FACE_RECOGNIZE     = 5,   ///< face recognition
    FACE_COMPARISON    = 6,   ///< face compare: whether same person
    FACE_AGE           = 7,   ///< face age
    FACE_GENDER        = 8,   ///< face gender
    FACE_EMOTION       = 9,   ///< face emotion
    BODY_POSE          = 10,  ///< body pose
    FINGER_POSE        = 11,  ///< finger landmark (21 key points)
    HEAD_DETECTION     = 12,  ///< head detection
    CARPLATE_DETECTION = 13,  ///< car plate detection
    CARPLATE_RECOG     = 14,  ///< car plate recognition
    TEXT_DETECTION     = 15,  ///< text region detection
    IMAGE_SR           = 16,  ///< image SR
    IMAGE_SEGMENTATION = 17,  ///< image segmentation, based on icnet
    PERSON_DETECT      = 18,  ///< person detection
    YOLOFACE_V2        = 19,
    YOLO_V2            = 20,
    YOLO_V3            = 21,
    FACE_NET           = 22,
    FACE_RECOG_U       = 23,
    FACE_RFB_DETECTION = 24,
    AML_PERSON_DETECT  = 25,
    AML_BODY_POSE      = 26,
    mobilenetv2        = 30,
    centerface         = 31,
    ALPD               = 32,
    ALPR               = 33,
    face_blur_v2       = 34,
    face_recog_small   = 35,
    face_reid_v2       = 36,
    landmark2d_106     = 37,
    new_car            = 38,
    old_person         = 39,
    CUSTOM_NETWORK     = 99,  ///< custom network, for user development
    MODEL_MAX          = 100  ///< max model number
} aml_module_t;
/** @brief Representation requested for output data (float32 / raw / DMA / secure DMA, per the names). */
typedef enum {
    AML_OUTDATA_FLOAT32    = 0,
    AML_OUTDATA_RAW        = 1,
    AML_OUTDATA_DMA        = 2,
    AML_OUTDATA_DMA_SECURE = 3
} aml_output_format_t;
/** @brief Mode selector carried in aml_output_config_t::perfMode. */
typedef enum {
    AML_NO_PERF                = 0,
    AML_PERF_INFERENCE         = 1,
    AML_PERF_OUTPUT_GET        = 2,
    AML_PERF_OUTPUT_SET        = 3,
    AML_PERF_RESET_TRANSFORMER = 4,
    AML_PERF_BREAK_TRANSFORMER = 5
} aml_perf_mode_t;
/** @brief What to profile (see aml_util_setProfile and aml_profile_config_t::profile_type). */
typedef enum {
    AML_PROFILE_NONE        = 0,
    AML_PROFILE_PERFORMANCE = 1,
    AML_PROFILE_BANDWIDTH   = 2,
    AML_PROFILE_MEMORY      = 3,
    AML_PERLAYER_RUNTIME    = 4,
    AML_PERLAYER_BANDWIDTH  = 5,
    AML_PERLAYER_OUTPUT     = 6,
    AML_PERLAYER_INPUT      = 7
} aml_profile_type_t;
/** @brief Power policy (see aml_util_setPowerPolicy). Note the values start at 1, not 0. */
typedef enum {
    AML_PERFORMANCE_MODE   = 1,
    AML_POWER_SAVE_MODE    = 2,
    AML_MINIMUM_POWER_MODE = 3
} aml_policy_type_t;
/** @brief Address kind of user-assigned I/O buffers (see assign_user_address_t::io_type). */
typedef enum {
    AML_IO_VIRTUAL        = 0,
    AML_IO_PHYS           = 1,
    AML_IO_VIRTUAL_SECURE = 2,
    AML_IO_PHYS_SECURE    = 3
} aml_io_format_t;
/** @brief Memory layout requested for output tensors. */
typedef enum {
    AML_OUTPUT_ORDER_DEFAULT = 0, /* nhwc for adla, nchw for galcore */
    AML_OUTPUT_ORDER_NHWC    = 1, /* nhwc */
    AML_OUTPUT_ORDER_NCHW    = 2, /* nchw */
} aml_output_order_t;
/** @brief Per-invoke KV-cache value; holds the current mask only. */
typedef struct __aml_kvcache_dynamic_val_t {
    int32_t current_mask;
} aml_kvcache_dynamic_val_t;
/**
 * @brief KV-cache dynamic value plus an update flag.
 *
 * NOTE(review): presumably kvcache_dynamic_val is only consumed when
 * update_kvcache_info_flag is true -- confirm against the implementation.
 */
typedef struct __kvCacheDynamicInfo_t {
    bool update_kvcache_info_flag;
    aml_kvcache_dynamic_val_t kvcache_dynamic_val;
} kvCacheDynamicInfo_t;
/** @brief Transformer model properties: end-of-sequence token and maximum sequence length. */
typedef struct __aml_transformer_model_info {
    uint64_t eos_token;
    int32_t max_sequence_length;
} aml_transformer_model_info;
/** @brief Invocation control embedded in aml_output_config_t::invoke. */
typedef struct __aml_invoke_info_t {
    int typeSize;      /* presumably sizeof this struct -- confirm */
    int invoke_type;   /* 1: invoke_no_wait, 2: waitwithid */
    int32_t timeout;   /* in ms */
    int64_t invoke_id;
    kvCacheDynamicInfo_t kvcache_dynamic_info;
} aml_invoke_info_t;
/** @brief Output request passed by value to aml_module_output_get(). */
typedef struct __amlnn_module_out_data_t {
    int typeSize;               /* presumably sizeof this struct -- confirm */
    aml_module_t mdType;
    aml_perf_mode_t perfMode;
    aml_output_format_t format;
    aml_output_order_t order;
    aml_invoke_info_t invoke;
} aml_output_config_t;
/** @brief Shape, element format and quantization parameters of one buffer. */
typedef struct _nn_buffer_create_params_t {
    unsigned int num_of_dims;                 /*!< \brief Number of dimensions specified in sizes */
    unsigned int sizes[4];                    /*!< \brief Dimension sizes (fixed array of 4 entries) */
    nn_buffer_format_e data_format;           /*!< \brief Data format for the tensor, see nn_buffer_format_e */
    nn_buffer_quantize_format_e quant_format; /*!< \brief Quantized format, see nn_buffer_quantize_format_e */
    union {
        struct {
            /*!< \brief Fixed point position when the input element type is
             *   int16; if 0, calculations are performed in integer math */
            unsigned char fixed_point_pos;
        } dfp;
        struct {
            float scale;            /*!< \brief Scale value for the quantized value */
            unsigned int zeroPoint; /*!< \brief A 32 bit integer, in range [0, 255] */
        } affine;
    } quant_data;                             /*!< \brief The union of quantization information */
} nn_buffer_params_t;
/** @brief Channel ordering of input data (see input_info::input_format). */
typedef enum {
    AML_INPUT_DEFAULT    = 0, /* channel format: caffe 2 1 0, others 0 1 2 */
    AML_INPUT_MODEL_NHWC = 1, /* channel format: 0 1 2 */
    AML_INPUT_MODEL_NCHW = 2, /* channel format: 2 1 0 */
} aml_input_format_t;
/** @brief One output tensor slot of nn_output. */
typedef struct out_buf {
    unsigned int size;
    char name[MAX_NAME_LENGTH];   /* output tensor name */
    unsigned char *buf;
    nn_buffer_params_t *param;
    aml_output_format_t out_format;
    int output_valid_length;
} outBuf_t;
/** @brief Collection of output tensors; at most OUTPUT_MAX_NUM slots. */
typedef struct __nnout {
    int typeSize;                 /* presumably sizeof this struct -- confirm */
    unsigned int num;             /* output tensor number */
    outBuf_t out[OUTPUT_MAX_NUM];
} nn_output;
/** @brief Element type of input data (see input_info::input_data_type). */
typedef enum {
    AML_INPUT_U8   = 0,
    AML_INPUT_I8   = 1,
    AML_INPUT_FP32 = 2,
} aml_input_data_type_t;
/**
 * @brief Preprocessing description attached to an nn_input.
 *
 * NOTE(review): presumably the other fields are honoured only when
 * valid is non-zero -- confirm against the implementation.
 */
typedef struct {
    int valid;
    int int16_type;
    int preprocess_debug;
    float mean[INPUT_CHANNEL];    /* one entry per input channel */
    float scale;
    aml_input_format_t input_format;
    aml_input_data_type_t input_data_type;
} input_info;
/** @brief Input descriptor passed to aml_module_input_set(). */
typedef struct __nn_input {
    int typeSize;                 /* presumably sizeof this struct -- confirm */
    int input_index;
    int size;
    unsigned char *input;
    amlnn_input_type input_type;
    input_info info;
    int subgraph_index;
    int input_valid_length;
} nn_input;
/**
 * @brief User-assigned input / output buffer addresses (aml_config::inOut).
 *
 * Each table holds at most ADDRESS_MAX_NUM entries. NOTE(review): presumably
 * inAddr_size / outAddr_size are the numbers of valid entries -- confirm.
 */
typedef struct __assign_address {
    unsigned int inAddr_size;
    unsigned int outAddr_size;
    aml_io_format_t io_type;
    unsigned char *inAddr[ADDRESS_MAX_NUM];
    unsigned char *outAddr[ADDRESS_MAX_NUM];
} assign_user_address_t;
/** @brief Core identifier (see aml_forward_ctrl_t::enCoreId). */
typedef enum __enCoreId {
    AML_ID_0 = 0,
    AML_ID_1 = 1,
    AML_ID_2 = 2,
    AML_ID_3 = 3,
    AML_ID_4 = 4,
    AML_ID_BUTT       /* sentinel: one past the last core id */
} aml_encore_id;
/** @brief Runtime backend selector (NNRT / OVX / TIM-VX, per the enumerator names). */
typedef enum __aml_runtime_mode_t {
    AML_RUNTIME_NNRT   = 0,
    AML_RUNTIME_OVX    = 1,
    AML_RUNTIME_TIM_VX = 2,
    RUN_TIME_MODE_BUTT      /* sentinel: one past the last runtime mode */
} aml_runtime_mode_t;
/** @brief Cache attribute of an allocation (see aml_memory_config_t::cache_type). */
typedef enum {
    AML_WITH_CACHE    = 0,
    AML_WITHOUT_CACHE = 1,
} aml_cache_type_t;
/**
 * @brief Operator codes, used to address per-operator options
 *        (aml_openmp_opt_t::operator_type, aml_neon_opt_t::operator_type).
 *
 * NOTE(review): names and numbering appear to mirror TensorFlow Lite's
 * BuiltinOperator codes for 0..161 -- confirm before relying on it.
 * 256, 260 and 511 are outside that contiguous range; AML_Unknown is the
 * largest 32-bit signed value. AML_MIN / AML_MAX alias the first and last
 * codes.
 */
typedef enum {
    AML_Add = 0,
    AML_AveragePool2d = 1,
    AML_Concatenation = 2,
    AML_Conv2d = 3,
    AML_DepthwiseConv2d = 4,
    AML_DepthToSpace = 5,
    AML_Dequantize = 6,
    AML_EmbeddingLookup = 7,
    AML_Floor = 8,
    AML_FullyConnected = 9,
    AML_HashtableLookup = 10,
    AML_L2Normalization = 11,
    AML_L2Pool2d = 12,
    AML_LocalResponseNormalization = 13,
    AML_Logistic = 14,
    AML_LshProjection = 15,
    AML_Lstm = 16,
    AML_MaxPool2d = 17,
    AML_Mul = 18,
    AML_Relu = 19,
    AML_ReluN1To1 = 20,
    AML_Relu6 = 21,
    AML_Reshape = 22,
    AML_ResizeBilinear = 23,
    AML_Rnn = 24,
    AML_Softmax = 25,
    AML_SpaceToDepth = 26,
    AML_Svdf = 27,
    AML_Tanh = 28,
    AML_ConcatEmbeddings = 29,
    AML_SkipGram = 30,
    AML_Call = 31,
    AML_Custom = 32,
    AML_EmbeddingLookupSparse = 33,
    AML_Pad = 34,
    AML_UnidirectionalSequenceRnn = 35,
    AML_Gather = 36,
    AML_BatchToSpaceNd = 37,
    AML_SpaceToBatchNd = 38,
    AML_Transpose = 39,
    AML_Mean = 40,
    AML_Sub = 41,
    AML_Div = 42,
    AML_Squeeze = 43,
    AML_UnidirectionalSequenceLstm = 44,
    AML_StridedSlice = 45,
    AML_BidirectionalSequenceRnn = 46,
    AML_Exp = 47,
    AML_TopkV2 = 48,
    AML_Split = 49,
    AML_LogSoftmax = 50,
    AML_Delegate = 51,
    AML_BidirectionalSequenceLstm = 52,
    AML_Cast = 53,
    AML_Prelu = 54,
    AML_Maximum = 55,
    AML_ArgMax = 56,
    AML_Minimum = 57,
    AML_Less = 58,
    AML_Neg = 59,
    AML_PadV2 = 60,
    AML_Greater = 61,
    AML_GreaterEqual = 62,
    AML_LessEqual = 63,
    AML_Select = 64,
    AML_Slice = 65,
    AML_Sin = 66,
    AML_TransposeConv = 67,
    AML_SparseToDense = 68,
    AML_Tile = 69,
    AML_ExpandDims = 70,
    AML_Equal = 71,
    AML_NotEqual = 72,
    AML_Log = 73,
    AML_Sum = 74,
    AML_Sqrt = 75,
    AML_Rsqrt = 76,
    AML_Shape = 77,
    AML_Pow = 78,
    AML_ArgMin = 79,
    AML_FakeQuant = 80,
    AML_ReduceProd = 81,
    AML_ReduceMax = 82,
    AML_Pack = 83,
    AML_LogicalOr = 84,
    AML_OneHot = 85,
    AML_LogicalAnd = 86,
    AML_LogicalNot = 87,
    AML_Unpack = 88,
    AML_ReduceMin = 89,
    AML_FloorDiv = 90,
    AML_ReduceAny = 91,
    AML_Square = 92,
    AML_ZerosLike = 93,
    AML_Fill = 94,
    AML_FloorMod = 95,
    AML_Range = 96,
    AML_ResizeNearestNeighbor = 97,
    AML_LeakyRelu = 98,
    AML_SquaredDifference = 99,
    AML_MirrorPad = 100,
    AML_Abs = 101,
    AML_SplitV = 102,
    AML_Unique = 103,
    AML_Ceil = 104,
    AML_ReverseV2 = 105,
    AML_AddN = 106,
    AML_GatherNd = 107,
    AML_Cos = 108,
    AML_Where = 109,
    AML_Rank = 110,
    AML_Elu = 111,
    AML_ReverseSequence = 112,
    AML_MatrixDiag = 113,
    AML_Quantize = 114,
    AML_MatrixSetDiag = 115,
    AML_Round = 116,
    AML_HardSwish = 117,
    AML_If = 118,
    AML_While = 119,
    AML_NonMaxSuppressionV4 = 120,
    AML_NonMaxSuppressionV5 = 121,
    AML_ScatterNd = 122,
    AML_SelectV2 = 123,
    AML_Densify = 124,
    AML_SegmentSum = 125,
    AML_BatchMatmul = 126,
    AML_PlaceholderForGreaterOpCodes = 127,
    AML_Cumsum = 128,
    AML_CallOnce = 129,
    AML_BroadcastTo = 130,
    AML_Rfft2d = 131,
    AML_Conv3d = 132,
    AML_Imag = 133,
    AML_Real = 134,
    AML_ComplexAbs = 135,
    AML_Hashtable = 136,
    AML_HashtableFind = 137,
    AML_HashtableImport = 138,
    AML_HashtableSize = 139,
    AML_ReduceAll = 140,
    AML_Conv3dTranspose = 141,
    AML_VarHandle = 142,
    AML_ReadVariable = 143,
    AML_AssignVariable = 144,
    AML_BroadcastArgs = 145,
    AML_RandomStandardNormal = 146,
    AML_Bucketize = 147,
    AML_RandomUniform = 148,
    AML_Multinomial = 149,
    AML_Gelu = 150,
    AML_DynamicUpdateSlice = 151,
    AML_Relu0To1 = 152,
    AML_UnsortedSegmentProd = 153,
    AML_UnsortedSegmentMax = 154,
    AML_UnsortedSegmentSum = 155,
    AML_Atan2 = 156,
    AML_UnsortedSegmentMin = 157,
    AML_Sign = 158,
    AML_Bitcast = 159,
    AML_BitwiseXor = 160,
    AML_RightShift = 161,

    /* Codes outside the contiguous range above. */
    AML_DetectionPostProcess = 256,
    AML_Erf = 260,
    AML_Hardware = 511,
    AML_Unknown = 2147483647,

    /* Range aliases. */
    AML_MIN = AML_Add,
    AML_MAX = AML_Unknown
} aml_operator_t;
/** @brief OpenMP option for one operator type (or for all operators). */
typedef struct __aml_openmp_opt_t {
    aml_operator_t operator_type;
    bool enable_openmp;
    bool involve_all_ops;   /* enable openmp for all operators */
    int8_t openmp_num;      /* presumably the OpenMP thread count -- confirm */
} aml_openmp_opt_t;
/** @brief NEON option for one operator type (or, with involve_all_ops, presumably for all operators). */
typedef struct __aml_neon_opt_t {
    aml_operator_t operator_type;
    bool enable_neon;
    bool involve_all_ops;
} aml_neon_opt_t;
/**
 * @brief Software-operator options: an OpenMP option list and a NEON option list.
 *
 * NOTE(review): presumably each *_opt pointer refers to *_opt_num entries and
 * is consulted only when the matching set_*_flag is true -- confirm.
 */
typedef struct __softOpInfo_t {
    bool set_openmp_opt_flag;
    int openmp_opt_num;
    aml_openmp_opt_t *openmp_opt;

    bool set_neon_opt_flag;
    int neon_opt_num;
    aml_neon_opt_t *neon_opt;
} softOpInfo_t;
/** @brief KV-cache strategy (see aml_forward_ctrl_t::kvcache_type). Values start at 1. */
typedef enum __aml_kvcache_type_t {
    KVCompute_Prune     = 1,
    KVTransformer_Accel = 2
} aml_kvcache_type_t;
/** @brief KV-cache option for the operator at operator_index. */
typedef struct __aml_kvcache_opt_t {
    int32_t operator_index;
    /* Enable skipping invalid vector computations outside the range of
     * ADLA_KVCACHE_DYNAMIC_VAL.current_mask. */
    bool enable_kvcache;
    /* Set the output tensors' values outside the range of
     * ADLA_KVCACHE_DYNAMIC_VAL.current_mask to zero. When a software operator
     * with skipping enabled is followed by an operator with skipping disabled,
     * this must be true to ensure that the result is correct. */
    bool zero_out_invalid_value;
    int8_t active_axis;
    int32_t active_axis_size;
} aml_kvcache_opt_t;
/**
 * @brief List of KV-cache options.
 *
 * NOTE(review): presumably kvcache_opt refers to kvcache_opt_num entries and
 * is consulted only when set_kvcache_opt_flag is true -- confirm.
 */
typedef struct __kvCacheInfo_t {
    bool set_kvcache_opt_flag;
    int kvcache_opt_num;
    aml_kvcache_opt_t *kvcache_opt;
} kvCacheInfo_t;
/** @brief Forward (inference) control block embedded in aml_config::forward_ctrl. */
typedef struct __aml_forward_ctrl_t {
    aml_encore_id enCoreId;   /* device target on which the seg runs */
    int64_t invoke_id;
    int32_t timeout_ms;
    softOpInfo_t softop_info;
    aml_kvcache_type_t kvcache_type;
    kvCacheInfo_t kvcache_info;
} aml_forward_ctrl_t;
/** @brief Model format handed to the compiler (see aml_compiler_input_t::model_type). */
typedef enum __aml_model_type_t {
    AML_MODEL_TYPE_ADLA_LOADABLE   = 0,
    AML_MODEL_TYPE_TENSORFLOW      = 1,
    AML_MODEL_TYPE_TENSORFLOW_LITE = 2
} aml_model_type_t;
/** @brief Whether a model is exchanged through memory or through a file. */
typedef enum __aml_model_in_out_type_t {
    AML_MODEL_IN_OUT_TYPE_MEMORY = 0,
    AML_MODEL_IN_OUT_TYPE_FILE   = 1
} aml_model_in_out_type_t;
/** @brief Compiler optimization mode (fast / precise, per the enumerator names). */
typedef enum __aml_compiler_optimization_mode_t {
    AML_COMPILER_OPTIMIZATION_MODE_FAST    = 0,
    AML_COMPILER_OPTIMIZATION_MODE_PRECISE = 1
} aml_compiler_optimization_mode_t;
/**
 * @brief Model handed to the compiler.
 *
 * NOTE(review): presumably model_data / model_size apply when input_type is
 * AML_MODEL_IN_OUT_TYPE_MEMORY and model_path when it is
 * AML_MODEL_IN_OUT_TYPE_FILE -- confirm.
 */
typedef struct __aml_compiler_input_t {
    aml_model_type_t model_type;
    aml_model_in_out_type_t input_type;
    const void *model_data;
    size_t model_size;
    const char *model_path;
} aml_compiler_input_t;
/** @brief Compiler target configuration. */
typedef struct __aml_compiler_config_t {
    const char *hw_version;
    int32_t axi_sram_size;
    int32_t batch_multiplier;
    aml_compiler_optimization_mode_t optimization_mode;
} aml_compiler_config_t;
/**
 * @brief Pair of allocation callbacks supplied to the compiler.
 *
 * allocate takes a byte count and returns a uint8_t pointer; deallocate
 * receives a pointer together with a size.
 */
typedef struct __aml_compiler_allocator_t {
    uint8_t *(*allocate)(size_t size);
    void (*deallocate)(uint8_t *p, size_t size);
} aml_compiler_allocator_t;
/**
 * @brief Metadata supplied to the compiler as two parallel string tables.
 *
 * NOTE(review): presumably names[i] / data[i] form one entry for
 * 0 <= i < count -- confirm.
 */
typedef struct __aml_compiler_metadata_t {
    int32_t count;
    const char **names;
    const char **data;
} aml_compiler_metadata_t;
/** @brief Boolean debug switches for the compiler. */
typedef struct __aml_compiler_debug_options_t {
    bool dump_model_info;
    bool disable_fusion;
    bool disable_compression;
    bool disable_memory_optimization;
} aml_compiler_debug_options_t;
/** @brief Transformer-specific compiler settings. */
typedef struct __aml_compiler_transformer_config {
    uint64_t eos_token;
    int32_t max_sequence_length;
    int32_t freq_base;
    float factor;
    int32_t low_freq_factor;
    int32_t high_freq_factor;
    int32_t max_position_embeddings;
} aml_compiler_transformer_config;
/**
 * @brief Full compiler argument set embedded in aml_config::compiler_args.
 *
 * The allocator, metadata, debug_options and transformer_config members are
 * pointers to const data; this struct does not contain them by value.
 */
typedef struct __aml_compiler_args_t {
    // int32_t batch_multiplier;
    int32_t compiler_only;
    int32_t set_compiler_args_flag;
    aml_compiler_input_t input;
    aml_compiler_config_t config;
    const aml_compiler_allocator_t *allocator;
    const aml_compiler_metadata_t *metadata;
    const aml_compiler_debug_options_t *debug_options;
    const aml_compiler_transformer_config *transformer_config;
    const char *custom_option_path;
} aml_compiler_args_t;
/** @brief Hardware selector (NPU / GPU / CPU, per the enumerator names). Values start at 1. */
typedef enum __aml_hw_flag_t {
    AML_HW_NPU = 1,
    AML_HW_GPU = 2,
    AML_HW_CPU = 3
} aml_hw_flag_t;
/**
 * @brief Module configuration passed to aml_module_create().
 *
 * NOTE(review): presumably path is used for the *_FILE values of nbgType and
 * pdata / length for the *_MEMORY values -- confirm against the implementation.
 */
typedef struct __aml_nn_config {
    bool secure_config;
    int typeSize;               /* presumably sizeof this struct -- confirm */
    int length;
    const char *path;
    const char *pdata;
    amlnn_model_type modelType;
    amlnn_nbg_type nbgType;
    assign_user_address_t inOut;
    aml_forward_ctrl_t forward_ctrl;
    aml_compiler_args_t compiler_args;
    aml_hw_flag_t hw_flag;
    int timeout_ms;
    const char *on_path;
} aml_config;
/** @brief Description of a single tensor (element of tensor_info::info). */
typedef struct {
    unsigned int dim_count;                         /* dim count */
    /* Dim values. NOTE(review): the original note says only 4-d is supported,
     * although the array holds MAX_TENSOR_NUM_DIMS entries -- confirm. */
    unsigned int sizes_of_dim[MAX_TENSOR_NUM_DIMS];
    unsigned int data_format;                       /* see nn_buffer_format_e */
    unsigned int data_type;                         /* not used */
    unsigned int quantization_format;               /* see nn_buffer_quantize_format_e */
    int fixed_point_pos;                            /* for int8/int16 QUANTIZE_DYNAMIC_FIXED_POINT */
    float TF_scale;                                 /* scale, as TF defines it */
    int TF_zeropoint;                               /* zero point, as TF defines it */
    char name[MAX_NAME_LENGTH];                     /* not used; reserved for future use */
} info_t;
/**
 * @brief List of tensor descriptions, as returned by the
 *        aml_util_get*TensorInfo functions.
 *
 * NOTE(review): presumably info refers to num entries -- confirm.
 */
typedef struct {
    unsigned int valid;
    unsigned int num;
    info_t *info;
} tensor_info;
/** @brief Which tensors aml_util_flushTensorHandle() acts on. */
typedef enum {
    AML_INPUT_TENSOR  = 0,
    AML_OUTPUT_TENSOR = 1,
} aml_flush_type_t;
/** @brief Hardware family reported in aml_platform_info_t::hw_type. */
typedef enum {
    AML_HARDWARE_INVALID   = -1,
    AML_HARDWARE_VSI_UNIFY = 0,
    AML_HARDWARE_VIPLITE   = 1,
    AML_HARDWARE_ADLA      = 2,
    AML_HARDWARE_DELEGATE  = 3,
    AML_HARDWARE_MAX       = 100,
} aml_hw_type_t;
/** @brief NPU summary reported in aml_platform_info_t::npu_hw_info. */
typedef struct {
    unsigned int core_num;
    unsigned int num;
    float flops;   /* TOps */
} aml_npu_hw_info_t;
/** @brief Hardware details reported in aml_platform_info_t::hw_info. */
typedef struct {
    char hw_version[10];
    int i8_mac_cnt;
    int max_clk;
    int Gops;
    bool kernel_vlc;
    bool feature_vlc;
    int cur_clk;
    unsigned long sram_base;   /* width follows the platform's unsigned long */
    int sram_size;
} aml_hw_info_t;
/**
 * @brief Platform description filled in by aml_read_chip_info().
 *
 * NOTE(review): ownership and lifetime of the three version strings are not
 * visible from this header -- confirm before freeing or caching them.
 */
typedef struct {
    unsigned int cpuinfo;
    char *sdk_version;
    char *ddk_version;
    char *hw_version;
    aml_hw_type_t hw_type;
    aml_npu_hw_info_t npu_hw_info;
    unsigned int platform_type;
    aml_hw_info_t hw_info;
} aml_platform_info_t;
/** @brief Statistics about the loaded model. */
typedef struct {
    int64_t memory_size;     /* memory size used by the loaded model */
    int32_t axi_sram_size;   /* axi sram size required by the loaded model */
    int32_t num_layers;
    int64_t macc_count;
    double sum_macc;         /* Gops */
} aml_ctx_info_t;
/** @brief Device-level frequency and memory figures. */
typedef struct {
    int32_t working_frequency;       /* in MHz */
    int32_t axi_working_frequency;   /* in MHz */
    int64_t memory_size;             /* memory size used by the device driver */
    int32_t axi_sram_size;           /* axi sram size allocated for the device */
    float tops;                      /* TOPS */
} aml_dev_memory_info_t;
/** @brief Model-level and device-level information bundled together. */
typedef struct {
    aml_ctx_info_t ctx_info;
    aml_dev_memory_info_t dev_info;
} aml_context_info_t;
/** @brief Extended profiling counters embedded in aml_profiling_data_t::ext. */
typedef struct aml_profiling_ext_data {
    uint64_t axi_freq_cur;             /* current adla axi clock frequency */
    uint64_t core_freq_cur;            /* current adla core clock frequency */
    uint64_t mem_alloced_base;
    uint64_t mem_alloced_umd;
    int64_t mem_pool_size;             /* -1: the limit is based on the system */
    uint64_t mem_pool_used;
    int32_t us_elapsed_in_fixup_cmq;
    int32_t us_elapsed_in_hw_op;
    int32_t us_elapsed_in_sw_op;
    int32_t invoke_has_error;
} aml_profiling_ext_data_t;
/** @brief Profiling results: inference time, memory usage and DRAM / SRAM traffic. */
typedef struct aml_profiling_data {
    uint64_t inference_time_us;
    uint64_t memory_usage_bytes;
    uint64_t dram_read_bytes;
    uint64_t dram_write_bytes;
    uint64_t sram_read_bytes;
    uint64_t sram_write_bytes;
    aml_profiling_ext_data_t ext;
} aml_profiling_data_t;
/**
 * @brief Pointers to the current and minimum frequency values.
 *
 * NOTE(review): who allocates the pointed-to floats is not visible from this
 * header -- confirm.
 */
typedef struct aml_work_frequency {
    float *curr_freq;
    float *min_freq;
} aml_work_frequency_t;
/** @brief Core clock setting: a switch plus the clock value. */
typedef struct aml_clk_core {
    int32_t set_clk_core_switch;
    int32_t clk;
} aml_clk_core_t;
/** @brief DPM period setting: a switch plus the period value. */
typedef struct aml_dpm_period {
    int32_t set_dpm_period_switch;
    int32_t dpm;
} aml_dpm_period_t;
/**
 * @brief Profile configuration / result block shared by
 *        aml_util_enableProfile(), aml_util_getProfileInfo() and
 *        aml_util_disableProfile().
 */
typedef struct {
    unsigned int suspend_time;
    float freq;
    aml_profile_type_t profile_type;
    aml_policy_type_t policy_type;
    aml_profiling_data_t profiling_data;
    aml_work_frequency_t frequency_data;
    aml_context_info_t context_info;
    aml_clk_core_t clk_core;
    aml_dpm_period_t dpm_period;
} aml_profile_config_t;
/** @brief MMU attribute of an allocation (see aml_memory_config_t::mmu_type). */
typedef enum {
    AML_WITH_MMU    = 0,
    AML_WITHOUT_MMU = 1,
} aml_mmu_type_t;
/** @brief Address kind of an allocation (see aml_memory_config_t::memory_type). */
typedef enum {
    AML_VIRTUAL_ADDR        = 0,
    AML_PHYS_ADDR           = 1,
    AML_VIRTUAL_SECURE_ADDR = 2,
    AML_PHYS_SECURE_ADDR    = 3
} aml_memory_type_t;
/** @brief Buffer handle plus its virtual and physical addresses. */
typedef struct __aml_memory_data_t {
    uint32_t typeSize;   /* presumably sizeof this struct -- confirm */
    void *memory;
    void *viraddr;
    uint64_t phyaddr;
} aml_memory_data_t;
/** @brief Access direction of an allocation (see aml_memory_config_t::direction). */
typedef enum {
    AML_MEM_DIRECTION_READ_WRITE = 0,
    AML_MEM_DIRECTION_READ_ONLY  = 1,
    AML_MEM_DIRECTION_WRITE_ONLY = 2,
} aml_mem_direction_t;
/** @brief Allocation request used by the aml_util_*Buffer functions. */
typedef struct __aml_memory_config_t {
    uint32_t typeSize;   /* presumably sizeof this struct -- confirm */
    int64_t mem_size;
    uint32_t index;
    aml_mmu_type_t mmu_type;
    aml_cache_type_t cache_type;
    aml_memory_type_t memory_type;
    aml_mem_direction_t direction;
} aml_memory_config_t;
/*=============================================================
NNSDK main api
==============================================================*/
/**
 * @brief Create an aml network module.
 * @param config  Module configuration.
 * @return Context handle, as accepted by the other functions that take a
 *         @c context argument.
 */
void *aml_module_create(aml_config *config);
/**
 * @brief Set one network input.
 * @param context  Context handle.
 * @param pInput   Input descriptor.
 * @return Status code.
 */
int aml_module_input_set(void *context, nn_input *pInput);
/**
 * @brief Run the network and fetch its output.
 * @param context    Context handle.
 * @param outconfig  Output configuration (passed by value).
 * @return Output handle or data. NOTE(review): presumably an nn_output
 *         pointer -- confirm against the implementation.
 */
void *aml_module_output_get(void *context, aml_output_config_t outconfig);
/**
 * @brief Simplified run-and-get-output call, for custom networks.
 * @param context  Context handle.
 * @return Output handle.
 */
void *aml_module_output_get_simple(void *context);
/**
 * @brief Tear down the network environment and free the allocated buffers.
 * @param context  Context handle.
 * @return Status code.
 */
int aml_module_destroy(void *context);
/*============================================================
NNSDK debug api
==============================================================*/
/**
 * @brief Select the profile type.
 * @param type      Profile type.
 * @param savepath  Path where profile data is saved.
 * @return Status code.
 */
int aml_util_setProfile(aml_profile_type_t type, const char *savepath);
/**
 * @brief Select the power policy.
 * @param type  Power policy type.
 * @return Status code.
 */
int aml_util_setPowerPolicy(aml_policy_type_t type);
/**
 * @brief Query hardware information.
 * @param customID     Pointer to the custom ID.
 * @param powerStatus  Pointer to the power status.
 * @param version      Pointer to the version.
 * @return Status code.
 */
int aml_util_getHardwareStatus(int *customID, int *powerStatus, int *version);
/**
 * @brief Configure the auto-suspend timeout.
 * @param timeout  Timeout in milliseconds.
 * @return Status code.
 */
int aml_util_setAutoSuspend(int timeout);
/**
 * @brief Turn profiling on.
 * @param context       Context handle.
 * @param profile_data  Pointer to the profile configuration.
 * @return Status code.
 */
int aml_util_enableProfile(void *context, aml_profile_config_t *profile_data);
/**
 * @brief Retrieve profile information.
 * @param context       Context handle.
 * @param profile_data  Pointer to the profile configuration.
 * @return Status code.
 */
int aml_util_getProfileInfo(void *context, aml_profile_config_t *profile_data);
/**
 * @brief Turn profiling off.
 * @param context       Context handle.
 * @param profile_data  Pointer to the profile configuration.
 * @return Status code.
 */
int aml_util_disableProfile(void *context, aml_profile_config_t *profile_data);
/**
 * @brief Read chip information.
 * @param platform_info  Pointer to the platform info structure.
 * @return Status code.
 */
int aml_read_chip_info(aml_platform_info_t *platform_info);
/*============================================================
NNSDK dma api
==============================================================*/
/**
 * @brief Allocate a 4K-aligned buffer for DMA I/O.
 * @param context     Context handle.
 * @param mem_size    Size of the memory.
 * @param mem_config  Memory configuration.
 * @return Pointer to the allocated buffer; release it with
 *         aml_util_freeAlignedBuffer().
 */
unsigned char *aml_util_mallocAlignedBuffer(void *context, int mem_size, aml_memory_config_t *mem_config);
/**
 * @brief Release a buffer obtained from aml_util_mallocAlignedBuffer().
 * @param context  Context handle.
 * @param addr     Address to free.
 */
void aml_util_freeAlignedBuffer(void *context, unsigned char *addr);
/**
 * @brief Swap an input buffer.
 *
 * For models with several inputs, inputId follows the order of the
 * aml_util_getInputTensorInfo() array.
 *
 * @param context    Context handle.
 * @param newBuffer  Pointer to the new buffer.
 * @param inputId    Index of the input.
 * @return Status code.
 */
int aml_util_swapInputBuffer(void *context, void *newBuffer, unsigned int inputId);
/**
 * @brief Swap an output buffer.
 * @param context    Context handle.
 * @param newBuffer  Pointer to the new buffer.
 * @param outputId   Index of the output.
 * @return Status code.
 */
int aml_util_swapOutputBuffer(void *context, void *newBuffer, unsigned int outputId);
/**
 * @brief Switch an input buffer.
 * @param context    Context handle.
 * @param newBuffer  Pointer to the new buffer.
 * @param inputId    Index of the input.
 * @return Status code.
 */
int aml_util_switchInputBuffer(void *context, void *newBuffer, unsigned int inputId);
/**
 * @brief Switch an output buffer.
 * @param context    Context handle.
 * @param newBuffer  Pointer to the new buffer.
 * @param outputId   Index of the output.
 * @return Status code.
 */
int aml_util_switchOutputBuffer(void *context, void *newBuffer, unsigned int outputId);
/**
 * @brief Flush the tensor cache memory.
 * @param context  Context handle.
 * @param type     Flush type (input or output tensors).
 * @return Status code.
 */
int aml_util_flushTensorHandle(void *context, aml_flush_type_t type);
/**
 * @brief Get the model's input tensor information list.
 * @param adladata  Pointer to the adla data.
 * @return Pointer to a tensor_info structure. NOTE(review): presumably to be
 *         released with aml_util_freeTensorInfo() -- confirm.
 */
tensor_info *aml_util_getInputTensorInfo(const char *adladata);
/**
 * @brief Get the model's output tensor information list.
 * @param adladata  Pointer to the adla data.
 * @return Pointer to a tensor_info structure. NOTE(review): presumably to be
 *         released with aml_util_freeTensorInfo() -- confirm.
 */
tensor_info *aml_util_getOutputTensorInfo(const char *adladata);
/**
 * @brief Allocate a buffer.
 * @param context     Context handle.
 * @param mem_config  Memory configuration.
 * @param mem_data    Memory data structure to fill.
 * @return Status code.
 */
int aml_util_mallocBuffer(void *context, aml_memory_config_t *mem_config, aml_memory_data_t *mem_data);
/**
 * @brief Free a buffer.
 * @param context     Context handle.
 * @param mem_config  Memory configuration.
 * @param mem_data    Memory data structure.
 * @return Status code.
 */
int aml_util_freeBuffer(void *context, aml_memory_config_t *mem_config, aml_memory_data_t *mem_data);
/**
 * @brief Flush a buffer.
 * @param context     Context handle.
 * @param mem_config  Memory configuration.
 * @param mem_data    Memory data structure.
 * @return Status code.
 */
int aml_util_flushBuffer(void *context, aml_memory_config_t *mem_config, aml_memory_data_t *mem_data);
/**
 * @brief Swap in an external input buffer.
 * @param context     Context handle.
 * @param mem_config  Memory configuration.
 * @param mem_data    Memory data structure.
 * @return Status code.
 */
int aml_util_swapExternalInputBuffer(void *context, aml_memory_config_t *mem_config, aml_memory_data_t *mem_data);
/**
 * @brief Swap in an external output buffer.
 * @param context     Context handle.
 * @param mem_config  Memory configuration.
 * @param mem_data    Memory data structure.
 * @return Status code.
 */
int aml_util_swapExternalOutputBuffer(void *context, aml_memory_config_t *mem_config, aml_memory_data_t *mem_data);
/**
 * @brief Get input and output tensor information in one call.
 * @param context     Context handle.
 * @param model_data  Model data.
 * @param in_tInfo    Pointer to the input tensor info pointer.
 * @param out_tInfo   Pointer to the output tensor info pointer.
 * @return Status code.
 */
int aml_util_getTensorInfo(void *context, const char *model_data, tensor_info **in_tInfo, tensor_info **out_tInfo);
/**
 * @brief Free tensor_info memory.
 * @param tinfo  Pointer to the tensor_info to free.
 * @return Status code.
 */
int aml_util_freeTensorInfo(tensor_info *tinfo);
#ifdef __cplusplus
} //extern "C"
#endif
#endif // _NN_SDK_H