Upload first version

This commit is contained in:
dengliu1105 2026-01-06 10:29:54 +08:00
parent f95d5a63b0
commit 3bdf2003ec
898 changed files with 1405811 additions and 1 deletions

View file

View file

@ -0,0 +1,77 @@
#!/bin/bash
set -e
#
# Copyright (C) 2024-2025 Amlogic, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Print the command-line help and exit.
#   $1 : exit status to use (defaults to 1 so a bare `usage` still signals an error)
usage() {
    echo "Usage: $0 [-a <target_abi>]"
    echo " -a <target_abi> : Target ABI (default: arm64-v8a)"
    echo " -h : Show this help message"
    exit "${1:-1}"
}

# Default values
TARGET_ABI=arm64-v8a

# Parse arguments
while getopts 'a:h' opt; do
    case "$opt" in
        a)
            TARGET_ABI=$OPTARG
            ;;
        h)
            # Asking for help is not an error.
            usage 0
            ;;
        *)
            usage
            ;;
    esac
done

# Locate the NDK: ANDROID_NDK_PATH wins, then ANDROID_NDK, then ANDROID_NDK_HOME.
if [ -z "${ANDROID_NDK_PATH}" ]; then
    if [ -n "${ANDROID_NDK}" ]; then
        ANDROID_NDK_PATH=${ANDROID_NDK}
    elif [ -n "${ANDROID_NDK_HOME}" ]; then
        ANDROID_NDK_PATH=${ANDROID_NDK_HOME}
    else
        echo "Error: ANDROID_NDK_PATH is not set."
        echo "Please set ANDROID_NDK_PATH to your Android NDK directory."
        exit 1
    fi
fi

# Fail early with a clear message instead of an obscure CMake error.
TOOLCHAIN_FILE="${ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake"
if [ ! -f "${TOOLCHAIN_FILE}" ]; then
    echo "Error: toolchain file not found: ${TOOLCHAIN_FILE}"
    echo "Please set ANDROID_NDK_PATH to your Android NDK directory."
    exit 1
fi

# Every path expansion below is quoted so directories containing spaces work.
ROOT_PWD=$(cd "$(dirname "$0")" && pwd)
BUILD_DIR="${ROOT_PWD}/build/android_${TARGET_ABI}"

echo "Building for Android..."
echo "NDK_PATH: ${ANDROID_NDK_PATH}"
echo "TARGET_ABI: ${TARGET_ABI}"
echo "BUILD_DIR: ${BUILD_DIR}"

mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"

# BUILD_DIR is <root>/build/android_<abi>, so ../../src is <root>/src.
cmake ../../src \
    -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
    -DANDROID_ABI="${TARGET_ABI}" \
    -DANDROID_PLATFORM=android-24 \
    -DCMAKE_BUILD_TYPE=Release
make -j4

echo "Build complete. Executable in ${BUILD_DIR}/clip_demo"

View file

@ -0,0 +1,20 @@
#!/bin/bash
set -e
#
# Copyright (C) 2024-2025 Amlogic, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
### TODO: this script is a placeholder; its build steps have not been implemented yet.

View file

@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.5)
project(clip_demo)

# The sources use <filesystem>, so C++17 is mandatory: fail at configure time
# instead of silently falling back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Set NNSDK path
set(NNSDK_ROOT "${CMAKE_SOURCE_DIR}/../../../../dependency/nnsdk")
include_directories(${NNSDK_ROOT}/include)
include_directories(${CMAKE_SOURCE_DIR}/../../../../common)

# Set 3rdparty path
set(3RDPARTY_DIR "${CMAKE_SOURCE_DIR}/../../../../dependency")

# Include directories for stb_image and json
# Note: code uses #include "stb_image.h" and #include "json.hpp"
include_directories(${3RDPARTY_DIR}/stb_image)
include_directories(${3RDPARTY_DIR}/json)

# Pick the prebuilt NNSDK library directory for the target platform.
# link_directories()/link_libraries() only affect targets created afterwards,
# so this section must stay above add_executable().
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    if(ANDROID_ABI STREQUAL "arm64-v8a")
        link_directories(${NNSDK_ROOT}/lib/android/arm64-v8a)
    elseif(ANDROID_ABI STREQUAL "armeabi-v7a")
        link_directories(${NNSDK_ROOT}/lib/android/armeabi-v7a)
    else()
        # Previously any other ABI silently linked the armeabi-v7a libraries.
        message(FATAL_ERROR "Unsupported ANDROID_ABI '${ANDROID_ABI}': expected arm64-v8a or armeabi-v7a")
    endif()
    # Android needs log
    link_libraries(log)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    link_directories(${NNSDK_ROOT}/lib/linux/lib64_yocto)
endif()

add_executable(${PROJECT_NAME}
    main.cpp
    model_invoke.cpp
    pre_postprocess.cpp
)

target_link_libraries(${PROJECT_NAME}
    nnsdk
    dl
    m
)

109
examples/clip/cpp/src/main.cpp Executable file
View file

@ -0,0 +1,109 @@
/*
* Copyright (C) 2024-2025 Amlogic, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "model_invoke.h"
/* Nanoseconds per second; used to fold timespec seconds into a nanosecond count. */
#define BILLION 1000000000

/*
 * Timing record for one run of the demo.
 *
 * Each pipeline stage has a start stamp, an end stamp and a total. In this
 * file only the init_* fields are written: the stamps come from
 * get_time_count() and the total is their difference divided by 1,000,000.
 * Every counter starts at zero so a field that was never written reads as 0
 * instead of an indeterminate value.
 */
struct Get_Times
{
    /* model initialisation */
    uint64_t init_start_time = 0;
    uint64_t init_end_time = 0;
    uint64_t init_total_time = 0;

    /* pre-processing */
    uint64_t preProcess_start_time = 0;
    uint64_t preProcess_end_time = 0;
    uint64_t preProcess_total_time = 0;

    /* inference */
    uint64_t invoke_start_time = 0;
    uint64_t invoke_end_time = 0;
    uint64_t invoke_total_time = 0;

    /* post-processing */
    uint64_t postProcess_start_time = 0;
    uint64_t postProcess_end_time = 0;
    uint64_t postProcess_total_time = 0;

    /* whole pipeline */
    uint64_t total_time = 0;
    std::vector<uint64_t> total_time_group;
};
/*
 * Reads CLOCK_MONOTONIC and returns the reading as a single nanosecond
 * count (seconds * BILLION + nanoseconds).
 */
static uint64_t get_time_count()
{
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const uint64_t whole_seconds_ns = (uint64_t)now.tv_sec * BILLION;
    const uint64_t fraction_ns = (uint64_t)now.tv_nsec;
    return (uint64_t)(fraction_ns + whole_seconds_ns);
}
/*
 * Entry point of the CLIP demo.
 *
 * Usage: <program> <model_path> [base_dir] [json_filename]
 *
 * Loads the model once, then repeatedly reads a line from stdin, passes it to
 * process_image_dir() and prints every returned path. The loop ends when the
 * user types "exit" or stdin reaches end-of-file. Setting the GET_TIME
 * environment variable prints the model initialisation time.
 *
 * Returns 0 on success and -1 on a usage error, on model load failure or when
 * destroy_network() reports an error.
 */
int main(int argc, char ** argv)
{
    if (argc < 2) {
        printf("Usage: %s <model_path> [base_dir] [json_filename]\n", argv[0]);
        printf(" model_path: Path to the model file\n");
        printf(" base_dir: Base directory for clip datasets (optional, can also use CLIP_BASE_DIR env var)\n");
        printf(" json_filename: JSON filename in each dataset folder (optional, can also use CLIP_JSON_FILENAME env var, default: clip_text_res.json)\n");
        return -1;
    }

    const char* model_path_encoder = argv[1];
    // Empty strings let process_image_dir() fall back to its env vars / defaults.
    const std::string base_dir = (argc >= 3) ? argv[2] : "";
    const std::string json_filename = (argc >= 4) ? argv[3] : "";

    Get_Times model_time;
    model_time.init_start_time = get_time_count();
    void *context_model = init_network_file(model_path_encoder);
    model_time.init_end_time = get_time_count();
    if (context_model == NULL)
    {
        printf("init_network [context_model] fail.\n");
        return -1;
    }

    if (getenv("GET_TIME"))
    {
        // get_time_count() is in nanoseconds; convert to milliseconds.
        model_time.init_total_time = (model_time.init_end_time - model_time.init_start_time) / 1000000;
        std::cout << "init_model_total time : " << model_time.init_total_time << "ms" << std::endl;
    }

    while (true)
    {
        std::string image_dir;
        // process_image_dir() iterates this path as a directory, so ask for one.
        printf("\nPlease enter the directory containing the JPG images (enter exit to quit):\n");

        // A failed read (EOF or closed stdin) used to leave the string empty and
        // spin forever on "The path cannot be empty."; stop instead.
        if (!std::getline(std::cin, image_dir)) break;
        if (image_dir == "exit") break;
        if (image_dir.empty()) {
            printf("The path cannot be empty.\n");
            continue;
        }

        const std::vector<std::string> out_str_path =
            process_image_dir(context_model, image_dir, base_dir, json_filename);
        for (size_t i = 0; i < out_str_path.size(); i++)
        {
            std::cout << "Index[" << i << "] : " << out_str_path[i] << std::endl;
        }
    }

    const int ret = destroy_network(context_model);
    if (ret != 0)
    {
        printf("destroy_network [context_model] fail.\n");
        return -1;
    }
    return ret;
}

View file

@ -0,0 +1,296 @@
/*
* Copyright (C) 2024-2025 Amlogic, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "model_invoke.h"

#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <stdexcept>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

#include "nn_sdk.h"
#include "json.hpp"
using json = nlohmann::ordered_json;
// Use the standard namespace: std::__fs is an internal libc++ detail and does
// not exist in other standard libraries (e.g. libstdc++ on the Linux target).
namespace fs = std::filesystem;
// Switches that steer how run_network() hands its input to the SDK.
struct DMAConfig {
    // true: the input is copied into an SDK-allocated buffer;
    // false: the input vector is passed via aml_module_input_set().
    bool use_dma{true};
    // true until run_network() has run once; while set, the next DMA call
    // allocates and registers the shared input buffer.
    bool malloc_buffer_once{true};
};

// Process-wide state for the single model this demo drives.
DMAConfig context_model{};
///////////////////////////////////////////////////////////
// SDK buffer descriptors for the model input. run_network() fills in
// mem_config_context_model and allocates the buffer on its first DMA call;
// destroy_network() frees it when mem_size is non-zero. As globals they start
// out zero-initialised.
aml_memory_config_t mem_config_context_model;
aml_memory_data_t mem_data_context_model;
// Image pre-processing and similarity scoring are defined in another source
// file of this target; only their prototypes are needed here.
std::vector<float> preprocess_image(const std::string& image_path);
float post_process(const float* a, const std::vector<float>& b);
/*
 * Creates an SDK module from the model file at `model_path`.
 *
 * The configuration requests an ADLA loadable read from a file and turns on
 * the OpenMP (2 threads) and NEON options for all software operators.
 *
 * Returns the handle produced by aml_module_create(), or NULL when creation
 * fails (a message is printed in that case).
 */
void* init_network_file(const char *model_path)
{
    aml_config config;
    memset(&config, 0, sizeof(aml_config));
    config.typeSize = sizeof(aml_config);
    config.nbgType = NN_ADLA_FILE;
    config.modelType = ADLA_LOADABLE;
    config.path = model_path;

    /* set omp, If you are considering high CPU usage during operation,
       you can turn off this api, set_openmp_opt_flag = false */
    aml_openmp_opt_t openmp_opt[] =
    {
        {
            .operator_type = AML_Unknown,
            .enable_openmp = true,
            .involve_all_ops = true,
            .openmp_num = 2,
        },
    };
    config.forward_ctrl.softop_info.openmp_opt = openmp_opt;
    config.forward_ctrl.softop_info.openmp_opt_num = sizeof(openmp_opt) / sizeof(openmp_opt[0]);
    config.forward_ctrl.softop_info.set_openmp_opt_flag = true;

    /* set neon */
    aml_neon_opt_t neon_opt[] =
    {
        {
            .operator_type = AML_Unknown,
            .enable_neon = true,
            .involve_all_ops = true,
        },
    };
    config.forward_ctrl.softop_info.neon_opt = neon_opt;
    config.forward_ctrl.softop_info.neon_opt_num = sizeof(neon_opt) / sizeof(neon_opt[0]);
    config.forward_ctrl.softop_info.set_neon_opt_flag = true;

    /* The option arrays above are locals, so they must stay alive until
       aml_module_create() returns, which they do. */
    void *module_handle = aml_module_create(&config);
    if (module_handle != NULL)
    {
        return module_handle;
    }
    printf("aml_module_create fail.\n");
    return NULL;
}
/*
 * Feeds one pre-processed input tensor to the model and fetches its output.
 *
 * qcontext   : module handle returned by init_network_file().
 * input_ids  : input values; their raw bytes are handed to the SDK.
 * image_type : currently unused.
 *
 * In DMA mode the first call allocates an SDK buffer sized for that input and
 * registers it as the external input buffer; later calls reuse it, so every
 * input must have the same byte size as the first one.
 *
 * Returns a pointer to the first output buffer reinterpreted as float, or
 * NULL when the input is empty, does not fit the shared buffer, or an SDK
 * call fails. The returned memory belongs to the SDK.
 * NOTE(review): the number of floats behind the returned pointer is not
 * visible here - confirm against the model's output shape.
 */
float* run_network(void *qcontext, std::vector<float> input_ids, const std::string image_type)
{
    (void)image_type;

    // An empty vector means pre-processing failed; there is nothing to run.
    if (input_ids.empty()) {
        printf("run_network: empty input.\n");
        return NULL;
    }
    const size_t input_bytes = input_ids.size() * sizeof(float);

    nn_input inData;
    // Zero the fields this function does not set explicitly.
    memset(&inData, 0, sizeof(nn_input));
    inData.input_index = 0;
    inData.info.input_format = AML_INPUT_DEFAULT;
    inData.size = input_bytes;

    if (context_model.use_dma) {
        if (context_model.malloc_buffer_once) {
            mem_config_context_model.cache_type = AML_WITH_CACHE;
            mem_config_context_model.memory_type = AML_VIRTUAL_ADDR;
            mem_config_context_model.direction = AML_MEM_DIRECTION_READ_WRITE;
            mem_config_context_model.index = 0;
            mem_config_context_model.mem_size = inData.size;
            aml_util_mallocBuffer(qcontext, &mem_config_context_model, &mem_data_context_model);
            if (mem_data_context_model.viraddr == NULL) {
                printf("aml_util_mallocBuffer fail.\n");
                // Keep destroy_network() from freeing a buffer that never existed;
                // malloc_buffer_once stays set so the next call retries.
                mem_config_context_model.mem_size = 0;
                return NULL;
            }
            aml_util_swapExternalInputBuffer(qcontext, &mem_config_context_model, &mem_data_context_model);
        }
        // The shared buffer keeps the size of the first input; copying a
        // different amount would over-read the vector or overflow the buffer.
        if (static_cast<size_t>(mem_config_context_model.mem_size) != input_bytes) {
            printf("run_network: input size does not match the DMA buffer size.\n");
            return NULL;
        }
        inData.input_type = INPUT_DMA_DATA;
        memcpy(mem_data_context_model.viraddr, input_ids.data(), input_bytes);
        inData.input = NULL;
    } else {
        inData.input = reinterpret_cast<unsigned char*>(input_ids.data());
        inData.input_type = BINARY_RAW_DATA;
        const int ret = aml_module_input_set(qcontext, &inData);
        if (ret)
        {
            printf("aml_module_input_set fail.\n");
            return NULL;
        }
    }
    context_model.malloc_buffer_once = false;

    aml_output_config_t outconfig;
    memset(&outconfig, 0, sizeof(aml_output_config_t));
    outconfig.format = context_model.use_dma ? AML_OUTDATA_DMA : AML_OUTDATA_RAW;
    outconfig.typeSize = sizeof(aml_output_config_t);

    nn_output *outdata = (nn_output*)aml_module_output_get(qcontext, outconfig);
    if (outdata == NULL) {
        printf("aml_module_output_get fail.\n");
        return NULL;
    }
    return reinterpret_cast<float*>(outdata->out[0].buf);
}
/*
 * Extracts the numeric index from a file name of the form
 * "test_<type>_<index>.jpg".
 *
 * Returns the index, or -1 when the name does not match the pattern or the
 * digits do not fit in an int.
 */
int extract_index(const std::string& filename) {
    // Compiled once: building a std::regex is expensive and this function is
    // called from a sort comparator.
    static const std::regex pattern(R"(test_\w+_(\d+)\.jpg)");

    std::smatch match;
    if (!std::regex_match(filename, match, pattern)) {
        return -1;
    }
    try {
        return std::stoi(match[1].str());
    } catch (const std::out_of_range&) {
        // A very long digit run would otherwise escape as an exception.
        return -1;
    }
}
std::vector<std::string> process_image_dir(
void* context_model,
const std::string& image_dir_path,
const std::string& base_dir,
const std::string& json_filename)
{
std::vector<std::string> results;
std::regex file_pattern(R"(test_(\w+)_\d+\.jpg)");
// Get base_dir from parameter, environment variable, or use default
std::string actual_base_dir = base_dir;
if (actual_base_dir.empty()) {
const char* env_base_dir = std::getenv("CLIP_BASE_DIR");
if (env_base_dir != nullptr) {
actual_base_dir = env_base_dir;
} else {
actual_base_dir = "./demo_data/clip_datasets/";
}
}
// Ensure base_dir ends with '/'
if (!actual_base_dir.empty() && actual_base_dir.back() != '/') {
actual_base_dir += "/";
}
// Get json_filename from parameter, environment variable, or use default
std::string actual_json_filename = json_filename;
if (actual_json_filename.empty()) {
const char* env_json_filename = std::getenv("CLIP_JSON_FILENAME");
if (env_json_filename != nullptr) {
actual_json_filename = env_json_filename;
} else {
actual_json_filename = "clip_text_res.json";
}
}
// storing qualified paths
std::vector<fs::directory_entry> matched_files;
// collect all relevant img.
for (const auto& entry : fs::directory_iterator(image_dir_path)) {
if (!entry.is_regular_file()) continue;
std::string filename = entry.path().filename().string();
if (std::regex_match(filename, file_pattern)) {
matched_files.push_back(entry);
}
}
// use index sort, test_type_index.jpg
std::sort(matched_files.begin(), matched_files.end(),
[](const fs::directory_entry& a, const fs::directory_entry& b) {
return extract_index(a.path().filename().string()) <
extract_index(b.path().filename().string());
});
for (const auto& entry : matched_files) {
if (!entry.is_regular_file()) continue;
std::string filename = entry.path().filename().string();
std::smatch match;
if (!std::regex_match(filename, match, file_pattern)) continue;
std::string name = match[1];
std::vector<float> input_data = preprocess_image(entry.path().string());
float* model_output = run_network(context_model, input_data, name);
float max_sim = -std::numeric_limits<float>::infinity();
std::string best_key, best_id;
// Iterate through all directories to find the directory containing the name
for (const auto& dir_entry : fs::directory_iterator(actual_base_dir)) {
if (!dir_entry.is_directory()) continue;
std::string folder_name = dir_entry.path().filename().string();
if (folder_name.find(name) == std::string::npos) continue;
std::string vit_res_path = actual_base_dir + folder_name + "/" + actual_json_filename;
std::ifstream vit_in(vit_res_path);
if (!vit_in.is_open()) {
printf("unopen: %s\n", vit_res_path.c_str());
continue;
}
json vit_json;
vit_in >> vit_json;
for (auto it = vit_json.begin(); it != vit_json.end(); ++it) {
const std::string& key = it.key();
const std::vector<float> vec = it.value().get<std::vector<float>>();
float sim = post_process(model_output, vec);
// printf("sim: %.4f\n", sim);
if (sim > max_sim) {
max_sim = sim;
best_key = key;
best_id = folder_name;
}
}
}
if (!best_key.empty() && !best_id.empty()) {
std::string best_path = actual_base_dir + best_id + "/";
results.push_back(best_path);
printf("\nProcessing images: %s, datasets img path: %s\n", filename.c_str(), best_path.c_str());
// printf("最相似图片: %s 相似度: %.4f\n", best_path.c_str(), max_sim); // for debug
}
}
return results;
}
/*
 * Releases the DMA input buffer (when one was allocated) and destroys the
 * SDK module behind `qcontext`.
 *
 * A failure to free the buffer is only reported; the return value reflects
 * aml_module_destroy() alone: its result on success, -1 when it fails.
 */
int destroy_network(void *qcontext)
{
    /* free model
       model.use_dma = true
       model.malloc_buffer_once = false
    */
    const bool has_dma_buffer =
        context_model.use_dma && mem_config_context_model.mem_size != 0;
    if (has_dma_buffer) {
        const int free_status =
            aml_util_freeBuffer(qcontext, &mem_config_context_model, &mem_data_context_model);
        if (free_status != 0)
        {
            std::cout << "aml_util_freeBuffer fail." << std::endl;
        }
    }
    context_model.use_dma = false;

    const int destroy_status = aml_module_destroy(qcontext);
    if (destroy_status == 0)
    {
        return destroy_status;
    }
    printf("aml_module_destroy fail.\n");
    return -1;
}

View file

@ -0,0 +1,13 @@
#ifndef MODEL_INVOKE_H
#define MODEL_INVOKE_H
#include <string>
#include <vector>
#include <map>
/* Creates the model from the file at model_path; returns an opaque handle, or NULL on failure. */
void* init_network_file(const char *model_path);
/*
 * Runs the model on the test images found in image_dir_path and returns, per
 * matched image, the path of the best-scoring dataset folder.
 * An empty base_dir / json_filename selects the CLIP_BASE_DIR /
 * CLIP_JSON_FILENAME environment variable, then a built-in default.
 */
std::vector<std::string> process_image_dir(void *context_model, const std::string& image_dir_path, const std::string& base_dir = "", const std::string& json_filename = "");
/* Releases the resources behind a handle from init_network_file(); returns 0 on success, -1 on failure. */
int destroy_network(void *qcontext);
#endif // MODEL_INVOKE_H

View file

@ -0,0 +1,144 @@
/*
* Copyright (C) 2024-2025 Amlogic, Inc. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <fstream>
#include <cmath>
#include <algorithm>
#include <string>
#include <iostream>
#include "model_invoke.h"
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
// bilinear interpolation scaling
/*
 * Resizes an interleaved 8-bit image with bilinear interpolation.
 *
 * src      : source pixels, row-major, `channels` values per pixel.
 * src_w/h  : source size in pixels.
 * channels : values per pixel.
 * dst_w/h  : requested output size in pixels.
 *
 * Returns dst_w * dst_h * channels floats in the same layout, each divided by
 * 255. Returns an empty vector when src is null or any size is not positive.
 *
 * Sample positions use pixel centres. They are clamped to the image so the
 * border pixels are replicated; without the clamp an upscale produced a
 * negative weight at the top and left edges and extrapolated outside [0, 1].
 */
std::vector<float> resize_bilinear(
    const unsigned char* src, int src_w, int src_h, int channels,
    int dst_w, int dst_h)
{
    if (src == nullptr || src_w <= 0 || src_h <= 0 || channels <= 0 ||
        dst_w <= 0 || dst_h <= 0) {
        return {};
    }

    std::vector<float> dst(static_cast<size_t>(dst_w) * dst_h * channels);

    // Horizontal sample positions are the same for every row: compute them once.
    std::vector<int> left_col(dst_w), right_col(dst_w);
    std::vector<float> col_weight(dst_w);
    for (int x = 0; x < dst_w; x++) {
        const float fx = std::max(0.0f, (x + 0.5f) * src_w / dst_w - 0.5f);
        const int x0 = std::min(src_w - 1, (int)std::floor(fx));
        left_col[x] = x0;
        right_col[x] = std::min(src_w - 1, x0 + 1);
        col_weight[x] = fx - x0;
    }

    for (int y = 0; y < dst_h; y++) {
        const float fy = std::max(0.0f, (y + 0.5f) * src_h / dst_h - 0.5f);
        const int y0 = std::min(src_h - 1, (int)std::floor(fy));
        const int y1 = std::min(src_h - 1, y0 + 1);
        const float wy = fy - y0;

        for (int x = 0; x < dst_w; x++) {
            const int x0 = left_col[x];
            const int x1 = right_col[x];
            const float wx = col_weight[x];

            for (int c = 0; c < channels; c++) {
                const float v00 = src[(y0 * src_w + x0) * channels + c];
                const float v01 = src[(y0 * src_w + x1) * channels + c];
                const float v10 = src[(y1 * src_w + x0) * channels + c];
                const float v11 = src[(y1 * src_w + x1) * channels + c];

                const float v0 = v00 * (1 - wx) + v01 * wx;
                const float v1 = v10 * (1 - wx) + v11 * wx;
                const float v = v0 * (1 - wy) + v1 * wy;

                dst[(static_cast<size_t>(y) * dst_w + x) * channels + c] = v / 255.0f;
            }
        }
    }
    return dst;
}
/*
 * Loads an image and turns it into a 224x224x3 float tensor.
 *
 * Steps: decode as 3 channels, resize so the shorter side becomes 224
 * (bilinear, values scaled to [0, 1]), crop the centre 224x224 window, then
 * normalise each channel with the CLIP mean and standard deviation.
 *
 * Returns the values in height-width-channel order, or an empty vector when
 * the image cannot be loaded (a message is written to std::cerr).
 */
std::vector<float> preprocess_image(const std::string& image_path) {
    int width, height, channels;
    unsigned char* pixels = stbi_load(image_path.c_str(), &width, &height, &channels, 3);
    if (pixels == nullptr) {
        std::cerr << "Failed to load image: " << image_path << std::endl;
        return {};
    }

    const int target_size = 224;

    // scale the shorter side
    const float scale = (float)target_size / std::min(width, height);
    const int new_w = std::round(width * scale);
    const int new_h = std::round(height * scale);

    // resize
    const std::vector<float> resized = resize_bilinear(pixels, width, height, 3, new_w, new_h);

    // normalization constants (CLIP)
    const float channel_mean[3] = {0.48145466f, 0.4578275f, 0.40821073f};
    const float channel_std[3] = {0.26862954f, 0.26130258f, 0.27577711f};

    // center crop and normalise in one pass
    const int left = (new_w - target_size) / 2;
    const int top = (new_h - target_size) / 2;
    std::vector<float> cropped(target_size * target_size * 3);
    for (int row = 0; row < target_size; row++) {
        for (int col = 0; col < target_size; col++) {
            const int dst_base = (row * target_size + col) * 3;
            const int src_base = ((row + top) * new_w + (col + left)) * 3;
            for (int c = 0; c < 3; c++) {
                const float value = resized[src_base + c];
                cropped[dst_base + c] = (value - channel_mean[c]) / channel_std[c];
            }
        }
    }
    stbi_image_free(pixels);

    // get NHWC
    return cropped;
}
/*
 * Scores a float model output against a stored vector.
 *
 * Returns the dot product of the first b.size() elements of `a` with `b`,
 * multiplied by a fixed scale of about 100. `a` must point to at least
 * b.size() floats.
 */
float post_process(const float* a, const std::vector<float>& b) {
    const float kScale = 100.00000762939453f;
    float acc = 0.0f;
    size_t idx = 0;
    for (const float weight : b) {
        acc += a[idx] * weight;
        ++idx;
    }
    acc *= kScale;
    return acc;
}
/*
 * Scores an int8 model output against a stored vector.
 *
 * Each element of `a` has 66 subtracted before being multiplied with the
 * matching element of `b`; the sum is then multiplied by a fixed scale of
 * about 100. `a` must point to at least b.size() values.
 * NOTE(review): 66 looks like the quantisation zero point of the model
 * output - confirm against the model.
 */
float post_process(const int8_t* a, const std::vector<float>& b) {
    const float kScale = 100.00000762939453f;
    float acc = 0.0f;
    size_t idx = 0;
    for (const float weight : b) {
        acc += (a[idx] - 66) * weight;
        ++idx;
    }
    acc *= kScale;
    return acc;
}
/*
 * Converts logits into probabilities that sum to 1.
 *
 * Returns a vector of the same length as `logits`; an empty input yields an
 * empty result (previously it dereferenced the end iterator).
 */
std::vector<float> softmax(const std::vector<float>& logits) {
    if (logits.empty()) {
        return {};
    }

    std::vector<float> result(logits.size());

    // numerical stability: subtract the maximum value first.
    const float max_logit = *std::max_element(logits.begin(), logits.end());

    float sum_exp = 0.0f;
    for (size_t i = 0; i < logits.size(); ++i) {
        result[i] = std::exp(logits[i] - max_logit);
        sum_exp += result[i];
    }

    // sum_exp >= 1 because the largest logit contributes exp(0).
    for (float& val : result) {
        val /= sum_exp;
    }
    return result;
}