Upload first version
This commit is contained in:
parent
f95d5a63b0
commit
3bdf2003ec
898 changed files with 1405811 additions and 1 deletions
0
examples/clip/cpp/.gitkeep
Normal file
0
examples/clip/cpp/.gitkeep
Normal file
77
examples/clip/cpp/build-android.sh
Executable file
77
examples/clip/cpp/build-android.sh
Executable file
|
|
@ -0,0 +1,77 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
#
|
||||
# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Print the command-line help text to stdout and terminate the script
# with exit status 1.
usage() {
    printf '%s\n' "Usage: $0 [-a <target_abi>]"
    printf '%s\n' " -a <target_abi> : Target ABI (default: arm64-v8a)"
    printf '%s\n' " -h : Show this help message"
    exit 1
}
|
||||
|
||||
# Default values
TARGET_ABI=arm64-v8a

# Parse arguments
while getopts 'a:h' opt; do
    case "$opt" in
        a)
            TARGET_ABI=$OPTARG
            ;;
        h)
            usage
            ;;
        *)
            usage
            ;;
    esac
done

# Resolve the NDK location: ANDROID_NDK_PATH wins, then ANDROID_NDK,
# then ANDROID_NDK_HOME.
if [ -z "${ANDROID_NDK_PATH}" ]; then
    if [ -n "${ANDROID_NDK}" ]; then
        ANDROID_NDK_PATH=${ANDROID_NDK}
    elif [ -n "${ANDROID_NDK_HOME}" ]; then
        ANDROID_NDK_PATH=${ANDROID_NDK_HOME}
    else
        echo "Error: ANDROID_NDK_PATH is not set."
        echo "Please set ANDROID_NDK_PATH to your Android NDK directory."
        exit 1
    fi
fi

# Fail early with a clear message instead of letting cmake report a
# missing toolchain file.
TOOLCHAIN_FILE="${ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake"
if [ ! -f "${TOOLCHAIN_FILE}" ]; then
    echo "Error: CMake toolchain file not found: ${TOOLCHAIN_FILE}"
    exit 1
fi

# All paths are quoted so the script also works when it lives in a
# directory whose name contains spaces.
ROOT_PWD=$(cd "$(dirname "$0")" && pwd)
BUILD_DIR="${ROOT_PWD}/build/android_${TARGET_ABI}"

echo "Building for Android..."
echo "NDK_PATH: ${ANDROID_NDK_PATH}"
echo "TARGET_ABI: ${TARGET_ABI}"
echo "BUILD_DIR: ${BUILD_DIR}"

mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"

# ${ROOT_PWD}/src is the same directory the former relative path
# "../../src" resolved to from inside BUILD_DIR.
cmake "${ROOT_PWD}/src" \
    -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
    -DANDROID_ABI="${TARGET_ABI}" \
    -DANDROID_PLATFORM=android-24 \
    -DCMAKE_BUILD_TYPE=Release

# Parallel job count defaults to 4; override with BUILD_JOBS=<n>.
make -j"${BUILD_JOBS:-4}"

echo "Build complete. Executable in ${BUILD_DIR}/clip_demo"
|
||||
|
||||
20
examples/clip/cpp/build-linux.sh
Executable file
20
examples/clip/cpp/build-linux.sh
Executable file
|
|
@ -0,0 +1,20 @@
|
|||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
#
|
||||
# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# TODO: Linux build steps are not implemented yet; this script currently does nothing.
|
||||
|
||||
42
examples/clip/cpp/src/CMakeLists.txt
Executable file
42
examples/clip/cpp/src/CMakeLists.txt
Executable file
|
|
@ -0,0 +1,42 @@
|
|||
# CMAKE_CXX_STANDARD 17 is only understood by CMake 3.8 and newer.
cmake_minimum_required(VERSION 3.8)
project(clip_demo)

# The sources use <filesystem>, so C++17 is a hard requirement; without
# CMAKE_CXX_STANDARD_REQUIRED CMake may silently fall back to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Paths are resolved relative to this file (CMAKE_CURRENT_SOURCE_DIR) so the
# project also configures correctly when pulled in through add_subdirectory().

# Set NNSDK path
set(NNSDK_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../../../../dependency/nnsdk")
include_directories(${NNSDK_ROOT}/include)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../../../common)

# Set 3rdparty path
set(3RDPARTY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../../../../dependency")

# Include directories for stb_image and json
# Note: code uses #include "stb_image.h" and #include "json.hpp"
include_directories(${3RDPARTY_DIR}/stb_image)
include_directories(${3RDPARTY_DIR}/json)

# link_directories() only affects targets created after it, so it must stay
# ahead of add_executable().
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    if(ANDROID_ABI STREQUAL "arm64-v8a")
        link_directories(${NNSDK_ROOT}/lib/android/arm64-v8a)
    else()
        # Every other ABI falls back to the 32-bit ARM libraries.
        link_directories(${NNSDK_ROOT}/lib/android/armeabi-v7a)
    endif()
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    link_directories(${NNSDK_ROOT}/lib/linux/lib64_yocto)
endif()

add_executable(${PROJECT_NAME}
    main.cpp
    model_invoke.cpp
    pre_postprocess.cpp
)

target_link_libraries(${PROJECT_NAME}
    nnsdk
    dl
    m
)

# Android needs log; link it for this target only instead of every target
# in the directory.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    target_link_libraries(${PROJECT_NAME} log)
endif()
|
||||
|
||||
109
examples/clip/cpp/src/main.cpp
Executable file
109
examples/clip/cpp/src/main.cpp
Executable file
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
|
||||
#include "model_invoke.h"
|
||||
|
||||
#define BILLION 1000000000
|
||||
|
||||
// Timestamps and durations recorded for each stage of a run.
// main() fills the init_* fields from get_time_count() (nanoseconds) and
// stores init_total_time in milliseconds; the other fields are not written
// anywhere in this file.
struct Get_Times
{
    // Model initialisation.
    uint64_t init_start_time;
    uint64_t init_end_time;
    uint64_t init_total_time;

    // Pre-processing.
    uint64_t preProcess_start_time;
    uint64_t preProcess_end_time;
    uint64_t preProcess_total_time;

    // Inference.
    uint64_t invoke_start_time;
    uint64_t invoke_end_time;
    uint64_t invoke_total_time;

    // Post-processing.
    uint64_t postProcess_start_time;
    uint64_t postProcess_end_time;
    uint64_t postProcess_total_time;

    // Overall figures.
    uint64_t total_time;
    std::vector<uint64_t> total_time_group;
};
|
||||
|
||||
static uint64_t get_time_count()
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (uint64_t)((uint64_t)ts.tv_nsec + (uint64_t)ts.tv_sec * BILLION);
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv)
|
||||
{
|
||||
Get_Times model_time;
|
||||
|
||||
std::vector<float> input_data_fir;
|
||||
float* model_output_data;
|
||||
|
||||
int ret = 0;
|
||||
int max_index = 0;
|
||||
|
||||
if (argc < 2) {
|
||||
printf("Usage: %s <model_path> [base_dir] [json_filename]\n", argv[0]);
|
||||
printf(" model_path: Path to the model file\n");
|
||||
printf(" base_dir: Base directory for clip datasets (optional, can also use CLIP_BASE_DIR env var)\n");
|
||||
printf(" json_filename: JSON filename in each dataset folder (optional, can also use CLIP_JSON_FILENAME env var, default: clip_text_res.json)\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
char* model_path_encoder = argv[1];
|
||||
std::string base_dir = (argc >= 3) ? argv[2] : "";
|
||||
std::string json_filename = (argc >= 4) ? argv[3] : "";
|
||||
void *context_model = NULL;
|
||||
|
||||
model_time.init_start_time = get_time_count();
|
||||
context_model = init_network_file(model_path_encoder);
|
||||
model_time.init_end_time = get_time_count();
|
||||
|
||||
if (context_model == NULL)
|
||||
{
|
||||
printf("init_network [context_model] fail.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (getenv("GET_TIME"))
|
||||
{
|
||||
model_time.init_total_time = (model_time.init_end_time - model_time.init_start_time) / 1000000;
|
||||
std::cout << "init_model_total time : " << model_time.init_total_time << "ms" << std::endl;
|
||||
}
|
||||
|
||||
while (true)
|
||||
{
|
||||
std::string json_path;
|
||||
|
||||
printf("\nPlease enter the JPG image path (enter exit to quit):\n");
|
||||
std::getline(std::cin, json_path);
|
||||
if (json_path == "exit") break;
|
||||
if (json_path.empty()) {
|
||||
printf("The path cannot be empty.\n");
|
||||
continue;
|
||||
}
|
||||
std::vector<std::string> out_str_path = process_image_dir(context_model, json_path, base_dir, json_filename);
|
||||
|
||||
for (int i = 0; i < out_str_path.size(); i++)
|
||||
{
|
||||
std::cout << "Index[" << i << "] : " << out_str_path[i] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
ret = destroy_network(context_model);
|
||||
if (ret != 0)
|
||||
{
|
||||
printf("destroy_network [context_model] fail.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
296
examples/clip/cpp/src/model_invoke.cpp
Executable file
296
examples/clip/cpp/src/model_invoke.cpp
Executable file
|
|
@ -0,0 +1,296 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
#include <string.h>

#include <algorithm>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <regex>
#include <stdexcept>
#include <string>
#include <system_error>
#include <vector>

#include "model_invoke.h"
#include "nn_sdk.h"
#include "json.hpp"
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
// Use the public std::filesystem namespace. std::__fs::filesystem is an
// internal libc++ name and does not exist in other standard libraries.
namespace fs = std::filesystem;
|
||||
|
||||
// Flags that steer how run_network() feeds input to the model.
struct DMAConfig {
    // When true, run_network() copies the input into the buffer obtained
    // from aml_util_mallocBuffer() instead of calling aml_module_input_set().
    bool use_dma{true};
    // True until run_network() has executed once; gates the one-time
    // buffer allocation.
    bool malloc_buffer_once{true};
};

// Single file-wide instance read and updated by run_network() and
// destroy_network().
DMAConfig context_model{};
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
aml_memory_config_t mem_config_context_model;
|
||||
aml_memory_data_t mem_data_context_model;
|
||||
|
||||
std::vector<float> preprocess_image(const std::string& image_path);
|
||||
float post_process(const float* a, const std::vector<float>& b);
|
||||
|
||||
void* init_network_file(const char *model_path)
|
||||
{
|
||||
void *qcontext = NULL;
|
||||
aml_config config;
|
||||
|
||||
memset(&config, 0, sizeof(aml_config));
|
||||
config.nbgType = NN_ADLA_FILE;
|
||||
config.path = model_path;
|
||||
config.modelType = ADLA_LOADABLE;
|
||||
config.typeSize = sizeof(aml_config);
|
||||
|
||||
/* set omp, If you are considering high CPU usage during operation,
|
||||
you can turn off this api, set_openmp_opt_flag = false */
|
||||
aml_openmp_opt_t openmp_opt[] =
|
||||
{
|
||||
{
|
||||
.operator_type = AML_Unknown,
|
||||
.enable_openmp = true,
|
||||
.involve_all_ops = true,
|
||||
.openmp_num = 2,
|
||||
},
|
||||
};
|
||||
config.forward_ctrl.softop_info.set_openmp_opt_flag = true;
|
||||
config.forward_ctrl.softop_info.openmp_opt_num = sizeof(openmp_opt) / sizeof(aml_openmp_opt_t);
|
||||
config.forward_ctrl.softop_info.openmp_opt = openmp_opt;
|
||||
|
||||
/* set neon */
|
||||
aml_neon_opt_t neon_opt[] =
|
||||
{
|
||||
{
|
||||
.operator_type = AML_Unknown,
|
||||
.enable_neon = true,
|
||||
.involve_all_ops = true,
|
||||
},
|
||||
};
|
||||
config.forward_ctrl.softop_info.set_neon_opt_flag = true;
|
||||
config.forward_ctrl.softop_info.neon_opt_num = sizeof(neon_opt) / sizeof(aml_neon_opt_t);
|
||||
config.forward_ctrl.softop_info.neon_opt = neon_opt;
|
||||
|
||||
qcontext = aml_module_create(&config);
|
||||
if (NULL == qcontext)
|
||||
{
|
||||
printf("aml_module_create fail.\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return qcontext;
|
||||
}
|
||||
|
||||
float* run_network(void *qcontext, std::vector<float> input_ids, const std::string image_type)
|
||||
{
|
||||
int ret = 0;
|
||||
nn_input inData;
|
||||
|
||||
nn_output *outdata = NULL;
|
||||
aml_output_config_t outconfig;
|
||||
|
||||
inData.input_index = 0;
|
||||
inData.info.input_format = AML_INPUT_DEFAULT;
|
||||
inData.size = input_ids.size() * sizeof(float);
|
||||
|
||||
if (context_model.use_dma) {
|
||||
if (context_model.malloc_buffer_once) {
|
||||
mem_config_context_model.cache_type = AML_WITH_CACHE;
|
||||
mem_config_context_model.memory_type = AML_VIRTUAL_ADDR;
|
||||
mem_config_context_model.direction = AML_MEM_DIRECTION_READ_WRITE;
|
||||
mem_config_context_model.index = 0;
|
||||
mem_config_context_model.mem_size = inData.size;
|
||||
aml_util_mallocBuffer(qcontext, &mem_config_context_model, &mem_data_context_model);
|
||||
aml_util_swapExternalInputBuffer(qcontext, &mem_config_context_model, &mem_data_context_model);
|
||||
}
|
||||
|
||||
inData.input_type = INPUT_DMA_DATA;
|
||||
memcpy(mem_data_context_model.viraddr, input_ids.data(), mem_config_context_model.mem_size);
|
||||
inData.input = NULL;
|
||||
} else {
|
||||
inData.input = reinterpret_cast<unsigned char*>(input_ids.data());
|
||||
inData.input_type = BINARY_RAW_DATA;
|
||||
|
||||
ret = aml_module_input_set(qcontext, &inData);
|
||||
if (ret)
|
||||
{
|
||||
printf("aml_module_input_set fail.\n");
|
||||
}
|
||||
}
|
||||
context_model.malloc_buffer_once = false;
|
||||
|
||||
memset(&outconfig, 0, sizeof(aml_output_config_t));
|
||||
|
||||
if (context_model.use_dma) {
|
||||
outconfig.format = AML_OUTDATA_DMA;
|
||||
} else {
|
||||
outconfig.format = AML_OUTDATA_RAW;
|
||||
}
|
||||
outconfig.typeSize = sizeof(aml_output_config_t);
|
||||
outdata = (nn_output*)aml_module_output_get(qcontext, outconfig);
|
||||
|
||||
return reinterpret_cast<float*>(outdata->out[0].buf);
|
||||
}
|
||||
|
||||
/*
 * Extract the trailing numeric index from a file name of the form
 * test_<type>_<index>.jpg.
 *
 * Returns the index, or -1 when the name does not match the pattern or the
 * digits do not fit in an int.
 */
int extract_index(const std::string& filename) {
    // Compiled once: this function is called repeatedly (e.g. from a sort
    // comparator) and std::regex construction is expensive.
    static const std::regex pattern(R"(test_\w+_(\d+)\.jpg)");

    std::smatch match;
    if (!std::regex_match(filename, match, pattern)) {
        return -1;
    }

    try {
        return std::stoi(match[1].str());
    } catch (const std::out_of_range&) {
        // A digit run too long for int is treated like a non-matching name
        // instead of propagating an exception to the caller.
        return -1;
    }
}
|
||||
|
||||
std::vector<std::string> process_image_dir(
|
||||
void* context_model,
|
||||
const std::string& image_dir_path,
|
||||
const std::string& base_dir,
|
||||
const std::string& json_filename)
|
||||
{
|
||||
std::vector<std::string> results;
|
||||
std::regex file_pattern(R"(test_(\w+)_\d+\.jpg)");
|
||||
|
||||
// Get base_dir from parameter, environment variable, or use default
|
||||
std::string actual_base_dir = base_dir;
|
||||
if (actual_base_dir.empty()) {
|
||||
const char* env_base_dir = std::getenv("CLIP_BASE_DIR");
|
||||
if (env_base_dir != nullptr) {
|
||||
actual_base_dir = env_base_dir;
|
||||
} else {
|
||||
actual_base_dir = "./demo_data/clip_datasets/";
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure base_dir ends with '/'
|
||||
if (!actual_base_dir.empty() && actual_base_dir.back() != '/') {
|
||||
actual_base_dir += "/";
|
||||
}
|
||||
|
||||
// Get json_filename from parameter, environment variable, or use default
|
||||
std::string actual_json_filename = json_filename;
|
||||
if (actual_json_filename.empty()) {
|
||||
const char* env_json_filename = std::getenv("CLIP_JSON_FILENAME");
|
||||
if (env_json_filename != nullptr) {
|
||||
actual_json_filename = env_json_filename;
|
||||
} else {
|
||||
actual_json_filename = "clip_text_res.json";
|
||||
}
|
||||
}
|
||||
|
||||
// storing qualified paths
|
||||
std::vector<fs::directory_entry> matched_files;
|
||||
|
||||
// collect all relevant img.
|
||||
for (const auto& entry : fs::directory_iterator(image_dir_path)) {
|
||||
if (!entry.is_regular_file()) continue;
|
||||
|
||||
std::string filename = entry.path().filename().string();
|
||||
if (std::regex_match(filename, file_pattern)) {
|
||||
matched_files.push_back(entry);
|
||||
}
|
||||
}
|
||||
|
||||
// use index sort, test_type_index.jpg
|
||||
std::sort(matched_files.begin(), matched_files.end(),
|
||||
[](const fs::directory_entry& a, const fs::directory_entry& b) {
|
||||
return extract_index(a.path().filename().string()) <
|
||||
extract_index(b.path().filename().string());
|
||||
});
|
||||
|
||||
for (const auto& entry : matched_files) {
|
||||
if (!entry.is_regular_file()) continue;
|
||||
|
||||
std::string filename = entry.path().filename().string();
|
||||
std::smatch match;
|
||||
if (!std::regex_match(filename, match, file_pattern)) continue;
|
||||
|
||||
std::string name = match[1];
|
||||
|
||||
std::vector<float> input_data = preprocess_image(entry.path().string());
|
||||
float* model_output = run_network(context_model, input_data, name);
|
||||
|
||||
float max_sim = -std::numeric_limits<float>::infinity();
|
||||
std::string best_key, best_id;
|
||||
|
||||
// Iterate through all directories to find the directory containing the name
|
||||
for (const auto& dir_entry : fs::directory_iterator(actual_base_dir)) {
|
||||
if (!dir_entry.is_directory()) continue;
|
||||
|
||||
std::string folder_name = dir_entry.path().filename().string();
|
||||
if (folder_name.find(name) == std::string::npos) continue;
|
||||
|
||||
std::string vit_res_path = actual_base_dir + folder_name + "/" + actual_json_filename;
|
||||
std::ifstream vit_in(vit_res_path);
|
||||
if (!vit_in.is_open()) {
|
||||
printf("unopen: %s\n", vit_res_path.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
json vit_json;
|
||||
vit_in >> vit_json;
|
||||
|
||||
for (auto it = vit_json.begin(); it != vit_json.end(); ++it) {
|
||||
const std::string& key = it.key();
|
||||
const std::vector<float> vec = it.value().get<std::vector<float>>();
|
||||
float sim = post_process(model_output, vec);
|
||||
// printf("sim: %.4f\n", sim);
|
||||
if (sim > max_sim) {
|
||||
max_sim = sim;
|
||||
best_key = key;
|
||||
best_id = folder_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!best_key.empty() && !best_id.empty()) {
|
||||
std::string best_path = actual_base_dir + best_id + "/";
|
||||
results.push_back(best_path);
|
||||
printf("\nProcessing images: %s, datasets img path: %s\n", filename.c_str(), best_path.c_str());
|
||||
// printf("最相似图片: %s 相似度: %.4f\n", best_path.c_str(), max_sim); // for debug
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
int destroy_network(void *qcontext)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
/* free model
|
||||
model.use_dma = true
|
||||
model.malloc_buffer_once = false
|
||||
*/
|
||||
if (context_model.use_dma && mem_config_context_model.mem_size != 0) {
|
||||
ret = aml_util_freeBuffer(qcontext, &mem_config_context_model, &mem_data_context_model);
|
||||
if (ret)
|
||||
{
|
||||
std::cout << "aml_util_freeBuffer fail." << std::endl;
|
||||
}
|
||||
}
|
||||
context_model.use_dma = false;
|
||||
|
||||
ret = aml_module_destroy(qcontext);
|
||||
if (ret)
|
||||
{
|
||||
printf("aml_module_destroy fail.\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
13
examples/clip/cpp/src/model_invoke.h
Executable file
13
examples/clip/cpp/src/model_invoke.h
Executable file
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef MODEL_INVOKE_H
#define MODEL_INVOKE_H

#include <string>
#include <vector>
#include <map>

/**
 * Create a model context from the model file at model_path.
 * @return the context, or NULL on failure.
 */
void* init_network_file(const char *model_path);

/**
 * Run the model on the test_<type>_<index>.jpg images found in a directory.
 *
 * @param context_model   context returned by init_network_file().
 * @param image_dir_path  directory containing the input images.
 * @param base_dir        dataset root; when empty, the CLIP_BASE_DIR
 *                        environment variable or a built-in default is used.
 * @param json_filename   file read in each dataset folder; when empty, the
 *                        CLIP_JSON_FILENAME environment variable or a
 *                        built-in default is used.
 * @return one best-matching dataset folder path per matched image.
 */
std::vector<std::string> process_image_dir(void *context_model, const std::string& image_dir_path, const std::string& base_dir = "", const std::string& json_filename = "");

/**
 * Destroy a context created by init_network_file().
 * @return 0 on success, -1 on failure.
 */
int destroy_network(void *qcontext);

#endif // MODEL_INVOKE_H
|
||||
|
||||
144
examples/clip/cpp/src/pre_postprocess.cpp
Executable file
144
examples/clip/cpp/src/pre_postprocess.cpp
Executable file
|
|
@ -0,0 +1,144 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "model_invoke.h"
|
||||
|
||||
#define STB_IMAGE_IMPLEMENTATION
|
||||
#include "stb_image.h"
|
||||
|
||||
// bilinear interpolation scaling
|
||||
/*
 * Resize an interleaved 8-bit image with bilinear interpolation.
 *
 * src       source pixels, src_h rows of src_w pixels with `channels`
 *           interleaved samples each.
 * dst_w/h   size of the result.
 *
 * Returns dst_w * dst_h * channels floats in the same interleaved layout,
 * scaled from [0,255] to [0,1]. Returns an empty vector when src is null or
 * any dimension is not positive.
 */
std::vector<float> resize_bilinear(
    const unsigned char* src, int src_w, int src_h, int channels,
    int dst_w, int dst_h)
{
    if (src == nullptr || src_w <= 0 || src_h <= 0 || channels <= 0 ||
        dst_w <= 0 || dst_h <= 0) {
        return {};
    }

    // size_t arithmetic so large images cannot overflow int.
    const size_t ch = static_cast<size_t>(channels);
    const size_t src_row = static_cast<size_t>(src_w) * ch;
    const size_t dst_row = static_cast<size_t>(dst_w) * ch;
    std::vector<float> dst(dst_row * static_cast<size_t>(dst_h));

    for (int y = 0; y < dst_h; y++) {
        // Sample position in source space (pixel centres aligned). It is
        // clamped at 0 so border pixels get weight 0 rather than a negative
        // weight, which would extrapolate beyond the source values.
        const float fy = std::max(0.0f, (y + 0.5f) * src_h / dst_h - 0.5f);
        const int y0 = std::min(src_h - 1, static_cast<int>(fy));
        const int y1 = std::min(src_h - 1, y0 + 1);
        const float wy = fy - y0;

        for (int x = 0; x < dst_w; x++) {
            const float fx = std::max(0.0f, (x + 0.5f) * src_w / dst_w - 0.5f);
            const int x0 = std::min(src_w - 1, static_cast<int>(fx));
            const int x1 = std::min(src_w - 1, x0 + 1);
            const float wx = fx - x0;

            const unsigned char* p00 = src + y0 * src_row + x0 * ch;
            const unsigned char* p01 = src + y0 * src_row + x1 * ch;
            const unsigned char* p10 = src + y1 * src_row + x0 * ch;
            const unsigned char* p11 = src + y1 * src_row + x1 * ch;
            float* out = dst.data() + y * dst_row + x * ch;

            for (size_t c = 0; c < ch; c++) {
                const float top = p00[c] * (1 - wx) + p01[c] * wx;
                const float bottom = p10[c] * (1 - wx) + p11[c] * wx;
                out[c] = (top * (1 - wy) + bottom * wy) / 255.0f;
            }
        }
    }
    return dst;
}
|
||||
|
||||
std::vector<float> preprocess_image(const std::string& image_path) {
|
||||
int width, height, channels;
|
||||
unsigned char* img = stbi_load(image_path.c_str(), &width, &height, &channels, 3);
|
||||
if (!img) {
|
||||
std::cerr << "Failed to load image: " << image_path << std::endl;
|
||||
return {};
|
||||
}
|
||||
|
||||
const int target_size = 224;
|
||||
|
||||
// scale the shorter side
|
||||
float scale = (float)target_size / std::min(width, height);
|
||||
int new_w = std::round(width * scale);
|
||||
int new_h = std::round(height * scale);
|
||||
|
||||
// resize
|
||||
std::vector<float> resized = resize_bilinear(img, width, height, 3, new_w, new_h);
|
||||
|
||||
// center crop
|
||||
int left = (new_w - target_size) / 2;
|
||||
int top = (new_h - target_size) / 2;
|
||||
|
||||
std::vector<float> cropped(target_size * target_size * 3);
|
||||
for (int h = 0; h < target_size; h++) {
|
||||
for (int w = 0; w < target_size; w++) {
|
||||
for (int c = 0; c < 3; c++) {
|
||||
cropped[(h * target_size + w) * 3 + c] =
|
||||
resized[((h + top) * new_w + (w + left)) * 3 + c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stbi_image_free(img);
|
||||
|
||||
// normalization (CLIP)
|
||||
float mean[3] = {0.48145466f, 0.4578275f, 0.40821073f};
|
||||
float std[3] = {0.26862954f, 0.26130258f, 0.27577711f};
|
||||
|
||||
for (int i = 0; i < target_size * target_size; i++) {
|
||||
for (int c = 0; c < 3; c++) {
|
||||
cropped[i * 3 + c] = (cropped[i * 3 + c] - mean[c]) / std[c];
|
||||
}
|
||||
}
|
||||
|
||||
// get NHWC
|
||||
return cropped;
|
||||
}
|
||||
|
||||
// Dot product of a and b over b.size() elements, multiplied by a fixed
// scale factor. The caller must make sure a holds at least b.size() values.
float post_process(const float* a, const std::vector<float>& b) {
    const float scale = 100.00000762939453f;

    float dot = 0.0f;
    size_t idx = 0;
    for (const float rhs : b) {
        dot += a[idx] * rhs;
        ++idx;
    }

    return dot * scale;
}
|
||||
|
||||
// int8 variant: subtracts the constant 66 from every element of a before
// taking the dot product with b over b.size() elements, then multiplies by a
// fixed scale factor. The caller must make sure a holds at least b.size() values.
// NOTE(review): 66 looks like a quantisation zero point - confirm.
float post_process(const int8_t* a, const std::vector<float>& b) {
    const float scale = 100.00000762939453f;

    float dot = 0.0f;
    size_t idx = 0;
    for (const float rhs : b) {
        dot += (a[idx] - 66) * rhs;
        ++idx;
    }

    return dot * scale;
}
|
||||
|
||||
// Softmax over logits. The maximum is subtracted before exponentiation for
// numerical stability. Returns an empty vector for empty input.
std::vector<float> softmax(const std::vector<float>& logits) {
    // max_element on an empty range returns end(), which must not be dereferenced.
    if (logits.empty()) {
        return {};
    }

    const float max_logit = *std::max_element(logits.begin(), logits.end());

    std::vector<float> result(logits.size());
    float sum_exp = 0.0f;
    for (size_t i = 0; i < logits.size(); ++i) {
        result[i] = std::exp(logits[i] - max_logit);
        sum_exp += result[i];
    }

    for (float& val : result) {
        val /= sum_exp;
    }

    return result;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue