add python demos
This commit is contained in:
parent
3bdf2003ec
commit
c91356fc38
97 changed files with 3250 additions and 290 deletions
0
examples/resnet/README.md
Normal file → Executable file
0
examples/resnet/README.md
Normal file → Executable file
0
examples/resnet/cpp/.gitkeep
Normal file → Executable file
0
examples/resnet/cpp/.gitkeep
Normal file → Executable file
|
|
@ -1,63 +1,19 @@
|
|||
#TODO
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 [-a <target_arch>]"
|
||||
echo " -a <target_arch> : Target architecture (default: aarch64)"
|
||||
echo " -h : Show this help message"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Default values
|
||||
TARGET_ARCH=aarch64
|
||||
|
||||
# Parse arguments
|
||||
while getopts 'a:h' opt; do
|
||||
case "$opt" in
|
||||
a)
|
||||
TARGET_ARCH=$OPTARG
|
||||
;;
|
||||
h)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Default to aarch64-linux-gnu if GCC_COMPILER is not set
|
||||
GCC_COMPILER=${GCC_COMPILER:-aarch64-linux-gnu}
|
||||
|
||||
# Set compilers
|
||||
export CC=${GCC_COMPILER}-gcc
|
||||
export CXX=${GCC_COMPILER}-g++
|
||||
|
||||
# Validate compiler
|
||||
if ! command -v ${CC} &> /dev/null; then
|
||||
echo "Error: Compiler ${CC} not found."
|
||||
echo "Please set GCC_COMPILER environment variable to your cross-compiler path prefix."
|
||||
echo "Example: export GCC_COMPILER=/path/to/toolchain/bin/aarch64-linux-gnu"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ROOT_PWD=$(cd "$(dirname $0)" && pwd)
|
||||
BUILD_DIR=${ROOT_PWD}/build/linux
|
||||
|
||||
echo "Building for Linux..."
|
||||
echo "COMPILER: ${CC}"
|
||||
echo "TARGET_ARCH: ${TARGET_ARCH}"
|
||||
echo "BUILD_DIR: ${BUILD_DIR}"
|
||||
|
||||
mkdir -p ${BUILD_DIR}
|
||||
cd ${BUILD_DIR}
|
||||
|
||||
cmake ../../src \
|
||||
-DCMAKE_SYSTEM_NAME=Linux \
|
||||
-DCMAKE_SYSTEM_PROCESSOR=${TARGET_ARCH} \
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
make -j4
|
||||
|
||||
echo "Build complete. Executable in ${BUILD_DIR}/resnet_demo"
|
||||
#
|
||||
# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
### TO DO
|
||||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "postprocess.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef RESNET_POSTPROCESS_H
|
||||
#define RESNET_POSTPROCESS_H
|
||||
|
||||
|
|
|
|||
0
examples/resnet/model/.gitkeep
Normal file → Executable file
0
examples/resnet/model/.gitkeep
Normal file → Executable file
0
examples/resnet/py/.gitkeep
Normal file → Executable file
0
examples/resnet/py/.gitkeep
Normal file → Executable file
115
examples/resnet/py/resnet.py
Executable file
115
examples/resnet/py/resnet.py
Executable file
|
|
@ -0,0 +1,115 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import glob
|
||||
import cv2
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from amlnnlite.api import AMLNNLite
|
||||
|
||||
# Per-channel constants used to normalize the RGB input (values are on the
# 0-255 pixel scale).
# NOTE(review): all three STD entries are identical (58.395) — presumably this
# matches how the model was exported; confirm against the model's training
# configuration before changing.
MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
STD = np.array([58.395, 58.395, 58.395], dtype=np.float32)


def preprocess(img_path):
    """Load an image file and turn it into a normalized model input.

    The image is read with OpenCV, converted from BGR to RGB, resized to
    224x224 with bilinear interpolation, cast to float32 and normalized with
    the module-level ``MEAN`` / ``STD`` constants.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 array with a leading batch axis of size 1 (shape
        ``(1, 224, 224, 3)``), or ``None`` when OpenCV cannot read the file.
    """
    bgr = cv2.imread(img_path)
    if bgr is None:
        # cv2.imread signals an unreadable/missing file with None.
        return None

    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (224, 224), interpolation=cv2.INTER_LINEAR)
    normalized = (resized.astype(np.float32) - MEAN) / STD

    # Prepend the batch dimension expected by the inference call.
    return np.expand_dims(normalized, axis=0)
|
||||
|
||||
def postprocess_topk(logits, labels, k=5):
    """Print the k highest-scoring classes and return them.

    Args:
        logits: Model output scores. Size-1 axes are squeezed away; any
            array-like is accepted.
        labels: Sequence of class names indexed by class id. Class ids beyond
            the end of ``labels`` are reported as ``Unknown(<id>)``.
        k: Maximum number of entries to report.

    Returns:
        A list of ``(class_index, class_name, score)`` tuples ordered from the
        highest score to the lowest. (Earlier versions returned ``None``;
        callers that ignore the return value are unaffected.)
    """
    # atleast_1d keeps a single-score output indexable: squeeze() alone would
    # collapse it to a 0-d array and make scores[c] raise IndexError.
    scores = np.atleast_1d(np.squeeze(np.asarray(logits)))
    top = np.argsort(scores)[::-1][:k]

    print(f"\n Top-{k} Results:")
    results = []
    for rank, cls in enumerate(top, start=1):
        cls = int(cls)
        name = labels[cls] if cls < len(labels) else f"Unknown({cls})"
        score = float(scores[cls])
        print(f" {rank}. {name:20s} score={score:.6f}")
        results.append((cls, name, score))
    return results
|
||||
|
||||
def main():
    """Run the classification demo over every image in a directory.

    Parses the command line, loads the label file, collects the image files,
    then initializes AMLNNLite and, for each readable image, runs inference
    ``--run-cycles`` times and prints the top-5 classes of the last run.

    Host-side inputs (label file, image directory) are validated before the
    runtime is initialized, and ``uninit()`` is always called once ``init()``
    has succeeded, even if an image fails midway.
    """
    parser = argparse.ArgumentParser(description="Classification AMLNNLite Demo")

    parser.add_argument('--board-work-path', default='/data/nn', help='Work path on board')
    parser.add_argument('--model-path', required=True, help='Path to .adla or .tflite model')
    parser.add_argument('--image-dir', required=True, help='Directory containing test images')
    parser.add_argument('--labels', required=True, help='Path to synset_words.txt or labels.txt')
    parser.add_argument('--run-cycles', type=int, default=1, help='Number of inference cycles')
    parser.add_argument('--loglevel', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])

    args = parser.parse_args()

    # Check cheap, local preconditions first so a typo in a path does not
    # cost a full runtime init/uninit round trip.
    if not os.path.exists(args.labels):
        print(f"Error: Label file not found: {args.labels}")
        return

    with open(args.labels, "r") as f:
        labels = [line.strip() for line in f]

    # A set removes duplicates: on case-insensitive filesystems the lower-
    # and upper-case patterns match the same files.
    found = set()
    for ext in ['*.jpg', '*.jpeg', '*.png', '*.bmp']:
        found.update(glob.glob(os.path.join(args.image_dir, ext)))
        found.update(glob.glob(os.path.join(args.image_dir, ext.upper())))
    image_files = sorted(found)

    if not image_files:
        print(f"No image files found in: {args.image_dir}")
        return

    total_images = len(image_files)

    amlnn = AMLNNLite()
    amlnn.config(
        board_work_path=args.board_work_path,
        model_path=args.model_path,
        run_cycles=args.run_cycles,
        loglevel=args.loglevel
    )
    amlnn.init()

    try:
        for idx, img_path in enumerate(image_files, start=1):
            print("=" * 60)
            print(f"Processing image {idx}/{total_images}: {os.path.basename(img_path)}")
            print("=" * 60)

            inp = preprocess(img_path)
            if inp is None:
                print(f" Skip: Cannot read {img_path}")
                continue

            outputs = None
            for _ in range(args.run_cycles):
                outputs = amlnn.inference(
                    inp,
                    inputs_data_format='NHWC',
                    outputs_data_format='NHWC'
                )

            if outputs is None:
                # --run-cycles < 1: nothing was inferred, so there is nothing
                # to rank (previously this path raised NameError).
                print(" Skip: no inference was run (--run-cycles < 1)")
                continue

            postprocess_topk(outputs[0], labels, k=5)

        amlnn.visualize()
    finally:
        # Always release the runtime once init() has succeeded.
        amlnn.uninit()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
examples/retinaface/README.md
Normal file → Executable file
0
examples/retinaface/README.md
Normal file → Executable file
0
examples/retinaface/cpp/.gitkeep
Normal file → Executable file
0
examples/retinaface/cpp/.gitkeep
Normal file → Executable file
|
|
@ -1,63 +1,19 @@
|
|||
#TODO
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 [-a <target_arch>]"
|
||||
echo " -a <target_arch> : Target architecture (default: aarch64)"
|
||||
echo " -h : Show this help message"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Default values
|
||||
TARGET_ARCH=aarch64
|
||||
|
||||
# Parse arguments
|
||||
while getopts 'a:h' opt; do
|
||||
case "$opt" in
|
||||
a)
|
||||
TARGET_ARCH=$OPTARG
|
||||
;;
|
||||
h)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Default to aarch64-linux-gnu if GCC_COMPILER is not set
|
||||
GCC_COMPILER=${GCC_COMPILER:-aarch64-linux-gnu}
|
||||
|
||||
# Set compilers
|
||||
export CC=${GCC_COMPILER}-gcc
|
||||
export CXX=${GCC_COMPILER}-g++
|
||||
|
||||
# Validate compiler
|
||||
if ! command -v ${CC} &> /dev/null; then
|
||||
echo "Error: Compiler ${CC} not found."
|
||||
echo "Please set GCC_COMPILER environment variable to your cross-compiler path prefix."
|
||||
echo "Example: export GCC_COMPILER=/path/to/toolchain/bin/aarch64-linux-gnu"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
ROOT_PWD=$(cd "$(dirname $0)" && pwd)
|
||||
BUILD_DIR=${ROOT_PWD}/build/linux
|
||||
|
||||
echo "Building for Linux..."
|
||||
echo "COMPILER: ${CC}"
|
||||
echo "TARGET_ARCH: ${TARGET_ARCH}"
|
||||
echo "BUILD_DIR: ${BUILD_DIR}"
|
||||
|
||||
mkdir -p ${BUILD_DIR}
|
||||
cd ${BUILD_DIR}
|
||||
|
||||
cmake ../../src \
|
||||
-DCMAKE_SYSTEM_NAME=Linux \
|
||||
-DCMAKE_SYSTEM_PROCESSOR=${TARGET_ARCH} \
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
make -j4
|
||||
|
||||
echo "Build complete. Executable in ${BUILD_DIR}/retinaface_demo"
|
||||
#
|
||||
# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
### TO DO
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <filesystem>
|
||||
|
|
@ -19,43 +35,87 @@ static void hwc_to_chw(const cv::Mat& src, float* dst) {
|
|||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc < 3) { std::cout << "Usage: " << argv[0] << " <model.adla> <image_dir>\n"; return 0; }
|
||||
if (argc < 3) {
|
||||
std::cout << "Usage: " << argv[0] << " <model.adla> <image_dir>\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
aml_config cfg{};
|
||||
cfg.typeSize = sizeof(cfg); cfg.modelType = ADLA_LOADABLE; cfg.nbgType = NN_ADLA_FILE; cfg.path = argv[1];
|
||||
cfg.typeSize = sizeof(cfg);
|
||||
cfg.modelType = ADLA_LOADABLE;
|
||||
cfg.nbgType = NN_ADLA_FILE;
|
||||
cfg.path = argv[1];
|
||||
void* ctx = aml_module_create(&cfg);
|
||||
if (!ctx) {
|
||||
std::cerr << "Failed to create aml_module\n";
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto priors = generate_priors();
|
||||
size_t num_priors = priors.size();
|
||||
std::vector<float> chw_buffer(kInputW * kInputH * 3);
|
||||
|
||||
fs::create_directory("retinaface_result");
|
||||
const std::string out_dir = "retinaface_result";
|
||||
fs::create_directory(out_dir);
|
||||
|
||||
std::vector<fs::path> image_paths;
|
||||
for (auto& it : fs::directory_iterator(argv[2])) {
|
||||
cv::Mat img = cv::imread(it.path().string());
|
||||
std::string ext = it.path().extension().string();
|
||||
std::transform(ext.begin(), ext.end(), ext.begin(), ::tolower);
|
||||
if (ext == ".jpg" || ext == ".png" || ext == ".jpeg" || ext == ".bmp") {
|
||||
image_paths.push_back(it.path());
|
||||
}
|
||||
}
|
||||
std::sort(image_paths.begin(), image_paths.end());
|
||||
|
||||
int total = image_paths.size();
|
||||
if (total == 0) {
|
||||
std::cout << "No images found in " << argv[2] << "\n";
|
||||
aml_module_destroy(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < total; ++i) {
|
||||
const auto& path = image_paths[i];
|
||||
const std::string filename = path.filename().string();
|
||||
|
||||
std::cout << "============================================================\n";
|
||||
std::cout << "Processing image " << (i + 1) << "/" << total << ": " << filename << "\n";
|
||||
std::cout << "============================================================\n";
|
||||
|
||||
cv::Mat img = cv::imread(path.string());
|
||||
if (img.empty()) continue;
|
||||
|
||||
float scale = std::min((float)kInputW / img.cols, (float)kInputH / img.rows);
|
||||
int nw = img.cols * scale, nh = img.rows * scale;
|
||||
int px = (kInputW - nw) / 2, py = (kInputH - nh) / 2;
|
||||
cv::Mat res, canvas = cv::Mat::zeros(kInputH, kInputW, CV_32FC3);
|
||||
cv::resize(img, res, {nw, nh}); res.convertTo(res, CV_32FC3);
|
||||
cv::resize(img, res, {nw, nh});
|
||||
res.convertTo(res, CV_32FC3);
|
||||
res.copyTo(canvas(cv::Rect(px, py, nw, nh)));
|
||||
hwc_to_chw(canvas, chw_buffer.data());
|
||||
|
||||
nn_input in{}; in.typeSize = sizeof(in); in.input_type = BINARY_RAW_DATA;
|
||||
in.input = (unsigned char*)chw_buffer.data(); in.size = chw_buffer.size() * 4;
|
||||
in.info.valid = 1; in.info.input_format = AML_INPUT_MODEL_NCHW; in.info.input_data_type = AML_INPUT_FP32;
|
||||
nn_input in{};
|
||||
in.typeSize = sizeof(in);
|
||||
in.input_type = BINARY_RAW_DATA;
|
||||
in.input = (unsigned char*)chw_buffer.data();
|
||||
in.size = chw_buffer.size() * 4;
|
||||
in.info.valid = 1;
|
||||
in.info.input_format = AML_INPUT_MODEL_NCHW;
|
||||
in.info.input_data_type = AML_INPUT_FP32;
|
||||
aml_module_input_set(ctx, &in);
|
||||
|
||||
aml_output_config_t outcfg{}; outcfg.typeSize = sizeof(outcfg); outcfg.format = AML_OUTDATA_FLOAT32;
|
||||
aml_output_config_t outcfg{};
|
||||
outcfg.typeSize = sizeof(outcfg);
|
||||
outcfg.format = AML_OUTDATA_FLOAT32;
|
||||
nn_output* out = (nn_output*)aml_module_output_get(ctx, outcfg);
|
||||
if (!out) continue;
|
||||
|
||||
float *loc = nullptr, *conf = nullptr, *landm = nullptr;
|
||||
for (int i = 0; i < out->num; i++) {
|
||||
if (out->out[i].size == num_priors * 4 * 4) loc = (float*)out->out[i].buf;
|
||||
else if (out->out[i].size == num_priors * 2 * 4) conf = (float*)out->out[i].buf;
|
||||
else if (out->out[i].size == num_priors * 10 * 4) landm = (float*)out->out[i].buf;
|
||||
for (int j = 0; j < out->num; j++) {
|
||||
if (out->out[j].size == num_priors * 4 * 4) loc = (float*)out->out[j].buf;
|
||||
else if (out->out[j].size == num_priors * 2 * 4) conf = (float*)out->out[j].buf;
|
||||
else if (out->out[j].size == num_priors * 10 * 4) landm = (float*)out->out[j].buf;
|
||||
}
|
||||
if (!loc || !conf || !landm) continue;
|
||||
|
||||
|
|
@ -64,11 +124,11 @@ int main(int argc, char** argv) {
|
|||
std::vector<std::array<float, 10>> lms;
|
||||
std::vector<float> scores_vec;
|
||||
|
||||
for (size_t i = 0; i < num_priors; i++) {
|
||||
float sc = is_planar ? conf[num_priors + i] : conf[i * 2 + 1];
|
||||
for (size_t j = 0; j < num_priors; j++) {
|
||||
float sc = is_planar ? conf[num_priors + j] : conf[j * 2 + 1];
|
||||
if (sc > 0.5f) {
|
||||
boxes.push_back(decode_box(loc, i, num_priors, is_planar, priors[i]));
|
||||
lms.push_back(decode_landm(landm, i, num_priors, is_planar, priors[i]));
|
||||
boxes.push_back(decode_box(loc, j, num_priors, is_planar, priors[j]));
|
||||
lms.push_back(decode_landm(landm, j, num_priors, is_planar, priors[j]));
|
||||
scores_vec.push_back(sc);
|
||||
}
|
||||
}
|
||||
|
|
@ -83,7 +143,8 @@ int main(int argc, char** argv) {
|
|||
|
||||
char score_text[16];
|
||||
std::snprintf(score_text, sizeof(score_text), "%.2f", scores_vec[k]);
|
||||
cv::putText(img, score_text, {x1, std::max(y1 - 5, 5)}, cv::FONT_HERSHEY_SIMPLEX, 0.5, {0, 255, 0}, 1, cv::LINE_AA);
|
||||
cv::putText(img, score_text, {x1, std::max(y1 - 5, 5)},
|
||||
cv::FONT_HERSHEY_SIMPLEX, 0.5, {0, 255, 0}, 1, cv::LINE_AA);
|
||||
|
||||
auto& lm = lms[k];
|
||||
for (int j = 0; j < 5; j++) {
|
||||
|
|
@ -92,8 +153,14 @@ int main(int argc, char** argv) {
|
|||
cv::circle(img, {lx, ly}, 2, {0, 0, 255}, -1);
|
||||
}
|
||||
}
|
||||
cv::imwrite("retinaface_result/" + it.path().filename().string(), img);
|
||||
std::cout << "Detected: " << it.path().filename() << " (" << keep.size() << " faces)\n";
|
||||
|
||||
std::string save_path = out_dir + "/" + filename;
|
||||
cv::imwrite(save_path, img);
|
||||
|
||||
std::cout << " Detected " << keep.size() << " faces\n";
|
||||
std::cout << " Result saved to: " << save_path << "\n\n";
|
||||
}
|
||||
aml_module_destroy(ctx); return 0;
|
||||
|
||||
aml_module_destroy(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "postprocess.h"
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef RETINAFACE_POSTPROCESS_H
|
||||
#define RETINAFACE_POSTPROCESS_H
|
||||
|
||||
|
|
|
|||
0
examples/retinaface/model/.gitkeep
Normal file → Executable file
0
examples/retinaface/model/.gitkeep
Normal file → Executable file
0
examples/retinaface/py/.gitkeep
Normal file → Executable file
0
examples/retinaface/py/.gitkeep
Normal file → Executable file
173
examples/retinaface/py/RetinaFace.py
Executable file
173
examples/retinaface/py/RetinaFace.py
Executable file
|
|
@ -0,0 +1,173 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import os
|
||||
import cv2
|
||||
import glob
|
||||
import argparse
|
||||
import time
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from amlnnlite.api import AMLNNLite
|
||||
|
||||
class PriorBox:
    """Generator of the anchor ("prior") boxes used to decode detections.

    Three feature-map levels are used, with strides 8, 16 and 32 and two
    anchor sizes per level.
    """

    def __init__(self, image_size=(320, 320)):
        """Store the network input size, given as ``(height, width)``."""
        self.image_size = image_size
        self.steps = [8, 16, 32]
        self.min_sizes = [[16, 32], [64, 128], [256, 512]]

    def forward(self):
        """Build every prior box.

        Returns:
            A float32 array with one row ``[cx, cy, w, h]`` per prior, all
            values normalized by the input width/height. Rows are ordered by
            level, then feature-map row, then column, then anchor size.
        """
        height, width = self.image_size
        anchors = []
        for level, stride in enumerate(self.steps):
            rows = int(np.ceil(height / stride))
            cols = int(np.ceil(width / stride))
            sizes = self.min_sizes[level]
            anchors.extend(
                [
                    (col + 0.5) * stride / width,   # center x
                    (row + 0.5) * stride / height,  # center y
                    size / width,
                    size / height,
                ]
                for row in range(rows)
                for col in range(cols)
                for size in sizes
            )
        return np.array(anchors, dtype=np.float32)
|
||||
|
||||
def decode_boxes(loc, priors, variances=(0.1, 0.2)):
    """Turn box regression offsets into corner-form boxes.

    Args:
        loc: Array of shape ``(N, 4)`` with center offsets in the first two
            columns and log-scale size offsets in the last two.
        priors: Array of shape ``(N, 4)`` holding ``[cx, cy, w, h]`` priors.
        variances: Scale factors applied to the center offsets
            (``variances[0]``) and the size offsets (``variances[1]``).

    Returns:
        Array of shape ``(N, 4)`` with rows ``[x1, y1, x2, y2]`` in the same
        units as ``priors``.
    """
    prior_xy = priors[:, :2]
    prior_wh = priors[:, 2:]

    centers = prior_xy + loc[:, :2] * variances[0] * prior_wh
    sizes = prior_wh * np.exp(loc[:, 2:] * variances[1])

    # Convert center/size form to top-left / bottom-right corners.
    top_left = centers - sizes / 2
    bottom_right = sizes + top_left
    return np.concatenate((top_left, bottom_right), axis=1)
|
||||
|
||||
def decode_landmarks(pre, priors, variances=(0.1, 0.2)):
    """Turn landmark regression offsets into landmark coordinates.

    Args:
        pre: Array of shape ``(N, 10)``: five ``(x, y)`` offset pairs per row.
        priors: Array of shape ``(N, 4)`` holding ``[cx, cy, w, h]`` priors.
        variances: Only ``variances[0]`` is used, as the offset scale.

    Returns:
        Array of shape ``(N, 10)`` with five ``(x, y)`` points per row, in the
        same units as ``priors``.
    """
    anchor_xy = priors[:, :2]
    anchor_wh = priors[:, 2:]
    offset_scale = variances[0]

    points = []
    for start in range(0, 10, 2):
        offsets = pre[:, start:start + 2]
        points.append(anchor_xy + offsets * offset_scale * anchor_wh)
    return np.concatenate(points, axis=1)
|
||||
|
||||
def nms(dets, thresh=0.4):
    """Greedy non-maximum suppression.

    Args:
        dets: Array of shape ``(N, 5)`` with rows ``[x1, y1, x2, y2, score]``.
        thresh: IoU threshold; a box is dropped when its IoU with an already
            kept, higher-scoring box is greater than this value.

    Returns:
        List of row indices into ``dets`` that are kept, ordered from the
        highest score to the lowest.
    """
    # reshape(-1, 5) keeps an empty input unpackable into five columns.
    x1, y1, x2, y2, scores = np.asarray(dets).reshape(-1, 5).T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(best)

        # Intersection rectangle of the best box with every remaining box.
        # (A stale assignment that computed xx2/yy2 from y1 and was
        # immediately overwritten has been removed.)
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])

        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[best] + areas[rest] - inter)

        # Continue with the boxes that do not overlap the kept box too much.
        order = rest[iou <= thresh]
    return keep
|
||||
|
||||
def postprocess_retinaface(outputs, priors, conf_thresh=0.5, nms_thresh=0.4):
    """Convert raw RetinaFace outputs into final face detections.

    The three output heads are recognized by the size of their last axis
    after squeezing: 4 (box offsets), 2 (class scores) and 10 (landmark
    offsets). Column 1 of the score head is used as the face score.

    Args:
        outputs: Iterable of raw model outputs (array-likes).
        priors: Array of shape ``(N, 4)`` with the prior boxes.
        conf_thresh: Detections with a score not above this are discarded.
        nms_thresh: IoU threshold forwarded to ``nms``.

    Returns:
        ``(boxes, landmarks, scores)`` for the detections that survive NMS,
        or three empty lists when a head is missing or nothing passes the
        score threshold.
    """
    # Map last-axis width -> tensor; a later output of the same width
    # replaces an earlier one.
    heads = {}
    for raw in outputs:
        tensor = np.squeeze(np.asarray(raw))
        width = tensor.shape[-1]
        if width in (4, 2, 10):
            heads[width] = tensor

    if 4 not in heads or 2 not in heads or 10 not in heads:
        return [], [], []

    face_scores = heads[2][:, 1]
    selected = face_scores > conf_thresh
    if not np.any(selected):
        return [], [], []

    picked_priors = priors[selected]
    boxes = decode_boxes(heads[4][selected], picked_priors)
    landms = decode_landmarks(heads[10][selected], picked_priors)
    scores = face_scores[selected]

    keep = nms(np.hstack((boxes, scores[:, None])), nms_thresh)
    return boxes[keep], landms[keep], scores[keep]
|
||||
|
||||
def preprocess(img_path, input_size=(320, 320)):
    """Load an image and letterbox it into the network input.

    The image is scaled uniformly so that it fits inside ``input_size`` and
    is centered on a canvas filled with the value 128.

    Args:
        img_path: Path of the image file to read.
        input_size: Network input size as ``(width, height)``.

    Returns:
        A 5-tuple ``(inp, img, scale, pad_x, pad_y)``:
        ``inp`` is the float32 canvas with a leading batch axis of size 1,
        ``img`` is the original image as returned by ``cv2.imread``,
        ``scale`` is the resize factor that was applied, and ``pad_x`` /
        ``pad_y`` are the left/top offsets of the resized image on the
        canvas. When the file cannot be read the result is
        ``(None, None, 0, 0, 0)``.
    """
    img = cv2.imread(img_path)
    if img is None:
        return None, None, 0, 0, 0

    in_w, in_h = input_size[0], input_size[1]
    h0, w0 = img.shape[:2]
    scale = min(in_w / w0, in_h / h0)

    # Clamp to at least one pixel: for very elongated images int() truncates
    # the short side to 0, which makes cv2.resize raise.
    nw = max(1, int(w0 * scale))
    nh = max(1, int(h0 * scale))
    resized = cv2.resize(img, (nw, nh))

    canvas = np.full((in_h, in_w, 3), 128, dtype=np.uint8)
    pad_x, pad_y = (in_w - nw) // 2, (in_h - nh) // 2
    canvas[pad_y:pad_y + nh, pad_x:pad_x + nw] = resized

    return np.expand_dims(canvas.astype(np.float32), axis=0), img, scale, pad_x, pad_y
|
||||
|
||||
def main():
    """Run the RetinaFace demo over every image in a directory.

    Parses the command line, collects the image files, initializes
    AMLNNLite, and for each readable image runs inference, decodes the
    detections, draws boxes and landmarks on the original image and writes
    the result into ``retinaface_result/``.

    Images are discovered with the same extension list as the other demos
    (jpg, jpeg, png, bmp in lower or upper case), and ``uninit()`` is always
    called once ``init()`` has succeeded.
    """
    parser = argparse.ArgumentParser(description="RetinaFace AMLNNLite Demo")
    parser.add_argument('--board-work-path', type=str, default='/data/nn')
    parser.add_argument('--model-path', required=True, help='Path to .adla model')
    parser.add_argument('--image-dir', required=True, help='Directory of test images')
    parser.add_argument('--run-cycles', type=int, default=1, help='Inference cycles')
    parser.add_argument('--loglevel', type=str, default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'])
    args = parser.parse_args()

    # Network input size; used for the priors, the letterbox and for mapping
    # normalized detections back to pixels.
    in_w, in_h = 320, 320

    # The previous pattern "*.[jp][pn][g]" only matched three-letter
    # extensions such as .jpg/.png and silently skipped .jpeg, .bmp and
    # upper-case names. The set removes duplicates on case-insensitive
    # filesystems.
    found = set()
    for ext in ('*.jpg', '*.jpeg', '*.png', '*.bmp'):
        found.update(glob.glob(os.path.join(args.image_dir, ext)))
        found.update(glob.glob(os.path.join(args.image_dir, ext.upper())))
    image_files = sorted(found)

    if not image_files:
        print(f"No images found in {args.image_dir}")
        return

    res_dir = "retinaface_result"
    os.makedirs(res_dir, exist_ok=True)

    amlnn = AMLNNLite()
    amlnn.config(board_work_path=args.board_work_path,
                 model_path=args.model_path,
                 run_cycles=args.run_cycles,
                 loglevel=args.loglevel)
    amlnn.init()

    try:
        priors = PriorBox((in_h, in_w)).forward()

        for idx, img_path in enumerate(image_files, start=1):
            print("=" * 60)
            print(f"Processing image {idx}/{len(image_files)}: {Path(img_path).name}")
            print("=" * 60)

            inp, orig, scale, pad_x, pad_y = preprocess(img_path, (in_w, in_h))
            if inp is None:
                continue

            outputs = amlnn.inference(inp, inputs_data_format='NHWC')

            boxes, landms, scores = postprocess_retinaface(outputs, priors)

            if len(boxes) > 0:
                print(f" Detected {len(boxes)} objects:")
                for i, sc in enumerate(scores, 1):
                    print(f" {i}. face ({sc:.2f})")
            else:
                print(" No objects detected")

            for box, lm in zip(boxes, landms):
                # Undo the letterbox: normalized -> input pixels -> remove
                # padding -> original image pixels.
                x1 = int((box[0] * in_w - pad_x) / scale)
                y1 = int((box[1] * in_h - pad_y) / scale)
                x2 = int((box[2] * in_w - pad_x) / scale)
                y2 = int((box[3] * in_h - pad_y) / scale)
                cv2.rectangle(orig, (x1, y1), (x2, y2), (0, 255, 0), 2)
                for lx, ly in lm.reshape(5, 2):
                    px = int((lx * in_w - pad_x) / scale)
                    py = int((ly * in_h - pad_y) / scale)
                    cv2.circle(orig, (px, py), 2, (0, 0, 255), -1)

            save_path = os.path.join(res_dir, Path(img_path).name)
            cv2.imwrite(save_path, orig)
            print(f" Result saved to: {save_path}")

        if args.loglevel == 'INFO':
            print("\nI Performance analysis visualization starting...")

        amlnn.visualize()
    finally:
        # Always release the runtime once init() has succeeded.
        amlnn.uninit()
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
0
examples/yolov11/README.md
Normal file → Executable file
0
examples/yolov11/README.md
Normal file → Executable file
0
examples/yolov11/cpp/.gitkeep
Normal file → Executable file
0
examples/yolov11/cpp/.gitkeep
Normal file → Executable file
|
|
@ -1,65 +1,19 @@
|
|||
#TODO
|
||||
#!/bin/bash
|
||||
set -e
|
||||
|
||||
usage() {
|
||||
echo "Usage: $0 [-a <target_arch>]"
|
||||
echo " -a <target_arch> : Target architecture (default: aarch64)"
|
||||
echo " -h : Show this help message"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Default values
|
||||
TARGET_ARCH=aarch64
|
||||
|
||||
# Parse arguments
|
||||
while getopts 'a:h' opt; do
|
||||
case "$opt" in
|
||||
a)
|
||||
TARGET_ARCH=$OPTARG
|
||||
;;
|
||||
h)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Default to aarch64-linux-gnu if GCC_COMPILER is not set
|
||||
GCC_COMPILER=${GCC_COMPILER:-aarch64-linux-gnu}
|
||||
|
||||
# Set compilers
|
||||
export CC=${GCC_COMPILER}-gcc
|
||||
export CXX=${GCC_COMPILER}-g++
|
||||
|
||||
# Validate compiler
|
||||
if ! command -v ${CC} &> /dev/null; then
|
||||
echo "Error: Compiler ${CC} not found."
|
||||
echo "Please set GCC_COMPILER environment variable to your cross-compiler path prefix."
|
||||
echo "Example: export GCC_COMPILER=/path/to/toolchain/bin/aarch64-linux-gnu"
|
||||
# Proceeding anyway as user might have custom env setup
|
||||
else
|
||||
echo "Using compiler: ${CC}"
|
||||
fi
|
||||
|
||||
ROOT_PWD=$(cd "$(dirname $0)" && pwd)
|
||||
BUILD_DIR=${ROOT_PWD}/build/linux
|
||||
|
||||
echo "Building for Linux..."
|
||||
echo "COMPILER: ${CC}"
|
||||
echo "TARGET_ARCH: ${TARGET_ARCH}"
|
||||
echo "BUILD_DIR: ${BUILD_DIR}"
|
||||
|
||||
mkdir -p ${BUILD_DIR}
|
||||
cd ${BUILD_DIR}
|
||||
|
||||
cmake ../../src \
|
||||
-DCMAKE_SYSTEM_NAME=Linux \
|
||||
-DCMAKE_SYSTEM_PROCESSOR=${TARGET_ARCH} \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DOpenCV_DIR=${ROOT_PWD}/../../../dependency/opencv/opencv-linux-aarch64/share/OpenCV
|
||||
make -j4
|
||||
|
||||
echo "Build complete. Executable in ${BUILD_DIR}/yolo11_demo"
|
||||
#
|
||||
# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
### TO DO
|
||||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <filesystem>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "postprocess.h"
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef YOLO11_POSTPROCESS_H
|
||||
#define YOLO11_POSTPROCESS_H
|
||||
|
||||
|
|
|
|||
0
examples/yolov11/model/.gitkeep
Normal file → Executable file
0
examples/yolov11/model/.gitkeep
Normal file → Executable file
0
examples/yolov11/py/.gitkeep
Normal file → Executable file
0
examples/yolov11/py/.gitkeep
Normal file → Executable file
157
examples/yolov11/py/yolov11.py
Executable file
157
examples/yolov11/py/yolov11.py
Executable file
|
|
@ -0,0 +1,157 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import argparse
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from amlnnlite.api import AMLNNLite
|
||||
|
||||
# Label table: class index (0-79) -> human readable name, five labels per row.
class_names = dict(enumerate((
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)))
|
||||
|
||||
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
    """Resize ``img`` to fit inside ``new_shape`` and pad the remainder.

    The image is scaled by a single factor so its aspect ratio is kept, then
    a constant-colour border is added so the result measures ``new_shape``
    (height, width).

    Returns:
        ``(padded_image, scale, (left, top))`` where ``scale`` is the resize
        factor and ``(left, top)`` is the border added on those two sides.
    """
    src_h, src_w = img.shape[:2]
    dst_h, dst_w = new_shape[0], new_shape[1]

    scale = min(dst_h / src_h, dst_w / src_w)
    resized_w = int(round(src_w * scale))
    resized_h = int(round(src_h * scale))

    # Half of the leftover space goes on each side; the -0.1/+0.1 nudges
    # decide which side receives the extra pixel when the leftover is odd.
    half_w = (dst_w - resized_w) / 2
    half_h = (dst_h - resized_h) / 2

    resized = cv2.resize(img, (resized_w, resized_h), interpolation=cv2.INTER_LINEAR)

    top = int(round(half_h - 0.1))
    bottom = int(round(half_h + 0.1))
    left = int(round(half_w - 0.1))
    right = int(round(half_w + 0.1))

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return padded, scale, (left, top)
|
||||
|
||||
def preprocess(img_path, new_shape=(640,640)):
    """Load an image and turn it into a float32 NCHW tensor scaled to [0, 1].

    Returns:
        ``(input_tensor, original_img, scale, pad)``; ``scale`` and ``pad``
        come from ``letterbox``. All four entries are ``None`` when OpenCV
        cannot read ``img_path``.
    """
    bgr_source = cv2.imread(str(img_path))
    if bgr_source is None:
        return None, None, None, None

    boxed, scale, pad = letterbox(bgr_source, new_shape)
    as_rgb = cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB)
    unit_range = as_rgb.astype(np.float32) / 255.0

    # HWC -> CHW, then add the leading batch axis (NCHW).
    chw = np.transpose(unit_range, (2, 0, 1))
    batched = np.expand_dims(chw, 0)
    return batched, bgr_source, scale, pad
|
||||
|
||||
def postprocess(outputs, scale, pad, strides=(32, 16, 8), conf_threshold=0.25, iou_threshold=0.45):
    """Decode raw detection-head outputs into a list of detections.

    Args:
        outputs: Sequence of per-level arrays. ``output[0]`` is transposed
            with ``(1, 2, 0)``, i.e. each array is read as (1, C, H, W); the
            first 64 channels hold the box distribution (4 sides x 16 bins)
            and the remaining channels are per-class logits.
        scale: Resize factor returned by ``letterbox``.
        pad: ``(left, top)`` padding returned by ``letterbox``.
        strides: Stride of each entry of ``outputs``, in the same order.
        conf_threshold: Minimum best-class score for a candidate to be kept.
        iou_threshold: Overlap threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns:
        List of dicts with keys ``'bbox'`` (``[x1, y1, x2, y2]`` in original
        image coordinates), ``'confidence'`` and ``'class_name'``. The list
        is empty when ``outputs`` is empty or nothing passes the threshold.
    """
    box_chunks, score_chunks = [], []
    bins = np.arange(16, dtype=np.float32)

    for level, output in enumerate(outputs):
        stride = strides[level]
        feat = output[0].transpose(1, 2, 0)  # C, H, W -> H, W, C
        h, w, _ = feat.shape

        dfl = feat[:, :, :64].reshape(h, w, 4, 16)
        cls_scores = 1.0 / (1.0 + np.exp(-feat[:, :, 64:]))  # sigmoid

        # Softmax over the 16 bins (max-subtracted for stability), then the
        # expected bin index is the distance to each box side in grid cells.
        exp_x = np.exp(dfl - np.max(dfl, axis=-1, keepdims=True))
        prob = exp_x / np.sum(exp_x, axis=-1, keepdims=True)
        deltas = np.sum(prob * bins, axis=-1)  # (h, w, 4): left, top, right, bottom

        grid_y, grid_x = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
        x1 = (grid_x + 0.5 - deltas[..., 0]) * stride
        y1 = (grid_y + 0.5 - deltas[..., 1]) * stride
        x2 = (grid_x + 0.5 + deltas[..., 2]) * stride
        y2 = (grid_y + 0.5 + deltas[..., 3]) * stride

        box_chunks.append(np.stack([x1, y1, x2, y2], axis=-1).reshape(-1, 4))
        score_chunks.append(cls_scores.reshape(-1, cls_scores.shape[-1]))

    # np.concatenate rejects an empty list; no outputs means no detections.
    if not box_chunks:
        return []

    boxes = np.concatenate(box_chunks, axis=0)
    scores_all = np.concatenate(score_chunks, axis=0)
    class_ids = np.argmax(scores_all, axis=1)
    scores = np.max(scores_all, axis=1)

    keep = scores > conf_threshold
    if not np.any(keep):
        return []

    # Apply the mask once instead of re-indexing for every detection.
    boxes = boxes[keep]
    scores = scores[keep]
    class_ids = class_ids[keep]

    # Undo the letterbox: remove the padding, then the resize.
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad[0]) / scale
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad[1]) / scale

    # cv2.dnn.NMSBoxes takes rectangles as [x, y, width, height]; feeding it
    # corner coordinates makes it compute overlaps on the wrong boxes.
    rects = boxes.copy()
    rects[:, 2:] -= rects[:, :2]
    picked = cv2.dnn.NMSBoxes(rects.tolist(), scores.tolist(), conf_threshold, iou_threshold)
    # Depending on the OpenCV version the result is an (N, 1) array, a flat
    # array or an empty tuple; normalise to a flat index array.
    picked = np.asarray(picked, dtype=np.int64).reshape(-1)

    return [
        {
            'bbox': boxes[idx].tolist(),
            'confidence': float(scores[idx]),
            'class_name': class_names.get(int(class_ids[idx]), 'unknown'),
        }
        for idx in picked
    ]
|
||||
|
||||
def main():
    """Run the demo on every image in ``--image-dir`` and save annotated copies.

    For each ``.jpg``/``.jpeg``/``.png`` file the image is preprocessed,
    inferred ``--run-cycles`` times, decoded with ``postprocess`` and written
    to ``<model stem>_result/<image stem>_result.jpg`` with the detections
    drawn on it. Unreadable images are skipped.
    """
    parser = argparse.ArgumentParser(description="YOLOV11 AMLNNLite Demo")
    parser.add_argument('--board-work-path', default='/data/nn', help='Work path on board')
    parser.add_argument('-m', '--model-path', required=True, help='Path to .adla or .tflite model')
    parser.add_argument('--image-dir', required=True, help='Directory containing test images')
    parser.add_argument('--run-cycles', type=int, default=1, help='Inference cycles for profiling')
    parser.add_argument('--loglevel', default='WARNING', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], help='Log level')
    args = parser.parse_args()

    # With fewer than one cycle the inference loop never runs and there would
    # be no outputs to decode, so reject the value up front.
    if args.run_cycles < 1:
        parser.error("--run-cycles must be >= 1")

    amlnn = AMLNNLite()
    amlnn.config(
        board_work_path=args.board_work_path,
        model_path=args.model_path,
        run_cycles=args.run_cycles,
        loglevel=args.loglevel
    )
    amlnn.init()

    # Everything after init() runs under try/finally so the runtime is always
    # released, including when an image fails halfway through.
    try:
        found = set()  # a set: on case-insensitive filesystems both patterns hit the same file
        for ext in ["*.jpg", "*.jpeg", "*.png"]:
            found.update(glob.glob(os.path.join(args.image_dir, ext)))
            found.update(glob.glob(os.path.join(args.image_dir, ext.upper())))
        image_files = sorted(found)

        if not image_files:
            print(f"No images found in {args.image_dir}")
            return

        model_stem = Path(args.model_path).stem
        res_dir = f"{model_stem}_result"
        os.makedirs(res_dir, exist_ok=True)

        for i, img_path in enumerate(image_files, 1):
            print("=" * 60)
            print(f"Processing image {i}/{len(image_files)}: {os.path.basename(img_path)}")
            print("=" * 60)

            input_tensor, ori_img, scale, pad = preprocess(img_path)
            if input_tensor is None:
                continue

            for _ in range(args.run_cycles):
                outputs = amlnn.inference(input_tensor, inputs_data_format='NCHW', outputs_data_format='NCHW')

            detections = postprocess(outputs, scale, pad)

            print(f" Detected {len(detections)} objects:")
            for idx, det in enumerate(detections, 1):
                print(f" {idx}. {det['class_name']} ({det['confidence']:.2f})")

            for det in detections:
                x1, y1, x2, y2 = map(int, det['bbox'])
                cv2.rectangle(ori_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(ori_img, f"{det['class_name']} {det['confidence']:.2f}", (x1, y1-10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

            save_path = os.path.join(res_dir, f"{Path(img_path).stem}_result.jpg")
            cv2.imwrite(save_path, ori_img)
            print(f" Result saved to: {save_path}")

        amlnn.visualize()
    finally:
        amlnn.uninit()


if __name__ == "__main__":
    main()
|
||||
|
|
@ -0,0 +1,133 @@
|
|||
# yolov8
|
||||
|
||||
## 1.Overview
|
||||
|
||||
YOLOv8 was released by Ultralytics on January 10, 2023, offering cutting-edge performance in terms of accuracy and speed. Building upon the advancements of previous YOLO versions, YOLOv8 introduced new features and optimizations that make it an ideal choice for various [object detection](https://www.ultralytics.com/blog/a-guide-to-deep-dive-into-object-detection-in-2025) tasks in a wide range of applications.
|
||||
|
||||
## 2.Model Download
|
||||
|
||||
- **Open Source model**
|
||||
|
||||
- **Open Source projects:** https://github.com/ultralytics/ultralytics/tree/v8.2.0
|
||||
|
||||
- **Export Model Step:**
|
||||
|
||||
- **Install ultralytics**
|
||||
|
||||
pip install torch==2.4.1
|
||||
|
||||
pip install torchvision==0.19.1
|
||||
|
||||
pip install ultralytics==8.2.0
|
||||
|
||||
- **Download weights**
|
||||
|
||||
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8m.pt
|
||||
|
||||
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt
|
||||
|
||||
wget https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8n.pt
|
||||
|
||||
- **Export Model**
|
||||
|
||||
```
|
||||
from ultralytics import YOLO
|
||||
|
||||
model = YOLO("yolov8m.pt")
|
||||
model.export(format="onnx", opset=12, simplify=True, dynamic=False, imgsz=640)
|
||||
```
|
||||
|
||||
|
||||
- **Exported Model**
|
||||
|
||||
link to amlogic server( **onnx model or quantized tflite**)
|
||||
|
||||
|
||||
|
||||
## 3. Model Conversion
|
||||
|
||||
```
|
||||
cd model
|
||||
Usage: ./adla_convert.sh model_path adla_toolkit_path target_platform
|
||||
|
||||
example
|
||||
./adla_convert.sh yolov8m.onnx /xxxx/adla-toolkit-binary-3.2.9.3 PRODUCT_PID0XA005
|
||||
./adla_convert.sh yolov8s.onnx /xxxx/adla-toolkit-binary-3.2.9.3 PRODUCT_PID0XA005
|
||||
./adla_convert.sh yolov8n.onnx /xxxx/adla-toolkit-binary-3.2.9.3 PRODUCT_PID0XA005
|
||||
```
|
||||
|
||||
| Parameter | Description |
|
||||
| ----------------- | ------------------------------------------------------------ |
|
||||
| model_path | onnx model path |
|
||||
| adla_toolkit_path | path to adla_toolkit |
|
||||
| target_platform | Specify the target platform: PRODUCT_PID0XA003 for A311D2, PRODUCT_PID0XA005 for S905X5 |
|
||||
|
||||
|
||||
|
||||
## 4. Demo Run
|
||||
|
||||
### CPP
|
||||
|
||||
#### 1. Compile
|
||||
|
||||
**Prerequisites:**
|
||||
- Android NDK (r25e recommended)
|
||||
- `ANDROID_NDK_PATH` environment variable set
|
||||
|
||||
**Build:**
|
||||
```bash
|
||||
# Build for arm64-v8a
|
||||
cd examples/yolov8/cpp
|
||||
./build-android.sh -a arm64-v8a
|
||||
```
|
||||
|
||||
The executable will be generated at `build/android/yolov8_demo` (Note: executable name may vary, verify in build folder).
|
||||
|
||||
#### 2. Run
|
||||
|
||||
```bash
|
||||
# Push executable to device
|
||||
adb push build/android/yolov8_demo /data/local/tmp/
|
||||
adb push model/yolov8s_int8_A311D2.adla /data/local/tmp/
|
||||
adb push test_image.jpg /data/local/tmp/
|
||||
|
||||
# Run on device
|
||||
adb shell
|
||||
cd /data/local/tmp
|
||||
chmod +x yolov8_demo
|
||||
export LD_LIBRARY_PATH=/vendor/lib64   # or /vendor/lib, depending on the device
|
||||
|
||||
# Usage: ./yolov8_demo <model_path> <image_path>
|
||||
./yolov8_demo yolov8s_int8_A311D2.adla test_image.jpg
|
||||
```
|
||||
|
||||
**Note:** Replace `yolov8s_int8_A311D2.adla` with your actual model file path.
|
||||
|
||||
### Python
|
||||
|
||||
**Prerequisites:**
|
||||
- Python 3.10
|
||||
- Required packages: `numpy`, `opencv-python`, `amlnnlite`
|
||||
|
||||
**Install dependencies:**
|
||||
```bash
|
||||
pip install numpy opencv-python amlnnlite-1.0.0-cp310-cp310-linux_aarch64.whl
|
||||
```
|
||||
|
||||
**Run on device:**
|
||||
```bash
|
||||
python yolov8.py --model-path ./yolov8s_int8_A311D2.adla
|
||||
```
|
||||
|
||||
The script will automatically process all image files (`.jpg`, `.jpeg`, `.png`, `.bmp`) in the current directory and save results to a `{model_name}_result` folder.
|
||||
|
||||
## 5.Results
|
||||
The program will print the detection count and inference time. The result image with bounding boxes will be saved to the specified output path (`result.jpg` by default).
|
||||
|
||||
|
||||
You can pull the result image back to view it:
|
||||
```bash
|
||||
adb pull /data/local/tmp/result.jpg .
|
||||
```
|
||||

|
||||
|
||||
|
|
@ -62,8 +62,6 @@ int main(int argc, char** argv) {
|
|||
}
|
||||
|
||||
// 3. Preprocess
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
auto [preprocessed, scale, pad] = preprocess(img, std::make_tuple(MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH));
|
||||
|
||||
// Quantize to int8 (model expects quantized input)
|
||||
|
|
@ -88,6 +86,7 @@ int main(int argc, char** argv) {
|
|||
outconfig.typeSize = sizeof(aml_output_config_t);
|
||||
outconfig.format = AML_OUTDATA_FLOAT32;
|
||||
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
nn_output* outdata = (nn_output*)aml_module_output_get(context, outconfig);
|
||||
if (!outdata) {
|
||||
std::cerr << "Failed to run network." << std::endl;
|
||||
|
|
@ -103,8 +102,8 @@ int main(int argc, char** argv) {
|
|||
const int channels = 144; // 64 DFL + 80 classes
|
||||
|
||||
std::vector<Detection> detections = postprocess(
|
||||
std::make_tuple(outbuf0, std::make_tuple(MODEL_INPUT_HEIGHT / 16, MODEL_INPUT_WIDTH / 16, channels), 16),
|
||||
std::make_tuple(outbuf1, std::make_tuple(MODEL_INPUT_HEIGHT / 8, MODEL_INPUT_WIDTH / 8, channels), 8),
|
||||
std::make_tuple(outbuf0, std::make_tuple(MODEL_INPUT_HEIGHT / 8, MODEL_INPUT_WIDTH / 8, channels), 8),
|
||||
std::make_tuple(outbuf1, std::make_tuple(MODEL_INPUT_HEIGHT / 16, MODEL_INPUT_WIDTH / 16, channels), 16),
|
||||
std::make_tuple(outbuf2, std::make_tuple(MODEL_INPUT_HEIGHT / 32, MODEL_INPUT_WIDTH / 32, channels), 32),
|
||||
std::make_tuple(preprocessed, scale, pad),
|
||||
SCORE_THRESHOLD,
|
||||
|
|
|
|||
24
examples/yolov8/model/adla_convert.sh
Executable file
24
examples/yolov8/model/adla_convert.sh
Executable file
|
|
@ -0,0 +1,24 @@
|
|||
#!/bin/bash
# Convert an ONNX model to an int8-quantized model with adla_convert.
#
# Arguments:
#   $1: path to the ONNX model
#   $2: ADLA_TOOL_PATH, root of the adla toolkit (bin/adla_convert is run from it)
#   $3: target-platform
#       for A311D2 target-platform is PRODUCT_PID0XA003
#       for S905X5 target-platform is PRODUCT_PID0XA005
#
# The quantization image list (dataset_coco.txt) is given as a relative
# path, so run this script from the directory that contains it.
#
# Usage: ./adla_convert.sh yolov8m.onnx /XXX/adla-toolkit-binary-3.2.9.3 PRODUCT_PID0XA005

# All three positional arguments are required; stop before invoking the tool
# with empty values.
if [ "$#" -ne 3 ]; then
    echo "Usage: $0 model_path adla_toolkit_path target_platform" >&2
    exit 1
fi

model_path=$1
ADLA_TOOL_PATH=$2
target_platform=$3

echo "model_path:[$model_path]"
echo "ADLA_TOOL_PATH:[$ADLA_TOOL_PATH]"
echo "target-platform:[$target_platform]"

adla_convert=${ADLA_TOOL_PATH}/bin/adla_convert

# Quote every expansion so paths containing spaces survive word splitting.
"$adla_convert" --model-type onnx \
    --model "$model_path" \
    --inputs images --input-shapes "1,3,640,640" \
    --quantize-dtype int8 \
    --source-file dataset_coco.txt \
    --channel-mean-value "0,0,0,255" \
    --outputs "/model.22/Concat_output_0 /model.22/Concat_1_output_0 /model.22/Concat_2_output_0" \
    --target-platform "$target_platform"
|
||||
50
examples/yolov8/model/dataset_coco.txt
Executable file
50
examples/yolov8/model/dataset_coco.txt
Executable file
|
|
@ -0,0 +1,50 @@
|
|||
../../../resource/coco_dataset/000000000139.jpg
|
||||
../../../resource/coco_dataset/000000000285.jpg
|
||||
../../../resource/coco_dataset/000000000632.jpg
|
||||
../../../resource/coco_dataset/000000000724.jpg
|
||||
../../../resource/coco_dataset/000000000776.jpg
|
||||
../../../resource/coco_dataset/000000000785.jpg
|
||||
../../../resource/coco_dataset/000000000802.jpg
|
||||
../../../resource/coco_dataset/000000000872.jpg
|
||||
../../../resource/coco_dataset/000000000885.jpg
|
||||
../../../resource/coco_dataset/000000001000.jpg
|
||||
../../../resource/coco_dataset/000000001268.jpg
|
||||
../../../resource/coco_dataset/000000001296.jpg
|
||||
../../../resource/coco_dataset/000000001353.jpg
|
||||
../../../resource/coco_dataset/000000001425.jpg
|
||||
../../../resource/coco_dataset/000000001490.jpg
|
||||
../../../resource/coco_dataset/000000001503.jpg
|
||||
../../../resource/coco_dataset/000000001532.jpg
|
||||
../../../resource/coco_dataset/000000001584.jpg
|
||||
../../../resource/coco_dataset/000000001675.jpg
|
||||
../../../resource/coco_dataset/000000001761.jpg
|
||||
../../../resource/coco_dataset/000000001818.jpg
|
||||
../../../resource/coco_dataset/000000001993.jpg
|
||||
../../../resource/coco_dataset/000000002006.jpg
|
||||
../../../resource/coco_dataset/000000002149.jpg
|
||||
../../../resource/coco_dataset/000000002153.jpg
|
||||
../../../resource/coco_dataset/000000002157.jpg
|
||||
../../../resource/coco_dataset/000000002261.jpg
|
||||
../../../resource/coco_dataset/000000002299.jpg
|
||||
../../../resource/coco_dataset/000000002431.jpg
|
||||
../../../resource/coco_dataset/000000002473.jpg
|
||||
../../../resource/coco_dataset/000000002532.jpg
|
||||
../../../resource/coco_dataset/000000002587.jpg
|
||||
../../../resource/coco_dataset/000000002592.jpg
|
||||
../../../resource/coco_dataset/000000002685.jpg
|
||||
../../../resource/coco_dataset/000000002923.jpg
|
||||
../../../resource/coco_dataset/000000003156.jpg
|
||||
../../../resource/coco_dataset/000000003255.jpg
|
||||
../../../resource/coco_dataset/000000003501.jpg
|
||||
../../../resource/coco_dataset/000000003553.jpg
|
||||
../../../resource/coco_dataset/000000003661.jpg
|
||||
../../../resource/coco_dataset/000000003845.jpg
|
||||
../../../resource/coco_dataset/000000003934.jpg
|
||||
../../../resource/coco_dataset/000000004134.jpg
|
||||
../../../resource/coco_dataset/000000004395.jpg
|
||||
../../../resource/coco_dataset/000000004495.jpg
|
||||
../../../resource/coco_dataset/000000004765.jpg
|
||||
../../../resource/coco_dataset/000000004795.jpg
|
||||
../../../resource/coco_dataset/000000005001.jpg
|
||||
../../../resource/coco_dataset/000000005037.jpg
|
||||
../../../resource/coco_dataset/000000005060.jpg
|
||||
281
examples/yolov8/py/yolov8.py
Executable file
281
examples/yolov8/py/yolov8.py
Executable file
|
|
@ -0,0 +1,281 @@
|
|||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import argparse
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from amlnnlite.api import AMLNNLite
|
||||
|
||||
|
||||
# Label table: class index (0-79) -> human readable name, five labels per row.
class_names = dict(enumerate((
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light',
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
    'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',
    'cake', 'chair', 'couch', 'potted plant', 'bed',
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock',
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)))
|
||||
|
||||
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
    """Fit ``img`` into ``new_shape`` (height, width) without distorting it.

    The image is scaled by one factor for both axes and the leftover area is
    filled with a constant ``color`` border. The resize is skipped when the
    image already has the scaled size.

    Returns:
        ``(padded_image, scale, (left, top))``: the resize factor and the
        border added on the left and top, for mapping boxes back later.
    """
    src_h, src_w = img.shape[:2]
    dst_h, dst_w = new_shape[0], new_shape[1]

    scale = min(dst_h / src_h, dst_w / src_w)
    scaled_size = (int(round(src_w * scale)), int(round(src_h * scale)))  # (width, height)

    half_w = (dst_w - scaled_size[0]) / 2
    half_h = (dst_h - scaled_size[1]) / 2

    if (src_w, src_h) != scaled_size:
        img = cv2.resize(img, scaled_size, interpolation=cv2.INTER_LINEAR)

    # The -0.1/+0.1 nudges decide which side gets the extra pixel when the
    # leftover space is odd.
    top = int(round(half_h - 0.1))
    bottom = int(round(half_h + 0.1))
    left = int(round(half_w - 0.1))
    right = int(round(half_w + 0.1))

    padded = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return padded, scale, (left, top)
|
||||
|
||||
def preprocess(img_path, new_shape=(640, 640), data_format='NCHW', s=0.003921568859368563, zp=-128):
    """Load an image and build the int8-quantized input tensor.

    Args:
        img_path: Path of the image to read.
        new_shape: Target (height, width) passed to ``letterbox``.
        data_format: ``'NCHW'`` or ``'NHWC'`` layout of the returned tensor.
        s: Quantization scale; the [0, 1] image is divided by it.
        zp: Quantization zero point added after scaling.

    Returns:
        ``(input_tensor, original_img, scale, pad)`` where ``input_tensor``
        is int8 with a leading batch axis and ``scale``/``pad`` come from
        ``letterbox``.

    Raises:
        ValueError: If the image cannot be read or ``data_format`` is not
            one of the two supported layouts.
    """
    original_img = cv2.imread(str(img_path))
    if original_img is None:
        raise ValueError(f"can't read image: {img_path}")

    processed_img, scale, pad = letterbox(original_img, new_shape)
    rgb_img = cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)
    normalized_img = rgb_img.astype(np.float32) / 255.0

    if data_format == 'NCHW':
        # HWC -> CHW -> BCHW
        input_tensor = np.expand_dims(np.transpose(normalized_img, (2, 0, 1)), axis=0)
    elif data_format == 'NHWC':
        # HWC -> BHWC
        input_tensor = np.expand_dims(normalized_img, axis=0)
    else:
        raise ValueError(f"Unsupported data format: {data_format}. Only 'NCHW' and 'NHWC' are supported.")

    # Quantize to int8. Saturate before the cast: astype(int8) wraps values
    # outside [-128, 127], which would turn bright pixels into dark ones for
    # any scale/zero-point pair that overshoots the range.
    quantized = np.round(input_tensor / s + zp)
    input_tensor = np.clip(quantized, -128, 127).astype(np.int8)

    return input_tensor, original_img, scale, pad
|
||||
|
||||
def postprocess(outputs, scale, pad, data_format='NCHW', strides=(8, 16, 32), conf_threshold=0.25, iou_threshold=0.45):
    """Decode raw detection-head outputs into a list of detections.

    Args:
        outputs: Sequence of per-level arrays shaped (1, C, H, W) for
            ``'NCHW'`` or (1, H, W, C) for ``'NHWC'``. The first 64 channels
            are the box distribution (4 sides x 16 bins); the rest are
            per-class logits.
        scale: Resize factor returned by ``letterbox``.
        pad: ``(left, top)`` padding returned by ``letterbox``.
        data_format: Layout of every array in ``outputs``.
        strides: Stride of each entry of ``outputs``, in the same order.
        conf_threshold: Minimum best-class score for a candidate to be kept.
        iou_threshold: Overlap threshold handed to ``cv2.dnn.NMSBoxes``.

    Returns:
        List of dicts with keys ``'bbox'`` (``[x1, y1, x2, y2]`` in original
        image coordinates, clamped at 0), ``'confidence'``, ``'class_id'``
        and ``'class_name'``. Empty when ``outputs`` is empty or nothing
        passes the threshold.

    Raises:
        ValueError: If ``data_format`` is neither ``'NCHW'`` nor ``'NHWC'``.
    """
    if data_format not in ('NCHW', 'NHWC'):
        raise ValueError(f"Unsupported data format: {data_format}. Only 'NCHW' and 'NHWC' are supported.")

    all_boxes = []
    all_scores = []
    all_class_ids = []
    regression_range = np.arange(16, dtype=np.float32)

    for scale_idx, output in enumerate(outputs):
        stride = strides[scale_idx]

        # Flatten every level to (H*W, C) regardless of the input layout.
        if data_format == 'NCHW':
            _, channels, height, width = output.shape
            output_reshaped = output.transpose(0, 2, 3, 1).reshape(-1, channels)
        else:
            _, height, width, channels = output.shape
            output_reshaped = output.reshape(-1, channels)

        # 64 box-distribution channels first, class logits after them.
        dfl_predictions = output_reshaped[:, :64]
        class_predictions = output_reshaped[:, 64:]

        class_scores = 1.0 / (1.0 + np.exp(-class_predictions))  # sigmoid
        max_class_scores = np.max(class_scores, axis=1)
        class_ids = np.argmax(class_scores, axis=1)

        grid_y, grid_x = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
        grid_x = grid_x.flatten().astype(np.float32)
        grid_y = grid_y.flatten().astype(np.float32)

        # Softmax over the 16 bins. Subtracting the per-row maximum first
        # keeps np.exp from overflowing to inf (and the ratio to NaN) on
        # large logits; the result is mathematically unchanged.
        dfl_reshaped = dfl_predictions.reshape(-1, 4, 16)
        dfl_shifted = dfl_reshaped - np.max(dfl_reshaped, axis=-1, keepdims=True)
        dfl_exp = np.exp(dfl_shifted)
        dfl_softmax = dfl_exp / np.sum(dfl_exp, axis=-1, keepdims=True)
        bbox_deltas = np.sum(dfl_softmax * regression_range[None, None, :], axis=-1)

        # Cell centres in input pixels, then offset by the decoded distances.
        anchor_x = (grid_x + 0.5) * stride
        anchor_y = (grid_y + 0.5) * stride
        left, top, right, bottom = bbox_deltas.T
        boxes = np.stack(
            [
                anchor_x - left * stride,
                anchor_y - top * stride,
                anchor_x + right * stride,
                anchor_y + bottom * stride,
            ],
            axis=1,
        )

        all_boxes.append(boxes)
        all_scores.append(max_class_scores)
        all_class_ids.append(class_ids)

    # np.concatenate rejects an empty list; no outputs means no detections.
    if not all_boxes:
        return []

    final_boxes = np.concatenate(all_boxes, axis=0)
    final_scores = np.concatenate(all_scores, axis=0)
    final_class_ids = np.concatenate(all_class_ids, axis=0)

    valid_mask = final_scores > conf_threshold
    if not np.any(valid_mask):
        return []

    valid_boxes = final_boxes[valid_mask]
    valid_scores = final_scores[valid_mask]
    valid_class_ids = final_class_ids[valid_mask]

    # Undo the letterbox (padding first, then the resize) and clamp at 0.
    pad_x, pad_y = pad
    valid_boxes[:, [0, 2]] = (valid_boxes[:, [0, 2]] - pad_x) / scale
    valid_boxes[:, [1, 3]] = (valid_boxes[:, [1, 3]] - pad_y) / scale
    valid_boxes = np.maximum(valid_boxes, 0)

    # cv2.dnn.NMSBoxes takes rectangles as [x, y, width, height]; feeding it
    # corner coordinates makes it compute overlaps on the wrong boxes.
    nms_rects = valid_boxes.copy()
    nms_rects[:, 2:] -= nms_rects[:, :2]
    nms_indices = cv2.dnn.NMSBoxes(
        nms_rects.tolist(), valid_scores.tolist(), conf_threshold, iou_threshold
    )
    # Depending on the OpenCV version the result is an (N, 1) array, a flat
    # array or an empty tuple; normalise to a flat index array.
    nms_indices = np.asarray(nms_indices, dtype=np.int64).reshape(-1)

    detections = []
    for idx in nms_indices:
        x1, y1, x2, y2 = valid_boxes[idx]
        class_id = valid_class_ids[idx]
        detections.append({
            'bbox': [float(x1), float(y1), float(x2), float(y2)],
            'confidence': float(valid_scores[idx]),
            'class_id': int(class_id),
            'class_name': class_names.get(int(class_id), f'class_{class_id}')
        })
    return detections
|
||||
|
||||
def get_class_color(class_id):
    """Return a BGR colour tuple of ints derived from ``class_id``.

    The hue advances by 137.508 degrees per class id (wrapped to 360) at
    fixed saturation 0.8 and value 0.9, so the same id always yields the
    same colour.
    """
    import colorsys

    hue_degrees = (class_id * 137.508) % 360
    red, green, blue = colorsys.hsv_to_rgb(hue_degrees/360.0, 0.8, 0.9)
    # OpenCV drawing calls take channels in blue, green, red order.
    return tuple(int(channel*255) for channel in (blue, green, red))
|
||||
|
||||
def draw_detections(img, detections, save_path):
    """Draw boxes and labels on a copy of ``img`` and write it to ``save_path``.

    Each detection gets a rectangle in its class colour plus a filled label
    strip above the box with "<class_name>: <confidence>". The input image
    is left untouched; the annotated copy is returned.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    canvas = img.copy()

    for item in detections:
        left, top, right, bottom = map(int, item['bbox'])
        score = item['confidence']
        name = item['class_name']
        box_color = get_class_color(item['class_id'])

        cv2.rectangle(canvas, (left, top), (right, bottom), box_color, 2)

        caption = f"{name}: {score:.2f}"
        (caption_w, caption_h), _ = cv2.getTextSize(caption, font, 0.6, 1)
        cv2.rectangle(canvas, (left, top - caption_h - 10), (left + caption_w, top), box_color, -1)

        # White text on dark backgrounds, black text on light ones.
        if sum(box_color) < 400:
            text_color = (255, 255, 255)
        else:
            text_color = (0, 0, 0)
        cv2.putText(canvas, caption, (left, top - 5), font, 0.6, text_color, 1)

    cv2.imwrite(save_path, canvas)
    return canvas
|
||||
|
||||
|
||||
def main():
    """Run the demo on every image in the current directory.

    Each ``.jpg``/``.jpeg``/``.png``/``.bmp`` file is preprocessed, inferred
    and decoded, and an annotated copy is written to
    ``<model stem>_result/<image stem>_result.jpg``. A failure on one image
    is reported and the remaining images are still processed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-path', default='./yolov8s_int8_A311D2.adla')
    parser.add_argument('--run-cycles', default=1, type=int)
    args = parser.parse_args()

    # Initialize AMLNNLite
    amlnn = AMLNNLite()
    amlnn.config(
        model_path=args.model_path,  # Model file path
        run_cycles=args.run_cycles
    )
    amlnn.init()

    # Everything after init() runs under try/finally so the runtime is always
    # released, even if something outside the per-image handler fails.
    try:
        # Collect the image files of the current working directory.
        image_dir = "./"
        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp"]
        image_files = []
        for ext in image_extensions:
            image_files.extend(glob.glob(os.path.join(image_dir, ext)))
            image_files.extend(glob.glob(os.path.join(image_dir, ext.upper())))
        # On case-insensitive filesystems both patterns match the same file;
        # drop the duplicates while keeping the discovery order.
        image_files = list(dict.fromkeys(image_files))

        if not image_files:
            print("No image files found in", image_dir)
            return

        print(f"Found {len(image_files)} image files to process:")
        for img_file in image_files:
            print(f" - {os.path.basename(img_file)}")
        print()

        # The output directory depends only on the model, so create it once.
        result_dir = f"{Path(args.model_path).stem}_result"
        os.makedirs(result_dir, exist_ok=True)

        for i, image_path in enumerate(image_files, 1):
            print("=" * 60)
            print(f"Processing image {i}/{len(image_files)}: {os.path.basename(image_path)}")
            print("=" * 60)

            try:
                # Preprocess input
                input_tensor, original_img, scale, pad = preprocess(image_path, new_shape=(640, 640), data_format='NHWC', s=0.003921568859368563, zp=-128)

                # Run inference
                outputs = amlnn.inference(
                    inputs=[input_tensor]
                )

                # Postprocess results
                detections = postprocess(outputs, scale, pad, data_format='NHWC', strides=[8, 16, 32], conf_threshold=0.25, iou_threshold=0.45)

                # Print detection results (own counter name so the image
                # index of the outer loop is not overwritten).
                if detections:
                    print(f" Detected {len(detections)} objects:")
                    for det_no, det in enumerate(detections, 1):
                        print(f" {det_no}. {det['class_name']} ({det['confidence']:.2f})")
                else:
                    print(" No objects detected")

                # Save result image
                img_name = Path(image_path).stem
                save_path = os.path.join(result_dir, f"{img_name}_result.jpg")
                draw_detections(original_img, detections, str(save_path))
                print(f" Result saved to: {save_path}")

            except Exception as e:
                # Deliberate best effort: report the failure and move on to
                # the next image instead of aborting the whole run.
                print(f"Error processing {os.path.basename(image_path)}: {e}")

            print()

        # Optional visualization
        amlnn.visualize()
    finally:
        # Release resources
        amlnn.uninit()


if __name__ == "__main__":
    main()
|
||||
|
||||
BIN
examples/yolov8/result.jpg
Executable file
BIN
examples/yolov8/result.jpg
Executable file
Binary file not shown.
|
After Width: | Height: | Size: 528 KiB |
|
|
@ -91,7 +91,6 @@ int main(int argc, char** argv) {
|
|||
int num_classes = CLASS_NAMES.size();
|
||||
int channels = 87;
|
||||
|
||||
// Using standard stride logic assuming standard YOLOv8/World export
|
||||
std::vector<Detection> detections = postprocess(
|
||||
std::make_tuple(outbuf0, std::make_tuple(MODEL_INPUT_HEIGHT / 8, MODEL_INPUT_WIDTH / 8, channels), 8),
|
||||
std::make_tuple(outbuf1, std::make_tuple(MODEL_INPUT_HEIGHT / 16, MODEL_INPUT_WIDTH / 16, channels), 16),
|
||||
|
|
|
|||
509
examples/yoloworld/py/yoloworld.py
Executable file
509
examples/yoloworld/py/yoloworld.py
Executable file
|
|
@ -0,0 +1,509 @@
|
|||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import argparse
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from amlnnlite.api import AMLNNLite
|
||||
|
||||
|
||||
# Label set of the exported model; the list index is the class id that
# postprocessing reports.
class_names = [
    "handbag", "backpack", "wallet",
    "watch", "necklace", "bracelet", "earrings", "finger ring", "sunglass", "hat", "shoes", "belt",
    "makeup palette", "lipstick tube",
    "car", "truck", "bicycle", "motorcycle",
    "phone", "laptop", "camera", "wine bottle", "stuffed toy"
]

# Network input resolution in pixels.
MODEL_INPUT_WIDTH = 640
MODEL_INPUT_HEIGHT = 480
NUM_CLASSES = len(class_names)
# Channels per grid cell: 4 box sides * 16 DFL bins + one score per class.
# Derived from NUM_CLASSES so it cannot go stale when class_names is edited
# (87 for the 23 classes above).
CHANNELS = 4 * 16 + NUM_CLASSES
# Stride of each output head, in the order the outputs are consumed.
STRIDES = [8, 16, 32]
SCORE_THRESHOLD = 0.3
NMS_THRESHOLD = 0.45
|
||||
|
||||
def letterbox(img, new_shape=(480, 640), color=(114, 114, 114)):
    """Scale an image to fit `new_shape` and pad the remainder with `color`.

    Args:
        img: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width).
        color: Border fill value passed to cv2.copyMakeBorder.

    Returns:
        (padded_image, scale, (left, top)) where `scale` is the uniform resize
        factor and `(left, top)` is the padding added before the image content.
    """
    src_h, src_w = img.shape[:2]
    dst_h, dst_w = new_shape[0], new_shape[1]

    # One factor for both axes keeps the aspect ratio.
    scale = min(dst_h / src_h, dst_w / src_w)
    resized_w = int(round(src_w * scale))
    resized_h = int(round(src_h * scale))

    # Half of the leftover space goes on each side.
    half_pad_w = (dst_w - resized_w) / 2
    half_pad_h = (dst_h - resized_h) / 2

    if (src_w, src_h) != (resized_w, resized_h):
        img = cv2.resize(img, (resized_w, resized_h), interpolation=cv2.INTER_LINEAR)

    # The -/+ 0.1 nudge sends an odd leftover pixel to the bottom/right side.
    top = int(round(half_pad_h - 0.1))
    bottom = int(round(half_pad_h + 0.1))
    left = int(round(half_pad_w - 0.1))
    right = int(round(half_pad_w + 0.1))
    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)

    return img, scale, (left, top)
|
||||
|
||||
def preprocess(img_path, new_shape=(480, 640), data_format='NHWC'):
    """Load an image and turn it into a float32 input tensor.

    The image is letterboxed to `new_shape`, converted from BGR to RGB and
    scaled to the 0-1 range; no quantization is applied.

    Args:
        img_path: Path of the image to read.
        new_shape: Target (height, width) handed to `letterbox`.
        data_format: 'NHWC' or 'NCHW' layout of the returned tensor.

    Returns:
        (input_tensor, original_img, scale, pad) where `scale` and `pad` are
        the values reported by `letterbox`.

    Raises:
        ValueError: If the image cannot be read or `data_format` is unknown.
    """
    original_img = cv2.imread(str(img_path))
    if original_img is None:
        raise ValueError(f"can't read image: {img_path}")

    boxed, scale, pad = letterbox(original_img, new_shape)
    pixels = cv2.cvtColor(boxed, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    if data_format == 'NHWC':
        # HWC -> BHWC
        input_tensor = np.expand_dims(pixels, axis=0)
    elif data_format == 'NCHW':
        # HWC -> CHW -> BCHW
        input_tensor = np.expand_dims(np.transpose(pixels, (2, 0, 1)), axis=0)
    else:
        raise ValueError(f"Unsupported data format: {data_format}. Only 'NCHW' and 'NHWC' are supported.")

    # Float32 model: hand over float32 data as-is.
    return input_tensor.astype(np.float32), original_img, scale, pad
|
||||
|
||||
def sigmoid(x):
    """Return the logistic function of `x`, element-wise for arrays."""
    # Inputs are limited to [-250, 250] before exponentiation.
    bounded = np.clip(x, -250, 250)
    return 1.0 / (1.0 + np.exp(-bounded))
|
||||
|
||||
def compute_iou(box1, box2):
    """Return the intersection-over-union of two (x1, y1, x2, y2) boxes.

    A small epsilon in the denominator keeps the result finite for
    zero-area boxes.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Overlap extent along each axis, clamped to zero for disjoint boxes.
    overlap_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    inter = overlap_w * overlap_h

    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)

    return inter / (area_a + area_b - inter + 1e-6)
|
||||
|
||||
def nms_by_class(detections, iou_threshold):
    """Run greedy non-maximum suppression separately for every class.

    Args:
        detections: List of dicts with 'bbox', 'confidence' and 'class_id'.
        iou_threshold: A detection is dropped when its IoU with an already
            kept, higher-scoring detection of the same class exceeds this.

    Returns:
        Surviving detections, grouped by class in first-seen order and sorted
        by descending confidence inside each class.
    """
    if len(detections) == 0:
        return []

    # Bucket detections by class id, keeping first-seen class order.
    per_class = {}
    for det in detections:
        per_class.setdefault(det['class_id'], []).append(det)

    survivors = []
    for candidates in per_class.values():
        ranked = sorted(candidates, key=lambda d: d['confidence'], reverse=True)
        kept = []
        for cand in ranked:
            # Only detections that were themselves kept can suppress others.
            overlaps_kept = any(
                compute_iou(winner['bbox'], cand['bbox']) > iou_threshold
                for winner in kept
            )
            if not overlaps_kept:
                kept.append(cand)
        survivors.extend(kept)

    return survivors
|
||||
|
||||
def suppress_cross_class_iou_conflicts(detections, iou_threshold):
    """Drop detections that heavily overlap a stronger one of another class.

    Detections are visited in descending confidence order. One is discarded
    when an already kept detection with a different class id overlaps it with
    IoU above `iou_threshold`. Same-class pairs are never compared here.

    Args:
        detections: List of dicts with 'bbox', 'confidence' and 'class_id'.
            The list is not modified.
        iou_threshold: IoU above which the weaker detection is removed.

    Returns:
        New list of surviving detections sorted by descending confidence.
    """
    if len(detections) == 0:
        return []

    # Sort a copy: reordering the caller's list would be a hidden side effect.
    ranked = sorted(detections, key=lambda d: d['confidence'], reverse=True)

    kept = []
    for cand in ranked:
        conflicts = any(
            winner['class_id'] != cand['class_id']
            and compute_iou(winner['bbox'], cand['bbox']) > iou_threshold
            for winner in kept
        )
        if not conflicts:
            kept.append(cand)

    return kept
|
||||
|
||||
def get_detections(output, output_shape, stride, conf_thresh, num_classes, reverse=1, data_format='NHWC'):
    """Decode one output head into boxes in network-input pixel coordinates.

    Args:
        output: Batched head output; only batch element 0 is read.
        output_shape: 4-tuple describing `output` in `data_format` order.
        stride: Pixel stride of this head's grid.
        conf_thresh: Cells whose best class score is not above this are dropped.
        num_classes: Number of class channels.
        reverse: > 0 selects the [box, classes] channel order (YOLOWorld),
            otherwise the [classes, box] order.
        data_format: 'NHWC' or 'NCHW'.

    Returns:
        List of dicts with 'bbox' ([x1, y1, x2, y2]), 'confidence' and
        'class_id'; empty when no cell passes the threshold.

    Raises:
        ValueError: If `data_format` is not supported.
    """
    reg_bins = 16
    coords = 4 * reg_bins  # DFL coords: 64

    # Flatten the grid so each row holds the channels of one cell.
    if data_format == 'NHWC':
        _, height, width, channels = output_shape
        cells = output[0].reshape(-1, channels)
    elif data_format == 'NCHW':
        _, channels, height, width = output_shape
        cells = output[0].transpose(1, 2, 0).reshape(-1, channels)
    else:
        raise ValueError(f"Unsupported data format: {data_format}")

    if reverse > 0:
        cls_start, box_start = coords, 0
    else:
        cls_start, box_start = 0, num_classes

    # Best class and its score for every cell.
    probs = sigmoid(cells[:, cls_start:cls_start + num_classes])
    best_prob = np.max(probs, axis=1)
    best_class = np.argmax(probs, axis=1)

    keep = np.flatnonzero(best_prob > conf_thresh)
    if keep.size == 0:
        return []

    # DFL decoding: softmax over the 16 bins of each side, then the expected
    # bin index is the distance (in stride units) from the cell centre.
    logits = cells[keep, box_start:box_start + coords].reshape(-1, 4, reg_bins)
    shifted = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
    weights = shifted / np.sum(shifted, axis=-1, keepdims=True)
    bins = np.arange(reg_bins, dtype=np.float32)
    distances = np.sum(weights * bins[None, :], axis=-1)  # (N, 4)

    # Row-major flat index -> (row, column) of the grid cell.
    rows, cols = np.divmod(keep, width)
    center_x = (cols + 0.5) * stride
    center_y = (rows + 0.5) * stride

    left, top, right, bottom = distances.T
    corners = np.stack(
        [
            center_x - left * stride,
            center_y - top * stride,
            center_x + right * stride,
            center_y + bottom * stride,
        ],
        axis=1,
    )

    return [
        {
            'bbox': [float(box[0]), float(box[1]), float(box[2]), float(box[3])],
            'confidence': float(score),
            'class_id': int(class_id),
        }
        for box, score, class_id in zip(corners, best_prob[keep], best_class[keep])
    ]
|
||||
|
||||
def postprocess(outputs, scale, pad, data_format='NHWC', strides=(8, 16, 32),
                conf_threshold=0.4, iou_threshold=0.45, num_classes=23, reverse=1,
                cross_class_iou_threshold=0.8):
    """Turn raw YOLOWorld head outputs into detections on the original image.

    Args:
        outputs: Sequence of head outputs, one per entry of `strides`.
        scale: Letterbox resize factor used during preprocessing.
        pad: (left, top) letterbox padding used during preprocessing.
        data_format: 'NHWC' or 'NCHW' layout of every output.
        strides: Stride of each head, in the same order as `outputs`.
        conf_threshold: Minimum class score for a candidate box.
        iou_threshold: IoU threshold of the per-class NMS.
        num_classes: Number of class channels in each output.
        reverse: Channel order selector forwarded to `get_detections`.
        cross_class_iou_threshold: IoU above which a weaker detection of a
            different class is removed after the per-class NMS.

    Returns:
        List of dicts with 'bbox', 'confidence', 'class_id' and 'class_name',
        with boxes expressed in original-image pixels.

    Raises:
        ValueError: If there are more outputs than strides or `data_format`
            is not supported.
    """
    all_detections = []

    # Decode every output scale.
    for scale_idx, output in enumerate(outputs):
        if scale_idx >= len(strides):
            raise ValueError(
                f"Got more outputs than strides ({len(strides)}); "
                "pass one stride per output tensor"
            )
        stride = strides[scale_idx]

        # Output should already be float32 (no dequantization needed).
        if output.dtype != np.float32:
            output = output.astype(np.float32)

        if data_format not in ('NCHW', 'NHWC'):
            raise ValueError(f"Unsupported data format: {data_format}")

        all_detections.extend(
            get_detections(output, tuple(output.shape), stride, conf_threshold,
                           num_classes, reverse, data_format)
        )

    if len(all_detections) == 0:
        return []

    # Undo the letterbox: remove the padding, then the resize.
    pad_x, pad_y = pad
    detections_orig = []
    for det in all_detections:
        x1, y1, x2, y2 = det['bbox']
        class_id = det['class_id']
        detections_orig.append({
            'bbox': [
                float((x1 - pad_x) / scale),
                float((y1 - pad_y) / scale),
                float((x2 - pad_x) / scale),
                float((y2 - pad_y) / scale),
            ],
            'confidence': det['confidence'],
            'class_id': class_id,
            # Fall back to a synthetic label for ids outside class_names.
            'class_name': class_names[class_id] if class_id < len(class_names) else f'class_{class_id}'
        })

    # NMS by class, then suppress cross-class IoU conflicts.
    detections_nms = nms_by_class(detections_orig, iou_threshold)
    return suppress_cross_class_iou_conflicts(detections_nms, cross_class_iou_threshold)
|
||||
|
||||
def get_class_color(class_id):
    """Return a BGR colour tuple (0-255 ints) that depends only on `class_id`."""
    import colorsys

    # Stepping the hue by ~137.5 degrees per id keeps neighbouring ids apart.
    hue_degrees = (class_id * 137.508) % 360
    red, green, blue = colorsys.hsv_to_rgb(hue_degrees / 360.0, 0.8, 0.9)
    # Channels are emitted in BGR order, the order the cv2 drawing calls use.
    return (int(blue * 255), int(green * 255), int(red * 255))
|
||||
|
||||
def draw_detections(img, detections, save_path):
    """Draw boxes and labels on a copy of `img` and write it to `save_path`.

    Args:
        img: Image to annotate; it is copied, not modified.
        detections: Dicts with 'bbox', 'confidence', 'class_name', 'class_id'.
        save_path: Destination passed to cv2.imwrite.

    Returns:
        The annotated image.
    """
    canvas = img.copy()
    font = cv2.FONT_HERSHEY_SIMPLEX

    for det in detections:
        left, top, right, bottom = (int(value) for value in det['bbox'])
        color = get_class_color(det['class_id'])
        caption = f"{det['class_name']}: {det['confidence']:.2f}"

        cv2.rectangle(canvas, (left, top), (right, bottom), color, 2)

        # Filled strip above the box serves as the caption background.
        (text_w, text_h), _ = cv2.getTextSize(caption, font, 0.6, 1)
        cv2.rectangle(canvas, (left, top - text_h - 10), (left + text_w, top), color, -1)

        # Black text on bright backgrounds, white text on dark ones.
        if sum(color) < 400:
            text_color = (255, 255, 255)
        else:
            text_color = (0, 0, 0)
        cv2.putText(canvas, caption, (left, top - 5), font, 0.6, text_color, 1)

    cv2.imwrite(save_path, canvas)
    return canvas
|
||||
|
||||
def main():
    """Run the YOLOWorld demo over every image in a directory.

    Parses the command line, initializes AMLNNLite with the given model, then
    preprocesses, infers, postprocesses and saves an annotated copy of each
    image. A failure on one image is reported and the remaining images are
    still processed.

    Returns:
        0 on success (including when the directory holds no images), 1 when an
        argument is invalid or AMLNNLite cannot be initialized.
    """
    import traceback

    parser = argparse.ArgumentParser(
        description='YOLOWorld object detection demo using AMLNNLite',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='''
Examples:
  # Use default model path
  python yoloworld.py

  # Specify model path
  python yoloworld.py --model-path ./model.adla

  # Run multiple cycles for performance testing
  python yoloworld.py --run-cycles 100
'''
    )
    parser.add_argument('--model-path',
                        default='./yolo_world_480_640.adla',
                        help='Path to the model file (.adla or .tflite)')
    parser.add_argument('--run-cycles',
                        default=1,
                        type=int,
                        help='Number of inference cycles to run (for performance testing)')
    parser.add_argument('--image-dir',
                        default='./',
                        help='Directory containing images to process')
    parser.add_argument('--conf-threshold',
                        type=float,
                        default=SCORE_THRESHOLD,
                        help=f'Confidence threshold for detection (default: {SCORE_THRESHOLD})')
    parser.add_argument('--nms-threshold',
                        type=float,
                        default=NMS_THRESHOLD,
                        help=f'NMS IoU threshold (default: {NMS_THRESHOLD})')
    parser.add_argument('--no-visualize',
                        action='store_true',
                        help='Skip visualization chart generation')
    args = parser.parse_args()

    # Validate model path
    if not os.path.exists(args.model_path):
        print(f"Error: Model file not found: {args.model_path}")
        return 1

    if not os.path.isfile(args.model_path):
        print(f"Error: Model path is not a file: {args.model_path}")
        return 1

    # Validate thresholds
    if not 0.0 < args.conf_threshold <= 1.0:
        print(f"Error: Confidence threshold must be in (0, 1], got {args.conf_threshold}")
        return 1

    if not 0.0 < args.nms_threshold <= 1.0:
        print(f"Error: NMS threshold must be in (0, 1], got {args.nms_threshold}")
        return 1

    # Initialize AMLNNLite with error handling
    print("Initializing AMLNNLite...")
    amlnn = None
    try:
        amlnn = AMLNNLite()
        print(f"Loading model: {args.model_path}")
        amlnn.config(
            model_path=args.model_path,
            run_cycles=args.run_cycles
        )
        print("Initializing model...")
        amlnn.init()
        print("Model initialized successfully!\n")
    except Exception as e:
        print(f"Error initializing AMLNNLite: {e}")
        traceback.print_exc()
        return 1

    # try/finally guarantees uninit() runs on every exit path below.
    try:
        image_dir = args.image_dir
        if not os.path.exists(image_dir):
            print(f"Error: Image directory not found: {image_dir}")
            return 1

        if not os.path.isdir(image_dir):
            print(f"Error: Image path is not a directory: {image_dir}")
            return 1

        # Collect images by extension. On a case-insensitive filesystem the
        # lower- and upper-case patterns match the same files, so de-duplicate;
        # sorting makes the processing order reproducible.
        found = set()
        for pattern in ("*.jpg", "*.jpeg", "*.png", "*.bmp"):
            found.update(glob.glob(os.path.join(image_dir, pattern)))
            found.update(glob.glob(os.path.join(image_dir, pattern.upper())))
        image_files = sorted(found)

        if not image_files:
            print(f"No image files found in {image_dir}")
            return 0

        print(f"Found {len(image_files)} image file(s) to process:")
        for img_file in image_files:
            print(f" - {os.path.basename(img_file)}")
        print()

        # Annotated images go into a directory named after the model file.
        result_dir = f"{Path(args.model_path).stem}_result"
        banner = "=" * 60

        # Process each image
        for i, image_path in enumerate(image_files, 1):
            print(banner)
            print(f"Processing image {i}/{len(image_files)}: {os.path.basename(image_path)}")
            print(banner)

            try:
                # Preprocess input (float32 model, no quantization)
                input_tensor, original_img, scale, pad = preprocess(
                    image_path,
                    new_shape=(MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH),
                    data_format='NHWC'
                )

                # Validate input tensor shape and dtype
                expected_shape = (1, MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH, 3)
                if input_tensor.shape != expected_shape:
                    raise ValueError(f"Input tensor shape mismatch: expected {expected_shape}, got {input_tensor.shape}")
                if input_tensor.dtype != np.float32:
                    raise ValueError(f"Input tensor dtype must be float32, got {input_tensor.dtype}")

                # Run inference
                outputs = amlnn.inference(inputs=[input_tensor])

                # Validate outputs
                if outputs is None:
                    raise ValueError("Inference returned None")
                if len(outputs) != 3:
                    raise ValueError(f"Expected 3 output tensors, got {len(outputs)}")

                # Postprocess results
                detections = postprocess(
                    outputs,
                    scale,
                    pad,
                    data_format='NHWC',
                    strides=STRIDES,
                    conf_threshold=args.conf_threshold,
                    iou_threshold=args.nms_threshold,
                    num_classes=NUM_CLASSES,
                    reverse=1  # YOLOWorld format
                )

                # Print detection results
                if detections:
                    print(f" Detected {len(detections)} object(s):")
                    for j, det in enumerate(detections, 1):
                        print(f"{j}. {det['class_name']} ({det['confidence']:.2f})")
                else:
                    print(" No objects detected")

                # Save result image
                os.makedirs(result_dir, exist_ok=True)
                img_name = Path(image_path).stem
                save_path = os.path.join(result_dir, f"{img_name}_result.jpg")
                draw_detections(original_img, detections, str(save_path))
                print(f" Result saved to: {save_path}")

            except Exception as e:
                # Report the failure and continue with the remaining images.
                print(f"Error processing {os.path.basename(image_path)}: {e}")
                traceback.print_exc()
                continue

            print()

        # Optional visualization
        if not args.no_visualize:
            print("Generating visualization charts...")
            amlnn.visualize()
            print("Visualization charts saved.")
    finally:
        # Always release resources
        if amlnn is not None:
            print("\nReleasing resources...")
            amlnn.uninit()
            print("Resources released.")

    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
sys.exit(main())
|
||||
|
||||
|
||||
77
examples/yolox/cpp/build-android.sh
Executable file
77
examples/yolox/cpp/build-android.sh
Executable file
|
|
@ -0,0 +1,77 @@
|
|||
#!/bin/bash
set -e

#
# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Cross-builds the YOLOX demo for Android with the NDK CMake toolchain.
# The NDK location is read from ANDROID_NDK_PATH, falling back to ANDROID_NDK
# and then ANDROID_NDK_HOME.

# Print usage and exit with the given status (default 1).
usage() {
    echo "Usage: $0 [-a <target_abi>]"
    echo " -a <target_abi> : Target ABI (default: arm64-v8a)"
    echo " -h : Show this help message"
    exit "${1:-1}"
}

# Default values
TARGET_ABI=arm64-v8a

# Parse arguments
while getopts 'a:h' opt; do
    case "$opt" in
        a)
            TARGET_ABI=$OPTARG
            ;;
        h)
            # Asking for help is not an error.
            usage 0
            ;;
        *)
            usage
            ;;
    esac
done

if [ -z "${ANDROID_NDK_PATH}" ]; then
    if [ -n "${ANDROID_NDK}" ]; then
        ANDROID_NDK_PATH=${ANDROID_NDK}
    elif [ -n "${ANDROID_NDK_HOME}" ]; then
        ANDROID_NDK_PATH=${ANDROID_NDK_HOME}
    else
        echo "Error: ANDROID_NDK_PATH is not set."
        echo "Please set ANDROID_NDK_PATH to your Android NDK directory."
        exit 1
    fi
fi

# Fail early with a clear message instead of a CMake toolchain error.
TOOLCHAIN_FILE="${ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake"
if [ ! -f "${TOOLCHAIN_FILE}" ]; then
    echo "Error: NDK toolchain file not found: ${TOOLCHAIN_FILE}"
    exit 1
fi

# Paths are quoted throughout so checkouts under directories with spaces work.
ROOT_PWD=$(cd "$(dirname "$0")" && pwd)
BUILD_DIR="${ROOT_PWD}/build/android"

echo "Building for Android..."
echo "NDK_PATH: ${ANDROID_NDK_PATH}"
echo "TARGET_ABI: ${TARGET_ABI}"
echo "BUILD_DIR: ${BUILD_DIR}"

mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"

cmake ../../src \
    -DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}" \
    -DANDROID_ABI="${TARGET_ABI}" \
    -DANDROID_PLATFORM=android-24 \
    -DCMAKE_BUILD_TYPE=Release \
    -DOpenCV_DIR="${ROOT_PWD}/../../../dependency/opencv/opencv-android-sdk-build/sdk/native/jni/abi-${TARGET_ABI}"

make -j4

echo "Build complete. Executable in ${BUILD_DIR}/yolox_demo"
|
||||
65
examples/yolox/cpp/build-linux.sh
Executable file
65
examples/yolox/cpp/build-linux.sh
Executable file
|
|
@ -0,0 +1,65 @@
|
|||
#!/bin/bash
# TODO
set -e

# Cross-builds the YOLOX demo for Linux. The compiler prefix is read from
# GCC_COMPILER (default: aarch64-linux-gnu), i.e. ${GCC_COMPILER}-gcc/-g++.

# Print usage and exit with the given status (default 1).
usage() {
    echo "Usage: $0 [-a <target_arch>]"
    echo " -a <target_arch> : Target architecture (default: aarch64)"
    echo " -h : Show this help message"
    exit "${1:-1}"
}

# Default values
TARGET_ARCH=aarch64

# Parse arguments
while getopts 'a:h' opt; do
    case "$opt" in
        a)
            TARGET_ARCH=$OPTARG
            ;;
        h)
            # Asking for help is not an error.
            usage 0
            ;;
        *)
            usage
            ;;
    esac
done

# Default to aarch64-linux-gnu if GCC_COMPILER is not set
GCC_COMPILER=${GCC_COMPILER:-aarch64-linux-gnu}

# Set compilers
export CC="${GCC_COMPILER}-gcc"
export CXX="${GCC_COMPILER}-g++"

# Validate compiler. A missing compiler is only a warning: the build proceeds
# anyway because the user might have a custom environment setup.
if ! command -v "${CC}" &> /dev/null; then
    echo "Warning: Compiler ${CC} not found."
    echo "Please set GCC_COMPILER environment variable to your cross-compiler path prefix."
    echo "Example: export GCC_COMPILER=/path/to/toolchain/bin/aarch64-linux-gnu"
else
    echo "Using compiler: ${CC}"
fi

# Paths are quoted throughout so checkouts under directories with spaces work.
ROOT_PWD=$(cd "$(dirname "$0")" && pwd)
BUILD_DIR="${ROOT_PWD}/build/linux"

echo "Building for Linux..."
echo "COMPILER: ${CC}"
echo "TARGET_ARCH: ${TARGET_ARCH}"
echo "BUILD_DIR: ${BUILD_DIR}"

mkdir -p "${BUILD_DIR}"
cd "${BUILD_DIR}"

cmake ../../src \
    -DCMAKE_SYSTEM_NAME=Linux \
    -DCMAKE_SYSTEM_PROCESSOR="${TARGET_ARCH}" \
    -DCMAKE_BUILD_TYPE=Release \
    -DOpenCV_DIR="${ROOT_PWD}/../../../dependency/opencv/opencv-linux-aarch64/share/OpenCV"
make -j4

# The CMake target built from ../../src is yolox_demo.
echo "Build complete. Executable in ${BUILD_DIR}/yolox_demo"
|
||||
41
examples/yolox/cpp/src/CMakeLists.txt
Executable file
41
examples/yolox/cpp/src/CMakeLists.txt
Executable file
|
|
@ -0,0 +1,41 @@
|
|||
cmake_minimum_required(VERSION 3.5)
# Project name matches the executable target defined below.
project(yolox_demo)

set(CMAKE_CXX_STANDARD 17)

# Set NNSDK path
set(NNSDK_ROOT "${CMAKE_SOURCE_DIR}/../../../../dependency/nnsdk")
include_directories(${NNSDK_ROOT}/include)
include_directories(${CMAKE_SOURCE_DIR}/../../../../common)

# Set dependency path
set(3RDPARTY_DIR "${CMAKE_SOURCE_DIR}/../../../../dependency")

# Pick the prebuilt NNSDK library directory for the target platform.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
    if (ANDROID_ABI STREQUAL "arm64-v8a")
        link_directories(${NNSDK_ROOT}/lib/android/arm64-v8a)
    else()
        # Every other ABI uses the armeabi-v7a libraries.
        link_directories(${NNSDK_ROOT}/lib/android/armeabi-v7a)
    endif()
    # Android needs log
    link_libraries(log)
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    link_directories(${NNSDK_ROOT}/lib/linux/lib64_yocto)
endif()

# Find OpenCV (OpenCV_DIR is normally supplied by the build scripts)
message(STATUS "OpenCV_DIR: ${OpenCV_DIR}")
find_package(OpenCV REQUIRED)
include_directories(${OpenCV_INCLUDE_DIRS})


add_executable(yolox_demo
    main.cpp
    postprocess.cpp
    ${CMAKE_SOURCE_DIR}/../../../../common/model_loader.cpp
)

target_link_libraries(yolox_demo
    ${OpenCV_LIBS}
    nnsdk
)
|
||||
153
examples/yolox/cpp/src/main.cpp
Executable file
153
examples/yolox/cpp/src/main.cpp
Executable file
|
|
@ -0,0 +1,153 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <cmath>
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include "postprocess.h"
|
||||
#include "model_loader.h"
|
||||
|
||||
// Logistic function: maps any real x into the open interval (0, 1).
static float sigmoid(float x) {
    const float decay = std::exp(-x);
    return 1.0f / (1.0f + decay);
}
|
||||
|
||||
const std::string DEFAULT_OUTPUT_PATH = "./result.jpg";
|
||||
const int MODEL_INPUT_WIDTH = 640;
|
||||
const int MODEL_INPUT_HEIGHT = 640;
|
||||
const float SCORE_THRESHOLD = 0.25f;
|
||||
const float NMS_THRESHOLD = 0.45f;
|
||||
const float CONF_THRESHOLD = 0.45f;
|
||||
|
||||
const std::vector<std::string> CLASS_NAMES = {
|
||||
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
|
||||
"traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
|
||||
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
|
||||
"handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
|
||||
"baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
|
||||
"wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
|
||||
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
|
||||
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
|
||||
"keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
|
||||
"clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
|
||||
};
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
if (argc < 3) {
|
||||
printf("Usage: %s <model_path> <image_path> [output_path]\n", argv[0]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string model_path = argv[1];
|
||||
std::string image_path = argv[2];
|
||||
std::string output_path = (argc > 3) ? argv[3] : DEFAULT_OUTPUT_PATH;
|
||||
|
||||
std::cout << "YOLOX C++ Demo" << std::endl;
|
||||
std::cout << "Model: " << model_path << std::endl;
|
||||
std::cout << "Image: " << image_path << std::endl;
|
||||
std::cout << "Output: " << output_path << std::endl;
|
||||
|
||||
// 1. Load Image
|
||||
cv::Mat origin_img = cv::imread(image_path);
|
||||
if (origin_img.empty()) {
|
||||
std::cerr << "Failed to load image from " << image_path << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// 2. Initialize Network
|
||||
void* context = init_network(model_path.c_str());
|
||||
if (!context) {
|
||||
std::cerr << "Failed to initialize network." << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// 3. Preprocess
|
||||
cv::Mat img;
|
||||
float scale;
|
||||
std::tuple<int, int> pad;
|
||||
std::tie(img, scale, pad) = preproc(origin_img, std::make_tuple(MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH));
|
||||
int pad_left = std::get<0>(pad);
|
||||
int pad_top = std::get<1>(pad);
|
||||
|
||||
// 4. Run Network
|
||||
std::tuple<cv::Mat, float, std::tuple<int, int>> input_tuple =
|
||||
std::make_tuple(img, scale, pad);
|
||||
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
void* output_ptr = run_network(context, {input_tuple});
|
||||
if (!output_ptr) {
|
||||
std::cerr << "Failed to run network." << std::endl;
|
||||
uninit_network(context);
|
||||
return -1;
|
||||
}
|
||||
nn_output* outdata = (nn_output*)output_ptr;
|
||||
|
||||
// 5. Postprocess
|
||||
int num_classes = CLASS_NAMES.size();
|
||||
std::vector<Detection> detections;
|
||||
|
||||
if (outdata->num == 1) {
|
||||
// Single output YOLOX model [1, 8400, 85]
|
||||
float* output = (float*)outdata->out[0].buf;
|
||||
|
||||
int num_boxes = 8400; // Default for YOLOX
|
||||
if (outdata->out[0].param && outdata->out[0].param->num_of_dims >= 2) {
|
||||
if (outdata->out[0].param->num_of_dims == 3) {
|
||||
num_boxes = outdata->out[0].param->sizes[1];
|
||||
} else if (outdata->out[0].param->num_of_dims == 2) {
|
||||
num_boxes = outdata->out[0].param->sizes[0];
|
||||
}
|
||||
}
|
||||
|
||||
demo_postprocess(output, num_boxes, std::make_tuple(MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH), false);
|
||||
|
||||
// boxes/scores
|
||||
std::vector<cv::Rect2f> boxes;
|
||||
std::vector<std::vector<float>> scores;
|
||||
extract_boxes_and_scores(
|
||||
output, num_boxes, num_classes,
|
||||
scale, pad_left, pad_top,
|
||||
origin_img.cols, origin_img.rows,
|
||||
boxes, scores
|
||||
);
|
||||
|
||||
// multiclass_nms
|
||||
detections = multiclass_nms(boxes, scores, num_classes, NMS_THRESHOLD, 0.1f);
|
||||
} else {
|
||||
std::cerr << "Error: Unsupported output count: " << outdata->num << std::endl;
|
||||
uninit_network(context);
|
||||
return -1;
|
||||
}
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
std::chrono::duration<double, std::milli> inference_time = end_time - start_time;
|
||||
std::cout << "Inference + Postprocess time: " << inference_time.count() << " ms" << std::endl;
|
||||
std::cout << "Detections found: " << detections.size() << std::endl;
|
||||
|
||||
// 6. Visualize and Save
|
||||
cv::Mat result_img = vis(origin_img, detections, CONF_THRESHOLD, CLASS_NAMES);
|
||||
cv::imwrite(output_path, result_img);
|
||||
std::cout << "Result saved to " << output_path << std::endl;
|
||||
|
||||
// 7. Cleanup
|
||||
uninit_network(context);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
409
examples/yolox/cpp/src/postprocess.cpp
Executable file
409
examples/yolox/cpp/src/postprocess.cpp
Executable file
|
|
@ -0,0 +1,409 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "postprocess.h"
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
||||
/**
 * Letterbox an image and normalize it for YOLOX inference.
 *
 * Steps:
 *   1. letterbox (aspect-preserving resize + gray padding with 114)
 *   2. BGR to RGB conversion
 *   3. Normalize to 0-1 (divide by 255.0)
 *   4. ImageNet normalization (subtract mean, divide by std, per channel)
 *
 * Note: NNSDK's model_loader expects HWC format, so return HWC instead of CHW
 *
 * @param img         Source image; the cvtColor call below treats it as BGR.
 * @param input_size  Target size as (height, width).
 * @return Tuple of (normalized CV_32F image in HWC layout, resize scale,
 *         (left, top) padding) - the last two are needed to map boxes back
 *         to the source image.
 */
std::tuple<cv::Mat, float, std::tuple<int, int>> preproc(const cv::Mat& img, std::tuple<int, int> input_size) {
    const int input_height = std::get<0>(input_size);
    const int input_width = std::get<1>(input_size);

    // letterbox: one uniform scale so the whole image fits inside the target
    const float scale = std::min(static_cast<float>(input_height) / img.rows,
                                 static_cast<float>(input_width) / img.cols);
    const int new_w = static_cast<int>(std::round(img.cols * scale));
    const int new_h = static_cast<int>(std::round(img.rows * scale));

    // resize
    cv::Mat resized_img;
    if (img.size() != cv::Size(new_w, new_h)) {
        cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_LINEAR);
    } else {
        // Share the pixel data instead of cloning: nothing below writes through
        // resized_img, and copyMakeBorder produces its own output buffer.
        resized_img = img;
    }

    // padding: the +/-0.1 nudges split an odd remainder so that
    // top + bottom (and left + right) still add up to the full gap
    const float pad_w = (input_width - new_w) / 2.0f;
    const float pad_h = (input_height - new_h) / 2.0f;
    const int top = static_cast<int>(std::round(pad_h - 0.1f));
    const int bottom = static_cast<int>(std::round(pad_h + 0.1f));
    const int left = static_cast<int>(std::round(pad_w - 0.1f));
    const int right = static_cast<int>(std::round(pad_w + 0.1f));

    cv::Mat padded_img;
    cv::copyMakeBorder(resized_img, padded_img, top, bottom, left, right,
                       cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

    // BGR to RGB conversion
    cv::Mat rgb_img;
    cv::cvtColor(padded_img, rgb_img, cv::COLOR_BGR2RGB);

    // Normalize to 0-1 range (divide by 255.0)
    cv::Mat normalized_img;
    rgb_img.convertTo(normalized_img, CV_32F, 1.0 / 255.0);

    // ImageNet statistics in RGB order. The locals are deliberately not called
    // "mean"/"std": a variable named std hides the namespace name in this scope.
    const cv::Scalar imagenet_mean(0.485f, 0.456f, 0.406f);
    const cv::Scalar imagenet_std(0.229f, 0.224f, 0.225f);
    normalized_img -= imagenet_mean;
    // cv::divide performs the per-channel division
    cv::divide(normalized_img, imagenet_std, normalized_img);

    // Return HWC format, ImageNet normalized float32 image (RGB format)
    // Also return scale and padding (left, top) for coordinate mapping
    return std::make_tuple(normalized_img, scale, std::make_tuple(left, top));
}
|
||||
|
||||
// Logistic function: 1 / (1 + e^-x).
static float sigmoid(float x) {
    const float decay = std::exp(-x);
    return 1.0f / (1.0f + decay);
}
|
||||
|
||||
/**
 * Decode raw YOLOX predictions in place.
 *
 * Rows are visited in the order the grids are laid out: stride by stride,
 * then row-major inside each grid. For every row,
 *   (cx, cy) = (raw_xy + grid_xy) * stride
 *   (w, h)   = exp(raw_wh) * stride
 * Only the first four values of each 85-float row are rewritten.
 *
 * @param outputs    Prediction buffer, 85 floats per box.
 * @param num_boxes  Number of rows available in outputs.
 * @param img_size   Network input size as (height, width).
 * @param p6         true adds the stride-64 level to the 8/16/32 levels.
 */
void demo_postprocess(float* outputs, int num_boxes, std::tuple<int, int> img_size, bool p6) {
    const int kRowWidth = 85;  // cx, cy, w, h, obj_conf + 80 class scores
    const int input_h = std::get<0>(img_size);
    const int input_w = std::get<1>(img_size);

    const std::vector<int> levels =
        p6 ? std::vector<int>{8, 16, 32, 64} : std::vector<int>{8, 16, 32};

    int box_index = 0;
    for (int stride : levels) {
        const int rows = input_h / stride;
        const int cols = input_w / stride;
        const float stride_f = static_cast<float>(stride);

        for (int gy = 0; gy < rows; ++gy) {
            for (int gx = 0; gx < cols; ++gx, ++box_index) {
                // Never decode past the rows the caller actually provided
                if (box_index >= num_boxes) {
                    return;
                }

                float* pred = outputs + box_index * kRowWidth;

                // outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
                pred[0] = (pred[0] + static_cast<float>(gx)) * stride_f;
                pred[1] = (pred[1] + static_cast<float>(gy)) * stride_f;

                // outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
                pred[2] = std::exp(pred[2]) * stride_f;
                pred[3] = std::exp(pred[3]) * stride_f;
            }
        }
    }
}
|
||||
|
||||
/**
 * Greedy single-class non-maximum suppression.
 *
 * Boxes are visited from highest to lowest score. Each box that has not been
 * suppressed is kept, and every lower-scored box whose IoU with it exceeds
 * nms_thr is suppressed.
 *
 * @param boxes    Candidate boxes.
 * @param scores   Score of each box, indexed like boxes.
 * @param nms_thr  IoU above which the lower-scored box is dropped.
 * @return Indices into boxes of the kept boxes, best score first.
 */
std::vector<int> nms(const std::vector<cv::Rect2f>& boxes, const std::vector<float>& scores, float nms_thr) {
    std::vector<int> keep;
    if (boxes.empty()) return keep;

    const size_t count = boxes.size();

    // Box indices ordered by descending score
    std::vector<int> order(count);
    std::iota(order.begin(), order.end(), 0);
    std::sort(order.begin(), order.end(), [&scores](int lhs, int rhs) {
        return scores[lhs] > scores[rhs];
    });

    std::vector<bool> dropped(count, false);

    for (size_t rank = 0; rank < count; ++rank) {
        const int winner = order[rank];
        if (dropped[winner]) continue;

        keep.push_back(winner);

        const cv::Rect2f& a = boxes[winner];
        const float a_left = a.x;
        const float a_top = a.y;
        const float a_right = a.x + a.width;
        const float a_bottom = a.y + a.height;
        const float a_area = a.width * a.height;

        for (size_t next = rank + 1; next < count; ++next) {
            const int rival = order[next];
            if (dropped[rival]) continue;

            const cv::Rect2f& b = boxes[rival];
            const float b_right = b.x + b.width;
            const float b_bottom = b.y + b.height;

            // Intersection rectangle; a negative extent means no overlap
            const float inter_left = std::max(a_left, b.x);
            const float inter_top = std::max(a_top, b.y);
            const float inter_right = std::min(a_right, b_right);
            const float inter_bottom = std::min(a_bottom, b_bottom);

            const float inter_w = std::max(0.0f, inter_right - inter_left);
            const float inter_h = std::max(0.0f, inter_bottom - inter_top);
            const float inter = inter_w * inter_h;

            const float b_area = b.width * b.height;
            const float iou = inter / (a_area + b_area - inter);

            if (iou > nms_thr) {
                dropped[rival] = true;
            }
        }
    }

    return keep;
}
|
||||
|
||||
/**
 * Class-agnostic NMS over multi-class scores.
 *
 * Each box is reduced to its best-scoring class, boxes whose best score is
 * not above score_thr are discarded, and the survivors go through a single
 * nms() pass regardless of class.
 *
 * @param boxes        Candidate boxes.
 * @param scores       Per-box class scores; scores[i] belongs to boxes[i].
 * @param num_classes  Number of leading entries of each score row to consider.
 * @param nms_thr      IoU threshold forwarded to nms().
 * @param score_thr    Minimum best-class score (exclusive) to keep a box.
 * @return One Detection per kept box, best score first.
 */
std::vector<Detection> multiclass_nms(const std::vector<cv::Rect2f>& boxes,
                                      const std::vector<std::vector<float>>& scores,
                                      int num_classes,
                                      float nms_thr,
                                      float score_thr) {
    if (boxes.empty() || scores.empty()) return {};

    // Only pair up boxes and score rows that both exist, so a length mismatch
    // cannot read past the end of either vector.
    const size_t num_candidates = std::min(boxes.size(), scores.size());

    // Best class per box, filtered by score in the same pass
    std::vector<cv::Rect2f> valid_boxes;
    std::vector<float> valid_scores;
    std::vector<int> valid_cls_inds;

    for (size_t i = 0; i < num_candidates; ++i) {
        const std::vector<float>& row = scores[i];
        // A row shorter than num_classes is scanned only as far as it goes
        const int class_count = std::min(num_classes, static_cast<int>(row.size()));

        float max_score = -1.0f;
        int max_idx = -1;
        for (int c = 0; c < class_count; ++c) {
            if (row[c] > max_score) {
                max_score = row[c];
                max_idx = c;
            }
        }

        // Filter low-score boxes
        if (max_score > score_thr) {
            valid_boxes.push_back(boxes[i]);
            valid_scores.push_back(max_score);
            valid_cls_inds.push_back(max_idx);
        }
    }

    if (valid_boxes.empty()) return {};

    // Execute NMS
    const std::vector<int> keep = nms(valid_boxes, valid_scores, nms_thr);

    // Build results: convert x/y/width/height back to corner coordinates
    std::vector<Detection> dets;
    dets.reserve(keep.size());
    for (int idx : keep) {
        const cv::Rect2f& box = valid_boxes[idx];
        Detection det;
        det.x1 = box.x;
        det.y1 = box.y;
        det.x2 = box.x + box.width;
        det.y2 = box.y + box.height;
        det.score = valid_scores[idx];
        det.class_id = valid_cls_inds[idx];
        dets.push_back(det);
    }

    return dets;
}
|
||||
|
||||
/**
 * Draw detections (bounding box plus "name:score%" label) on a copy of img.
 *
 * Detections scoring below conf_thresh, or whose class_id has no entry in
 * class_names, are skipped. The font scale follows the image diagonal and is
 * clamped to [0.6, 1.2]; line thickness is derived from it (minimum 2).
 *
 * @param img          Image to annotate; it is cloned, not modified.
 * @param detections   Detections to draw.
 * @param conf_thresh  Minimum score for a detection to be drawn.
 * @param class_names  Label text indexed by class_id.
 * @return Annotated copy of img.
 */
cv::Mat vis(const cv::Mat& img,
            const std::vector<Detection>& detections,
            float conf_thresh,
            const std::vector<std::string>& class_names) {
    cv::Mat result = img.clone();

    // Adjust font size based on image size
    int img_height = img.rows;
    int img_width = img.cols;
    float font_scale = std::max(0.6f, std::min(1.2f,
        static_cast<float>(std::sqrt(img_height * img_height + img_width * img_width)) * 0.0015f));
    int thickness = std::max(2, static_cast<int>(font_scale * 2.5f));

    // YOLOX color palette
    static const std::vector<cv::Scalar> colors = {
        cv::Scalar(0, 114, 189), cv::Scalar(217, 83, 25), cv::Scalar(237, 177, 32),
        cv::Scalar(126, 47, 142), cv::Scalar(119, 172, 48), cv::Scalar(77, 190, 238),
        cv::Scalar(162, 20, 47), cv::Scalar(77, 77, 77), cv::Scalar(153, 153, 153),
        cv::Scalar(255, 0, 0), cv::Scalar(255, 128, 0), cv::Scalar(191, 191, 0),
        cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(170, 0, 255),
        cv::Scalar(85, 85, 0), cv::Scalar(85, 170, 0), cv::Scalar(85, 255, 0),
        cv::Scalar(170, 85, 0), cv::Scalar(170, 170, 0), cv::Scalar(170, 255, 0),
        cv::Scalar(255, 85, 0), cv::Scalar(255, 170, 0), cv::Scalar(255, 255, 0),
        cv::Scalar(0, 85, 128), cv::Scalar(0, 170, 128), cv::Scalar(0, 255, 128),
        cv::Scalar(85, 0, 128), cv::Scalar(85, 85, 128), cv::Scalar(85, 170, 128),
        cv::Scalar(85, 255, 128), cv::Scalar(170, 0, 128), cv::Scalar(170, 85, 128),
        cv::Scalar(170, 170, 128), cv::Scalar(170, 255, 128), cv::Scalar(255, 0, 128),
        cv::Scalar(255, 85, 128), cv::Scalar(255, 170, 128), cv::Scalar(255, 255, 128),
        cv::Scalar(0, 85, 255), cv::Scalar(0, 170, 255), cv::Scalar(0, 255, 255),
        cv::Scalar(85, 0, 255), cv::Scalar(85, 85, 255), cv::Scalar(85, 170, 255),
        cv::Scalar(85, 255, 255), cv::Scalar(170, 0, 255), cv::Scalar(170, 85, 255),
        cv::Scalar(170, 170, 255), cv::Scalar(170, 255, 255), cv::Scalar(255, 0, 255),
        cv::Scalar(255, 85, 255), cv::Scalar(255, 170, 255), cv::Scalar(85, 0, 0),
        cv::Scalar(128, 0, 0), cv::Scalar(170, 0, 0), cv::Scalar(213, 0, 0),
        cv::Scalar(255, 0, 0), cv::Scalar(0, 43, 0), cv::Scalar(0, 85, 0),
        cv::Scalar(0, 128, 0), cv::Scalar(0, 170, 0), cv::Scalar(0, 213, 0),
        cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 43), cv::Scalar(0, 0, 85),
        cv::Scalar(0, 0, 128), cv::Scalar(0, 0, 170), cv::Scalar(0, 0, 213),
        cv::Scalar(0, 0, 255), cv::Scalar(0, 0, 0), cv::Scalar(36, 36, 36),
        cv::Scalar(219, 219, 219), cv::Scalar(255, 255, 255)
    };

    for (const auto& det : detections) {
        if (det.score < conf_thresh) continue;
        if (det.class_id < 0 || det.class_id >= static_cast<int>(class_names.size())) continue;

        int x0 = static_cast<int>(det.x1);
        int y0 = static_cast<int>(det.y1);
        int x1 = static_cast<int>(det.x2);
        int y1 = static_cast<int>(det.y2);

        // The palette is shorter than the class list, so wrap around
        cv::Scalar color = colors[det.class_id % colors.size()];

        // Draw bounding box
        cv::rectangle(result, cv::Point(x0, y0), cv::Point(x1, y1), color, thickness);

        // Prepare text
        std::string text = class_names[det.class_id] + ":" + cv::format("%.1f%%", det.score * 100);

        // Calculate text size
        int baseline = 0;
        cv::Size txt_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, font_scale, thickness, &baseline);

        // Draw text background
        cv::Scalar txt_bk_color = color * 0.7;
        cv::rectangle(result,
                      cv::Point(x0, y0 + 1),
                      cv::Point(x0 + txt_size.width + 1, y0 + static_cast<int>(1.5 * txt_size.height)),
                      txt_bk_color, -1);

        // Draw text: black on light colors, white on dark ones. The palette
        // holds 0-255 values, so the three color channels are averaged and
        // brought back to 0-1 before comparing against 0.5. Averaging the
        // raw Scalar instead would include its unused fourth element and
        // exceed 0.5 for every color except pure black.
        const double brightness = (color[0] + color[1] + color[2]) / (3.0 * 255.0);
        cv::Scalar txt_color = (brightness > 0.5) ? cv::Scalar(0, 0, 0) : cv::Scalar(255, 255, 255);
        cv::putText(result, text,
                    cv::Point(x0, y0 + txt_size.height),
                    cv::FONT_HERSHEY_SIMPLEX, font_scale, txt_color, thickness);
    }

    return result;
}
|
||||
|
||||
void extract_boxes_and_scores(
|
||||
float* output,
|
||||
int num_boxes,
|
||||
int num_classes,
|
||||
float scale,
|
||||
int pad_left,
|
||||
int pad_top,
|
||||
int img_width,
|
||||
int img_height,
|
||||
std::vector<cv::Rect2f>& boxes,
|
||||
std::vector<std::vector<float>>& scores)
|
||||
{
|
||||
boxes.clear();
|
||||
scores.clear();
|
||||
boxes.reserve(num_boxes);
|
||||
scores.reserve(num_boxes);
|
||||
|
||||
// Extract all boxes and scores
|
||||
for (int i = 0; i < num_boxes; ++i) {
|
||||
float* box_data = output + i * 85;
|
||||
|
||||
// Format after demo_postprocess: [cx, cy, w, h, obj_conf, class0, ..., class79]
|
||||
float cx = box_data[0];
|
||||
float cy = box_data[1];
|
||||
float w = box_data[2];
|
||||
float h = box_data[3];
|
||||
|
||||
// Python: boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2.
|
||||
float x1 = cx - w / 2.0f;
|
||||
float y1 = cy - h / 2.0f;
|
||||
float x2 = cx + w / 2.0f;
|
||||
float y2 = cy + h / 2.0f;
|
||||
|
||||
// Python: valid_boxes[:, [0, 2]] = (valid_boxes[:, [0, 2]] - pad_x) / scale
|
||||
// Python: valid_boxes[:, [1, 3]] = (valid_boxes[:, [1, 3]] - pad_y) / scale
|
||||
x1 = (x1 - pad_left) / scale;
|
||||
y1 = (y1 - pad_top) / scale;
|
||||
x2 = (x2 - pad_left) / scale;
|
||||
y2 = (y2 - pad_top) / scale;
|
||||
|
||||
// Ensure coordinates are within image bounds
|
||||
x1 = std::max(0.0f, std::min(static_cast<float>(img_width), x1));
|
||||
y1 = std::max(0.0f, std::min(static_cast<float>(img_height), y1));
|
||||
x2 = std::max(0.0f, std::min(static_cast<float>(img_width), x2));
|
||||
y2 = std::max(0.0f, std::min(static_cast<float>(img_height), y2));
|
||||
|
||||
boxes.push_back(cv::Rect2f(x1, y1, x2 - x1, y2 - y1));
|
||||
|
||||
// Calculate class scores (obj_conf * cls_scores)
|
||||
float obj_conf = box_data[4];
|
||||
std::vector<float> cls_scores(num_classes);
|
||||
for (int c = 0; c < num_classes; ++c) {
|
||||
float cls_score = box_data[5 + c];
|
||||
cls_scores[c] = obj_conf * cls_score;
|
||||
}
|
||||
scores.push_back(cls_scores);
|
||||
}
|
||||
}
|
||||
112
examples/yolox/cpp/src/postprocess.h
Executable file
112
examples/yolox/cpp/src/postprocess.h
Executable file
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef YOLOX_POSTPROCESS_H
|
||||
#define YOLOX_POSTPROCESS_H
|
||||
|
||||
#include <opencv2/opencv.hpp>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
* YOLOX preprocessing function
|
||||
* @param img Input image (BGR format)
|
||||
* @param input_size Target size (height, width)
|
||||
* @return Processed image (HWC format, float32, ImageNet normalized, RGB format), scale factor, padding (left, top)
|
||||
* Note: NNSDK's model_loader expects HWC format, so return HWC instead of CHW
|
||||
* Processing steps:
|
||||
* 1. letterbox (resize + padding with 114)
|
||||
* 2. BGR to RGB conversion
|
||||
* 3. Normalize to 0-1 (divide by 255.0)
|
||||
* 4. ImageNet normalization (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
||||
*/
|
||||
std::tuple<cv::Mat, float, std::tuple<int, int>> preproc(const cv::Mat& img, std::tuple<int, int> input_size);
|
||||
|
||||
// Detection result structure
|
||||
struct Detection {
    float x1, y1, x2, y2;  // Box corners: (x1, y1) is top-left, (x2, y2) is bottom-right
    float score;           // Confidence score
    int class_id;          // Predicted class ID
};
|
||||
|
||||
/**
|
||||
* YOLOX official demo_postprocess function (C++ implementation)
|
||||
* Decode model output to absolute coordinates
|
||||
* @param outputs Model output [batch, num_boxes, 85]
|
||||
* @param num_boxes Number of detection boxes
|
||||
* @param img_size Input image size (height, width)
|
||||
* @param p6 Whether to use P6 (default false, use P5)
|
||||
* @note Returns nothing: outputs is decoded in place; each of its num_boxes rows (85 floats) becomes [cx, cy, w, h, obj_conf, class0, ..., class79]
|
||||
*/
|
||||
void demo_postprocess(float* outputs, int num_boxes, std::tuple<int, int> img_size, bool p6 = false);
|
||||
|
||||
/**
|
||||
* Extract boxes and scores from output after demo_postprocess
|
||||
* @param output Model output (processed by demo_postprocess) [num_boxes * 85]
|
||||
* @param num_boxes Number of detection boxes
|
||||
* @param num_classes Number of classes
|
||||
* @param scale Scale factor from preprocessing
|
||||
* @param pad_left Left padding boundary
|
||||
* @param pad_top Top padding boundary
|
||||
* @param img_width Original image width
|
||||
* @param img_height Original image height
|
||||
* @param boxes Output boxes as cv::Rect2f (x, y, width, height), mapped to the original image and clamped to its bounds
|
||||
* @param scores Output scores (class scores for each box, obj_conf * cls_scores)
|
||||
*/
|
||||
void extract_boxes_and_scores(
|
||||
float* output,
|
||||
int num_boxes,
|
||||
int num_classes,
|
||||
float scale,
|
||||
int pad_left,
|
||||
int pad_top,
|
||||
int img_width,
|
||||
int img_height,
|
||||
std::vector<cv::Rect2f>& boxes,
|
||||
std::vector<std::vector<float>>& scores
|
||||
);
|
||||
|
||||
/**
|
||||
* Single-class NMS
|
||||
*/
|
||||
std::vector<int> nms(const std::vector<cv::Rect2f>& boxes, const std::vector<float>& scores, float nms_thr);
|
||||
|
||||
/**
|
||||
* YOLOX official multiclass_nms function (class-agnostic version)
|
||||
* @param boxes Detection boxes, N entries stored as cv::Rect2f (x, y, width, height)
|
||||
* @param scores Class scores [N, num_classes]
|
||||
* @param num_classes Number of classes
|
||||
* @param nms_thr NMS threshold
|
||||
* @param score_thr Score threshold
|
||||
* @return Detection results, one Detection {x1, y1, x2, y2, score, class_id} per kept box
|
||||
*/
|
||||
std::vector<Detection> multiclass_nms(const std::vector<cv::Rect2f>& boxes,
|
||||
const std::vector<std::vector<float>>& scores,
|
||||
int num_classes,
|
||||
float nms_thr,
|
||||
float score_thr);
|
||||
|
||||
/**
|
||||
* Visualize detection results (consistent with Python version, supports adaptive font size)
|
||||
*/
|
||||
cv::Mat vis(const cv::Mat& img,
|
||||
const std::vector<Detection>& detections,
|
||||
float conf_thresh,
|
||||
const std::vector<std::string>& class_names);
|
||||
|
||||
#endif // YOLOX_POSTPROCESS_H
|
||||
|
||||
0
examples/yolox/py/.gitkeep
Normal file
0
examples/yolox/py/.gitkeep
Normal file
448
examples/yolox/py/yolox.py
Executable file
448
examples/yolox/py/yolox.py
Executable file
|
|
@ -0,0 +1,448 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import numpy as np
|
||||
import os
|
||||
import glob
|
||||
import argparse
|
||||
import cv2
|
||||
from pathlib import Path
|
||||
from amlnnlite.api import AMLNNLite
|
||||
|
||||
# COCO 80 class names
|
||||
# Index in this list == class id used by the detector output.
CLASS_NAMES = [
    "person", "bicycle", "car", "motorcycle", "airplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "couch", "potted plant", "bed",
    "dining table", "toilet", "tv", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]
|
||||
|
||||
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
    """
    Resize img to fit new_shape while keeping its aspect ratio, then pad the rest
    Returns: padded image, resize scale, (left, top) padding
    """
    src_h, src_w = img.shape[:2]
    dst_h, dst_w = new_shape[0], new_shape[1]

    # One uniform factor so the whole image fits inside the target
    scale = min(dst_h / src_h, dst_w / src_w)
    resized_wh = (int(round(src_w * scale)), int(round(src_h * scale)))

    if (src_w, src_h) != resized_wh:
        img = cv2.resize(img, resized_wh, interpolation=cv2.INTER_LINEAR)

    # Half of the leftover space goes on each side; the +/-0.1 nudges split
    # an odd remainder so both sides still add up to the full gap
    half_pad_w = (dst_w - resized_wh[0]) / 2
    half_pad_h = (dst_h - resized_wh[1]) / 2
    top = int(round(half_pad_h - 0.1))
    bottom = int(round(half_pad_h + 0.1))
    left = int(round(half_pad_w - 0.1))
    right = int(round(half_pad_w + 0.1))

    padded = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return padded, scale, (left, top)
|
||||
|
||||
def demo_postprocess(outputs, img_size, p6=False):
    """
    Decode raw YOLOX predictions to absolute coordinates (modifies outputs in place)

    For every prediction, xy becomes (xy + grid cell) * stride and wh becomes
    exp(wh) * stride. Grid cells are laid out stride by stride, row-major.
    Returns the same outputs array.
    """
    strides = [8, 16, 32, 64] if p6 else [8, 16, 32]

    cell_chunks = []
    stride_chunks = []
    for stride in strides:
        rows = img_size[0] // stride
        cols = img_size[1] // stride

        col_idx, row_idx = np.meshgrid(np.arange(cols), np.arange(rows))
        cells = np.stack((col_idx, row_idx), 2).reshape(1, -1, 2)

        cell_chunks.append(cells)
        # One stride value per cell, shaped to broadcast against xy / wh
        stride_chunks.append(np.full((1, cells.shape[1], 1), stride))

    all_cells = np.concatenate(cell_chunks, 1)
    all_strides = np.concatenate(stride_chunks, 1)

    outputs[..., :2] = (outputs[..., :2] + all_cells) * all_strides
    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * all_strides
    return outputs
|
||||
|
||||
def preprocess(img_path, new_shape=(640, 640), data_format='NHWC'):
    """
    Load an image and turn it into a YOLOX input tensor
    (letterbox, BGR to RGB, 0-1 scaling, ImageNet mean/std normalization)
    Returns: batched float32 tensor in data_format layout, original image, scale, pad
    """
    original_img = cv2.imread(str(img_path))
    if original_img is None:
        raise ValueError(f"can't read image: {img_path}")

    boxed_img, scale, pad = letterbox(original_img, new_shape)

    # BGR -> RGB, then scale pixel values to 0-1
    hwc = cv2.cvtColor(boxed_img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    # ImageNet normalization (per channel, RGB order)
    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
    hwc = (hwc - mean) / std

    if data_format == 'NHWC':
        # HWC -> BHWC
        input_tensor = hwc[np.newaxis, ...]
    elif data_format == 'NCHW':
        # HWC -> CHW -> BCHW
        input_tensor = hwc.transpose(2, 0, 1)[np.newaxis, ...]
    else:
        raise ValueError(f"Unsupported data format: {data_format}. Only 'NCHW' and 'NHWC' are supported.")

    return input_tensor, original_img, scale, pad
|
||||
|
||||
def nms(boxes, scores, nms_thr):
    """Greedy single class NMS in Numpy; returns kept indices, best score first."""
    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]

    # Areas use the inclusive-pixel (+1) convention, as does the overlap below
    areas = (right - left + 1) * (bottom - top + 1)
    remaining = scores.argsort()[::-1]

    keep = []
    while remaining.size > 0:
        best = remaining[0]
        keep.append(best)

        rest = remaining[1:]
        if rest.size == 0:
            break

        # Overlap of the best box with every box still in play
        inter_w = np.maximum(0.0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
        inter = inter_w * inter_h
        iou = inter / (areas[best] + areas[rest] - inter)

        # Only boxes that do not overlap the best one too much stay in play
        remaining = rest[iou <= nms_thr]

    return keep
|
||||
|
||||
def multiclass_nms(boxes, scores, nms_thr, score_thr):
    """
    Class-agnostic NMS: keep each box's best class, drop low scores, run one NMS pass
    Returns an array with rows [x1, y1, x2, y2, score, class_id], or None if nothing is left
    """
    best_cls = scores.argmax(1)
    best_score = scores[np.arange(len(best_cls)), best_cls]

    passing = best_score > score_thr
    if passing.sum() == 0:
        return None

    kept_boxes = boxes[passing]
    kept_scores = best_score[passing]
    kept_cls = best_cls[passing]

    keep = nms(kept_boxes, kept_scores, nms_thr)
    if not keep:
        return None

    return np.concatenate(
        [kept_boxes[keep], kept_scores[keep, None], kept_cls[keep, None]], 1
    )
|
||||
|
||||
def postprocess(outputs, scale, pad, img_size=(640, 640), conf_threshold=0.25, iou_threshold=0.45, p6=False):
    """
    YOLOX postprocessing: decode raw predictions and map them back to the original image

    Args:
        outputs: Model output array, or a list of arrays (only the first one is used).
            Accepted layouts: [N, 85], [1, N, 85], [N, 1, 85], [1, 1, N, 85].
        scale: Resize factor returned by letterbox().
        pad: (left, top) padding returned by letterbox().
        img_size: Network input size as (height, width).
        conf_threshold: Minimum score for a detection to be returned.
        iou_threshold: IoU threshold used by NMS.
        p6: Forwarded to demo_postprocess() to select the stride set.

    Returns:
        List of dicts with keys 'bbox' ([x1, y1, x2, y2] in original image
        coordinates), 'confidence', 'class_id' and 'class_name'. Empty if no
        detection survives.
    """
    # Only the first output is consumed, even if the runtime returns several
    output = outputs[0] if isinstance(outputs, list) else outputs

    # Bring the output to [1, N, 85]
    ndim = len(output.shape)
    if ndim == 2:
        # [N, 85] -> [1, N, 85]
        output = output[None, :, :]
    elif ndim == 3:
        if output.shape[0] != 1:
            # [N, 1, 85] -> [1, N, 85]. The transpose alone is enough here;
            # prepending another axis would give a 4-D array and break the
            # [:, :4] / [:, 4:5] slicing further down.
            output = output.transpose(1, 0, 2)
    elif ndim == 4:
        # [1, 1, N, 85] -> [1, N, 85]
        output = output[0, 0][None, :, :]

    # Convert to float32 (AMLNNLite might return int8). astype() always makes
    # a copy, so the in-place decode below never touches the caller's array.
    output = output.astype(np.float32)

    # Use demo_postprocess to decode coordinates
    predictions = demo_postprocess(output, img_size, p6=p6)[0]  # [N, 85]

    # Format after demo_postprocess: [cx, cy, w, h, obj_conf, class0, ..., class79]
    boxes = predictions[:, :4]  # [cx, cy, w, h] (absolute coordinates)
    scores = predictions[:, 4:5] * predictions[:, 5:]  # obj_conf * cls_scores

    # Convert to xyxy format
    boxes_xyxy = np.ones_like(boxes)
    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0

    # Map coordinates back to original image: remove padding, undo the resize
    pad_x, pad_y = pad
    boxes_xyxy[:, [0, 2]] = (boxes_xyxy[:, [0, 2]] - pad_x) / scale
    boxes_xyxy[:, [1, 3]] = (boxes_xyxy[:, [1, 3]] - pad_y) / scale
    boxes_xyxy = np.maximum(boxes_xyxy, 0)

    # Multiclass NMS (class-agnostic, score_thr=0.1 as in official YOLOX)
    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=iou_threshold, score_thr=0.1)
    if dets is None:
        return []

    # Convert to detection format, applying the caller's confidence threshold
    final_boxes = dets[:, :4]
    final_scores = dets[:, 4]
    final_cls_inds = dets[:, 5].astype(int)

    detections = []
    for box, confidence, class_id in zip(final_boxes, final_scores, final_cls_inds):
        if confidence < conf_threshold:
            continue
        x1, y1, x2, y2 = box
        detections.append({
            'bbox': [float(x1), float(y1), float(x2), float(y2)],
            'confidence': float(confidence),
            'class_id': int(class_id),
            'class_name': CLASS_NAMES[class_id] if class_id < len(CLASS_NAMES) else f'class_{class_id}'
        })

    return detections
|
||||
|
||||
# YOLOX color palette (consistent with python_x.py)
|
||||
# One (3-value) row per palette entry, values in the 0-1 range.
_COLORS = np.array(
    [
        [0.000, 0.447, 0.741],
        [0.850, 0.325, 0.098],
        [0.929, 0.694, 0.125],
        [0.494, 0.184, 0.556],
        [0.466, 0.674, 0.188],
        [0.301, 0.745, 0.933],
        [0.635, 0.078, 0.184],
        [0.300, 0.300, 0.300],
        [0.600, 0.600, 0.600],
        [1.000, 0.000, 0.000],
        [1.000, 0.500, 0.000],
        [0.749, 0.749, 0.000],
        [0.000, 1.000, 0.000],
        [0.000, 0.000, 1.000],
        [0.667, 0.000, 1.000],
        [0.333, 0.333, 0.000],
        [0.333, 0.667, 0.000],
        [0.333, 1.000, 0.000],
        [0.667, 0.333, 0.000],
        [0.667, 0.667, 0.000],
        [0.667, 1.000, 0.000],
        [1.000, 0.333, 0.000],
        [1.000, 0.667, 0.000],
        [1.000, 1.000, 0.000],
        [0.000, 0.333, 0.500],
        [0.000, 0.667, 0.500],
        [0.000, 1.000, 0.500],
        [0.333, 0.000, 0.500],
        [0.333, 0.333, 0.500],
        [0.333, 0.667, 0.500],
        [0.333, 1.000, 0.500],
        [0.667, 0.000, 0.500],
        [0.667, 0.333, 0.500],
        [0.667, 0.667, 0.500],
        [0.667, 1.000, 0.500],
        [1.000, 0.000, 0.500],
        [1.000, 0.333, 0.500],
        [1.000, 0.667, 0.500],
        [1.000, 1.000, 0.500],
        [0.000, 0.333, 1.000],
        [0.000, 0.667, 1.000],
        [0.000, 1.000, 1.000],
        [0.333, 0.000, 1.000],
        [0.333, 0.333, 1.000],
        [0.333, 0.667, 1.000],
        [0.333, 1.000, 1.000],
        [0.667, 0.000, 1.000],
        [0.667, 0.333, 1.000],
        [0.667, 0.667, 1.000],
        [0.667, 1.000, 1.000],
        [1.000, 0.000, 1.000],
        [1.000, 0.333, 1.000],
        [1.000, 0.667, 1.000],
        [0.333, 0.000, 0.000],
        [0.500, 0.000, 0.000],
        [0.667, 0.000, 0.000],
        [0.833, 0.000, 0.000],
        [1.000, 0.000, 0.000],
        [0.000, 0.167, 0.000],
        [0.000, 0.333, 0.000],
        [0.000, 0.500, 0.000],
        [0.000, 0.667, 0.000],
        [0.000, 0.833, 0.000],
        [0.000, 1.000, 0.000],
        [0.000, 0.000, 0.167],
        [0.000, 0.000, 0.333],
        [0.000, 0.000, 0.500],
        [0.000, 0.000, 0.667],
        [0.000, 0.000, 0.833],
        [0.000, 0.000, 1.000],
        [0.000, 0.000, 0.000],
        [0.143, 0.143, 0.143],
        [0.857, 0.857, 0.857],
        [1.000, 1.000, 1.000],
    ],
    dtype=np.float32,
)
|
||||
|
||||
def vis(img, detections, conf=0.5, class_names=None):
    """Draw detection boxes and labels on a copy of ``img``.

    Adapted from the YOLOX official visualization function
    (based on python_x.py).

    Args:
        img: Image array. Only ``img.shape[:2]`` (height, width) and
            ``img.copy()`` are used here; presumably an OpenCV BGR image
            -- confirm against the caller.
        detections: Iterable of dicts with the keys ``'bbox'``
            (x1, y1, x2, y2), ``'confidence'`` and ``'class_id'``, and
            optionally ``'class_name'``.
        conf: Detections whose confidence is below this value are skipped.
        class_names: Optional sequence of names indexed by class id. It is
            consulted only when a detection has no ``'class_name'`` entry.
            Defaults to the module-level ``CLASS_NAMES``.

    Returns:
        The annotated copy of ``img``; the input image is not modified.
    """
    if class_names is None:
        class_names = CLASS_NAMES

    result_img = img.copy()

    # Scale the font with the image diagonal, clamped to [0.6, 1.2].
    img_height, img_width = img.shape[:2]
    diagonal = np.sqrt(img_height * img_height + img_width * img_width)
    font_scale = max(0.6, min(1.2, diagonal * 0.0015))
    thickness = max(2, int(font_scale * 2.5))
    font = cv2.FONT_HERSHEY_SIMPLEX

    for det in detections:
        confidence = det['confidence']
        if confidence < conf:
            continue

        x1, y1, x2, y2 = (int(coord) for coord in det['bbox'])
        class_id = det['class_id']

        # Prefer the name carried by the detection; fall back to the
        # class_names table, then to the raw id, so a label is always drawn.
        label = det.get('class_name')
        if label is None:
            if 0 <= class_id < len(class_names):
                label = class_names[class_id]
            else:
                label = str(class_id)

        # Wrap ids beyond the palette so every class still gets a color.
        palette_color = _COLORS[class_id % len(_COLORS)]
        color = (palette_color * 255).astype(np.uint8).tolist()
        # Darkened variant of the box color behind the label text.
        txt_bk_color = (palette_color * 255 * 0.7).astype(np.uint8).tolist()
        # Black text on light colors, white text on dark ones.
        txt_color = (0, 0, 0) if np.mean(palette_color) > 0.5 else (255, 255, 255)

        text = '{}:{:.1f}%'.format(label, confidence * 100)
        txt_size = cv2.getTextSize(text, font, font_scale, thickness)[0]

        cv2.rectangle(result_img, (x1, y1), (x2, y2), color, thickness)
        cv2.rectangle(
            result_img,
            (x1, y1 + 1),
            (x1 + txt_size[0] + 1, y1 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1,
        )
        cv2.putText(
            result_img,
            text,
            (x1, y1 + txt_size[1]),
            font,
            font_scale,
            txt_color,
            thickness=thickness,
        )

    return result_img
|
||||
|
||||
def main():
    """Run the YOLOX demo on one image or on every image in a directory.

    Command-line options:
        --model-path: Model file handed to ``AMLNNLite.config``.
        --run-cycles: Forwarded to ``AMLNNLite.config`` as ``run_cycles``.
        --input-path: An image file, or a directory that is searched
            (non-recursively) for jpg/jpeg/png/bmp files.

    For each image the detections are printed and an annotated copy is
    written to ``<stem>_result.jpg`` in the current working directory.
    A failure on one image is reported and the remaining images are still
    processed. The runtime is always released via ``amlnn.uninit()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model-path', default='./yolox_s_int8_A311D2.adla')
    parser.add_argument('--run-cycles', default=1, type=int)
    parser.add_argument('--input-path', default='./', help='Input image path (file or directory)')
    args = parser.parse_args()

    # Initialize AMLNNLite
    amlnn = AMLNNLite()
    amlnn.config(
        # Model file path. The original note reads "Support ADLD and
        # quantized TFlite models"; the default model uses the .adla
        # extension -- NOTE(review): confirm the intended format name.
        model_path=args.model_path,
        run_cycles=args.run_cycles,
    )
    amlnn.init()

    # Everything after init() runs under try/finally so the runtime is
    # released on every exit path, including early returns and errors.
    try:
        # Find image files
        image_files = []
        if os.path.isfile(args.input_path):
            # Single image file
            image_files = [args.input_path]
        elif os.path.isdir(args.input_path):
            # Directory - find all image files
            image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp"]
            for ext in image_extensions:
                image_files.extend(glob.glob(os.path.join(args.input_path, ext)))
                image_files.extend(glob.glob(os.path.join(args.input_path, ext.upper())))
            # Where glob matching is case-insensitive, the lower- and
            # upper-case patterns return the same paths; drop the
            # duplicates while keeping the discovery order.
            image_files = list(dict.fromkeys(image_files))
        else:
            print(f"Error: Input path '{args.input_path}' does not exist")
            return

        if not image_files:
            print(f"No image files found in {args.input_path}")
            return

        print(f"Found {len(image_files)} image files to process:")
        for img_file in image_files:
            print(f" - {os.path.basename(img_file)}")
        print()

        # Process each image
        for i, image_path in enumerate(image_files, 1):
            print("=" * 60)
            print(f"Processing image {i}/{len(image_files)}: {os.path.basename(image_path)}")
            print("=" * 60)

            try:
                # Preprocess input
                input_tensor, original_img, scale, pad = preprocess(
                    image_path, new_shape=(640, 640), data_format='NHWC'
                )

                # Run inference
                outputs = amlnn.inference(
                    inputs=[input_tensor]
                )

                # Postprocess results
                detections = postprocess(
                    outputs,
                    scale,
                    pad,
                    img_size=(640, 640),
                    conf_threshold=0.25,
                    iou_threshold=0.45,
                    p6=False,
                )

                # Print detection results
                if detections:
                    print(f" Detected {len(detections)} objects:")
                    for rank, det in enumerate(detections, 1):
                        print(f" {rank}. {det['class_name']} ({det['confidence']:.2f})")
                else:
                    print(" No objects detected")

                # Save result image (save to current directory)
                img_name = Path(image_path).stem
                save_path = f"{img_name}_result.jpg"
                result_img = vis(original_img, detections, conf=0.25, class_names=CLASS_NAMES)
                # cv2.imwrite reports failure through its return value
                # rather than by raising, so check it before claiming success.
                if cv2.imwrite(save_path, result_img):
                    print(f" Result saved to: {save_path}")
                else:
                    print(f" Failed to save result to: {save_path}")

            except Exception as e:
                # Best effort per image: report the failure and continue
                # with the next file.
                print(f"Error processing {os.path.basename(image_path)}: {e}")

            print()

        # Optional visualization
        amlnn.visualize()
    finally:
        # Release resources
        amlnn.uninit()
|
||||
|
||||
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue