add python demos

2026-01-08 19:43:28 +08:00 · 2026-01-08 19:43:28 +08:00 · c91356fc38
commit c91356fc38
parent 3bdf2003ec
97 changed files with 3250 additions and 290 deletions
--- a/examples/yolox/cpp/build-android.sh
+++ b/examples/yolox/cpp/build-android.sh
@ -0,0 +1,77 @@
+#!/bin/bash
+set -e
+
+#
+# Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+usage() {
+    echo "Usage: $0 [-a <target_abi>]"
+    echo "  -a <target_abi> : Target ABI (default: arm64-v8a)"
+    echo "  -h              : Show this help message"
+    exit 1
+}
+
+# Default values
+TARGET_ABI=arm64-v8a
+
+# Parse arguments
+while getopts 'a:h' opt; do
+  case "$opt" in
+    a)
+      TARGET_ABI=$OPTARG
+      ;;
+    h)
+      usage
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+if [ -z "${ANDROID_NDK_PATH}" ]; then
+    if [ -n "${ANDROID_NDK}" ]; then
+        ANDROID_NDK_PATH=${ANDROID_NDK}
+    elif [ -n "${ANDROID_NDK_HOME}" ]; then
+        ANDROID_NDK_PATH=${ANDROID_NDK_HOME}
+    else
+        echo "Error: ANDROID_NDK_PATH is not set."
+        echo "Please set ANDROID_NDK_PATH to your Android NDK directory."
+        exit 1
+    fi
+fi
+
+ROOT_PWD=$(cd "$(dirname $0)" && pwd)
+BUILD_DIR=${ROOT_PWD}/build/android
+
+echo "Building for Android..."
+echo "NDK_PATH: ${ANDROID_NDK_PATH}"
+echo "TARGET_ABI: ${TARGET_ABI}"
+echo "BUILD_DIR: ${BUILD_DIR}"
+
+mkdir -p ${BUILD_DIR}
+cd ${BUILD_DIR}
+
+cmake ../../src \
+    -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_PATH}/build/cmake/android.toolchain.cmake \
+    -DANDROID_ABI=${TARGET_ABI} \
+    -DANDROID_PLATFORM=android-24 \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DOpenCV_DIR=${ROOT_PWD}/../../../dependency/opencv/opencv-android-sdk-build/sdk/native/jni/abi-${TARGET_ABI}
+
+make -j4
+
+echo "Build complete. Executable in ${BUILD_DIR}/yolox_demo"
--- a/examples/yolox/cpp/build-linux.sh
+++ b/examples/yolox/cpp/build-linux.sh
@ -0,0 +1,65 @@
+#TODO
+#!/bin/bash
+set -e
+
+usage() {
+    echo "Usage: $0 [-a <target_arch>]"
+    echo "  -a <target_arch> : Target architecture (default: aarch64)"
+    echo "  -h               : Show this help message"
+    exit 1
+}
+
+# Default values
+TARGET_ARCH=aarch64
+
+# Parse arguments
+while getopts 'a:h' opt; do
+  case "$opt" in
+    a)
+      TARGET_ARCH=$OPTARG
+      ;;
+    h)
+      usage
+      ;;
+    *)
+      usage
+      ;;
+  esac
+done
+
+# Default to aarch64-linux-gnu if GCC_COMPILER is not set
+GCC_COMPILER=${GCC_COMPILER:-aarch64-linux-gnu}
+
+# Set compilers
+export CC=${GCC_COMPILER}-gcc
+export CXX=${GCC_COMPILER}-g++
+
+# Validate compiler
+if ! command -v ${CC} &> /dev/null; then
+    echo "Error: Compiler ${CC} not found."
+    echo "Please set GCC_COMPILER environment variable to your cross-compiler path prefix."
+    echo "Example: export GCC_COMPILER=/path/to/toolchain/bin/aarch64-linux-gnu"
+    # Proceeding anyway as user might have custom env setup
+else
+    echo "Using compiler: ${CC}"
+fi
+
+ROOT_PWD=$(cd "$(dirname $0)" && pwd)
+BUILD_DIR=${ROOT_PWD}/build/linux
+
+echo "Building for Linux..."
+echo "COMPILER: ${CC}"
+echo "TARGET_ARCH: ${TARGET_ARCH}"
+echo "BUILD_DIR: ${BUILD_DIR}"
+
+mkdir -p ${BUILD_DIR}
+cd ${BUILD_DIR}
+
+cmake ../../src \
+    -DCMAKE_SYSTEM_NAME=Linux \
+    -DCMAKE_SYSTEM_PROCESSOR=${TARGET_ARCH} \
+    -DCMAKE_BUILD_TYPE=Release \
+    -DOpenCV_DIR=${ROOT_PWD}/../../../dependency/opencv/opencv-linux-aarch64/share/OpenCV
+make -j4
+
+echo "Build complete. Executable in ${BUILD_DIR}/yolo11_demo"
--- a/examples/yolox/cpp/src/CMakeLists.txt
+++ b/examples/yolox/cpp/src/CMakeLists.txt
@ -0,0 +1,41 @@
+cmake_minimum_required(VERSION 3.5)
+project(yolo11_demo) 
+
+set(CMAKE_CXX_STANDARD 17)
+
+# Set NNSDK path
+set(NNSDK_ROOT "${CMAKE_SOURCE_DIR}/../../../../dependency/nnsdk")
+include_directories(${NNSDK_ROOT}/include)
+include_directories(${CMAKE_SOURCE_DIR}/../../../../common)
+
+# Set dependency path
+set(3RDPARTY_DIR "${CMAKE_SOURCE_DIR}/../../../../dependency")
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    if (ANDROID_ABI STREQUAL "arm64-v8a")
+        link_directories(${NNSDK_ROOT}/lib/android/arm64-v8a)
+    else()
+        link_directories(${NNSDK_ROOT}/lib/android/armeabi-v7a)
+    endif()
+    # Android needs log
+    link_libraries(log)
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    link_directories(${NNSDK_ROOT}/lib/linux/lib64_yocto)
+endif()
+
+# Find OpenCV
+message(STATUS "OpenCV_DIR: ${OpenCV_DIR}")
+find_package(OpenCV REQUIRED)
+include_directories(${OpenCV_INCLUDE_DIRS})
+
+
+add_executable(yolox_demo
+    main.cpp
+    postprocess.cpp
+    ${CMAKE_SOURCE_DIR}/../../../../common/model_loader.cpp 
+)
+
+target_link_libraries(yolox_demo
+    ${OpenCV_LIBS}
+    nnsdk
+)
--- a/examples/yolox/cpp/src/main.cpp
+++ b/examples/yolox/cpp/src/main.cpp
@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <chrono>
+#include <cmath>
+#include <opencv2/opencv.hpp>
+#include "postprocess.h"
+#include "model_loader.h"
+
+static float sigmoid(float x) {
+    return 1.0f / (1.0f + std::exp(-x));
+}
+
+const std::string DEFAULT_OUTPUT_PATH = "./result.jpg";
+const int MODEL_INPUT_WIDTH = 640;
+const int MODEL_INPUT_HEIGHT = 640;
+const float SCORE_THRESHOLD = 0.25f;
+const float NMS_THRESHOLD = 0.45f;
+const float CONF_THRESHOLD = 0.45f;
+
+const std::vector<std::string> CLASS_NAMES = {
+    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
+    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+    "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
+    "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
+    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
+    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
+    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
+    "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
+};
+
+int main(int argc, char** argv) {
+    if (argc < 3) {
+        printf("Usage: %s <model_path> <image_path> [output_path]\n", argv[0]);
+        return -1;
+    }
+
+    std::string model_path = argv[1];
+    std::string image_path = argv[2];
+    std::string output_path = (argc > 3) ? argv[3] : DEFAULT_OUTPUT_PATH;
+
+    std::cout << "YOLOX C++ Demo" << std::endl;
+    std::cout << "Model: " << model_path << std::endl;
+    std::cout << "Image: " << image_path << std::endl;
+    std::cout << "Output: " << output_path << std::endl;
+
+    // 1. Load Image
+    cv::Mat origin_img = cv::imread(image_path);
+    if (origin_img.empty()) {
+        std::cerr << "Failed to load image from " << image_path << std::endl;
+        return -1;
+    }
+
+    // 2. Initialize Network
+    void* context = init_network(model_path.c_str());
+    if (!context) {
+        std::cerr << "Failed to initialize network." << std::endl;
+        return -1;
+    }
+
+    // 3. Preprocess
+    cv::Mat img;
+    float scale;
+    std::tuple<int, int> pad;
+    std::tie(img, scale, pad) = preproc(origin_img, std::make_tuple(MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH));
+    int pad_left = std::get<0>(pad);
+    int pad_top = std::get<1>(pad);
+
+    // 4. Run Network
+    std::tuple<cv::Mat, float, std::tuple<int, int>> input_tuple = 
+        std::make_tuple(img, scale, pad);
+
+    auto start_time = std::chrono::high_resolution_clock::now();
+
+    void* output_ptr = run_network(context, {input_tuple});
+    if (!output_ptr) {
+        std::cerr << "Failed to run network." << std::endl;
+        uninit_network(context);
+        return -1;
+    }
+    nn_output* outdata = (nn_output*)output_ptr;
+
+    // 5. Postprocess
+    int num_classes = CLASS_NAMES.size();
+    std::vector<Detection> detections;
+
+    if (outdata->num == 1) {
+        // Single output YOLOX model [1, 8400, 85]
+        float* output = (float*)outdata->out[0].buf;
+
+        int num_boxes = 8400;  // Default for YOLOX
+        if (outdata->out[0].param && outdata->out[0].param->num_of_dims >= 2) {
+            if (outdata->out[0].param->num_of_dims == 3) {
+                num_boxes = outdata->out[0].param->sizes[1];
+            } else if (outdata->out[0].param->num_of_dims == 2) {
+                num_boxes = outdata->out[0].param->sizes[0];
+            }
+        }
+
+        demo_postprocess(output, num_boxes, std::make_tuple(MODEL_INPUT_HEIGHT, MODEL_INPUT_WIDTH), false);
+
+        // boxes/scores
+        std::vector<cv::Rect2f> boxes;
+        std::vector<std::vector<float>> scores;
+        extract_boxes_and_scores(
+            output, num_boxes, num_classes,
+            scale, pad_left, pad_top,
+            origin_img.cols, origin_img.rows,
+            boxes, scores
+        );
+
+        // multiclass_nms
+        detections = multiclass_nms(boxes, scores, num_classes, NMS_THRESHOLD, 0.1f);
+    } else {
+        std::cerr << "Error: Unsupported output count: " << outdata->num << std::endl;
+        uninit_network(context);
+        return -1;
+    }
+
+    auto end_time = std::chrono::high_resolution_clock::now();
+    std::chrono::duration<double, std::milli> inference_time = end_time - start_time;
+    std::cout << "Inference + Postprocess time: " << inference_time.count() << " ms" << std::endl;
+    std::cout << "Detections found: " << detections.size() << std::endl;
+
+    // 6. Visualize and Save
+    cv::Mat result_img = vis(origin_img, detections, CONF_THRESHOLD, CLASS_NAMES);
+    cv::imwrite(output_path, result_img);
+    std::cout << "Result saved to " << output_path << std::endl;
+
+    // 7. Cleanup
+    uninit_network(context);
+
+    return 0;
+}
+
--- a/examples/yolox/cpp/src/postprocess.cpp
+++ b/examples/yolox/cpp/src/postprocess.cpp
@ -0,0 +1,409 @@
+/*
+ * Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "postprocess.h"
+#include <algorithm>
+#include <cmath>
+#include <numeric>
+
+std::tuple<cv::Mat, float, std::tuple<int, int>> preproc(const cv::Mat& img, std::tuple<int, int> input_size) {
+    // 1. letterbox (resize + padding)
+    // 2. BGR to RGB conversion
+    // 3. Normalize to 0-1 (divide by 255.0)
+    // Note: NNSDK's model_loader expects HWC format, so return HWC instead of CHW
+    
+    int input_height = std::get<0>(input_size);
+    int input_width = std::get<1>(input_size);
+
+    // letterbox: calculate scale and padding
+    float scale = std::min(static_cast<float>(input_height) / img.rows,
+                           static_cast<float>(input_width) / img.cols);
+    int new_w = static_cast<int>(std::round(img.cols * scale));
+    int new_h = static_cast<int>(std::round(img.rows * scale));
+    
+    // resize
+    cv::Mat resized_img;
+    if (img.size() != cv::Size(new_w, new_h)) {
+        cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_LINEAR);
+    } else {
+        resized_img = img.clone();
+    }
+    
+    // padding
+    float pad_w = (input_width - new_w) / 2.0f;
+    float pad_h = (input_height - new_h) / 2.0f;
+    int top = static_cast<int>(std::round(pad_h - 0.1f));
+    int bottom = static_cast<int>(std::round(pad_h + 0.1f));
+    int left = static_cast<int>(std::round(pad_w - 0.1f));
+    int right = static_cast<int>(std::round(pad_w + 0.1f));
+    
+    cv::Mat padded_img;
+    cv::copyMakeBorder(resized_img, padded_img, top, bottom, left, right, 
+                       cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
+    
+    // BGR to RGB conversion
+    cv::Mat rgb_img;
+    cv::cvtColor(padded_img, rgb_img, cv::COLOR_BGR2RGB);
+    
+    // Normalize to 0-1 range (divide by 255.0)
+    cv::Mat normalized_img;
+    rgb_img.convertTo(normalized_img, CV_32F, 1.0 / 255.0);
+
+    // mean = [0.485, 0.456, 0.406]
+    // std = [0.229, 0.224, 0.225]
+    // Note: Use cv::divide for per-channel division in OpenCV
+    cv::Scalar mean(0.485f, 0.456f, 0.406f);
+    cv::Scalar std(0.229f, 0.224f, 0.225f);
+    normalized_img -= mean;
+    cv::divide(normalized_img, std, normalized_img);
+    
+    // Return HWC format, ImageNet normalized float32 image (RGB format)
+    // Also return scale and padding (left, top) for coordinate mapping
+    return std::make_tuple(normalized_img, scale, std::make_tuple(left, top));
+}
+
+static float sigmoid(float x) {
+    return 1.0f / (1.0f + std::exp(-x));
+}
+
+void demo_postprocess(float* outputs, int num_boxes, std::tuple<int, int> img_size, bool p6) {
+    int img_height = std::get<0>(img_size);
+    int img_width = std::get<1>(img_size);
+    
+    std::vector<int> strides;
+    if (!p6) {
+        strides = {8, 16, 32};
+    } else {
+        strides = {8, 16, 32, 64};
+    }
+    
+    // Calculate grid count for each stride
+    std::vector<int> hsizes, wsizes;
+    for (int stride : strides) {
+        hsizes.push_back(img_height / stride);
+        wsizes.push_back(img_width / stride);
+    }
+    
+    // Build grids and expanded_strides
+    std::vector<std::vector<float>> grids_list;
+    std::vector<std::vector<float>> strides_list;
+    
+    int total_boxes = 0;
+    for (size_t i = 0; i < strides.size(); ++i) {
+        int hsize = hsizes[i];
+        int wsize = wsizes[i];
+        int stride = strides[i];
+        int grid_size = hsize * wsize;
+        
+        std::vector<float> grid(grid_size * 2);
+        std::vector<float> expanded_stride(grid_size);
+        
+        for (int h = 0; h < hsize; ++h) {
+            for (int w = 0; w < wsize; ++w) {
+                int idx = h * wsize + w;
+                grid[idx * 2] = static_cast<float>(w);
+                grid[idx * 2 + 1] = static_cast<float>(h);
+                expanded_stride[idx] = static_cast<float>(stride);
+            }
+        }
+        
+        grids_list.push_back(grid);
+        strides_list.push_back(expanded_stride);
+        total_boxes += grid_size;
+    }
+    
+    // Merge all grids and strides
+    std::vector<float> all_grids(total_boxes * 2);
+    std::vector<float> all_strides(total_boxes);
+    
+    int offset = 0;
+    for (size_t i = 0; i < grids_list.size(); ++i) {
+        int grid_size = grids_list[i].size() / 2;
+        for (int j = 0; j < grid_size; ++j) {
+            all_grids[(offset + j) * 2] = grids_list[i][j * 2];
+            all_grids[(offset + j) * 2 + 1] = grids_list[i][j * 2 + 1];
+            all_strides[offset + j] = strides_list[i][j];
+        }
+        offset += grid_size;
+    }
+    
+    // Apply grid and stride decoding
+    for (int i = 0; i < num_boxes && i < total_boxes; ++i) {
+        float* box = outputs + i * 85;
+        
+        // outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
+        box[0] = (box[0] + all_grids[i * 2]) * all_strides[i];
+        box[1] = (box[1] + all_grids[i * 2 + 1]) * all_strides[i];
+        
+        // outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
+        box[2] = std::exp(box[2]) * all_strides[i];
+        box[3] = std::exp(box[3]) * all_strides[i];
+    }
+}
+
+std::vector<int> nms(const std::vector<cv::Rect2f>& boxes, const std::vector<float>& scores, float nms_thr) {
+    if (boxes.empty()) return {};
+    
+    // Create indices and sort
+    std::vector<int> indices(boxes.size());
+    std::iota(indices.begin(), indices.end(), 0);
+    std::sort(indices.begin(), indices.end(), [&scores](int a, int b) {
+        return scores[a] > scores[b];
+    });
+    
+    std::vector<int> keep;
+    std::vector<bool> suppressed(boxes.size(), false);
+    
+    for (size_t i = 0; i < indices.size(); ++i) {
+        int idx = indices[i];
+        if (suppressed[idx]) continue;
+        
+        keep.push_back(idx);
+        
+        float x1_i = boxes[idx].x;
+        float y1_i = boxes[idx].y;
+        float x2_i = boxes[idx].x + boxes[idx].width;
+        float y2_i = boxes[idx].y + boxes[idx].height;
+        float area_i = boxes[idx].width * boxes[idx].height;
+        
+        for (size_t j = i + 1; j < indices.size(); ++j) {
+            int idx_j = indices[j];
+            if (suppressed[idx_j]) continue;
+            
+            float x1_j = boxes[idx_j].x;
+            float y1_j = boxes[idx_j].y;
+            float x2_j = boxes[idx_j].x + boxes[idx_j].width;
+            float y2_j = boxes[idx_j].y + boxes[idx_j].height;
+            
+            float xx1 = std::max(x1_i, x1_j);
+            float yy1 = std::max(y1_i, y1_j);
+            float xx2 = std::min(x2_i, x2_j);
+            float yy2 = std::min(y2_i, y2_j);
+            
+            float w = std::max(0.0f, xx2 - xx1);
+            float h = std::max(0.0f, yy2 - yy1);
+            float inter = w * h;
+            
+            float area_j = boxes[idx_j].width * boxes[idx_j].height;
+            float ovr = inter / (area_i + area_j - inter);
+            
+            if (ovr > nms_thr) {
+                suppressed[idx_j] = true;
+            }
+        }
+    }
+    
+    return keep;
+}
+
+std::vector<Detection> multiclass_nms(const std::vector<cv::Rect2f>& boxes, 
+                                      const std::vector<std::vector<float>>& scores,
+                                      int num_classes,
+                                      float nms_thr, 
+                                      float score_thr) {
+    if (boxes.empty() || scores.empty()) return {};
+    
+    // Find max class score and class ID for each box
+    std::vector<float> cls_scores(boxes.size());
+    std::vector<int> cls_inds(boxes.size());
+    
+    for (size_t i = 0; i < boxes.size(); ++i) {
+        float max_score = -1.0f;
+        int max_idx = -1;
+        for (int c = 0; c < num_classes; ++c) {
+            if (scores[i][c] > max_score) {
+                max_score = scores[i][c];
+                max_idx = c;
+            }
+        }
+        cls_scores[i] = max_score;
+        cls_inds[i] = max_idx;
+    }
+    
+    // Filter low-score boxes
+    std::vector<cv::Rect2f> valid_boxes;
+    std::vector<float> valid_scores;
+    std::vector<int> valid_cls_inds;
+    std::vector<int> valid_indices;
+    
+    for (size_t i = 0; i < boxes.size(); ++i) {
+        if (cls_scores[i] > score_thr) {
+            valid_boxes.push_back(boxes[i]);
+            valid_scores.push_back(cls_scores[i]);
+            valid_cls_inds.push_back(cls_inds[i]);
+            valid_indices.push_back(i);
+        }
+    }
+    
+    if (valid_boxes.empty()) return {};
+    
+    // Execute NMS
+    std::vector<int> keep = nms(valid_boxes, valid_scores, nms_thr);
+    
+    // Build results
+    std::vector<Detection> dets;
+    for (int idx : keep) {
+        Detection det;
+        det.x1 = valid_boxes[idx].x;
+        det.y1 = valid_boxes[idx].y;
+        det.x2 = valid_boxes[idx].x + valid_boxes[idx].width;
+        det.y2 = valid_boxes[idx].y + valid_boxes[idx].height;
+        det.score = valid_scores[idx];
+        det.class_id = valid_cls_inds[idx];
+        dets.push_back(det);
+    }
+    
+    return dets;
+}
+
+cv::Mat vis(const cv::Mat& img, 
+            const std::vector<Detection>& detections,
+            float conf_thresh,
+            const std::vector<std::string>& class_names) {
+    cv::Mat result = img.clone();
+    
+    // Adjust font size based on image size
+    int img_height = img.rows;
+    int img_width = img.cols;
+    float font_scale = std::max(0.6f, std::min(1.2f, 
+        static_cast<float>(std::sqrt(img_height * img_height + img_width * img_width)) * 0.0015f));
+    int thickness = std::max(2, static_cast<int>(font_scale * 2.5f));
+    
+    // YOLOX color palette
+    static const std::vector<cv::Scalar> colors = {
+        cv::Scalar(0, 114, 189), cv::Scalar(217, 83, 25), cv::Scalar(237, 177, 32),
+        cv::Scalar(126, 47, 142), cv::Scalar(119, 172, 48), cv::Scalar(77, 190, 238),
+        cv::Scalar(162, 20, 47), cv::Scalar(77, 77, 77), cv::Scalar(153, 153, 153),
+        cv::Scalar(255, 0, 0), cv::Scalar(255, 128, 0), cv::Scalar(191, 191, 0),
+        cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 255), cv::Scalar(170, 0, 255),
+        cv::Scalar(85, 85, 0), cv::Scalar(85, 170, 0), cv::Scalar(85, 255, 0),
+        cv::Scalar(170, 85, 0), cv::Scalar(170, 170, 0), cv::Scalar(170, 255, 0),
+        cv::Scalar(255, 85, 0), cv::Scalar(255, 170, 0), cv::Scalar(255, 255, 0),
+        cv::Scalar(0, 85, 128), cv::Scalar(0, 170, 128), cv::Scalar(0, 255, 128),
+        cv::Scalar(85, 0, 128), cv::Scalar(85, 85, 128), cv::Scalar(85, 170, 128),
+        cv::Scalar(85, 255, 128), cv::Scalar(170, 0, 128), cv::Scalar(170, 85, 128),
+        cv::Scalar(170, 170, 128), cv::Scalar(170, 255, 128), cv::Scalar(255, 0, 128),
+        cv::Scalar(255, 85, 128), cv::Scalar(255, 170, 128), cv::Scalar(255, 255, 128),
+        cv::Scalar(0, 85, 255), cv::Scalar(0, 170, 255), cv::Scalar(0, 255, 255),
+        cv::Scalar(85, 0, 255), cv::Scalar(85, 85, 255), cv::Scalar(85, 170, 255),
+        cv::Scalar(85, 255, 255), cv::Scalar(170, 0, 255), cv::Scalar(170, 85, 255),
+        cv::Scalar(170, 170, 255), cv::Scalar(170, 255, 255), cv::Scalar(255, 0, 255),
+        cv::Scalar(255, 85, 255), cv::Scalar(255, 170, 255), cv::Scalar(85, 0, 0),
+        cv::Scalar(128, 0, 0), cv::Scalar(170, 0, 0), cv::Scalar(213, 0, 0),
+        cv::Scalar(255, 0, 0), cv::Scalar(0, 43, 0), cv::Scalar(0, 85, 0),
+        cv::Scalar(0, 128, 0), cv::Scalar(0, 170, 0), cv::Scalar(0, 213, 0),
+        cv::Scalar(0, 255, 0), cv::Scalar(0, 0, 43), cv::Scalar(0, 0, 85),
+        cv::Scalar(0, 0, 128), cv::Scalar(0, 0, 170), cv::Scalar(0, 0, 213),
+        cv::Scalar(0, 0, 255), cv::Scalar(0, 0, 0), cv::Scalar(36, 36, 36),
+        cv::Scalar(219, 219, 219), cv::Scalar(255, 255, 255)
+    };
+
+    for (const auto& det : detections) {
+        if (det.score < conf_thresh) continue;
+        if (det.class_id < 0 || det.class_id >= static_cast<int>(class_names.size())) continue;
+        
+        int x0 = static_cast<int>(det.x1);
+        int y0 = static_cast<int>(det.y1);
+        int x1 = static_cast<int>(det.x2);
+        int y1 = static_cast<int>(det.y2);
+        
+        cv::Scalar color = colors[det.class_id % colors.size()];
+        
+        // Draw bounding box
+        cv::rectangle(result, cv::Point(x0, y0), cv::Point(x1, y1), color, thickness);
+        
+        // Prepare text
+        std::string text = class_names[det.class_id] + ":" + cv::format("%.1f%%", det.score * 100);
+        
+        // Calculate text size
+        int baseline = 0;
+        cv::Size txt_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, font_scale, thickness, &baseline);
+        
+        // Draw text background
+        cv::Scalar txt_bk_color = color * 0.7;
+        cv::rectangle(result,
+                     cv::Point(x0, y0 + 1),
+                     cv::Point(x0 + txt_size.width + 1, y0 + static_cast<int>(1.5 * txt_size.height)),
+                     txt_bk_color, -1);
+        
+        // Draw text
+        cv::Scalar txt_color = (cv::mean(color)[0] > 0.5) ? cv::Scalar(0, 0, 0) : cv::Scalar(255, 255, 255);
+        cv::putText(result, text,
+                   cv::Point(x0, y0 + txt_size.height),
+                   cv::FONT_HERSHEY_SIMPLEX, font_scale, txt_color, thickness);
+    }
+    
+    return result;
+}
+
+void extract_boxes_and_scores(
+    float* output,
+    int num_boxes,
+    int num_classes,
+    float scale,
+    int pad_left,
+    int pad_top,
+    int img_width,
+    int img_height,
+    std::vector<cv::Rect2f>& boxes,
+    std::vector<std::vector<float>>& scores)
+{
+    boxes.clear();
+    scores.clear();
+    boxes.reserve(num_boxes);
+    scores.reserve(num_boxes);
+
+    // Extract all boxes and scores
+    for (int i = 0; i < num_boxes; ++i) {
+        float* box_data = output + i * 85;
+
+        // Format after demo_postprocess: [cx, cy, w, h, obj_conf, class0, ..., class79]
+        float cx = box_data[0];
+        float cy = box_data[1];
+        float w = box_data[2];
+        float h = box_data[3];
+
+        // Python: boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2]/2.
+        float x1 = cx - w / 2.0f;
+        float y1 = cy - h / 2.0f;
+        float x2 = cx + w / 2.0f;
+        float y2 = cy + h / 2.0f;
+
+        // Python: valid_boxes[:, [0, 2]] = (valid_boxes[:, [0, 2]] - pad_x) / scale
+        // Python: valid_boxes[:, [1, 3]] = (valid_boxes[:, [1, 3]] - pad_y) / scale
+        x1 = (x1 - pad_left) / scale;
+        y1 = (y1 - pad_top) / scale;
+        x2 = (x2 - pad_left) / scale;
+        y2 = (y2 - pad_top) / scale;
+
+        // Ensure coordinates are within image bounds
+        x1 = std::max(0.0f, std::min(static_cast<float>(img_width), x1));
+        y1 = std::max(0.0f, std::min(static_cast<float>(img_height), y1));
+        x2 = std::max(0.0f, std::min(static_cast<float>(img_width), x2));
+        y2 = std::max(0.0f, std::min(static_cast<float>(img_height), y2));
+
+        boxes.push_back(cv::Rect2f(x1, y1, x2 - x1, y2 - y1));
+
+        // Calculate class scores (obj_conf * cls_scores)
+        float obj_conf = box_data[4];
+        std::vector<float> cls_scores(num_classes);
+        for (int c = 0; c < num_classes; ++c) {
+            float cls_score = box_data[5 + c];
+            cls_scores[c] = obj_conf * cls_score;
+        }
+        scores.push_back(cls_scores);
+    }
+}
--- a/examples/yolox/cpp/src/postprocess.h
+++ b/examples/yolox/cpp/src/postprocess.h
@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef YOLOX_POSTPROCESS_H
+#define YOLOX_POSTPROCESS_H
+
+#include <opencv2/opencv.hpp>
+#include <vector>
+#include <tuple>
+#include <string>
+
+/**
+ * YOLOX preprocessing function
+ * @param img Input image (BGR format)
+ * @param input_size Target size (height, width)
+ * @return Processed image (HWC format, float32, ImageNet normalized, RGB format), scale factor, padding (left, top)
+ * Note: NNSDK's model_loader expects HWC format, so return HWC instead of CHW
+ * Processing steps:
+ * 1. letterbox (resize + padding with 114)
+ * 2. BGR to RGB conversion
+ * 3. Normalize to 0-1 (divide by 255.0)
+ * 4. ImageNet normalization (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+ */
+std::tuple<cv::Mat, float, std::tuple<int, int>> preproc(const cv::Mat& img, std::tuple<int, int> input_size);
+
+// Detection result structure
+struct Detection {
+    float x1, y1, x2, y2;  // Bounding box coordinates
+    float score;           // Confidence score
+    int class_id;          // Predicted class ID
+};
+
+/**
+ * YOLOX official demo_postprocess function (C++ implementation)
+ * Decode model output to absolute coordinates
+ * @param outputs Model output [batch, num_boxes, 85]
+ * @param num_boxes Number of detection boxes
+ * @param img_size Input image size (height, width)
+ * @param p6 Whether to use P6 (default false, use P5)
+ * @return Decoded output [num_boxes, 85], format: [cx, cy, w, h, obj_conf, class0, ..., class79]
+ */
+void demo_postprocess(float* outputs, int num_boxes, std::tuple<int, int> img_size, bool p6 = false);
+
+/**
+ * Extract boxes and scores from output after demo_postprocess
+ * @param output Model output (processed by demo_postprocess) [num_boxes * 85]
+ * @param num_boxes Number of detection boxes
+ * @param num_classes Number of classes
+ * @param scale Scale factor from preprocessing
+ * @param pad_left Left padding boundary
+ * @param pad_top Top padding boundary
+ * @param img_width Original image width
+ * @param img_height Original image height
+ * @param boxes Output boxes (xyxy format, mapped to original image size)
+ * @param scores Output scores (class scores for each box, obj_conf * cls_scores)
+ */
+void extract_boxes_and_scores(
+    float* output,
+    int num_boxes,
+    int num_classes,
+    float scale,
+    int pad_left,
+    int pad_top,
+    int img_width,
+    int img_height,
+    std::vector<cv::Rect2f>& boxes,
+    std::vector<std::vector<float>>& scores
+);
+
+/**
+ * Single-class NMS
+ */
+std::vector<int> nms(const std::vector<cv::Rect2f>& boxes, const std::vector<float>& scores, float nms_thr);
+
+/**
+ * YOLOX official multiclass_nms function (class-agnostic version)
+ * @param boxes Detection boxes [N, 4] (x1, y1, x2, y2)
+ * @param scores Class scores [N, num_classes]
+ * @param num_classes Number of classes
+ * @param nms_thr NMS threshold
+ * @param score_thr Score threshold
+ * @return Detection results, each row is [x1, y1, x2, y2, score, class_id]
+ */
+std::vector<Detection> multiclass_nms(const std::vector<cv::Rect2f>& boxes, 
+                                      const std::vector<std::vector<float>>& scores,
+                                      int num_classes,
+                                      float nms_thr, 
+                                      float score_thr);
+
+/**
+ * Visualize detection results (consistent with Python version, supports adaptive font size)
+ */
+cv::Mat vis(const cv::Mat& img, 
+            const std::vector<Detection>& detections,
+            float conf_thresh,
+            const std::vector<std::string>& class_names);
+
+#endif // YOLOX_POSTPROCESS_H
+
--- a/examples/yolox/py/.gitkeep
+++ b/examples/yolox/py/.gitkeep
--- a/examples/yolox/py/yolox.py
+++ b/examples/yolox/py/yolox.py
@ -0,0 +1,448 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import os
+import glob
+import argparse
+import cv2
+from pathlib import Path
+from amlnnlite.api import AMLNNLite
+
+# COCO 80 class names
+CLASS_NAMES = [
+    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat",
+    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+    "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
+    "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
+    "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
+    "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
+    "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote",
+    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book",
+    "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"
+]
+
+def letterbox(img, new_shape=(640, 640), color=(114, 114, 114)):
+    shape = img.shape[:2]  # [height, width]
+    scale = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    new_unpad = (int(round(shape[1] * scale)), int(round(shape[0] * scale)))
+    pad_w = (new_shape[1] - new_unpad[0]) / 2
+    pad_h = (new_shape[0] - new_unpad[1]) / 2
+
+    if shape[::-1] != new_unpad:
+        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+
+    top, bottom = int(round(pad_h - 0.1)), int(round(pad_h + 0.1))
+    left, right = int(round(pad_w - 0.1)), int(round(pad_w + 0.1))
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
+
+    return img, scale, (left, top)
+
+def demo_postprocess(outputs, img_size, p6=False):
+    """
+    YOLOX official demo_postprocess function
+    Decode model output to absolute coordinates
+    """
+    grids = []
+    expanded_strides = []
+
+    if not p6:
+        strides = [8, 16, 32]
+    else:
+        strides = [8, 16, 32, 64]
+
+    hsizes = [img_size[0] // stride for stride in strides]
+    wsizes = [img_size[1] // stride for stride in strides]
+
+    for hsize, wsize, stride in zip(hsizes, wsizes, strides):
+        xv, yv = np.meshgrid(np.arange(wsize), np.arange(hsize))
+        grid = np.stack((xv, yv), 2).reshape(1, -1, 2)
+        grids.append(grid)
+        shape = grid.shape[:2]
+        expanded_strides.append(np.full((*shape, 1), stride))
+
+    grids = np.concatenate(grids, 1)
+    expanded_strides = np.concatenate(expanded_strides, 1)
+    outputs[..., :2] = (outputs[..., :2] + grids) * expanded_strides
+    outputs[..., 2:4] = np.exp(outputs[..., 2:4]) * expanded_strides
+
+    return outputs
+
+def preprocess(img_path, new_shape=(640, 640), data_format='NHWC'):
+    """
+    YOLOX preprocessing function (with ImageNet normalization)
+    Returns: processed image (HWC format for NHWC, float32, normalized), scale, pad
+    """
+    original_img = cv2.imread(str(img_path))
+    if original_img is None:
+        raise ValueError(f"can't read image: {img_path}")
+
+    processed_img, scale, pad = letterbox(original_img, new_shape)
+    rgb_img = cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)
+
+    # Normalize to 0-1
+    normalized_img = rgb_img.astype(np.float32) / 255.0
+
+    # ImageNet normalization
+    mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+    std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+    normalized_img = (normalized_img - mean) / std
+
+    if data_format == 'NCHW':
+        # HWC -> CHW -> BCHW
+        input_tensor = np.transpose(normalized_img, (2, 0, 1))
+        input_tensor = np.expand_dims(input_tensor, axis=0)
+    elif data_format == 'NHWC':
+        # HWC -> BHWC
+        input_tensor = np.expand_dims(normalized_img, axis=0)
+    else:
+        raise ValueError(f"Unsupported data format: {data_format}. Only 'NCHW' and 'NHWC' are supported.")
+
+    return input_tensor, original_img, scale, pad
+
+def nms(boxes, scores, nms_thr):
+    """Single class NMS implemented in Numpy."""
+    x1 = boxes[:, 0]
+    y1 = boxes[:, 1]
+    x2 = boxes[:, 2]
+    y2 = boxes[:, 3]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        if order.size == 1:
+            break
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+        inds = np.where(ovr <= nms_thr)[0]
+        order = order[inds + 1]
+
+    return keep
+
+def multiclass_nms(boxes, scores, nms_thr, score_thr):
+    """
+    YOLOX official multiclass_nms function (class-agnostic version)
+    """
+    cls_inds = scores.argmax(1)
+    cls_scores = scores[np.arange(len(cls_inds)), cls_inds]
+
+    valid_score_mask = cls_scores > score_thr
+    if valid_score_mask.sum() == 0:
+        return None
+    valid_scores = cls_scores[valid_score_mask]
+    valid_boxes = boxes[valid_score_mask]
+    valid_cls_inds = cls_inds[valid_score_mask]
+    keep = nms(valid_boxes, valid_scores, nms_thr)
+    if keep:
+        dets = np.concatenate(
+            [valid_boxes[keep], valid_scores[keep, None], valid_cls_inds[keep, None]], 1
+        )
+        return dets
+    return None
+
+def postprocess(outputs, scale, pad, img_size=(640, 640), conf_threshold=0.25, iou_threshold=0.45, p6=False):
+    """
+    YOLOX postprocessing (based on python_x.py)
+    Assumes single output [1, 8400, 85] or multiple outputs that need to be concatenated
+    """
+    # Handle multiple outputs (if AMLNNLite returns multiple scales)
+    if isinstance(outputs, list):
+        if len(outputs) == 1:
+            output = outputs[0]
+        else:
+            # Concatenate multiple outputs if needed
+            # This assumes outputs are already in the correct format
+            output = outputs[0]  # Use first output for now
+    else:
+        output = outputs
+
+    # Ensure output is in correct format [1, N, 85]
+    if len(output.shape) == 2:
+        # [N, 85] -> [1, N, 85]
+        output = output[None, :, :]
+    elif len(output.shape) == 3:
+        # [1, N, 85] or [N, 1, 85]
+        if output.shape[0] != 1:
+            output = output.transpose(1, 0, 2)[None, :, :]
+    elif len(output.shape) == 4:
+        # [1, 1, N, 85] -> [1, N, 85]
+        output = output[0, 0]
+        output = output[None, :, :]
+
+    # Convert to float32 if needed (AMLNNLite might return int8)
+    if output.dtype != np.float32:
+        output = output.astype(np.float32)
+
+    # Use demo_postprocess to decode coordinates
+    predictions = demo_postprocess(output, img_size, p6=p6)[0]  # [8400, 85]
+
+    # Extract boxes and scores
+    # Format after demo_postprocess: [cx, cy, w, h, obj_conf, class0, ..., class79]
+    boxes = predictions[:, :4]  # [cx, cy, w, h] (absolute coordinates)
+    scores = predictions[:, 4:5] * predictions[:, 5:]  # obj_conf * cls_scores
+
+    # Convert to xyxy format
+    boxes_xyxy = np.ones_like(boxes)
+    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
+    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
+    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
+    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
+
+    # Map coordinates back to original image
+    pad_x, pad_y = pad
+    boxes_xyxy[:, [0, 2]] = (boxes_xyxy[:, [0, 2]] - pad_x) / scale
+    boxes_xyxy[:, [1, 3]] = (boxes_xyxy[:, [1, 3]] - pad_y) / scale
+    boxes_xyxy = np.maximum(boxes_xyxy, 0)
+
+    # Multiclass NMS (class-agnostic, score_thr=0.1 as in official YOLOX)
+    dets = multiclass_nms(boxes_xyxy, scores, nms_thr=iou_threshold, score_thr=0.1)
+
+    if dets is None:
+        return []
+
+    # Convert to detection format
+    final_boxes = dets[:, :4]
+    final_scores = dets[:, 4]
+    final_cls_inds = dets[:, 5].astype(int)
+
+    detections = []
+    for i in range(len(dets)):
+        x1, y1, x2, y2 = final_boxes[i]
+        confidence = final_scores[i]
+        class_id = final_cls_inds[i]
+
+        if confidence >= conf_threshold:
+            detections.append({
+                'bbox': [float(x1), float(y1), float(x2), float(y2)],
+                'confidence': float(confidence),
+                'class_id': int(class_id),
+                'class_name': CLASS_NAMES[class_id] if class_id < len(CLASS_NAMES) else f'class_{class_id}'
+            })
+
+    return detections
+
+# YOLOX color palette (consistent with python_x.py)
+_COLORS = (
+    np.array(
+        [
+            0.000, 0.447, 0.741,
+            0.850, 0.325, 0.098,
+            0.929, 0.694, 0.125,
+            0.494, 0.184, 0.556,
+            0.466, 0.674, 0.188,
+            0.301, 0.745, 0.933,
+            0.635, 0.078, 0.184,
+            0.300, 0.300, 0.300,
+            0.600, 0.600, 0.600,
+            1.000, 0.000, 0.000,
+            1.000, 0.500, 0.000,
+            0.749, 0.749, 0.000,
+            0.000, 1.000, 0.000,
+            0.000, 0.000, 1.000,
+            0.667, 0.000, 1.000,
+            0.333, 0.333, 0.000,
+            0.333, 0.667, 0.000,
+            0.333, 1.000, 0.000,
+            0.667, 0.333, 0.000,
+            0.667, 0.667, 0.000,
+            0.667, 1.000, 0.000,
+            1.000, 0.333, 0.000,
+            1.000, 0.667, 0.000,
+            1.000, 1.000, 0.000,
+            0.000, 0.333, 0.500,
+            0.000, 0.667, 0.500,
+            0.000, 1.000, 0.500,
+            0.333, 0.000, 0.500,
+            0.333, 0.333, 0.500,
+            0.333, 0.667, 0.500,
+            0.333, 1.000, 0.500,
+            0.667, 0.000, 0.500,
+            0.667, 0.333, 0.500,
+            0.667, 0.667, 0.500,
+            0.667, 1.000, 0.500,
+            1.000, 0.000, 0.500,
+            1.000, 0.333, 0.500,
+            1.000, 0.667, 0.500,
+            1.000, 1.000, 0.500,
+            0.000, 0.333, 1.000,
+            0.000, 0.667, 1.000,
+            0.000, 1.000, 1.000,
+            0.333, 0.000, 1.000,
+            0.333, 0.333, 1.000,
+            0.333, 0.667, 1.000,
+            0.333, 1.000, 1.000,
+            0.667, 0.000, 1.000,
+            0.667, 0.333, 1.000,
+            0.667, 0.667, 1.000,
+            0.667, 1.000, 1.000,
+            1.000, 0.000, 1.000,
+            1.000, 0.333, 1.000,
+            1.000, 0.667, 1.000,
+            0.333, 0.000, 0.000,
+            0.500, 0.000, 0.000,
+            0.667, 0.000, 0.000,
+            0.833, 0.000, 0.000,
+            1.000, 0.000, 0.000,
+            0.000, 0.167, 0.000,
+            0.000, 0.333, 0.000,
+            0.000, 0.500, 0.000,
+            0.000, 0.667, 0.000,
+            0.000, 0.833, 0.000,
+            0.000, 1.000, 0.000,
+            0.000, 0.000, 0.167,
+            0.000, 0.000, 0.333,
+            0.000, 0.000, 0.500,
+            0.000, 0.000, 0.667,
+            0.000, 0.000, 0.833,
+            0.000, 0.000, 1.000,
+            0.000, 0.000, 0.000,
+            0.143, 0.143, 0.143,
+            0.857, 0.857, 0.857,
+            1.000, 1.000, 1.000
+        ]
+    ).astype(np.float32).reshape(-1, 3)
+)
+
+def vis(img, detections, conf=0.5, class_names=None):
+    """
+    YOLOX official visualization function (based on python_x.py)
+    """
+    if class_names is None:
+        class_names = CLASS_NAMES
+
+    result_img = img.copy()
+
+    # Adjust font size based on image size
+    img_height, img_width = img.shape[:2]
+    font_scale = max(0.6, min(1.2, np.sqrt(img_height * img_height + img_width * img_width) * 0.0015))
+    thickness = max(2, int(font_scale * 2.5))
+
+    for det in detections:
+        if det['confidence'] < conf:
+            continue
+
+        x1, y1, x2, y2 = [int(coord) for coord in det['bbox']]
+        confidence = det['confidence']
+        class_id = det['class_id']
+
+        if class_id >= len(_COLORS):
+            class_id = class_id % len(_COLORS)
+
+        color = (_COLORS[class_id] * 255).astype(np.uint8).tolist()
+        text = '{}:{:.1f}%'.format(det['class_name'], confidence * 100)
+        txt_color = (0, 0, 0) if np.mean(_COLORS[class_id]) > 0.5 else (255, 255, 255)
+        font = cv2.FONT_HERSHEY_SIMPLEX
+
+        txt_size = cv2.getTextSize(text, font, font_scale, thickness)[0]
+        cv2.rectangle(result_img, (x1, y1), (x2, y2), color, thickness)
+
+        txt_bk_color = (_COLORS[class_id] * 255 * 0.7).astype(np.uint8).tolist()
+        cv2.rectangle(
+            result_img,
+            (x1, y1 + 1),
+            (x1 + txt_size[0] + 1, y1 + int(1.5 * txt_size[1])),
+            txt_bk_color,
+            -1
+        )
+        cv2.putText(result_img, text, (x1, y1 + txt_size[1]), font, font_scale, txt_color, thickness=thickness)
+
+    return result_img
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--model-path', default='./yolox_s_int8_A311D2.adla')
+    parser.add_argument('--run-cycles', default= 1, type=int)
+    parser.add_argument('--input-path', default='./', help='Input image path (file or directory)')
+    args = parser.parse_args()
+
+    # Initialize AMLNNLite
+    amlnn = AMLNNLite()
+    amlnn.config(
+        model_path=args.model_path,           # Model file path, Support ADLD and quantized TFlite models
+        run_cycles=args.run_cycles
+    )
+    amlnn.init()
+
+    # Find image files
+    image_files = []
+    if os.path.isfile(args.input_path):
+        # Single image file
+        image_files = [args.input_path]
+    elif os.path.isdir(args.input_path):
+        # Directory - find all image files
+        image_extensions = ["*.jpg", "*.jpeg", "*.png", "*.bmp"]
+        for ext in image_extensions:
+            image_files.extend(glob.glob(os.path.join(args.input_path, ext)))
+            image_files.extend(glob.glob(os.path.join(args.input_path, ext.upper())))
+    else:
+        print(f"Error: Input path '{args.input_path}' does not exist")
+        amlnn.uninit()
+        return
+    
+    if not image_files:
+        print(f"No image files found in {args.input_path}")
+        amlnn.uninit()
+        return
+
+    print(f"Found {len(image_files)} image files to process:")
+    for img_file in image_files:
+        print(f"  - {os.path.basename(img_file)}")
+    print()
+
+    # Process each image
+    for i, image_path in enumerate(image_files, 1):
+        print(f"=" * 60)
+        print(f"Processing image {i}/{len(image_files)}: {os.path.basename(image_path)}")
+        print(f"=" * 60)
+
+        try:
+            # Preprocess input
+            input_tensor, original_img, scale, pad = preprocess(image_path, new_shape=(640, 640), data_format='NHWC')
+
+            # Run inference
+            outputs = amlnn.inference(
+                inputs=[input_tensor]
+            )
+
+            # Postprocess results
+            detections = postprocess(outputs, scale, pad, img_size=(640, 640), conf_threshold=0.25, iou_threshold=0.45, p6=False)
+
+            # Print detection results
+            if detections:
+                print(f"    Detected {len(detections)} objects:")
+                for i, det in enumerate(detections, 1):
+                    print(f"      {i}. {det['class_name']} ({det['confidence']:.2f})")
+            else:
+                print("    No objects detected")
+
+            # Save result image (save to current directory)
+            img_name = Path(image_path).stem
+            save_path = f"{img_name}_result.jpg"
+            result_img = vis(original_img, detections, conf=0.25, class_names=CLASS_NAMES)
+            cv2.imwrite(save_path, result_img)
+            print(f"    Result saved to: {save_path}")
+
+        except Exception as e:
+            print(f"Error processing {os.path.basename(image_path)}: {e}")
+
+        print()
+
+    # Optional visualization
+    amlnn.visualize()
+
+    # Release resources
+    amlnn.uninit()
+
+if __name__ == "__main__":
+    main()