diff --git a/examples/LLMs/cpp/Android.mk b/examples/LLMs/cpp/Android.mk
new file mode 100644
index 0000000..4d46c5b
--- /dev/null
+++ b/examples/LLMs/cpp/Android.mk
@@ -0,0 +1,25 @@
+# ndk-build module: builds main.cpp into the demo_llm_main executable and links it
+# against libllmsdk from $(LLM_SDK_PATH)/libs/arm64-v8a.
+LOCAL_PATH := $(call my-dir)
+LLM_SDK_PATH := $(LOCAL_PATH)/../../01_src
+3RDPARTY_PATH := $(LOCAL_PATH)/../../3rdparty
+# Echo the resolved module directory in the build log.
+$(warning $(LOCAL_PATH))
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := demo_llm_main
+LOCAL_SRC_FILES := main.cpp
+
+LOCAL_C_INCLUDES := \
+    $(LLM_SDK_PATH)/jni \
+    $(3RDPARTY_PATH)/include
+
+# Linker options (such as -fuse-ld) go in LOCAL_LDFLAGS; LOCAL_LDLIBS is for -l<system library>.
+LOCAL_LDFLAGS := \
+    -fuse-ld=ld \
+    -L$(LLM_SDK_PATH)/libs/arm64-v8a -lllmsdk
+
+LOCAL_LDLIBS := -llog -ldl -lm
+
+include $(BUILD_EXECUTABLE)
diff --git a/examples/LLMs/cpp/Application.mk b/examples/LLMs/cpp/Application.mk
new file mode 100644
index 0000000..f1a4e43
--- /dev/null
+++ b/examples/LLMs/cpp/Application.mk
@@ -0,0 +1,14 @@
+# ndk-build application-wide settings for the LLM demo.
+APP_PLATFORM := android-28
+APP_ABI := arm64-v8a
+# APP_ABI := armeabi-v7a
+# APP_ABI := armeabi armeabi-v7a arm64-v8a
+APP_STL := c++_static
+
+APP_CPPFLAGS += -std=c++17
+APP_CFLAGS := -Wno-error=format-security
+
+# ndk-build reads APP_OPTIM (release or debug); APP_OPTION is not a variable it recognises.
+APP_OPTIM := release
+
+# APP_BUILD_SCRIPT := Android.mk
\ No newline at end of file
diff --git a/examples/LLMs/cpp/CMakeLists.txt b/examples/LLMs/cpp/CMakeLists.txt
new file mode 100644
index 0000000..f72b0a4
--- /dev/null
+++ b/examples/LLMs/cpp/CMakeLists.txt
@@ -0,0 +1,54 @@
+cmake_minimum_required(VERSION 3.5.1)
+
+set(CMAKE_SYSTEM_NAME Linux)
+project(AML_LLM_NNSDK)
+
+# Note (xinxin): when building the .so for Yocto with CMake later, the sysroot settings in these CMakeLists.txt files can all be removed.
+# Use the officially recommended flow instead: sourcing the environment script adds many environment variables (inspect them with `export`), and CMake then configures itself from them, so nothing has to be set in CMakeLists.txt.
+# source /mnt/fileroot/xinxin.he/environment/new-yocto/64/environment-setup-armv8a-poky-linux
+# export CXXFLAGS=$(echo "$CXXFLAGS" | sed 's/-g//g')
+# export CFLAGS=$(echo "$CXXFLAGS" | sed 's/-g//g')
+# cmake -DCMAKE_TOOLCHAIN_FILE=${OE_CMAKE_TOOLCHAIN_FILE} ..
+
+# # Set up the Yocto cross-compilation environment
+# set(SYSROOT_PATH /mnt/fileroot/xinxin.he/environment/new-yocto/64/sysroots/x86_64-pokysdk-linux)
+# set(CMAKE_SYSROOT "${SYSROOT_PATH}")
+# message(STATUS "Using sysroot path as ${SYSROOT_PATH}")
+
+# include(CMakeForceCompiler)
+# cmake_force_c_compiler("${SYSROOT_PATH}/usr/bin/aarch64-poky-linux/aarch64-poky-linux-gcc" GNU)
+# cmake_force_cxx_compiler("${SYSROOT_PATH}/usr/bin/aarch64-poky-linux/aarch64-poky-linux-g++" GNU)
+
+# # Set the sysroot of the actual target board
+# set(MYSYSROOT "/mnt/fileroot/xinxin.he/environment/new-yocto/64/sysroots/armv8a-poky-linux")
+# add_definitions("--sysroot=${MYSYSROOT}")
+# set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --sysroot=${MYSYSROOT}" CACHE INTERNAL "" FORCE)
+# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --sysroot=${MYSYSROOT}" CACHE INTERNAL "" FORCE)
+# set(CMAKE_FIND_ROOT_PATH "${MYSYSROOT}")
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+# SDK paths
+set(LLM_NNSDK_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../01_src/jni")
+set(NNSDK_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../01_src/jni/nnsdk_v2.8.1_2025_0317/lib/linux/lib64_yocto")
+
+# Header search paths
+include_directories(
+    ${LLM_NNSDK_PATH}
+)
+
+# Sources: list main.cpp explicitly. This directory also holds main_func_call.cpp, which defines its own main(), so collecting every source here would put two main() into one executable.
+set(SRC_LIST main.cpp)
+
+# Build the executable
+add_executable(demo_llm_yocto ${SRC_LIST})
+
+# Libraries to link
+target_link_libraries(demo_llm_yocto
+    ${LLM_NNSDK_PATH}/build/libllm_nnsdk.so
+    ${NNSDK_PATH}/libnnsdk.so
+    pthread
+    m
+    dl
+)
diff --git a/examples/LLMs/cpp/main.cpp b/examples/LLMs/cpp/main.cpp
new file mode 100644
index 0000000..cb388e7
--- /dev/null
+++ b/examples/LLMs/cpp/main.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include "llmsdk.h"
+
+// Per-request state passed to aml_llm_run() as `userdata` and read back in callback().
+typedef struct {
+    int request_id;  // number of the current prompt, printed in the "[Request #N]" header
+    bool printed;    // true once that header has been printed for the current request
+} MyUserData;
+
+// Result callback registered through aml_llm_init().
+//   AML_LLM_RUN_NORMAL: print result->text (preceded once per request by the header)
+//   AML_LLM_RUN_FINISH: print a newline
+//   AML_LLM_RUN_ERROR:  print "run error"
+// Calls without userdata are ignored.
+void callback(AML_LLMResult *result, void *userdata, AML_LLMRunStatus run_status) {
+    if (!userdata) return;
+    MyUserData *my_data = static_cast<MyUserData *>(userdata);
+
+    if (run_status == AML_LLM_RUN_NORMAL) {
+        // printf("%s", nullptr) is undefined behaviour, so skip calls that carry no text.
+        if (!result || !result->text) return;
+
+        if (!my_data->printed) {
+            printf("[Request #%d]\n", my_data->request_id);
+            my_data->printed = true;
+        }
+        printf("%s", result->text);
+        // printf("%d,", result->token_id);
+        fflush(stdout);  // make the text visible immediately instead of waiting for a full buffer
+    } else if (run_status == AML_LLM_RUN_FINISH) {
+        printf("\n");
+    } else if (run_status == AML_LLM_RUN_ERROR) {
+        printf("run error\n");
+    }
+}
+
+// Interactive demo: argv[1] is the model path, argv[2] the tokenizer path.
+// Reads prompts from stdin until "exit" or end of input.
+// Returns 0 on a normal exit, -1 on bad usage or failed initialisation.
+int main(int argc, char **argv) {
+    if (argc < 3) {
+        printf("Usage: %s <model_path> <tokenizer_path>\n", argv[0]);
+        return -1;
+    }
+
+    printf("\nWelcome to Amlogic LLM Demo!\n");
+
+    LLMContext context = nullptr;
+    AML_LLMInitConfig init_config;
+    memset(&init_config, 0, sizeof(AML_LLMInitConfig));
+    init_config.model_path = argv[1];
+    init_config.tokenizer_path = argv[2];
+    init_config.sampling_mode = AML_LLM_ARG_Max;
+    init_config.top_k = 3;
+    init_config.top_p = 0.9f;
+    init_config.temperature = 1.0f;
+    init_config.repeat_penalty = 1.1f;
+
+    // Never enter the prompt loop with a context that failed to initialise.
+    if (aml_llm_init(&context, &init_config, callback) != AML_LLM_Status_Success) {
+        fprintf(stderr, "Init failed: aml_llm_init\n");
+        return -1;
+    }
+
+    AML_LLMInput input;
+    memset(&input, 0, sizeof(AML_LLMInput));
+    input.input_type = AML_LLM_INPUT_PROMPT;
+
+    AML_LLMRunConfig run_config;
+    memset(&run_config, 0, sizeof(AML_LLMRunConfig));
+    run_config.run_mode = AML_LLM_RUN_GENERATE;
+    run_config.retain_history = 0;
+
+    MyUserData my_data;
+    memset(&my_data, 0, sizeof(MyUserData));
+
+    printf("\nType your prompt and press Enter.\n");
+    printf("Commands: [exit] to quit, [new_talk] to reset context.\n");
+
+    std::string input_str;
+    while (true) {
+        printf("\nLLM@Amlogic>>> ");
+        // On EOF or a read error getline fails and leaves the line empty; without this
+        // check the loop would print "Please enter your question!" forever.
+        if (!std::getline(std::cin, input_str)) break;
+        if (input_str == "exit") break;
+
+        if (input_str == "new_talk") {
+            aml_llm_reset(context);
+            continue;
+        }
+        if (input_str.empty()) {
+            printf("Please enter your question!\n");
+            continue;
+        }
+
+        my_data.request_id++;
+        my_data.printed = false;
+        input.prompt_input = input_str.c_str();  // stays valid until input_str changes on the next iteration
+
+        aml_llm_run(context, &input, &run_config, &my_data);
+    }
+
+    printf("Bye~\n");
+
+    aml_llm_uninit(context);
+    return 0;
+}
\ No newline at end of file
diff --git a/examples/LLMs/cpp/main_func_call.cpp b/examples/LLMs/cpp/main_func_call.cpp
new file mode 100644
index 0000000..a8524d1
--- /dev/null
+++ b/examples/LLMs/cpp/main_func_call.cpp
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) 2024–2025 Amlogic, Inc. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <cstddef>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "llmsdk.h"
+#include <nlohmann/json.hpp>
+
+// State shared with result_callback() through the SDK `userdata` pointer:
+// - printed: echo streamed text to stdout as it arrives
+// - buffer: collects every streamed chunk (first-pass output in external two-run mode)
+struct DemoUserData {
+    bool printed = true;
+    int request_id = 0;  // initialised so the field is never read while indeterminate
+    std::string buffer;
+};
+
+// Stream callback: appends each text chunk to DemoUserData::buffer and echoes it to
+// stdout while `printed` is set. Calls that carry no userdata are ignored.
+static void result_callback(AML_LLMResult* result, void* userdata, AML_LLMRunStatus run_status) {
+    auto* mydata = static_cast<DemoUserData*>(userdata);
+    if (!mydata) return;
+
+    if (run_status == AML_LLM_RUN_NORMAL) {
+        if (!result || !result->text) return;
+        if (mydata->printed) {
+            std::cout << result->text;
+            std::cout.flush();
+        }
+        mydata->buffer.append(result->text);
+    } else if (run_status == AML_LLM_RUN_FINISH) {
+        if (mydata->printed) std::cout << "\n[FINISHED]\n";
+    } else if (run_status == AML_LLM_RUN_ERROR) {
+        std::cerr << "\n[ERROR]\n";
+    }
+}
+
+// Internal tool execution callback (JNI-internal mode).
+// Stores a strdup()'d JSON reply in *out_result_buffer and returns 0 for a known tool;
+// returns -1 for an unknown tool (an error JSON is stored) or a null out_result_buffer.
+static int tool_callback(const char* tool_name, const char* arguments_json, void* /*userdata*/, char** out_result_buffer) {
+    // Streaming or strcmp()-ing a null char* is undefined behaviour, so both are guarded.
+    std::cout << "\n[TOOL CALL - internal] name=" << (tool_name ? tool_name : "(null)")
+              << " args=" << (arguments_json ? arguments_json : "(null)")
+              << "\n";
+    if (!out_result_buffer) return -1;
+
+    if (tool_name && strcmp(tool_name, "get_weather") == 0) {
+        const char* resp = "{\"city\":\"Hangzhou\",\"temperature\":\"25C\",\"condition\":\"Sunny\"}";
+        *out_result_buffer = strdup(resp); // Freed by SDK
+        return 0;
+    }
+    const char* fallback = "{\"error\":\"tool_not_found\"}";
+    *out_result_buffer = strdup(fallback);
+    return -1;
+}
+
+// External tool execution stub (for demo): canned weather JSON, or an error object.
+static std::string external_execute_tool(const std::string& tool_name, const std::string& /*args_json*/) {
+    if (tool_name == "get_weather") {
+        return "{\"city\":\"Hangzhou\",\"temperature\":\"25C\",\"condition\":\"Sunny\"}";
+    }
+    return "{\"error\":\"tool_not_found\"}";
+}
+
+// Returns every balanced top-level {...} span of `text`, in order of appearance.
+// Braces inside double-quoted strings are ignored; an unterminated object is dropped.
+static std::vector<std::string> extract_json_objects(const std::string& text) {
+    std::vector<std::string> objects;
+    std::size_t start = 0;
+    int depth = 0;
+    bool in_string = false;
+    bool escaped = false;
+    for (std::size_t i = 0; i < text.size(); ++i) {
+        const char ch = text[i];
+        if (depth == 0) {
+            if (ch == '{') { start = i; depth = 1; }
+        } else if (in_string) {
+            if (escaped) escaped = false;
+            else if (ch == '\\') escaped = true;
+            else if (ch == '"') in_string = false;
+        } else if (ch == '"') {
+            in_string = true;
+        } else if (ch == '{') {
+            ++depth;
+        } else if (ch == '}' && --depth == 0) {
+            objects.push_back(text.substr(start, i - start + 1));
+        }
+    }
+    return objects;
+}
+
+// Parse tool-call JSON objects from model output. Objects are located by brace
+// balancing: a lazy \{...\} regex stops at the first '}' and therefore truncates
+// every call whose "arguments" value is itself an object.
+struct ToolCall { std::string name; std::string args_json; };
+static std::vector<ToolCall> parse_tool_calls(const std::string& text) {
+    std::vector<ToolCall> calls;
+    printf("model_output: %s\n", text.c_str());
+
+    for (const std::string& json_str : extract_json_objects(text)) {
+        printf("Matched tool_call JSON: %s\n", json_str.c_str());
+        nlohmann::json parsed = nlohmann::json::parse(json_str, nullptr, /* allow_exceptions = */ false);
+        if (parsed.is_discarded()) {
+            printf("Function Calling JSON parse failed: %s\n", json_str.c_str());
+            continue;
+        }
+        // Accept only string names: get<std::string>() throws on any other JSON type.
+        std::string tool;
+        if (parsed.contains("tool_name") && parsed["tool_name"].is_string()) tool = parsed["tool_name"].get<std::string>();
+        else if (parsed.contains("name") && parsed["name"].is_string()) tool = parsed["name"].get<std::string>();
+        const std::string args = parsed["arguments"].dump();  // "null" when the key is absent
+        if (!tool.empty()) calls.push_back(ToolCall{tool, args});
+        printf("tool: %s, arguments: %s\n", tool.c_str(), args.c_str());
+    }
+    return calls;
+}
+
+// Build a JSON array with the result of running every call through external_execute_tool().
+static std::string build_tool_results_json(const std::vector<ToolCall>& calls) {
+    std::string out = "[";
+    for (std::size_t i = 0; i < calls.size(); ++i) {
+        if (i > 0) out += ",";
+        out += external_execute_tool(calls[i].name, calls[i].args_json);
+    }
+    out += "]";
+    return out;
+}
+
+// Function-calling demo. argv[1] / argv[2]: model and tokenizer paths (placeholders if omitted).
+// argv[3] == "internal" runs once with the registered tool callback; anything else runs the
+// external flow: first run -> parse tool calls -> execute them -> second run with the results.
+// Returns 0 on success, -1 when an SDK call fails or no tool call could be parsed.
+int main(int argc, char** argv) {
+    // Init LLM context
+    LLMContext ctx = nullptr;
+
+    AML_LLMInitConfig init_cfg;
+    std::memset(&init_cfg, 0, sizeof(init_cfg));
+    init_cfg.model_path = argc > 1 ? argv[1] : "/path/to/your/model.bin"; // TODO: replace
+    init_cfg.tokenizer_path = argc > 2 ? argv[2] : "/path/to/your/tokenizer.model"; // TODO: replace
+    init_cfg.sampling_mode = AML_LLM_TOP_P;
+    init_cfg.top_k = 0;
+    init_cfg.top_p = 0.9f;
+    init_cfg.temperature = 0.7f;
+    init_cfg.repeat_penalty = 1.0f;
+
+    if (aml_llm_init(&ctx, &init_cfg, result_callback) != AML_LLM_Status_Success) {
+        std::cerr << "Init failed: aml_llm_init\n";
+        return -1;
+    }
+
+    // Define tools JSON exposed to model
+    const char* tools_json = R"JSON(
+[
+  {
+    "name": "get_weather",
+    "description": "Get current weather for a city",
+    "parameters": {
+      "type": "object",
+      "properties": {
+        "city": {
+          "type": "string",
+          "description": "City name"
+        }
+      },
+      "required": ["city"]
+    }
+  }
+]
+)JSON";
+
+    const char* tool_response_tag = "tool_result";
+
+    // Mode select: "internal" (single run) or default "external" (two runs)
+    const bool use_internal = (argc > 3 && std::string(argv[3]) == "internal");
+
+    if (aml_llm_enable_function_calling(
+            ctx,
+            /*system_prompt=*/nullptr,
+            tools_json,
+            tool_response_tag) != AML_LLM_Status_Success) {
+        std::cerr << "Enable function calling failed: aml_llm_enable_function_calling\n";
+        aml_llm_uninit(ctx);
+        return -1;
+    }
+
+    AML_LLMInput user_input;
+    std::memset(&user_input, 0, sizeof(user_input));
+    user_input.input_type = AML_LLM_INPUT_PROMPT;
+    user_input.prompt_input = "What's the weather in Hangzhou now?";
+
+    AML_LLMRunConfig run_cfg;
+    std::memset(&run_cfg, 0, sizeof(run_cfg));
+    run_cfg.run_mode = AML_LLM_RUN_GENERATE;
+    run_cfg.retain_history = 0;
+
+    if (use_internal) {
+        std::cout << "[Mode] Internal tool callback (single run)\n";
+        if (aml_llm_register_tool_callback(ctx, tool_callback, nullptr) != AML_LLM_Status_Success) {
+            std::cerr << "Register tool callback failed: aml_llm_register_tool_callback\n";
+            aml_llm_uninit(ctx);
+            return -1;
+        }
+
+        std::cout << "[User] " << user_input.prompt_input << "\n[Assistant] ";
+        DemoUserData ud;
+        ud.printed = true;
+        if (aml_llm_run(ctx, &user_input, &run_cfg, &ud) != AML_LLM_Status_Success) {
+            std::cerr << "\naml_llm_run failed\n";
+            aml_llm_uninit(ctx);
+            return -1;  // a failed run must not be reported as success
+        }
+    } else {
+        std::cout << "[Mode] External tool execution (two runs)\n";
+        // First pass: ask the question and collect the model output containing the tool-call JSON
+        std::cout << "[User] " << user_input.prompt_input << "\n[Model] Planning tool calls...\n";
+        DemoUserData ud1;
+        ud1.printed = true;
+        if (aml_llm_run(ctx, &user_input, &run_cfg, &ud1) != AML_LLM_Status_Success) {
+            std::cerr << "First run aml_llm_run failed\n";
+            aml_llm_uninit(ctx);
+            return -1;
+        }
+
+        // Extract tool calls
+        const auto calls = parse_tool_calls(ud1.buffer);
+        if (calls.empty()) {
+            std::cerr << "No tool calls parsed. First run output:\n" << ud1.buffer << "\n";
+            aml_llm_uninit(ctx);
+            return -1;
+        }
+
+        // Execute tools externally and build JSON array
+        const std::string tool_results = build_tool_results_json(calls);
+        std::cout << "[External] Tool execution complete, results: " << tool_results << "\n";
+
+        // ===================================================
+        // External Mode: Second run after tool execution
+        // ===================================================
+        const std::string system_prompt =
+            "You are a helpful assistant. "
+            "Use the provided tool results to answer the user's question naturally.";
+
+        // prefix: the original question followed by the tool results block
+        std::string prompt_prefix;
+        prompt_prefix += "<|im_start|>user\n";
+        prompt_prefix += user_input.prompt_input; // e.g. "What's the weather in Hangzhou now?"
+        prompt_prefix += "\n<|im_end|>\n";
+        prompt_prefix += "<|im_start|>tool\n";
+        prompt_prefix += tool_results; // e.g. [{"city":"Hangzhou","temperature":"25C","condition":"Sunny"}]
+        prompt_prefix += "\n<|im_end|>\n<|im_start|>assistant\n";
+
+        // postfix: normally empty for direct answer
+        const std::string prompt_postfix;
+
+        // set a new chat template for answer mode
+        aml_llm_set_chat_template(
+            ctx,
+            system_prompt.c_str(),
+            prompt_prefix.c_str(),
+            prompt_postfix.c_str()
+        );
+
+        // prepare input
+        AML_LLMInput answer_input{};
+        answer_input.input_type = AML_LLM_INPUT_PROMPT;
+        answer_input.prompt_input = "";  // no user text, context is already in template
+
+        // Run again with fresh user data so the answer is not appended to the first-pass buffer.
+        DemoUserData ud2;
+        ud2.printed = true;
+        if (aml_llm_run(ctx, &answer_input, &run_cfg, &ud2) != AML_LLM_Status_Success) {
+            std::cerr << "Second run aml_llm_run failed\n";
+            aml_llm_uninit(ctx);
+            return -1;
+        }
+    }
+
+    aml_llm_uninit(ctx);
+    return 0;
+}
diff --git a/examples/LLMs/model/llm_result.png b/examples/LLMs/model/llm_result.png
new file mode 100644
index 0000000..ef730b6
Binary files /dev/null and b/examples/LLMs/model/llm_result.png differ
diff --git a/examples/LLMs/readme.md b/examples/LLMs/readme.md
new file mode 100644
index 0000000..5fb3c83
--- /dev/null
+++ b/examples/LLMs/readme.md
@@ -0,0 +1 @@
+![llm-result](./model/llm_result.png)
\ No newline at end of file