Fix(C++): Use const char* paths for cross-platform compatibility

Guan Yuankai 2025-11-14 18:22:41 +08:00
parent e8b3557522
commit 999a4e8c59
32 changed files with 24018 additions and 131 deletions

.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"cmake.sourceDirectory": "/home/gyk/sishu-yolo-sdk/cpp"
}

View File

@@ -6,31 +6,49 @@ set(CMAKE_CXX_STANDARD 17)
find_package(JNI REQUIRED)
# --- 2.
find_package(OpenCV REQUIRED)
if(WIN32)
message(STATUS "Configuring for Windows (using vcpkg OpenCV + manual ONNX)")
#
find_package(OpenCV REQUIRED)
#
set(ORT_MANUAL_PATH ${CMAKE_SOURCE_DIR}/../prebuilt_libs/onnxruntime-win-x64-1.23.2)
elseif(UNIX)
message(STATUS "Configuring for Linux (using apt OpenCV + manual ONNX)")
#
#
find_package(OpenCV REQUIRED)
#
set(ORT_MANUAL_PATH ${CMAKE_SOURCE_DIR}/../prebuilt_libs/onnxruntime-linux-x64-1.23.2)
endif()
# --- 3.
#
set(ORT_MANUAL_PATH ${CMAKE_SOURCE_DIR}/../prebuilt_libs/onnxruntime-win-x64-1.23.2)
#
find_path(ONNXRuntime_INCLUDE_DIRS
NAMES onnxruntime_cxx_api.h
HINTS ${ORT_MANUAL_PATH}/include
)
#
find_library(ONNXRuntime_LIBRARY
NAMES onnxruntime
HINTS ${ORT_MANUAL_PATH}/lib
)
#
if(NOT OpenCV_FOUND)
message(FATAL_ERROR "Failed to find OpenCV. On Linux, run 'sudo apt install libopencv-dev'. On Windows, ensure vcpkg is setup.")
endif()
if(NOT ONNXRuntime_INCLUDE_DIRS OR NOT ONNXRuntime_LIBRARY)
message(FATAL_ERROR "Failed to find manual ONNX Runtime at ${ORT_MANUAL_PATH}")
endif()
message(STATUS "Found Manual ONNX Runtime Include: ${ONNXRuntime_INCLUDE_DIRS}")
message(STATUS "Found Manual ONNX Runtime Library: ${ONNXRuntime_LIBRARY}")
message(STATUS "Found OpenCV: ${OpenCV_LIBS}")
message(STATUS "Found ONNX Runtime Include: ${ONNXRuntime_INCLUDE_DIRS}")
message(STATUS "Found ONNX Runtime Library: ${ONNXRuntime_LIBRARY}")
# --- 4.
@@ -43,19 +61,20 @@ add_library(my_yolo_sdk SHARED
target_include_directories(my_yolo_sdk PUBLIC
${JNI_INCLUDE_DIRS}
${OpenCV_INCLUDE_DIRS}
${ONNXRuntime_INCLUDE_DIRS} #
${ONNXRuntime_INCLUDE_DIRS}
include
)
target_link_libraries(my_yolo_sdk PRIVATE
${JNI_LIBRARIES}
${OpenCV_LIBS}
${ONNXRuntime_LIBRARY} #
${OpenCV_LIBS} #
${ONNXRuntime_LIBRARY}
)
# --- 6.
if(UNIX)
message(STATUS "Setting RPATH for Linux")
set_target_properties(my_yolo_sdk PROPERTIES
INSTALL_RPATH "$ORIGIN"
INSTALL_RPATH "$ORIGIN" #
)
endif()

View File

@@ -1,4 +1,3 @@
//
#pragma once
#include <opencv2/opencv.hpp>
@@ -8,7 +7,7 @@
#include <memory>
#include <stdexcept>
//
struct Detection
{
int class_id;
@@ -19,23 +18,23 @@ struct Detection
int height;
};
//
cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale);
std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold);
//
class YoloDetector {
public:
//
Ort::Env env;
std::unique_ptr<Ort::Session> session;
//
int input_width = 0;
int input_height = 0;
//
Ort::AllocatorWithDefaultOptions allocator;
std::string input_name_str;
std::string output_name_str;
@@ -43,7 +42,7 @@ public:
std::vector<const char*> output_node_names;
public:
YoloDetector(const wchar_t* model_path, int in_width, int in_height);
YoloDetector(const char* model_path, int in_width, int in_height);
std::vector<Detection> detect(
unsigned char* image_bytes,
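With the constructor switched to const char*, callers can pass a narrow (UTF-8) path on both Windows and Linux without the old wide-string conversion. A minimal usage sketch; the model path, input size, and thresholds below are illustrative, not part of this commit:

#include "YoloCore.h"
#include <iostream>
#include <opencv2/opencv.hpp>

int main() {
    // Hypothetical model path and 640x640 input size.
    YoloDetector detector("models/yolov8n.onnx", 640, 640);

    cv::Mat frame = cv::imread("test.jpg");  // BGR, CV_8UC3
    std::vector<Detection> dets = detector.detect(
        frame.data, frame.cols, frame.rows,
        /*conf_threshold=*/0.25f, /*iou_threshold=*/0.45f);

    for (const auto& d : dets)
        std::cout << d.class_id << " " << d.score << "\n";
    return 0;
}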

View File

@@ -1,11 +1,11 @@
#include "YoloCore.h" //
#include <opencv2/dnn.hpp> //
#include <iostream> //
#include <stdexcept> //
#include "YoloCore.h"
#include <opencv2/dnn.hpp>
#include <iostream>
#include <stdexcept>
/**
* @brief
* @details
* @brief
* @details
*/
cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int& pad_w, int& pad_h, float& scale) {
cv::Mat resized_img;
@@ -16,25 +16,25 @@ cv::Mat preprocess(const cv::Mat& img, int target_width, int target_height, int&
int new_h = static_cast<int>(h * scale);
cv::resize(img, resized_img, cv::Size(new_w, new_h), 0, 0, cv::INTER_AREA);
pad_w = target_width - new_w; //
pad_h = target_height - new_h; //
pad_w = target_width - new_w;
pad_h = target_height - new_h;
//
int top = pad_h / 2;
int bottom = pad_h - top;
int left = pad_w / 2;
int right = pad_w - left;
//
cv::Mat padded_img;
//
cv::copyMakeBorder(resized_img, padded_img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
return padded_img;
}
/**
* @brief
* @details
* @brief
* @details
*/
std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int pad_w, int pad_h, int img_w, int img_h, float conf_threshold, float iou_threshold) {
const auto output_shape = output_tensor.GetTensorTypeAndShapeInfo().GetShape();
@@ -63,7 +63,7 @@ std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int p
float w = proposal[2];
float h = proposal[3];
int left = static_cast<int>((cx - w / 2 - (pad_w / 2.0f)) / scale);
int top = static_cast<int>((cy - h / 2 - (pad_h / 2.0f)) / scale); //
int top = static_cast<int>((cy - h / 2 - (pad_h / 2.0f)) / scale);
int width = static_cast<int>(w / scale);
int height = static_cast<int>(h / scale);
left = std::max(0, std::min(left, img_w - 1));
@@ -88,23 +88,24 @@ std::vector<Detection> postprocess(Ort::Value& output_tensor, float scale, int p
/**
* @brief YoloDetector
*/
YoloDetector::YoloDetector(const wchar_t* model_path, int in_width, int in_height)
: env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-CPU"), //
YoloDetector::YoloDetector(const char* model_path, int in_width, int in_height)
: env(ORT_LOGGING_LEVEL_WARNING, "YOLOv8-ONNX-CPU"),
input_width(in_width),
input_height(in_height)
{
//
Ort::SessionOptions session_options;
//
// OrtCUDAProviderOptions cuda_options;
// session_options.AppendExecutionProvider_CUDA(cuda_options);
OrtCUDAProviderOptions cuda_options;
session_options.AppendExecutionProvider_CUDA(cuda_options);
//
session = std::make_unique<Ort::Session>(env, model_path, session_options);
//
//
input_name_str = session->GetInputNameAllocated(0, allocator).get();
output_name_str = session->GetOutputNameAllocated(0, allocator).get();
input_node_names.push_back(input_name_str.c_str());
@@ -121,34 +122,29 @@ std::vector<Detection> YoloDetector::detect(
float conf_threshold,
float iou_threshold)
{
//
cv::Mat image(image_height, image_width, CV_8UC3, image_bytes);
if (image.empty()) {
throw std::runtime_error("Input image is empty.");
}
//
int pad_w, pad_h;
float scale;
cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale); //
cv::Mat preprocessed_img = preprocess(image, input_width, input_height, pad_w, pad_h, scale);
//
cv::Mat blob;
cv::dnn::blobFromImage(preprocessed_img, blob, 1 / 255.0, cv::Size(), cv::Scalar(), true, false);
std::vector<int64_t> input_shape = { 1, 3, (int64_t)input_height, (int64_t)input_width };
//
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, blob.ptr<float>(), blob.total(), input_shape.data(), input_shape.size());
//
auto output_tensors = session->Run(Ort::RunOptions{ nullptr }, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1);
//
return postprocess(output_tensors[0], scale, pad_w, pad_h, image_width, image_height, conf_threshold, iou_threshold);
}
//
//
//
//
}
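Note that the constructor now appends the CUDA execution provider unconditionally, so construction throws if the linked onnxruntime build has no CUDA support. If a CPU fallback is acceptable, a hedged variant (a sketch, not what this commit does) catches the failure and continues with the default CPU provider:

Ort::SessionOptions session_options;
try {
    OrtCUDAProviderOptions cuda_options;  // defaults: device 0
    session_options.AppendExecutionProvider_CUDA(cuda_options);
} catch (const Ort::Exception& e) {
    // CPU-only build or missing CUDA runtime; fall back to the default CPU provider.
    std::cerr << "CUDA EP unavailable, falling back to CPU: " << e.what() << std::endl;
}
session = std::make_unique<Ort::Session>(env, model_path, session_options);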

View File

@@ -1,56 +1,54 @@
//
#include "com_bonus_sdk_YoloSdk.h"
#include "YoloCore.h" //
#include "com_bonus_sdk_YoloSdk.h"
#include "YoloCore.h"
#include <string>
#include <vector>
#include <stdexcept>
#include <iostream>
// ---
//
std::wstring jstringToWString(JNIEnv* env, jstring jStr) {
if (!jStr) return L"";
const jchar* raw = env->GetStringChars(jStr, nullptr);
if (!raw) return L"";
jsize len = env->GetStringLength(jStr);
std::wstring wStr(reinterpret_cast<const wchar_t*>(raw), len);
env->ReleaseStringChars(jStr, raw);
return wStr;
}
//
void throwJavaException(JNIEnv* env, const char* message) {
void throwJavaException(JNIEnv *env, const char *message) {
env->ThrowNew(env->FindClass("java/lang/RuntimeException"), message);
}
// ---
/*
* Class: com_mycompany_sdk_YoloSdk
* Class: com_bonus_sdk_YoloSdk
* Method: nativeInit
* Signature: (Ljava/lang/String;II)J
*/
JNIEXPORT jlong JNICALL Java_com_mycompany_sdk_YoloSdk_nativeInit
(JNIEnv* env, jobject thiz, jstring modelPath, jint inputWidth, jint inputHeight) {
try {
std::wstring wpath = jstringToWString(env, modelPath);
YoloDetector* detector = new YoloDetector(wpath.c_str(), inputWidth, inputHeight);
return reinterpret_cast<jlong>(detector);
JNIEXPORT jlong JNICALL Java_com_bonus_sdk_YoloSdk_nativeInit
(JNIEnv *env, jobject thiz, jstring modelPath, jint inputWidth, jint inputHeight) {
const char* c_model_path = env->GetStringUTFChars(modelPath, nullptr);
if (c_model_path == nullptr) {
throwJavaException(env, "Failed to get model path from Java string.");
return 0;
}
catch (const std::exception& e) {
std::string errMsg = "Failed to initialize C++ YoloDetector: " + std::string(e.what());
try {
YoloDetector* detector = new YoloDetector(c_model_path, inputWidth, inputHeight);
env->ReleaseStringUTFChars(modelPath, c_model_path);
return reinterpret_cast<jlong>(detector);
} catch (const std::exception& e) {
env->ReleaseStringUTFChars(modelPath, c_model_path);
std::string errMsg = "Failed to initialize C++ YoloDetector: " + std::string(e.what());
throwJavaException(env, errMsg.c_str());
return 0;
}
}
/*
* Class: com_mycompany_sdk_YoloSdk
* Class: com_bonus_sdk_YoloSdk
* Method: nativeRelease
* Signature: (J)V
*/
JNIEXPORT void JNICALL Java_com_mycompany_sdk_YoloSdk_nativeRelease
(JNIEnv* env, jobject thiz, jlong handle) {
JNIEXPORT void JNICALL Java_com_bonus_sdk_YoloSdk_nativeRelease
(JNIEnv *env, jobject thiz, jlong handle) {
YoloDetector* detector = reinterpret_cast<YoloDetector*>(handle);
if (detector) {
delete detector;
@@ -58,14 +56,14 @@ JNIEXPORT void JNICALL Java_com_mycompany_sdk_YoloSdk_nativeRelease
}
/*
* Class: com_mycompany_sdk_YoloSdk
* Class: com_bonus_sdk_YoloSdk
* Method: nativePredict
* Signature: (J[BIIFF)[Lcom/mycompany/sdk/Detection;
* Signature: (J[BIIFF)[Lcom/bonus/sdk/Detection;
*/
JNIEXPORT jobjectArray JNICALL Java_com_mycompany_sdk_YoloSdk_nativePredict
(JNIEnv* env, jobject thiz, jlong handle, jbyteArray bgrBytes,
jint imageWidth, jint imageHeight, jfloat confThreshold, jfloat iouThreshold) {
JNIEXPORT jobjectArray JNICALL Java_com_bonus_sdk_YoloSdk_nativePredict
(JNIEnv *env, jobject thiz, jlong handle, jbyteArray bgrBytes,
jint imageWidth, jint imageHeight, jfloat confThreshold, jfloat iouThreshold) {
YoloDetector* detector = reinterpret_cast<YoloDetector*>(handle);
if (!detector) {
throwJavaException(env, "Native handle is null.");
@@ -73,39 +71,35 @@ JNIEXPORT jobjectArray JNICALL Java_com_mycompany_sdk_YoloSdk_nativePredict
}
try {
// 1.
jbyte* bytes = env->GetByteArrayElements(bgrBytes, nullptr);
// 2.
std::vector<Detection> results_cpp = detector->detect(
reinterpret_cast<unsigned char*>(bytes),
imageWidth, imageHeight, confThreshold, iouThreshold
);
// 3.
env->ReleaseByteArrayElements(bgrBytes, bytes, JNI_ABORT);
// 4.
jclass detClass = env->FindClass("com/mycompany/sdk/Detection");
if (!detClass) return nullptr; //
jclass detClass = env->FindClass("com/bonus/sdk/Detection");
if (!detClass) return nullptr;
jmethodID detConstructor = env->GetMethodID(detClass, "<init>", "(IFFIIII)V");
if (!detConstructor) return nullptr; //
if (!detConstructor) return nullptr;
jobjectArray resultArray = env->NewObjectArray(results_cpp.size(), detClass, nullptr);
for (size_t i = 0; i < results_cpp.size(); ++i) {
const auto& d = results_cpp[i];
jobject javaDet = env->NewObject(detClass, detConstructor,
d.class_id, d.score,
d.x, d.y, d.width, d.height);
d.class_id, d.score,
d.x, d.y, d.width, d.height);
env->SetObjectArrayElement(resultArray, i, javaDet);
env->DeleteLocalRef(javaDet);
}
return resultArray;
}
catch (const std::exception& e) {
std::string errMsg = "Error during native prediction: " + std::string(e.what());
} catch (const std::exception& e) {
std::string errMsg = "Error during native prediction: " + std::string(e.what());
throwJavaException(env, errMsg.c_str());
return nullptr;
}
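The manual GetStringUTFChars/ReleaseStringUTFChars pairing in nativeInit must be repeated on every exit path; a small RAII wrapper (a sketch, not part of this commit) removes that risk. Note also that JNI hands back modified UTF-8, which is fine for ASCII paths but may need conversion for non-ASCII paths, particularly on Windows:

class JniUtfString {
public:
    JniUtfString(JNIEnv* env, jstring s)
        : env_(env), jstr_(s), chars_(env->GetStringUTFChars(s, nullptr)) {}
    ~JniUtfString() { if (chars_) env_->ReleaseStringUTFChars(jstr_, chars_); }
    const char* get() const { return chars_; }  // nullptr if the JVM failed to pin
private:
    JNIEnv* env_;
    jstring jstr_;
    const char* chars_;
};

// Usage inside nativeInit:
//   JniUtfString path(env, modelPath);
//   if (!path.get()) { throwJavaException(env, "Failed to get model path."); return 0; }
//   return reinterpret_cast<jlong>(new YoloDetector(path.get(), inputWidth, inputHeight));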

View File

@@ -0,0 +1 @@
a83fc4d58cb48eb68890dd689f94f28288cf2278

View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) Microsoft Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,21 @@
# Privacy
## Data Collection
The software may collect information about you and your use of the software and send it to Microsoft. Microsoft may use this information to provide services and improve our products and services. You may turn off the telemetry as described in the repository. There are also some features in the software that may enable you and Microsoft to collect data from users of your applications. If you use these features, you must comply with applicable law, including providing appropriate notices to users of your applications together with a copy of Microsoft's privacy statement. Our privacy statement is located at https://go.microsoft.com/fwlink/?LinkID=824704. You can learn more about data collection and use in the help documentation and our privacy statement. Your use of the software operates as your consent to these practices.
***
### Private Builds
No data collection is performed when using your private builds built from source code.
### Official Builds
ONNX Runtime does not maintain any independent telemetry collection mechanisms outside of what is provided by the platforms it supports. However, where applicable, ONNX Runtime will take advantage of platform-supported telemetry systems to collect trace events with the goal of improving product quality.
Currently telemetry is only implemented for Windows builds and is turned **ON** by default in the official builds distributed in their respective package management repositories ([see here](../README.md#binaries)). This may be expanded to cover other platforms in the future. Data collection is implemented via 'Platform Telemetry' per vendor platform providers (see [telemetry.h](../onnxruntime/core/platform/telemetry.h)).
#### Technical Details
The Windows provider uses the [TraceLogging](https://docs.microsoft.com/en-us/windows/win32/tracelogging/trace-logging-about) API for its implementation. This enables ONNX Runtime trace events to be collected by the operating system, and based on user consent, this data may be periodically sent to Microsoft servers following GDPR and privacy regulations for anonymity and data access controls.
Windows ML and onnxruntime C APIs allow Trace Logging to be turned on/off (see [API pages](../README.md#api-documentation) for details).
For information on how to enable and disable telemetry, see [C API: Telemetry](./C_API.md#telemetry).
There are equivalent APIs in the C#, Python, and Java language bindings as well.
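For reference, telemetry can be toggled programmatically per environment; a minimal C++ sketch using the documented Ort::Env calls:

#include <onnxruntime_cxx_api.h>

Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "my-app");
env.DisableTelemetryEvents();   // opt out (official Windows builds default to ON)
// env.EnableTelemetryEvents(); // opt back in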

View File

@@ -0,0 +1,49 @@
<p align="center"><img width="50%" src="docs/images/ONNX_Runtime_logo_dark.png" /></p>
**ONNX Runtime is a cross-platform inference and training machine-learning accelerator**.
**ONNX Runtime inference** can enable faster customer experiences and lower costs, supporting models from deep learning frameworks such as PyTorch and TensorFlow/Keras as well as classical machine learning libraries such as scikit-learn, LightGBM, XGBoost, etc. ONNX Runtime is compatible with different hardware, drivers, and operating systems, and provides optimal performance by leveraging hardware accelerators where applicable alongside graph optimizations and transforms. [Learn more &rarr;](https://www.onnxruntime.ai/docs/#onnx-runtime-for-inferencing)
**ONNX Runtime training** can accelerate the model training time on multi-node NVIDIA GPUs for transformer models with a one-line addition for existing PyTorch training scripts. [Learn more &rarr;](https://www.onnxruntime.ai/docs/#onnx-runtime-for-training)
## Get Started & Resources
* **General Information**: [onnxruntime.ai](https://onnxruntime.ai)
* **Usage documentation and tutorials**: [onnxruntime.ai/docs](https://onnxruntime.ai/docs)
* **YouTube video tutorials**: [youtube.com/@ONNXRuntime](https://www.youtube.com/@ONNXRuntime)
* [**Upcoming Release Roadmap**](https://onnxruntime.ai/roadmap)
* **Companion sample repositories**:
- ONNX Runtime Inferencing: [microsoft/onnxruntime-inference-examples](https://github.com/microsoft/onnxruntime-inference-examples)
- ONNX Runtime Training: [microsoft/onnxruntime-training-examples](https://github.com/microsoft/onnxruntime-training-examples)
## Releases
The current release and past releases can be found here: https://github.com/microsoft/onnxruntime/releases.
For details on the upcoming release, including release dates, announcements, features, and guidance on submitting feature requests, please visit the release roadmap: https://onnxruntime.ai/roadmap.
## Data/Telemetry
Windows distributions of this project may collect usage data and send it to Microsoft to help improve our products and services. See the [privacy statement](docs/Privacy.md) for more details.
## Contributions and Feedback
We welcome contributions! Please see the [contribution guidelines](CONTRIBUTING.md).
For feature requests or bug reports, please file a [GitHub Issue](https://github.com/Microsoft/onnxruntime/issues).
For general discussion or questions, please use [GitHub Discussions](https://github.com/microsoft/onnxruntime/discussions).
## Code of Conduct
This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/).
For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/)
or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments.
## License
This project is licensed under the [MIT License](LICENSE).

File diff suppressed because it is too large

View File

@@ -0,0 +1 @@
1.23.2

View File

@@ -0,0 +1,10 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
// CustomOpContext defines an interface allowing a custom op to access ep-specific resources.
struct CustomOpContext {
CustomOpContext() = default;
virtual ~CustomOpContext() {};
};
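An execution provider would hand a custom op a derived context; a hypothetical sketch (names are illustrative, not from this header):

// Hypothetical EP-specific context exposing a CUDA stream to a custom op.
struct MyCudaOpContext : public CustomOpContext {
  void* cuda_stream = nullptr;  // e.g. a cudaStream_t owned by the EP
  ~MyCudaOpContext() override = default;
};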

View File

@@ -0,0 +1,14 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
enum ResourceOffset {
cpu_resource_offset = 0,
cuda_resource_offset = 10000,
dml_resource_offset = 20000,
rocm_resource_offset = 30000,
// offsets for other ort eps
custom_ep_resource_offset = 10000000,
// offsets for customized eps
};
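Concrete resource IDs are expected to be defined per EP relative to these offsets; a hypothetical sketch:

// Hypothetical EP-side IDs: each provider numbers its resources from its offset.
enum MyCudaResource {
  my_cuda_stream = cuda_resource_offset,  // 10000
  my_cudnn_handle,                        // 10001
  my_cublas_handle                        // 10002
};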

View File

@@ -0,0 +1,19 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "onnxruntime_c_api.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* \param use_arena zero: false. non-zero: true.
*/
ORT_EXPORT
ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_CPU, _In_ OrtSessionOptions* options, int use_arena)
ORT_ALL_ARGS_NONNULL;
#ifdef __cplusplus
}
#endif
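Usage is a single call on the session options before creating the session; Ort::SessionOptions converts implicitly to OrtSessionOptions*. A minimal sketch:

Ort::SessionOptions so;
// use_arena: zero disables, non-zero enables the arena allocator (see above).
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(so, /*use_arena=*/1));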

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,988 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
// Do not include this file directly. Please include "onnxruntime_c_api.h" instead.
#ifdef __cplusplus
extern "C" {
#endif
ORT_RUNTIME_CLASS(Ep);
ORT_RUNTIME_CLASS(EpFactory);
ORT_RUNTIME_CLASS(EpGraphSupportInfo);
ORT_RUNTIME_CLASS(MemoryDevice); // opaque class to wrap onnxruntime::OrtDevice
ORT_RUNTIME_CLASS(NodeComputeContext);
ORT_RUNTIME_CLASS(DataTransferImpl);
ORT_RUNTIME_CLASS(SyncNotificationImpl);
ORT_RUNTIME_CLASS(SyncStreamImpl);
// struct that an EP implements for IDataTransfer to copy between devices it uses and CPU
struct OrtDataTransferImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Release the OrtDataTransferImpl instance.
*
* This is called by ORT when the OrtDataTransferImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtDataTransferImpl instance.
*
* \since Version 1.23.
*/
ORT_API_T(void, Release, _In_ OrtDataTransferImpl* this_ptr);
/** \brief Check if the implementation can copy between the source and destination memory devices.
*
* \param[in] this_ptr Pointer to the OrtDataTransferImpl instance.
* \param[in] src_memory_device Source OrtMemoryDevice to copy from.
* \param[in] dst_memory_device Destination OrtMemoryDevice to copy to.
* \return True if the implementation can copy between the devices.
*
* \since Version 1.23.
*/
ORT_API_T(bool, CanCopy, _In_ const OrtDataTransferImpl* this_ptr,
_In_ const OrtMemoryDevice* src_memory_device, _In_ const OrtMemoryDevice* dst_memory_device);
/** \brief Copy tensors from src_tensors to dst_tensors using the provided streams.
*
* The implementation can use the provided streams to perform asynchronous copies if supported.
* If a stream is not available, the copy is performed synchronously.
*
* \param[in] this_ptr Pointer to the OrtDataTransferImpl instance.
* \param[in] src_tensors Array of source OrtValue pointers to copy from.
* \param[in] dst_tensors Array of destination OrtValue pointers to copy to.
* \param[in] streams Array of OrtSyncStream pointers for the copy operations, if the execution provider is stream
* aware. nullptr if it is not.
* \param[in] num_tensors Number of tensors to copy.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CopyTensors, _In_ OrtDataTransferImpl* this_ptr,
_In_reads_(num_tensors) const OrtValue** src_tensors,
_In_reads_(num_tensors) OrtValue** dst_tensors,
_In_reads_(num_tensors) OrtSyncStream** streams,
_In_ size_t num_tensors);
};
/** \brief Struct that an EP implements for Stream Notifications.
*
* \since Version 1.23.
*/
struct OrtSyncNotificationImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Release the OrtSyncNotificationImpl instance.
*
* This is called by ORT when the OrtSyncNotificationImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
*
* \since Version 1.23.
*/
ORT_API_T(void, Release, _In_ OrtSyncNotificationImpl* this_ptr);
/** \brief Called by ORT to activate the notification.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Activate, _In_ OrtSyncNotificationImpl* this_ptr);
/** \brief Wait for a device to device operation to complete.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
* \param[in] consumer_stream The OrtSyncStream instance that will wait on this notification to be activated.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(WaitOnDevice, _In_ OrtSyncNotificationImpl* this_ptr, _In_ OrtSyncStream* consumer_stream);
/** \brief Wait for a device to host operation to complete.
*
* \param[in] this_ptr Pointer to the OrtSyncNotificationImpl instance.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(WaitOnHost, _In_ OrtSyncNotificationImpl* this_ptr);
};
/** \brief Struct that an EP implements if it wishes to implement Stream support.
*
* This struct provides the overrides for onnxruntime::Stream's virtual methods.
*
* \since Version 1.23.
*/
struct OrtSyncStreamImpl {
uint32_t ort_version_supported; ///< Must be initialized to ORT_API_VERSION
/** \brief Release the OrtSyncStreamImpl instance.
*
* This is called by ORT when the OrtSyncStreamImpl instance is no longer needed.
* The implementation should release any resources held by the instance.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
*
* \since Version 1.23.
*/
ORT_API_T(void, Release, _In_ OrtSyncStreamImpl* this_ptr);
/** \brief Get the handle of the stream.
*
* This returns the native handle for the stream. e.g. cudaStream_t for CUDA streams.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
* \return The handle of the stream.
*
* \since Version 1.23.
*/
ORT_API_T(void*, GetHandle, _In_ OrtSyncStreamImpl* this_ptr);
/** \brief Create an OrtSyncNotificationImpl for the OrtSyncStreamImpl instance.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance
* \param[out] notification The new OrtSyncNotificationImpl instance.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateNotification, _In_ OrtSyncStreamImpl* this_ptr,
_Outptr_ OrtSyncNotificationImpl** notification);
/** \brief Flush the stream.
*
* This is called by ORT to flush the stream, ensuring that all operations submitted to the stream are completed.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Flush, _In_ OrtSyncStreamImpl* this_ptr);
/** \brief Notify the stream that a session run has ended.
*
* This is called by ORT to notify the stream that a session run has ended, allowing the stream to perform any
* necessary cleanup or finalization.
*
* \param[in] this_ptr Pointer to the OrtSyncStreamImpl instance.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(OnSessionRunEnd, _In_ OrtSyncStreamImpl* this_ptr);
};
struct OrtNodeFusionOptions;
typedef struct OrtNodeFusionOptions OrtNodeFusionOptions;
struct OrtNodeComputeInfo;
typedef struct OrtNodeComputeInfo OrtNodeComputeInfo;
/**
* \brief The OrtNodeFusionOptions struct specifies options for fusing nodes supported by an execution provider.
*
* Refer to OrtEpApi::EpGraphSupportInfo_AddNodesToFuse.
*
* \since Version 1.23.
*/
struct OrtNodeFusionOptions {
/** \brief The ONNX Runtime version the OrtNodeFusionOptions was compiled with.
*
* Implementation should set to ORT_API_VERSION.
* ORT will use this to ensure it does not use members that were not available when the EP library was compiled.
*
* \since Version 1.23.
*/
uint32_t ort_version_supported;
/** \brief If set to true, specify that the execution provider does not require ONNX Runtime to provide constant
* initializers as inputs to the fused node during model inference. This is used when the execution
* provider saves a copy of constant initializers, and allows ONNX Runtime to release constant initializers that
* are not used by any execution provider.
*
* If not specified, defaults to false. That is, ONNX Runtime provides constant initializers as inputs to
* the fused node by default.
*
* \since Version 1.23.
*/
bool drop_constant_initializers;
// const OrtNode* fused_node_schema;
};
/**
* \brief The OrtNodeComputeInfo struct provides functions that an OrtEp implements to specify the compute
* function for a compiled OrtGraph instance.
* \since Version 1.23.
*/
struct OrtNodeComputeInfo {
/** \brief The ONNX Runtime version the OrtNodeComputeInfo was compiled with.
*
* Implementation should set to ORT_API_VERSION.
* ORT will use this to ensure it does not call functions that were not available when the EP library was compiled.
*
* \since Version 1.23.
*/
uint32_t ort_version_supported;
/** \brief Creates an opaque compute state object that is then passed to the Compute() function during inference.
* \param[in] this_ptr The OrtNodeComputeInfo instance.
* \param[in] compute_context OrtNodeComputeContext instance that contains compiled/fused node's name and host
* memory allocation functions. Can optionally be used to build the compute state.
* \param[out] compute_state Output parameter that is assigned the opaque computation state. ONNX Runtime calls
* ReleaseState() (after calling Compute()) to allow the implementer to release the
* compute state.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
OrtStatus*(ORT_API_CALL* CreateState)(_In_ OrtNodeComputeInfo* this_ptr,
_In_ OrtNodeComputeContext* compute_context,
_Outptr_ void** compute_state);
/** \brief Computation function called to execute the fused node compiled by an OrtEp instance.
* \param[in] this_ptr The OrtNodeComputeInfo instance.
* \param[in] compute_state The opaque computation state returned by CreateState().
* \param[in] kernel_context The OrtKernelContext instance used to access inputs/outputs.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
OrtStatus*(ORT_API_CALL* Compute)(_In_ OrtNodeComputeInfo* this_ptr, _In_ void* compute_state,
_In_ OrtKernelContext* kernel_context);
/** \brief Releases the compute state returned by CreateState().
* \param[in] this_ptr The OrtNodeComputeInfo instance.
* \param[inout] compute_state The opaque compute state returned by CreateState().
*
* \since Version 1.23.
*/
void(ORT_API_CALL* ReleaseState)(_In_ OrtNodeComputeInfo* this_ptr, _Frees_ptr_opt_ void* compute_state);
};
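// --- Illustrative sketch (not part of this header): a plugin EP typically
// embeds OrtNodeComputeInfo as the first member of its own struct and assigns
// static callbacks, so `this_ptr` can be cast back. Names are hypothetical.
//
//   struct MyComputeInfo {
//     OrtNodeComputeInfo base;  // must be first for the cast back to be valid
//     MyComputeInfo() {
//       base.ort_version_supported = ORT_API_VERSION;
//       base.CreateState = CreateStateImpl;
//       base.Compute = ComputeImpl;
//       base.ReleaseState = ReleaseStateImpl;
//     }
//     static OrtStatus* ORT_API_CALL CreateStateImpl(OrtNodeComputeInfo* this_ptr,
//                                                    OrtNodeComputeContext* ctx,
//                                                    void** compute_state);
//     static OrtStatus* ORT_API_CALL ComputeImpl(OrtNodeComputeInfo* this_ptr,
//                                                void* compute_state,
//                                                OrtKernelContext* kernel_context);
//     static void ORT_API_CALL ReleaseStateImpl(OrtNodeComputeInfo* this_ptr,
//                                               void* compute_state);
//   };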
struct OrtEpApi {
/** \brief Create an OrtEpDevice for the EP and an OrtHardwareDevice.
* \param[in] ep_factory Execution provider factory that is creating the instance.
* \param[in] hardware_device Hardware device that the EP can utilize.
* \param[in] ep_metadata Optional OrtKeyValuePairs instance for execution provider metadata that may be used
* during execution provider selection and passed to CreateEp.
* ep_device will copy this instance and the user should call ReleaseKeyValuePairs.
* \param[in] ep_options Optional OrtKeyValuePairs instance for execution provider options that will be added
* to the Session configuration options if the execution provider is selected.
* ep_device will copy this instance and the user should call ReleaseKeyValuePairs.
* \param[out] ep_device The OrtEpDevice that is created.
*
* \since Version 1.22.
*/
ORT_API2_STATUS(CreateEpDevice, _In_ OrtEpFactory* ep_factory,
_In_ const OrtHardwareDevice* hardware_device,
_In_opt_ const OrtKeyValuePairs* ep_metadata,
_In_opt_ const OrtKeyValuePairs* ep_options,
_Out_ OrtEpDevice** ep_device);
ORT_CLASS_RELEASE(EpDevice);
/** \brief Specify nodes that are supported by an OrtEp and should be fused into one node.
*
* Because the nodes will be fused into one "fused node", there must not exist an unsupported node in
* a path between two of the provided nodes. Otherwise, the graph will become invalid.
*
* This function can be called multiple times. A subsequent call to this function will force the next set of
* nodes to be fused into a different node.
*
* \param[in] graph_support_info OrtEpGraphSupportInfo instance to which to add the supported nodes.
* \param[in] nodes Array of nodes supported by the EP that should be fused/compiled.
* \param[in] num_nodes The number of supported nodes.
* \param[in] node_fusion_options Optional node fusion options. Ignored if set to NULL.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(EpGraphSupportInfo_AddNodesToFuse, _In_ OrtEpGraphSupportInfo* graph_support_info,
_In_reads_(num_nodes) const OrtNode* const* nodes, _In_ size_t num_nodes,
_In_opt_ const OrtNodeFusionOptions* node_fusion_options);
/** \brief Specify a node that is supported by an OrtEp and should be run with a registered EP kernel.
*
* \param[in] graph_support_info OrtEpGraphSupportInfo instance to which to add the supported node.
* \param[in] node The supported OrtNode instance.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(EpGraphSupportInfo_AddSingleNode, _In_ OrtEpGraphSupportInfo* graph_support_info,
_In_ const OrtNode* node);
/** \brief Query a OrtNodeComputeContext for the name of the node that encapsulates the compiled/fused node.
*
* Used in OrtNodeComputeInfo::CreateComputeState().
*
* \param[in] context The OrtNodeComputeContext instance to query.
* \return The node's name.
*
* \note Returned string is owned by ORT and valid only while OrtNodeComputeInfo::CreateComputeState() is called.
*
* \since Version 1.23.
*/
ORT_API_T(const char*, NodeComputeContext_NodeName, _In_ const OrtNodeComputeContext* context);
/** \brief Register an allocator with the OrtEpDevice.
*
* This allows an EP to provide OrtMemoryInfo for DEFAULT and HOST_ACCESSIBLE memory type as needed.
* The registered values will be used in calls to OrtEpFactory::CreateAllocator to ensure the required allocator/s
* are available for EP usage.
*
* Multiple calls for the same entry type will replace a previous entry.
*
* Available entries:
* - OrtDeviceAllocator with type of OrtDeviceMemoryType_DEFAULT
* - OrtDeviceAllocator with type of OrtDeviceMemoryType_HOST_ACCESSIBLE
* - OrtReadOnlyAllocator with type of OrtDeviceMemoryType_DEFAULT
* - if provided this allocator will only be used to copy initializers to the device the EP uses.
* ORT will use the OrtDeviceAllocator if not provided.
*
* \param[in] ep_device The OrtEpDevice instance to register the OrtMemoryInfo with.
* \param[in] allocator_memory_info The OrtMemoryInfo information for the allocator.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(EpDevice_AddAllocatorInfo, _In_ OrtEpDevice* ep_device,
_In_ const OrtMemoryInfo* allocator_memory_info);
/** \brief Get the OrtMemoryDevice from an OrtMemoryInfo instance.
*
* This is required for OrtDataTransferImpl (which implements onnxruntime::IDataTransfer) where the OrtMemoryDevice
* is used in the CanCopy and CopyTensors functions.
*
* \param[in] memory_info The OrtMemoryInfo instance to get the memory device from.
* \return The OrtMemoryDevice associated with the OrtMemoryInfo instance.
*
* \since Version 1.23.
*/
ORT_API_T(const OrtMemoryDevice*, MemoryInfo_GetMemoryDevice, _In_ const OrtMemoryInfo* memory_info);
/** \brief Get the OrtMemoryDevice from an OrtValue instance if it contains a Tensor.
*
* \param[in] value The OrtValue instance to get the memory device from.
* \return Memory device if OrtValue contains a Tensor, nullptr otherwise.
*
* \since Version 1.23.
*/
ORT_API_T(const OrtMemoryDevice*, Value_GetMemoryDevice, _In_ const OrtValue* value);
/** \brief Compare two OrtMemoryDevice instances for equality.
*
* This is used to check if two memory devices are the same.
* Used to implement DataTransferImpl::CanCopy.
*
* \param[in] a The first OrtMemoryDevice instance to compare.
* \param[in] b The second OrtMemoryDevice instance to compare.
* \return True if the two OrtMemoryDevice instances are equal, false otherwise.
*
* \since Version 1.23.
*/
ORT_API_T(bool, MemoryDevice_AreEqual, _In_ const OrtMemoryDevice* a, _In_ const OrtMemoryDevice* b);
/** \brief Get the OrtMemoryInfoDeviceType value from an OrtMemoryDevice instance.
*
* \param[in] memory_device OrtMemoryDevice instance.
* \return The OrtMemoryInfoDeviceType value.
*
* \since Version 1.23.
*/
ORT_API_T(OrtMemoryInfoDeviceType, MemoryDevice_GetDeviceType, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the OrtDeviceMemoryType value from an OrtMemoryDevice instance.
*
* \param[in] memory_device OrtMemoryDevice instance.
* \return The OrtDeviceMemoryType value.
*
* \since Version 1.23.
*/
ORT_API_T(OrtDeviceMemoryType, MemoryDevice_GetMemoryType, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the vendor ID from an OrtMemoryDevice instance.
*
* The vendor ID is used to identify the vendor of the device, and is typically set to the PCI vendor ID.
*
* If the device is not vendor specific (e.g. CPU memory) the vendor ID is set to 0.
*
* \param[in] memory_device OrtMemoryDevice instance.
* \return The vendor ID value.
*
* \since Version 1.23.
*/
ORT_API_T(uint32_t, MemoryDevice_GetVendorId, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the device ID from an OrtMemoryDevice instance.
*
* \param[in] memory_device OrtMemoryDevice instance.
* \return The device ID.
*
* \since Version 1.23.
*/
ORT_API_T(uint32_t, MemoryDevice_GetDeviceId, _In_ const OrtMemoryDevice* memory_device);
/** \brief Get the OrtSyncStreamImpl associated with an OrtSyncStream instance.
*
* This allows the plugin library to connect its OrtSyncStreamImpl instance with an OrtSyncStream if needed.
*
* \param[in] stream The OrtSyncStream instance to find an OrtSyncStreamImpl for.
* \return The associated OrtSyncStreamImpl if found. nullptr otherwise.
*
* \since Version 1.23.
*
* \remarks There should always be an OrtSyncStreamImpl associated with an OrtSyncStream instance that the EP gets.
*/
ORT_API_T(const OrtSyncStreamImpl*, SyncStream_GetImpl, _In_ const OrtSyncStream* stream);
/** \brief Get the current sync ID for a stream.
*
* \param[in] stream The OrtSyncStream to get the sync ID for.
* \return Current sync ID.
*
* \since Version 1.23.
*/
ORT_API_T(uint64_t, SyncStream_GetSyncId, _In_ const OrtSyncStream* stream);
/** \brief Get the sync ID for the last time the consumer_stream waited on the producer_stream.
*
* When two streams are synchronized, the sync id represents the event used in that synchronization.
*
* \param[in] producer_stream The OrtSyncStream that produced the data.
* \param[in] consumer_stream The OrtSyncStream that waited on the producer_stream.
* \return ID for last sync. 0 if no sync has occurred between the two streams.
*
* \since Version 1.23.
*/
ORT_API_T(uint64_t, GetSyncIdForLastWaitOnSyncStream,
_In_ const OrtSyncStream* producer_stream, _In_ const OrtSyncStream* consumer_stream);
};
/**
* \brief The data layout type.
*
* EPs may specify a preferred data layout type. ORT's default layout type is OrtEpDataLayout_NCHW, or
* OrtEpDataLayout_Default.
*
* \since Version 1.23.
*/
typedef enum OrtEpDataLayout {
OrtEpDataLayout_NCHW = 0,
OrtEpDataLayout_NHWC,
OrtEpDataLayout_Default = OrtEpDataLayout_NCHW,
} OrtEpDataLayout;
/**
* \brief The OrtEp struct provides functions to implement for an execution provider.
* \since Version 1.22.
*/
struct OrtEp {
/** \brief The ONNX Runtime version the execution provider was compiled with.
*
* Implementation should set to ORT_API_VERSION.
* ORT will use this to ensure it does not call functions that were not available when the library was compiled.
*
* \since Version 1.22.
*/
uint32_t ort_version_supported;
/** \brief Get the execution provider name.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEp instance.
* \return The execution provider name.
*
* \since Version 1.22.
*/
ORT_API_T(const char*, GetName, _In_ const OrtEp* this_ptr);
/** \brief Get information about the nodes supported by the OrtEp instance.
*
* IMPORTANT: This is not the final version of this API function. This is currently experimental but will
* be stabilized by the ONNX Runtime 1.23 release.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] graph The OrtGraph instance for which to populate node support. The OrtGraph could be a nested subgraph
* contained by a node (e.g., an If or Loop node). ONNX Runtime calls this function separately
* for each nested subgraph.
* \param[inout] graph_support_info OrtEpGraphSupportInfo instance that the implementer must fill out in order to
* specify the supported nodes.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(GetCapability, _In_ OrtEp* this_ptr, _In_ const OrtGraph* graph,
_Inout_ OrtEpGraphSupportInfo* graph_support_info);
/** \brief Compile OrtGraph instances assigned to the OrtEp. Implementer must set a OrtNodeComputeInfo instance
* for each OrtGraph in order to define its computation function.
*
* If the session is configured to generate a pre-compiled model, the execution provider must return EPContext nodes,
* as OrtNode instances, that ONNX Runtime uses to create a pre-compiled model, known as an "EPContext model".
* An EPContext model contains EPContext nodes. Each EPContext node encapsulates the pre-compiled binary data for a
* OrtGraph compiled for a specific execution provider. For more details about the EPContext design, refer to:
* \htmlonly
* <a href="https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html">EPContext design document.</a>
* \endhtmlonly
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] graphs Array of `count` OrtGraph instances to compile. Each graph contains only the nodes for
* which the execution provider indicated support. Nested subgraphs contained by a
* node, such as an If or Loop, have separate OrtGraph instances.
* \param[in] fused_nodes Array of `count` fused nodes that will replace the compiled graphs.
* Each fused node is an OrtNode initialized with the intended fused node name and
* input/output information.
* \param[in] count The number of OrtGraph instances to compile.
* \param[out] node_compute_infos Array of `count` OrtNodeComputeInfo instances that define each OrtGraph instance's
* computation function. The implementer allocates the OrtNodeComputeInfo instances.
* ORT calls ReleaseNodeComputeInfos() to release multiple instances in a batch.
* \param[out] ep_context_nodes Output array of `count` OrtNode instances, each representing an EPContext
* node for a compiled OrtGraph. The execution provider must use
* OrtModelEditorApi::CreateNode to create the OrtNode instances. ONNX Runtime takes
* ownership of the OrtNode instances, so the execution provider must NOT call
* OrtApi::ReleaseNode. Should be ignored if the session is not configured to generate an
* EPContext model.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \note Do NOT cache the provided OrtGraph instances in any of the OrtNodeComputeInfo functions because the
* graphs are only valid for the duration of the call to Compile. Any graph/node/input/output
* names that are needed by the OrtNodeComputeInfo functions must be copied and stored by the OrtEp.
*
* \since Version 1.23.
*/
ORT_API2_STATUS(Compile, _In_ OrtEp* this_ptr, _In_ const OrtGraph** graphs,
_In_ const OrtNode** fused_nodes, _In_ size_t count,
_Out_writes_all_(count) OrtNodeComputeInfo** node_compute_infos,
_Out_writes_(count) OrtNode** ep_context_nodes);
/** \brief Release OrtNodeComputeInfo instances.
*
* \param[in] this_ptr The OrtEp instance.
* \param[inout] node_compute_infos The OrtNodeComputeInfo instances to release.
* \param[in] num_node_compute_infos The number of OrtNodeComputeInfo instances.
*
* \since Version 1.23.
*/
ORT_API_T(void, ReleaseNodeComputeInfos, _In_ OrtEp* this_ptr,
OrtNodeComputeInfo** node_compute_infos,
_In_ size_t num_node_compute_infos);
/** \brief Get the EP's preferred data layout.
*
* \note Implementation of this function is optional.
* If not implemented, ORT will assume that this EP prefers the data layout `OrtEpDataLayout::NCHW`.
*
* \param[in] this_ptr The OrtEp instance.
* \param[out] preferred_data_layout The EP's preferred data layout.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(GetPreferredDataLayout, _In_ OrtEp* this_ptr, _Out_ OrtEpDataLayout* preferred_data_layout);
/** \brief Given an op with domain `domain` and type `op_type`, determine whether an associated node's data layout
* should be converted to `target_data_layout`.
* If the EP prefers a non-default data layout (see `GetPreferredDataLayout()`), this function will be called
* during layout transformation with `target_data_layout` set to the EP's preferred data layout.
*
* \note Implementation of this function is optional.
* If an EP prefers a non-default data layout, it may implement this to customize the specific op data layout
* preferences at a finer granularity.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] domain The op domain. An empty string means the ONNX domain.
* \param[in] op_type The op type.
* \param[in] target_data_layout The target data layout.
* \param[out] should_convert Whether the associated node's data layout should be converted to `target_data_layout`.
* If greater than 0, convert.
* If 0, don't convert.
* Otherwise, if less than 0, leave the decision to ORT.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(ShouldConvertDataLayoutForOp, _In_ OrtEp* this_ptr,
_In_z_ const char* domain, _In_z_ const char* op_type,
_In_ OrtEpDataLayout target_data_layout,
_Outptr_ int* should_convert);
/** \brief Set dynamic options on this EP.
*
* Dynamic options can be set by the user at any time after session creation with `OrtApi::SetEpDynamicOptions()`.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] option_keys The dynamic option keys.
* \param[in] option_values The dynamic option values.
* \param[in] num_options The number of dynamic options.
*
* \note Implementation of this function is optional.
* An EP should only implement this if it needs to handle any dynamic options.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(SetDynamicOptions, _In_ OrtEp* this_ptr,
_In_reads_(num_options) const char* const* option_keys,
_In_reads_(num_options) const char* const* option_values,
_In_ size_t num_options);
/** \brief Called by ORT to notify the EP of the start of a run.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] run_options The run options for this run.
*
* \note Implementation of this function is optional.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(OnRunStart, _In_ OrtEp* this_ptr, _In_ const OrtRunOptions* run_options);
/** \brief Called by ORT to notify the EP of the end of a run.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] run_options The run options for this run.
* \param[in] sync_stream Whether any associated stream should be synchronized during this call.
* Only applicable if there is such a stream.
*
* \note Implementation of this function is optional.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(OnRunEnd, _In_ OrtEp* this_ptr, _In_ const OrtRunOptions* run_options, _In_ bool sync_stream);
/** \brief Create an OrtAllocator for the given OrtMemoryInfo for an OrtSession.
*
* The OrtMemoryInfo instance will match one of the values set in the OrtEpDevice using EpDevice_AddAllocatorInfo.
* Any allocator specific options should be read from the session options.
*
* If this function is nullptr, OrtEpFactory::CreateAllocator will be used.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] memory_info The OrtMemoryInfo to create the allocator for. May be nullptr.
* \param[out] allocator The created OrtAllocator instance. Set to nullptr if the default CPU allocator is used.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateAllocator, _In_ OrtEp* this_ptr,
_In_ const OrtMemoryInfo* memory_info,
_Outptr_result_maybenull_ OrtAllocator** allocator);
/** \brief Create a synchronization stream for the given memory device for an OrtSession.
*
* This is used to create a synchronization stream for the execution provider and is used to synchronize
* operations on the device during model execution.
* Any stream specific options should be read from the session options.
*
* If this function is nullptr, OrtEpFactory::CreateSyncStreamForDevice will be used.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] memory_device The OrtMemoryDevice to create the synchronization stream for.
* \param[out] stream The created OrtSyncStreamImpl instance. nullptr if the execution provider is not stream aware.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateSyncStreamForDevice, _In_ OrtEp* this_ptr,
_In_ const OrtMemoryDevice* memory_device,
_Outptr_ OrtSyncStreamImpl** stream);
/** \brief Get a string with details about the EP stack used to produce a compiled model.
*
* This function gets a compatibility information string that contains details about the execution provider
* used to compile a given model. This string can later be used with ValidateCompiledModelCompatibilityInfo
* to determine if a compiled model is compatible with the EP.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEp instance.
* \param[in] graph The OrtGraph instance for which to generate compatibility information.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API_T(const char*, GetCompiledModelCompatibilityInfo, _In_ OrtEp* this_ptr,
_In_ const OrtGraph* graph);
};
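// --- Illustrative sketch (not part of this header): the general shape of a
// GetCapability implementation. The graph/node queries are elided because they
// live in other API structs; `g_ep_api` and `supported_nodes` are hypothetical.
//
//   static OrtStatus* ORT_API_CALL GetCapabilityImpl(OrtEp* this_ptr,
//                                                    const OrtGraph* graph,
//                                                    OrtEpGraphSupportInfo* info) {
//     // 1. Walk `graph` and collect the OrtNode* entries this EP supports.
//     // 2. Claim them for fusion; nullptr keeps default fusion options.
//     return g_ep_api->EpGraphSupportInfo_AddNodesToFuse(
//         info, supported_nodes.data(), supported_nodes.size(), nullptr);
//   }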
/** \brief The function signature that ORT will call to create OrtEpFactory instances.
*
* This must be available in a function called 'CreateEpFactories' in the execution provider library.
*
* \param[in] registered_name The name the execution library is registered with by RegisterExecutionProviderLibrary
* \param[in] ort_api_base The OrtApiBase instance that is used by the factory to get the OrtApi instance for the
* version of ORT that the library was compiled against.
* \param[in] default_logger The default ORT logger that can be used for logging outside of an inference session.
* \param[in,out] factories The implementation should create and add OrtEpFactory instances to this
* pre-allocated array.
* i.e. usage is `factories[0] = new MyEpFactory();`
* \param[in] max_factories The maximum number of OrtEpFactory instances that can be added to `factories`.
* Current default is to allow 4 factories. This can be increased in the future if needed.
* \param[out] num_factories The number of OrtEpFactory instances created by the factory and added to `factories`.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.22.
*/
typedef OrtStatus* (*CreateEpApiFactoriesFn)(_In_ const char* registered_name, _In_ const OrtApiBase* ort_api_base,
_In_ const OrtLogger* default_logger,
_Inout_ OrtEpFactory** factories, _In_ size_t max_factories,
_Out_ size_t* num_factories);
/** \brief The function signature that ORT will call to release an OrtEpFactory instance.
*
* This must be available in a function called 'ReleaseEpFactory' in the execution provider library.
*
* \param[in] factory The OrtEpFactory instance to release.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.22.
*/
typedef OrtStatus* (*ReleaseEpApiFactoryFn)(_In_ OrtEpFactory* factory);
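// --- Illustrative sketch (not part of this header): the two exports a plugin
// EP library provides to match the typedefs above. MyEpFactory is hypothetical.
//
//   extern "C" OrtStatus* CreateEpFactories(const char* registered_name,
//                                           const OrtApiBase* ort_api_base,
//                                           const OrtLogger* default_logger,
//                                           OrtEpFactory** factories,
//                                           size_t max_factories,
//                                           size_t* num_factories) {
//     if (max_factories < 1)
//       return ort_api_base->GetApi(ORT_API_VERSION)
//           ->CreateStatus(ORT_INVALID_ARGUMENT, "factories array too small");
//     factories[0] = new MyEpFactory(registered_name, ort_api_base);
//     *num_factories = 1;
//     return nullptr;  // success
//   }
//
//   extern "C" OrtStatus* ReleaseEpFactory(OrtEpFactory* factory) {
//     delete static_cast<MyEpFactory*>(factory);
//     return nullptr;
//   }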
/**
* \brief The OrtEpFactory provides functions to create and manage execution providers.
* \since Version 1.22.
*/
struct OrtEpFactory {
/** \brief The ONNX Runtime version the execution provider was compiled with.
*
* Implementation should set to ORT_API_VERSION.
* ORT will use this to ensure it does not call functions that were not available when the library was compiled.
*
* \since Version 1.22.
*/
uint32_t ort_version_supported;
/** \brief Get the name of the execution provider that the factory creates.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return The name of the execution provider the factory creates.
*
* \since Version 1.22.
*/
ORT_API_T(const char*, GetName, const OrtEpFactory* this_ptr);
/** \brief Get the name of vendor who owns the execution provider that the factory creates.
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return vendor The vendor name of the execution provider the factory creates.
*
* \since Version 1.22.
*/
ORT_API_T(const char*, GetVendor, const OrtEpFactory* this_ptr); // return EP vendor
/** \brief Get information from the execution provider about OrtHardwareDevice support.
*
* \param[in] this_ptr The OrtEpFactory instance.
* Non-const as the factory is passed through to the CreateEp call via the OrtEpDevice.
* \param[in] devices The OrtHardwareDevice instances that are available.
* \param[in] num_devices The number of OrtHardwareDevice instances.
* \param[out] ep_devices OrtEpDevice instances for each OrtHardwareDevice that the EP can use.
* The implementation should call OrtEpApi::CreateEpDevice to create, and add the OrtEpDevice
* instances to this pre-allocated array. ORT will take ownership of the values returned.
* i.e. usage is `ep_devices[0] = <ptr to OrtEpDevice created with OrtEpApi::CreateEpDevice>;`
* \param[in] max_ep_devices The maximum number of OrtEpDevices that can be added to ep_devices.
* Current default is 8. This can be increased if needed.
* \param[out] num_ep_devices The number of EP devices added to ep_devices.
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.22.
*/
ORT_API2_STATUS(GetSupportedDevices, _In_ OrtEpFactory* this_ptr,
_In_reads_(num_devices) const OrtHardwareDevice* const* devices,
_In_ size_t num_devices,
_Inout_ OrtEpDevice** ep_devices,
_In_ size_t max_ep_devices,
_Out_ size_t* num_ep_devices);
/** \brief Function to create an OrtEp instance for use in a Session.
*
* ORT will call ReleaseEp to release the instance when it is no longer needed.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \param[in] devices The OrtHardwareDevice instances that the execution provider was selected to use.
* May be a subset of the OrtHardwareDevice instances that the execution provider's factory
* set as supported in the call to OrtEpFactory::GetSupportedDevices.
* \param[in] ep_metadata_pairs Execution provider metadata that was provided to OrtEpApi::CreateEpDevice, for each
* device.
* \param[in] num_devices The number of devices the execution provider was selected for.
* \param[in] session_options The OrtSessionOptions instance that contains the configuration options for the
* session. This will include ep_options from GetSupportedDevices as well as any
* user provided overrides.
* Execution provider options will have been added with a prefix of 'ep.[ep name].'.
* The OrtSessionOptions instance will NOT be valid after this call and should not be
* stored for later use.
* \param[in] logger The OrtLogger instance for the session that the execution provider should use for logging.
* \param[out] ep The OrtEp instance created by the factory.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.22.
*/
ORT_API2_STATUS(CreateEp, _In_ OrtEpFactory* this_ptr,
_In_reads_(num_devices) const OrtHardwareDevice* const* devices,
_In_reads_(num_devices) const OrtKeyValuePairs* const* ep_metadata_pairs,
_In_ size_t num_devices,
_In_ const OrtSessionOptions* session_options,
_In_ const OrtLogger* logger, _Outptr_ OrtEp** ep);
/** \brief Release the OrtEp instance.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \param[in] ep The OrtEp instance to release.
*
* \since Version 1.22.
*/
ORT_API_T(void, ReleaseEp, OrtEpFactory* this_ptr, struct OrtEp* ep);
/** \brief Get the vendor id who owns the execution provider that the factory creates.
*
* This is typically the PCI vendor ID. See https://pcisig.com/membership/member-companies
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return vendor_id The vendor ID of the execution provider the factory creates.
*
* \since Version 1.23.
*/
ORT_API_T(uint32_t, GetVendorId, const OrtEpFactory* this_ptr);
/** \brief Get the version of the execution provider that the factory creates.
*
* The version string should adhere to the Semantic Versioning 2.0 specification
* (https://github.com/semver/semver/blob/v2.0.0/semver.md).
*
* The returned string should be a null-terminated, UTF-8 encoded string. ORT will copy it.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return The execution provider version string.
*
* \since Version 1.23.
*/
ORT_API_T(const char*, GetVersion, _In_ const OrtEpFactory* this_ptr);
/** \brief Validate the compatibility of a compiled model with the execution provider factory for one or more devices.
*
* Given a compatibility info string produced during model compilation, the EP factory should determine whether the
* compiled model is compatible with the EP factory when targeting the provided hardware devices. All devices provided
* must belong to the same execution provider instance that this factory creates.
*
* The EP factory implementation should consider the set of devices (e.g., multi-adapter or multi-GPU scenarios) when
* evaluating compatibility and set `model_compatibility` accordingly.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \param[in] devices Array of OrtHardwareDevice pointers that the EP would run on. All must map to this EP.
* \param[in] num_devices Number of entries in `devices`.
* \param[in] compatibility_info The compatibility information string produced when the model was compiled.
* \param[out] model_compatibility OrtCompiledModelCompatibility value describing the compatibility of the model with the EP.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(ValidateCompiledModelCompatibilityInfo, _In_ OrtEpFactory* this_ptr,
_In_reads_(num_devices) const OrtHardwareDevice* const* devices,
_In_ size_t num_devices,
_In_ const char* compatibility_info,
_Out_ OrtCompiledModelCompatibility* model_compatibility);
/** \brief Create an OrtAllocator that can be shared across sessions for the given OrtMemoryInfo.
*
* The factory that creates the EP is responsible for providing the allocators required by the EP.
* The OrtMemoryInfo instance will match one of the values set in the OrtEpDevice using EpDevice_AddAllocatorInfo.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \param[in] memory_info The OrtMemoryInfo to create the allocator for. May be nullptr.
* \param[in] allocator_options Optional key-value pairs for allocator options, can be nullptr.
* \param[out] allocator The created OrtAllocator instance. Set to nullptr if the default CPU allocator is used.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateAllocator, _In_ OrtEpFactory* this_ptr,
_In_ const OrtMemoryInfo* memory_info,
_In_opt_ const OrtKeyValuePairs* allocator_options,
_Outptr_result_maybenull_ OrtAllocator** allocator);
/** \brief Release an OrtAllocator created by the factory.
*
* \since Version 1.23.
*/
ORT_API_T(void, ReleaseAllocator, _In_ OrtEpFactory* this_ptr, _In_ OrtAllocator* allocator);
/** \brief Create an OrtDataTransferImpl instance for the factory.
*
* This is used to create an IDataTransfer implementation that can be used to copy data between devices
* that the execution provider supports.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \param[out] data_transfer The created OrtDataTransferImpl instance. Set to nullptr if not required.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateDataTransfer, _In_ OrtEpFactory* this_ptr,
_Outptr_result_maybenull_ OrtDataTransferImpl** data_transfer);
/** \brief Check if execution providers created by the factory are stream aware.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \return True if the factory creates execution providers that are stream aware and the factory implements CreateSyncStreamForDevice.
*
* \since Version 1.23.
*/
ORT_API_T(bool, IsStreamAware, _In_ const OrtEpFactory* this_ptr);
/** \brief Create a synchronization stream for the given memory device.
*
* This is used to create a synchronization stream for the memory device that can be used for operations outside of
* a session.
*
* \param[in] this_ptr The OrtEpFactory instance.
* \param[in] memory_device The OrtMemoryDevice to create the synchronization stream for.
* \param[in] stream_options Options for stream creation. May be nullptr.
* \param[out] stream The created OrtSyncStreamImpl instance. nullptr if the execution provider is not stream aware.
*
* \snippet{doc} snippets.dox OrtStatus Return Value
*
* \since Version 1.23.
*/
ORT_API2_STATUS(CreateSyncStreamForDevice, _In_ OrtEpFactory* this_ptr,
_In_ const OrtMemoryDevice* memory_device,
_In_opt_ const OrtKeyValuePairs* stream_options,
_Outptr_ OrtSyncStreamImpl** stream);
};
#ifdef __cplusplus
}
#endif
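
To show how these members are typically wired together, here is a heavily hedged sketch of a plugin EP factory. The struct name, version string, and behavior are hypothetical; only the signatures follow the declarations above, and a real factory must also populate CreateEp, ReleaseEp, GetVendorId, and the remaining members.

#include <cstddef>

struct MyEpFactory : OrtEpFactory {
  MyEpFactory() {
    GetVersion = GetVersionImpl;
    IsStreamAware = IsStreamAwareImpl;
    GetSupportedDevices = GetSupportedDevicesImpl;
    // ... CreateEp, ReleaseEp, GetVendorId, etc. must be wired up as well.
  }

  static const char* ORT_API_CALL GetVersionImpl(const OrtEpFactory*) noexcept {
    return "0.1.0";  // must follow Semantic Versioning 2.0 per GetVersion's contract
  }

  static bool ORT_API_CALL IsStreamAwareImpl(const OrtEpFactory*) noexcept {
    return false;  // this sketch does not implement CreateSyncStreamForDevice
  }

  static OrtStatus* ORT_API_CALL GetSupportedDevicesImpl(
      OrtEpFactory* /*this_ptr*/, const OrtHardwareDevice* const* /*devices*/,
      size_t /*num_devices*/, OrtEpDevice** /*ep_devices*/,
      size_t /*max_ep_devices*/, size_t* num_ep_devices) noexcept {
    // A real factory inspects each OrtHardwareDevice and registers the ones it
    // supports via OrtEpApi::CreateEpDevice; this stub claims no devices.
    *num_ep_devices = 0;
    return nullptr;  // a null OrtStatus* signals success
  }
};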

View File

@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
// This file contains well-known keys for OrtEpDevice EP metadata entries.
// It does NOT specify all available metadata keys.
// Key for the execution provider version string. This should be available for all plugin EPs.
static const char* const kOrtEpDevice_EpMetadataKey_Version = "version";
// Prefix for execution provider compatibility information stored in model metadata.
// Used when generating EP context models to store compatibility strings for each EP.
// Full key format: "ep_compatibility_info.<EP_TYPE>"
static const char* const kOrtModelMetadata_EpCompatibilityInfoPrefix = "ep_compatibility_info.";
// Key for the execution provider library path (for dynamically loaded EPs)
static const char* const kOrtEpDevice_EpMetadataKey_LibraryPath = "library_path";
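
For orientation, a minimal sketch of how the prefix composes into a full model-metadata key; the helper function and the EP type name below are hypothetical placeholders, not part of this header.

#include <string>

// Builds "ep_compatibility_info.<EP_TYPE>" from the prefix above; the EP type
// string is whatever the execution provider reports as its type/name.
inline std::string MakeEpCompatibilityInfoKey(const std::string& ep_type) {
  return std::string(kOrtModelMetadata_EpCompatibilityInfoPrefix) + ep_type;
}

// e.g. MakeEpCompatibilityInfoKey("ExampleEp") == "ep_compatibility_info.ExampleEp"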

View File

@ -0,0 +1,535 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <stdint.h>
#include <cmath>
#include <cstring>
#include <limits>
namespace onnxruntime_float16 {
namespace detail {
enum class endian {
#if defined(_WIN32)
little = 0,
big = 1,
native = little,
#elif defined(__GNUC__) || defined(__clang__)
little = __ORDER_LITTLE_ENDIAN__,
big = __ORDER_BIG_ENDIAN__,
native = __BYTE_ORDER__,
#else
#error onnxruntime_float16::detail::endian is not implemented in this environment.
#endif
};
static_assert(
endian::native == endian::little || endian::native == endian::big,
"Only little-endian or big-endian native byte orders are supported.");
} // namespace detail
/// <summary>
/// Shared implementation between public and internal classes. CRTP pattern.
/// </summary>
template <class Derived>
struct Float16Impl {
protected:
/// <summary>
/// Converts from float to uint16_t float16 representation
/// </summary>
/// <param name="v"></param>
/// <returns></returns>
constexpr static uint16_t ToUint16Impl(float v) noexcept;
/// <summary>
/// Converts float16 to float
/// </summary>
/// <returns>float representation of float16 value</returns>
float ToFloatImpl() const noexcept;
/// <summary>
/// Creates an instance that represents absolute value.
/// </summary>
/// <returns>Absolute value</returns>
uint16_t AbsImpl() const noexcept {
return static_cast<uint16_t>(val & ~kSignMask);
}
/// <summary>
/// Creates a new instance with the sign flipped.
/// </summary>
/// <returns>Flipped sign instance</returns>
uint16_t NegateImpl() const noexcept {
return IsNaN() ? val : static_cast<uint16_t>(val ^ kSignMask);
}
public:
// uint16_t special values
static constexpr uint16_t kSignMask = 0x8000U;
static constexpr uint16_t kBiasedExponentMask = 0x7C00U;
static constexpr uint16_t kPositiveInfinityBits = 0x7C00U;
static constexpr uint16_t kNegativeInfinityBits = 0xFC00U;
static constexpr uint16_t kPositiveQNaNBits = 0x7E00U;
static constexpr uint16_t kNegativeQNaNBits = 0xFE00U;
static constexpr uint16_t kMaxValueBits = 0x7BFFU; // Largest normal number
static constexpr uint16_t kOneBits = 0x3C00U;
static constexpr uint16_t kMinusOneBits = 0xBC00U;
uint16_t val{0};
Float16Impl() = default;
/// <summary>
/// Checks if the value is negative
/// </summary>
/// <returns>true if negative</returns>
bool IsNegative() const noexcept {
return static_cast<int16_t>(val) < 0;
}
/// <summary>
/// Tests if the value is NaN
/// </summary>
/// <returns>true if NaN</returns>
bool IsNaN() const noexcept {
return AbsImpl() > kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value is finite
/// </summary>
/// <returns>true if finite</returns>
bool IsFinite() const noexcept {
return AbsImpl() < kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value represents positive infinity.
/// </summary>
/// <returns>true if positive infinity</returns>
bool IsPositiveInfinity() const noexcept {
return val == kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value represents negative infinity
/// </summary>
/// <returns>true if negative infinity</returns>
bool IsNegativeInfinity() const noexcept {
return val == kNegativeInfinityBits;
}
/// <summary>
/// Tests if the value is either positive or negative infinity.
/// </summary>
/// <returns>True if absolute value is infinity</returns>
bool IsInfinity() const noexcept {
return AbsImpl() == kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value is NaN or zero. Useful for comparisons.
/// </summary>
/// <returns>True if NaN or zero.</returns>
bool IsNaNOrZero() const noexcept {
auto abs = AbsImpl();
return (abs == 0 || abs > kPositiveInfinityBits);
}
/// <summary>
/// Tests if the value is normal (not zero, subnormal, infinite, or NaN).
/// </summary>
/// <returns>True if so</returns>
bool IsNormal() const noexcept {
auto abs = AbsImpl();
return (abs < kPositiveInfinityBits) // is finite
&& (abs != 0) // is not zero
&& ((abs & kBiasedExponentMask) != 0); // is not subnormal (has a non-zero exponent)
}
/// <summary>
/// Tests if the value is subnormal (denormal).
/// </summary>
/// <returns>True if so</returns>
bool IsSubnormal() const noexcept {
auto abs = AbsImpl();
return (abs < kPositiveInfinityBits) // is finite
&& (abs != 0) // is not zero
&& ((abs & kBiasedExponentMask) == 0); // is subnormal (has a zero exponent)
}
/// <summary>
/// Creates an instance that represents absolute value.
/// </summary>
/// <returns>Absolute value</returns>
Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); }
/// <summary>
/// Creates a new instance with the sign flipped.
/// </summary>
/// <returns>Flipped sign instance</returns>
Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); }
/// <summary>
/// IEEE defines that positive and negative zero are equal; this gives us a quick equality check
/// for two values by or'ing the private bits together and stripping the sign. They are both zero,
/// and therefore equivalent, if the resulting value is still zero.
/// </summary>
/// <param name="lhs">first value</param>
/// <param name="rhs">second value</param>
/// <returns>True if both arguments represent zero</returns>
static bool AreZero(const Float16Impl& lhs, const Float16Impl& rhs) noexcept {
return static_cast<uint16_t>((lhs.val | rhs.val) & ~kSignMask) == 0;
}
bool operator==(const Float16Impl& rhs) const noexcept {
if (IsNaN() || rhs.IsNaN()) {
// IEEE defines that NaN is not equal to anything, including itself.
return false;
}
return val == rhs.val;
}
bool operator!=(const Float16Impl& rhs) const noexcept { return !(*this == rhs); }
bool operator<(const Float16Impl& rhs) const noexcept {
if (IsNaN() || rhs.IsNaN()) {
// IEEE defines that NaN is unordered with respect to everything, including itself.
return false;
}
const bool left_is_negative = IsNegative();
if (left_is_negative != rhs.IsNegative()) {
// When the signs of left and right differ, we know that left is less than right if it is
// the negative value. The exception to this is if both values are zero, in which case IEEE
// says they should be equal, even if the signs differ.
return left_is_negative && !AreZero(*this, rhs);
}
return (val != rhs.val) && ((val < rhs.val) ^ left_is_negative);
}
};
// The following Float16_t conversions are based on the code from
// Eigen library.
// The conversion routines are Copyright (c) Fabian Giesen, 2016.
// The original license follows:
//
// Copyright (c) Fabian Giesen, 2016
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace detail {
union float32_bits {
unsigned int u;
float f;
};
} // namespace detail
template <class Derived>
inline constexpr uint16_t Float16Impl<Derived>::ToUint16Impl(float v) noexcept {
detail::float32_bits f{};
f.f = v;
constexpr detail::float32_bits f32infty = {255 << 23};
constexpr detail::float32_bits f16max = {(127 + 16) << 23};
constexpr detail::float32_bits denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
constexpr unsigned int sign_mask = 0x80000000u;
uint16_t val = static_cast<uint16_t>(0x0u);
unsigned int sign = f.u & sign_mask;
f.u ^= sign;
// NOTE all the integer compares in this function can be safely
// compiled into signed compares since all operands are below
// 0x80000000. Important if you want fast straight SSE2 code
// (since there's no unsigned PCMPGTD).
if (f.u >= f16max.u) { // result is Inf or NaN (all exponent bits set)
val = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
} else { // (De)normalized number or zero
if (f.u < (113 << 23)) { // resulting FP16 is subnormal or zero
// use a magic value to align our 10 mantissa bits at the bottom of
// the float. as long as FP addition is round-to-nearest-even this
// just works.
f.f += denorm_magic.f;
// and one integer subtract of the bias later, we have our final float!
val = static_cast<uint16_t>(f.u - denorm_magic.u);
} else {
unsigned int mant_odd = (f.u >> 13) & 1; // resulting mantissa is odd
// update exponent, rounding bias part 1
// Equivalent to `f.u += ((unsigned int)(15 - 127) << 23) + 0xfff`, but
// without arithmetic overflow.
f.u += 0xc8000fffU;
// rounding bias part 2
f.u += mant_odd;
// take the bits!
val = static_cast<uint16_t>(f.u >> 13);
}
}
val |= static_cast<uint16_t>(sign >> 16);
return val;
}
template <class Derived>
inline float Float16Impl<Derived>::ToFloatImpl() const noexcept {
constexpr detail::float32_bits magic = {113 << 23};
constexpr unsigned int shifted_exp = 0x7c00 << 13; // exponent mask after shift
detail::float32_bits o{};
o.u = (val & 0x7fff) << 13; // exponent/mantissa bits
unsigned int exp = shifted_exp & o.u; // just the exponent
o.u += (127 - 15) << 23; // exponent adjust
// handle exponent special cases
if (exp == shifted_exp) { // Inf/NaN?
o.u += (128 - 16) << 23; // extra exp adjust
} else if (exp == 0) { // Zero/Denormal?
o.u += 1 << 23; // extra exp adjust
o.f -= magic.f; // re-normalize
}
// Attempt to workaround the Internal Compiler Error on ARM64
// for bitwise | operator, including std::bitset
#if (defined _MSC_VER) && (defined _M_ARM || defined _M_ARM64 || defined _M_ARM64EC)
if (IsNegative()) {
return -o.f;
}
#else
// original code:
o.u |= (val & 0x8000U) << 16U; // sign bit
#endif
return o.f;
}
/// Shared implementation between public and internal classes. CRTP pattern.
template <class Derived>
struct BFloat16Impl {
protected:
/// <summary>
/// Converts from float to uint16_t bfloat16 representation
/// </summary>
/// <param name="v"></param>
/// <returns></returns>
static uint16_t ToUint16Impl(float v) noexcept;
/// <summary>
/// Converts bfloat16 to float
/// </summary>
/// <returns>float representation of bfloat16 value</returns>
float ToFloatImpl() const noexcept;
/// <summary>
/// Creates an instance that represents absolute value.
/// </summary>
/// <returns>Absolute value</returns>
uint16_t AbsImpl() const noexcept {
return static_cast<uint16_t>(val & ~kSignMask);
}
/// <summary>
/// Creates a new instance with the sign flipped.
/// </summary>
/// <returns>Flipped sign instance</returns>
uint16_t NegateImpl() const noexcept {
return IsNaN() ? val : static_cast<uint16_t>(val ^ kSignMask);
}
public:
// uint16_t special values
static constexpr uint16_t kSignMask = 0x8000U;
static constexpr uint16_t kBiasedExponentMask = 0x7F80U;
static constexpr uint16_t kPositiveInfinityBits = 0x7F80U;
static constexpr uint16_t kNegativeInfinityBits = 0xFF80U;
static constexpr uint16_t kPositiveQNaNBits = 0x7FC1U;
static constexpr uint16_t kNegativeQNaNBits = 0xFFC1U;
static constexpr uint16_t kMaxValueBits = 0x7F7FU;
static constexpr uint16_t kRoundToNearest = 0x7FFFU;
static constexpr uint16_t kOneBits = 0x3F80U;
static constexpr uint16_t kMinusOneBits = 0xBF80U;
uint16_t val{0};
BFloat16Impl() = default;
/// <summary>
/// Checks if the value is negative
/// </summary>
/// <returns>true if negative</returns>
bool IsNegative() const noexcept {
return static_cast<int16_t>(val) < 0;
}
/// <summary>
/// Tests if the value is NaN
/// </summary>
/// <returns>true if NaN</returns>
bool IsNaN() const noexcept {
return AbsImpl() > kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value is finite
/// </summary>
/// <returns>true if finite</returns>
bool IsFinite() const noexcept {
return AbsImpl() < kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value represents positive infinity.
/// </summary>
/// <returns>true if positive infinity</returns>
bool IsPositiveInfinity() const noexcept {
return val == kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value represents negative infinity
/// </summary>
/// <returns>true if negative infinity</returns>
bool IsNegativeInfinity() const noexcept {
return val == kNegativeInfinityBits;
}
/// <summary>
/// Tests if the value is either positive or negative infinity.
/// </summary>
/// <returns>True if absolute value is infinity</returns>
bool IsInfinity() const noexcept {
return AbsImpl() == kPositiveInfinityBits;
}
/// <summary>
/// Tests if the value is NaN or zero. Useful for comparisons.
/// </summary>
/// <returns>True if NaN or zero.</returns>
bool IsNaNOrZero() const noexcept {
auto abs = AbsImpl();
return (abs == 0 || abs > kPositiveInfinityBits);
}
/// <summary>
/// Tests if the value is normal (not zero, subnormal, infinite, or NaN).
/// </summary>
/// <returns>True if so</returns>
bool IsNormal() const noexcept {
auto abs = AbsImpl();
return (abs < kPositiveInfinityBits) // is finite
&& (abs != 0) // is not zero
&& ((abs & kBiasedExponentMask) != 0); // is not subnormal (has a non-zero exponent)
}
/// <summary>
/// Tests if the value is subnormal (denormal).
/// </summary>
/// <returns>True if so</returns>
bool IsSubnormal() const noexcept {
auto abs = AbsImpl();
return (abs < kPositiveInfinityBits) // is finite
&& (abs != 0) // is not zero
&& ((abs & kBiasedExponentMask) == 0); // is subnormal (has a zero exponent)
}
/// <summary>
/// Creates an instance that represents absolute value.
/// </summary>
/// <returns>Absolute value</returns>
Derived Abs() const noexcept { return Derived::FromBits(AbsImpl()); }
/// <summary>
/// Creates a new instance with the sign flipped.
/// </summary>
/// <returns>Flipped sign instance</returns>
Derived Negate() const noexcept { return Derived::FromBits(NegateImpl()); }
/// <summary>
/// IEEE defines that positive and negative zero are equal; this gives us a quick equality check
/// for two values by or'ing the private bits together and stripping the sign. They are both zero,
/// and therefore equivalent, if the resulting value is still zero.
/// </summary>
/// <param name="lhs">first value</param>
/// <param name="rhs">second value</param>
/// <returns>True if both arguments represent zero</returns>
static bool AreZero(const BFloat16Impl& lhs, const BFloat16Impl& rhs) noexcept {
// IEEE defines that positive and negative zero are equal, this gives us a quick equality check
// for two values by or'ing the private bits together and stripping the sign. They are both zero,
// and therefore equivalent, if the resulting value is still zero.
return static_cast<uint16_t>((lhs.val | rhs.val) & ~kSignMask) == 0;
}
};
template <class Derived>
inline uint16_t BFloat16Impl<Derived>::ToUint16Impl(float v) noexcept {
uint16_t result;
if (std::isnan(v)) {
result = kPositiveQNaNBits;
} else {
auto get_msb_half = [](float fl) {
uint16_t result;
#ifdef __cpp_if_constexpr
if constexpr (detail::endian::native == detail::endian::little) {
#else
if (detail::endian::native == detail::endian::little) {
#endif
std::memcpy(&result, reinterpret_cast<char*>(&fl) + sizeof(uint16_t), sizeof(uint16_t));
} else {
std::memcpy(&result, &fl, sizeof(uint16_t));
}
return result;
};
uint16_t upper_bits = get_msb_half(v);
union {
uint32_t U32;
float F32;
};
F32 = v;
U32 += (upper_bits & 1) + kRoundToNearest;
result = get_msb_half(F32);
}
return result;
}
template <class Derived>
inline float BFloat16Impl<Derived>::ToFloatImpl() const noexcept {
if (IsNaN()) {
return std::numeric_limits<float>::quiet_NaN();
}
float result;
char* const first = reinterpret_cast<char*>(&result);
char* const second = first + sizeof(uint16_t);
#ifdef __cpp_if_constexpr
if constexpr (detail::endian::native == detail::endian::little) {
#else
if (detail::endian::native == detail::endian::little) {
#endif
std::memset(first, 0, sizeof(uint16_t));
std::memcpy(second, &val, sizeof(uint16_t));
} else {
std::memcpy(first, &val, sizeof(uint16_t));
std::memset(second, 0, sizeof(uint16_t));
}
return result;
}
} // namespace onnxruntime_float16
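
To make the CRTP contract above concrete, here is a minimal round-trip sketch. MyHalf is a hypothetical wrapper defined only for this example; in the real headers, types such as ORT's MLFloat16 play this role.

#include <iostream>

struct MyHalf : onnxruntime_float16::Float16Impl<MyHalf> {
  // Derived types must provide FromBits; Abs()/Negate() in the base rely on it.
  static MyHalf FromBits(uint16_t bits) noexcept { MyHalf h; h.val = bits; return h; }
  static MyHalf FromFloat(float f) noexcept { return FromBits(ToUint16Impl(f)); }
  float ToFloat() const noexcept { return ToFloatImpl(); }
};

int main() {
  MyHalf h = MyHalf::FromFloat(3.14159f);
  std::cout << h.ToFloat() << "\n";           // ~3.1406: fp16 keeps ~11 bits of mantissa
  std::cout << h.Negate().ToFloat() << "\n";  // sign flip via the shared base helper
}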

File diff suppressed because it is too large

View File

@ -0,0 +1,54 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
/*
* This file defines RunOptions Config Keys and format of the Config Values.
*
* The Naming Convention for a RunOptions Config Key,
* "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
* Such as "ep.cuda.use_arena"
* The Config Key cannot be empty
* The maximum length of the Config Key is 128
*
* The string format of a RunOptions Config Value is defined individually for each Config.
* The maximum length of the Config Value is 1024
*/
// Key for enabling shrinkage of user-listed device memory arenas.
// Expects a list of semicolon-separated key-value pairs, with key and value separated by a colon, in the following format:
// "device_0:device_id_0;device_1:device_id_1"
// No white-spaces allowed in the provided list string.
// Currently, the only supported devices are : "cpu", "gpu" (case sensitive).
// If "cpu" is included in the list, DisableCpuMemArena() API must not be called (i.e.) arena for cpu should be enabled.
// Example usage: "cpu:0;gpu:0" (or) "gpu:0"
// By default, the value for this key is empty (i.e.) no memory arenas are shrunk
static const char* const kOrtRunOptionsConfigEnableMemoryArenaShrinkage = "memory.enable_memory_arena_shrinkage";
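// A hedged usage sketch (assuming the C++ API from <onnxruntime_cxx_api.h>,
// which is not part of this header): shrink the CPU arena at the end of a run.
// The CPU arena must remain enabled (i.e. do not call DisableCpuMemArena()).
#include <onnxruntime_cxx_api.h>
inline Ort::RunOptions MakeArenaShrinkingRunOptions() {
  Ort::RunOptions ro;
  ro.AddConfigEntry(kOrtRunOptionsConfigEnableMemoryArenaShrinkage, "cpu:0");
  return ro;
}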
// Set to '1' to not synchronize execution providers with the CPU at the end of a session run.
// By default it is set to '0'.
// Taking the CUDA EP as an example, this omits triggering cudaStreamSynchronize on the compute stream.
static const char* const kOrtRunOptionsConfigDisableSynchronizeExecutionProviders = "disable_synchronize_execution_providers";
// Set HTP performance mode for QNN HTP backend before session run.
// options for HTP performance mode: "burst", "balanced", "default", "high_performance",
// "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver",
// "sustained_high_performance". Default to "default".
static const char* const kOrtRunOptionsConfigQnnPerfMode = "qnn.htp_perf_mode";
// Set HTP performance mode for QNN HTP backend post session run.
static const char* const kOrtRunOptionsConfigQnnPerfModePostRun = "qnn.htp_perf_mode_post_run";
// Set RPC control latency for QNN HTP backend
static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_control_latency";
// Set QNN Lora Config File for apply Lora in QNN context binary
static const char* const kOrtRunOptionsConfigQnnLoraConfig = "qnn.lora_config";
// Set graph annotation id for CUDA EP. Use with enable_cuda_graph=true.
// The value should be an integer. If the value is not set, the default value is 0 and
// the ORT session only captures one CUDA graph before another capture is requested.
// If the value is set to -1, CUDA graph capture/replay is disabled in that run.
// Users are not expected to set the value to 0, as it is reserved for internal use.
static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";
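
A short usage sketch, assuming the public C++ API from onnxruntime_cxx_api.h; the annotation id "1" is arbitrary and is only meaningful together with enable_cuda_graph=true on the CUDA EP.

#include <onnxruntime_cxx_api.h>

Ort::RunOptions MakeAnnotatedRunOptions() {
  Ort::RunOptions run_options;
  // Capture/replay this run under CUDA graph annotation id 1; "-1" would
  // disable capture/replay for the run entirely.
  run_options.AddConfigEntry(kOrtRunOptionsConfigCudaGraphAnnotation, "1");
  return run_options;
}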

View File

@ -0,0 +1,417 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
/*
* This file defines SessionOptions Config Keys and format of the Config Values.
*
* The Naming Convention for a SessionOptions Config Key,
* "[Area][.[SubArea1].[SubArea2]...].[Keyname]"
* Such as "ep.cuda.use_arena"
* The Config Key cannot be empty
* The maximum length of the Config Key is 1024
*
* The string format of a SessionOptions Config Value is defined individually for each Config.
* The maximum length of the Config Value is 2048
*/
// Key for disable PrePacking,
// If the config value is set to "1" then the prepacking is disabled, otherwise prepacking is enabled (default value)
static const char* const kOrtSessionOptionsConfigDisablePrepacking = "session.disable_prepacking";
// A value of "1" means allocators registered in the env will be used. "0" means the allocators created in the session
// will be used. Use this to override the usage of env allocators on a per session level.
static const char* const kOrtSessionOptionsConfigUseEnvAllocators = "session.use_env_allocators";
// Set to 'ORT' (case sensitive) to load an ORT format model.
// If unset, model type will default to ONNX unless inferred from filename ('.ort' == ORT format) or bytes to be ORT
static const char* const kOrtSessionOptionsConfigLoadModelFormat = "session.load_model_format";
// Set to 'ORT' (case sensitive) to save optimized model in ORT format when SessionOptions.optimized_model_path is set.
// If unset, format will default to ONNX unless optimized_model_filepath ends in '.ort'.
static const char* const kOrtSessionOptionsConfigSaveModelFormat = "session.save_model_format";
// If a value is "1", flush-to-zero and denormal-as-zero are applied. The default is "0".
// When multiple sessions are created, a main thread doesn't override changes from succeeding session options,
// but threads in session thread pools follow option changes.
// When ORT runs with OpenMP, the same rule is applied, i.e. the first session option to flush-to-zero and
// denormal-as-zero is only applied to global OpenMP thread pool, which doesn't support per-session thread pool.
// Note that an alternative way not using this option at runtime is to train and export a model without denormals
// and that's recommended because turning this option on may hurt model accuracy.
static const char* const kOrtSessionOptionsConfigSetDenormalAsZero = "session.set_denormal_as_zero";
// Controls whether to run a quantized model in QDQ (QuantizeLinear/DequantizeLinear) format.
// "0": enable. ORT does fusion logic for the QDQ format.
// "1": disable. ORT doesn't do fusion logic for the QDQ format.
// Its default value is "0" unless the DirectML execution provider is registered, in which case it defaults to "1".
static const char* const kOrtSessionOptionsDisableQuantQDQ = "session.disable_quant_qdq";
// Controls whether to disable the Double QDQ remover and Identical Children Consolidation.
// "0": don't disable. ORT removes the middle 2 nodes from Q->(DQ->Q)->DQ pairs.
// "1": disable. ORT doesn't remove the middle 2 nodes from Q->(DQ->Q)->DQ pairs.
// Its default value is "0".
static const char* const kOrtSessionOptionsDisableDoubleQDQRemover = "session.disable_double_qdq_remover";
// If set to "1", enables the removal of QuantizeLinear/DequantizeLinear node pairs once all QDQ handling has been
// completed. e.g. If after all QDQ handling has completed and we have -> FloatOp -> Q -> DQ -> FloatOp -> the
// Q -> DQ could potentially be removed. This will provide a performance benefit by avoiding going from float to
// 8-bit and back to float, but could impact accuracy. The impact on accuracy will be model specific and depend on
// other factors like whether the model was created using Quantization Aware Training or Post Training Quantization.
// As such, it's best to test to determine if enabling this works well for your scenario.
// The default value is "0"
// Available since version 1.11.
static const char* const kOrtSessionOptionsEnableQuantQDQCleanup = "session.enable_quant_qdq_cleanup";
// Enable or disable gelu approximation in graph optimization. "0": disable; "1": enable. The default is "0".
// GeluApproximation has side effects which may change the inference results. It is disabled by default due to this.
static const char* const kOrtSessionOptionsEnableGeluApproximation = "optimization.enable_gelu_approximation";
// Enable or disable Cast chain elimination in graph optimization. "0": disable; "1": enable. The default is "0".
// CastElimination with chain elimination has side effects which may change the inference results. It is disabled by default due to this.
static const char* const kOrtSessionOptionsEnableCastChainElimination = "optimization.enable_cast_chain_elimination";
// This setting controls whether to enable AheadOfTime function inlining.
// AOT function inlining examines the graph and attempts to inline as many locally defined functions in the model
// as possible with the help of enabled execution providers.
// This can reduce the number of function calls and improve performance because it is done before
// Level1 optimizers and constant folding. However, under some circumstances, when the EPs are not available,
// one can disable the AOT inlining, produce an optimized model and postpone AOT until run time.
// "0": enable; "1": disable.
// Its default value is "0".
static const char* const kOrtSessionOptionsDisableAheadOfTimeFunctionInlining = "session.disable_aot_function_inlining";
#ifdef ENABLE_TRAINING
// Specifies a path of the file containing a list of memory optimization configurations.
// The value should be a string indicating the file path of the config file.
// The content of the config file is a JSON struct like this:
// [
// "Gelu+Cast+:1:0",
// "Dropout+:1:1"
// ]
// Taking the example of "Gelu+Cast+:1:0",
// > "Gelu+Cast+" is the subgraph string, a valid "subgraph string" should be one subgraph representation
// output by ORT graph transformations.
// > "1" is "optimization strategy", valid values: 0 - disabled, 1 - recompute.
// > "0" is "number of subgraph to apply" which is used to control how many subgraphs to apply optimization,
// to avoid "oversaving" the memory.
static const char* const kOrtSessionOptionsMemoryOptimizerApplyConfig = "optimization.memory_optimizer_config";
// Specifies the config for detecting subgraphs for memory footprint reduction.
// The value should be a string containing ints separated by commas. The default value is "0:0".
static const char* const kOrtSessionOptionsMemoryOptimizerProbeConfig = "optimization.enable_memory_probe_recompute_config";
#endif
// This setting, if set, should contain a comma-separated list of optimizer names that should be disabled.
// Optimizers may take time to execute and affect model loading time. If you feel that a specific optimizer
// does not provide runtime benefits but affects your model loading time, you may disable it using this config
// entry. This option is not enabled in an ORT_MINIMAL_BUILD build.
// A list of optimizers is available in onnxruntime/core/optimizer/graph_transformer_utils.cc
//
// Default is an empty string which means no optimizers are disabled.
static const char* const kOrtSessionOptionsDisableSpecifiedOptimizers = "optimization.disable_specified_optimizers";
// Controls whether or not to run graph optimizations in a loop.
//
// "0": disable. Graph Optimization Loop is disabled.
// ```
// Level 2 --> Level 3 --> InsertCastTransforms --> Level 4
// ^ |
// | "No Loop" |
// | |
// X xxxxxxxxxxx X
// ```
// "1": enable. Graph Optimization Loop is enabled, such that, if optimizations at Level 4 are applied then
// the loop will check for any other valid optimization that can happen.
// ```
// Level 2 --> Level 3 --> InsertCastTransforms --> Level 4
// ^ |
// | "Loop only depending on Level 4" |
// | |
// ---------------------------------------------------
// ```
// "2": enable. Graph Optimization Loop is enabled, such that, if optimizations at Level 2 or above are applied then
// The loop will check for any other valid optimization that can happen.
// ```
// Level 2 --> Level 3 --> InsertCastTransforms --> Level 4
// ^ |
// | "Loop" |
// | |
// ---------------------------------------------------
// ```
// Default value is set to "1".
static const char* const kOrtSessionOptionsGraphOptimizationsLoopLevel = "session.graph_optimizations_loop_level";
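// A hedged usage sketch (assuming the C++ API from <onnxruntime_cxx_api.h>,
// which is not part of this header): level "2" re-runs the loop whenever
// optimizations at Level 2 or above made a change.
#include <onnxruntime_cxx_api.h>
inline Ort::SessionOptions MakeLoopLevel2Options() {
  Ort::SessionOptions opts;
  opts.AddConfigEntry(kOrtSessionOptionsGraphOptimizationsLoopLevel, "2");
  return opts;
}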
// Enable or disable using device allocator for allocating initialized tensor memory. "1": enable; "0": disable. The default is "0".
// Using device allocators means the memory allocation is made using malloc/new.
static const char* const kOrtSessionOptionsUseDeviceAllocatorForInitializers = "session.use_device_allocator_for_initializers";
// Configure whether to allow the inter_op/intra_op threads spinning a number of times before blocking
// "0": thread will block if found no job to run
// "1": thread will spin a number of times before blocking
// The default is "0" when ORT is built with "ORT_CLIENT_PACKAGE_BUILD" and "1" otherwise.
// Thread spinning is disabled by default for client/on-device workloads to reduce cpu utilization and improve power efficiency.
static const char* const kOrtSessionOptionsConfigAllowInterOpSpinning = "session.inter_op.allow_spinning";
static const char* const kOrtSessionOptionsConfigAllowIntraOpSpinning = "session.intra_op.allow_spinning";
// Key for using model bytes directly for ORT format
// If a session is created using an input byte array containing the ORT format model data,
// by default we will copy the model bytes at the time of session creation to ensure the model bytes
// buffer is valid.
// Setting this option to "1" disables copying the model bytes and uses them directly. The caller
// has to guarantee that the model bytes are valid until the ORT session using the model bytes is destroyed.
static const char* const kOrtSessionOptionsConfigUseORTModelBytesDirectly = "session.use_ort_model_bytes_directly";
/// <summary>
/// Key for using the ORT format model flatbuffer bytes directly for initializers.
/// This avoids copying the bytes and reduces peak memory usage during model loading and initialization.
/// Requires `session.use_ort_model_bytes_directly` to be true.
/// If set, the flatbuffer bytes provided when creating the InferenceSession MUST remain valid for the entire
/// duration of the InferenceSession.
/// </summary>
static const char* const kOrtSessionOptionsConfigUseORTModelBytesForInitializers =
"session.use_ort_model_bytes_for_initializers";
// This should only be specified when exporting an ORT format model for use on a different platform.
// If the ORT format model will be used on ARM platforms set to "1". For other platforms set to "0"
// Available since version 1.11.
static const char* const kOrtSessionOptionsQDQIsInt8Allowed = "session.qdqisint8allowed";
// x64 SSE4.1/AVX2/AVX512 (with no VNNI) has an overflow problem with quantized matrix multiplication with U8S8.
// To avoid this we need to use the slower U8U8 matrix multiplication instead. This option, if
// turned on, uses the slower U8U8 matrix multiplications. Only effective on AVX2 or AVX512
// platforms.
static const char* const kOrtSessionOptionsAvx2PrecisionMode = "session.x64quantprecision";
// Specifies how minimal build graph optimizations are handled in a full build.
// These optimizations are at the extended level or higher.
// Possible values and their effects are:
// "save": Save runtime optimizations when saving an ORT format model.
// "apply": Only apply optimizations available in a minimal build.
// ""/<unspecified>: Apply optimizations available in a full build.
// Available since version 1.11.
static const char* const kOrtSessionOptionsConfigMinimalBuildOptimizations =
"optimization.minimal_build_optimizations";
// Note: The options specific to an EP should be specified prior to appending that EP to the session options object in
// order for them to take effect.
// Specifies a list of stop op types. Nodes of a type in the stop op types and nodes downstream from them will not be
// run by the NNAPI EP.
// The value should be a ","-delimited list of op types. For example, "Add,Sub".
// If not specified, the default set of stop ops is used. To specify an empty stop ops types list and disable stop op
// exclusion, set the value to "".
static const char* const kOrtSessionOptionsConfigNnapiEpPartitioningStopOps = "ep.nnapi.partitioning_stop_ops";
// Enabling dynamic block-sizing for multithreading.
// With a positive value, thread pool will split a task of N iterations to blocks of size starting from:
// N / (num_of_threads * dynamic_block_base)
// As execution progresses, the size will decrease according to the diminishing residual of N,
// meaning the task will be distributed in smaller granularity for better parallelism.
// For some models, it helps to reduce the variance of E2E inference latency and boost performance.
// The feature will not function by default, specify any positive integer, e.g. "4", to enable it.
// Available since version 1.11.
static const char* const kOrtSessionOptionsConfigDynamicBlockBase = "session.dynamic_block_base";
// This option allows decreasing CPU usage between infrequent
// requests by forcing any spinning thread-pool threads to stop immediately when the last
// concurrent Run() call returns.
// Spinning is restarted on the next Run() call.
// Applies only to internal thread pools.
static const char* const kOrtSessionOptionsConfigForceSpinningStop = "session.force_spinning_stop";
// "1": all inconsistencies encountered during shape and type inference
// will result in failures.
// "0": in some cases warnings will be logged but processing will continue. The default.
// May be useful to expose bugs in models.
static const char* const kOrtSessionOptionsConfigStrictShapeTypeInference = "session.strict_shape_type_inference";
// "1": every model using a more recent opset than the latest released one will fail
// "0": the model may or may not work if onnxruntime cannot find an implementation, this option
// is used for development purpose.
static const char* const kOrtSessionOptionsConfigStrictAllowReleasedOpsetsOnly = "session.allow_released_opsets_only";
// The file saves configuration for partitioning node among logic streams
static const char* const kNodePartitionConfigFile = "session.node_partition_config_file";
// This option allows setting affinities for intra-op threads.
// The affinity string follows this format:
// logical_processor_id,logical_processor_id;logical_processor_id,logical_processor_id
// Semicolons separate the configurations of different threads, while commas separate the processors the i-th thread is expected to attach to.
// e.g. 1,2,3;4,5
// specifies affinities for two threads, with the 1st thread attached to the 1st, 2nd, and 3rd processor, and the 2nd thread to the 4th and 5th.
// To ease the configuration, an "interval" is also allowed:
// e.g. 1-8;8-16;17-24
// specifies that the 1st thread runs on the first eight processors, the 2nd thread runs on the next eight processors, and so forth.
// Note:
// 1. Once set, the number of thread affinities must equal intra_op_num_threads - 1, since ORT does not set affinity on the main thread, which
// is started and managed by the calling app;
// 2. For Windows, ORT will infer the group id from a logical processor id. For example, assuming there are two groups, each with 64 logical processors,
// an id of 64 will be inferred as the last processor of the 1st group, while 65 will be interpreted as the 1st processor of the second group.
// Hence 64-65 is an invalid configuration, because a Windows thread cannot be attached to processors across a group boundary.
static const char* const kOrtSessionOptionsConfigIntraOpThreadAffinities = "session.intra_op_thread_affinities";
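// A hedged usage sketch (assuming the C++ API from <onnxruntime_cxx_api.h>,
// which is not part of this header): two affinity entries ("1-4" and "5-8"),
// so intra_op_num_threads must be 3 (the main thread plus two affinitized workers).
#include <onnxruntime_cxx_api.h>
inline Ort::SessionOptions MakeAffinitizedOptions() {
  Ort::SessionOptions opts;
  opts.SetIntraOpNumThreads(3);
  opts.AddConfigEntry(kOrtSessionOptionsConfigIntraOpThreadAffinities, "1-4;5-8");
  return opts;
}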
// This option will dump out the model to assist debugging any issues with layout transformation,
// and is primarily intended for developer usage. It is only relevant if an execution provider that requests
// NHWC layout is enabled such as NNAPI, XNNPACK or QNN.
//
// Default is off. Set to "1" to enable.
//
// If modified by layout transformation the model will be dumped after these steps:
// 1) insertion of the layout transformation Transpose nodes
// 2) after those are optimized using the transpose optimizer,
// 3) after the L1 transformers are applied to the updated graph.
// The model will be saved to filename post_layout_transform_step_<step_number>.onnx.
static const char* const kDebugLayoutTransformation = "session.debug_layout_transformation";
// Graph nodes that are not supported by the execution providers (EPs) explicitly added to the session are
// assigned (i.e., "fallback") to the CPU EP by default.
//
// This option allows the user to disable the fallback of unsupported graph nodes to the CPU EP.
// If this option is set to "1", session creation will fail if the execution providers other than the CPU EP cannot
// fully support all of the nodes in the graph.
//
// It is invalid to set this option and explicitly add the CPU EP to the session. In this case, session creation
// will also fail with an error.
//
// Option values:
// - "0": CPU EP fallback is not disabled. [DEFAULT]
// - "1": CPU EP fallback is disabled.
static const char* const kOrtSessionOptionsDisableCPUEPFallback = "session.disable_cpu_ep_fallback";
// Use this config when serializing a large model after optimization to specify an external initializers file
static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersFileName =
"session.optimized_model_external_initializers_file_name";
// Use this config to control the minimum size of the initializer when externalizing it during serialization
static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes =
"session.optimized_model_external_initializers_min_size_in_bytes";
// When loading a model from a memory buffer and the model has external initializers,
// use this config to set the external data file folder path.
// All external data files should be in the same folder.
static const char* const kOrtSessionOptionsModelExternalInitializersFileFolderPath =
"session.model_external_initializers_file_folder_path";
// Use this config when saving pre-packed constant initializers to an external data file.
// This allows you to memory-map pre-packed initializers on model load and leave it
// to the OS how much memory is consumed by the pre-packed initializers. Otherwise,
// pre-packed data resides on the heap.
//
// - "0": Default is not save pre-packed initializers to a data file.
// - "1": Save pre-packed constant initializers to an external data file.
// Sample usage: sess_options.add_session_config_entry(kOrtSessionOptionsSavePrePackedConstantInitializers, "1")
static const char* const kOrtSessionOptionsSavePrePackedConstantInitializers =
"session.save_external_prepacked_constant_initializers";
// Use this config when you want to collect memory stats for each node in the graph.
// The file format is a CSV file with the following columns:
// node_name, initializers_memory, dynamic_outputs_sizes, temp_allocations_size
// The file will be created if it does not exist, and will be overwritten if it does.
//
// The content of the file can be used to estimate memory requirements at run time including
// the temporary allocations. This operation is preferably done on a CPU device, as the model may exceed
// device memory limits in constrained environments. When enabling this option, it is important to disable
// memory patterns, as they tend to allocate large blocks to avoid fragmentation and accommodate the needs of multiple
// kernels. Memory patterns may make it difficult to allocate on a device with limited memory.
//
// The collected stats can then be used to partition the graph among the devices in a way that only the
// required memory is allocated on each device.
//
// - "full path to file": there is no default for this option. If the file cannot be opened for writing, an error will be returned.
static const char* const kOrtSessionOptionsCollectNodeMemoryStatsToFile = "session.collect_node_memory_stats_to_file";
/// This is a composite CSV setting formatted as "memory limit in kb,file name for collected stats".
/// "limit > 0": enables Capacity Aware Partitioning for the CUDA EP. `limit` is optional, and when absent
/// the provider may attempt to figure out the available memory automatically.
/// The setting with no limit is expected to look like: ",file name for collected stats".
/// The EP will place nodes on the device according to "file name":
/// this file is expected to be found in the same folder as the model. The file contains
/// pre-recorded stats collected when running with kOrtSessionOptionsCollectNodeMemoryStatsToFile enabled (see above).
static const char* const kOrtSessionOptionsResourceCudaPartitioningSettings =
"session.resource_cuda_partitioning_settings";
// Enable EP context feature to dump the partitioned graph which includes the EP context into Onnx file.
// The dumped Onnx model with EP context can be used for future inference to avoid the EP graph partitioning/compile overhead.
// "0": disable. (default)
// "1": enable.
static const char* const kOrtSessionOptionEpContextEnable = "ep.context_enable";
// Specify the file path for the Onnx model which has EP context.
// Defaults to original_file_name_ctx.onnx if not specified.
// A folder is not a valid option.
static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_path";
// Flag to specify whether to dump the EP context into the Onnx model.
// "0": dump the EP context into separate file, keep the file name in the Onnx model. (default).
// "1": dump the EP context into the Onnx model.
static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode";
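// A hedged usage sketch (assuming the C++ API from <onnxruntime_cxx_api.h>,
// which is not part of this header): compile once and dump an EPContext model
// (file name illustrative), embedding the context binary so later sessions can
// load it without recompiling.
#include <onnxruntime_cxx_api.h>
inline Ort::SessionOptions MakeEpContextDumpOptions() {
  Ort::SessionOptions opts;
  opts.AddConfigEntry(kOrtSessionOptionEpContextEnable, "1");
  opts.AddConfigEntry(kOrtSessionOptionEpContextFilePath, "model_ctx.onnx");
  opts.AddConfigEntry(kOrtSessionOptionEpContextEmbedMode, "1");
  return opts;
}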
// Specify the EPContext node name prefix to make it unique,
// in case the user needs to merge/connect multiple EPContext nodes in one model.
static const char* const kOrtSessionOptionEpContextNodeNamePrefix = "ep.context_node_name_prefix";
// Share EP related resources across sessions
static const char* const kOrtSessionOptionShareEpContexts = "ep.share_ep_contexts";
// Stop sharing EP-related resources across sessions from then on.
static const char* const kOrtSessionOptionStopShareEpContexts = "ep.stop_share_ep_contexts";
// Used only for context model generation.
// This configuration is used when some nodes are partitioned on the CPU EP and those nodes have external initializers.
// When generating the EP context model, the new model should not rely on the old external data file used by the source ONNX model.
// Use this setting when dumping the EP context model with an external initializers file.
// If specified, all initializers will be placed inside the external data file.
// Otherwise, all initializers will be embedded inside the generated ONNX file.
// By default, this option is not set, meaning all initializers will be included within the ONNX file.
static const char* const kOrtSessionOptionsEpContextModelExternalInitializersFileName =
"ep.context_model_external_initializers_file_name";
// Gemm fastmath mode provides fp32 gemm acceleration with bfloat16 based matmul.
// Option values:
// - "0": Gemm FastMath mode is not enabled. [DEFAULT]
// - "1": Gemm FastMath mode is enabled.
static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas.enable_gemm_fastmath_arm64_bfloat16";
// When converting DQ + MatMul -> MatMulNBits, the accuracy level of the MatMulNBits is controlled by this option.
// Refer to MatMulNBits op schema for more details.
// If not provided, default is 4.
static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";
// THIS OPTION IS NOT A REGULAR SESSION OPTION SINCE IT CAN BE MODIFIED AT ANY TIME
// Meant to be used with SetEpDynamicOptions
// Specify the type of workload for this session.
// "Default": OS determines the scheduling priority and processor performance to service this workload. [Default]
// "Efficient": OS treats this workload is efficiency oriented with low scheduling priority and efficient processor performance.
static const char* const kOrtEpDynamicOptionsWorkloadType = "ep.dynamic.workload_type";
// Disables model compilation during session initialization.
//
// If this option is set to "1", inference session creation will fail with error code ORT_MODEL_REQUIRES_COMPILATION
// if compilation is required to run the model on any Execution Provider added to the session.
// Only the following kinds of models are valid when this option is set to "1":
// - Pre-compiled models that have EPContext nodes for the compiling Execution Providers in the session.
// - Non-compiled models that run only on non-compiling Execution Providers, like CPU EP.
//
// See \href https://onnxruntime.ai/docs/execution-providers/EP-Context-Design.html for details about
// compiled models with EPContext nodes.
//
// Option values:
// - "0": EP compile is not disabled. [DEFAULT]
// - "1": EP compile is disabled.
static const char* const kOrtSessionOptionsDisableModelCompile = "session.disable_model_compile";
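// A hedged usage sketch (assuming the C++ API from <onnxruntime_cxx_api.h>,
// which is not part of this header): with this entry set, creating a session
// over a model that still needs compiling fails with
// ORT_MODEL_REQUIRES_COMPILATION instead of compiling it.
#include <onnxruntime_cxx_api.h>
inline Ort::SessionOptions MakePrecompiledOnlyOptions() {
  Ort::SessionOptions opts;
  opts.AddConfigEntry(kOrtSessionOptionsDisableModelCompile, "1");
  return opts;
}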
// Controls behavior when compiled model compatibility is SUPPORTED_PREFER_RECOMPILATION.
// "0": Allow execution with suboptimal performance. [DEFAULT]
// "1": Fail session creation to require recompilation for optimal performance.
// Note: UNSUPPORTED models always fail regardless of this setting.
static const char* const kOrtSessionOptionsFailOnSuboptimalCompiledModel =
"session.fail_on_suboptimal_compiled_model";
// THIS OPTION IS NOT A REGULAR SESSION OPTION SINCE IT CAN BE MODIFIED AT ANY TIME
// Meant to be used with SetEpDynamicOptions
// options for HTP performance mode: "burst", "balanced", "default", "high_performance",
// "high_power_saver", "low_balanced", "extreme_power_saver", "low_power_saver", "power_saver",
// "sustained_high_performance". Default to "default".
static const char* const kOrtEpDynamicOptionsQnnHtpPerformanceMode = "ep.dynamic.qnn_htp_performance_mode";

View File

@ -0,0 +1,18 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
namespace onnxruntime {
// data types for execution provider options
using ProviderOptions = std::unordered_map<std::string, std::string>;
using ProviderOptionsVector = std::vector<ProviderOptions>;
using ProviderOptionsMap = std::unordered_map<std::string, ProviderOptions>;
} // namespace onnxruntime
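
A small usage sketch of these aliases, assuming the declarations above are visible; the CUDA EP name and option keys are illustrative examples, not an exhaustive or authoritative list.

#include <string>
#include <unordered_map>
#include <vector>

int main() {
  // One option bag per execution provider; keys and values are always strings.
  onnxruntime::ProviderOptions cuda_opts{
      {"device_id", "0"},
      {"arena_extend_strategy", "kSameAsRequested"}};
  // Keyed by EP name when several providers are configured at once.
  onnxruntime::ProviderOptionsMap by_ep{{"CUDAExecutionProvider", cuda_opts}};
  return by_ep.size() == 1 ? 0 : 1;
}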

View File

@ -0,0 +1,26 @@
####### Expanded from @PACKAGE_INIT@ by configure_package_config_file() #######
####### Any changes to this file will be overwritten by the next CMake run ####
####### The input file was PROJECT_CONFIG_FILE ########
get_filename_component(PACKAGE_PREFIX_DIR "${CMAKE_CURRENT_LIST_DIR}/../../../" ABSOLUTE)
macro(set_and_check _var _file)
set(${_var} "${_file}")
if(NOT EXISTS "${_file}")
message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !")
endif()
endmacro()
macro(check_required_components _NAME)
foreach(comp ${${_NAME}_FIND_COMPONENTS})
if(NOT ${_NAME}_${comp}_FOUND)
if(${_NAME}_FIND_REQUIRED_${comp})
set(${_NAME}_FOUND FALSE)
endif()
endif()
endforeach()
endmacro()
####################################################################################
include("${CMAKE_CURRENT_LIST_DIR}/onnxruntimeTargets.cmake")

View File

@ -0,0 +1,65 @@
# This is a basic version file for the Config-mode of find_package().
# It is used by write_basic_package_version_file() as input file for configure_file()
# to create a version-file which can be installed along a config.cmake file.
#
# The created file sets PACKAGE_VERSION_EXACT if the current version string and
# the requested version string are exactly the same and it sets
# PACKAGE_VERSION_COMPATIBLE if the current version is >= requested version,
# but only if the requested major version is the same as the current one.
# The variable CVF_VERSION must be set before calling configure_file().
set(PACKAGE_VERSION "1.23.2")
if(PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)
set(PACKAGE_VERSION_COMPATIBLE FALSE)
else()
if("1.23.2" MATCHES "^([0-9]+)\\.")
set(CVF_VERSION_MAJOR "${CMAKE_MATCH_1}")
if(NOT CVF_VERSION_MAJOR VERSION_EQUAL 0)
string(REGEX REPLACE "^0+" "" CVF_VERSION_MAJOR "${CVF_VERSION_MAJOR}")
endif()
else()
set(CVF_VERSION_MAJOR "1.23.2")
endif()
if(PACKAGE_FIND_VERSION_RANGE)
# both endpoints of the range must have the expected major version
math (EXPR CVF_VERSION_MAJOR_NEXT "${CVF_VERSION_MAJOR} + 1")
if (NOT PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
OR ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX_MAJOR STREQUAL CVF_VERSION_MAJOR)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND NOT PACKAGE_FIND_VERSION_MAX VERSION_LESS_EQUAL CVF_VERSION_MAJOR_NEXT)))
set(PACKAGE_VERSION_COMPATIBLE FALSE)
elseif(PACKAGE_FIND_VERSION_MIN_MAJOR STREQUAL CVF_VERSION_MAJOR
AND ((PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "INCLUDE" AND PACKAGE_VERSION VERSION_LESS_EQUAL PACKAGE_FIND_VERSION_MAX)
OR (PACKAGE_FIND_VERSION_RANGE_MAX STREQUAL "EXCLUDE" AND PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION_MAX)))
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
else()
if(PACKAGE_FIND_VERSION_MAJOR STREQUAL CVF_VERSION_MAJOR)
set(PACKAGE_VERSION_COMPATIBLE TRUE)
else()
set(PACKAGE_VERSION_COMPATIBLE FALSE)
endif()
if(PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)
set(PACKAGE_VERSION_EXACT TRUE)
endif()
endif()
endif()
# if the installed or the using project don't have CMAKE_SIZEOF_VOID_P set, ignore it:
if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "" OR "8" STREQUAL "")
return()
endif()
# check that the installed version has the same 32/64bit-ness as the one which is currently searching:
if(NOT CMAKE_SIZEOF_VOID_P STREQUAL "8")
math(EXPR installedBits "8 * 8")
set(PACKAGE_VERSION "${PACKAGE_VERSION} (${installedBits}bit)")
set(PACKAGE_VERSION_UNSUITABLE TRUE)
endif()

View File

@ -0,0 +1,19 @@
#----------------------------------------------------------------
# Generated CMake target import file for configuration "Release".
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Import target "onnxruntime::onnxruntime" for configuration "Release"
set_property(TARGET onnxruntime::onnxruntime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
set_target_properties(onnxruntime::onnxruntime PROPERTIES
IMPORTED_LOCATION_RELEASE "${_IMPORT_PREFIX}/lib64/libonnxruntime.so.1.23.2"
IMPORTED_SONAME_RELEASE "libonnxruntime.so.1"
)
list(APPEND _cmake_import_check_targets onnxruntime::onnxruntime )
list(APPEND _cmake_import_check_files_for_onnxruntime::onnxruntime "${_IMPORT_PREFIX}/lib64/libonnxruntime.so.1.23.2" )
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)

View File

@ -0,0 +1,106 @@
# Generated by CMake
if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8)
message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
if(CMAKE_VERSION VERSION_LESS "2.8.3")
message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.8.3...3.29)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------
# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)
# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_cmake_targets_defined "")
set(_cmake_targets_not_defined "")
set(_cmake_expected_targets "")
foreach(_cmake_expected_target IN ITEMS onnxruntime::onnxruntime)
list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
if(TARGET "${_cmake_expected_target}")
list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
else()
list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
endif()
endforeach()
unset(_cmake_expected_target)
if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
unset(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
return()
endif()
if(NOT _cmake_targets_defined STREQUAL "")
string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
endif()
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)
# Compute the installation prefix relative to this file.
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
if(_IMPORT_PREFIX STREQUAL "/")
set(_IMPORT_PREFIX "")
endif()
# Create imported target onnxruntime::onnxruntime
add_library(onnxruntime::onnxruntime SHARED IMPORTED)
set_target_properties(onnxruntime::onnxruntime PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include/onnxruntime"
)
# Load information for each installed configuration.
file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/onnxruntimeTargets-*.cmake")
foreach(_cmake_config_file IN LISTS _cmake_config_files)
include("${_cmake_config_file}")
endforeach()
unset(_cmake_config_file)
unset(_cmake_config_files)
# Cleanup temporary variables.
set(_IMPORT_PREFIX)
# Loop over all imported files and verify that they actually exist
foreach(_cmake_target IN LISTS _cmake_import_check_targets)
if(CMAKE_VERSION VERSION_LESS "3.28"
OR NOT DEFINED _cmake_import_check_xcframework_for_${_cmake_target}
OR NOT IS_DIRECTORY "${_cmake_import_check_xcframework_for_${_cmake_target}}")
foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
if(NOT EXISTS "${_cmake_file}")
message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
\"${_cmake_file}\"
but this file does not exist. Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
\"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
endif()
endforeach()
endif()
unset(_cmake_file)
unset("_cmake_import_check_files_for_${_cmake_target}")
endforeach()
unset(_cmake_target)
unset(_cmake_import_check_targets)
# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.
# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
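
Once the package is found, consuming the export is a single link line; onnxruntime::onnxruntime also propagates include/onnxruntime through its INTERFACE_INCLUDE_DIRECTORIES. A sketch with a hypothetical ort_demo target:

    find_package(onnxruntime REQUIRED CONFIG)
    add_executable(ort_demo main.cpp)  # hypothetical demo executable
    target_link_libraries(ort_demo PRIVATE onnxruntime::onnxruntime)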

View File

@ -0,0 +1 @@
libonnxruntime.so.1.23.2

View File

@ -0,0 +1,13 @@
prefix=/usr/local
bindir=${prefix}/bin
mandir=${prefix}/share/man
docdir=${prefix}/share/doc/onnxruntime
libdir=${prefix}/lib64
includedir=${prefix}/include/onnxruntime
Name: onnxruntime
Description: ONNX runtime
URL: https://github.com/microsoft/onnxruntime
Version: 1.23.2
Libs: -L${libdir} -lonnxruntime
Cflags: -I${includedir}
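
Non-CMake builds can discover the library through this pkg-config file; from CMake the same file is reachable via FindPkgConfig. A sketch, assuming the file is installed as libonnxruntime.pc on PKG_CONFIG_PATH (the module name is inferred from the library name):

    find_package(PkgConfig REQUIRED)
    pkg_check_modules(ORT REQUIRED IMPORTED_TARGET libonnxruntime)  # reads libonnxruntime.pc
    target_link_libraries(my_yolo_sdk PRIVATE PkgConfig::ORT)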

View File

@ -1,5 +1,4 @@
package com.bonus.sdk;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
@ -11,11 +10,9 @@ import java.util.Set;
public class YoloSdk implements AutoCloseable {
private long nativeHandle; // handle to the native C++ detector created by nativeInit()
private static final Set<String> loadedLibraries = new HashSet<>();
/** Loads the native SDK and its dependency libraries on first use of this class. */
static {
try {
loadSdkLibrary();
@ -27,45 +24,56 @@ public class YoloSdk implements AutoCloseable {
private static void loadSdkLibrary() throws IOException {
String osName = System.getProperty("os.name").toLowerCase();
String osArch = System.getProperty("os.arch").toLowerCase();
String libPathInJar;
String sdkLibName;
String[] dependencyLibs = {};
if (osName.contains("win") && osArch.contains("64")) {
libPathInJar = "/lib/win-x64/";
dependencyLibs = new String[]{
"onnxruntime.dll",
"abseil_dll.dll",
"libprotobuf.dll",
"zlib1.dll",
"opencv_core4.dll",
"opencv_imgproc4.dll",
"opencv_dnn4.dll"
};
"onnxruntime.dll",
"abseil_dll.dll",
"libprotobuf.dll",
"zlib1.dll",
"opencv_core4.dll",
"opencv_imgproc4.dll",
"opencv_dnn4.dll"
};
sdkLibName = "my_yolo_sdk.dll";
} else if ((osName.contains("nix") || osName.contains("nux")) && osArch.contains("64")) {
libPathInJar = "/lib/linux-x86_64/";
// Bundled Linux builds of ONNX Runtime 1.23.2 and OpenCV 4.6.0, extracted from the JAR below.
dependencyLibs = new String[]{
"libonnxruntime.so.1.23.2",
"libopencv_core.so.4.6.0",
"libopencv_imgproc.so.4.6.0",
"libopencv_dnn.so.4.6.0"
};
sdkLibName = "libmy_yolo_sdk.so";
} else {
throw new UnsupportedOperationException("Unsupported OS/Arch: " + osName + "/" + osArch);
}
// 1. Load the dependency libraries first so the SDK's symbols can resolve against them.
for (String lib : dependencyLibs) {
loadLibraryFromJar(libPathInJar + lib);
}
// 2. Then load the SDK library itself.
loadLibraryFromJar(libPathInJar + sdkLibName);
}
/** Extracts one native library from the JAR and loads it, skipping libraries already loaded. */
private static void loadLibraryFromJar(String path) throws IOException {
String libName = new File(path).getName();
if (loadedLibraries.contains(libName)) {
return; // already loaded
}
try (InputStream in = YoloSdk.class.getResourceAsStream(path)) {
@ -82,7 +90,6 @@ public class YoloSdk implements AutoCloseable {
}
}
// --- Native methods (implemented in the C++ SDK) ---
private native long nativeInit(String modelPath, int inputWidth, int inputHeight);
private native void nativeRelease(long handle);
private native Detection[] nativePredict(
@ -117,8 +124,6 @@ public class YoloSdk implements AutoCloseable {
}
}
/** Returns the image's pixels as a tightly packed BGR byte array. */
private byte[] getBgrBytes(BufferedImage image) {
if (image.getType() == BufferedImage.TYPE_3BYTE_BGR) {
return ((DataBufferByte) image.getRaster().getDataBuffer()).getData();