wasi-nn: Add external delegation to support several NPUs/GPUs (#2162)

Add the VX delegate as a TFLite external delegate, so that several NPUs/GPUs
(from VeriSilicon, NXP, and Amlogic) can be controlled via WASI-NN.

The test code can run with the x86 simulator.
Author: ayakoakasaka
Committed by: GitHub
Date: 2023-05-05 10:29:36 +02:00
Parent: 5a23ae465c
Commit: 89be5622a5
4 changed files with 159 additions and 3 deletions
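
For context, the flow this patch adds is the standard TFLite external-delegate lifecycle: build default options from the path of a vendor delegate library, create the delegate, attach it to the interpreter, and keep it alive until the interpreter is destroyed. Below is a minimal, self-contained sketch of that flow; the helper name apply_external_delegate, its signature, and the example library name "libvx_delegate.so" are illustrative assumptions, while the TFLite calls are the same ones used in the patch (which takes the library path from the WASI_NN_EXT_DELEGATE_PATH macro and stores the delegate in TFLiteContext).

#include <tensorflow/lite/delegates/external/external_delegate.h>
#include <tensorflow/lite/interpreter.h>

/* Hypothetical helper: attach an external delegate (e.g. the VX delegate)
 * to a TFLite interpreter. Returns the created delegate so the caller can
 * release it with TfLiteExternalDelegateDelete() after the interpreter is
 * destroyed, mirroring how the patch keeps the pointer in TFLiteContext
 * and frees it in tensorflowlite_destroy(). Returns NULL on failure. */
static TfLiteDelegate *
apply_external_delegate(tflite::Interpreter *interpreter,
                        const char *delegate_lib_path /* e.g. "libvx_delegate.so" (assumed name) */)
{
    /* Default options, pointing at the delegate shared library. */
    TfLiteExternalDelegateOptions options =
        TfLiteExternalDelegateOptionsDefault(delegate_lib_path);

    /* Load the library and create the delegate. */
    TfLiteDelegate *delegate = TfLiteExternalDelegateCreate(&options);
    if (delegate == NULL)
        return NULL;

    /* Hand the graph over to the delegate; on failure the caller can fall
     * back to the default CPU kernels, as the patch does via use_default. */
    if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
        TfLiteExternalDelegateDelete(delegate);
        return NULL;
    }
    return delegate;
}

Keeping the delegate in the per-instance TFLiteContext (instead of a local variable, as the old GPU path did) is what enables the matching TfLiteGpuDelegateV2Delete / TfLiteExternalDelegateDelete call in tensorflowlite_destroy.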


@@ -21,6 +21,10 @@
 #include <tensorflow/lite/delegates/gpu/delegate.h>
 #endif
 
+#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#include <tensorflow/lite/delegates/external/external_delegate.h>
+#endif
+
 /* Maximum number of graphs per WASM instance */
 #define MAX_GRAPHS_PER_INST 10
 /* Maximum number of graph execution context per WASM instance*/
@@ -42,6 +46,7 @@ typedef struct {
     uint32_t current_interpreters;
     Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
     korp_mutex g_lock;
+    TfLiteDelegate *delegate;
 } TFLiteContext;
 
 /* Utils */
@@ -194,18 +199,40 @@ tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
 #if defined(WASI_NN_ENABLE_GPU)
     NN_WARN_PRINTF("GPU enabled.");
     // https://www.tensorflow.org/lite/performance/gpu
-    auto options = TfLiteGpuDelegateOptionsV2Default();
+    TfLiteGpuDelegateOptionsV2 options =
+        TfLiteGpuDelegateOptionsV2Default();
     options.inference_preference =
         TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
     options.inference_priority1 =
         TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
-    auto *delegate = TfLiteGpuDelegateV2Create(&options);
+    tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options);
+    if (tfl_ctx->delegate == NULL) {
+        NN_ERR_PRINTF("Error when generating GPU delegate.");
+        use_default = true;
+        return missing_memory;
+    }
     if (tfl_ctx->interpreters[*ctx]
-            .interpreter->ModifyGraphWithDelegate(delegate)
+            .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
         != kTfLiteOk) {
         NN_ERR_PRINTF("Error when enabling GPU delegate.");
         use_default = true;
     }
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+    NN_WARN_PRINTF("external delegation enabled.");
+    TfLiteExternalDelegateOptions options =
+        TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+    tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
+    if (tfl_ctx->delegate == NULL) {
+        NN_ERR_PRINTF("Error when generating External delegate.");
+        use_default = true;
+        return missing_memory;
+    }
+    if (tfl_ctx->interpreters[*ctx]
+            .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
+        != kTfLiteOk) {
+        NN_ERR_PRINTF("Error when enabling External delegate.");
+        use_default = true;
+    }
 #else
     NN_WARN_PRINTF("GPU not enabled.");
     use_default = true;
@@ -350,6 +377,8 @@ tensorflowlite_initialize(void **tflite_ctx)
         NN_ERR_PRINTF("Error while initializing the lock");
     }
 
+    tfl_ctx->delegate = NULL;
+
     *tflite_ctx = (void *)tfl_ctx;
 }
@@ -364,6 +393,14 @@
     */
     TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
 
+    if (tfl_ctx->delegate != NULL) {
+#if defined(WASI_NN_ENABLE_GPU)
+        TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+        TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#endif
+    }
+
     NN_DBG_PRINTF("Freeing memory.");
     for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
         tfl_ctx->models[i].model.reset();