wasi-nn: Add a new target for llama.cpp as a wasi-nn backend (#3709)

Minimum support:
- [x] accept (WasmEdge) customized model parameters. metadata.
- [x] Target [wasmedge-ggml examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml)
  - [x] basic
  - [x] chatml
  - [x] gemma
  - [x] llama
  - [x] qwen

---

In the future, to support if required:
- [ ] Target [wasmedge-ggml examples](https://github.com/second-state/WasmEdge-WASINN-examples/tree/master/wasmedge-ggml)
  - [ ] command-r. (>70G memory requirement)
  - [ ] embedding. (embedding mode)
  - [ ] grammar. (use the grammar option to constrain the model to generate the JSON output)
  - [ ] llama-stream. (new APIS `compute_single`, `get_output_single`, `fini_single`)
  - [ ] llava. (image representation)
  - [ ] llava-base64-stream. (image representation)
  - [ ] multimodel. (image representation)
- [ ] Target [llamaedge](https://github.com/LlamaEdge/LlamaEdge)
This commit is contained in:
liang.he
2024-09-10 08:45:18 +08:00
committed by GitHub
parent cb71ca5822
commit 0599351262
11 changed files with 949 additions and 122 deletions

View File

@ -63,21 +63,35 @@ WORKDIR /workspaces/wasmedge-wasinn-examples
RUN git clone --depth 1 https://github.com/second-state/WasmEdge-WASINN-examples.git .
COPY core/iwasm/libraries/wasi-nn/test/bump_wasi_nn_to_0_6_0.patch .
RUN git apply ./bump_wasi_nn_to_0_6_0.patch
# recompile with wasi-nn 0.6.0
RUN cd openvino-mobilenet-image/rust && cargo build --target=wasm32-wasi
RUN cd openvino-mobilenet-raw/rust && cargo build --target=wasm32-wasi
RUN cd openvino-road-segmentation-adas/openvino-road-seg-adas && cargo build --target=wasm32-wasi
RUN cd tflite-birds_v1-image/rust && cargo build --target=wasm32-wasi
# preparation
RUN cd openvino-mobilenet-image \
# recompile with wasi-nn 0.6.0
WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-mobilenet-image/
RUN pushd rust \
&& cargo build --target=wasm32-wasi \
&& popd \
&& ./download_mobilenet.sh . \
&& ls -l mobilenet.xml mobilenet.bin
RUN cd openvino-mobilenet-raw \
WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-mobilenet-raw/
RUN pushd rust \
&& cargo build --target=wasm32-wasi \
&& popd \
&& ./download_mobilenet.sh . \
&& ls -l mobilenet.xml mobilenet.bin tensor-1x224x224x3-f32.bgr
WORKDIR /workspaces/wasmedge-wasinn-examples/openvino-road-segmentation-adas/
RUN pushd openvino-road-seg-adas \
&& cargo build --target=wasm32-wasi
WORKDIR /workspaces/wasmedge-wasinn-examples/tflite-birds_v1-image/
RUN pushd rust \
&& cargo build --target=wasm32-wasi
# mount models when running
WORKDIR /workspaces/wasmedge-wasinn-examples/wasmedge-ggml/qwen
RUN wget --progress=dot:giga https://www.modelscope.cn/models/qwen/Qwen1.5-0.5B-Chat-GGUF/resolve/master/qwen1_5-0_5b-chat-q2_k.gguf
RUN cargo build --target=wasm32-wasi
#
# iwasm. build from source
WORKDIR /workspaces/wamr
@ -88,15 +102,16 @@ WORKDIR /workspaces/wamr/product-mini/platforms/linux
RUN OpenVINO_DIR=/usr/lib/openvino-2023.2.0 \
cmake -S . -B build \
-DWAMR_BUILD_WASI_NN=1 -DWAMR_BUILD_WASI_EPHEMERAL_NN=1 \
-DWAMR_BUILD_WASI_NN_OPENVINO=1 -DWAMR_BUILD_WASI_NN_TFLITE=1 \
&& cmake --build build
ENV PATH=/workspaces/wamr/product-mini/platforms/linux/build:${PATH}
ENV LD_LIBRARY_PATH=/workspaces/wamr/product-mini/platforms/linux/build
-DWAMR_BUILD_WASI_NN_OPENVINO=1 \
-DWAMR_BUILD_WASI_NN_TFLITE=1 \
-DWAMR_BUILD_WASI_NN_LLAMACPP=1 \
&& cmake --build build \
&& cmake --install build
ENV LD_LIBRARY_PATH=/usr/local/lib
# add smoke test script
COPY core/iwasm/libraries/wasi-nn/test/run_smoke_test.py /
#
WORKDIR /workspaces/wasmedge-wasinn-examples
CMD ["python3", "/run_smoke_test.py"]

View File

@ -260,6 +260,63 @@ def execute_openvino_road_segmentation_adas(
print("------------------------------------------------------------")
def execute_wasmedge_ggml_qwen(iwasm_bin: str, wasmedge_bin: str, cwd: Path):
iwasm_args = ["--dir=."]
wasm_file = ["./target/wasm32-wasi/debug/wasmedge-ggml-qwen.wasm"]
wasm_args = ["./qwen1_5-0_5b-chat-q2_k.gguf"]
cmd = [iwasm_bin]
cmd.extend(iwasm_args)
cmd.extend(wasm_file)
cmd.extend(wasm_args)
# print(f'Execute: {" ".join(cmd)}')
prompt = "what is the capital of Pakistan"
with subprocess.Popen(
cmd,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=cwd,
) as p:
# USER
p.stdout.readline()
p.stdin.write(b"hi\n")
p.stdin.flush()
# ASSITANT
p.stdout.readline()
# xxx
p.stdout.readline()
# USER
p.stdout.readline()
p.stdin.write(prompt.encode())
p.stdin.write(b"\n")
p.stdin.flush()
# ASSITANT
p.stdout.readline()
# xxx
answer = p.stdout.readline().decode("utf-8")
# USER
p.stdout.readline()
p.terminate()
if "Karachi" in answer:
print(f"- wasmedge_ggml_qwen. PASS")
return
print(f"- wasmedge_ggml_qwen. FAILED")
print("------------------------------------------------------------")
pprint(answer)
print("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
pprint("Karachi")
print("------------------------------------------------------------")
def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str):
assert Path.cwd().name == "wasmedge-wasinn-examples"
assert shutil.which(iwasm_bin)
@ -282,6 +339,9 @@ def execute_wasmedge_wasinn_examples(iwasm_bin: str, wasmedge_bin: str):
iwasm_bin, wasmedge_bin, openvino_road_segmentation_adas_dir
)
wasmedge_ggml_qwem_dir = Path.cwd().joinpath("./wasmedge-ggml/qwen")
execute_wasmedge_ggml_qwen(iwasm_bin, wasmedge_bin, wasmedge_ggml_qwem_dir)
if __name__ == "__main__":
execute_wasmedge_wasinn_examples("iwasm", "wasmedge")