Use huggingface_hub.InferenceClient instead of openai for Sambanova client (#79)
@@ -4,8 +4,8 @@ import os
 from pathlib import Path
 
 import gradio as gr
+import huggingface_hub
 import numpy as np
-import openai
 from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, StreamingResponse
@@ -24,9 +24,9 @@ load_dotenv()
 curr_dir = Path(__file__).parent
 
 
-client = openai.OpenAI(
+client = huggingface_hub.InferenceClient(
     api_key=os.environ.get("SAMBANOVA_API_KEY"),
-    base_url="https://api.sambanova.ai/v1",
+    provider="sambanova",
 )
 model = get_stt_model()
 
@@ -49,7 +49,7 @@ def response(
     conversation_state.append({"role": "user", "content": text})
 
     request = client.chat.completions.create(
-        model="Meta-Llama-3.2-3B-Instruct",
+        model="meta-llama/Llama-3.2-3B-Instruct",
         messages=conversation_state,  # type: ignore
         temperature=0.1,
         top_p=0.1,

@@ -1,4 +1,4 @@
 fastrtc[stopword]
 python-dotenv
-openai
+huggingface_hub>=0.29.0
 twilio
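
For reference, a minimal sketch of the new client path in isolation, assuming huggingface_hub>=0.29.0 is installed and SAMBANOVA_API_KEY is set in the environment; the prompt below is a placeholder rather than the demos' conversation state, and the response handling assumes the OpenAI-compatible shape that InferenceClient returns:

import os

import huggingface_hub

# New client construction from this commit: requests are routed through the
# "sambanova" inference provider instead of SambaNova's OpenAI-compatible URL.
client = huggingface_hub.InferenceClient(
    api_key=os.environ.get("SAMBANOVA_API_KEY"),
    provider="sambanova",
)

# The call site is unchanged apart from the fully qualified model id.
completion = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],  # placeholder prompt
    temperature=0.1,
    top_p=0.1,
)
print(completion.choices[0].message.content)
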
@@ -4,8 +4,8 @@ import os
 from pathlib import Path
 
 import gradio as gr
+import huggingface_hub
 import numpy as np
-import openai
 from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, StreamingResponse
@@ -13,7 +13,6 @@ from fastrtc import (
     AdditionalOutputs,
     ReplyOnPause,
     Stream,
-    WebRTCError,
     get_stt_model,
     get_twilio_turn_credentials,
 )
@@ -25,9 +24,9 @@ load_dotenv()
 curr_dir = Path(__file__).parent
 
 
-client = openai.OpenAI(
+client = huggingface_hub.InferenceClient(
     api_key=os.environ.get("SAMBANOVA_API_KEY"),
-    base_url="https://api.sambanova.ai/v1",
+    provider="sambanova",
 )
 stt_model = get_stt_model()
 
@@ -49,10 +48,8 @@ def response(
 
     conversation_state.append({"role": "user", "content": text})
 
-    raise WebRTCError("test")
-
     request = client.chat.completions.create(
-        model="Meta-Llama-3.2-3B-Instruct",
+        model="meta-llama/Llama-3.2-3B-Instruct",
         messages=conversation_state,  # type: ignore
         temperature=0.1,
         top_p=0.1,

@@ -1,4 +1,4 @@
 fastrtc[vad, stt]
 python-dotenv
-openai
+huggingface_hub>=0.29.0
 twilio