From c6769fe33f8c705a7ec7d3ab2b7836f60d142432 Mon Sep 17 00:00:00 2001
From: Lucain <lucainp@gmail.com>
Date: Wed, 26 Feb 2025 16:06:17 +0100
Subject: [PATCH] Use huggingface_hub.InferenceClient instead of openai for
 SambaNova client (#79)

---
 demo/hello_computer/app.py              |  8 ++++----
 demo/hello_computer/requirements.txt    |  2 +-
 demo/talk_to_sambanova/app.py           | 11 ++++-------
 demo/talk_to_sambanova/requirements.txt |  2 +-
 4 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/demo/hello_computer/app.py b/demo/hello_computer/app.py
index 9143c12..6a60496 100644
--- a/demo/hello_computer/app.py
+++ b/demo/hello_computer/app.py
@@ -4,8 +4,8 @@ import os
 from pathlib import Path
 
 import gradio as gr
+import huggingface_hub
 import numpy as np
-import openai
 from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, StreamingResponse
@@ -24,9 +24,9 @@ load_dotenv()
 curr_dir = Path(__file__).parent
 
 
-client = openai.OpenAI(
+client = huggingface_hub.InferenceClient(
     api_key=os.environ.get("SAMBANOVA_API_KEY"),
-    base_url="https://api.sambanova.ai/v1",
+    provider="sambanova",
 )
 model = get_stt_model()
 
@@ -49,7 +49,7 @@ def response(
     conversation_state.append({"role": "user", "content": text})
 
     request = client.chat.completions.create(
-        model="Meta-Llama-3.2-3B-Instruct",
+        model="meta-llama/Llama-3.2-3B-Instruct",
         messages=conversation_state,  # type: ignore
         temperature=0.1,
         top_p=0.1,
diff --git a/demo/hello_computer/requirements.txt b/demo/hello_computer/requirements.txt
index c0920dd..d17d5a3 100644
--- a/demo/hello_computer/requirements.txt
+++ b/demo/hello_computer/requirements.txt
@@ -1,4 +1,4 @@
 fastrtc[stopword]
 python-dotenv
-openai
+huggingface_hub>=0.29.0
 twilio
\ No newline at end of file
diff --git a/demo/talk_to_sambanova/app.py b/demo/talk_to_sambanova/app.py
index 3c2bb1d..2faabae 100644
--- a/demo/talk_to_sambanova/app.py
+++ b/demo/talk_to_sambanova/app.py
@@ -4,8 +4,8 @@ import os
 from pathlib import Path
 
 import gradio as gr
+import huggingface_hub
 import numpy as np
-import openai
 from dotenv import load_dotenv
 from fastapi import FastAPI
 from fastapi.responses import HTMLResponse, StreamingResponse
@@ -13,7 +13,6 @@ from fastrtc import (
     AdditionalOutputs,
     ReplyOnPause,
     Stream,
-    WebRTCError,
     get_stt_model,
     get_twilio_turn_credentials,
 )
@@ -25,9 +24,9 @@ load_dotenv()
 curr_dir = Path(__file__).parent
 
 
-client = openai.OpenAI(
+client = huggingface_hub.InferenceClient(
     api_key=os.environ.get("SAMBANOVA_API_KEY"),
-    base_url="https://api.sambanova.ai/v1",
+    provider="sambanova",
 )
 stt_model = get_stt_model()
 
@@ -49,10 +48,8 @@ def response(
 
     conversation_state.append({"role": "user", "content": text})
 
-    raise WebRTCError("test")
-
     request = client.chat.completions.create(
-        model="Meta-Llama-3.2-3B-Instruct",
+        model="meta-llama/Llama-3.2-3B-Instruct",
         messages=conversation_state,  # type: ignore
         temperature=0.1,
         top_p=0.1,
diff --git a/demo/talk_to_sambanova/requirements.txt b/demo/talk_to_sambanova/requirements.txt
index 36f0d00..5642a08 100644
--- a/demo/talk_to_sambanova/requirements.txt
+++ b/demo/talk_to_sambanova/requirements.txt
@@ -1,4 +1,4 @@
 fastrtc[vad, stt]
 python-dotenv
-openai
+huggingface_hub>=0.29.0
 twilio
\ No newline at end of file