Spaces:

modelsmafia
/

models_mafia_llm

Sleeping

App Files Files Community

modelsmafia committed on Apr 27, 2025

Commit

213e222

1 Parent(s): bda7ad7

api inference

Browse files

Files changed (1) hide show

app.py +32 -15

app.py CHANGED Viewed

@@ -1,26 +1,43 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-# Use InferenceClient to interact with your model through the API
-client = InferenceClient(model="modelsmafia/punjabi_Gemma-2B")
 def chat_with_model(message, history):
     # Format conversation history
-    messages = []
     for h in history:
-        messages.append({"role": "user", "content": h[0]})
-        messages.append({"role": "assistant", "content": h[1]})
-    messages.append({"role": "user", "content": message})
     try:
-        # Generate response using Inference API with correct parameters
-        response = client.text_generation(
-            prompt=message,  # You might need to format this differently
-            max_new_tokens=512,
-            temperature=0.7,
-            top_p=0.9
-        )
-        return response
     except Exception as e:
         return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."

 import gradio as gr
+import requests
+import json
+import os
+# Get API token from environment variable (set this in your Space settings)
+API_TOKEN = os.getenv("HF_API_TOKEN", "")  # Make sure to add your token in Space settings
 def chat_with_model(message, history):
     # Format conversation history
+    full_prompt = ""
     for h in history:
+        full_prompt += f"<start_of_turn>user\n{h[0]}\n<end_of_turn>\n"
+        full_prompt += f"<start_of_turn>model\n{h[1]}\n<end_of_turn>\n"
+    full_prompt += f"<start_of_turn>user\n{message}\n<end_of_turn>\n<start_of_turn>model\n"
     try:
+        headers = {
+            "Authorization": f"Bearer {API_TOKEN}",
+            "Content-Type": "application/json"
+        }
+        payload = {
+            "inputs": full_prompt,
+            "parameters": {
+                "max_new_tokens": 512,
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "do_sample": True
+            }
+        }
+        API_URL = "https://api-inference.huggingface.co/models/modelsmafia/punjabi_Gemma-2B"
+        response = requests.post(API_URL, headers=headers, json=payload)
+        if response.status_code == 200:
+            return response.json()[0]["generated_text"].replace(full_prompt, "")
+        else:
+            return f"Error: Status code {response.status_code}\n{response.text}\n\nYou need to configure your model for inference on Hugging Face."
     except Exception as e:
         return f"Error: {str(e)}\n\nThe model might not be properly configured for inference yet."