import gradio as gr
from huggingface_hub import InferenceClient

# Client for the hosted model; generate() below calls its conversational (chat) endpoint
client = InferenceClient("meta-llama/Llama-3.2-1B-Instruct")


def generate(prompt, temperature=0.8, max_tokens=256):
    try:
        # Wrap the raw prompt as a single-turn chat message
        messages = [{"role": "user", "content": prompt}]
        response = client.chat_completion(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Surface API errors (rate limits, cold starts, auth) in the output box
        return f"Error: {str(e)}"


with gr.Blocks(title="amkyaw-coder") as demo:
    gr.Markdown("# amkyaw-coder\n🤖 Code Generation Model (via HF Inference)")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Enter your prompt here...")
            temperature = gr.Slider(0.1, 2.0, value=0.8, step=0.1, label="Temperature")
            max_tokens = gr.Slider(32, 512, value=128, step=32, label="Max Tokens")
            submit = gr.Button("Generate", variant="primary")
        with gr.Column():
            output = gr.Textbox(label="Output", lines=15)
    submit.click(generate, inputs=[prompt, temperature, max_tokens], outputs=output)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860, the default expected by HF Spaces
    demo.launch(server_name="0.0.0.0", server_port=7860)
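
# Usage note (a minimal sketch, not part of the app): once the Space is running,
# it can be queried programmatically via gradio_client. The Space ID
# "amkyaw/amkyaw-coder" is an assumption inferred from the app title; fn_index=0
# targets the single click handler defined above, since no api_name is set.
#
# from gradio_client import Client
#
# client = Client("amkyaw/amkyaw-coder")  # hypothetical Space ID
# result = client.predict(
#     "Write a Python function that reverses a string",  # prompt
#     0.8,   # temperature
#     128,   # max tokens
#     fn_index=0,
# )
# print(result)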