Spaces:
Running
Running
| """ | |
| Myanmar LLM Gradio App - Lite Version | |
| Model: amkyawdev/mm-llm-tiny | |
| """ | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
MODEL_NAME = "amkyawdev/mm-llm-tiny"

print(f"Loading {MODEL_NAME}...")

# Only the tokenizer is loaded at import time; deferring the model weights
# saves memory until a request actually needs them.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Fall back to EOS for padding only when the checkpoint does not define its
# own pad token, so a tokenizer-provided pad token is never overwritten.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# The model is loaded lazily by get_model() on the first request.
model = None
def get_model():
    """Return the shared causal-LM instance, loading it on first use.

    The loaded model is stored in the module-level ``model`` variable, so
    later calls return the same object without loading it again.
    """
    global model
    if model is not None:
        return model

    print("Loading model...")
    load_kwargs = {
        "torch_dtype": torch.float32,
        "low_cpu_mem_usage": True,
    }
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **load_kwargs)
    model.eval()  # inference only: switch off training-mode layers
    print("Model loaded!")
    return model
def generate(prompt, max_tokens=128, temp=0.7):
    """Generate a continuation of ``prompt`` with the lazily loaded model.

    Args:
        prompt: User text to continue. The tokenized prompt is truncated to
            256 tokens.
        max_tokens: Maximum number of new tokens to generate; coerced to int
            because UI sliders may deliver floats.
        temp: Sampling temperature. Values above 0 enable sampling; 0 or
            below falls back to greedy decoding.

    Returns:
        Only the newly generated text (the prompt is excluded), stripped of
        surrounding whitespace. An empty or whitespace-only prompt yields an
        empty string without touching the model.
    """
    if not prompt or not prompt.strip():
        return ""

    m = get_model()
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)

    sampling = temp > 0
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "do_sample": sampling,
        # Passed explicitly so generate() does not have to guess a pad token.
        "pad_token_id": tokenizer.pad_token_id,
    }
    if sampling:
        # Temperature is only meaningful (and only valid) when sampling.
        gen_kwargs["temperature"] = float(temp)

    with torch.no_grad():
        outputs = m.generate(**inputs, **gen_kwargs)

    # A causal LM returns prompt tokens followed by the new tokens. Slice by
    # token count rather than by len(prompt) characters: the decoded text is
    # not guaranteed to reproduce the prompt character-for-character, and the
    # prompt may have been truncated above.
    prompt_len = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# UI: a single-page Blocks layout wiring the message box and the two sliders
# to generate(), plus a couple of clickable example prompts.
with gr.Blocks(title="Myanmar LLM") as app:
    # The flag emoji was stored mis-decoded ("π²π²"); restored to the
    # intended regional-indicator pair.
    gr.Markdown("# 🇲🇲 Myanmar LLM")
    gr.Markdown("Model: **amkyawdev/mm-llm-tiny**")

    with gr.Row():
        # NOTE(review): the placeholder below (and the example prompts further
        # down) look like Burmese text that was mis-decoded with bytes lost.
        # They are kept as-is here; restore them from the original source.
        msg = gr.Textbox(label="Message", placeholder="αα±αΈαα½ααΊαΈαα±αΈαα¬αΈαα«α...")
        output = gr.Textbox(label="Response")

    with gr.Row():
        max_tokens = gr.Slider(32, 256, value=128, step=16, label="Max Tokens")
        temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")

    btn = gr.Button("Generate")
    btn.click(
        generate,
        inputs=[msg, max_tokens, temp],
        outputs=output,
    )

    # Each example row fills the message box and both sliders, in that order.
    gr.Examples(
        examples=[
            ["Hello ααΌααΊαα¬ααα― ααΌααΊαα«α", 64, 0.7],
            ["Python αα²α· list αα±αΈαα«α", 128, 0.7],
        ],
        inputs=[msg, max_tokens, temp],
    )

# NOTE(review): share=True requests a public share link; hosted environments
# may ignore it — confirm it is still wanted.
app.launch(share=True)