amkyawdev committed on
Commit
835f437
·
verified ·
1 Parent(s): 41fcec0

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +52 -136
app.py CHANGED
@@ -1,163 +1,79 @@
1
  """
2
- Myanmar LLM Gradio App
3
  Model: amkyawdev/mm-llm-tiny
4
  """
5
 
6
- import os
7
- import torch
8
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, AutoConfig
9
  import gradio as gr
 
 
10
 
11
- # Model name
12
  MODEL_NAME = "amkyawdev/mm-llm-tiny"
13
 
14
- # Load model and tokenizer
15
- print(f"Loading model: {MODEL_NAME}...")
16
-
17
- try:
18
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
-
20
- # Try with device_map first
21
- model = AutoModelForCausalLM.from_pretrained(
22
- MODEL_NAME,
23
- torch_dtype=torch.float16,
24
- device_map="auto",
25
- low_cpu_mem_usage=True
26
- )
27
-
28
- print("Model loaded on GPU!")
29
-
30
- except Exception as e:
31
- print(f"GPU failed: {e}, trying CPU...")
32
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
33
- model = AutoModelForCausalLM.from_pretrained(
34
- MODEL_NAME,
35
- torch_dtype=torch.float32,
36
- low_cpu_mem_usage=True
37
- )
38
- model = model.to("cpu")
39
- print("Model loaded on CPU!")
40
 
41
- # Create pipeline
42
- pipe = pipeline(
43
- "text-generation",
44
- model=model,
45
- tokenizer=tokenizer,
46
- max_new_tokens=256,
47
- temperature=0.7,
48
- top_p=0.95,
49
- )
50
 
51
- print("Model loaded!")
 
52
 
53
- # System prompts
54
- SYSTEM_PROMPTS = {
55
- "General": "သင်သည် မြန်မာစာကျွမ်းကျင်သော AI ဖြစ်ပါ။",
56
- "Code Expert": "α€žα€„α€Ία€žα€Šα€Ί α€•α€›α€­α€―α€‚α€›α€™α€Ία€›α€±α€Έα€žα€¬α€Έα€α€²α€· α€€α€»α€½α€™α€Ία€Έα€€α€»α€„α€Ία€žα€±α€¬ AI ဖြစ်ပါ။",
57
- "Translator": "သင်သည် ဘာသာပြန်ကျွမ်းကျင်သော AI ဖြစ်ပါ။",
58
- }
 
 
 
 
 
 
59
 
60
- def generate_response(message, history, system_prompt, max_tokens, temperature, top_p):
61
- """Generate response"""
62
-
63
- # Build prompt
64
- prompt = f"System: {system_prompt}\n\n"
65
 
66
- for user_msg, bot_msg in history:
67
- prompt += f"User: {user_msg}\n\nAssistant: {bot_msg}\n\n"
68
 
69
- prompt += f"User: {message}\n\nAssistant:"
70
-
71
- # Generate
72
- output = pipe(
73
- prompt,
74
- max_new_tokens=int(max_tokens),
75
- temperature=temperature,
76
- top_p=top_p,
77
- do_sample=temperature > 0,
78
- )
79
-
80
- response = output[0]["generated_text"]
81
- # Remove prompt from response
82
- response = response[len(prompt):].strip()
83
 
84
- return response
 
85
 
86
- # Build UI
87
  with gr.Blocks(title="Myanmar LLM") as app:
88
-
89
- gr.Markdown("# 🇲🇲 Myanmar LLM Chat")
90
  gr.Markdown("Model: **amkyawdev/mm-llm-tiny**")
91
 
92
  with gr.Row():
93
- with gr.Column(scale=3):
94
- chatbot = gr.Chatbot(height=500)
95
-
96
- with gr.Row():
97
- msg = gr.Textbox(
98
- label="Message",
99
- placeholder="မေးခွန်းရေးသားပါ။...",
100
- lines=3
101
- )
102
-
103
- with gr.Row():
104
- submit = gr.Button("πŸ“€ ပို့ပါ။", variant="primary")
105
- clear = gr.Button("🗑️ သန့်ရှင်းပါ။")
106
-
107
- gr.Examples(
108
- examples=[
109
- ["α€™α€„α€Ία€Ήα€‚α€œα€¬α€•οΏ½α‹"],
110
- ["Python နဲ့ Fibonacci ရေးပါ။"],
111
- ["Hello ကို မြန်မာလို ပြန်ပါ။"],
112
- ],
113
- inputs=msg
114
- )
115
-
116
- with gr.Column(scale=1):
117
- gr.Markdown("### ⚙️ Settings")
118
-
119
- system_prompt = gr.Dropdown(
120
- choices=list(SYSTEM_PROMPTS.keys()),
121
- value="General",
122
- label="System Prompt"
123
- )
124
-
125
- max_tokens = gr.Slider(
126
- minimum=50, maximum=512, value=256, step=10,
127
- label="Max Tokens"
128
- )
129
-
130
- temperature = gr.Slider(
131
- minimum=0.1, maximum=1.5, value=0.7, step=0.1,
132
- label="Temperature"
133
- )
134
-
135
- top_p = gr.Slider(
136
- minimum=0.5, maximum=1.0, value=0.95, step=0.05,
137
- label="Top-p"
138
- )
139
 
140
- def respond(message, history, system_prompt, max_tokens, temperature, top_p):
141
- response = generate_response(
142
- message, history,
143
- SYSTEM_PROMPTS[system_prompt],
144
- max_tokens, temperature, top_p
145
- )
146
- history.append((message, response))
147
- return "", history
148
 
149
- submit.click(
150
- respond,
151
- inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p],
152
- outputs=[msg, chatbot]
153
- )
154
 
155
- msg.submit(
156
- respond,
157
- inputs=[msg, chatbot, system_prompt, max_tokens, temperature, top_p],
158
- outputs=[msg, chatbot]
159
  )
160
 
161
- clear.click(lambda: (None, []), outputs=[msg, chatbot])
 
 
 
 
 
 
162
 
163
  app.launch(share=True)
 
1
  """
2
+ Myanmar LLM Gradio App - Lite Version
3
  Model: amkyawdev/mm-llm-tiny
4
  """
5
 
 
 
 
6
  import gradio as gr
7
+ import torch
8
+ from transformers import AutoModelForCausalLM, AutoTokenizer
9
 
 
10
  MODEL_NAME = "amkyawdev/mm-llm-tiny"
11
 
12
+ print(f"Loading {MODEL_NAME}...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Load tokenizer only first (saves memory)
15
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
16
+ tokenizer.pad_token = tokenizer.eos_token
 
 
 
 
 
 
17
 
18
+ # Model loads on first request (lazy load)
19
+ model = None
20
 
21
+ def get_model():
22
+ global model
23
+ if model is None:
24
+ print("Loading model...")
25
+ model = AutoModelForCausalLM.from_pretrained(
26
+ MODEL_NAME,
27
+ torch_dtype=torch.float32,
28
+ low_cpu_mem_usage=True
29
+ )
30
+ model.eval()
31
+ print("Model loaded!")
32
+ return model
33
 
34
+ def generate(prompt, max_tokens=128, temp=0.7):
35
+ m = get_model()
 
 
 
36
 
37
+ inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=256)
 
38
 
39
+ with torch.no_grad():
40
+ outputs = m.generate(
41
+ **inputs,
42
+ max_new_tokens=int(max_tokens),
43
+ temperature=temp,
44
+ do_sample=temp > 0,
45
+ )
 
 
 
 
 
 
 
46
 
47
+ response = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
+ return response[len(prompt):].strip()
49
 
50
+ # UI
51
  with gr.Blocks(title="Myanmar LLM") as app:
52
+ gr.Markdown("# 🇲🇲 Myanmar LLM")
 
53
  gr.Markdown("Model: **amkyawdev/mm-llm-tiny**")
54
 
55
  with gr.Row():
56
+ msg = gr.Textbox(label="Message", placeholder="မေးခွန်းရေးသားပါ။...")
57
+ output = gr.Textbox(label="Response")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
+ with gr.Row():
60
+ max_tokens = gr.Slider(32, 256, value=128, step=16, label="Max Tokens")
61
+ temp = gr.Slider(0.1, 1.0, value=0.7, label="Temperature")
 
 
 
 
 
62
 
63
+ btn = gr.Button("Generate")
 
 
 
 
64
 
65
+ btn.click(
66
+ generate,
67
+ inputs=[msg, max_tokens, temp],
68
+ outputs=output
69
  )
70
 
71
+ gr.Examples(
72
+ examples=[
73
+ ["Hello မြန်မာလို ပြန်ပါ။", 64, 0.7],
74
+ ["Python နဲ့ list ရေးပါ။", 128, 0.7],
75
+ ],
76
+ inputs=[msg, max_tokens, temp]
77
+ )
78
 
79
  app.launch(share=True)