Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from threading import Thread | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer | |
| import torch | |
| import os | |
| # --- 配置 --- | |
| MODEL_ID = "Qwen/Qwen3-0.6B-Base" | |
| # ❗ 1. 定义系统提示词 | |
| SYSTEM_PROMPT = """As a helper/no_think | |
| """ | |
| # --- 加载模型和分词器 --- | |
| print("开始加载模型和分词器...") | |
| try: | |
| tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True) | |
| model = AutoModelForCausalLM.from_pretrained( | |
| MODEL_ID, | |
| torch_dtype="auto", | |
| device_map="auto", | |
| trust_remote_code=True | |
| ) | |
| print("模型和分词器加载成功!") | |
| except Exception as e: | |
| print(f"模型加载失败: {e}") | |
| raise gr.Error(f"关键错误:无法加载模型 {MODEL_ID}。错误信息: {e}") | |
| # --- 核心对话函数 --- | |
| def predict(message, history): | |
| messages = [] | |
| # ❗ 2. 在消息列表的开头添加系统提示词 | |
| messages.append({"role": "system", "content": SYSTEM_PROMPT}) | |
| # # 3. 添加历史对话记录 | |
| # for turn in history: | |
| # user_msg, assistant_msg = turn | |
| # messages.append({"role": "user", "content": user_msg}) | |
| # messages.append({"role": "assistant", "content": assistant_msg}) | |
| # 4. 添加当前的用户消息 | |
| messages.append({"role": "user", "content": message}) | |
| # 使用 tokenizer.apply_chat_template 将消息列表转换为模型输入 | |
| model_inputs = tokenizer.apply_chat_template( | |
| messages, | |
| add_generation_prompt=True, | |
| return_tensors="pt" | |
| ).to(model.device) | |
| streamer = TextIteratorStreamer(tokenizer, timeout=300.0, skip_prompt=True, skip_special_tokens=True) | |
| generation_kwargs = dict( | |
| inputs=model_inputs, | |
| streamer=streamer, | |
| max_new_tokens=2048, | |
| do_sample=True, | |
| temperature=0.4, | |
| top_p=0.95, | |
| ) | |
| thread = Thread(target=model.generate, kwargs=generation_kwargs) | |
| thread.start() | |
| full_response = "" | |
| for new_text in streamer: | |
| full_response += new_text | |
| yield full_response | |
| # --- 创建并启动Gradio界面 --- | |
| # 已移除 examples 和 cache_examples 参数来修复点击示例时报错的问题 | |
| demo = gr.ChatInterface( | |
| fn=predict | |
| ) | |
| if __name__ == "__main__": | |
| # 使用 share=True 来允许跨域 WebSocket 连接 | |
| demo.queue().launch(share=True) |