Spaces:

bilalnaveed
/

Naveedai

Runtime error

App Files Files Community

Naveedai / app.py

bilalnaveed

Perf: skip regex on every token unless think tag present

b76b9b2 verified about 2 months ago

raw

history blame contribute delete

15.5 kB

	"""
	Naveed AI — Fast Conversational Chat
	Built by Naveed Khan | Powered by Qwen2.5 | Free Forever
	"""

	import os
	import re
	import json

	# ── Speed: silence tokenizer noise ────────────────────────────────────────────
	# setdefault() only applies when the variable is not already set, so a value
	# provided by the host environment wins. It is set before the project imports
	# below so it is already in place when they run.
	os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

	# ── Bootstrap ─────────────────────────────────────────────────────────────────
	# flush=True pushes each progress line out immediately instead of leaving it
	# in the stdout buffer while the (potentially slow) steps below run.
	print("🚀 Starting Naveed AI…", flush=True)

	# Project-local modules, imported after the startup banner has been printed.
	from config import get_config
	from model_loader import ModelLoader
	from inference import InferenceEngine

	# Application configuration; read below for the default model id and in
	# chat_fn for the conversation settings.
	cfg = get_config()

	# Load the default model once at import time, before any UI is built.
	# NOTE(review): auto_download=True presumably fetches missing weights —
	# confirm against ModelLoader.load.
	_loader = ModelLoader(cfg)
	print(f"📦 Loading model: {cfg.model.default_model}", flush=True)
	_loader.load(model_id=cfg.model.default_model, auto_download=True)
	print("✅ Model ready!", flush=True)

	# Shared inference engine; chat_fn streams replies from engine.chat_generate().
	engine = InferenceEngine(_loader, cfg)

	# ── Gradio ─────────────────────────────────────────────────────────────────────
	import gradio as gr

	# Major version = the text before the first dot of gr.__version__.
	# Anything that cannot be read or parsed falls back to 4.
	try:
	    GRADIO_MAJOR = int(str(gr.__version__).partition(".")[0])
	except Exception:
	    GRADIO_MAJOR = 4

	# History format: message dicts from v5 onwards, [user, assistant] pairs on v4.
	USE_MSG_FMT = not GRADIO_MAJOR < 5
	# Only Gradio 5.x takes an explicit type="messages"; Gradio 6 removed the
	# type= param because the messages format is always the default there.
	USE_TYPE_PARAM = GRADIO_MAJOR == 5
	print(
	    f"🎨 Gradio {gr.__version__} — {('tuples', 'messages')[USE_MSG_FMT]} mode",
	    flush=True,
	)


	# ── Helpers ────────────────────────────────────────────────────────────────────
	def _text(content) -> str:
	"""Normalise any Gradio message content → plain string."""
	if content is None:
	return ""
	if isinstance(content, str):
	return content
	if isinstance(content, (int, float, bool)):
	return str(content)
	if isinstance(content, list):
	return " ".join(_text(i) for i in content if _text(i)).strip()
	if isinstance(content, dict):
	return _text(content.get("text") or content.get("content") or "")
	return str(content)


	def _render_think(text: str) -> str:
	"""Convert <think>…</think> blocks into a readable quote."""
	def _block(m):
	thought = m.group(1).strip()
	return f"\n\n💭 Thinking:\n> {thought}\n\n---\n\n"
	return re.sub(r"<think>(.*?)</think>", _block, text, flags=re.DOTALL)


	# ── Chat function ──────────────────────────────────────────────────────────────
	def chat_fn(message: str, history):
	    """
	    Real-time streaming chat.

	    Args:
	        message: Text from the input box. Surrounding whitespace is
	            stripped; ``None`` is treated as an empty message.
	        history: Current chatbot value — a list of message-dicts (v5+) or
	            [user, assistant] pairs (v4). ``None`` is treated as empty.
	            The caller's list is never mutated.

	    Yields:
	        The updated display history after every token so the UI streams
	        live, then once more with the final reply. A blank message yields
	        the history unchanged. If generation raises, the assistant reply is
	        replaced by an error notice and streaming stops.
	    """
	    message = (message or "").strip()
	    history = list(history or [])  # private copy; also tolerates None
	    if not message:
	        yield history
	        return

	    # Build messages list: system + trimmed history + new user turn
	    messages = [{"role": "system", "content": cfg.conversation.system_prompt}]

	    # The limit is expressed in turns. A tuple-format item already holds a
	    # whole user+assistant exchange, while message-dict history stores the
	    # two halves as separate items, so it needs twice as many items to keep
	    # the same number of exchanges.
	    max_turns = cfg.conversation.max_history_turns
	    max_items = max_turns * 2 if USE_MSG_FMT else max_turns
	    # Guarded slice: history[-0:] would return the whole list, not nothing.
	    recent = history[-max_items:] if max_items > 0 else []

	    for item in recent:
	        if USE_MSG_FMT:
	            role = item.get("role", "user")
	            content = _text(item.get("content", ""))
	            # Skip empty entries and any role other than user/assistant.
	            if content and role in ("user", "assistant"):
	                messages.append({"role": role, "content": content})
	        else:
	            user_text = _text(item[0]) if len(item) > 0 else ""
	            bot_text = _text(item[1]) if len(item) > 1 else ""
	            if user_text:
	                messages.append({"role": "user", "content": user_text})
	            if bot_text:
	                messages.append({"role": "assistant", "content": bot_text})

	    messages.append({"role": "user", "content": message})

	    # Append user turn to display history
	    if USE_MSG_FMT:
	        history.append({"role": "user", "content": message})
	    else:
	        history.append([message, None])

	    def _with_reply(text):
	        """Return the display history with `text` as the assistant reply."""
	        if USE_MSG_FMT:
	            return history + [{"role": "assistant", "content": text}]
	        # Tuple format: fill the reply slot of the pending [message, None] pair.
	        return history[:-1] + [[message, text]]

	    def _rendered(raw):
	        """Render think blocks, skipping the regex when no tag is present."""
	        return _render_think(raw) if "<think>" in raw else raw

	    # Stream assistant reply token-by-token
	    response = ""
	    try:
	        for token in engine.chat_generate(messages, stream=True):
	            response += token
	            yield _with_reply(_rendered(response))
	    except Exception as exc:
	        # UI boundary: surface the failure in the chat instead of crashing
	        # the event handler.
	        yield _with_reply(f"⚠️ Sorry, something went wrong: {exc}")
	        return

	    # Commit final clean turn
	    yield _with_reply(_rendered(response))


	# ── CSS ────────────────────────────────────────────────────────────────────────
	# Custom stylesheet for the app, handed to Gradio via css=CSS at launch.
	# Sections, in order: page container and hidden footer, gradient header
	# (.header-wrap), blinking status dot (.dot), the Send / Clear buttons
	# (#send-btn, #clear-btn), and the API snippet accordion (.api-accordion,
	# .api-tabs, .api-code). The ids and classes match the elem_id / elem_classes
	# values used when the UI components are created.
	CSS = """
	.gradio-container {
	max-width: 860px !important;
	margin: auto !important;
	font-family: 'Inter', 'Segoe UI', system-ui, sans-serif !important;
	}
	footer { display: none !important; }

	.header-wrap {
	background: linear-gradient(135deg, #0f0c29 0%, #302b63 50%, #24243e 100%);
	border-radius: 16px;
	padding: 24px 32px 20px;
	margin-bottom: 12px;
	text-align: center;
	border: 1px solid #7c3aed33;
	}
	.header-wrap h1 {
	background: linear-gradient(135deg, #a78bfa, #60a5fa, #34d399);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-size: 2.3em;
	margin: 0 0 4px;
	font-weight: 800;
	letter-spacing: -0.6px;
	}
	.header-wrap p { color: #94a3b8; margin: 0; font-size: 0.96em; }

	.dot {
	display: inline-block;
	width: 8px; height: 8px;
	border-radius: 50%;
	background: #22c55e;
	margin-right: 6px;
	vertical-align: middle;
	animation: blink 2s infinite;
	}
	@keyframes blink { 0%,100%{opacity:1;} 50%{opacity:.35;} }

	#send-btn {
	background: #111827 !important;
	color: #fff !important;
	border-radius: 10px !important;
	font-weight: 600 !important;
	min-height: 48px !important;
	}
	#send-btn:hover { background: #1f2937 !important; }
	#clear-btn { border-radius: 10px !important; }

	/* API snippet accordion */
	.api-accordion {
	margin-top: 10px;
	border: 1px solid #334155 !important;
	border-radius: 12px !important;
	background: #0f172a !important;
	}
	.api-accordion .label-wrap span {
	font-size: 0.9em !important;
	color: #94a3b8 !important;
	font-weight: 500 !important;
	}
	.api-tabs .tab-nav button {
	font-size: 0.82em !important;
	padding: 4px 10px !important;
	}
	.api-code pre {
	font-size: 0.82em !important;
	border-radius: 8px !important;
	background: #1e293b !important;
	}
	"""


	# ── Gradio UI ──────────────────────────────────────────────────────────────────
	# Rotating placeholder for the message box, run by Gradio on page load via the
	# js= app option (gr.HTML only renders static markup and does not execute
	# <script> tags). Kept as a raw string so the JavaScript escapes (\" and
	# \u2019) reach the browser intact instead of being consumed by Python.
	_PLACEHOLDER_JS = r"""() => {
	    const hints = [
	        "Message Naveed AI…",
	        "Ask me anything…",
	        "What\u2019s on your mind?",
	        "Try: \"Explain AI in simple words\"",
	        "Try: \"Write me a Python script\"",
	        "Try: \"Give me a workout plan\"",
	        "Try: \"Help me write a cover letter\"",
	        "Try: \"What is quantum computing?\"",
	        "Try: \"Is this news headline real?\"",
	    ];
	    let i = 0;
	    function rotatePlaceholder() {
	        const input = document.querySelector('#msg-input textarea');
	        if (input && !input.value) {
	            i = (i + 1) % hints.length;
	            input.setAttribute('placeholder', hints[i]);
	        }
	    }
	    setInterval(rotatePlaceholder, 3000);
	}"""

	# App-level options. Gradio 6 takes these in launch(); Gradio 4.x/5.x only
	# accept them on the Blocks constructor and reject them in launch(), so they
	# are routed to whichever call the installed version supports.
	_app_kwargs = dict(
	    theme=gr.themes.Soft(
	        primary_hue="violet",
	        secondary_hue="blue",
	        neutral_hue="slate",
	        font=gr.themes.GoogleFont("Inter"),
	    ),
	    css=CSS,
	    js=_PLACEHOLDER_JS,
	)
	_blocks_kwargs = {} if GRADIO_MAJOR >= 6 else _app_kwargs
	_launch_kwargs = _app_kwargs if GRADIO_MAJOR >= 6 else {}

	with gr.Blocks(title="Naveed AI", **_blocks_kwargs) as demo:

	    # ── Header ──────────────────────────────────────────────────────────────
	    gr.HTML("""
	<div class="header-wrap">
	<h1>🧠 Naveed AI</h1>
	<p>
	<span class="dot"></span>
	Built by <strong style="color:#c4b5fd">Naveed Khan</strong>
	·  Smart · Fast · Free Forever
	</p>
	</div>
	""")

	    # ── Chatbot ─────────────────────────────────────────────────────────────
	    bot_kwargs = dict(
	        value=[],
	        show_label=False,
	        height=520,
	    )
	    # Only Gradio 5.x needs (and accepts) an explicit type="messages".
	    if USE_TYPE_PARAM:
	        bot_kwargs["type"] = "messages"
	    chatbot = gr.Chatbot(**bot_kwargs)

	    # ── Input row ───────────────────────────────────────────────────────────
	    with gr.Row():
	        msg = gr.Textbox(
	            placeholder="Message Naveed AI…",
	            show_label=False,
	            scale=9,
	            container=False,
	            autofocus=True,
	            lines=1,
	            max_lines=6,
	            elem_id="msg-input",  # targeted by the placeholder script
	        )
	        send = gr.Button("Send ➤", elem_id="send-btn", scale=1, min_width=90)

	    # ── Controls ────────────────────────────────────────────────────────────
	    with gr.Row():
	        clear = gr.Button("🗑 Clear chat", variant="secondary",
	                          size="sm", elem_id="clear-btn", scale=1)
	        gr.HTML('<div style="flex:4"></div>')  # spacer

	    # ── Starter examples ────────────────────────────────────────────────────
	    gr.Examples(
	        label="💡 Try asking…",
	        examples=[
	            ["Who are you and what can you do?"],
	            ["Explain quantum computing like I'm 10 years old"],
	            ["Write a Python script to fetch today's Bitcoin price"],
	            ["Give me a 7-day workout plan for a beginner"],
	            ["Help me write a resignation letter — professional but friendly"],
	            ["I heard coffee causes cancer. Is that true?"],
	            ["What are the top 5 habits of highly successful people?"],
	            ["Tell me something fascinating about the universe"],
	        ],
	        inputs=msg,
	    )

	    # ── API code snippets (collapsible) ─────────────────────────────────────
	    # Display-only code samples. The body of `def ask` is indented so the
	    # Python sample is valid when copied.
	    with gr.Accordion("🔌 Use Naveed AI in your own app — free API", open=False,
	                      elem_classes=["api-accordion"]):
	        with gr.Tabs(elem_classes=["api-tabs"]):
	            with gr.TabItem("🐍 Python"):
	                gr.Code(
	                    value='''from gradio_client import Client

	client = Client("bilalnaveed/Naveedai")

	# Single message
	result = client.predict(
	message="What are the top habits of successful people?",
	history=[],
	api_name="/chat_fn"
	)
	print(result[-1]["content"]) # last assistant reply

	# Multi-turn conversation
	history = []
	def ask(msg):
	    global history
	    history = client.predict(message=msg, history=history, api_name="/chat_fn")
	    last = history[-1]
	    return last.get("content", "") if isinstance(last, dict) else last[1]

	print(ask("Hello!"))
	print(ask("Tell me a fun science fact"))
	''',
	                    language="python",
	                    elem_classes=["api-code"],
	                    interactive=False,
	                )
	            # NOTE(review): the JavaScript and cURL samples call
	            # "/run/predict"; confirm that route is served by the deployed
	            # Gradio version.
	            with gr.TabItem("🌐 JavaScript"):
	                gr.Code(
	                    value='''// Works in browser or Node.js — no API key needed
	async function askNaveedAI(message, history = []) {
	const res = await fetch(
	"https://bilalnaveed-naveedai.hf.space/run/predict",
	{
	method: "POST",
	headers: { "Content-Type": "application/json" },
	body: JSON.stringify({ data: [message, history] }),
	}
	);
	const json = await res.json();
	const updatedHistory = json.data[0]; // full history array
	const lastMsg = updatedHistory[updatedHistory.length - 1];
	return typeof lastMsg === "object" && lastMsg.content
	? lastMsg.content
	: lastMsg[1]; // tuple fallback
	}

	// Usage
	askNaveedAI("Explain machine learning in 2 sentences").then(console.log);
	''',
	                    language="javascript",
	                    elem_classes=["api-code"],
	                    interactive=False,
	                )
	            with gr.TabItem("💻 cURL"):
	                gr.Code(
	                    value='''# Call from any terminal — completely free
	curl -X POST "https://bilalnaveed-naveedai.hf.space/run/predict" \\
	-H "Content-Type: application/json" \\
	-d \'{"data": ["Who are you?", []]}\'\n''',
	                    language="shell",
	                    elem_classes=["api-code"],
	                    interactive=False,
	                )

	    # ── Events ──────────────────────────────────────────────────────────────
	    # Both the button and Enter stream chat_fn into the chatbot; the input
	    # box is cleared by the chained step once the handler has finished.
	    send.click(chat_fn, [msg, chatbot], [chatbot]).then(
	        lambda: gr.update(value=""), None, [msg]
	    )
	    msg.submit(chat_fn, [msg, chatbot], [chatbot]).then(
	        lambda: gr.update(value=""), None, [msg]
	    )
	    clear.click(lambda: [], None, [chatbot])


	# ── Launch ─────────────────────────────────────────────────────────────────────
	# Listens on all interfaces; the port comes from $PORT with 7860 as fallback.
	demo.queue(max_size=15).launch(
	    server_name="0.0.0.0",
	    server_port=int(os.environ.get("PORT", 7860)),
	    show_error=True,
	    quiet=False,
	    **_launch_kwargs,
	)