Spaces:
Running
Running
jefffffff9 Claude Opus 4.7 committed on
Commit ·
757e833
1
Parent(s): 9e99c2c
Forbid parenthetical glosses in LLM replies
Browse files
The model was appending English literal-translation annotations (e.g.
"On walli e jam? (Lit: Did you sleep in peace?)") which then got read
aloud by TTS as gibberish. Two-layer fix:
1. System prompt now explicitly bans "(Lit: ...)", "(meaning ...)",
and any English/French parenthetical aside, and demands 100%
target-language output.
2. Defensive client-side regex strips any "(Lit/Literal/Meaning/
Translation/English/French ...)" parenthetical the model emits
anyway before returning the string to TTS.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
- src/llm/minimal_client.py +17 -1
src/llm/minimal_client.py
CHANGED
|
@@ -101,7 +101,11 @@ def _build_system_prompt(
|
|
| 101 |
"translate what they said back to them.",
|
| 102 |
"",
|
| 103 |
"Output format: plain natural text only. No JSON, no code fences, no "
|
| 104 |
-
"markdown, no translations, no romanisation, no explanations
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
"1–3 short sentences suitable to be read aloud by a text-to-speech voice.",
|
| 106 |
]
|
| 107 |
|
|
@@ -216,6 +220,18 @@ class MinimalClient:
|
|
| 216 |
first, rest = raw.split("\n", 1)
|
| 217 |
if len(first) < 20 and " " not in first:
|
| 218 |
raw = rest.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
return raw
|
| 220 |
except Exception as exc: # pragma: no cover — surfaced to UI
|
| 221 |
logger.error("MinimalClient error: %s", exc)
|
|
|
|
| 101 |
"translate what they said back to them.",
|
| 102 |
"",
|
| 103 |
"Output format: plain natural text only. No JSON, no code fences, no "
|
| 104 |
+
"markdown, no translations, no romanisation, no explanations, and "
|
| 105 |
+
"ABSOLUTELY no parenthetical glosses, literal translations, or "
|
| 106 |
+
"English/French annotations of any kind (do NOT write things like "
|
| 107 |
+
"'(Lit: ...)', '(meaning ...)', or any '(English ...)' aside). The "
|
| 108 |
+
f"output must be 100% {full} characters and punctuation only. Reply in "
|
| 109 |
"1–3 short sentences suitable to be read aloud by a text-to-speech voice.",
|
| 110 |
]
|
| 111 |
|
|
|
|
| 220 |
first, rest = raw.split("\n", 1)
|
| 221 |
if len(first) < 20 and " " not in first:
|
| 222 |
raw = rest.strip()
|
| 223 |
+
# Defensive: strip parenthetical English/French glosses the model
|
| 224 |
+
# sometimes appends despite the prompt — e.g. "Foo bar (Lit: ...)".
|
| 225 |
+
# We only strip parentheticals that LOOK like glosses (start with
|
| 226 |
+
# Lit/Literal/Meaning/Translation/English/French/fr/en); any other
|
| 227 |
+
# parenthetical is left untouched by this pattern.
|
| 228 |
+
import re as _re
|
| 229 |
+
raw = _re.sub(
|
| 230 |
+
r"\s*\((?:lit\.?|literal(?:ly)?|meaning|translation|english|french|fr|en)[^)]*\)",
|
| 231 |
+
"",
|
| 232 |
+
raw,
|
| 233 |
+
flags=_re.IGNORECASE,
|
| 234 |
+
).strip()
|
| 235 |
return raw
|
| 236 |
except Exception as exc: # pragma: no cover — surfaced to UI
|
| 237 |
logger.error("MinimalClient error: %s", exc)
|