Spaces:

blaxx14
/

MCG-API

Sleeping

App Files Files Community

blaxx14 committed on Mar 19, 2025

Commit

80c9d39

1 Parent(s): 0099550

create api

Browse files

Files changed (2) hide show

app.py +153 -0
requirements.txt +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,153 @@

+from flask import Flask, request, jsonify
+import tensorflow as tf
+from transformers import AutoTokenizer, TFT5ForConditionalGeneration
+from transformers import MBartForConditionalGeneration, MBart50Tokenizer
+import os
+import re
+import spacy
+from nltk.corpus import wordnet as wn
+import random
+import nltk
# Load WordNet eagerly at import time and download it only when it is missing.
# This avoids a network round-trip on every start-up and keeps the corpus from
# being lazily loaded for the first time inside a request handler.
try:
    wn.ensure_loaded()
except LookupError:
    nltk.download('wordnet')

# spaCy pipeline; generate_distractors uses its part-of-speech tags.
nlp = spacy.load("en_core_web_sm")

app = Flask(__name__)

# Model uploaded configuration.
# Identifier handed to TFT5ForConditionalGeneration.from_pretrained below.
# NOTE(review): despite the LOCAL_ prefix the value has the shape of a
# Hugging Face Hub repository id, not a filesystem path -- confirm.
LOCAL_QG_MODEL_PATH = "blaxx14/t5-question-generation"
+"""string into dictionary"""
def parse_to_dict(input_string):
    """Turn a ``"Question: ... Answer: ..."`` string into a dictionary.

    The text must contain the marker ``'Answer: '`` exactly once. Everything
    before the marker is the question (all ``'Question: '`` markers are
    removed from it) and everything after it is the answer; both are
    stripped of surrounding whitespace.

    Args:
        input_string: Raw text to parse.

    Returns:
        ``{"Question": ..., "Answer": ...}``, or ``None`` (after printing a
        message) when the marker does not occur exactly once.
    """
    pieces = input_string.split('Answer: ')
    if len(pieces) != 2:
        # Zero markers or more than one: the text cannot be split unambiguously.
        print("Format input string tidak sesuai")
        return None

    raw_question, raw_answer = pieces
    return {
        "Question": raw_question.replace('Question: ', '').strip(),
        "Answer": raw_answer.strip(),
    }
+"""Find sinonim"""
def get_synonyms(word):
    """Return the distinct WordNet lemma names found in the synsets of *word*.

    Args:
        word: The term looked up with ``wn.synsets``.

    Returns:
        A list of lemma names without duplicates. The list is built from a
        set, so its order is not defined.
    """
    lemma_names = {
        lemma.name()
        for synset in wn.synsets(word)
        for lemma in synset.lemmas()
    }
    return list(lemma_names)
+"""Create distractor"""
def generate_distractors(question, correct_answer, num_distractors=3):
    """Pick wrong-answer candidates from WordNet synonyms of the question's nouns.

    Every token that spaCy tags as ``NOUN`` or ``PROPN`` is looked up with
    ``get_synonyms``. Candidates are normalised and de-duplicated before
    sampling, so the returned options are all different from each other and
    from the correct answer.

    Args:
        question: Question text to mine for keywords.
        correct_answer: The right answer; never returned as a distractor.
        num_distractors: Maximum number of distractors to return (default 3,
            the value that used to be hard-coded).

    Returns:
        A list of at most ``num_distractors`` strings chosen at random; it is
        shorter (possibly empty) when fewer candidates exist.
    """
    answer_key = correct_answer.strip().lower()

    # Keyed by the lower-cased text so that the same synonym reached through
    # several keywords (or in a different case) is kept only once.
    candidates = {}
    for token in nlp(question):
        if token.pos_ not in ('NOUN', 'PROPN'):
            continue
        for synonym in get_synonyms(token.text):
            # WordNet joins multi-word lemmas with underscores; convert them so
            # they compare correctly with the answer and read naturally.
            candidate = synonym.replace('_', ' ')
            key = candidate.lower()
            if key != answer_key:
                candidates.setdefault(key, candidate)

    pool = list(candidates.values())
    sample_size = max(0, min(num_distractors, len(pool)))
    return random.sample(pool, sample_size)
+"""Load question generator model and tokenizer"""
# Load the question-generation model and its tokenizer once, at import time.
print("Loading model...")
model = TFT5ForConditionalGeneration.from_pretrained(
    LOCAL_QG_MODEL_PATH,
    from_pt=False,
)
# NOTE(review): the tokenizer is taken from the stock "t5-small" checkpoint
# rather than from LOCAL_QG_MODEL_PATH -- presumably the fine-tuned model kept
# the base vocabulary; confirm.
tokenizer = AutoTokenizer.from_pretrained("t5-small")
print("Model loaded successfully.")
+"""Function for generate question"""
def generate_question(text, max_length=4096):
    """Run the module-level T5 model on *text* and return the decoded output.

    The text is prefixed with ``"Generate question answer: "``, encoded with
    truncation at 512 tokens, and passed to ``model.generate``.

    Args:
        text: Source passage the question should be generated from.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    prompt = f"Generate question answer: {text}"
    encoded_prompt = tokenizer.encode(
        prompt,
        return_tensors="tf",
        max_length=512,
        truncation=True,
    )

    # Sampling is switched on together with 10 beams; the remaining values
    # are the sampling settings handed to the generator unchanged.
    generation_options = {
        "max_length": max_length,
        "num_beams": 10,
        "top_k": 0,
        "top_p": 0.8,
        "temperature": 1.5,
        "do_sample": True,
        "early_stopping": True,
    }
    sequences = model.generate(encoded_prompt, **generation_options)

    return tokenizer.decode(sequences[0], skip_special_tokens=True)
+"""Cleaning input"""
def clean_text(text):
    """Remove ``"translit."`` markers and square-bracketed spans from *text*.

    Bracketed spans are matched non-greedily and never across a newline.
    Surrounding whitespace is left as it is.

    Args:
        text: Raw input text.

    Returns:
        The text with both kinds of fragments deleted.
    """
    without_translit = text.replace("translit.", "")
    return re.sub(r'\[.*?\]', '', without_translit)
def split_text_into_sentences(paragraph):
    """Clean *paragraph* and split it into sentences.

    The text is first passed through ``clean_text`` and stripped, then split
    at whitespace that follows ``.``, ``?`` or ``!``.

    Args:
        paragraph: Raw input text.

    Returns:
        A list of non-empty sentence strings; ``[]`` when nothing but
        whitespace remains after cleaning.
    """
    # Strip first: leading/trailing whitespace would otherwise survive as
    # empty or blank "sentences" that are later fed to the model.
    text = clean_text(paragraph).strip()
    if not text:
        return []
    return [sentence for sentence in re.split(r'(?<=[.?!])\s+', text) if sentence]
def split_into_parts(sentences, num_parts=5):
    """Group *sentences* into at most ``num_parts`` contiguous chunks.

    Every chunk is a list of sentences, whatever the input size, so callers
    can always ``' '.join(chunk)``. With ``num_parts`` or fewer sentences each
    sentence gets its own one-element chunk. Otherwise exactly ``num_parts``
    chunks are returned whose sizes differ by at most one, the larger chunks
    coming last.

    Args:
        sentences: Sequence of sentence strings, in document order.
        num_parts: Maximum number of chunks; must be at least 1.

    Returns:
        A list of lists of sentences that preserves the original order.

    Raises:
        ValueError: If ``num_parts`` is smaller than 1.
    """
    if num_parts < 1:
        raise ValueError("num_parts must be at least 1")

    sentences = list(sentences)
    total = len(sentences)
    if total <= num_parts:
        return [[sentence] for sentence in sentences]

    base_size, extra = divmod(total, num_parts)
    first_larger = num_parts - extra  # chunks from this index on get one more

    parts = []
    start = 0
    for index in range(num_parts):
        size = base_size + (1 if index >= first_larger else 0)
        parts.append(sentences[start:start + size])
        start += size
    return parts
+"""Route for run generator and save the results in cloud"""
@app.route('/generate-question', methods=['POST'])
def api_generate_question():
    """Generate question/answer pairs with distractors from posted text.

    Expects a JSON object with a non-empty string field ``"text"``. The text
    is split into sentences, grouped into parts, and each part is sent to the
    model. Parts whose model output cannot be parsed are skipped instead of
    failing the whole request.

    Returns:
        * 200 with ``{"generated_question": [...]}`` where every item has the
          keys ``"Question"``, ``"Answer"`` and ``"distractor"``.
        * 400 with ``{"error": ...}`` when the body is not a JSON object or
          ``"text"`` is missing, empty or not a string.
        * 500 with ``{"error": ...}`` when generation raises.
    """
    # silent=True yields None for a missing or malformed JSON body, so bad
    # requests are answered with 400 rather than surfacing as a 500.
    data = request.get_json(silent=True)
    text = data.get('text', '') if isinstance(data, dict) else ''
    if not isinstance(text, str) or not text.strip():
        # Message reads "Text must not be empty".
        return jsonify({'error': 'Text tidak boleh kosong'}), 400

    try:
        sentences = split_text_into_sentences(text)

        question_list = []
        for part in split_into_parts(sentences):
            # A part may be a single sentence string or a list of sentences;
            # joining a bare string would insert a space between characters.
            combined_input = part if isinstance(part, str) else ' '.join(part)
            if not combined_input.strip():
                continue

            result_dict = parse_to_dict(generate_question(combined_input))
            if result_dict is None:
                # Model output did not follow "Question: ... Answer: ...".
                continue

            result_dict["distractor"] = generate_distractors(
                result_dict["Question"], result_dict["Answer"]
            )
            question_list.append(result_dict)

        return jsonify({'generated_question': question_list})
    except Exception as e:
        # Boundary handler: record the traceback server-side, report the
        # message to the client in the same shape as before.
        app.logger.exception("Question generation failed")
        return jsonify({'error': str(e)}), 500
if __name__ == '__main__':
    # Script entry point: serve the app on every network interface, port 8080.
    app.run(
        host='0.0.0.0',
        port=8080,
    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,96 @@

+absl-py==2.1.0
+annotated-types==0.7.0
+astunparse==1.6.3
+blinker==1.9.0
+blis==1.0.2
+cachetools==5.5.0
+catalogue==2.0.10
+certifi==2024.8.30
+charset-normalizer==3.4.0
+click==8.1.7
+cloudpathlib==0.20.0
+colorama==0.4.6
+confection==0.1.5
+cymem==2.0.10
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
+filelock==3.16.1
+Flask==3.1.0
+flatbuffers==24.3.25
+fsspec==2024.10.0
+gast==0.6.0
+# google-api-core==2.24.0
+# google-auth==2.36.0
+# google-cloud-core==2.4.1
+# google-cloud-firestore==2.19.0
+# google-pasta==0.2.0
+# googleapis-common-protos==1.66.0
+grpcio==1.68.1
+grpcio-status==1.68.1
+h5py==3.12.1
+huggingface-hub==0.26.5
+idna==3.10
+itsdangerous==2.2.0
+Jinja2==3.1.4
+joblib==1.4.2
+keras==3.7.0
+langcodes==3.5.0
+language_data==1.3.0
+libclang==18.1.1
+marisa-trie==1.2.1
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==3.0.2
+mdurl==0.1.2
+ml-dtypes==0.4.1
+mpmath==1.3.0
+murmurhash==1.0.11
+namex==0.0.8
+networkx==3.4.2
+nltk==3.9.1
+numpy==2.0.2
+opt_einsum==3.4.0
+optree==0.13.1
+packaging==24.2
+preshed==3.0.9
+proto-plus==1.25.0
+protobuf==5.29.1
+pyasn1==0.6.1
+pyasn1_modules==0.4.1
+pydantic==2.10.3
+pydantic_core==2.27.1
+Pygments==2.18.0
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+rich==13.9.4
+rsa==4.9
+safetensors==0.4.5
+sentencepiece==0.2.0
+shellingham==1.5.4
+six==1.17.0
+smart-open==7.0.5
+spacy==3.8.2
+spacy-legacy==3.0.12
+spacy-loggers==1.0.5
+srsly==2.5.0
+sympy==1.13.1
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+tensorflow==2.18.0
+tensorflow-io-gcs-filesystem==0.31.0
+tensorflow_intel==2.18.0
+termcolor==2.5.0
+tf_keras==2.18.0
+thinc==8.3.2
+tiktoken==0.8.0
+tokenizers==0.21.0
+torch==2.5.1
+tqdm==4.67.1
+transformers==4.47.0
+typer==0.15.1
+typing_extensions==4.12.2
+urllib3==2.2.3
+wasabi==1.1.3
+weasel==0.4.1
+Werkzeug==3.1.3
+wrapt==1.17.0