ABDALLALSWAITI committed on
Commit
4ea4dba
·
verified ·
1 Parent(s): a48a425

Upload streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +386 -0
src/streamlit_app.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import subprocess
3
+ import os
4
+ import tempfile
5
+ import shutil
6
+ from pathlib import Path
7
+ import base64
8
+ import re
9
+
10
# Configure the browser tab (title, icon) and use the full page width.
st.set_page_config(
    page_title="HTML to PDF Converter",
    page_icon="📄",
    layout="wide"
)
15
+
16
def _ratio_bucket(width, height):
    """Map a width/height pair to "16:9", "1:1" or "9:16".

    Returns None when height is 0, because no ratio can be computed.
    """
    if height == 0:
        return None
    ratio = width / height
    if ratio > 1.5:
        return "16:9"
    if ratio < 0.7:
        return "9:16"
    return "1:1"


def detect_aspect_ratio(html_content):
    """
    Guess the page aspect ratio from hints found in the HTML source.

    The checks run in order and the first one that yields an answer wins:
    1. A viewport meta tag that carries an explicit orientation hint.
    2. A CSS ``aspect-ratio: W / H`` declaration anywhere in the document.
    3. Keywords typical of slide decks (treated as landscape).
    4. Pixel width/height in the ``<body>`` inline style.

    Args:
        html_content: String containing the HTML document.

    Returns:
        "16:9", "1:1", or "9:16". Falls back to "9:16" (portrait) when no
        hint is found or when a hint has a zero height.
    """
    # Check for viewport meta tag
    viewport_match = re.search(
        r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']',
        html_content,
        re.IGNORECASE,
    )
    if viewport_match:
        viewport = viewport_match.group(1).lower()
        if 'width=device-width' in viewport or 'width=100%' in viewport:
            # Check for orientation hints
            if 'orientation=portrait' in viewport:
                return "9:16"
            if 'orientation=landscape' in viewport:
                return "16:9"

    # Check for CSS aspect-ratio property
    aspect_match = re.search(
        r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE
    )
    if aspect_match:
        bucket = _ratio_bucket(int(aspect_match.group(1)), int(aspect_match.group(2)))
        # A zero denominator (e.g. "16/0") gives no usable ratio, so keep looking
        # instead of raising ZeroDivisionError.
        if bucket is not None:
            return bucket

    # Check for common presentation frameworks (lower-case the document once)
    lowered = html_content.lower()
    if any(keyword in lowered for keyword in ('reveal.js', 'impress.js', 'slide', 'presentation')):
        return "16:9"

    # Check body style for width/height hints
    body_match = re.search(
        r'<body[^>]*style=["\']([^"\']*)["\']', html_content, re.IGNORECASE
    )
    if body_match:
        style = body_match.group(1).lower()
        width_match = re.search(r'width\s*:\s*(\d+)', style)
        height_match = re.search(r'height\s*:\s*(\d+)', style)
        if width_match and height_match:
            bucket = _ratio_bucket(int(width_match.group(1)), int(height_match.group(1)))
            # Only clearly landscape/portrait bodies decide here; a roughly
            # square body (or a zero height) falls through to the default.
            if bucket in ("16:9", "9:16"):
                return bucket

    # Default to A4 portrait for documents
    return "9:16"
67
+
68
def render_html_preview(html_content, height=600):
    """Build an iframe tag that shows *html_content* via a base64 data URI.

    Args:
        html_content: String containing the HTML document to preview.
        height: Iframe height in pixels (defaults to the previous fixed 600).

    Returns:
        HTML markup for an ``<iframe>`` embedding the document.
    """
    # The payload is UTF-8 encoded, so say so in the data URI. Without an
    # explicit charset, a document lacking its own <meta charset> may be
    # decoded with a different encoding and show garbled non-ASCII text.
    b64 = base64.b64encode(html_content.encode("utf-8")).decode("ascii")
    return (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" width="100%" '
        f'height="{height}" style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    )
74
+
75
def render_pdf_preview(pdf_bytes, height=600):
    """Build an iframe tag that shows a PDF via a base64 data URI.

    Args:
        pdf_bytes: Raw bytes of the PDF document.
        height: Iframe height in pixels (defaults to the previous fixed 600).

    Returns:
        HTML markup for an ``<iframe>`` embedding the PDF.
    """
    b64 = base64.b64encode(pdf_bytes).decode("ascii")
    return (
        f'<iframe src="data:application/pdf;base64,{b64}" width="100%" '
        f'height="{height}" style="border: 2px solid #ddd; border-radius: 5px;" '
        f'type="application/pdf"></iframe>'
    )
80
+
81
# Stylesheet added to every document before conversion: zero page margins and
# "exact" color adjustment so backgrounds and colors are kept when printing.
_PRINT_STYLE_INJECTION = """
<style>
    @page {
        margin: 0;
    }
    * {
        -webkit-print-color-adjust: exact !important;
        print-color-adjust: exact !important;
        color-adjust: exact !important;
    }
    body {
        -webkit-print-color-adjust: exact !important;
        print-color-adjust: exact !important;
    }
</style>
"""


def _inject_print_styles(html_content):
    """Return *html_content* with the print stylesheet inserted exactly once.

    The stylesheet goes before the first closing head tag, else before the
    first opening body tag, else at the very start of the document. Tag
    matching is case-insensitive.
    """
    head_close = re.search(r'</head\s*>', html_content, re.IGNORECASE)
    if head_close:
        position = head_close.start()
    else:
        body_open = re.search(r'<body\b', html_content, re.IGNORECASE)
        position = body_open.start() if body_open else 0
    return html_content[:position] + _PRINT_STYLE_INJECTION + html_content[position:]


def convert_html_to_pdf(html_content, aspect_ratio):
    """
    Convert HTML content to PDF using Puppeteer with better styling preservation

    The HTML is written to a temporary directory and passed to the
    ``puppeteer_pdf.js`` Node.js script located one directory above this
    file. The temporary directory is removed on every exit path.

    Args:
        html_content: String containing HTML content
        aspect_ratio: One of "16:9", "1:1", or "9:16"

    Returns:
        Tuple of (pdf_bytes, error_message). Exactly one of the two is None:
        ``(bytes, None)`` on success, ``(None, str)`` on failure. Exceptions
        are reported through the error message rather than raised.
    """
    temp_dir = None
    try:
        # Create temporary directory for processing
        temp_dir = tempfile.mkdtemp()

        # Save the style-augmented HTML to a temporary file
        html_file = os.path.join(temp_dir, "input.html")
        with open(html_file, 'w', encoding='utf-8') as f:
            f.write(_inject_print_styles(html_content))

        # puppeteer_pdf.js lives in the parent of this file's directory
        script_dir = os.path.dirname(os.path.abspath(__file__))
        project_root = os.path.dirname(script_dir)
        puppeteer_script = os.path.join(project_root, 'puppeteer_pdf.js')

        # Run Node.js script to convert HTML to PDF
        result = subprocess.run(
            ['node', puppeteer_script, html_file, aspect_ratio],
            capture_output=True,
            text=True,
            timeout=60,
            cwd=project_root
        )

        if result.returncode != 0:
            # Prefer stderr, but fall back to stdout so the message is not empty
            details = result.stderr.strip() or result.stdout.strip()
            return None, f"PDF conversion failed: {details}"

        # NOTE(review): assumes the script writes the PDF next to the input
        # file with a .pdf extension - confirm against puppeteer_pdf.js.
        # Only the extension is swapped, so a ".html" elsewhere in the
        # temporary path cannot corrupt the result.
        pdf_file = os.path.splitext(html_file)[0] + '.pdf'

        if not os.path.exists(pdf_file):
            return None, "PDF file was not generated"

        # Read PDF file into memory before the directory is removed
        with open(pdf_file, 'rb') as f:
            return f.read(), None

    except subprocess.TimeoutExpired:
        return None, "Error: PDF conversion timed out (60 seconds)"
    except Exception as e:
        # UI boundary: report any failure as a message instead of crashing
        return None, f"Error: {str(e)}"
    finally:
        # Single cleanup point; also covers the early error returns above
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
167
+
168
# Page header
st.title("📄 HTML to PDF Converter")
st.markdown("""
Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
Preserves styles, fonts, colors, and layout.
""")

# Create tabs: one per input method (file upload vs. pasted source)
tab1, tab2 = st.tabs(["📤 Upload HTML File", "📝 Paste HTML Code"])
177
+
178
# Tab 1: Upload HTML File
with tab1:
    uploaded_file = st.file_uploader(
        "Choose an HTML file",
        type=['html', 'htm'],
        key="file_uploader",
        help="Upload an HTML file (max 200MB)",
        accept_multiple_files=False
    )

    if uploaded_file is not None:
        st.success(f"✅ File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")

        # Read the file content once. getvalue() returns the whole buffer
        # independent of the current read position, so no seek() is needed.
        raw_html = uploaded_file.getvalue()
        try:
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM
            html_content = raw_html.decode('utf-8-sig')
        except UnicodeDecodeError:
            # latin-1 maps every byte, so this fallback cannot fail
            html_content = raw_html.decode('latin-1')

        # Auto-detect aspect ratio
        detected_ratio = detect_aspect_ratio(html_content)
        ratio_options = ["16:9", "1:1", "9:16"]

        col1, col2 = st.columns([1, 1])

        with col1:
            st.subheader("⚙️ Settings")

            auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")

            if auto_detect:
                aspect_ratio_file = detected_ratio
                st.info(f"🔍 Detected: **{detected_ratio}**")
            else:
                # Pre-select the detected value so manual mode starts from it
                aspect_ratio_file = st.radio(
                    "Aspect Ratio",
                    options=ratio_options,
                    index=ratio_options.index(detected_ratio),
                    key="aspect_file",
                    help="Select the page orientation and dimensions"
                )

            st.markdown(
                f"**Selected: {aspect_ratio_file}**\n"
                "- 16:9 = Landscape (297mm × 210mm)\n"
                "- 1:1 = Square (210mm × 210mm)\n"
                "- 9:16 = Portrait (210mm × 297mm)\n"
            )

            convert_file_btn = st.button("🔄 Convert to PDF", key="convert_file", type="primary", use_container_width=True)

        with col2:
            st.subheader("👁️ HTML Preview")
            with st.expander("Show HTML Preview", expanded=False):
                st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)

        # Conversion section
        if convert_file_btn:
            with st.spinner("Converting HTML to PDF..."):
                pdf_bytes, error = convert_html_to_pdf(html_content, aspect_ratio_file)

            if error:
                st.error(f"❌ {error}")
                with st.expander("Show error details"):
                    st.code(error)
            else:
                st.success("✅ PDF generated successfully!")

                col_a, col_b = st.columns([1, 1])

                with col_a:
                    # Swap only the final extension (any letter case). Chained
                    # str.replace would also rewrite ".html" inside the name
                    # and would miss upper-case extensions.
                    output_filename = Path(uploaded_file.name).with_suffix('.pdf').name

                    st.download_button(
                        label="⬇️ Download PDF",
                        data=pdf_bytes,
                        file_name=output_filename,
                        mime="application/pdf",
                        use_container_width=True,
                        key="download_file_pdf"
                    )

                with col_b:
                    st.info(f"📦 Size: {len(pdf_bytes):,} bytes")

                # PDF Preview
                st.subheader("📄 PDF Preview")
                st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
269
+
270
# Tab 2: Paste HTML Code
with tab2:
    col1, col2 = st.columns([1, 1])

    with col1:
        # The sample declares its charset so the emoji in it are decoded
        # correctly however the document ends up being loaded.
        html_code = st.text_area(
            "HTML Content",
            value="""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Sample Document</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 40px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
        }
        h1 {
            font-size: 48px;
            margin-bottom: 20px;
            text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
        }
        p {
            font-size: 18px;
            line-height: 1.6;
        }
        .box {
            background: rgba(255,255,255,0.1);
            padding: 20px;
            border-radius: 10px;
            margin-top: 20px;
        }
    </style>
</head>
<body>
    <h1>Hello, PDF World! 🌍</h1>
    <p>This is a sample HTML document converted to PDF.</p>
    <div class="box">
        <p>✨ Styles, colors, and gradients are preserved!</p>
    </div>
</body>
</html>""",
            height=400,
            key="html_code"
        )

        # Evaluate "is there any non-blank HTML?" once and reuse it below
        has_html = bool(html_code and html_code.strip())

        if has_html:
            # Auto-detect aspect ratio
            detected_ratio_text = detect_aspect_ratio(html_code)
            ratio_options_text = ["16:9", "1:1", "9:16"]

            auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")

            if auto_detect_text:
                aspect_ratio_text = detected_ratio_text
                st.info(f"🔍 Detected: **{detected_ratio_text}**")
            else:
                # Pre-select the detected value so manual mode starts from it
                aspect_ratio_text = st.radio(
                    "Aspect Ratio",
                    options=ratio_options_text,
                    index=ratio_options_text.index(detected_ratio_text),
                    key="aspect_text",
                    help="Select the page orientation and dimensions"
                )

            convert_text_btn = st.button("🔄 Convert to PDF", key="convert_text", type="primary", use_container_width=True)
        else:
            # Nothing to convert, so no button is shown
            convert_text_btn = False

    with col2:
        if has_html:
            st.subheader("👁️ HTML Preview")
            with st.expander("Show HTML Preview", expanded=False):
                st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)

    # Conversion section (convert_text_btn can only be True when has_html is)
    if convert_text_btn and has_html:
        with st.spinner("Converting HTML to PDF..."):
            pdf_bytes, error = convert_html_to_pdf(html_code, aspect_ratio_text)

        if error:
            st.error(f"❌ {error}")
            with st.expander("Show error details"):
                st.code(error)
        else:
            st.success("✅ PDF generated successfully!")

            col_a, col_b = st.columns([1, 1])

            with col_a:
                st.download_button(
                    label="⬇️ Download PDF",
                    data=pdf_bytes,
                    file_name="converted.pdf",
                    mime="application/pdf",
                    use_container_width=True,
                    key="download_text_pdf"
                )

            with col_b:
                st.info(f"📦 Size: {len(pdf_bytes):,} bytes")

            # PDF Preview
            st.subheader("📄 PDF Preview")
            st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
374
+
375
# Footer with tips (shown below both tabs)
st.markdown("---")
st.markdown("""
### 💡 Tips:
- **Auto-detection** analyzes your HTML to suggest the best aspect ratio
- **16:9** - Best for presentations and landscape documents (297mm × 210mm)
- **1:1** - Square format (210mm × 210mm)
- **9:16** - Portrait format, standard A4 (210mm × 297mm)
- All CSS styles, colors, gradients, and fonts are preserved
- Use inline CSS or `<style>` tags for best results
- External resources should use absolute URLs
""")