ABDALLALSWAITI committed on
Commit
45054fd
·
verified ·
1 Parent(s): e23e782

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +250 -455
src/streamlit_app.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Streamlit HTML to PDF Converter with Image Support
3
  Save this file as: src/streamlit_app.py
4
  """
5
  import streamlit as st
@@ -10,6 +10,7 @@ import shutil
10
  from pathlib import Path
11
  import base64
12
  import re
 
13
 
14
  st.set_page_config(
15
  page_title="HTML to PDF Converter",
@@ -18,21 +19,15 @@ st.set_page_config(
18
  )
19
 
20
  def detect_aspect_ratio(html_content):
21
- """
22
- Detect aspect ratio from HTML content
23
- Returns: "16:9", "1:1", or "9:16"
24
- """
25
- # Check for viewport meta tag
26
  viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
27
  if viewport_match:
28
  viewport = viewport_match.group(1).lower()
29
- if 'width=device-width' in viewport or 'width=100%' in viewport:
30
- if 'orientation=portrait' in viewport:
31
- return "9:16"
32
- elif 'orientation=landscape' in viewport:
33
- return "16:9"
34
 
35
- # Check for CSS aspect-ratio property
36
  aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
37
  if aspect_match:
38
  width = int(aspect_match.group(1))
@@ -45,123 +40,114 @@ def detect_aspect_ratio(html_content):
45
  else:
46
  return "1:1"
47
 
48
- # Check for common presentation frameworks
49
  if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
50
  return "16:9"
51
 
52
- # Default to A4 portrait
53
  return "9:16"
54
 
55
- def save_uploaded_images(images, temp_dir):
56
- """Save uploaded images and return mapping"""
57
- if not images:
58
- return {}
59
-
60
- image_mapping = {}
61
- images_dir = os.path.join(temp_dir, "images")
62
- os.makedirs(images_dir, exist_ok=True)
63
-
64
- for image in images:
65
- # Save image
66
- image_path = os.path.join(images_dir, image.name)
67
- with open(image_path, 'wb') as f:
68
- f.write(image.getvalue())
 
 
 
 
 
 
 
69
 
70
- # Create mapping - use relative path from temp_dir
71
- image_mapping[image.name] = f"images/{image.name}"
72
- print(f"✓ Saved image: {image.name} -> {image_path}")
73
- print(f" File exists: {os.path.exists(image_path)}")
74
- print(f" File size: {os.path.getsize(image_path)} bytes")
75
-
76
- return image_mapping
 
77
 
78
- def process_html_with_images(html_content, temp_dir, image_mapping):
79
- """Process HTML to handle image references with absolute file paths"""
80
- if not image_mapping:
81
- return html_content
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- replacements_made = []
 
 
 
 
84
  original_html = html_content
85
 
86
- for original_name, relative_path in image_mapping.items():
87
- # Get absolute path for the image
88
- absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
89
-
90
- # Convert to file:// URL with proper escaping
91
- # Use forward slashes even on Windows for file:// URLs
92
- file_url = f"file://{absolute_path.replace(os.sep, '/')}"
93
-
94
- print(f"\nProcessing image: {original_name}")
95
- print(f" Absolute path: {absolute_path}")
96
- print(f" File URL: {file_url}")
97
- print(f" File exists: {os.path.exists(absolute_path)}")
98
-
99
- # Escape the filename for regex
100
- escaped_name = re.escape(original_name)
101
-
102
- # Pattern 1: src attribute with any path prefix or no prefix
103
- pattern1 = rf'src\s*=\s*(["\'])(?:[^"\']*?/)?{escaped_name}\1'
104
- matches1 = list(re.finditer(pattern1, html_content, flags=re.IGNORECASE))
105
  if matches1:
106
- print(f" Found {len(matches1)} src= matches")
107
- for match in matches1:
108
- print(f" - {match.group()}")
109
- html_content = re.sub(
110
- pattern1,
111
- f'src=\\1{file_url}\\1',
112
- html_content,
113
- flags=re.IGNORECASE
114
- )
115
 
116
- # Pattern 2: url() in CSS with any path prefix or no prefix
117
- pattern2 = rf'url\s*\(\s*(["\']?)(?:[^)"\']*/)?{escaped_name}\1\s*\)'
118
  matches2 = list(re.finditer(pattern2, html_content, flags=re.IGNORECASE))
 
119
  if matches2:
120
- print(f" Found {len(matches2)} url() matches")
121
- for match in matches2:
122
- print(f" - {match.group()}")
123
- html_content = re.sub(
124
- pattern2,
125
- f'url("{file_url}")',
126
- html_content,
127
- flags=re.IGNORECASE
128
- )
129
 
130
- # Pattern 3: href attribute (for linked images)
131
- pattern3 = rf'href\s*=\s*(["\'])(?:[^"\']*?/)?{escaped_name}\1'
132
  matches3 = list(re.finditer(pattern3, html_content, flags=re.IGNORECASE))
 
133
  if matches3:
134
- print(f" Found {len(matches3)} href= matches")
135
- for match in matches3:
136
- print(f" - {match.group()}")
137
- html_content = re.sub(
138
- pattern3,
139
- f'href=\\1{file_url}\\1',
140
- html_content,
141
- flags=re.IGNORECASE
142
- )
143
-
144
- total_matches = len(matches1) + len(matches2) + len(matches3)
145
- if total_matches > 0:
146
- replacements_made.append(f"{original_name}: {total_matches} replacement(s)")
147
 
148
- # Print summary
149
- if replacements_made:
150
- print("\n=== Image Replacements Summary ===")
151
- for msg in replacements_made:
152
- print(f" {msg}")
153
  else:
154
- print("\n=== WARNING: No image replacements made ===")
155
- print(f"Looking for images: {list(image_mapping.keys())}")
156
- # Show lines with image references
157
- lines_with_images = [line for line in html_content.split('\n')
158
- if any(keyword in line.lower() for keyword in ['src=', 'url(', 'href='])]
159
- if lines_with_images:
160
- print("Lines with potential image references:")
161
- for line in lines_with_images[:5]:
162
- print(f" {line.strip()}")
 
 
 
163
 
164
- return html_content
165
 
166
  def render_html_preview(html_content):
167
  """Render HTML preview in an iframe"""
@@ -205,53 +191,36 @@ def render_pdf_preview(pdf_bytes):
205
  font-size: 18px;
206
  padding: 20px;
207
  }}
208
- .error {{
209
- color: #ff6b6b;
210
- font-family: Arial, sans-serif;
211
- padding: 20px;
212
- background: rgba(0,0,0,0.5);
213
- border-radius: 5px;
214
- margin: 20px;
215
- }}
216
  </style>
217
  </head>
218
  <body>
219
  <div id="pdf-container">
220
  <div id="loading">Loading PDF...</div>
221
  </div>
222
-
223
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
224
  <script>
225
  pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
226
-
227
  const pdfData = atob('{b64}');
228
  const pdfContainer = document.getElementById('pdf-container');
229
  const loading = document.getElementById('loading');
230
-
231
  const uint8Array = new Uint8Array(pdfData.length);
232
  for (let i = 0; i < pdfData.length; i++) {{
233
  uint8Array[i] = pdfData.charCodeAt(i);
234
  }}
235
-
236
  pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
237
  loading.style.display = 'none';
238
-
239
  const numPages = pdf.numPages;
240
  const promises = [];
241
-
242
  for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
243
  promises.push(
244
  pdf.getPage(pageNum).then(function(page) {{
245
  const scale = 1.5;
246
  const viewport = page.getViewport({{scale: scale}});
247
-
248
  const canvas = document.createElement('canvas');
249
  const context = canvas.getContext('2d');
250
  canvas.height = viewport.height;
251
  canvas.width = viewport.width;
252
-
253
  pdfContainer.appendChild(canvas);
254
-
255
  return page.render({{
256
  canvasContext: context,
257
  viewport: viewport
@@ -259,11 +228,9 @@ def render_pdf_preview(pdf_bytes):
259
  }})
260
  );
261
  }}
262
-
263
  return Promise.all(promises);
264
  }}).catch(function(error) {{
265
- loading.innerHTML = '<div class="error">Error loading PDF: ' + error.message + '</div>';
266
- console.error('Error loading PDF:', error);
267
  }});
268
  </script>
269
  </body>
@@ -272,24 +239,12 @@ def render_pdf_preview(pdf_bytes):
272
  return pdf_viewer_html
273
 
274
  def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
275
- """
276
- Convert HTML content to PDF using Puppeteer with better styling preservation
277
-
278
- Args:
279
- html_content: String containing HTML content
280
- aspect_ratio: One of "16:9", "1:1", or "9:16"
281
- temp_dir: Temporary directory for processing
282
-
283
- Returns:
284
- Tuple of (pdf_bytes, error_message)
285
- """
286
  try:
287
- # Inject CSS to preserve styles better
288
  style_injection = """
289
  <style>
290
- @page {
291
- margin: 0;
292
- }
293
  * {
294
  -webkit-print-color-adjust: exact !important;
295
  print-color-adjust: exact !important;
@@ -302,7 +257,6 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
302
  </style>
303
  """
304
 
305
- # Insert style injection before closing head tag or at the start of body
306
  if '</head>' in html_content:
307
  html_content = html_content.replace('</head>', style_injection + '</head>')
308
  elif '<body' in html_content:
@@ -310,71 +264,66 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
310
  else:
311
  html_content = style_injection + html_content
312
 
313
- # Save HTML content to temporary file
314
  html_file = os.path.join(temp_dir, "input.html")
315
  with open(html_file, 'w', encoding='utf-8') as f:
316
  f.write(html_content)
317
 
318
- print(f"\nSaved HTML to: {html_file}")
319
- print(f"HTML file size: {os.path.getsize(html_file)} bytes")
320
 
321
- # Get the path to puppeteer_pdf.js
322
  script_dir = os.path.dirname(os.path.abspath(__file__))
323
- puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
 
 
 
 
 
324
 
325
- # If not found, try current directory
326
- if not os.path.exists(puppeteer_script):
327
- puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
 
 
328
 
329
- # If still not found, try one level up
330
- if not os.path.exists(puppeteer_script):
331
- puppeteer_script = os.path.join(os.path.dirname(script_dir), '..', 'puppeteer_pdf.js')
332
 
333
- print(f"Using Puppeteer script: {puppeteer_script}")
334
- print(f"Script exists: {os.path.exists(puppeteer_script)}")
335
 
336
- # Run Node.js script to convert HTML to PDF
337
  result = subprocess.run(
338
  ['node', puppeteer_script, html_file, aspect_ratio],
339
  capture_output=True,
340
  text=True,
341
  timeout=60,
342
- cwd=os.path.dirname(puppeteer_script)
343
  )
344
 
345
- print(f"\nPuppeteer stdout: {result.stdout}")
346
- if result.stderr:
347
- print(f"Puppeteer stderr: {result.stderr}")
348
-
349
  if result.returncode != 0:
350
  return None, f"PDF conversion failed: {result.stderr}"
351
 
352
- # Get the generated PDF path
353
  pdf_file = html_file.replace('.html', '.pdf')
354
-
355
  if not os.path.exists(pdf_file):
356
  return None, "PDF file was not generated"
357
 
358
- # Read PDF file into memory
359
  with open(pdf_file, 'rb') as f:
360
  pdf_bytes = f.read()
361
 
362
- print(f"PDF generated successfully: {len(pdf_bytes)} bytes")
363
  return pdf_bytes, None
364
 
365
  except subprocess.TimeoutExpired:
366
  return None, "Error: PDF conversion timed out (60 seconds)"
367
  except Exception as e:
368
- print(f"Error in convert_html_to_pdf: {str(e)}")
369
- import traceback
370
- traceback.print_exc()
371
  return None, f"Error: {str(e)}"
372
 
373
- # Page header
374
  st.title("📄 HTML to PDF Converter")
375
  st.markdown("""
376
- Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
377
- **With Image Support** - Upload images alongside your HTML files!
378
  """)
379
 
380
  # Create tabs
@@ -386,32 +335,28 @@ with tab1:
386
  "Choose an HTML file",
387
  type=['html', 'htm'],
388
  key="file_uploader",
389
- help="Upload an HTML file (max 200MB)",
390
- accept_multiple_files=False
391
  )
392
 
393
- # Image uploader
394
  uploaded_images = st.file_uploader(
395
- "📷 Upload Images (optional)",
396
  type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
397
  key="image_uploader",
398
- help="Upload images referenced in your HTML. Filename must match exactly what's in your HTML.",
399
  accept_multiple_files=True
400
  )
401
 
402
  if uploaded_images:
403
  st.success(f"✅ {len(uploaded_images)} image(s) uploaded")
404
- with st.expander("View uploaded images", expanded=True):
405
  cols = st.columns(min(len(uploaded_images), 4))
406
  for idx, img in enumerate(uploaded_images):
407
  with cols[idx % 4]:
408
  st.image(img, caption=img.name, use_container_width=True)
409
- st.caption(f"Size: {img.size:,} bytes")
410
 
411
- if uploaded_file is not None:
412
- st.success(f"✅ File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
413
 
414
- # Read file content
415
  uploaded_file.seek(0)
416
  try:
417
  html_content = uploaded_file.getvalue().decode('utf-8')
@@ -419,356 +364,206 @@ with tab1:
419
  uploaded_file.seek(0)
420
  html_content = uploaded_file.getvalue().decode('latin-1')
421
 
422
- # Auto-detect aspect ratio
423
  detected_ratio = detect_aspect_ratio(html_content)
424
 
425
  col1, col2 = st.columns([1, 1])
426
 
427
  with col1:
428
  st.subheader("⚙️ Settings")
429
-
430
- auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")
431
 
432
  if auto_detect:
433
- aspect_ratio_file = detected_ratio
434
  st.info(f"🔍 Detected: **{detected_ratio}**")
435
  else:
436
- aspect_ratio_file = st.radio(
437
  "Aspect Ratio",
438
  options=["16:9", "1:1", "9:16"],
439
  index=["16:9", "1:1", "9:16"].index(detected_ratio),
440
- key="aspect_file",
441
- help="Select the page orientation and dimensions"
442
  )
443
 
444
- st.markdown(f"""
445
- **Selected: {aspect_ratio_file}**
446
- - 16:9 = Landscape (297mm × 210mm)
447
- - 1:1 = Square (210mm × 210mm)
448
- - 9:16 = Portrait (210mm × 297mm)
449
- """)
450
-
451
- convert_file_btn = st.button("🔄 Convert to PDF", key="convert_file", type="primary", use_container_width=True)
452
 
453
  with col2:
454
- st.subheader("👁️ HTML Preview")
455
- with st.expander("Show HTML Preview", expanded=False):
456
- st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)
457
 
458
- # Conversion section
459
- if convert_file_btn:
460
  temp_dir = None
461
  try:
462
- with st.spinner("🔄 Converting HTML to PDF..."):
463
- # Create temp directory
464
  temp_dir = tempfile.mkdtemp()
465
- print(f"\n{'='*60}")
466
- print(f"Created temp directory: {temp_dir}")
467
 
468
- # Process images if uploaded
469
  processed_html = html_content
470
  if uploaded_images:
471
- st.info(f"📷 Processing {len(uploaded_images)} image(s)...")
472
- image_mapping = save_uploaded_images(uploaded_images, temp_dir)
473
- processed_html = process_html_with_images(html_content, temp_dir, image_mapping)
474
-
475
- # Show debug info
476
- with st.expander("🔍 Debug: Image Processing Details", expanded=False):
477
- st.write("**Uploaded Images:**")
478
- for img in uploaded_images:
479
- st.text(f" ✓ {img.name} ({img.size:,} bytes)")
480
-
481
- st.write("\n**Image Mappings:**")
482
- for orig, rel_path in image_mapping.items():
483
- full_path = os.path.join(temp_dir, rel_path)
484
- exists = os.path.exists(full_path)
485
- st.text(f" {orig}")
486
- st.text(f" → {rel_path}")
487
- st.text(f" → Full: {full_path}")
488
- st.text(f" → Exists: {'✓' if exists else '✗'}")
489
 
490
- st.write("\n**HTML Image References:**")
491
- html_lines = processed_html.split('\n')
492
- img_lines = [line.strip() for line in html_lines
493
- if any(k in line.lower() for k in ['<img', 'src=', 'url('])]
494
- if img_lines:
495
- for line in img_lines[:10]:
496
- st.code(line, language='html')
497
- else:
498
- st.warning("⚠️ No image references found in HTML!")
499
- else:
500
- print("No images uploaded")
501
 
502
  # Convert to PDF
503
- pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_file, temp_dir)
504
 
505
  if error:
506
  st.error(f"❌ {error}")
507
- with st.expander("Show error details"):
508
- st.code(error)
509
  else:
510
- st.success("✅ PDF generated successfully!")
511
 
512
- col_a, col_b = st.columns([1, 1])
 
 
513
 
 
514
  with col_a:
515
- output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
516
- if not output_filename.endswith('.pdf'):
517
- output_filename += '.pdf'
518
-
519
  st.download_button(
520
- label="⬇️ Download PDF",
521
  data=pdf_bytes,
522
- file_name=output_filename,
523
  mime="application/pdf",
524
- use_container_width=True,
525
- key="download_file_pdf"
526
  )
527
-
528
  with col_b:
529
- st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
530
 
531
- # PDF Preview
532
  st.subheader("📄 PDF Preview")
533
- st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
534
-
535
  except Exception as e:
536
  st.error(f"❌ Error: {str(e)}")
537
- with st.expander("Show full error"):
538
- import traceback
539
- st.code(traceback.format_exc())
540
  finally:
541
- # Cleanup
542
  if temp_dir and os.path.exists(temp_dir):
543
- print(f"Cleaning up temp directory: {temp_dir}")
544
  shutil.rmtree(temp_dir, ignore_errors=True)
545
 
546
- # Tab 2: Paste HTML Code
547
  with tab2:
548
- col1, col2 = st.columns([1, 1])
549
-
550
- with col1:
551
- html_code = st.text_area(
552
- "HTML Content",
553
- value="""<!DOCTYPE html>
554
  <html>
555
  <head>
556
- <title>Sample Document</title>
557
  <style>
558
  body {
559
- font-family: Arial, sans-serif;
560
  margin: 40px;
561
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
562
  color: white;
563
  }
564
- h1 {
565
- font-size: 48px;
566
- margin-bottom: 20px;
567
- text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
568
- }
569
- p {
570
- font-size: 18px;
571
- line-height: 1.6;
572
- }
573
  .box {
574
  background: rgba(255,255,255,0.1);
575
  padding: 20px;
576
  border-radius: 10px;
577
- margin-top: 20px;
578
  }
579
  </style>
580
  </head>
581
  <body>
582
- <h1>Hello, PDF World! 🌍</h1>
583
- <p>This is a sample HTML document converted to PDF.</p>
584
  <div class="box">
585
- <p>Styles, colors, and gradients are preserved!</p>
586
  </div>
587
  </body>
588
  </html>""",
589
- height=400,
590
- key="html_code"
591
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
592
 
593
- # Image uploader for text tab
594
- uploaded_images_text = st.file_uploader(
595
- "📷 Upload Images (optional)",
596
- type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
597
- key="image_uploader_text",
598
- help="Upload images referenced in your HTML code. Filename must match exactly what's in your HTML.",
599
- accept_multiple_files=True
600
- )
 
 
601
 
602
- if uploaded_images_text:
603
- st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
604
- with st.expander("View uploaded images", expanded=True):
605
- cols = st.columns(min(len(uploaded_images_text), 4))
606
- for idx, img in enumerate(uploaded_images_text):
607
- with cols[idx % 4]:
608
- st.image(img, caption=img.name, use_container_width=True)
609
- st.caption(f"Size: {img.size:,} bytes")
610
 
611
- if html_code and html_code.strip():
612
- # Auto-detect aspect ratio
613
- detected_ratio_text = detect_aspect_ratio(html_code)
614
-
615
- auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
616
-
617
- if auto_detect_text:
618
- aspect_ratio_text = detected_ratio_text
619
- st.info(f"🔍 Detected: **{detected_ratio_text}**")
620
- else:
621
- aspect_ratio_text = st.radio(
622
- "Aspect Ratio",
623
- options=["16:9", "1:1", "9:16"],
624
- index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
625
- key="aspect_text",
626
- help="Select the page orientation and dimensions"
627
- )
628
-
629
- convert_text_btn = st.button("🔄 Convert to PDF", key="convert_text", type="primary", use_container_width=True)
630
- else:
631
- convert_text_btn = False
632
-
633
- with col2:
634
- if html_code and html_code.strip():
635
- st.subheader("👁️ HTML Preview")
636
- with st.expander("Show HTML Preview", expanded=False):
637
- st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
638
-
639
- if convert_text_btn and html_code and html_code.strip():
640
- temp_dir = None
641
- try:
642
- with st.spinner("🔄 Converting HTML to PDF..."):
643
- # Create temp directory
644
- temp_dir = tempfile.mkdtemp()
645
- print(f"\n{'='*60}")
646
- print(f"Created temp directory: {temp_dir}")
647
-
648
- # Process images if uploaded
649
- processed_html = html_code
650
- if uploaded_images_text:
651
- st.info(f"📷 Processing {len(uploaded_images_text)} image(s)...")
652
- image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
653
- processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
654
-
655
- # Show debug info
656
- with st.expander("🔍 Debug: Image Processing Details", expanded=False):
657
- st.write("**Uploaded Images:**")
658
- for img in uploaded_images_text:
659
- st.text(f" ✓ {img.name} ({img.size:,} bytes)")
660
-
661
- st.write("\n**Image Mappings:**")
662
- for orig, rel_path in image_mapping.items():
663
- full_path = os.path.join(temp_dir, rel_path)
664
- exists = os.path.exists(full_path)
665
- st.text(f" {orig}")
666
- st.text(f" → {rel_path}")
667
- st.text(f" → Full: {full_path}")
668
- st.text(f" → Exists: {'✓' if exists else '✗'}")
669
-
670
- st.write("\n**HTML Image References:**")
671
- html_lines = processed_html.split('\n')
672
- img_lines = [line.strip() for line in html_lines
673
- if any(k in line.lower() for k in ['<img', 'src=', 'url('])]
674
- if img_lines:
675
- for line in img_lines[:10]:
676
- st.code(line, language='html')
677
- else:
678
- st.warning("⚠️ No image references found in HTML!")
679
- else:
680
- print("No images uploaded")
681
-
682
- # Convert to PDF
683
- pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
684
-
685
- if error:
686
- st.error(f"❌ {error}")
687
- with st.expander("Show error details"):
688
- st.code(error)
689
- else:
690
- st.success("✅ PDF generated successfully!")
691
-
692
- col_a, col_b = st.columns([1, 1])
693
-
694
- with col_a:
695
- st.download_button(
696
- label="⬇️ Download PDF",
697
- data=pdf_bytes,
698
- file_name="converted.pdf",
699
- mime="application/pdf",
700
- use_container_width=True,
701
- key="download_text_pdf"
702
- )
703
 
704
- with col_b:
705
- st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
 
 
 
 
 
706
 
707
- # PDF Preview
708
- st.subheader("📄 PDF Preview")
709
- st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
710
 
711
- except Exception as e:
712
- st.error(f"❌ Error: {str(e)}")
713
- with st.expander("Show full error"):
714
- import traceback
715
- st.code(traceback.format_exc())
716
- finally:
717
- # Cleanup
718
- if temp_dir and os.path.exists(temp_dir):
719
- print(f"Cleaning up temp directory: {temp_dir}")
720
- shutil.rmtree(temp_dir, ignore_errors=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
721
 
722
- # Footer with tips
723
  st.markdown("---")
724
  st.markdown("""
725
- ### 💡 Tips:
726
- - **Auto-detection** analyzes your HTML to suggest the best aspect ratio
727
- - **16:9** - Best for presentations and landscape documents (297mm × 210mm)
728
- - **1:1** - Square format (210mm × 210mm)
729
- - **9:16** - Portrait format, standard A4 (210mm × 297mm)
730
- - **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
731
- - All CSS styles, colors, gradients, and fonts are preserved
732
- - Use inline CSS or `<style>` tags for best results
733
- - **Image filenames must match exactly** - if your HTML has `<img src="logo.png">`, upload a file named exactly `logo.png`
734
- - External resources should use absolute URLs (https://)
735
- - **PDF Preview** renders directly in the browser using PDF.js
736
 
737
- ### 🖼️ Using Images - IMPORTANT:
738
- 1. **Exact Filename Match**: If your HTML has `<img src="photo.jpg">`, upload a file named exactly `photo.jpg`
739
- 2. **Multiple Images**: Upload all images referenced in your HTML
740
- 3. **Supported Formats**: JPG, JPEG, PNG, GIF, SVG, WebP, BMP
741
- 4. **Path Variations**: These all work:
742
- - `<img src="logo.png">` ✓
743
- - `<img src="./logo.png">` ✓
744
- - `<img src="images/logo.png">` ✓ (just upload as `logo.png`)
745
- 5. **CSS Background Images**: Use `background-image: url('bg.jpg')` and upload `bg.jpg`
746
- 6. **Check Debug Info**: Expand the debug section after conversion to verify image processing
747
 
748
- ### 📝 Example HTML with Images:
749
  ```html
750
- <!DOCTYPE html>
751
- <html>
752
- <head>
753
- <style>
754
- body { font-family: Arial; padding: 40px; }
755
- .header {
756
- background-image: url('banner.jpg');
757
- background-size: cover;
758
- padding: 60px;
759
- color: white;
760
- }
761
- img { max-width: 100%; height: auto; }
762
- </style>
763
- </head>
764
- <body>
765
- <div class="header">
766
- <h1>My Document</h1>
767
- </div>
768
- <img src="photo.png" alt="Photo">
769
- <img src="logo.svg" alt="Logo">
770
- </body>
771
- </html>
772
  ```
773
- **Then upload**: `banner.jpg`, `photo.png`, `logo.svg`
774
  """)
 
1
  """
2
+ Streamlit HTML to PDF Converter with Image Support - REVISED
3
  Save this file as: src/streamlit_app.py
4
  """
5
  import streamlit as st
 
10
  from pathlib import Path
11
  import base64
12
  import re
13
+ import mimetypes
14
 
15
  st.set_page_config(
16
  page_title="HTML to PDF Converter",
 
19
  )
20
 
21
  def detect_aspect_ratio(html_content):
22
+ """Detect aspect ratio from HTML content"""
 
 
 
 
23
  viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
24
  if viewport_match:
25
  viewport = viewport_match.group(1).lower()
26
+ if 'orientation=portrait' in viewport:
27
+ return "9:16"
28
+ elif 'orientation=landscape' in viewport:
29
+ return "16:9"
 
30
 
 
31
  aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
32
  if aspect_match:
33
  width = int(aspect_match.group(1))
 
40
  else:
41
  return "1:1"
42
 
 
43
  if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
44
  return "16:9"
45
 
 
46
  return "9:16"
47
 
48
def image_to_base64(image_file):
    """Convert an uploaded image to a base64 ``data:`` URL.

    Args:
        image_file: File-like upload exposing ``name`` (the original
            filename) and ``getvalue()`` (the raw bytes).

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``, or ``None``
        if the file could not be read or encoded (the error is shown in the
        Streamlit UI instead of being raised).
    """
    # Extensions accepted by the image uploaders. They are resolved from this
    # fixed table first because mimetypes.guess_type() consults platform MIME
    # databases and may therefore answer differently from host to host.
    known_mime_types = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.gif': 'image/gif',
        '.svg': 'image/svg+xml',
        '.webp': 'image/webp',
        '.bmp': 'image/bmp',
    }

    try:
        image_bytes = image_file.getvalue()

        ext = os.path.splitext(image_file.name)[1].lower()
        mime_type = known_mime_types.get(ext)
        if not mime_type:
            # Unknown extension: ask the stdlib, then fall back to PNG.
            mime_type, _ = mimetypes.guess_type(image_file.name)
            if not mime_type:
                mime_type = 'image/png'

        b64_data = base64.b64encode(image_bytes).decode('utf-8')
        return f"data:{mime_type};base64,{b64_data}"
    except Exception as e:
        # Best-effort by design: one unreadable image must not abort the
        # whole conversion, so report it and let the caller skip this file.
        st.error(f"Error converting {image_file.name} to base64: {str(e)}")
        return None
78
 
79
def _replace_image_references(html_content, image_data_urls):
    """Swap references to uploaded filenames for their data URLs.

    Args:
        html_content: HTML text to rewrite.
        image_data_urls: Mapping of uploaded filename -> ``data:`` URL.

    Returns:
        Tuple ``(new_html, replacements)`` where ``replacements`` maps
        ``"<filename> (<kind>)"`` to the number of references rewritten.
    """
    replacements = {}

    for filename, data_url in image_data_urls.items():
        escaped_name = re.escape(filename)

        # Every pattern captures the text before the path (group 1, including
        # the opening quote) and the text after the filename (group 3), so a
        # single replacement rule keeps whatever quoting the author used.
        # Forcing double quotes would break inline style="...url('x')..."
        # attributes.
        rules = (
            # <img src="image.jpg">, src="./image.jpg", src="images/image.jpg"
            (
                "img src",
                rf'(<img[^>]*\s+src\s*=\s*(["\']))(?:[^"\']*?/)?{escaped_name}(\2)',
                re.IGNORECASE | re.DOTALL,
            ),
            # background-image: url(...)
            (
                "bg-image",
                rf'(background-image\s*:\s*url\s*\(\s*(["\']?))(?:[^)"\']*/)?{escaped_name}(\2\s*\))',
                re.IGNORECASE,
            ),
            # Any other CSS url(...), e.g. the `background` shorthand
            (
                "url",
                rf'(url\s*\(\s*(["\']?))(?:[^)"\']*/)?{escaped_name}(\2\s*\))',
                re.IGNORECASE,
            ),
        )

        for label, pattern, flags in rules:
            # A callable replacement inserts the data URL verbatim; a string
            # template would have its backslash escapes interpreted by re.
            html_content, count = re.subn(
                pattern,
                lambda m, url=data_url: m.group(1) + url + m.group(3),
                html_content,
                flags=flags,
            )
            if count:
                replacements[f"{filename} ({label})"] = count

    return html_content, replacements


def embed_images_as_base64(html_content, uploaded_images):
    """Embed uploaded images directly in the HTML as base64 data URLs.

    Each uploaded file is converted with ``image_to_base64`` and every
    ``<img src>`` or CSS ``url()`` reference to its filename (with or
    without a leading path) is replaced by the resulting data URL. Progress
    and a replacement summary are written to the Streamlit UI.

    Args:
        html_content: HTML text that may reference the uploaded filenames.
        uploaded_images: Uploaded file objects exposing ``name``; may be
            empty or ``None``.

    Returns:
        Tuple ``(html, replacements)``. ``replacements`` maps
        ``"<filename> (<kind>)"`` to a count and is empty when nothing was
        uploaded, converted, or matched.
    """
    if not uploaded_images:
        return html_content, {}

    # Map each filename to its data URL, skipping files that failed to convert.
    image_data_urls = {}
    for img in uploaded_images:
        data_url = image_to_base64(img)
        if data_url:
            image_data_urls[img.name] = data_url
            st.write(f"✓ Converted {img.name} to base64 ({len(data_url)} chars)")

    if not image_data_urls:
        return html_content, {}

    html_content, replacements = _replace_image_references(html_content, image_data_urls)

    if replacements:
        st.success(" Image Replacements:")
        for key, count in replacements.items():
            st.write(f" {key}: {count} replacement(s)")
    else:
        st.warning("⚠️ No image references found in HTML matching uploaded files!")
        st.write("Uploaded files:", [img.name for img in uploaded_images])

        # Show the lines that look image-related so the user can spot a
        # filename mismatch.
        with st.expander("🔍 Debug: Show HTML image references"):
            img_lines = [line for line in html_content.split('\n')
                         if any(k in line.lower() for k in ['<img', 'src=', 'url(', 'background'])]
            if img_lines:
                for line in img_lines[:10]:
                    st.code(line.strip(), language='html')
            else:
                st.write("No image-related lines found in HTML")

    return html_content, replacements
151
 
152
  def render_html_preview(html_content):
153
  """Render HTML preview in an iframe"""
 
191
  font-size: 18px;
192
  padding: 20px;
193
  }}
 
 
 
 
 
 
 
 
194
  </style>
195
  </head>
196
  <body>
197
  <div id="pdf-container">
198
  <div id="loading">Loading PDF...</div>
199
  </div>
 
200
  <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
201
  <script>
202
  pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
 
203
  const pdfData = atob('{b64}');
204
  const pdfContainer = document.getElementById('pdf-container');
205
  const loading = document.getElementById('loading');
 
206
  const uint8Array = new Uint8Array(pdfData.length);
207
  for (let i = 0; i < pdfData.length; i++) {{
208
  uint8Array[i] = pdfData.charCodeAt(i);
209
  }}
 
210
  pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
211
  loading.style.display = 'none';
 
212
  const numPages = pdf.numPages;
213
  const promises = [];
 
214
  for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
215
  promises.push(
216
  pdf.getPage(pageNum).then(function(page) {{
217
  const scale = 1.5;
218
  const viewport = page.getViewport({{scale: scale}});
 
219
  const canvas = document.createElement('canvas');
220
  const context = canvas.getContext('2d');
221
  canvas.height = viewport.height;
222
  canvas.width = viewport.width;
 
223
  pdfContainer.appendChild(canvas);
 
224
  return page.render({{
225
  canvasContext: context,
226
  viewport: viewport
 
228
  }})
229
  );
230
  }}
 
231
  return Promise.all(promises);
232
  }}).catch(function(error) {{
233
+ loading.innerHTML = '<div style="color:#ff6b6b;">Error: ' + error.message + '</div>';
 
234
  }});
235
  </script>
236
  </body>
 
239
  return pdf_viewer_html
240
 
241
  def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
242
+ """Convert HTML content to PDF using Puppeteer"""
 
 
 
 
 
 
 
 
 
 
243
  try:
244
+ # Inject CSS to preserve styles
245
  style_injection = """
246
  <style>
247
+ @page { margin: 0; }
 
 
248
  * {
249
  -webkit-print-color-adjust: exact !important;
250
  print-color-adjust: exact !important;
 
257
  </style>
258
  """
259
 
 
260
  if '</head>' in html_content:
261
  html_content = html_content.replace('</head>', style_injection + '</head>')
262
  elif '<body' in html_content:
 
264
  else:
265
  html_content = style_injection + html_content
266
 
267
+ # Save HTML to temp file
268
  html_file = os.path.join(temp_dir, "input.html")
269
  with open(html_file, 'w', encoding='utf-8') as f:
270
  f.write(html_content)
271
 
272
+ st.write(f"📝 Saved HTML: {os.path.getsize(html_file):,} bytes")
 
273
 
274
+ # Find puppeteer script
275
  script_dir = os.path.dirname(os.path.abspath(__file__))
276
+ possible_paths = [
277
+ os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js'),
278
+ os.path.join(script_dir, 'puppeteer_pdf.js'),
279
+ os.path.join(script_dir, '..', 'puppeteer_pdf.js'),
280
+ 'puppeteer_pdf.js'
281
+ ]
282
 
283
+ puppeteer_script = None
284
+ for path in possible_paths:
285
+ if os.path.exists(path):
286
+ puppeteer_script = path
287
+ break
288
 
289
+ if not puppeteer_script:
290
+ return None, "Error: puppeteer_pdf.js not found"
 
291
 
292
+ st.write(f"🔧 Using Puppeteer: {puppeteer_script}")
 
293
 
294
+ # Run conversion
295
  result = subprocess.run(
296
  ['node', puppeteer_script, html_file, aspect_ratio],
297
  capture_output=True,
298
  text=True,
299
  timeout=60,
300
+ cwd=os.path.dirname(os.path.abspath(puppeteer_script))
301
  )
302
 
 
 
 
 
303
  if result.returncode != 0:
304
  return None, f"PDF conversion failed: {result.stderr}"
305
 
306
+ # Read PDF
307
  pdf_file = html_file.replace('.html', '.pdf')
 
308
  if not os.path.exists(pdf_file):
309
  return None, "PDF file was not generated"
310
 
 
311
  with open(pdf_file, 'rb') as f:
312
  pdf_bytes = f.read()
313
 
314
+ st.write(f"PDF generated: {len(pdf_bytes):,} bytes")
315
  return pdf_bytes, None
316
 
317
  except subprocess.TimeoutExpired:
318
  return None, "Error: PDF conversion timed out (60 seconds)"
319
  except Exception as e:
 
 
 
320
  return None, f"Error: {str(e)}"
321
 
322
+ # Main UI
323
  st.title("📄 HTML to PDF Converter")
324
  st.markdown("""
325
+ Convert HTML to PDF with **embedded base64 images** for guaranteed display!
326
+ Images are converted to base64 and embedded directly in the HTML.
327
  """)
328
 
329
  # Create tabs
 
335
  "Choose an HTML file",
336
  type=['html', 'htm'],
337
  key="file_uploader",
338
+ help="Upload an HTML file"
 
339
  )
340
 
 
341
  uploaded_images = st.file_uploader(
342
+ "📷 Upload Images",
343
  type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
344
  key="image_uploader",
345
+ help="Upload images - they will be embedded as base64 in the HTML",
346
  accept_multiple_files=True
347
  )
348
 
349
  if uploaded_images:
350
  st.success(f"✅ {len(uploaded_images)} image(s) uploaded")
351
+ with st.expander("View uploaded images"):
352
  cols = st.columns(min(len(uploaded_images), 4))
353
  for idx, img in enumerate(uploaded_images):
354
  with cols[idx % 4]:
355
  st.image(img, caption=img.name, use_container_width=True)
 
356
 
357
+ if uploaded_file:
358
+ st.success(f"✅ File: {uploaded_file.name}")
359
 
 
360
  uploaded_file.seek(0)
361
  try:
362
  html_content = uploaded_file.getvalue().decode('utf-8')
 
364
  uploaded_file.seek(0)
365
  html_content = uploaded_file.getvalue().decode('latin-1')
366
 
 
367
  detected_ratio = detect_aspect_ratio(html_content)
368
 
369
  col1, col2 = st.columns([1, 1])
370
 
371
  with col1:
372
  st.subheader("⚙️ Settings")
373
+ auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_file")
 
374
 
375
  if auto_detect:
376
+ aspect_ratio = detected_ratio
377
  st.info(f"🔍 Detected: **{detected_ratio}**")
378
  else:
379
+ aspect_ratio = st.radio(
380
  "Aspect Ratio",
381
  options=["16:9", "1:1", "9:16"],
382
  index=["16:9", "1:1", "9:16"].index(detected_ratio),
383
+ key="aspect_file"
 
384
  )
385
 
386
+ convert_btn = st.button("🔄 Convert to PDF", key="conv_file", type="primary", use_container_width=True)
 
 
 
 
 
 
 
387
 
388
  with col2:
389
+ st.subheader("👁️ Preview")
390
+ with st.expander("Show HTML"):
391
+ st.components.v1.html(render_html_preview(html_content), height=400, scrolling=True)
392
 
393
+ if convert_btn:
 
394
  temp_dir = None
395
  try:
396
+ with st.spinner("Converting..."):
 
397
  temp_dir = tempfile.mkdtemp()
 
 
398
 
399
+ # Embed images as base64
400
  processed_html = html_content
401
  if uploaded_images:
402
+ with st.expander("🖼️ Image Processing", expanded=True):
403
+ processed_html, replacements = embed_images_as_base64(html_content, uploaded_images)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
+ if not replacements:
406
+ st.warning("⚠️ Images uploaded but no matches found in HTML!")
407
+ st.write("**Tip:** Make sure image filenames in HTML match uploaded files exactly")
 
 
 
 
 
 
 
 
408
 
409
  # Convert to PDF
410
+ pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio, temp_dir)
411
 
412
  if error:
413
  st.error(f"❌ {error}")
 
 
414
  else:
415
+ st.success("✅ PDF generated!")
416
 
417
+ output_name = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
418
+ if not output_name.endswith('.pdf'):
419
+ output_name += '.pdf'
420
 
421
+ col_a, col_b = st.columns(2)
422
  with col_a:
 
 
 
 
423
  st.download_button(
424
+ "⬇️ Download PDF",
425
  data=pdf_bytes,
426
+ file_name=output_name,
427
  mime="application/pdf",
428
+ use_container_width=True
 
429
  )
 
430
  with col_b:
431
+ st.info(f"Size: {len(pdf_bytes):,} bytes")
432
 
 
433
  st.subheader("📄 PDF Preview")
434
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
 
435
  except Exception as e:
436
  st.error(f"❌ Error: {str(e)}")
 
 
 
437
  finally:
 
438
  if temp_dir and os.path.exists(temp_dir):
 
439
  shutil.rmtree(temp_dir, ignore_errors=True)
440
 
441
+ # Tab 2: Paste HTML
442
  with tab2:
443
+ html_code = st.text_area(
444
+ "HTML Content",
445
+ value="""<!DOCTYPE html>
 
 
 
446
  <html>
447
  <head>
 
448
  <style>
449
  body {
450
+ font-family: Arial;
451
  margin: 40px;
452
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
453
  color: white;
454
  }
455
+ h1 { font-size: 48px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); }
 
 
 
 
 
 
 
 
456
  .box {
457
  background: rgba(255,255,255,0.1);
458
  padding: 20px;
459
  border-radius: 10px;
460
+ margin: 20px 0;
461
  }
462
  </style>
463
  </head>
464
  <body>
465
+ <h1>Hello PDF! 🌍</h1>
 
466
  <div class="box">
467
+ <p>Styles and gradients preserved!</p>
468
  </div>
469
  </body>
470
  </html>""",
471
+ height=400,
472
+ key="html_code"
473
+ )
474
+
475
+ uploaded_images_text = st.file_uploader(
476
+ "📷 Upload Images",
477
+ type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
478
+ key="image_text",
479
+ help="Upload images to embed in your HTML",
480
+ accept_multiple_files=True
481
+ )
482
+
483
+ if uploaded_images_text:
484
+ st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
485
+ with st.expander("View images"):
486
+ cols = st.columns(min(len(uploaded_images_text), 4))
487
+ for idx, img in enumerate(uploaded_images_text):
488
+ with cols[idx % 4]:
489
+ st.image(img, caption=img.name, use_container_width=True)
490
+
491
+ if html_code.strip():
492
+ detected_ratio_text = detect_aspect_ratio(html_code)
493
+ auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_text")
494
 
495
+ if auto_detect_text:
496
+ aspect_ratio_text = detected_ratio_text
497
+ st.info(f"🔍 Detected: **{detected_ratio_text}**")
498
+ else:
499
+ aspect_ratio_text = st.radio(
500
+ "Aspect Ratio",
501
+ options=["16:9", "1:1", "9:16"],
502
+ index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
503
+ key="aspect_text"
504
+ )
505
 
506
+ convert_text_btn = st.button("🔄 Convert", key="conv_text", type="primary", use_container_width=True)
 
 
 
 
 
 
 
507
 
508
+ if convert_text_btn:
509
+ temp_dir = None
510
+ try:
511
+ with st.spinner("Converting..."):
512
+ temp_dir = tempfile.mkdtemp()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
513
 
514
+ processed_html = html_code
515
+ if uploaded_images_text:
516
+ with st.expander("🖼️ Image Processing", expanded=True):
517
+ processed_html, replacements = embed_images_as_base64(html_code, uploaded_images_text)
518
+
519
+ if not replacements:
520
+ st.warning("⚠️ Images uploaded but no matches found!")
521
 
522
+ pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
 
 
523
 
524
+ if error:
525
+ st.error(f"❌ {error}")
526
+ else:
527
+ st.success("✅ PDF generated!")
528
+
529
+ col_a, col_b = st.columns(2)
530
+ with col_a:
531
+ st.download_button(
532
+ "⬇️ Download PDF",
533
+ data=pdf_bytes,
534
+ file_name="converted.pdf",
535
+ mime="application/pdf",
536
+ use_container_width=True
537
+ )
538
+ with col_b:
539
+ st.info(f"Size: {len(pdf_bytes):,} bytes")
540
+
541
+ st.subheader("📄 PDF Preview")
542
+ st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
543
+ except Exception as e:
544
+ st.error(f"❌ Error: {str(e)}")
545
+ finally:
546
+ if temp_dir and os.path.exists(temp_dir):
547
+ shutil.rmtree(temp_dir, ignore_errors=True)
548
 
549
+ # Footer
550
  st.markdown("---")
551
  st.markdown("""
552
+ ### 💡 How It Works:
553
+ - **Base64 Embedding**: Images are converted to base64 data URLs and embedded directly in HTML
554
+ - **No File Paths**: No need for file:// URLs or temp directories
555
+ - **Guaranteed Display**: Images are part of the HTML, so they always appear in the PDF
556
+ - **Filename Matching**: Your HTML must reference images by exact filename (e.g., `<img src="photo.jpg">`)
 
 
 
 
 
 
557
 
558
+ ### Supported:
559
+ - `<img src="photo.jpg">`
560
+ - `<img src="./images/logo.png">`
561
+ - `background-image: url('banner.jpg')`
562
+ - `style="background: url(bg.png)"`
 
 
 
 
 
563
 
564
+ ### 📝 Example:
565
  ```html
566
+ <img src="logo.png" alt="Logo">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  ```
568
+ Then upload a file named exactly: `logo.png`
569
  """)