Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +203 -113
src/streamlit_app.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import subprocess
|
| 3 |
import os
|
|
@@ -23,7 +27,6 @@ def detect_aspect_ratio(html_content):
|
|
| 23 |
if viewport_match:
|
| 24 |
viewport = viewport_match.group(1).lower()
|
| 25 |
if 'width=device-width' in viewport or 'width=100%' in viewport:
|
| 26 |
-
# Check for orientation hints
|
| 27 |
if 'orientation=portrait' in viewport:
|
| 28 |
return "9:16"
|
| 29 |
elif 'orientation=landscape' in viewport:
|
|
@@ -46,27 +49,14 @@ def detect_aspect_ratio(html_content):
|
|
| 46 |
if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
|
| 47 |
return "16:9"
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
body_match = re.search(r'<body[^>]*style=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
|
| 51 |
-
if body_match:
|
| 52 |
-
style = body_match.group(1).lower()
|
| 53 |
-
if 'width' in style and 'height' in style:
|
| 54 |
-
width_match = re.search(r'width\s*:\s*(\d+)', style)
|
| 55 |
-
height_match = re.search(r'height\s*:\s*(\d+)', style)
|
| 56 |
-
if width_match and height_match:
|
| 57 |
-
w = int(width_match.group(1))
|
| 58 |
-
h = int(height_match.group(1))
|
| 59 |
-
ratio = w / h
|
| 60 |
-
if ratio > 1.5:
|
| 61 |
-
return "16:9"
|
| 62 |
-
elif ratio < 0.7:
|
| 63 |
-
return "9:16"
|
| 64 |
-
|
| 65 |
-
# Default to A4 portrait for documents
|
| 66 |
return "9:16"
|
| 67 |
|
| 68 |
def save_uploaded_images(images, temp_dir):
|
| 69 |
"""Save uploaded images and return mapping"""
|
|
|
|
|
|
|
|
|
|
| 70 |
image_mapping = {}
|
| 71 |
images_dir = os.path.join(temp_dir, "images")
|
| 72 |
os.makedirs(images_dir, exist_ok=True)
|
|
@@ -77,81 +67,104 @@ def save_uploaded_images(images, temp_dir):
|
|
| 77 |
with open(image_path, 'wb') as f:
|
| 78 |
f.write(image.getvalue())
|
| 79 |
|
| 80 |
-
# Create mapping
|
| 81 |
image_mapping[image.name] = f"images/{image.name}"
|
| 82 |
-
print(f"Saved image: {image.name} -> {image_path}")
|
|
|
|
|
|
|
| 83 |
|
| 84 |
return image_mapping
|
| 85 |
|
| 86 |
def process_html_with_images(html_content, temp_dir, image_mapping):
|
| 87 |
"""Process HTML to handle image references with absolute file paths"""
|
| 88 |
-
|
|
|
|
| 89 |
|
| 90 |
-
# Store original content for comparison
|
| 91 |
-
original_content = html_content
|
| 92 |
replacements_made = []
|
|
|
|
| 93 |
|
| 94 |
for original_name, relative_path in image_mapping.items():
|
| 95 |
# Get absolute path for the image
|
| 96 |
absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
# Escape the filename for regex
|
| 100 |
escaped_name = re.escape(original_name)
|
| 101 |
|
| 102 |
-
# Pattern 1: src with any path prefix
|
| 103 |
-
pattern1 = rf'src=(["\'])(?:[^"\']*?/)?{escaped_name}\1'
|
| 104 |
-
matches1 = re.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
html_content = re.sub(
|
| 106 |
pattern1,
|
| 107 |
f'src=\\1{file_url}\\1',
|
| 108 |
html_content,
|
| 109 |
flags=re.IGNORECASE
|
| 110 |
)
|
| 111 |
-
if matches1:
|
| 112 |
-
replacements_made.append(f"Pattern 1 (src): Found {len(matches1)} matches for {original_name}")
|
| 113 |
|
| 114 |
-
# Pattern 2: url() with any path prefix
|
| 115 |
-
pattern2 = rf'url\((["\']?)(?:[^)"\']*/)?{escaped_name}\1\)'
|
| 116 |
-
matches2 = re.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
html_content = re.sub(
|
| 118 |
pattern2,
|
| 119 |
f'url("{file_url}")',
|
| 120 |
html_content,
|
| 121 |
flags=re.IGNORECASE
|
| 122 |
)
|
| 123 |
-
if matches2:
|
| 124 |
-
replacements_made.append(f"Pattern 2 (url): Found {len(matches2)} matches for {original_name}")
|
| 125 |
|
| 126 |
-
# Pattern 3: href
|
| 127 |
-
pattern3 = rf'href=(["\'])(?:[^"\']*?/)?{escaped_name}\1'
|
| 128 |
-
matches3 = re.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
html_content = re.sub(
|
| 130 |
pattern3,
|
| 131 |
f'href=\\1{file_url}\\1',
|
| 132 |
html_content,
|
| 133 |
flags=re.IGNORECASE
|
| 134 |
)
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
# Print
|
| 139 |
if replacements_made:
|
| 140 |
-
print("=== Image Replacements
|
| 141 |
for msg in replacements_made:
|
| 142 |
print(f" β {msg}")
|
| 143 |
else:
|
| 144 |
-
print("=== WARNING: No image replacements made ===")
|
| 145 |
print(f"Looking for images: {list(image_mapping.keys())}")
|
| 146 |
-
# Show
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
return html_content
|
| 151 |
|
| 152 |
def render_html_preview(html_content):
    """Return an ``<iframe>`` snippet that previews *html_content*.

    The markup is UTF-8 encoded, base64-encoded and embedded as a ``data:``
    URL used as the iframe source.

    Args:
        html_content: HTML markup (``str``) to preview.

    Returns:
        A string containing a single ``<iframe>`` element.
    """
    b64 = base64.b64encode(html_content.encode("utf-8")).decode("ascii")
    # Declare the charset explicitly: the payload is UTF-8 bytes, and without
    # this the browser has to guess, which can garble non-ASCII text.
    return (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" '
        'width="100%" height="600" '
        'style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    )
|
|
@@ -302,19 +315,37 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
|
|
| 302 |
with open(html_file, 'w', encoding='utf-8') as f:
|
| 303 |
f.write(html_content)
|
| 304 |
|
|
|
|
|
|
|
|
|
|
| 305 |
# Get the path to puppeteer_pdf.js
|
| 306 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 307 |
puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
# Run Node.js script to convert HTML to PDF
|
| 310 |
result = subprocess.run(
|
| 311 |
['node', puppeteer_script, html_file, aspect_ratio],
|
| 312 |
capture_output=True,
|
| 313 |
text=True,
|
| 314 |
timeout=60,
|
| 315 |
-
cwd=os.path.dirname(
|
| 316 |
)
|
| 317 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
if result.returncode != 0:
|
| 319 |
return None, f"PDF conversion failed: {result.stderr}"
|
| 320 |
|
|
@@ -328,18 +359,22 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
|
|
| 328 |
with open(pdf_file, 'rb') as f:
|
| 329 |
pdf_bytes = f.read()
|
| 330 |
|
|
|
|
| 331 |
return pdf_bytes, None
|
| 332 |
|
| 333 |
except subprocess.TimeoutExpired:
|
| 334 |
return None, "Error: PDF conversion timed out (60 seconds)"
|
| 335 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
| 336 |
return None, f"Error: {str(e)}"
|
| 337 |
|
| 338 |
# Page header
|
| 339 |
st.title("π HTML to PDF Converter")
|
| 340 |
st.markdown("""
|
| 341 |
Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
|
| 342 |
-
β¨ **
|
| 343 |
""")
|
| 344 |
|
| 345 |
# Create tabs
|
|
@@ -360,17 +395,18 @@ with tab1:
|
|
| 360 |
"π· Upload Images (optional)",
|
| 361 |
type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
|
| 362 |
key="image_uploader",
|
| 363 |
-
help="Upload images referenced in your HTML",
|
| 364 |
accept_multiple_files=True
|
| 365 |
)
|
| 366 |
|
| 367 |
if uploaded_images:
|
| 368 |
st.success(f"β
{len(uploaded_images)} image(s) uploaded")
|
| 369 |
-
with st.expander("View uploaded images"):
|
| 370 |
cols = st.columns(min(len(uploaded_images), 4))
|
| 371 |
for idx, img in enumerate(uploaded_images):
|
| 372 |
with cols[idx % 4]:
|
| 373 |
st.image(img, caption=img.name, use_container_width=True)
|
|
|
|
| 374 |
|
| 375 |
if uploaded_file is not None:
|
| 376 |
st.success(f"β
File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
|
|
@@ -412,7 +448,7 @@ with tab1:
|
|
| 412 |
- 9:16 = Portrait (210mm Γ 297mm)
|
| 413 |
""")
|
| 414 |
|
| 415 |
-
convert_file_btn = st.button("π Convert to PDF", key="convert_file", type="primary",
|
| 416 |
|
| 417 |
with col2:
|
| 418 |
st.subheader("ποΈ HTML Preview")
|
|
@@ -423,44 +459,48 @@ with tab1:
|
|
| 423 |
if convert_file_btn:
|
| 424 |
temp_dir = None
|
| 425 |
try:
|
| 426 |
-
with st.spinner("Converting HTML to PDF..."):
|
| 427 |
# Create temp directory
|
| 428 |
temp_dir = tempfile.mkdtemp()
|
|
|
|
|
|
|
| 429 |
|
| 430 |
# Process images if uploaded
|
|
|
|
| 431 |
if uploaded_images:
|
|
|
|
| 432 |
image_mapping = save_uploaded_images(uploaded_images, temp_dir)
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
#
|
| 436 |
-
with st.expander("π Debug: Image
|
| 437 |
st.write("**Uploaded Images:**")
|
| 438 |
for img in uploaded_images:
|
| 439 |
-
st.text(f"
|
| 440 |
|
| 441 |
-
st.write("**Image
|
| 442 |
-
for orig,
|
| 443 |
-
full_path = os.path.join(temp_dir,
|
| 444 |
-
|
| 445 |
-
st.text(f"
|
| 446 |
-
st.text(f"
|
|
|
|
|
|
|
| 447 |
|
| 448 |
-
st.write("**HTML
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
if
|
| 453 |
-
for
|
| 454 |
st.code(line, language='html')
|
| 455 |
else:
|
| 456 |
-
st.warning("No
|
|
|
|
|
|
|
| 457 |
|
| 458 |
# Convert to PDF
|
| 459 |
-
pdf_bytes, error = convert_html_to_pdf(
|
| 460 |
-
|
| 461 |
-
# Cleanup
|
| 462 |
-
if temp_dir:
|
| 463 |
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 464 |
|
| 465 |
if error:
|
| 466 |
st.error(f"β {error}")
|
|
@@ -481,7 +521,7 @@ with tab1:
|
|
| 481 |
data=pdf_bytes,
|
| 482 |
file_name=output_filename,
|
| 483 |
mime="application/pdf",
|
| 484 |
-
|
| 485 |
key="download_file_pdf"
|
| 486 |
)
|
| 487 |
|
|
@@ -491,10 +531,17 @@ with tab1:
|
|
| 491 |
# PDF Preview
|
| 492 |
st.subheader("π PDF Preview")
|
| 493 |
st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
|
|
|
|
| 494 |
except Exception as e:
|
| 495 |
-
if temp_dir:
|
| 496 |
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 497 |
st.error(f"β Error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
# Tab 2: Paste HTML Code
|
| 500 |
with tab2:
|
|
@@ -548,17 +595,18 @@ with tab2:
|
|
| 548 |
"π· Upload Images (optional)",
|
| 549 |
type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
|
| 550 |
key="image_uploader_text",
|
| 551 |
-
help="Upload images referenced in your HTML code",
|
| 552 |
accept_multiple_files=True
|
| 553 |
)
|
| 554 |
|
| 555 |
if uploaded_images_text:
|
| 556 |
st.success(f"β
{len(uploaded_images_text)} image(s) uploaded")
|
| 557 |
-
with st.expander("View uploaded images"):
|
| 558 |
cols = st.columns(min(len(uploaded_images_text), 4))
|
| 559 |
for idx, img in enumerate(uploaded_images_text):
|
| 560 |
with cols[idx % 4]:
|
| 561 |
st.image(img, caption=img.name, use_container_width=True)
|
|
|
|
| 562 |
|
| 563 |
if html_code and html_code.strip():
|
| 564 |
# Auto-detect aspect ratio
|
|
@@ -578,7 +626,7 @@ with tab2:
|
|
| 578 |
help="Select the page orientation and dimensions"
|
| 579 |
)
|
| 580 |
|
| 581 |
-
convert_text_btn = st.button("π Convert to PDF", key="convert_text", type="primary",
|
| 582 |
else:
|
| 583 |
convert_text_btn = False
|
| 584 |
|
|
@@ -591,46 +639,49 @@ with tab2:
|
|
| 591 |
if convert_text_btn and html_code and html_code.strip():
|
| 592 |
temp_dir = None
|
| 593 |
try:
|
| 594 |
-
with st.spinner("Converting HTML to PDF..."):
|
| 595 |
# Create temp directory
|
| 596 |
temp_dir = tempfile.mkdtemp()
|
|
|
|
|
|
|
| 597 |
|
| 598 |
# Process images if uploaded
|
| 599 |
processed_html = html_code
|
| 600 |
if uploaded_images_text:
|
|
|
|
| 601 |
image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
|
| 602 |
processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
|
| 603 |
-
|
| 604 |
-
#
|
| 605 |
-
with st.expander("π Debug: Image
|
| 606 |
st.write("**Uploaded Images:**")
|
| 607 |
for img in uploaded_images_text:
|
| 608 |
-
st.text(f"
|
| 609 |
|
| 610 |
-
st.write("**Image
|
| 611 |
-
for orig,
|
| 612 |
-
full_path = os.path.join(temp_dir,
|
| 613 |
-
|
| 614 |
-
st.text(f"
|
| 615 |
-
st.text(f"
|
|
|
|
|
|
|
| 616 |
|
| 617 |
-
st.write("**HTML
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
if
|
| 622 |
-
for
|
| 623 |
st.code(line, language='html')
|
| 624 |
else:
|
| 625 |
-
st.warning("No
|
|
|
|
|
|
|
| 626 |
|
| 627 |
# Convert to PDF
|
| 628 |
pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
|
| 629 |
|
| 630 |
-
# Cleanup
|
| 631 |
-
if temp_dir:
|
| 632 |
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 633 |
-
|
| 634 |
if error:
|
| 635 |
st.error(f"β {error}")
|
| 636 |
with st.expander("Show error details"):
|
|
@@ -646,7 +697,7 @@ with tab2:
|
|
| 646 |
data=pdf_bytes,
|
| 647 |
file_name="converted.pdf",
|
| 648 |
mime="application/pdf",
|
| 649 |
-
|
| 650 |
key="download_text_pdf"
|
| 651 |
)
|
| 652 |
|
|
@@ -656,10 +707,17 @@ with tab2:
|
|
| 656 |
# PDF Preview
|
| 657 |
st.subheader("π PDF Preview")
|
| 658 |
st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
|
|
|
|
| 659 |
except Exception as e:
|
| 660 |
-
if temp_dir:
|
| 661 |
-
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 662 |
st.error(f"β Error: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 663 |
|
| 664 |
# Footer with tips
|
| 665 |
st.markdown("---")
|
|
@@ -672,13 +730,45 @@ st.markdown("""
|
|
| 672 |
- **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
|
| 673 |
- All CSS styles, colors, gradients, and fonts are preserved
|
| 674 |
- Use inline CSS or `<style>` tags for best results
|
| 675 |
-
-
|
| 676 |
-
- External resources should use absolute URLs
|
| 677 |
- **PDF Preview** renders directly in the browser using PDF.js
|
| 678 |
|
| 679 |
-
### πΌοΈ Using Images:
|
| 680 |
-
1.
|
| 681 |
-
2. Upload all images referenced in
|
| 682 |
-
3.
|
| 683 |
-
4.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
""")
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Streamlit HTML to PDF Converter with Image Support
|
| 3 |
+
Save this file as: src/streamlit_app.py
|
| 4 |
+
"""
|
| 5 |
import streamlit as st
|
| 6 |
import subprocess
|
| 7 |
import os
|
|
|
|
| 27 |
if viewport_match:
|
| 28 |
viewport = viewport_match.group(1).lower()
|
| 29 |
if 'width=device-width' in viewport or 'width=100%' in viewport:
|
|
|
|
| 30 |
if 'orientation=portrait' in viewport:
|
| 31 |
return "9:16"
|
| 32 |
elif 'orientation=landscape' in viewport:
|
|
|
|
| 49 |
if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
|
| 50 |
return "16:9"
|
| 51 |
|
| 52 |
+
# Default to A4 portrait
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
return "9:16"
|
| 54 |
|
| 55 |
def save_uploaded_images(images, temp_dir):
|
| 56 |
"""Save uploaded images and return mapping"""
|
| 57 |
+
if not images:
|
| 58 |
+
return {}
|
| 59 |
+
|
| 60 |
image_mapping = {}
|
| 61 |
images_dir = os.path.join(temp_dir, "images")
|
| 62 |
os.makedirs(images_dir, exist_ok=True)
|
|
|
|
| 67 |
with open(image_path, 'wb') as f:
|
| 68 |
f.write(image.getvalue())
|
| 69 |
|
| 70 |
+
# Create mapping - use relative path from temp_dir
|
| 71 |
image_mapping[image.name] = f"images/{image.name}"
|
| 72 |
+
print(f"β Saved image: {image.name} -> {image_path}")
|
| 73 |
+
print(f" File exists: {os.path.exists(image_path)}")
|
| 74 |
+
print(f" File size: {os.path.getsize(image_path)} bytes")
|
| 75 |
|
| 76 |
return image_mapping
|
| 77 |
|
| 78 |
def process_html_with_images(html_content, temp_dir, image_mapping):
    """Rewrite references to uploaded images as absolute ``file://`` URLs.

    For each uploaded filename in *image_mapping*, every ``src=`` / ``href=``
    attribute and CSS ``url()`` value whose last path segment equals that
    filename (case-insensitively, with any or no directory prefix) is pointed
    at the saved copy under *temp_dir*. Progress is reported on stdout.

    Note: the directory prefix is matched loosely, so a remote URL such as
    ``https://host/path/logo.png`` is also rewritten when ``logo.png`` was
    uploaded.

    Args:
        html_content: HTML markup to process.
        temp_dir: Directory the images were saved under.
        image_mapping: Maps uploaded filename -> path relative to *temp_dir*.

    Returns:
        The HTML with matching references replaced; the input unchanged when
        *image_mapping* is empty.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    if not image_mapping:
        return html_content

    def _rewrite(label, pattern, build_replacement, text):
        """Apply one rewrite rule; return ``(new_text, match_count)``."""
        matches = list(re.finditer(pattern, text, flags=re.IGNORECASE))
        if matches:
            print(f" Found {len(matches)} {label} matches")
            for match in matches:
                print(f" - {match.group()}")
        # A callable replacement is inserted verbatim, so backslashes or
        # group-like sequences in the URL are never parsed as template escapes.
        new_text = re.sub(pattern, build_replacement, text, flags=re.IGNORECASE)
        return new_text, len(matches)

    replacements_made = []

    for original_name, relative_path in image_mapping.items():
        absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))

        # as_uri() percent-encodes spaces/quotes and emits the correct
        # file:/// form on both POSIX and Windows drive paths.
        file_url = Path(absolute_path).as_uri()

        print(f"\nProcessing image: {original_name}")
        print(f" Absolute path: {absolute_path}")
        print(f" File URL: {file_url}")
        print(f" File exists: {os.path.exists(absolute_path)}")

        # Escape the filename so characters like "." match literally.
        escaped_name = re.escape(original_name)

        rules = (
            # src attribute with any path prefix or no prefix
            (
                "src=",
                rf'src\s*=\s*(["\'])(?:[^"\']*?/)?{escaped_name}\1',
                lambda m, url=file_url: f"src={m.group(1)}{url}{m.group(1)}",
            ),
            # url() in CSS with any path prefix or no prefix
            (
                "url()",
                rf'url\s*\(\s*(["\']?)(?:[^)"\']*/)?{escaped_name}\1\s*\)',
                lambda m, url=file_url: f'url("{url}")',
            ),
            # href attribute (for linked images)
            (
                "href=",
                rf'href\s*=\s*(["\'])(?:[^"\']*?/)?{escaped_name}\1',
                lambda m, url=file_url: f"href={m.group(1)}{url}{m.group(1)}",
            ),
        )

        total_matches = 0
        for label, pattern, build_replacement in rules:
            html_content, found = _rewrite(
                label, pattern, build_replacement, html_content
            )
            total_matches += found

        if total_matches > 0:
            replacements_made.append(
                f"{original_name}: {total_matches} replacement(s)"
            )

    # Print summary
    if replacements_made:
        print("\n=== Image Replacements Summary ===")
        for msg in replacements_made:
            print(f" ✓ {msg}")
    else:
        print("\n=== WARNING: No image replacements made ===")
        print(f"Looking for images: {list(image_mapping.keys())}")
        # Show a few lines that look like they reference resources, to help
        # diagnose filename mismatches.
        lines_with_images = [
            line
            for line in html_content.split('\n')
            if any(keyword in line.lower() for keyword in ['src=', 'url(', 'href='])
        ]
        if lines_with_images:
            print("Lines with potential image references:")
            for line in lines_with_images[:5]:
                print(f" {line.strip()}")

    return html_content
|
| 165 |
|
| 166 |
def render_html_preview(html_content):
    """Return an ``<iframe>`` snippet that previews *html_content*.

    The markup is UTF-8 encoded, base64-encoded and embedded as a ``data:``
    URL used as the iframe source.

    Args:
        html_content: HTML markup (``str``) to preview.

    Returns:
        A string containing a single ``<iframe>`` element.
    """
    b64 = base64.b64encode(html_content.encode("utf-8")).decode("ascii")
    # Declare the charset explicitly: the payload is UTF-8 bytes, and without
    # this the browser has to guess, which can garble non-ASCII text.
    return (
        f'<iframe src="data:text/html;charset=utf-8;base64,{b64}" '
        'width="100%" height="600" '
        'style="border: 2px solid #ddd; border-radius: 5px;"></iframe>'
    )
|
|
|
|
| 315 |
with open(html_file, 'w', encoding='utf-8') as f:
|
| 316 |
f.write(html_content)
|
| 317 |
|
| 318 |
+
print(f"\nSaved HTML to: {html_file}")
|
| 319 |
+
print(f"HTML file size: {os.path.getsize(html_file)} bytes")
|
| 320 |
+
|
| 321 |
# Get the path to puppeteer_pdf.js
|
| 322 |
script_dir = os.path.dirname(os.path.abspath(__file__))
|
| 323 |
puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
|
| 324 |
|
| 325 |
+
# If not found, try current directory
|
| 326 |
+
if not os.path.exists(puppeteer_script):
|
| 327 |
+
puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
|
| 328 |
+
|
| 329 |
+
# If still not found, try one level up
|
| 330 |
+
if not os.path.exists(puppeteer_script):
|
| 331 |
+
puppeteer_script = os.path.join(os.path.dirname(script_dir), '..', 'puppeteer_pdf.js')
|
| 332 |
+
|
| 333 |
+
print(f"Using Puppeteer script: {puppeteer_script}")
|
| 334 |
+
print(f"Script exists: {os.path.exists(puppeteer_script)}")
|
| 335 |
+
|
| 336 |
# Run Node.js script to convert HTML to PDF
|
| 337 |
result = subprocess.run(
|
| 338 |
['node', puppeteer_script, html_file, aspect_ratio],
|
| 339 |
capture_output=True,
|
| 340 |
text=True,
|
| 341 |
timeout=60,
|
| 342 |
+
cwd=os.path.dirname(puppeteer_script)
|
| 343 |
)
|
| 344 |
|
| 345 |
+
print(f"\nPuppeteer stdout: {result.stdout}")
|
| 346 |
+
if result.stderr:
|
| 347 |
+
print(f"Puppeteer stderr: {result.stderr}")
|
| 348 |
+
|
| 349 |
if result.returncode != 0:
|
| 350 |
return None, f"PDF conversion failed: {result.stderr}"
|
| 351 |
|
|
|
|
| 359 |
with open(pdf_file, 'rb') as f:
|
| 360 |
pdf_bytes = f.read()
|
| 361 |
|
| 362 |
+
print(f"PDF generated successfully: {len(pdf_bytes)} bytes")
|
| 363 |
return pdf_bytes, None
|
| 364 |
|
| 365 |
except subprocess.TimeoutExpired:
|
| 366 |
return None, "Error: PDF conversion timed out (60 seconds)"
|
| 367 |
except Exception as e:
|
| 368 |
+
print(f"Error in convert_html_to_pdf: {str(e)}")
|
| 369 |
+
import traceback
|
| 370 |
+
traceback.print_exc()
|
| 371 |
return None, f"Error: {str(e)}"
|
| 372 |
|
| 373 |
# Page header
|
| 374 |
st.title("π HTML to PDF Converter")
|
| 375 |
st.markdown("""
|
| 376 |
Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
|
| 377 |
+
β¨ **With Image Support** - Upload images alongside your HTML files!
|
| 378 |
""")
|
| 379 |
|
| 380 |
# Create tabs
|
|
|
|
| 395 |
"π· Upload Images (optional)",
|
| 396 |
type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
|
| 397 |
key="image_uploader",
|
| 398 |
+
help="Upload images referenced in your HTML. Filename must match exactly what's in your HTML.",
|
| 399 |
accept_multiple_files=True
|
| 400 |
)
|
| 401 |
|
| 402 |
if uploaded_images:
|
| 403 |
st.success(f"β
{len(uploaded_images)} image(s) uploaded")
|
| 404 |
+
with st.expander("View uploaded images", expanded=True):
|
| 405 |
cols = st.columns(min(len(uploaded_images), 4))
|
| 406 |
for idx, img in enumerate(uploaded_images):
|
| 407 |
with cols[idx % 4]:
|
| 408 |
st.image(img, caption=img.name, use_container_width=True)
|
| 409 |
+
st.caption(f"Size: {img.size:,} bytes")
|
| 410 |
|
| 411 |
if uploaded_file is not None:
|
| 412 |
st.success(f"β
File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
|
|
|
|
| 448 |
- 9:16 = Portrait (210mm Γ 297mm)
|
| 449 |
""")
|
| 450 |
|
| 451 |
+
convert_file_btn = st.button("π Convert to PDF", key="convert_file", type="primary", use_container_width=True)
|
| 452 |
|
| 453 |
with col2:
|
| 454 |
st.subheader("ποΈ HTML Preview")
|
|
|
|
| 459 |
if convert_file_btn:
|
| 460 |
temp_dir = None
|
| 461 |
try:
|
| 462 |
+
with st.spinner("π Converting HTML to PDF..."):
|
| 463 |
# Create temp directory
|
| 464 |
temp_dir = tempfile.mkdtemp()
|
| 465 |
+
print(f"\n{'='*60}")
|
| 466 |
+
print(f"Created temp directory: {temp_dir}")
|
| 467 |
|
| 468 |
# Process images if uploaded
|
| 469 |
+
processed_html = html_content
|
| 470 |
if uploaded_images:
|
| 471 |
+
st.info(f"π· Processing {len(uploaded_images)} image(s)...")
|
| 472 |
image_mapping = save_uploaded_images(uploaded_images, temp_dir)
|
| 473 |
+
processed_html = process_html_with_images(html_content, temp_dir, image_mapping)
|
| 474 |
+
|
| 475 |
+
# Show debug info
|
| 476 |
+
with st.expander("π Debug: Image Processing Details", expanded=False):
|
| 477 |
st.write("**Uploaded Images:**")
|
| 478 |
for img in uploaded_images:
|
| 479 |
+
st.text(f" β {img.name} ({img.size:,} bytes)")
|
| 480 |
|
| 481 |
+
st.write("\n**Image Mappings:**")
|
| 482 |
+
for orig, rel_path in image_mapping.items():
|
| 483 |
+
full_path = os.path.join(temp_dir, rel_path)
|
| 484 |
+
exists = os.path.exists(full_path)
|
| 485 |
+
st.text(f" {orig}")
|
| 486 |
+
st.text(f" β {rel_path}")
|
| 487 |
+
st.text(f" β Full: {full_path}")
|
| 488 |
+
st.text(f" β Exists: {'β' if exists else 'β'}")
|
| 489 |
|
| 490 |
+
st.write("\n**HTML Image References:**")
|
| 491 |
+
html_lines = processed_html.split('\n')
|
| 492 |
+
img_lines = [line.strip() for line in html_lines
|
| 493 |
+
if any(k in line.lower() for k in ['<img', 'src=', 'url('])]
|
| 494 |
+
if img_lines:
|
| 495 |
+
for line in img_lines[:10]:
|
| 496 |
st.code(line, language='html')
|
| 497 |
else:
|
| 498 |
+
st.warning("β οΈ No image references found in HTML!")
|
| 499 |
+
else:
|
| 500 |
+
print("No images uploaded")
|
| 501 |
|
| 502 |
# Convert to PDF
|
| 503 |
+
pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_file, temp_dir)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
|
| 505 |
if error:
|
| 506 |
st.error(f"β {error}")
|
|
|
|
| 521 |
data=pdf_bytes,
|
| 522 |
file_name=output_filename,
|
| 523 |
mime="application/pdf",
|
| 524 |
+
use_container_width=True,
|
| 525 |
key="download_file_pdf"
|
| 526 |
)
|
| 527 |
|
|
|
|
| 531 |
# PDF Preview
|
| 532 |
st.subheader("π PDF Preview")
|
| 533 |
st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
|
| 534 |
+
|
| 535 |
except Exception as e:
|
|
|
|
|
|
|
| 536 |
st.error(f"β Error: {str(e)}")
|
| 537 |
+
with st.expander("Show full error"):
|
| 538 |
+
import traceback
|
| 539 |
+
st.code(traceback.format_exc())
|
| 540 |
+
finally:
|
| 541 |
+
# Cleanup
|
| 542 |
+
if temp_dir and os.path.exists(temp_dir):
|
| 543 |
+
print(f"Cleaning up temp directory: {temp_dir}")
|
| 544 |
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 545 |
|
| 546 |
# Tab 2: Paste HTML Code
|
| 547 |
with tab2:
|
|
|
|
| 595 |
"π· Upload Images (optional)",
|
| 596 |
type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
|
| 597 |
key="image_uploader_text",
|
| 598 |
+
help="Upload images referenced in your HTML code. Filename must match exactly what's in your HTML.",
|
| 599 |
accept_multiple_files=True
|
| 600 |
)
|
| 601 |
|
| 602 |
if uploaded_images_text:
|
| 603 |
st.success(f"β
{len(uploaded_images_text)} image(s) uploaded")
|
| 604 |
+
with st.expander("View uploaded images", expanded=True):
|
| 605 |
cols = st.columns(min(len(uploaded_images_text), 4))
|
| 606 |
for idx, img in enumerate(uploaded_images_text):
|
| 607 |
with cols[idx % 4]:
|
| 608 |
st.image(img, caption=img.name, use_container_width=True)
|
| 609 |
+
st.caption(f"Size: {img.size:,} bytes")
|
| 610 |
|
| 611 |
if html_code and html_code.strip():
|
| 612 |
# Auto-detect aspect ratio
|
|
|
|
| 626 |
help="Select the page orientation and dimensions"
|
| 627 |
)
|
| 628 |
|
| 629 |
+
convert_text_btn = st.button("π Convert to PDF", key="convert_text", type="primary", use_container_width=True)
|
| 630 |
else:
|
| 631 |
convert_text_btn = False
|
| 632 |
|
|
|
|
| 639 |
if convert_text_btn and html_code and html_code.strip():
|
| 640 |
temp_dir = None
|
| 641 |
try:
|
| 642 |
+
with st.spinner("π Converting HTML to PDF..."):
|
| 643 |
# Create temp directory
|
| 644 |
temp_dir = tempfile.mkdtemp()
|
| 645 |
+
print(f"\n{'='*60}")
|
| 646 |
+
print(f"Created temp directory: {temp_dir}")
|
| 647 |
|
| 648 |
# Process images if uploaded
|
| 649 |
processed_html = html_code
|
| 650 |
if uploaded_images_text:
|
| 651 |
+
st.info(f"π· Processing {len(uploaded_images_text)} image(s)...")
|
| 652 |
image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
|
| 653 |
processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
|
| 654 |
+
|
| 655 |
+
# Show debug info
|
| 656 |
+
with st.expander("π Debug: Image Processing Details", expanded=False):
|
| 657 |
st.write("**Uploaded Images:**")
|
| 658 |
for img in uploaded_images_text:
|
| 659 |
+
st.text(f" β {img.name} ({img.size:,} bytes)")
|
| 660 |
|
| 661 |
+
st.write("\n**Image Mappings:**")
|
| 662 |
+
for orig, rel_path in image_mapping.items():
|
| 663 |
+
full_path = os.path.join(temp_dir, rel_path)
|
| 664 |
+
exists = os.path.exists(full_path)
|
| 665 |
+
st.text(f" {orig}")
|
| 666 |
+
st.text(f" β {rel_path}")
|
| 667 |
+
st.text(f" β Full: {full_path}")
|
| 668 |
+
st.text(f" β Exists: {'β' if exists else 'β'}")
|
| 669 |
|
| 670 |
+
st.write("\n**HTML Image References:**")
|
| 671 |
+
html_lines = processed_html.split('\n')
|
| 672 |
+
img_lines = [line.strip() for line in html_lines
|
| 673 |
+
if any(k in line.lower() for k in ['<img', 'src=', 'url('])]
|
| 674 |
+
if img_lines:
|
| 675 |
+
for line in img_lines[:10]:
|
| 676 |
st.code(line, language='html')
|
| 677 |
else:
|
| 678 |
+
st.warning("β οΈ No image references found in HTML!")
|
| 679 |
+
else:
|
| 680 |
+
print("No images uploaded")
|
| 681 |
|
| 682 |
# Convert to PDF
|
| 683 |
pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
|
| 684 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
if error:
|
| 686 |
st.error(f"β {error}")
|
| 687 |
with st.expander("Show error details"):
|
|
|
|
| 697 |
data=pdf_bytes,
|
| 698 |
file_name="converted.pdf",
|
| 699 |
mime="application/pdf",
|
| 700 |
+
use_container_width=True,
|
| 701 |
key="download_text_pdf"
|
| 702 |
)
|
| 703 |
|
|
|
|
| 707 |
# PDF Preview
|
| 708 |
st.subheader("π PDF Preview")
|
| 709 |
st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
|
| 710 |
+
|
| 711 |
except Exception as e:
|
|
|
|
|
|
|
| 712 |
st.error(f"β Error: {str(e)}")
|
| 713 |
+
with st.expander("Show full error"):
|
| 714 |
+
import traceback
|
| 715 |
+
st.code(traceback.format_exc())
|
| 716 |
+
finally:
|
| 717 |
+
# Cleanup
|
| 718 |
+
if temp_dir and os.path.exists(temp_dir):
|
| 719 |
+
print(f"Cleaning up temp directory: {temp_dir}")
|
| 720 |
+
shutil.rmtree(temp_dir, ignore_errors=True)
|
| 721 |
|
| 722 |
# Footer with tips
|
| 723 |
st.markdown("---")
|
|
|
|
| 730 |
- **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
|
| 731 |
- All CSS styles, colors, gradients, and fonts are preserved
|
| 732 |
- Use inline CSS or `<style>` tags for best results
|
| 733 |
+
- **Image filenames must match exactly** - if your HTML has `<img src="logo.png">`, upload a file named exactly `logo.png`
|
| 734 |
+
- External resources should use absolute URLs (https://)
|
| 735 |
- **PDF Preview** renders directly in the browser using PDF.js
|
| 736 |
|
| 737 |
+
### πΌοΈ Using Images - IMPORTANT:
|
| 738 |
+
1. **Exact Filename Match**: If your HTML has `<img src="photo.jpg">`, upload a file named exactly `photo.jpg`
|
| 739 |
+
2. **Multiple Images**: Upload all images referenced in your HTML
|
| 740 |
+
3. **Supported Formats**: JPG, JPEG, PNG, GIF, SVG, WebP, BMP
|
| 741 |
+
4. **Path Variations**: These all work:
|
| 742 |
+
- `<img src="logo.png">` β
|
| 743 |
+
- `<img src="./logo.png">` β
|
| 744 |
+
- `<img src="images/logo.png">` β (just upload as `logo.png`)
|
| 745 |
+
5. **CSS Background Images**: Use `background-image: url('bg.jpg')` and upload `bg.jpg`
|
| 746 |
+
6. **Check Debug Info**: Expand the debug section after conversion to verify image processing
|
| 747 |
+
|
| 748 |
+
### π Example HTML with Images:
|
| 749 |
+
```html
|
| 750 |
+
<!DOCTYPE html>
|
| 751 |
+
<html>
|
| 752 |
+
<head>
|
| 753 |
+
<style>
|
| 754 |
+
body { font-family: Arial; padding: 40px; }
|
| 755 |
+
.header {
|
| 756 |
+
background-image: url('banner.jpg');
|
| 757 |
+
background-size: cover;
|
| 758 |
+
padding: 60px;
|
| 759 |
+
color: white;
|
| 760 |
+
}
|
| 761 |
+
img { max-width: 100%; height: auto; }
|
| 762 |
+
</style>
|
| 763 |
+
</head>
|
| 764 |
+
<body>
|
| 765 |
+
<div class="header">
|
| 766 |
+
<h1>My Document</h1>
|
| 767 |
+
</div>
|
| 768 |
+
<img src="photo.png" alt="Photo">
|
| 769 |
+
<img src="logo.svg" alt="Logo">
|
| 770 |
+
</body>
|
| 771 |
+
</html>
|
| 772 |
+
```
|
| 773 |
+
**Then upload**: `banner.jpg`, `photo.png`, `logo.svg`
|
| 774 |
""")
|