Spaces:

ABDALLALSWAITI
/

htmlpdf

Sleeping

App Files Files Community

ABDALLALSWAITI committed on Oct 16, 2025

Commit

45054fd

verified ·

1 Parent(s): e23e782

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +250 -455

src/streamlit_app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Streamlit HTML to PDF Converter with Image Support
 Save this file as: src/streamlit_app.py
 """
 import streamlit as st
@@ -10,6 +10,7 @@ import shutil
 from pathlib import Path
 import base64
 import re
 st.set_page_config(
     page_title="HTML to PDF Converter",
@@ -18,21 +19,15 @@ st.set_page_config(
 )
 def detect_aspect_ratio(html_content):
-    """
-    Detect aspect ratio from HTML content
-    Returns: "16:9", "1:1", or "9:16"
-    """
-    # Check for viewport meta tag
     viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
     if viewport_match:
         viewport = viewport_match.group(1).lower()
-        if 'width=device-width' in viewport or 'width=100%' in viewport:
-            if 'orientation=portrait' in viewport:
-                return "9:16"
-            elif 'orientation=landscape' in viewport:
-                return "16:9"
-    # Check for CSS aspect-ratio property
     aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
     if aspect_match:
         width = int(aspect_match.group(1))
@@ -45,123 +40,114 @@ def detect_aspect_ratio(html_content):
         else:
             return "1:1"
-    # Check for common presentation frameworks
     if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
         return "16:9"
-    # Default to A4 portrait
     return "9:16"
-def save_uploaded_images(images, temp_dir):
-    """Save uploaded images and return mapping"""
-    if not images:
-        return {}
-    image_mapping = {}
-    images_dir = os.path.join(temp_dir, "images")
-    os.makedirs(images_dir, exist_ok=True)
-    for image in images:
-        # Save image
-        image_path = os.path.join(images_dir, image.name)
-        with open(image_path, 'wb') as f:
-            f.write(image.getvalue())
-        # Create mapping - use relative path from temp_dir
-        image_mapping[image.name] = f"images/{image.name}"
-        print(f"✓ Saved image: {image.name} -> {image_path}")
-        print(f"  File exists: {os.path.exists(image_path)}")
-        print(f"  File size: {os.path.getsize(image_path)} bytes")
-    return image_mapping
-def process_html_with_images(html_content, temp_dir, image_mapping):
-    """Process HTML to handle image references with absolute file paths"""
-    if not image_mapping:
-        return html_content
-    replacements_made = []
     original_html = html_content
-    for original_name, relative_path in image_mapping.items():
-        # Get absolute path for the image
-        absolute_path = os.path.abspath(os.path.join(temp_dir, relative_path))
-        # Convert to file:// URL with proper escaping
-        # Use forward slashes even on Windows for file:// URLs
-        file_url = f"file://{absolute_path.replace(os.sep, '/')}"
-        print(f"\nProcessing image: {original_name}")
-        print(f"  Absolute path: {absolute_path}")
-        print(f"  File URL: {file_url}")
-        print(f"  File exists: {os.path.exists(absolute_path)}")
-        # Escape the filename for regex
-        escaped_name = re.escape(original_name)
-        # Pattern 1: src attribute with any path prefix or no prefix
-        pattern1 = rf'src\s*=\s*(["\'])(?:[^"\']*?/)?{escaped_name}\1'
-        matches1 = list(re.finditer(pattern1, html_content, flags=re.IGNORECASE))
         if matches1:
-            print(f"  Found {len(matches1)} src= matches")
-            for match in matches1:
-                print(f"    - {match.group()}")
-        html_content = re.sub(
-            pattern1,
-            f'src=\\1{file_url}\\1',
-            html_content,
-            flags=re.IGNORECASE
-        )
-        # Pattern 2: url() in CSS with any path prefix or no prefix
-        pattern2 = rf'url\s*\(\s*(["\']?)(?:[^)"\']*/)?{escaped_name}\1\s*\)'
         matches2 = list(re.finditer(pattern2, html_content, flags=re.IGNORECASE))
         if matches2:
-            print(f"  Found {len(matches2)} url() matches")
-            for match in matches2:
-                print(f"    - {match.group()}")
-        html_content = re.sub(
-            pattern2,
-            f'url("{file_url}")',
-            html_content,
-            flags=re.IGNORECASE
-        )
-        # Pattern 3: href attribute (for linked images)
-        pattern3 = rf'href\s*=\s*(["\'])(?:[^"\']*?/)?{escaped_name}\1'
         matches3 = list(re.finditer(pattern3, html_content, flags=re.IGNORECASE))
         if matches3:
-            print(f"  Found {len(matches3)} href= matches")
-            for match in matches3:
-                print(f"    - {match.group()}")
-        html_content = re.sub(
-            pattern3,
-            f'href=\\1{file_url}\\1',
-            html_content,
-            flags=re.IGNORECASE
-        )
-        total_matches = len(matches1) + len(matches2) + len(matches3)
-        if total_matches > 0:
-            replacements_made.append(f"{original_name}: {total_matches} replacement(s)")
-    # Print summary
-    if replacements_made:
-        print("\n=== Image Replacements Summary ===")
-        for msg in replacements_made:
-            print(f"  ✓ {msg}")
     else:
-        print("\n=== WARNING: No image replacements made ===")
-        print(f"Looking for images: {list(image_mapping.keys())}")
-        # Show lines with image references
-        lines_with_images = [line for line in html_content.split('\n')
-                            if any(keyword in line.lower() for keyword in ['src=', 'url(', 'href='])]
-        if lines_with_images:
-            print("Lines with potential image references:")
-            for line in lines_with_images[:5]:
-                print(f"  {line.strip()}")
-    return html_content
 def render_html_preview(html_content):
     """Render HTML preview in an iframe"""
@@ -205,53 +191,36 @@ def render_pdf_preview(pdf_bytes):
                 font-size: 18px;
                 padding: 20px;
             }}
-            .error {{
-                color: #ff6b6b;
-                font-family: Arial, sans-serif;
-                padding: 20px;
-                background: rgba(0,0,0,0.5);
-                border-radius: 5px;
-                margin: 20px;
-            }}
         </style>
     </head>
     <body>
         <div id="pdf-container">
             <div id="loading">Loading PDF...</div>
         </div>
         <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
         <script>
             pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
             const pdfData = atob('{b64}');
             const pdfContainer = document.getElementById('pdf-container');
             const loading = document.getElementById('loading');
             const uint8Array = new Uint8Array(pdfData.length);
             for (let i = 0; i < pdfData.length; i++) {{
                 uint8Array[i] = pdfData.charCodeAt(i);
             }}
             pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
                 loading.style.display = 'none';
                 const numPages = pdf.numPages;
                 const promises = [];
                 for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
                     promises.push(
                         pdf.getPage(pageNum).then(function(page) {{
                             const scale = 1.5;
                             const viewport = page.getViewport({{scale: scale}});
                             const canvas = document.createElement('canvas');
                             const context = canvas.getContext('2d');
                             canvas.height = viewport.height;
                             canvas.width = viewport.width;
                             pdfContainer.appendChild(canvas);
                             return page.render({{
                                 canvasContext: context,
                                 viewport: viewport
@@ -259,11 +228,9 @@ def render_pdf_preview(pdf_bytes):
                         }})
                     );
                 }}
                 return Promise.all(promises);
             }}).catch(function(error) {{
-                loading.innerHTML = '<div class="error">Error loading PDF: ' + error.message + '</div>';
-                console.error('Error loading PDF:', error);
             }});
         </script>
     </body>
@@ -272,24 +239,12 @@ def render_pdf_preview(pdf_bytes):
     return pdf_viewer_html
 def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
-    """
-    Convert HTML content to PDF using Puppeteer with better styling preservation
-    Args:
-        html_content: String containing HTML content
-        aspect_ratio: One of "16:9", "1:1", or "9:16"
-        temp_dir: Temporary directory for processing
-    Returns:
-        Tuple of (pdf_bytes, error_message)
-    """
     try:
-        # Inject CSS to preserve styles better
         style_injection = """
         <style>
-            @page {
-                margin: 0;
-            }
             * {
                 -webkit-print-color-adjust: exact !important;
                 print-color-adjust: exact !important;
@@ -302,7 +257,6 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
         </style>
         """
-        # Insert style injection before closing head tag or at the start of body
         if '</head>' in html_content:
             html_content = html_content.replace('</head>', style_injection + '</head>')
         elif '<body' in html_content:
@@ -310,71 +264,66 @@ def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
         else:
             html_content = style_injection + html_content
-        # Save HTML content to temporary file
         html_file = os.path.join(temp_dir, "input.html")
         with open(html_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
-        print(f"\nSaved HTML to: {html_file}")
-        print(f"HTML file size: {os.path.getsize(html_file)} bytes")
-        # Get the path to puppeteer_pdf.js
         script_dir = os.path.dirname(os.path.abspath(__file__))
-        puppeteer_script = os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js')
-        # If not found, try current directory
-        if not os.path.exists(puppeteer_script):
-            puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
-        # If still not found, try one level up
-        if not os.path.exists(puppeteer_script):
-            puppeteer_script = os.path.join(os.path.dirname(script_dir), '..', 'puppeteer_pdf.js')
-        print(f"Using Puppeteer script: {puppeteer_script}")
-        print(f"Script exists: {os.path.exists(puppeteer_script)}")
-        # Run Node.js script to convert HTML to PDF
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
-            cwd=os.path.dirname(puppeteer_script)
         )
-        print(f"\nPuppeteer stdout: {result.stdout}")
-        if result.stderr:
-            print(f"Puppeteer stderr: {result.stderr}")
         if result.returncode != 0:
             return None, f"PDF conversion failed: {result.stderr}"
-        # Get the generated PDF path
         pdf_file = html_file.replace('.html', '.pdf')
         if not os.path.exists(pdf_file):
             return None, "PDF file was not generated"
-        # Read PDF file into memory
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
-        print(f"PDF generated successfully: {len(pdf_bytes)} bytes")
         return pdf_bytes, None
     except subprocess.TimeoutExpired:
         return None, "Error: PDF conversion timed out (60 seconds)"
     except Exception as e:
-        print(f"Error in convert_html_to_pdf: {str(e)}")
-        import traceback
-        traceback.print_exc()
         return None, f"Error: {str(e)}"
-# Page header
 st.title("📄 HTML to PDF Converter")
 st.markdown("""
-Convert HTML files or HTML code to PDF using Puppeteer with automatic aspect ratio detection.
-✨ **With Image Support** - Upload images alongside your HTML files!
 """)
 # Create tabs
@@ -386,32 +335,28 @@ with tab1:
         "Choose an HTML file",
         type=['html', 'htm'],
         key="file_uploader",
-        help="Upload an HTML file (max 200MB)",
-        accept_multiple_files=False
     )
-    # Image uploader
     uploaded_images = st.file_uploader(
-        "📷 Upload Images (optional)",
         type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
         key="image_uploader",
-        help="Upload images referenced in your HTML. Filename must match exactly what's in your HTML.",
         accept_multiple_files=True
     )
     if uploaded_images:
         st.success(f"✅ {len(uploaded_images)} image(s) uploaded")
-        with st.expander("View uploaded images", expanded=True):
             cols = st.columns(min(len(uploaded_images), 4))
             for idx, img in enumerate(uploaded_images):
                 with cols[idx % 4]:
                     st.image(img, caption=img.name, use_container_width=True)
-                    st.caption(f"Size: {img.size:,} bytes")
-    if uploaded_file is not None:
-        st.success(f"✅ File uploaded: {uploaded_file.name} ({uploaded_file.size:,} bytes)")
-        # Read file content
         uploaded_file.seek(0)
         try:
             html_content = uploaded_file.getvalue().decode('utf-8')
@@ -419,356 +364,206 @@ with tab1:
             uploaded_file.seek(0)
             html_content = uploaded_file.getvalue().decode('latin-1')
-        # Auto-detect aspect ratio
         detected_ratio = detect_aspect_ratio(html_content)
         col1, col2 = st.columns([1, 1])
         with col1:
             st.subheader("⚙️ Settings")
-            auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_file")
             if auto_detect:
-                aspect_ratio_file = detected_ratio
                 st.info(f"🔍 Detected: **{detected_ratio}**")
             else:
-                aspect_ratio_file = st.radio(
                     "Aspect Ratio",
                     options=["16:9", "1:1", "9:16"],
                     index=["16:9", "1:1", "9:16"].index(detected_ratio),
-                    key="aspect_file",
-                    help="Select the page orientation and dimensions"
                 )
-            st.markdown(f"""
-            **Selected: {aspect_ratio_file}**
-            - 16:9 = Landscape (297mm × 210mm)
-            - 1:1 = Square (210mm × 210mm)
-            - 9:16 = Portrait (210mm × 297mm)
-            """)
-            convert_file_btn = st.button("🔄 Convert to PDF", key="convert_file", type="primary", use_container_width=True)
         with col2:
-            st.subheader("👁️ HTML Preview")
-            with st.expander("Show HTML Preview", expanded=False):
-                st.components.v1.html(render_html_preview(html_content), height=600, scrolling=True)
-        # Conversion section
-        if convert_file_btn:
             temp_dir = None
             try:
-                with st.spinner("🔄 Converting HTML to PDF..."):
-                    # Create temp directory
                     temp_dir = tempfile.mkdtemp()
-                    print(f"\n{'='*60}")
-                    print(f"Created temp directory: {temp_dir}")
-                    # Process images if uploaded
                     processed_html = html_content
                     if uploaded_images:
-                        st.info(f"📷 Processing {len(uploaded_images)} image(s)...")
-                        image_mapping = save_uploaded_images(uploaded_images, temp_dir)
-                        processed_html = process_html_with_images(html_content, temp_dir, image_mapping)
-                        # Show debug info
-                        with st.expander("🔍 Debug: Image Processing Details", expanded=False):
-                            st.write("**Uploaded Images:**")
-                            for img in uploaded_images:
-                                st.text(f"  ✓ {img.name} ({img.size:,} bytes)")
-                            st.write("\n**Image Mappings:**")
-                            for orig, rel_path in image_mapping.items():
-                                full_path = os.path.join(temp_dir, rel_path)
-                                exists = os.path.exists(full_path)
-                                st.text(f"  {orig}")
-                                st.text(f"    → {rel_path}")
-                                st.text(f"    → Full: {full_path}")
-                                st.text(f"    → Exists: {'✓' if exists else '✗'}")
-                            st.write("\n**HTML Image References:**")
-                            html_lines = processed_html.split('\n')
-                            img_lines = [line.strip() for line in html_lines
-                                        if any(k in line.lower() for k in ['<img', 'src=', 'url('])]
-                            if img_lines:
-                                for line in img_lines[:10]:
-                                    st.code(line, language='html')
-                            else:
-                                st.warning("⚠️ No image references found in HTML!")
-                    else:
-                        print("No images uploaded")
                     # Convert to PDF
-                    pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_file, temp_dir)
                     if error:
                         st.error(f"❌ {error}")
-                        with st.expander("Show error details"):
-                            st.code(error)
                     else:
-                        st.success("✅ PDF generated successfully!")
-                        col_a, col_b = st.columns([1, 1])
                         with col_a:
-                            output_filename = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
-                            if not output_filename.endswith('.pdf'):
-                                output_filename += '.pdf'
                             st.download_button(
-                                label="⬇️ Download PDF",
                                 data=pdf_bytes,
-                                file_name=output_filename,
                                 mime="application/pdf",
-                                use_container_width=True,
-                                key="download_file_pdf"
                             )
                         with col_b:
-                            st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
-                        # PDF Preview
                         st.subheader("📄 PDF Preview")
-                        st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
             except Exception as e:
                 st.error(f"❌ Error: {str(e)}")
-                with st.expander("Show full error"):
-                    import traceback
-                    st.code(traceback.format_exc())
             finally:
-                # Cleanup
                 if temp_dir and os.path.exists(temp_dir):
-                    print(f"Cleaning up temp directory: {temp_dir}")
                     shutil.rmtree(temp_dir, ignore_errors=True)
-# Tab 2: Paste HTML Code
 with tab2:
-    col1, col2 = st.columns([1, 1])
-    with col1:
-        html_code = st.text_area(
-            "HTML Content",
-            value="""<!DOCTYPE html>
 <html>
 <head>
-    <title>Sample Document</title>
     <style>
         body {
-            font-family: Arial, sans-serif;
             margin: 40px;
             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
             color: white;
         }
-        h1 {
-            font-size: 48px;
-            margin-bottom: 20px;
-            text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
-        }
-        p {
-            font-size: 18px;
-            line-height: 1.6;
-        }
         .box {
             background: rgba(255,255,255,0.1);
             padding: 20px;
             border-radius: 10px;
-            margin-top: 20px;
         }
     </style>
 </head>
 <body>
-    <h1>Hello, PDF World! 🌍</h1>
-    <p>This is a sample HTML document converted to PDF.</p>
     <div class="box">
-        <p>✨ Styles, colors, and gradients are preserved!</p>
     </div>
 </body>
 </html>""",
-            height=400,
-            key="html_code"
-        )
-        # Image uploader for text tab
-        uploaded_images_text = st.file_uploader(
-            "📷 Upload Images (optional)",
-            type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
-            key="image_uploader_text",
-            help="Upload images referenced in your HTML code. Filename must match exactly what's in your HTML.",
-            accept_multiple_files=True
-        )
-        if uploaded_images_text:
-            st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
-            with st.expander("View uploaded images", expanded=True):
-                cols = st.columns(min(len(uploaded_images_text), 4))
-                for idx, img in enumerate(uploaded_images_text):
-                    with cols[idx % 4]:
-                        st.image(img, caption=img.name, use_container_width=True)
-                        st.caption(f"Size: {img.size:,} bytes")
-        if html_code and html_code.strip():
-            # Auto-detect aspect ratio
-            detected_ratio_text = detect_aspect_ratio(html_code)
-            auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_detect_text")
-            if auto_detect_text:
-                aspect_ratio_text = detected_ratio_text
-                st.info(f"🔍 Detected: **{detected_ratio_text}**")
-            else:
-                aspect_ratio_text = st.radio(
-                    "Aspect Ratio",
-                    options=["16:9", "1:1", "9:16"],
-                    index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
-                    key="aspect_text",
-                    help="Select the page orientation and dimensions"
-                )
-            convert_text_btn = st.button("🔄 Convert to PDF", key="convert_text", type="primary", use_container_width=True)
-        else:
-            convert_text_btn = False
-    with col2:
-        if html_code and html_code.strip():
-            st.subheader("👁️ HTML Preview")
-            with st.expander("Show HTML Preview", expanded=False):
-                st.components.v1.html(render_html_preview(html_code), height=600, scrolling=True)
-    if convert_text_btn and html_code and html_code.strip():
-        temp_dir = None
-        try:
-            with st.spinner("🔄 Converting HTML to PDF..."):
-                # Create temp directory
-                temp_dir = tempfile.mkdtemp()
-                print(f"\n{'='*60}")
-                print(f"Created temp directory: {temp_dir}")
-                # Process images if uploaded
-                processed_html = html_code
-                if uploaded_images_text:
-                    st.info(f"📷 Processing {len(uploaded_images_text)} image(s)...")
-                    image_mapping = save_uploaded_images(uploaded_images_text, temp_dir)
-                    processed_html = process_html_with_images(html_code, temp_dir, image_mapping)
-                    # Show debug info
-                    with st.expander("🔍 Debug: Image Processing Details", expanded=False):
-                        st.write("**Uploaded Images:**")
-                        for img in uploaded_images_text:
-                            st.text(f"  ✓ {img.name} ({img.size:,} bytes)")
-                        st.write("\n**Image Mappings:**")
-                        for orig, rel_path in image_mapping.items():
-                            full_path = os.path.join(temp_dir, rel_path)
-                            exists = os.path.exists(full_path)
-                            st.text(f"  {orig}")
-                            st.text(f"    → {rel_path}")
-                            st.text(f"    → Full: {full_path}")
-                            st.text(f"    → Exists: {'✓' if exists else '✗'}")
-                        st.write("\n**HTML Image References:**")
-                        html_lines = processed_html.split('\n')
-                        img_lines = [line.strip() for line in html_lines
-                                    if any(k in line.lower() for k in ['<img', 'src=', 'url('])]
-                        if img_lines:
-                            for line in img_lines[:10]:
-                                st.code(line, language='html')
-                        else:
-                            st.warning("⚠️ No image references found in HTML!")
-                else:
-                    print("No images uploaded")
-                # Convert to PDF
-                pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
-                if error:
-                    st.error(f"❌ {error}")
-                    with st.expander("Show error details"):
-                        st.code(error)
-                else:
-                    st.success("✅ PDF generated successfully!")
-                    col_a, col_b = st.columns([1, 1])
-                    with col_a:
-                        st.download_button(
-                            label="⬇️ Download PDF",
-                            data=pdf_bytes,
-                            file_name="converted.pdf",
-                            mime="application/pdf",
-                            use_container_width=True,
-                            key="download_text_pdf"
-                        )
-                    with col_b:
-                        st.info(f"📦 Size: {len(pdf_bytes):,} bytes")
-                    # PDF Preview
-                    st.subheader("📄 PDF Preview")
-                    st.components.v1.html(render_pdf_preview(pdf_bytes), height=620, scrolling=True)
-        except Exception as e:
-            st.error(f"❌ Error: {str(e)}")
-            with st.expander("Show full error"):
-                import traceback
-                st.code(traceback.format_exc())
-        finally:
-            # Cleanup
-            if temp_dir and os.path.exists(temp_dir):
-                print(f"Cleaning up temp directory: {temp_dir}")
-                shutil.rmtree(temp_dir, ignore_errors=True)
-# Footer with tips
 st.markdown("---")
 st.markdown("""
-### 💡 Tips:
-- **Auto-detection** analyzes your HTML to suggest the best aspect ratio
-- **16:9** - Best for presentations and landscape documents (297mm × 210mm)
-- **1:1** - Square format (210mm × 210mm)
-- **9:16** - Portrait format, standard A4 (210mm × 297mm)
-- **Image Support** - Upload JPG, PNG, GIF, SVG, WebP, or BMP images
-- All CSS styles, colors, gradients, and fonts are preserved
-- Use inline CSS or `<style>` tags for best results
-- **Image filenames must match exactly** - if your HTML has `<img src="logo.png">`, upload a file named exactly `logo.png`
-- External resources should use absolute URLs (https://)
-- **PDF Preview** renders directly in the browser using PDF.js
-### 🖼️ Using Images - IMPORTANT:
-1. **Exact Filename Match**: If your HTML has `<img src="photo.jpg">`, upload a file named exactly `photo.jpg`
-2. **Multiple Images**: Upload all images referenced in your HTML
-3. **Supported Formats**: JPG, JPEG, PNG, GIF, SVG, WebP, BMP
-4. **Path Variations**: These all work:
-   - `<img src="logo.png">` ✓
-   - `<img src="./logo.png">` ✓
-   - `<img src="images/logo.png">` ✓ (just upload as `logo.png`)
-5. **CSS Background Images**: Use `background-image: url('bg.jpg')` and upload `bg.jpg`
-6. **Check Debug Info**: Expand the debug section after conversion to verify image processing
-### 📝 Example HTML with Images:
 ```html
-<!DOCTYPE html>
-<html>
-<head>
-    <style>
-        body { font-family: Arial; padding: 40px; }
-        .header {
-            background-image: url('banner.jpg');
-            background-size: cover;
-            padding: 60px;
-            color: white;
-        }
-        img { max-width: 100%; height: auto; }
-    </style>
-</head>
-<body>
-    <div class="header">
-        <h1>My Document</h1>
-    </div>
-    <img src="photo.png" alt="Photo">
-    <img src="logo.svg" alt="Logo">
-</body>
-</html>
 ```
-**Then upload**: `banner.jpg`, `photo.png`, `logo.svg`
 """)

 """
+Streamlit HTML to PDF Converter with Image Support - REVISED
 Save this file as: src/streamlit_app.py
 """
 import streamlit as st
 from pathlib import Path
 import base64
 import re
+import mimetypes
 st.set_page_config(
     page_title="HTML to PDF Converter",
 )
 def detect_aspect_ratio(html_content):
+    """Detect aspect ratio from HTML content"""
     viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
     if viewport_match:
         viewport = viewport_match.group(1).lower()
+        if 'orientation=portrait' in viewport:
+            return "9:16"
+        elif 'orientation=landscape' in viewport:
+            return "16:9"
     aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
     if aspect_match:
         width = int(aspect_match.group(1))
         else:
             return "1:1"
     if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
         return "16:9"
     return "9:16"
+def image_to_base64(image_file):
+    """Convert uploaded image to base64 data URL"""
+    try:
+        # Read image bytes
+        image_bytes = image_file.getvalue()
+        # Get MIME type
+        mime_type, _ = mimetypes.guess_type(image_file.name)
+        if not mime_type:
+            # Fallback based on extension
+            ext = os.path.splitext(image_file.name)[1].lower()
+            mime_map = {
+                '.jpg': 'image/jpeg',
+                '.jpeg': 'image/jpeg',
+                '.png': 'image/png',
+                '.gif': 'image/gif',
+                '.svg': 'image/svg+xml',
+                '.webp': 'image/webp',
+                '.bmp': 'image/bmp'
+            }
+            mime_type = mime_map.get(ext, 'image/png')
+        # Convert to base64
+        b64_data = base64.b64encode(image_bytes).decode('utf-8')
+        data_url = f"data:{mime_type};base64,{b64_data}"
+        return data_url
+    except Exception as e:
+        st.error(f"Error converting {image_file.name} to base64: {str(e)}")
+        return None
def embed_images_as_base64(html_content, uploaded_images):
    """Inline uploaded images into the HTML as base64 ``data:`` URLs.

    Each reference to an uploaded file name (optionally preceded by a path
    ending in ``/``) is replaced by that image's data URL. Three kinds of
    reference are rewritten, in this order: ``<img ... src=...>``,
    ``background-image: url(...)`` and any other CSS ``url(...)``.
    Matching is case-insensitive. The original quoting of each reference is
    preserved (including no quotes at all), so a ``url(...)`` inside a
    double-quoted ``style="..."`` attribute is not broken by the rewrite.

    Progress, a replacement summary, and a debug listing when nothing
    matched are written to the Streamlit page.

    Args:
        html_content: HTML source text.
        uploaded_images: Uploaded image objects (each with a ``name`` and
            convertible by ``image_to_base64``); may be empty or ``None``.

    Returns:
        A ``(html, replacements)`` tuple. ``replacements`` maps
        ``"<filename> (<kind>)"`` to the number of substitutions made and
        is empty when nothing was replaced.
    """
    if not uploaded_images:
        return html_content, {}

    # Map each uploaded file name to its data URL, skipping failed conversions.
    image_data_urls = {}
    for img in uploaded_images:
        data_url = image_to_base64(img)
        if data_url:
            image_data_urls[img.name] = data_url
            st.write(f"✓ Converted {img.name} to base64 ({len(data_url)} chars)")
    if not image_data_urls:
        return html_content, {}

    replacements = {}
    for filename, data_url in image_data_urls.items():
        escaped_name = re.escape(filename)

        def swap_in_data_url(match, url=data_url):
            """Rebuild the matched reference around ``url``, keeping its quoting."""
            quote = match.group(2)
            # Only the url(...) patterns capture a closing parenthesis.
            closing = match.group(3) if match.re.groups > 2 else ''
            return f"{match.group(1)}{quote}{url}{quote}{closing}"

        # Order matters: the generic url() pattern also covers
        # background-image, so the specific pattern must run first to be
        # counted under its own label.
        targets = (
            # src="image.jpg", src="./image.jpg", src="images/image.jpg"
            ("img src",
             rf'(<img[^>]*\s+src\s*=\s*)(["\'])(?:[^"\']*?/)?{escaped_name}\2',
             re.IGNORECASE | re.DOTALL),
            ("bg-image",
             rf'(background-image\s*:\s*url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))',
             re.IGNORECASE),
            ("url",
             rf'(url\s*\()(["\']?)(?:[^)"\']*/)?{escaped_name}\2(\))',
             re.IGNORECASE),
        )
        for label, pattern, flags in targets:
            # A function replacement means the data URL is inserted verbatim
            # and never parsed as a regex replacement template.
            html_content, count = re.subn(
                pattern, swap_in_data_url, html_content, flags=flags
            )
            if count:
                replacements[f"{filename} ({label})"] = count

    # Show replacement summary
    if replacements:
        st.success("✅ Image Replacements:")
        for key, count in replacements.items():
            st.write(f"  • {key}: {count} replacement(s)")
    else:
        st.warning("⚠️ No image references found in HTML matching uploaded files!")
        st.write("Uploaded files:", [img.name for img in uploaded_images])
        # Show the image-related lines of the HTML to help diagnose a mismatch.
        with st.expander("🔍 Debug: Show HTML image references"):
            img_lines = [line for line in html_content.split('\n')
                         if any(k in line.lower() for k in ['<img', 'src=', 'url(', 'background'])]
            if img_lines:
                for line in img_lines[:10]:
                    st.code(line.strip(), language='html')
            else:
                st.write("No image-related lines found in HTML")

    return html_content, replacements
 def render_html_preview(html_content):
     """Render HTML preview in an iframe"""
                 font-size: 18px;
                 padding: 20px;
             }}
         </style>
     </head>
     <body>
         <div id="pdf-container">
             <div id="loading">Loading PDF...</div>
         </div>
         <script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js"></script>
         <script>
             pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js';
             const pdfData = atob('{b64}');
             const pdfContainer = document.getElementById('pdf-container');
             const loading = document.getElementById('loading');
             const uint8Array = new Uint8Array(pdfData.length);
             for (let i = 0; i < pdfData.length; i++) {{
                 uint8Array[i] = pdfData.charCodeAt(i);
             }}
             pdfjsLib.getDocument({{data: uint8Array}}).promise.then(function(pdf) {{
                 loading.style.display = 'none';
                 const numPages = pdf.numPages;
                 const promises = [];
                 for (let pageNum = 1; pageNum <= numPages; pageNum++) {{
                     promises.push(
                         pdf.getPage(pageNum).then(function(page) {{
                             const scale = 1.5;
                             const viewport = page.getViewport({{scale: scale}});
                             const canvas = document.createElement('canvas');
                             const context = canvas.getContext('2d');
                             canvas.height = viewport.height;
                             canvas.width = viewport.width;
                             pdfContainer.appendChild(canvas);
                             return page.render({{
                                 canvasContext: context,
                                 viewport: viewport
                         }})
                     );
                 }}
                 return Promise.all(promises);
             }}).catch(function(error) {{
+                loading.innerHTML = '<div style="color:#ff6b6b;">Error: ' + error.message + '</div>';
             }});
         </script>
     </body>
     return pdf_viewer_html
 def convert_html_to_pdf(html_content, aspect_ratio, temp_dir):
+    """Convert HTML content to PDF using Puppeteer"""
     try:
+        # Inject CSS to preserve styles
         style_injection = """
         <style>
+            @page { margin: 0; }
             * {
                 -webkit-print-color-adjust: exact !important;
                 print-color-adjust: exact !important;
         </style>
         """
         if '</head>' in html_content:
             html_content = html_content.replace('</head>', style_injection + '</head>')
         elif '<body' in html_content:
         else:
             html_content = style_injection + html_content
+        # Save HTML to temp file
         html_file = os.path.join(temp_dir, "input.html")
         with open(html_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
+        st.write(f"📝 Saved HTML: {os.path.getsize(html_file):,} bytes")
+        # Find puppeteer script
         script_dir = os.path.dirname(os.path.abspath(__file__))
+        possible_paths = [
+            os.path.join(os.path.dirname(script_dir), 'puppeteer_pdf.js'),
+            os.path.join(script_dir, 'puppeteer_pdf.js'),
+            os.path.join(script_dir, '..', 'puppeteer_pdf.js'),
+            'puppeteer_pdf.js'
+        ]
+        puppeteer_script = None
+        for path in possible_paths:
+            if os.path.exists(path):
+                puppeteer_script = path
+                break
+        if not puppeteer_script:
+            return None, "Error: puppeteer_pdf.js not found"
+        st.write(f"🔧 Using Puppeteer: {puppeteer_script}")
+        # Run conversion
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
+            cwd=os.path.dirname(os.path.abspath(puppeteer_script))
         )
         if result.returncode != 0:
             return None, f"PDF conversion failed: {result.stderr}"
+        # Read PDF
         pdf_file = html_file.replace('.html', '.pdf')
         if not os.path.exists(pdf_file):
             return None, "PDF file was not generated"
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
+        st.write(f"✅ PDF generated: {len(pdf_bytes):,} bytes")
         return pdf_bytes, None
     except subprocess.TimeoutExpired:
         return None, "Error: PDF conversion timed out (60 seconds)"
     except Exception as e:
         return None, f"Error: {str(e)}"
+# Main UI
 st.title("📄 HTML to PDF Converter")
 st.markdown("""
+Convert HTML to PDF with **embedded base64 images** for guaranteed display!
+✨ Images are converted to base64 and embedded directly in the HTML.
 """)
 # Create tabs
         "Choose an HTML file",
         type=['html', 'htm'],
         key="file_uploader",
+        help="Upload an HTML file"
     )
     uploaded_images = st.file_uploader(
+        "📷 Upload Images",
         type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
         key="image_uploader",
+        help="Upload images - they will be embedded as base64 in the HTML",
         accept_multiple_files=True
     )
     if uploaded_images:
         st.success(f"✅ {len(uploaded_images)} image(s) uploaded")
+        with st.expander("View uploaded images"):
             cols = st.columns(min(len(uploaded_images), 4))
             for idx, img in enumerate(uploaded_images):
                 with cols[idx % 4]:
                     st.image(img, caption=img.name, use_container_width=True)
+    if uploaded_file:
+        st.success(f"✅ File: {uploaded_file.name}")
         uploaded_file.seek(0)
         try:
             html_content = uploaded_file.getvalue().decode('utf-8')
             uploaded_file.seek(0)
             html_content = uploaded_file.getvalue().decode('latin-1')
         detected_ratio = detect_aspect_ratio(html_content)
         col1, col2 = st.columns([1, 1])
         with col1:
             st.subheader("⚙️ Settings")
+            auto_detect = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_file")
             if auto_detect:
+                aspect_ratio = detected_ratio
                 st.info(f"🔍 Detected: **{detected_ratio}**")
             else:
+                aspect_ratio = st.radio(
                     "Aspect Ratio",
                     options=["16:9", "1:1", "9:16"],
                     index=["16:9", "1:1", "9:16"].index(detected_ratio),
+                    key="aspect_file"
                 )
+            convert_btn = st.button("🔄 Convert to PDF", key="conv_file", type="primary", use_container_width=True)
         with col2:
+            st.subheader("👁️ Preview")
+            with st.expander("Show HTML"):
+                st.components.v1.html(render_html_preview(html_content), height=400, scrolling=True)
+        if convert_btn:
             temp_dir = None
             try:
+                with st.spinner("Converting..."):
                     temp_dir = tempfile.mkdtemp()
+                    # Embed images as base64
                     processed_html = html_content
                     if uploaded_images:
+                        with st.expander("🖼️ Image Processing", expanded=True):
+                            processed_html, replacements = embed_images_as_base64(html_content, uploaded_images)
+                            if not replacements:
+                                st.warning("⚠️ Images uploaded but no matches found in HTML!")
+                                st.write("**Tip:** Make sure image filenames in HTML match uploaded files exactly")
                     # Convert to PDF
+                    pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio, temp_dir)
                     if error:
                         st.error(f"❌ {error}")
                     else:
+                        st.success("✅ PDF generated!")
+                        output_name = uploaded_file.name.replace('.html', '.pdf').replace('.htm', '.pdf')
+                        if not output_name.endswith('.pdf'):
+                            output_name += '.pdf'
+                        col_a, col_b = st.columns(2)
                         with col_a:
                             st.download_button(
+                                "⬇️ Download PDF",
                                 data=pdf_bytes,
+                                file_name=output_name,
                                 mime="application/pdf",
+                                use_container_width=True
                             )
                         with col_b:
+                            st.info(f"Size: {len(pdf_bytes):,} bytes")
                         st.subheader("📄 PDF Preview")
+                        st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
             except Exception as e:
                 st.error(f"❌ Error: {str(e)}")
             finally:
                 if temp_dir and os.path.exists(temp_dir):
                     shutil.rmtree(temp_dir, ignore_errors=True)
+# Tab 2: Paste HTML
 with tab2:
+    html_code = st.text_area(
+        "HTML Content",
+        value="""<!DOCTYPE html>
 <html>
 <head>
     <style>
         body {
+            font-family: Arial;
             margin: 40px;
             background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
             color: white;
         }
+        h1 { font-size: 48px; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); }
         .box {
             background: rgba(255,255,255,0.1);
             padding: 20px;
             border-radius: 10px;
+            margin: 20px 0;
         }
     </style>
 </head>
 <body>
+    <h1>Hello PDF! 🌍</h1>
     <div class="box">
+        <p>Styles and gradients preserved!</p>
     </div>
 </body>
 </html>""",
+        height=400,
+        key="html_code"
+    )
+    uploaded_images_text = st.file_uploader(
+        "📷 Upload Images",
+        type=['jpg', 'jpeg', 'png', 'gif', 'svg', 'webp', 'bmp'],
+        key="image_text",
+        help="Upload images to embed in your HTML",
+        accept_multiple_files=True
+    )
+    if uploaded_images_text:
+        st.success(f"✅ {len(uploaded_images_text)} image(s) uploaded")
+        with st.expander("View images"):
+            cols = st.columns(min(len(uploaded_images_text), 4))
+            for idx, img in enumerate(uploaded_images_text):
+                with cols[idx % 4]:
+                    st.image(img, caption=img.name, use_container_width=True)
+    if html_code.strip():
+        detected_ratio_text = detect_aspect_ratio(html_code)
+        auto_detect_text = st.checkbox("Auto-detect aspect ratio", value=True, key="auto_text")
+        if auto_detect_text:
+            aspect_ratio_text = detected_ratio_text
+            st.info(f"🔍 Detected: **{detected_ratio_text}**")
+        else:
+            aspect_ratio_text = st.radio(
+                "Aspect Ratio",
+                options=["16:9", "1:1", "9:16"],
+                index=["16:9", "1:1", "9:16"].index(detected_ratio_text),
+                key="aspect_text"
+            )
+        convert_text_btn = st.button("🔄 Convert", key="conv_text", type="primary", use_container_width=True)
+        if convert_text_btn:
+            temp_dir = None
+            try:
+                with st.spinner("Converting..."):
+                    temp_dir = tempfile.mkdtemp()
+                    processed_html = html_code
+                    if uploaded_images_text:
+                        with st.expander("🖼️ Image Processing", expanded=True):
+                            processed_html, replacements = embed_images_as_base64(html_code, uploaded_images_text)
+                            if not replacements:
+                                st.warning("⚠️ Images uploaded but no matches found!")
+                    pdf_bytes, error = convert_html_to_pdf(processed_html, aspect_ratio_text, temp_dir)
+                    if error:
+                        st.error(f"❌ {error}")
+                    else:
+                        st.success("✅ PDF generated!")
+                        col_a, col_b = st.columns(2)
+                        with col_a:
+                            st.download_button(
+                                "⬇️ Download PDF",
+                                data=pdf_bytes,
+                                file_name="converted.pdf",
+                                mime="application/pdf",
+                                use_container_width=True
+                            )
+                        with col_b:
+                            st.info(f"Size: {len(pdf_bytes):,} bytes")
+                        st.subheader("📄 PDF Preview")
+                        st.components.v1.html(render_pdf_preview(pdf_bytes), height=600, scrolling=True)
+            except Exception as e:
+                st.error(f"❌ Error: {str(e)}")
+            finally:
+                if temp_dir and os.path.exists(temp_dir):
+                    shutil.rmtree(temp_dir, ignore_errors=True)
+# Footer
 st.markdown("---")
 st.markdown("""
+### 💡 How It Works:
+- **Base64 Embedding**: Images are converted to base64 data URLs and embedded directly in HTML
+- **No File Paths**: No need for file:// URLs or temp directories
+- **Guaranteed Display**: Images are part of the HTML, so they always appear in the PDF
+- **Filename Matching**: Your HTML must reference images by exact filename (e.g., `<img src="photo.jpg">`)
+### ✅ Supported:
+- `<img src="photo.jpg">`
+- `<img src="./images/logo.png">`
+- `background-image: url('banner.jpg')`
+- `style="background: url(bg.png)"`
+### 📝 Example:
 ```html
+<img src="logo.png" alt="Logo">
 ```
+Then upload a file named exactly: `logo.png`
 """)