Spaces:

ABDALLALSWAITI
/

htmlpdf

Sleeping

App Files Files Community

ABDALLALSWAITI committed on Oct 16, 2025

Commit

18b284f

verified ·

1 Parent(s): 4b2d20a

Update api.py

Browse files

Files changed (1) hide show

api.py +179 -209

api.py CHANGED Viewed

@@ -1,20 +1,15 @@
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import Response, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-from typing import List, Optional
 import tempfile
 import shutil
-import os
-import subprocess
-import base64
 from pathlib import Path
-import mimetypes
-app = FastAPI(
-    title="HTML to PDF API with Image Support",
-    description="Convert HTML to PDF using Puppeteer with image upload support",
-    version="2.0.0"
-)
 # Enable CORS
 app.add_middleware(
@@ -25,32 +20,63 @@ app.add_middleware(
     allow_headers=["*"],
 )
 def save_uploaded_images(images: List[UploadFile], temp_dir: str):
-    """Save uploaded images to temp directory and return mapping"""
     image_mapping = {}
     images_dir = os.path.join(temp_dir, "images")
     os.makedirs(images_dir, exist_ok=True)
     for image in images:
-        if image.filename:
-            # Save image to temp directory
-            image_path = os.path.join(images_dir, image.filename)
-            with open(image_path, 'wb') as f:
-                content = image.file.read()
-                f.write(content)
-            # Reset file pointer for potential reuse
-            image.file.seek(0)
-            # Create mapping with relative path
-            image_mapping[image.filename] = f"images/{image.filename}"
-            print(f"Saved image: {image.filename} -> {image_path}")
     return image_mapping
 def process_html_with_images(html_content: str, temp_dir: str, image_mapping: dict):
     """Process HTML to handle image references with absolute file paths"""
-    import re
     for original_name, relative_path in image_mapping.items():
         # Get absolute path for the image
@@ -60,42 +86,72 @@ def process_html_with_images(html_content: str, temp_dir: str, image_mapping: di
         # Escape the filename for regex
         escaped_name = re.escape(original_name)
-        # Replace various image reference patterns
-        # Match filename with or without directory paths (images/, src/images/, ./images/, etc.)
         # Pattern 1: src with any path prefix
         html_content = re.sub(
-            rf'src=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
             f'src=\\1{file_url}\\1',
             html_content,
             flags=re.IGNORECASE
         )
         # Pattern 2: url() with any path prefix
         html_content = re.sub(
-            rf'url\((["\']?)(?:[^)"\']*/)?{escaped_name}\1\)',
             f'url("{file_url}")',
             html_content,
             flags=re.IGNORECASE
         )
         # Pattern 3: href with any path prefix
         html_content = re.sub(
-            rf'href=(["\'])(?:[^"\']*/)?' + escaped_name + r'\1',
             f'href=\\1{file_url}\\1',
             html_content,
             flags=re.IGNORECASE
         )
     return html_content
 def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
-    """Convert HTML content to PDF"""
     try:
-        # Style injection for better PDF rendering
         style_injection = """
         <style>
-            @page { margin: 0; }
             * {
                 -webkit-print-color-adjust: exact !important;
                 print-color-adjust: exact !important;
@@ -108,6 +164,7 @@ def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
         </style>
         """
         if '</head>' in html_content:
             html_content = html_content.replace('</head>', style_injection + '</head>')
         elif '<body' in html_content:
@@ -115,246 +172,159 @@ def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
         else:
             html_content = style_injection + html_content
-        # Save HTML to temp file
         html_file = os.path.join(temp_dir, "input.html")
         with open(html_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
-        # Get puppeteer script path
-        script_dir = os.path.dirname(os.path.abspath(__file__))
-        puppeteer_script = os.path.join(script_dir, 'puppeteer_pdf.js')
-        # Run conversion
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
-            cwd=script_dir
         )
         if result.returncode != 0:
-            raise Exception(f"PDF conversion failed: {result.stderr}")
         pdf_file = html_file.replace('.html', '.pdf')
         if not os.path.exists(pdf_file):
-            raise Exception("PDF file was not generated")
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
-        return pdf_bytes
     except Exception as e:
-        raise e
 @app.get("/")
 async def root():
-    """API root endpoint"""
     return {
-        "message": "HTML to PDF Conversion API with Image Support",
-        "version": "2.0.0",
         "endpoints": {
-            "POST /convert": "Convert HTML to PDF (file upload with optional images)",
-            "POST /convert-text": "Convert HTML text to PDF (with optional image files)",
-            "POST /convert-with-images": "Convert HTML with multiple images",
-            "GET /health": "Health check",
-            "GET /docs": "API documentation (Swagger UI)"
         }
     }
 @app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {"status": "healthy", "service": "html-to-pdf-api"}
 @app.post("/convert")
-async def convert_file(
-    file: UploadFile = File(...),
-    images: Optional[List[UploadFile]] = File(None),
-    aspect_ratio: str = Form(default="9:16")
 ):
     """
-    Convert uploaded HTML file to PDF with optional images
-    - **file**: HTML file to convert
-    - **images**: Optional list of image files (jpg, png, gif, svg, webp)
-    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
-    """
-    if not file.filename.lower().endswith(('.html', '.htm')):
-        raise HTTPException(status_code=400, detail="File must be HTML (.html or .htm)")
-    if aspect_ratio not in ["16:9", "1:1", "9:16"]:
-        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")
-    temp_dir = None
-    try:
-        # Create temporary directory
-        temp_dir = tempfile.mkdtemp()
-        # Read HTML content
-        content = await file.read()
-        try:
-            html_content = content.decode('utf-8')
-        except UnicodeDecodeError:
-            html_content = content.decode('latin-1')
-        # Process images if provided
-        if images:
-            image_mapping = save_uploaded_images(images, temp_dir)
-            html_content = process_html_with_images(html_content, temp_dir, image_mapping)
-        # Convert to PDF
-        pdf_bytes = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)
-        # Clean up
-        shutil.rmtree(temp_dir, ignore_errors=True)
-        # Return PDF file
-        filename = file.filename.replace('.html', '.pdf').replace('.htm', '.pdf')
-        if not filename.endswith('.pdf'):
-            filename += '.pdf'
-        return Response(
-            content=pdf_bytes,
-            media_type="application/pdf",
-            headers={
-                "Content-Disposition": f"attachment; filename={filename}"
-            }
-        )
-    except Exception as e:
-        if temp_dir:
-            shutil.rmtree(temp_dir, ignore_errors=True)
-        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
-@app.post("/convert-text")
-async def convert_text(
-    html: str = Form(...),
-    images: Optional[List[UploadFile]] = File(None),
-    aspect_ratio: str = Form(default="9:16"),
-    return_base64: bool = Form(default=False)
-):
-    """
-    Convert HTML text to PDF with optional images
-    - **html**: HTML content as string
-    - **images**: Optional list of image files
-    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
-    - **return_base64**: If true, returns base64 encoded PDF in JSON
     """
-    if aspect_ratio not in ["16:9", "1:1", "9:16"]:
-        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")
     temp_dir = None
     try:
-        # Create temporary directory
         temp_dir = tempfile.mkdtemp()
         # Process images if provided
         if images:
             image_mapping = save_uploaded_images(images, temp_dir)
             html = process_html_with_images(html, temp_dir, image_mapping)
-        # Convert to PDF
-        pdf_bytes = convert_html_to_pdf(html, aspect_ratio, temp_dir)
-        # Clean up
-        shutil.rmtree(temp_dir, ignore_errors=True)
-        if return_base64:
-            # Return as JSON with base64 encoded PDF
-            pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
-            return JSONResponse(content={
-                "success": True,
-                "pdf_base64": pdf_base64,
-                "size_bytes": len(pdf_bytes)
-            })
         else:
-            # Return PDF file directly
-            return Response(
-                content=pdf_bytes,
-                media_type="application/pdf",
-                headers={
-                    "Content-Disposition": "attachment; filename=converted.pdf"
-                }
-            )
-    except Exception as e:
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
-        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
-@app.post("/convert-with-images")
-async def convert_with_images(
-    html_file: UploadFile = File(...),
-    images: List[UploadFile] = File(...),
-    aspect_ratio: str = Form(default="9:16")
-):
-    """
-    Convert HTML with multiple images - dedicated endpoint
-    - **html_file**: HTML file to convert
-    - **images**: List of image files (required)
-    - **aspect_ratio**: Page orientation (16:9, 1:1, or 9:16)
-    """
-    if not html_file.filename.lower().endswith(('.html', '.htm')):
-        raise HTTPException(status_code=400, detail="HTML file must be .html or .htm")
-    if aspect_ratio not in ["16:9", "1:1", "9:16"]:
-        raise HTTPException(status_code=400, detail="Invalid aspect ratio. Use: 16:9, 1:1, or 9:16")
-    # Validate image files
-    allowed_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.svg', '.webp', '.bmp'}
-    for img in images:
-        ext = Path(img.filename).suffix.lower()
-        if ext not in allowed_extensions:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Invalid image format: {img.filename}. Allowed: {', '.join(allowed_extensions)}"
-            )
-    temp_dir = None
-    try:
-        # Create temporary directory
-        temp_dir = tempfile.mkdtemp()
-        # Read HTML content
-        content = await html_file.read()
-        try:
-            html_content = content.decode('utf-8')
-        except UnicodeDecodeError:
-            html_content = content.decode('latin-1')
-        # Save and process images
-        image_mapping = save_uploaded_images(images, temp_dir)
-        html_content = process_html_with_images(html_content, temp_dir, image_mapping)
-        # Convert to PDF
-        pdf_bytes = convert_html_to_pdf(html_content, aspect_ratio, temp_dir)
-        # Clean up
-        shutil.rmtree(temp_dir, ignore_errors=True)
-        # Return PDF
-        filename = html_file.filename.replace('.html', '.pdf').replace('.htm', '.pdf')
-        if not filename.endswith('.pdf'):
-            filename += '.pdf'
         return Response(
             content=pdf_bytes,
             media_type="application/pdf",
             headers={
-                "Content-Disposition": f"attachment; filename={filename}",
-                "X-Image-Count": str(len(images))
             }
         )
     except Exception as e:
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
-        raise HTTPException(status_code=500, detail=f"Conversion failed: {str(e)}")
 if __name__ == "__main__":
     import uvicorn

 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import Response, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
+import subprocess
+import os
 import tempfile
 import shutil
 from pathlib import Path
+import re
+from typing import List, Optional
+app = FastAPI(title="HTML to PDF Converter API")
 # Enable CORS
 app.add_middleware(
     allow_headers=["*"],
 )
+def detect_aspect_ratio(html_content):
+    """
+    Detect aspect ratio from HTML content
+    Returns: "16:9", "1:1", or "9:16"
+    """
+    # Check for viewport meta tag
+    viewport_match = re.search(r'<meta[^>]*viewport[^>]*content=["\']([^"\']*)["\']', html_content, re.IGNORECASE)
+    if viewport_match:
+        viewport = viewport_match.group(1).lower()
+        if 'width=device-width' in viewport or 'width=100%' in viewport:
+            if 'orientation=portrait' in viewport:
+                return "9:16"
+            elif 'orientation=landscape' in viewport:
+                return "16:9"
+    # Check for CSS aspect-ratio property
+    aspect_match = re.search(r'aspect-ratio\s*:\s*(\d+)\s*/\s*(\d+)', html_content, re.IGNORECASE)
+    if aspect_match:
+        width = int(aspect_match.group(1))
+        height = int(aspect_match.group(2))
+        ratio = width / height
+        if ratio > 1.5:
+            return "16:9"
+        elif ratio < 0.7:
+            return "9:16"
+        else:
+            return "1:1"
+    # Check for common presentation frameworks
+    if any(keyword in html_content.lower() for keyword in ['reveal.js', 'impress.js', 'slide', 'presentation']):
+        return "16:9"
+    # Default to A4 portrait
+    return "9:16"
 def save_uploaded_images(images: List[UploadFile], temp_dir: str):
+    """Save uploaded images and return mapping"""
     image_mapping = {}
     images_dir = os.path.join(temp_dir, "images")
     os.makedirs(images_dir, exist_ok=True)
     for image in images:
+        # Save image
+        image_path = os.path.join(images_dir, image.filename)
+        with open(image_path, 'wb') as f:
+            content = image.file.read()
+            f.write(content)
+        # Create mapping
+        image_mapping[image.filename] = f"images/{image.filename}"
+        print(f"API: Saved image: {image.filename} -> {image_path}")
     return image_mapping
 def process_html_with_images(html_content: str, temp_dir: str, image_mapping: dict):
     """Process HTML to handle image references with absolute file paths"""
+    replacements_made = []
     for original_name, relative_path in image_mapping.items():
         # Get absolute path for the image
         # Escape the filename for regex
         escaped_name = re.escape(original_name)
         # Pattern 1: src with any path prefix
+        pattern1 = rf'src=(["\'])(?:[^"\']*?/)?{escaped_name}\1'
+        matches1 = re.findall(pattern1, html_content, flags=re.IGNORECASE)
         html_content = re.sub(
+            pattern1,
             f'src=\\1{file_url}\\1',
             html_content,
             flags=re.IGNORECASE
         )
+        if matches1:
+            replacements_made.append(f"Pattern 1 (src): Found {len(matches1)} matches for {original_name}")
         # Pattern 2: url() with any path prefix
+        pattern2 = rf'url\((["\']?)(?:[^)"\']*/)?{escaped_name}\1\)'
+        matches2 = re.findall(pattern2, html_content, flags=re.IGNORECASE)
         html_content = re.sub(
+            pattern2,
             f'url("{file_url}")',
             html_content,
             flags=re.IGNORECASE
         )
+        if matches2:
+            replacements_made.append(f"Pattern 2 (url): Found {len(matches2)} matches for {original_name}")
         # Pattern 3: href with any path prefix
+        pattern3 = rf'href=(["\'])(?:[^"\']*?/)?{escaped_name}\1'
+        matches3 = re.findall(pattern3, html_content, flags=re.IGNORECASE)
         html_content = re.sub(
+            pattern3,
             f'href=\\1{file_url}\\1',
             html_content,
             flags=re.IGNORECASE
         )
+        if matches3:
+            replacements_made.append(f"Pattern 3 (href): Found {len(matches3)} matches for {original_name}")
+    # Print debug info
+    if replacements_made:
+        print("=== API Image Replacements Made ===")
+        for msg in replacements_made:
+            print(f"  ✓ {msg}")
+    else:
+        print("=== API WARNING: No image replacements made ===")
+        print(f"Looking for images: {list(image_mapping.keys())}")
     return html_content
 def convert_html_to_pdf(html_content: str, aspect_ratio: str, temp_dir: str):
+    """
+    Convert HTML content to PDF using Puppeteer
+    Args:
+        html_content: String containing HTML content
+        aspect_ratio: One of "16:9", "1:1", or "9:16"
+        temp_dir: Temporary directory for processing
+    Returns:
+        Tuple of (pdf_bytes, error_message)
+    """
     try:
+        # Inject CSS to preserve styles better
         style_injection = """
         <style>
+            @page {
+                margin: 0;
+            }
             * {
                 -webkit-print-color-adjust: exact !important;
                 print-color-adjust: exact !important;
         </style>
         """
+        # Insert style injection
         if '</head>' in html_content:
             html_content = html_content.replace('</head>', style_injection + '</head>')
         elif '<body' in html_content:
         else:
             html_content = style_injection + html_content
+        # Save HTML content to temporary file
         html_file = os.path.join(temp_dir, "input.html")
         with open(html_file, 'w', encoding='utf-8') as f:
             f.write(html_content)
+        # Get the path to puppeteer_pdf.js
+        puppeteer_script = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'puppeteer_pdf.js')
+        print(f"API: Running Puppeteer conversion with aspect ratio: {aspect_ratio}")
+        print(f"API: HTML file: {html_file}")
+        print(f"API: Puppeteer script: {puppeteer_script}")
+        # Run Node.js script to convert HTML to PDF
         result = subprocess.run(
             ['node', puppeteer_script, html_file, aspect_ratio],
             capture_output=True,
             text=True,
             timeout=60,
+            cwd=os.path.dirname(os.path.abspath(__file__))
         )
         if result.returncode != 0:
+            print(f"API: Puppeteer error: {result.stderr}")
+            return None, f"PDF conversion failed: {result.stderr}"
+        # Get the generated PDF path
         pdf_file = html_file.replace('.html', '.pdf')
         if not os.path.exists(pdf_file):
+            return None, "PDF file was not generated"
+        # Read PDF file into memory
         with open(pdf_file, 'rb') as f:
             pdf_bytes = f.read()
+        print(f"API: PDF generated successfully, size: {len(pdf_bytes)} bytes")
+        return pdf_bytes, None
+    except subprocess.TimeoutExpired:
+        return None, "Error: PDF conversion timed out (60 seconds)"
     except Exception as e:
+        print(f"API: Conversion error: {str(e)}")
+        return None, f"Error: {str(e)}"
 @app.get("/")
 async def root():
     return {
+        "message": "HTML to PDF Converter API",
+        "version": "2.0",
         "endpoints": {
+            "/convert": "POST - Convert HTML to PDF (supports file upload or raw HTML)",
+            "/health": "GET - Health check"
         }
     }
 @app.get("/health")
+async def health():
+    return {"status": "healthy"}
 @app.post("/convert")
+async def convert_to_pdf(
+    html_file: Optional[UploadFile] = File(None),
+    html_content: Optional[str] = Form(None),
+    aspect_ratio: Optional[str] = Form(None),
+    auto_detect: bool = Form(True),
+    images: Optional[List[UploadFile]] = File(None)
 ):
     """
+    Convert HTML to PDF
+    Parameters:
+    - html_file: HTML file upload (optional)
+    - html_content: Raw HTML content (optional, used if html_file not provided)
+    - aspect_ratio: "16:9", "1:1", or "9:16" (optional if auto_detect is True)
+    - auto_detect: Auto-detect aspect ratio from HTML (default: True)
+    - images: List of image files referenced in the HTML (optional)
+    Returns:
+    - PDF file as bytes
     """
     temp_dir = None
     try:
+        # Validate input
+        if not html_file and not html_content:
+            raise HTTPException(status_code=400, detail="Either html_file or html_content must be provided")
+        # Get HTML content
+        if html_file:
+            content = await html_file.read()
+            try:
+                html = content.decode('utf-8')
+            except UnicodeDecodeError:
+                html = content.decode('latin-1')
+            filename = html_file.filename
+        else:
+            html = html_content
+            filename = "converted.pdf"
+        # Create temp directory
         temp_dir = tempfile.mkdtemp()
+        print(f"API: Created temp directory: {temp_dir}")
         # Process images if provided
         if images:
+            print(f"API: Processing {len(images)} uploaded images")
             image_mapping = save_uploaded_images(images, temp_dir)
             html = process_html_with_images(html, temp_dir, image_mapping)
+            print(f"API: Image processing complete")
+        # Determine aspect ratio
+        if auto_detect or not aspect_ratio:
+            detected_ratio = detect_aspect_ratio(html)
+            aspect_ratio = detected_ratio
+            print(f"API: Auto-detected aspect ratio: {aspect_ratio}")
         else:
+            # Validate aspect ratio
+            if aspect_ratio not in ["16:9", "1:1", "9:16"]:
+                raise HTTPException(status_code=400, detail="Invalid aspect_ratio. Must be '16:9', '1:1', or '9:16'")
+            print(f"API: Using specified aspect ratio: {aspect_ratio}")
+        # Convert to PDF
+        pdf_bytes, error = convert_html_to_pdf(html, aspect_ratio, temp_dir)
+        # Cleanup
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
+        if error:
+            raise HTTPException(status_code=500, detail=error)
+        # Generate output filename
+        output_filename = filename.replace('.html', '.pdf').replace('.htm', '.pdf')
+        if not output_filename.endswith('.pdf'):
+            output_filename = 'converted.pdf'
+        # Return PDF as response
         return Response(
             content=pdf_bytes,
             media_type="application/pdf",
             headers={
+                "Content-Disposition": f"attachment; filename={output_filename}",
+                "X-Aspect-Ratio": aspect_ratio
             }
         )
+    except HTTPException:
+        raise
     except Exception as e:
         if temp_dir:
             shutil.rmtree(temp_dir, ignore_errors=True)
+        print(f"API: Error in convert endpoint: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     import uvicorn