scripts/generate_pdf.py

#!/usr/bin/env python3
"""
PDF Generation Script for Literature Reviews
Converts markdown files to professionally formatted PDFs with proper styling.
"""

import subprocess
import sys
import os
from pathlib import Path

def generate_pdf(
    markdown_file: str,
    output_pdf: str = None,
    citation_style: str = "apa",
    template: str = None,
    toc: bool = True,
    number_sections: bool = True
) -> bool:
    """
    Generate a PDF from a markdown file using pandoc with the xelatex engine.

    If a ``.bib`` file with the same stem sits next to the markdown file,
    citation processing is enabled and ``citation_style`` is passed to
    pandoc as a CSL file name (``.csl`` is appended when missing).

    Args:
        markdown_file: Path to the markdown file (str or path-like).
        output_pdf: Path for output PDF (defaults to the markdown path with
            a ``.pdf`` suffix).
        citation_style: Citation style name or CSL file (apa, nature,
            chicago, etc.). Only used when a sibling ``.bib`` file exists.
        template: Path to a custom LaTeX template. A path that does not
            exist is reported and skipped; generation continues with
            pandoc's default template.
        toc: Include table of contents (depth 3).
        number_sections: Number the sections.

    Returns:
        True if pandoc ran successfully, False if the input is missing or
        not a regular file, pandoc is not installed, or pandoc exits with
        a non-zero status.
    """
    # Function-scope import: only needed to quote the echoed command line.
    import shlex

    # Accept path-like objects too; everything below works on a plain str.
    markdown_file = os.fspath(markdown_file)

    # Verify markdown file exists and is a regular file (not a directory)
    if not os.path.exists(markdown_file):
        print(f"Error: Markdown file not found: {markdown_file}")
        return False
    if not os.path.isfile(markdown_file):
        print(f"Error: Markdown path is not a file: {markdown_file}")
        return False

    # Set default output path
    if output_pdf is None:
        output_pdf = Path(markdown_file).with_suffix('.pdf')

    # Check if pandoc is installed
    try:
        subprocess.run(['pandoc', '--version'], capture_output=True, check=True)
    except (subprocess.CalledProcessError, FileNotFoundError):
        print("Error: pandoc is not installed.")
        print("Install with: brew install pandoc (macOS) or apt-get install pandoc (Linux)")
        return False

    # Build pandoc command
    cmd = [
        'pandoc',
        markdown_file,
        '-o', str(output_pdf),
        '--pdf-engine=xelatex',  # Better Unicode support
        '-V', 'geometry:margin=1in',
        '-V', 'fontsize=11pt',
        '-V', 'colorlinks=true',
        '-V', 'linkcolor=blue',
        '-V', 'urlcolor=blue',
        '-V', 'citecolor=blue',
    ]

    # Add table of contents
    if toc:
        cmd.extend(['--toc', '--toc-depth=3'])

    # Add section numbering
    if number_sections:
        cmd.append('--number-sections')

    # Add citation processing if a bibliography sits next to the markdown file
    bib_file = Path(markdown_file).with_suffix('.bib')
    if bib_file.exists():
        if citation_style.endswith('.csl'):
            csl_file = citation_style
        else:
            csl_file = f'{citation_style}.csl'
        cmd.extend([
            '--citeproc',
            '--bibliography', str(bib_file),
            '--csl', csl_file,
        ])

    # Add custom template if provided; tell the user when it is being skipped
    # instead of silently producing a PDF with the default template.
    if template:
        if os.path.exists(template):
            cmd.extend(['--template', os.fspath(template)])
        else:
            print(f"Warning: template not found, using default: {template}")

    # Execute pandoc
    try:
        print(f"Generating PDF: {output_pdf}")
        # Quote each argument so the echoed command can be pasted into a shell
        # even when paths contain spaces.
        print(f"Command: {' '.join(shlex.quote(part) for part in cmd)}")
        subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        print("Error generating PDF:")
        print(f"STDOUT: {e.stdout}")
        print(f"STDERR: {e.stderr}")
        return False

    print(f"✓ PDF generated successfully: {output_pdf}")
    return True

def check_dependencies():
    """
    Report whether the external tools this script relies on can be run.

    Each tool is probed by running it with ``--version``. A status line is
    printed per tool, and when anything is missing a summary with install
    hints follows.

    Returns:
        True if every probe succeeded, False if at least one tool is missing.
    """
    # (tool name, argv used to probe it) -- order determines output order.
    probes = (
        ('pandoc', ['pandoc', '--version']),
        ('xelatex', ['xelatex', '--version']),
    )
    install_hints = {
        'pandoc': "  - pandoc: brew install pandoc (macOS) or apt-get install pandoc (Linux)",
        'xelatex': "  - xelatex: brew install --cask mactex (macOS) or apt-get install texlive-xetex (Linux)",
    }

    not_found = []
    for tool, argv in probes:
        try:
            subprocess.run(argv, capture_output=True, check=True)
        except (subprocess.CalledProcessError, FileNotFoundError):
            print(f"✗ {tool} is NOT installed")
            not_found.append(tool)
        else:
            print(f"✓ {tool} is installed")

    if not not_found:
        return True

    print("\n" + "="*60)
    print("Missing dependencies:")
    for tool in not_found:
        print(install_hints[tool])
    return False

def _print_usage():
    """Print the command-line usage summary."""
    print("Usage: python generate_pdf.py <markdown_file> [output_pdf] [--citation-style STYLE]")
    print("\nOptions:")
    print("  --citation-style STYLE    Citation style (default: apa)")
    print("  --no-toc                  Disable table of contents")
    print("  --no-numbers              Disable section numbering")
    print("  --check-deps              Check if dependencies are installed")


def main():
    """
    Command-line interface.

    Reads ``sys.argv`` and terminates the process via ``sys.exit``:

    * no arguments, or no markdown file among them: print usage, exit 1
    * ``--check-deps``: run the dependency check; exit 0 if everything is
      installed, 1 otherwise
    * otherwise: call ``generate_pdf`` and exit 0 on success, 1 on failure

    Options may appear before, between, or after the positional arguments.
    The first positional argument is the markdown file and the second, if
    present, is the output PDF path.
    """
    args = sys.argv[1:]

    if not args:
        _print_usage()
        sys.exit(1)

    # Check dependencies mode: the exit status reflects the check result so
    # the flag is usable from shell scripts and CI.
    if '--check-deps' in args:
        sys.exit(0 if check_dependencies() else 1)

    citation_style = 'apa'
    toc = True
    number_sections = True
    positionals = []

    # Single pass over the arguments so an option's value is consumed together
    # with the option and never mistaken for a positional argument.
    remaining = iter(args)
    for arg in remaining:
        if arg == '--citation-style':
            value = next(remaining, None)
            # A missing value (or another flag in its place) is a usage error
            # rather than a silent fallback to the default style.
            if value is None or value.startswith('--'):
                print("Error: --citation-style requires a STYLE argument")
                sys.exit(1)
            citation_style = value
        elif arg == '--no-toc':
            toc = False
        elif arg == '--no-numbers':
            number_sections = False
        elif arg.startswith('--'):
            print(f"Warning: ignoring unknown option: {arg}")
        else:
            positionals.append(arg)

    if not positionals:
        print("Error: no markdown file given")
        _print_usage()
        sys.exit(1)

    markdown_file = positionals[0]
    output_pdf = positionals[1] if len(positionals) > 1 else None
    for extra in positionals[2:]:
        print(f"Warning: ignoring extra argument: {extra}")

    # Generate PDF
    success = generate_pdf(
        markdown_file,
        output_pdf,
        citation_style=citation_style,
        toc=toc,
        number_sections=number_sections
    )

    sys.exit(0 if success else 1)

if __name__ == "__main__":
    main()
← Back to literature-review