205 lines
6.3 KiB
Python
205 lines
6.3 KiB
Python
"""
|
|
PDF Generation Service for Analyst Reports
|
|
Converts markdown reports to PDF format with Chinese character support
|
|
"""
|
|
import io
|
|
import re
|
|
from typing import Optional
|
|
from datetime import datetime
|
|
from reportlab.lib.pagesizes import A4
|
|
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
from reportlab.lib.units import cm
|
|
from reportlab.lib.enums import TA_LEFT, TA_CENTER
|
|
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak
|
|
from reportlab.pdfbase import pdfmetrics
|
|
from reportlab.pdfbase.ttfonts import TTFont
|
|
from reportlab.lib.colors import HexColor
|
|
import markdown
|
|
|
|
|
|
class PDFGenerator:
    """Generate PDF analyst reports from markdown content.

    The markdown is flattened to plain text (emphasis markers, links, code
    fences and horizontal rules are stripped), split into lines, and each
    line is rendered as a ReportLab ``Paragraph``.  Lines starting with one
    to six ``#`` characters are rendered with the heading style.

    Chinese text: on construction the generator tries to register
    ReportLab's predefined ``STSong-Light`` CID font.  That font is only
    applied to documents that actually contain CJK characters, so
    Latin-only reports keep the default ReportLab fonts.
    """

    # Name of ReportLab's predefined Simplified-Chinese CID font.
    CJK_FONT_NAME = 'STSong-Light'

    # Registered CJK font name, or None when registration failed or
    # __init__ was bypassed (class-level default keeps that case safe).
    _cjk_font: Optional[str] = None

    # CJK radicals through unified ideographs, compatibility ideographs,
    # and full-width forms.
    _CJK_RE = re.compile(r'[\u2e80-\u9fff\uf900-\ufaff\uff00-\uffef]')

    # ATX heading: 1-6 hashes, whitespace, then the heading text.
    _HEADING_RE = re.compile(r'^#{1,6}[ \t]+(.+)$')

    # Fenced code block; non-greedy so single backticks inside are allowed.
    _FENCE_RE = re.compile(r'```.*?```', re.DOTALL)

    # Thematic break: three or more of the same marker, optionally spaced.
    _RULE_RE = re.compile(
        r'^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*$', re.MULTILINE
    )

    # List marker at line start.  [ \t] rather than \s so the match can
    # never run across a newline and swallow blank lines.
    _BULLET_RE = re.compile(r'^[ \t]*[*\-+][ \t]+', re.MULTILINE)

    # [text](url) -> text
    _LINK_RE = re.compile(r'\[([^\]]+)\]\([^\)]+\)')

    # Inline markers, applied in order.  All are confined to a single line
    # and require non-space text directly inside the delimiters, so
    # arithmetic such as "2 * 3 * 4" is left alone.  Underscore forms must
    # not touch a word character on the outside, so snake_case identifiers
    # survive.
    _INLINE_RES = (
        re.compile(r'\*\*(?=\S)(.+?)(?<=\S)\*\*'),
        re.compile(r'\*(?=\S)([^*\n]+?)(?<=\S)\*'),
        re.compile(r'(?<!\w)__(?=\S)(.+?)(?<=\S)__(?!\w)'),
        re.compile(r'(?<!\w)_(?=\S)([^_\n]+?)(?<=\S)_(?!\w)'),
        re.compile(r'`([^`\n]+)`'),
    )

    def __init__(self):
        """Initialize the generator and try to enable Chinese font support."""
        self._cjk_font = self._register_cjk_font()

    @staticmethod
    def _register_cjk_font() -> Optional[str]:
        """Register the CJK CID font with ReportLab.

        Returns:
            The registered font name, or ``None`` if registration failed,
            in which case documents fall back to the default fonts.
        """
        try:
            # Imported here so a ReportLab build without CID font support
            # degrades to the default fonts instead of failing at import.
            from reportlab.pdfbase.cidfonts import UnicodeCIDFont

            pdfmetrics.registerFont(UnicodeCIDFont(PDFGenerator.CJK_FONT_NAME))
        except Exception:
            # Deliberate best-effort: PDF generation must still work
            # without the CJK font.
            return None
        return PDFGenerator.CJK_FONT_NAME

    def generate_analyst_report_pdf(
        self,
        analyst_name: str,
        ticker: str,
        analysis_date: str,
        report_content: str,
    ) -> bytes:
        """Generate a PDF from analyst report content.

        Args:
            analyst_name: Name of the analyst; rendered as the title.
            ticker: Stock ticker symbol; rendered in the subtitle line.
            analysis_date: Date of analysis; rendered in the subtitle line.
            report_content: Markdown formatted report content.  ``None`` or
                an empty string produces a report with only the header.

        Returns:
            PDF file content as bytes.
        """
        use_cjk = self._cjk_font is not None and self._contains_cjk(
            analyst_name, ticker, analysis_date, report_content
        )
        styles = self._build_styles(self._cjk_font if use_cjk else None)

        elements = [
            self._make_paragraph(analyst_name, styles['title']),
            Spacer(1, 0.3*cm),
            self._make_paragraph(
                f"{ticker} | {analysis_date}", styles['subtitle']
            ),
            Spacer(1, 0.5*cm),
        ]

        content = self._clean_markdown(report_content or '')
        for raw_line in content.split('\n'):
            line = raw_line.strip()
            if not line:
                # Blank source lines become a small vertical gap.
                elements.append(Spacer(1, 0.2*cm))
                continue

            heading = self._HEADING_RE.match(line)
            if heading:
                elements.append(
                    self._make_paragraph(heading.group(1), styles['heading'])
                )
            else:
                elements.append(self._make_paragraph(line, styles['body']))

        # The context manager releases the buffer even if build() raises.
        with io.BytesIO() as buffer:
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=2*cm,
                leftMargin=2*cm,
                topMargin=2*cm,
                bottomMargin=2*cm,
            )
            doc.build(elements)
            return buffer.getvalue()

    def _contains_cjk(self, *texts) -> bool:
        """Return True if any of the given values contains a CJK character."""
        return any(
            text is not None and self._CJK_RE.search(str(text)) is not None
            for text in texts
        )

    def _build_styles(self, cjk_font: Optional[str]) -> dict:
        """Build the paragraph styles used by the report.

        Args:
            cjk_font: Registered CJK font name to apply to every style, or
                ``None`` to keep the fonts of the ReportLab sample styles.

        Returns:
            Dict with the keys ``title``, ``subtitle``, ``heading``, ``body``.
        """
        base = getSampleStyleSheet()
        # wordWrap='CJK' lets ReportLab break lines inside runs of
        # ideographs, which contain no spaces to wrap on.
        overrides = (
            {'fontName': cjk_font, 'wordWrap': 'CJK'} if cjk_font else {}
        )
        return {
            'title': ParagraphStyle(
                'CustomTitle',
                parent=base['Heading1'],
                fontSize=24,
                textColor=HexColor('#1a1a1a'),
                spaceAfter=30,
                alignment=TA_CENTER,
                **overrides,
            ),
            'subtitle': ParagraphStyle(
                'CustomSubtitle',
                parent=base['Normal'],
                fontSize=12,
                textColor=HexColor('#666666'),
                spaceAfter=20,
                alignment=TA_CENTER,
                **overrides,
            ),
            'heading': ParagraphStyle(
                'CustomHeading',
                parent=base['Heading2'],
                fontSize=16,
                textColor=HexColor('#2c3e50'),
                spaceAfter=12,
                spaceBefore=12,
                **overrides,
            ),
            'body': ParagraphStyle(
                'CustomBody',
                parent=base['Normal'],
                fontSize=10,
                leading=14,
                textColor=HexColor('#333333'),
                spaceAfter=8,
                **overrides,
            ),
        }

    def _make_paragraph(self, text, style):
        """Build a ``Paragraph`` from plain text.

        The text is escaped first because ``Paragraph`` parses its input as
        XML-like markup; every piece of user-supplied text (title, metadata,
        headings and body) goes through here.
        """
        escaped = self._escape_html(str(text))
        try:
            return Paragraph(escaped, style)
        except Exception:
            # Best-effort retry with every non-ASCII character written as a
            # numeric character reference.  If this fails too, the error
            # propagates to the caller.
            ascii_safe = escaped.encode('ascii', 'xmlcharrefreplace')
            return Paragraph(ascii_safe.decode('ascii'), style)

    def _clean_markdown(self, text: str) -> str:
        """Strip markdown formatting, leaving plain text for the PDF.

        Heading markers (``#``) are left in place; the caller interprets
        them line by line.

        Args:
            text: Markdown text.

        Returns:
            Text with code fences and horizontal rules removed, list markers
            replaced by a bullet character, and link, emphasis and
            inline-code markers removed while their text is kept.
        """
        # Block-level constructs go first: rules and list markers are made
        # of '*', '-' and '_' and would otherwise be misread as emphasis
        # delimiters by the inline patterns below.
        text = self._FENCE_RE.sub('', text)
        text = self._RULE_RE.sub('', text)
        text = self._BULLET_RE.sub('\u2022 ', text)

        text = self._LINK_RE.sub(r'\1', text)
        for pattern in self._INLINE_RES:
            text = pattern.sub(r'\1', text)

        return text

    def _escape_html(self, text: str) -> str:
        """Escape markup-significant characters for ReportLab paragraphs.

        Args:
            text: Text to escape.

        Returns:
            Text with the ampersand, less-than and greater-than characters
            replaced by their entity references.  Quotes are left as-is.
        """
        # Function-scope import keeps this fix self-contained; the standard
        # library escapes '&' first, so entities are never double-escaped.
        import html

        return html.escape(text, quote=False)
|