This commit is contained in:
MarkLo 2025-11-26 11:23:07 +08:00
parent d0d3e87e22
commit 5de461e1df
2 changed files with 104 additions and 34 deletions

View File

@ -10,6 +10,32 @@ from datetime import datetime
from backend.app.services.pdf_generator import PDFGenerator
# 分析師中英文名稱對照表
ANALYST_NAME_MAPPING = {
# 分析師組
"市場分析師": "Market_Analyst",
"基本面分析師": "Fundamentals_Analyst",
"社群媒體分析師": "Social_Media_Analyst",
"新聞分析師": "News_Analyst",
# 研究員組
"看漲研究員": "Bull_Researcher",
"看跌研究員": "Bear_Researcher",
# 風險辯論者組
"激進分析師": "Aggressive_Debator",
"保守分析師": "Conservative_Debator",
"中立分析師": "Neutral_Debator",
# 經理組
"研究經理": "Research_Manager",
"風險經理": "Risk_Manager",
# 交易員
"交易員": "Trader",
}
class DownloadService:
"""Service for handling analyst report downloads"""
@ -17,6 +43,19 @@ class DownloadService:
"""Initialize download service"""
self.pdf_generator = PDFGenerator()
def _get_english_name(self, analyst_name: str) -> str:
"""
獲取分析師的英文名稱
Args:
analyst_name: 中文分析師名稱
Returns:
英文分析師名稱
"""
# 使用對照表,如果找不到則使用原名稱並替換空格
return ANALYST_NAME_MAPPING.get(analyst_name, analyst_name.replace(" ", "_"))
def create_single_pdf(
self,
analyst_name: str,
@ -44,8 +83,9 @@ class DownloadService:
report_content=report_content,
)
# Generate filename: 股票代號_分析師_日期.pdf
filename = f"{ticker}_{analyst_name}_{analysis_date}.pdf"
# Generate filename with English name: TICKER_English_Name_DATE.pdf
english_name = self._get_english_name(analyst_name)
filename = f"{ticker}_{english_name}_{analysis_date}.pdf"
return pdf_bytes, filename
@ -86,15 +126,16 @@ class DownloadService:
report_content=report_content,
)
# Add to ZIP with proper filename
pdf_filename = f"{ticker}_{analyst_name}_{analysis_date}.pdf"
# Add to ZIP with English filename
english_name = self._get_english_name(analyst_name)
pdf_filename = f"{ticker}_{english_name}_{analysis_date}.pdf"
zip_file.writestr(pdf_filename, pdf_bytes)
# Get ZIP content
zip_bytes = zip_buffer.getvalue()
zip_buffer.close()
# Generate ZIP filename: 股票代號_日期.zip
# Generate ZIP filename: TICKER_DATE.zip
zip_filename = f"{ticker}_{analysis_date}.zip"
return zip_bytes, zip_filename

View File

@ -109,14 +109,14 @@ class PDFGenerator:
"""
buffer = io.BytesIO()
# Create PDF document
# Create PDF document with reduced margins for more content space
doc = SimpleDocTemplate(
buffer,
pagesize=A4,
rightMargin=2*cm,
leftMargin=2*cm,
topMargin=2*cm,
bottomMargin=2*cm,
rightMargin=1.5*cm,
leftMargin=1.5*cm,
topMargin=1.5*cm,
bottomMargin=1.5*cm,
)
# Container for the 'Flowable' objects
@ -163,12 +163,14 @@ class PDFGenerator:
'CustomBody',
parent=styles['Normal'],
fontName=self.primary_font,
fontSize=10,
leading=16, # Increased from 14 for better readability
fontSize=9,
leading=14,
textColor=HexColor('#333333'),
spaceAfter=10,
spaceAfter=8,
wordWrap='CJK',
splitLongWords=True,
allowOrphans=0,
allowWidows=0,
)
# Add title
@ -221,7 +223,8 @@ class PDFGenerator:
def _clean_markdown(self, text: str) -> str:
"""
Clean markdown formatting for PDF
Clean markdown formatting for PDF - IMPROVED VERSION
Fixes spurious character issues and improves cleaning logic
Args:
text: Markdown text
@ -229,36 +232,53 @@ class PDFGenerator:
Returns:
Cleaned text
"""
# Remove markdown links but keep text
# 1. Remove markdown links but keep text
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
# Remove bold/italic markers carefully to avoid orphan characters
text = re.sub(r'\*\*([^\*]+)\*\*', r'\1', text)
text = re.sub(r'(?<!\*)\*([^\*]+)\*(?!\*)', r'\1', text) # Avoid double asterisks
text = re.sub(r'__([^_]+)__', r'\1', text)
text = re.sub(r'(?<!_)_([^_]+)_(?!_)', r'\1', text) # Avoid double underscores
# 2. Remove bold markers (improved version)
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
# Remove code blocks
text = re.sub(r'```[^`]*```', ' ', text, flags=re.DOTALL) # Replace with space not empty
text = re.sub(r'`([^`]+)`', r'\1', text)
# 3. Remove italic markers (more precise to avoid side effects)
text = re.sub(r'(?<![\*_])\*([^\*\n]+?)\*(?![\*_])', r'\1', text)
text = re.sub(r'(?<![\*_])_([^_\n]+?)_(?![\*_])', r'\1', text)
# Clean up bullet points
# 4. Remove underscore bold
text = re.sub(r'__(.+?)__', r'\1', text)
# 5. Remove code blocks
text = re.sub(r'```[^`]*?```', '', text, flags=re.DOTALL)
text = re.sub(r'`([^`]+?)`', r'\1', text)
# 6. Clean up bullet points
text = re.sub(r'^\s*[\*\-\+]\s+', '', text, flags=re.MULTILINE)
# Remove horizontal rules
text = re.sub(r'^[\-\*\_]{3,}\s*$', '', text, flags=re.MULTILINE)
# 7. Remove horizontal rules
text = re.sub(r'^[\-\*_]{3,}\s*$', '', text, flags=re.MULTILINE)
# Remove multiple consecutive spaces
# 8. Clean table separators
text = re.sub(r'^\s*\|?\s*:?-+:?\s*\|?\s*$', '', text, flags=re.MULTILINE)
# 9. Remove table | symbols (keep content)
text = re.sub(r'^\s*\|', '', text, flags=re.MULTILINE)
text = re.sub(r'\|\s*$', '', text, flags=re.MULTILINE)
text = re.sub(r'\|', ' | ', text)
# 10. Clean excess spaces
text = re.sub(r' {2,}', ' ', text)
# Remove orphaned single characters that might be markdown artifacts
text = re.sub(r'(?<=[^\w])([*_`~#])(?=[^\w])', '', text)
# 11. Clean excess blank lines
text = re.sub(r'\n{3,}', '\n\n', text)
return text
# 12. Remove isolated markdown symbols (more cautious to avoid spurious chars)
text = re.sub(r'(?<=\s)[\*_`~#]+(?=\s)', '', text)
text = re.sub(r'^[\*_`~#]+(?=\s)', '', text, flags=re.MULTILINE)
text = re.sub(r'(?<=\s)[\*_`~#]+$', '', text, flags=re.MULTILINE)
return text.strip()
def _escape_html(self, text: str) -> str:
"""
Escape HTML special characters for PDF
Escape HTML special characters for PDF - IMPROVED VERSION
Args:
text: Text to escape
@ -266,7 +286,16 @@ class PDFGenerator:
Returns:
Escaped text
"""
text = text.replace('&', '&amp;')
text = text.replace('<', '&lt;')
text = text.replace('>', '&gt;')
# Escape in order to avoid double-escaping
replacements = [
('&', '&amp;'),
('<', '&lt;'),
('>', '&gt;'),
('"', '&quot;'),
("'", '&apos;'),
]
for old, new in replacements:
text = text.replace(old, new)
return text