This commit is contained in:
parent
d0d3e87e22
commit
5de461e1df
|
|
@ -10,6 +10,32 @@ from datetime import datetime
|
|||
from backend.app.services.pdf_generator import PDFGenerator
|
||||
|
||||
|
||||
# Lookup table from the Chinese analyst display name to the English
# identifier used when building download filenames.
ANALYST_NAME_MAPPING = {
    # Analyst team
    "市場分析師": "Market_Analyst",
    "基本面分析師": "Fundamentals_Analyst",
    "社群媒體分析師": "Social_Media_Analyst",
    "新聞分析師": "News_Analyst",

    # Researcher team
    "看漲研究員": "Bull_Researcher",
    "看跌研究員": "Bear_Researcher",

    # Risk debaters
    "激進分析師": "Aggressive_Debator",
    "保守分析師": "Conservative_Debator",
    "中立分析師": "Neutral_Debator",

    # Managers
    "研究經理": "Research_Manager",
    "風險經理": "Risk_Manager",

    # Trader
    "交易員": "Trader",
}
|
||||
|
||||
|
||||
class DownloadService:
|
||||
"""Service for handling analyst report downloads"""
|
||||
|
||||
|
|
@ -17,6 +43,19 @@ class DownloadService:
|
|||
"""Initialize download service"""
|
||||
self.pdf_generator = PDFGenerator()
|
||||
|
||||
def _get_english_name(self, analyst_name: str) -> str:
    """
    Return the English identifier for an analyst name.

    Args:
        analyst_name: Analyst display name (Chinese names listed in
            ANALYST_NAME_MAPPING are translated).

    Returns:
        The mapped English name; when the name is not in the mapping, the
        name itself with spaces replaced by underscores.
    """
    # Ignore incidental leading/trailing whitespace so that it neither
    # defeats the table lookup nor leaks into the filename as "_".
    normalized = analyst_name.strip()

    # Use the lookup table; fall back to the given name with spaces replaced.
    return ANALYST_NAME_MAPPING.get(normalized, normalized.replace(" ", "_"))
|
||||
|
||||
def create_single_pdf(
|
||||
self,
|
||||
analyst_name: str,
|
||||
|
|
@ -44,8 +83,9 @@ class DownloadService:
|
|||
report_content=report_content,
|
||||
)
|
||||
|
||||
# Generate filename: 股票代號_分析師_日期.pdf
|
||||
filename = f"{ticker}_{analyst_name}_{analysis_date}.pdf"
|
||||
# Generate filename with English name: TICKER_English_Name_DATE.pdf
|
||||
english_name = self._get_english_name(analyst_name)
|
||||
filename = f"{ticker}_{english_name}_{analysis_date}.pdf"
|
||||
|
||||
return pdf_bytes, filename
|
||||
|
||||
|
|
@ -86,15 +126,16 @@ class DownloadService:
|
|||
report_content=report_content,
|
||||
)
|
||||
|
||||
# Add to ZIP with proper filename
|
||||
pdf_filename = f"{ticker}_{analyst_name}_{analysis_date}.pdf"
|
||||
# Add to ZIP with English filename
|
||||
english_name = self._get_english_name(analyst_name)
|
||||
pdf_filename = f"{ticker}_{english_name}_{analysis_date}.pdf"
|
||||
zip_file.writestr(pdf_filename, pdf_bytes)
|
||||
|
||||
# Get ZIP content
|
||||
zip_bytes = zip_buffer.getvalue()
|
||||
zip_buffer.close()
|
||||
|
||||
# Generate ZIP filename: 股票代號_日期.zip
|
||||
# Generate ZIP filename: TICKER_DATE.zip
|
||||
zip_filename = f"{ticker}_{analysis_date}.zip"
|
||||
|
||||
return zip_bytes, zip_filename
|
||||
|
|
|
|||
|
|
@ -109,14 +109,14 @@ class PDFGenerator:
|
|||
"""
|
||||
buffer = io.BytesIO()
|
||||
|
||||
# Create PDF document
|
||||
# Create PDF document with reduced margins for more content space
|
||||
doc = SimpleDocTemplate(
|
||||
buffer,
|
||||
pagesize=A4,
|
||||
rightMargin=2*cm,
|
||||
leftMargin=2*cm,
|
||||
topMargin=2*cm,
|
||||
bottomMargin=2*cm,
|
||||
rightMargin=1.5*cm,
|
||||
leftMargin=1.5*cm,
|
||||
topMargin=1.5*cm,
|
||||
bottomMargin=1.5*cm,
|
||||
)
|
||||
|
||||
# Container for the 'Flowable' objects
|
||||
|
|
@ -163,12 +163,14 @@ class PDFGenerator:
|
|||
'CustomBody',
|
||||
parent=styles['Normal'],
|
||||
fontName=self.primary_font,
|
||||
fontSize=10,
|
||||
leading=16, # Increased from 14 for better readability
|
||||
fontSize=9,
|
||||
leading=14,
|
||||
textColor=HexColor('#333333'),
|
||||
spaceAfter=10,
|
||||
spaceAfter=8,
|
||||
wordWrap='CJK',
|
||||
splitLongWords=True,
|
||||
allowOrphans=0,
|
||||
allowWidows=0,
|
||||
)
|
||||
|
||||
# Add title
|
||||
|
|
@ -221,7 +223,8 @@ class PDFGenerator:
|
|||
|
||||
def _clean_markdown(self, text: str) -> str:
|
||||
"""
|
||||
Clean markdown formatting for PDF
|
||||
Clean markdown formatting for PDF - IMPROVED VERSION
|
||||
Fixes spurious character issues and improves cleaning logic
|
||||
|
||||
Args:
|
||||
text: Markdown text
|
||||
|
|
@ -229,36 +232,53 @@ class PDFGenerator:
|
|||
Returns:
|
||||
Cleaned text
|
||||
"""
|
||||
# Remove markdown links but keep text
|
||||
# 1. Remove markdown links but keep text
|
||||
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
|
||||
|
||||
# Remove bold/italic markers carefully to avoid orphan characters
|
||||
text = re.sub(r'\*\*([^\*]+)\*\*', r'\1', text)
|
||||
text = re.sub(r'(?<!\*)\*([^\*]+)\*(?!\*)', r'\1', text) # Avoid double asterisks
|
||||
text = re.sub(r'__([^_]+)__', r'\1', text)
|
||||
text = re.sub(r'(?<!_)_([^_]+)_(?!_)', r'\1', text) # Avoid double underscores
|
||||
# 2. Remove bold markers (improved version)
|
||||
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
||||
|
||||
# Remove code blocks
|
||||
text = re.sub(r'```[^`]*```', ' ', text, flags=re.DOTALL) # Replace with space not empty
|
||||
text = re.sub(r'`([^`]+)`', r'\1', text)
|
||||
# 3. Remove italic markers (more precise to avoid side effects)
|
||||
text = re.sub(r'(?<![\*_])\*([^\*\n]+?)\*(?![\*_])', r'\1', text)
|
||||
text = re.sub(r'(?<![\*_])_([^_\n]+?)_(?![\*_])', r'\1', text)
|
||||
|
||||
# Clean up bullet points
|
||||
# 4. Remove underscore bold
|
||||
text = re.sub(r'__(.+?)__', r'\1', text)
|
||||
|
||||
# 5. Remove code blocks
|
||||
text = re.sub(r'```[^`]*?```', '', text, flags=re.DOTALL)
|
||||
text = re.sub(r'`([^`]+?)`', r'\1', text)
|
||||
|
||||
# 6. Clean up bullet points
|
||||
text = re.sub(r'^\s*[\*\-\+]\s+', '• ', text, flags=re.MULTILINE)
|
||||
|
||||
# Remove horizontal rules
|
||||
text = re.sub(r'^[\-\*\_]{3,}\s*$', '', text, flags=re.MULTILINE)
|
||||
# 7. Remove horizontal rules
|
||||
text = re.sub(r'^[\-\*_]{3,}\s*$', '', text, flags=re.MULTILINE)
|
||||
|
||||
# Remove multiple consecutive spaces
|
||||
# 8. Clean table separators
|
||||
text = re.sub(r'^\s*\|?\s*:?-+:?\s*\|?\s*$', '', text, flags=re.MULTILINE)
|
||||
|
||||
# 9. Remove table | symbols (keep content)
|
||||
text = re.sub(r'^\s*\|', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r'\|\s*$', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r'\|', ' | ', text)
|
||||
|
||||
# 10. Clean excess spaces
|
||||
text = re.sub(r' {2,}', ' ', text)
|
||||
|
||||
# Remove orphaned single characters that might be markdown artifacts
|
||||
text = re.sub(r'(?<=[^\w])([*_`~#])(?=[^\w])', '', text)
|
||||
# 11. Clean excess blank lines
|
||||
text = re.sub(r'\n{3,}', '\n\n', text)
|
||||
|
||||
return text
|
||||
# 12. Remove isolated markdown symbols (more cautious to avoid spurious chars)
|
||||
text = re.sub(r'(?<=\s)[\*_`~#]+(?=\s)', '', text)
|
||||
text = re.sub(r'^[\*_`~#]+(?=\s)', '', text, flags=re.MULTILINE)
|
||||
text = re.sub(r'(?<=\s)[\*_`~#]+$', '', text, flags=re.MULTILINE)
|
||||
|
||||
return text.strip()
|
||||
|
||||
def _escape_html(self, text: str) -> str:
|
||||
"""
|
||||
Escape HTML special characters for PDF
|
||||
Escape HTML special characters for PDF - IMPROVED VERSION
|
||||
|
||||
Args:
|
||||
text: Text to escape
|
||||
|
|
@ -266,7 +286,16 @@ class PDFGenerator:
|
|||
Returns:
|
||||
Escaped text
|
||||
"""
|
||||
text = text.replace('&', '&')
|
||||
text = text.replace('<', '<')
|
||||
text = text.replace('>', '>')
|
||||
# Escape in order to avoid double-escaping
|
||||
replacements = [
|
||||
('&', '&'),
|
||||
('<', '<'),
|
||||
('>', '>'),
|
||||
('"', '"'),
|
||||
("'", '''),
|
||||
]
|
||||
|
||||
for old, new in replacements:
|
||||
text = text.replace(old, new)
|
||||
|
||||
return text
|
||||
|
|
|
|||
Loading…
Reference in New Issue