This commit is contained in:
parent
d0d3e87e22
commit
5de461e1df
|
|
@ -10,6 +10,32 @@ from datetime import datetime
|
||||||
from backend.app.services.pdf_generator import PDFGenerator
|
from backend.app.services.pdf_generator import PDFGenerator
|
||||||
|
|
||||||
|
|
||||||
|
# 分析師中英文名稱對照表
|
||||||
|
ANALYST_NAME_MAPPING = {
|
||||||
|
# 分析師組
|
||||||
|
"市場分析師": "Market_Analyst",
|
||||||
|
"基本面分析師": "Fundamentals_Analyst",
|
||||||
|
"社群媒體分析師": "Social_Media_Analyst",
|
||||||
|
"新聞分析師": "News_Analyst",
|
||||||
|
|
||||||
|
# 研究員組
|
||||||
|
"看漲研究員": "Bull_Researcher",
|
||||||
|
"看跌研究員": "Bear_Researcher",
|
||||||
|
|
||||||
|
# 風險辯論者組
|
||||||
|
"激進分析師": "Aggressive_Debator",
|
||||||
|
"保守分析師": "Conservative_Debator",
|
||||||
|
"中立分析師": "Neutral_Debator",
|
||||||
|
|
||||||
|
# 經理組
|
||||||
|
"研究經理": "Research_Manager",
|
||||||
|
"風險經理": "Risk_Manager",
|
||||||
|
|
||||||
|
# 交易員
|
||||||
|
"交易員": "Trader",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class DownloadService:
|
class DownloadService:
|
||||||
"""Service for handling analyst report downloads"""
|
"""Service for handling analyst report downloads"""
|
||||||
|
|
||||||
|
|
@ -17,6 +43,19 @@ class DownloadService:
|
||||||
"""Initialize download service"""
|
"""Initialize download service"""
|
||||||
self.pdf_generator = PDFGenerator()
|
self.pdf_generator = PDFGenerator()
|
||||||
|
|
||||||
|
def _get_english_name(self, analyst_name: str) -> str:
|
||||||
|
"""
|
||||||
|
獲取分析師的英文名稱
|
||||||
|
|
||||||
|
Args:
|
||||||
|
analyst_name: 中文分析師名稱
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
英文分析師名稱
|
||||||
|
"""
|
||||||
|
# 使用對照表,如果找不到則使用原名稱並替換空格
|
||||||
|
return ANALYST_NAME_MAPPING.get(analyst_name, analyst_name.replace(" ", "_"))
|
||||||
|
|
||||||
def create_single_pdf(
|
def create_single_pdf(
|
||||||
self,
|
self,
|
||||||
analyst_name: str,
|
analyst_name: str,
|
||||||
|
|
@ -44,8 +83,9 @@ class DownloadService:
|
||||||
report_content=report_content,
|
report_content=report_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Generate filename: 股票代號_分析師_日期.pdf
|
# Generate filename with English name: TICKER_English_Name_DATE.pdf
|
||||||
filename = f"{ticker}_{analyst_name}_{analysis_date}.pdf"
|
english_name = self._get_english_name(analyst_name)
|
||||||
|
filename = f"{ticker}_{english_name}_{analysis_date}.pdf"
|
||||||
|
|
||||||
return pdf_bytes, filename
|
return pdf_bytes, filename
|
||||||
|
|
||||||
|
|
@ -86,15 +126,16 @@ class DownloadService:
|
||||||
report_content=report_content,
|
report_content=report_content,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add to ZIP with proper filename
|
# Add to ZIP with English filename
|
||||||
pdf_filename = f"{ticker}_{analyst_name}_{analysis_date}.pdf"
|
english_name = self._get_english_name(analyst_name)
|
||||||
|
pdf_filename = f"{ticker}_{english_name}_{analysis_date}.pdf"
|
||||||
zip_file.writestr(pdf_filename, pdf_bytes)
|
zip_file.writestr(pdf_filename, pdf_bytes)
|
||||||
|
|
||||||
# Get ZIP content
|
# Get ZIP content
|
||||||
zip_bytes = zip_buffer.getvalue()
|
zip_bytes = zip_buffer.getvalue()
|
||||||
zip_buffer.close()
|
zip_buffer.close()
|
||||||
|
|
||||||
# Generate ZIP filename: 股票代號_日期.zip
|
# Generate ZIP filename: TICKER_DATE.zip
|
||||||
zip_filename = f"{ticker}_{analysis_date}.zip"
|
zip_filename = f"{ticker}_{analysis_date}.zip"
|
||||||
|
|
||||||
return zip_bytes, zip_filename
|
return zip_bytes, zip_filename
|
||||||
|
|
|
||||||
|
|
@ -109,14 +109,14 @@ class PDFGenerator:
|
||||||
"""
|
"""
|
||||||
buffer = io.BytesIO()
|
buffer = io.BytesIO()
|
||||||
|
|
||||||
# Create PDF document
|
# Create PDF document with reduced margins for more content space
|
||||||
doc = SimpleDocTemplate(
|
doc = SimpleDocTemplate(
|
||||||
buffer,
|
buffer,
|
||||||
pagesize=A4,
|
pagesize=A4,
|
||||||
rightMargin=2*cm,
|
rightMargin=1.5*cm,
|
||||||
leftMargin=2*cm,
|
leftMargin=1.5*cm,
|
||||||
topMargin=2*cm,
|
topMargin=1.5*cm,
|
||||||
bottomMargin=2*cm,
|
bottomMargin=1.5*cm,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Container for the 'Flowable' objects
|
# Container for the 'Flowable' objects
|
||||||
|
|
@ -163,12 +163,14 @@ class PDFGenerator:
|
||||||
'CustomBody',
|
'CustomBody',
|
||||||
parent=styles['Normal'],
|
parent=styles['Normal'],
|
||||||
fontName=self.primary_font,
|
fontName=self.primary_font,
|
||||||
fontSize=10,
|
fontSize=9,
|
||||||
leading=16, # Increased from 14 for better readability
|
leading=14,
|
||||||
textColor=HexColor('#333333'),
|
textColor=HexColor('#333333'),
|
||||||
spaceAfter=10,
|
spaceAfter=8,
|
||||||
wordWrap='CJK',
|
wordWrap='CJK',
|
||||||
splitLongWords=True,
|
splitLongWords=True,
|
||||||
|
allowOrphans=0,
|
||||||
|
allowWidows=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add title
|
# Add title
|
||||||
|
|
@ -221,7 +223,8 @@ class PDFGenerator:
|
||||||
|
|
||||||
def _clean_markdown(self, text: str) -> str:
|
def _clean_markdown(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Clean markdown formatting for PDF
|
Clean markdown formatting for PDF - IMPROVED VERSION
|
||||||
|
Fixes spurious character issues and improves cleaning logic
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Markdown text
|
text: Markdown text
|
||||||
|
|
@ -229,36 +232,53 @@ class PDFGenerator:
|
||||||
Returns:
|
Returns:
|
||||||
Cleaned text
|
Cleaned text
|
||||||
"""
|
"""
|
||||||
# Remove markdown links but keep text
|
# 1. Remove markdown links but keep text
|
||||||
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
|
text = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', text)
|
||||||
|
|
||||||
# Remove bold/italic markers carefully to avoid orphan characters
|
# 2. Remove bold markers (improved version)
|
||||||
text = re.sub(r'\*\*([^\*]+)\*\*', r'\1', text)
|
text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
|
||||||
text = re.sub(r'(?<!\*)\*([^\*]+)\*(?!\*)', r'\1', text) # Avoid double asterisks
|
|
||||||
text = re.sub(r'__([^_]+)__', r'\1', text)
|
|
||||||
text = re.sub(r'(?<!_)_([^_]+)_(?!_)', r'\1', text) # Avoid double underscores
|
|
||||||
|
|
||||||
# Remove code blocks
|
# 3. Remove italic markers (more precise to avoid side effects)
|
||||||
text = re.sub(r'```[^`]*```', ' ', text, flags=re.DOTALL) # Replace with space not empty
|
text = re.sub(r'(?<![\*_])\*([^\*\n]+?)\*(?![\*_])', r'\1', text)
|
||||||
text = re.sub(r'`([^`]+)`', r'\1', text)
|
text = re.sub(r'(?<![\*_])_([^_\n]+?)_(?![\*_])', r'\1', text)
|
||||||
|
|
||||||
# Clean up bullet points
|
# 4. Remove underscore bold
|
||||||
|
text = re.sub(r'__(.+?)__', r'\1', text)
|
||||||
|
|
||||||
|
# 5. Remove code blocks
|
||||||
|
text = re.sub(r'```[^`]*?```', '', text, flags=re.DOTALL)
|
||||||
|
text = re.sub(r'`([^`]+?)`', r'\1', text)
|
||||||
|
|
||||||
|
# 6. Clean up bullet points
|
||||||
text = re.sub(r'^\s*[\*\-\+]\s+', '• ', text, flags=re.MULTILINE)
|
text = re.sub(r'^\s*[\*\-\+]\s+', '• ', text, flags=re.MULTILINE)
|
||||||
|
|
||||||
# Remove horizontal rules
|
# 7. Remove horizontal rules
|
||||||
text = re.sub(r'^[\-\*\_]{3,}\s*$', '', text, flags=re.MULTILINE)
|
text = re.sub(r'^[\-\*_]{3,}\s*$', '', text, flags=re.MULTILINE)
|
||||||
|
|
||||||
# Remove multiple consecutive spaces
|
# 8. Clean table separators
|
||||||
|
text = re.sub(r'^\s*\|?\s*:?-+:?\s*\|?\s*$', '', text, flags=re.MULTILINE)
|
||||||
|
|
||||||
|
# 9. Remove table | symbols (keep content)
|
||||||
|
text = re.sub(r'^\s*\|', '', text, flags=re.MULTILINE)
|
||||||
|
text = re.sub(r'\|\s*$', '', text, flags=re.MULTILINE)
|
||||||
|
text = re.sub(r'\|', ' | ', text)
|
||||||
|
|
||||||
|
# 10. Clean excess spaces
|
||||||
text = re.sub(r' {2,}', ' ', text)
|
text = re.sub(r' {2,}', ' ', text)
|
||||||
|
|
||||||
# Remove orphaned single characters that might be markdown artifacts
|
# 11. Clean excess blank lines
|
||||||
text = re.sub(r'(?<=[^\w])([*_`~#])(?=[^\w])', '', text)
|
text = re.sub(r'\n{3,}', '\n\n', text)
|
||||||
|
|
||||||
return text
|
# 12. Remove isolated markdown symbols (more cautious to avoid spurious chars)
|
||||||
|
text = re.sub(r'(?<=\s)[\*_`~#]+(?=\s)', '', text)
|
||||||
|
text = re.sub(r'^[\*_`~#]+(?=\s)', '', text, flags=re.MULTILINE)
|
||||||
|
text = re.sub(r'(?<=\s)[\*_`~#]+$', '', text, flags=re.MULTILINE)
|
||||||
|
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
def _escape_html(self, text: str) -> str:
|
def _escape_html(self, text: str) -> str:
|
||||||
"""
|
"""
|
||||||
Escape HTML special characters for PDF
|
Escape HTML special characters for PDF - IMPROVED VERSION
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
text: Text to escape
|
text: Text to escape
|
||||||
|
|
@ -266,7 +286,16 @@ class PDFGenerator:
|
||||||
Returns:
|
Returns:
|
||||||
Escaped text
|
Escaped text
|
||||||
"""
|
"""
|
||||||
text = text.replace('&', '&amp;')
|
# Escape in order to avoid double-escaping
|
||||||
text = text.replace('<', '&lt;')
|
replacements = [
|
||||||
text = text.replace('>', '&gt;')
|
('&', '&amp;'),
|
||||||
|
('<', '&lt;'),
|
||||||
|
('>', '&gt;'),
|
||||||
|
('"', '&quot;'),
|
||||||
|
("'", '&#39;'),
|
||||||
|
]
|
||||||
|
|
||||||
|
for old, new in replacements:
|
||||||
|
text = text.replace(old, new)
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue