From c0c75257a488f9448870d6b4821f217ca4560ec2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=BA=8C=E4=B9=94?= <605056080@qq.com>
Date: Sun, 28 Apr 2024 17:04:19 +0800
Subject: [PATCH] =?UTF-8?q?issues=5Ffeature=5Fpost=5Fapi=5F576=20=E5=AE=9E?=
 =?UTF-8?q?=E7=8E=B0=E9=80=9A=E8=BF=87POST=E6=96=B9=E5=BC=8F=E5=B0=86?=
 =?UTF-8?q?=E6=95=B0=E6=8D=AE=E6=8E=A8=E9=80=81=E5=88=B0=E8=87=AA=E5=AE=9A?=
 =?UTF-8?q?=E4=B9=89=E6=8E=A5=E5=8F=A3?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 weibo_spider/parser/comment_parser.py | 2 +-
 weibo_spider/writer/post_writer.py    | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/weibo_spider/parser/comment_parser.py b/weibo_spider/parser/comment_parser.py
index 6e06c776..c0117d80 100644
--- a/weibo_spider/parser/comment_parser.py
+++ b/weibo_spider/parser/comment_parser.py
@@ -33,7 +33,7 @@ def get_long_weibo(self):
                     # 3. 去掉所有 HTML 标签，但保留标签内的有效文本
                     new_content = fromstring(html_string).text_content()
                     # 4. 替换多个连续的 \n 为一个 \n
-                    new_content = re.sub(r'\n+', '\n', new_content)
+                    new_content = re.sub(r'\n+\s*', '\n', new_content)
                     weibo_content = handle_garbled(new_content)
                     if weibo_content is not None:
                         return weibo_content
diff --git a/weibo_spider/writer/post_writer.py b/weibo_spider/writer/post_writer.py
index 7446fbea..af536623 100644
--- a/weibo_spider/writer/post_writer.py
+++ b/weibo_spider/writer/post_writer.py
@@ -5,6 +5,8 @@
 import requests
 
 from .writer import Writer
+from time import sleep
+from requests.exceptions import RequestException
 
 logger = logging.getLogger('spider.post_writer')