From 606de755777af223ffd0b31971f519f019d57f13 Mon Sep 17 00:00:00 2001
From: touero
Date: Thu, 25 Apr 2024 23:58:29 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20format=20string=20and=20using=20enum=20c?=
 =?UTF-8?q?lass=20defined=20constants=20=E2=85=A1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---

Review notes (text between "---" and the first "diff --git" is not applied):
  * The added lines referenced WriteMode.MySQL, but constants.py in this same
    patch defines the member as WriteMode.Mysql. Both references now use
    WriteMode.Mysql, so evaluating the mode chain no longer raises
    AttributeError.
  * self.writeMode now starts as WriteMode.Append. With Create as the initial
    value, an already existing csv/txt/xlsx file left self.OUTPUT empty while
    the header code in the last hunk still indexed self.OUTPUT[0].
  * The post-image blob id on the easyspider_executestage.py "index" line
    predates these two edits.

 ExecuteStage/constants.py               | 15 ++++++++----
 ExecuteStage/easyspider_executestage.py | 31 ++++++++++++++-----------
 2 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/ExecuteStage/constants.py b/ExecuteStage/constants.py
index f6fea8c..7664c18 100644
--- a/ExecuteStage/constants.py
+++ b/ExecuteStage/constants.py
@@ -3,7 +3,14 @@ from enum import unique, IntEnum
 
 @unique
 class WriteMode(IntEnum):
-    Create_Mode = 0  # 新建模式|Create Mode
-    Append_Mode = 1  # 追加模式|Append Mode
-    Mysql_Mode = 2  # Mysql模式|Mysql Mode
-    Json_Mode = 3  # Json模式|Json Mode
+    Create = 0  # 新建模式|Create Mode
+    Append = 1  # 追加模式|Append Mode
+    Mysql = 2  # Mysql模式|Mysql Mode
+    Json = 3  # Json模式|Json Mode
+
+
+@unique
+class DataWriteMode(IntEnum):
+    Append = 1  # 追加模式|Append Mode
+    Cover = 2  # 覆盖模式|Cover Mode
+    Rename = 3  # 重命名模式|Rename Mode
diff --git a/ExecuteStage/easyspider_executestage.py b/ExecuteStage/easyspider_executestage.py
index 45d4199..341a892 100644
--- a/ExecuteStage/easyspider_executestage.py
+++ b/ExecuteStage/easyspider_executestage.py
@@ -9,7 +9,7 @@ import threading
 # import undetected_chromedriver as uc
 from utils import detect_optimizable, download_image, extract_text_from_html, get_output_code, isnotnull, lowercase_tags_in_xpath, myMySQL, new_line, \
     on_press_creator, on_release_creator, readCode, rename_downloaded_file, replace_field_values, send_email, split_text_by_lines, write_to_csv, write_to_excel, write_to_json
-from constants import WriteMode
+from constants import WriteMode, DataWriteMode
 from myChrome import MyChrome
 from threading import Thread, Event
 from PIL import Image
@@ -154,7 +154,7 @@ class BrowserThread(Thread):
         self.maxViewLength = service.get("maxViewLength", 15)  # 最大显示长度
         self.outputFormat = service.get("outputFormat", "csv")  # 输出格式
         self.save_threshold = service.get("saveThreshold", 10)  # 保存最低阈值
-        self.dataWriteMode = service.get("dataWriteMode", 1)  # 数据写入模式,1为追加,2为覆盖,3为重命名文件
+        self.dataWriteMode = service.get("dataWriteMode", DataWriteMode.Append.value)  # 数据写入模式,1为追加,2为覆盖,3为重命名文件
         self.task_version = service.get("version", "")  # 任务版本
 
         if not self.task_version:
@@ -176,37 +176,40 @@ class BrowserThread(Thread):
             self.links = list(filter(isnotnull, service_links.split("\n")))  # 要执行的link的列表
         else:
             self.links = list(filter(isnotnull, service["url"]))  # 要执行的link
+
         self.OUTPUT = []  # 采集的数据
         if self.outputFormat in ["csv", "txt", "xlsx", "json"]:
             if os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat):
-                if self.dataWriteMode == 2:
+                if self.dataWriteMode == DataWriteMode.Cover.value:
                     os.remove("Data/Task_" + str(self.id) + "/" + self.saveName + '.' + self.outputFormat)
-                elif self.dataWriteMode == 3:
+                elif self.dataWriteMode == DataWriteMode.Rename.value:
                     i = 2
                     while os.path.exists("Data/Task_" + str(self.id) + "/" + self.saveName + '_' + str(i) + '.' + self.outputFormat):
                         i = i + 1
                     self.saveName = self.saveName + '_' + str(i)
                     self.print_and_log("文件已存在,已重命名为", self.saveName)
-        self.writeMode = WriteMode.Create_Mode.value  # 写入模式,0为新建,1为追加
+        self.writeMode = WriteMode.Append.value  # 写入模式,0为新建,1为追加
         if self.outputFormat in ['csv', 'txt', 'xlsx']:
             if not os.path.exists(f"Data/Task_{str(self.id)}/{self.saveName}.{self.outputFormat}"):
                 self.OUTPUT.append([])  # 添加表头
-                self.writeMode = WriteMode.Create_Mode.value
+                self.writeMode = WriteMode.Create.value
         elif self.outputFormat == "json":
-            self.writeMode = WriteMode.Json_Mode.value  # JSON模式无需判断是否存在文件
+            self.writeMode = WriteMode.Json.value  # JSON模式无需判断是否存在文件
         elif self.outputFormat == "mysql":
             self.mysql = myMySQL(config["mysql_config_path"])
-            self.mysql.create_table(self.saveName, service["outputParameters"], remove_if_exists=self.dataWriteMode == 2)
-            self.writeMode = WriteMode.MySQL_Mode.value  # MySQL模式
+            self.mysql.create_table(self.saveName, service["outputParameters"],
+                                    remove_if_exists=self.dataWriteMode == DataWriteMode.Cover.value)
+            self.writeMode = WriteMode.Mysql.value  # MySQL模式
 
-        if self.writeMode == WriteMode.Create_Mode.value:
+        if self.writeMode == WriteMode.Create.value:
             self.print_and_log("新建模式|Create Mode")
-        elif self.writeMode == WriteMode.Append_Mode.value:
+        elif self.writeMode == WriteMode.Append.value:
             self.print_and_log("追加模式|Append Mode")
-        elif self.writeMode == WriteMode.MySQL_Mode.value:
+        elif self.writeMode == WriteMode.Mysql.value:
             self.print_and_log("MySQL模式|MySQL Mode")
-        elif self.writeMode == WriteMode.Json_Mode.value:
+        elif self.writeMode == WriteMode.Json.value:
             self.print_and_log("JSON模式|JSON Mode")
+
         self.containJudge = service["containJudge"]  # 是否含有判断语句
         self.outputParameters = {}
         self.service = service
@@ -222,7 +225,7 @@ class BrowserThread(Thread):
             self.outputParametersTypes.append(param.get("type", "text"))
             self.outputParametersRecord.append(bool(param.get("recordASField", True)))
             # 文件叠加的时候不添加表头
-            if self.outputFormat in ["csv", "txt", "xlsx"] and self.writeMode == WriteMode.Create_Mode.value:
+            if self.outputFormat in ["csv", "txt", "xlsx"] and self.writeMode == WriteMode.Create.value:
                 self.OUTPUT[0].append(param["name"])
         self.urlId = 0  # 全局记录变量
         self.preprocess()  # 预处理,优化提取数据流程