Skip to content

Commit 311bc29

Browse files
authored
Merge pull request #16 from roleecorn/dev
test version done
2 parents 8256719 + ae952d3 commit 311bc29

File tree

5 files changed

+31
-13
lines changed

5 files changed

+31
-13
lines changed

logfile/.gitkeep

Whitespace-only changes.

main.py

+24 -7
Original file line number | Diff line number | Diff line change
@@ -10,8 +10,9 @@
1010
import util
1111
import driver_control
1212

13-
parser = argparse.ArgumentParser(description="這是一個簡單的範例 argparse 程式")
14-
parser.add_argument("--shop_name", help="提供一個名字作為參數", required=True)
13+
parser = argparse.ArgumentParser(description="自動飯用爬蟲程式")
14+
parser.add_argument("--shop_name", type=str, help="網站名稱", required=True)
15+
parser.add_argument("--test", action='store_true', help="測試選項,只進行一次運作")
1516
args = parser.parse_args()
1617

1718
# 設定logging的配置
@@ -20,22 +21,38 @@
2021
myhome = Path.cwd()
2122

2223
logging.basicConfig(
23-
filename=myhome / 'logfile' / '{shop_name}.log',
24+
filename=myhome / 'logfile' / f'{args.shop_name}.log',
2425
level=logging.INFO,
2526
format='%(asctime)s - %(levelname)s - %(message)s'
2627
)
2728

28-
# 使用logging對象輸出日誌
29-
logging.info('This is a test message')
3029

31-
a, b = util.read_csv(Path.cwd() / 'eddiebauer_test.csv')
3230
listsite, site_feature = util.read_csv(
33-
(myhome / 'cite_file' / f'{args.shop_name}.csv').resolve())
31+
(myhome / 'cite_file' / f'{args.shop_name}.csv'))
3432
with open('driver_path', 'r', encoding='big5') as f:
3533
tmp = f.read()
3634
driver = util.new_driver(myhome / tmp)
3735
with open('cite_fathers.json', 'r') as file:
3836
father: str = json.load(file)[args.shop_name]
37+
if args.test:
38+
driver.get(url=listsite[0])
39+
imgpath = util.check_imgpath(imgpath=myhome / args.shop_name,
40+
imgfile=site_feature[0])
41+
time.sleep(5)
42+
driver_control.scroll_to_bottom_and_wait(driver=driver)
43+
try:
44+
parent_element = driver.find_element("class name", father)
45+
except NoSuchElementException:
46+
logging.error('element not find in parent_element')
47+
try:
48+
child_elements = parent_element.find_elements('xpath', "./*")
49+
except NoSuchElementException:
50+
logging.error('element not find in child_elements')
51+
for element in child_elements:
52+
tmp = util.capture(ele=element, path=imgpath)
53+
print(tmp)
54+
import sys
55+
sys.exit()
3956
for i in len(listsite):
4057
driver.get(url=listsite[i])
4158
time.sleep(5)

requirements.txt

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
selenium
22
webdriver-manager
3-
opencv-python
4-
pytesseract
3+
pytesseract
4+
Pillow

util/__init__.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,12 @@
1313
from .string_util import remove_non_alphanumeric, remove_non_number
1414
from .new_driver import new_driver
1515
from .check_imgpath import check_imgpath
16-
16+
from .ele_capture import capture
1717

1818
__all__ = ["read_csv",
1919
"new_driver",
2020
"remove_non_alphanumeric",
2121
"remove_non_number",
22-
"check_imgpath"]
23-
__version__ = "0.1"
22+
"check_imgpath",
23+
"capture"]
24+
__version__ = "0.2"

util/ele_capture.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,5 @@
11
from selenium.webdriver.remote.webelement import WebElement
22
from pathlib import Path
3-
from new_driver import new_driver
43
import uuid
54

65

@@ -16,6 +15,7 @@ def capture(ele: WebElement, path: Path):
1615

1716
if __name__ == "__main__":
1817
import time
18+
from new_driver import new_driver
1919
myhome = Path.cwd().parent
2020
driver_path = myhome / '.wdm' / 'drivers' / 'chromedriver' / 'win32'
2121
driver_path = driver_path / '113.0.5672' / 'chromedriver.exe'

0 commit comments

Comments
 (0)