Skip to content

Commit

Permalink
GA
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmfinol committed Sep 24, 2024
1 parent 1092b3f commit f802d93
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 22 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/scrape.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ jobs:
run: sudo apt purge google-chrome-stable
- name: Remove default Chromium
run: sudo apt purge chromium-browser
- name: Install a new Chromium
run: sudo apt install -y chromium-browser
- name: Install all necessary packages
run: pip install requests beautifulsoup4 pandas webdriver-manager selenium
# Need to fetch reqs if needed
- name: Install xvfb
run: sudo apt-get install xvfb
- name: Installing all necessary packages
run: pip install chromedriver-autoinstaller selenium pyvirtualdisplay beautifulsoup4
- name: Run the scraping script
run: cd docs/games/grand_archive_gabrary_spoilers;python scraper.py;cd ../../..
- name: Auto-Commit
Expand Down
45 changes: 27 additions & 18 deletions docs/games/grand_archive_gabrary_spoilers/scraper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from webdriver_manager.core.os_manager import ChromeType
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
import chromedriver_autoinstaller
from pyvirtualdisplay import Display
from bs4 import BeautifulSoup
import re
import json
Expand All @@ -12,23 +12,32 @@

# Set up the WebDriver (a matching chromedriver is downloaded automatically below if it is missing)
#driver = webdriver.Firefox()
driver = webdriver.Chrome()
#driver = webdriver.Chrome()

# Driver from https://github.com/jsoma/selenium-github-actions
#chrome_service = Service(ChromeDriverManager(chrome_type=ChromeType.CHROMIUM).install())
#chrome_options = Options()
#options = [
# "--headless",
# "--disable-gpu",
# "--window-size=1920,1200",
# "--ignore-certificate-errors",
# "--disable-extensions",
# "--no-sandbox",
# "--disable-dev-shm-usage"
#]
#for option in options:
# chrome_options.add_argument(option)
#driver = webdriver.Chrome(service=chrome_service, options=chrome_options)
# Browser setup for running Selenium on a machine without a physical screen.
# Adapted from
# https://github.com/MarketingPipeline/Python-Selenium-Action/blob/main/Selenium-Template.py

# Start a virtual display (pyvirtualdisplay) so Chrome can run non-headless.
# NOTE(review): the display is 800x800 while the browser window below is
# requested at 1200x1200 -- confirm that the mismatch is intentional.
display = Display(visible=0, size=(800, 800))
display.start()

# Check if the current version of chromedriver exists and, if it doesn't,
# download it automatically, then add chromedriver to PATH.
chromedriver_autoinstaller.install()

chrome_options = webdriver.ChromeOptions()

# Command-line switches handed to Chrome; add or re-enable entries as needed.
# Every entry ends with a comma so that uncommenting a switch can never fuse
# two adjacent string literals into a single (invalid) argument.
options = [
    # Define window size here
    "--window-size=1200,1200",
    "--ignore-certificate-errors",
    # "--headless",
    # "--disable-gpu",
    # "--window-size=1920,1200",
    # "--ignore-certificate-errors",
    # "--disable-extensions",
    # "--no-sandbox",
    # "--disable-dev-shm-usage",
    # "--remote-debugging-port=9222",
]
for option in options:
    chrome_options.add_argument(option)

driver = webdriver.Chrome(options=chrome_options)

# Open the target URL
driver.get(url)
Expand Down

0 comments on commit f802d93

Please sign in to comment.