From e882cd65002901b46cf1f170e5d2b91517bf718f Mon Sep 17 00:00:00 2001 From: Zach Halpern Date: Tue, 2 Apr 2019 12:58:31 -0400 Subject: [PATCH] More cleanups Signed-off-by: Zach Halpern --- .ci/deploy.sh | 3 +- .pylintrc | 68 +++ .travis.yml | 1 - cards_corrections.yml | 68 --- cards_delete.yml | 37 -- cards_manual.yml | 70 --- magic_spoiler/__init__.py | 1 + main.py => magic_spoiler/__main__.py | 189 +++--- mtgs_scraper.py | 275 --------- mypy.ini | 18 + mythic_scraper.py | 87 --- requirements_test.txt | 7 + scryfall_scraper.py | 231 -------- set_info.yml | 83 --- setup.py | 24 + spoilers.py | 832 --------------------------- tox.ini | 56 ++ verify_files.py | 24 - wizards_scraper.py | 269 --------- 19 files changed, 260 insertions(+), 2083 deletions(-) create mode 100644 .pylintrc delete mode 100644 cards_corrections.yml delete mode 100644 cards_delete.yml delete mode 100644 cards_manual.yml create mode 100644 magic_spoiler/__init__.py rename main.py => magic_spoiler/__main__.py (73%) delete mode 100644 mtgs_scraper.py create mode 100644 mypy.ini delete mode 100644 mythic_scraper.py create mode 100644 requirements_test.txt delete mode 100644 scryfall_scraper.py delete mode 100644 set_info.yml create mode 100644 setup.py delete mode 100644 spoilers.py create mode 100644 tox.ini delete mode 100644 verify_files.py delete mode 100644 wizards_scraper.py diff --git a/.ci/deploy.sh b/.ci/deploy.sh index efd3df2c..40a87e0f 100755 --- a/.ci/deploy.sh +++ b/.ci/deploy.sh @@ -6,7 +6,7 @@ TARGET_BRANCH="files" function doCompile { echo "Running script..." - python3 main.py dumpXML=True + python3 -m magic_spoiler } # Pull requests and commits to other branches shouldn't try to deploy, just build to verify @@ -48,7 +48,6 @@ echo TRAVIS_EVENT_TYPE ${TRAVIS_EVENT_TYPE} # Now let's go have some fun with the cloned repo cd out -rm -f AllSets* ls git config user.name "Travis CI" git config user.email "$COMMIT_AUTHOR_EMAIL" diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..2b252f1c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,68 @@ +[MASTER] + +# Pickle collected data for later comparisons. +persistent=yes + + +[MESSAGES CONTROL] + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable= + bad-continuation, + fixme, + line-too-long, + localled-enabled, + locally-disabled, + logging-format-interpolation, + too-few-public-methods, + too-many-statements, + wrong-import-order, + too-many-branches, + import-error + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio).You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=colorized + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma. +good-names= + f, + i, + j, + k, + _, + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=__.*__|test_.* + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes= + FIXME, + XXX, + TODO, + + +[VARIABLES] + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$) diff --git a/.travis.yml b/.travis.yml index 3a54ce15..046f6a46 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,6 @@ env: before_install: - export CFLAGS=-O0 # considerably speed-up build time for pip packages (especially lxml), optimizations doesn't matter for ci - pip install pyyaml - - python verify_files.py # make sure input files are OK before wasting time with prereqs install: - pip install -r requirements.txt diff --git a/cards_corrections.yml b/cards_corrections.yml deleted file mode 100644 index cca907f3..00000000 --- a/cards_corrections.yml +++ /dev/null @@ -1,68 +0,0 @@ - -### How to Use: ######################################################### -# # -# Each card to fix has to be # -# * in its own new array # -# # -# Each card array starts with # -# * a space (" ") # -# * exact card name # -# * followed by a colon (":") # -# # -# Each card array consist out of # -# * a list of corrected fields # -# # -# Each card corrections consist out of # -# * 3 leading spaces (" ") # -# * card attribute with wrong value # -# * followed by a colon (":") # -# * additional space (" ") # -# * new value for that attribute # -# # -### Hints: ############################################################## -# # -# * Indentation is critical, two spaces per indentation # -# * For a better overview group cards from the same set and # -# label them with "#setcode" above the first entry of each set # -# # -### Form: ############################################################### -# # -#card name: # -# field to fix: new value # -# # -### Example Entries: #################################################### -# # -#Jace, the Planeswalker: # -# loyalty: 5 # -# manaCost: 1UUB # -# # -#Terror: # -# type: Instant # -# # -### Explanation of Fields and their Values: ############################# -# # -# name: Card Name # -# cmc: 4 # -# colorIdentity: # -# - U # -# - B #keep track of https://github.com/mtgjson/mtgjson4/issues/56 # -# colors: # -# - Blue # -# - Black # -# manaCost: 1UUB # -# number: 140 # -# rarity: Mythic Rare # -# power: X # -# text: "{5}, {T}: You win the game." # -# toughness: * # -# type: Legendary Artifact Creature - Human Monk # -# url: http://wizards.c0m/link/to/card.png # -# # -######################################################################### -# never remove this part of the file, since it will break otherwise # -corrections: # - - must not be empty! # -######################################################################### - -# Enter cards with their corrections below. But note the syntax hints on top! - diff --git a/cards_delete.yml b/cards_delete.yml deleted file mode 100644 index e8b0222c..00000000 --- a/cards_delete.yml +++ /dev/null @@ -1,37 +0,0 @@ - -### How to Use: ######################################################### -# # -# Each card to delete has to be # -# * in its own new line # -# # -# Each line has to consist out of # -# * 2 leading spaces (" ") # -# * a dash ("-") # -# * additional space (" ") # -# * exact card name # -# # -### Hints: ############################################################## -# # -# * Cards that begin/end with spaces or contain a colon need quoted # -# * Indentation is critical, two spaces per indentation # -# * For a better overview group cards from the same set and # -# label them with "#setcode" above the first entry of each set # -# # -### Form: ############################################################### -# # -# - card name # -# # -### Example Entry: ###################################################### -# # -# - JUNK NAME TO DELETE # -# - " Tocaf's Honor Guard " # -# # -######################################################################### -# never remove this part of the file, since it will break otherwise # -delete: # - - must not be empty! # -######################################################################### - -# Enter cards that should be deleted below. But note the syntax hints on top! - - \ No newline at end of file diff --git a/cards_manual.yml b/cards_manual.yml deleted file mode 100644 index e0572261..00000000 --- a/cards_manual.yml +++ /dev/null @@ -1,70 +0,0 @@ - -### How to Use: ############################################################ -# # -# Each card that you want to manually add has to be # -# * in its correct set block # -# # -# Each set block has to start with # -# * set code # -# * followed by a colon (":") # -# # -# Each set block consist out of # -# * a list of cards # -# # -# Each card in the list has to start with # -# * 2 leading spaces (" ") # -# * a dash ("-") to open an new array for each card # -# # -# Each new card has to consist out of # -# * a new line # -# * 4 leading spaces in total (" ") # -# * card attribute # -# * followed by a colon (":") # -# * additional space (" ") # -# * value for that attribute # -# # -### Hints: ################################################################# -# # -# * Each card attribute you want your card to have needs its own line # -# * Most important fields are: name, manaCost, rarity, type and url # -# * Values for the text field must be surrounded by quatation marks (") # -# * Newlines in the text field must be replaced by \n # -# * Indentation is critical, two spaces per indentation # -# # -### Form: ################################################################## -# # -#set code: # -# - # -# card field to add: value # -# card field to add: value # -# - # -# card field to add: value # -# # -#other set code: # -# - # -# card field to add: value # -# # -### Example Entry: ######################################################### -# # -#XLN: # -# - # -# name: Ripjaw Raptor # -# manaCost: 2GG # -# number: 203 # -# rarity: Rare # -# type: Creature - Dinosaur # -# url: http://mythicspoiler.com/ixa/cards/havenraptor.jpg <--- gath # -# text: "Enrage - Whenever Ripjaw Raptor is dealt damage, draw a card." # -# cmc: 4 # -# power: 4 # -# toughness: 5 # -# - # -# name: Vraska's Contempt # -# manaCost: 2BB # -# rarity: Rare # -# type: Instant # -# url: https://media.wizards.com/2017/xln/en_oUjuu5E2th.png <--- ? # -# # -############################################################################ - -# Enter cards that should be added manually directly below. But note the syntax hints on top! diff --git a/magic_spoiler/__init__.py b/magic_spoiler/__init__.py new file mode 100644 index 00000000..ea8afed7 --- /dev/null +++ b/magic_spoiler/__init__.py @@ -0,0 +1 @@ +"""Magic Spoiler Program""" diff --git a/main.py b/magic_spoiler/__main__.py similarity index 73% rename from main.py rename to magic_spoiler/__main__.py index 05d7cab9..7475289e 100644 --- a/main.py +++ b/magic_spoiler/__main__.py @@ -1,52 +1,28 @@ +""" +Handle Scryfall Spoilers +""" +import contextvars import datetime import pathlib -import sys - -import contextvars -from typing import Dict, Any, List, Union, Tuple +import time +from typing import IO, Any, Dict, List, Tuple, Union import requests import requests_cache -import yaml +from lxml import etree -# Scryfall API for downloading spoiler sets SCRYFALL_SET_URL: str = "https://api.scryfall.com/sets/{}" - -# Downloader sessions for header consistency SESSION: contextvars.ContextVar = contextvars.ContextVar("SESSION_SCRYFALL") +SPOILER_SETS: contextvars.ContextVar = contextvars.ContextVar("SPOILER_SETS") -def load_yaml_file( - input_file: str, lib_to_use: str = "yaml" -) -> Union[Dict[str, Any], List[Dict[str, Any]]]: - """ - Load a yaml file from system - :param input_file: File to open - :param lib_to_use: Open format - :return: Loaded file - """ - try: - with pathlib.Path(input_file).open("r") as f: - if lib_to_use == "yaml": - return yaml.safe_load(f) - else: - return [of for of in yaml.safe_load_all(f)] - except Exception as ex: - print("Unable to load {}: {}".format(input_file, ex.args)) - sys.exit(2) - - -# File containing all spoiler set details -SET_INFO_FILE: List[Dict[str, Any]] = load_yaml_file("set_info.yml", "yaml_multi") - - -def __get_session() -> requests.Session: +def __get_session() -> Union[requests.Session, Any]: """ Get the session for downloading content :return: Session """ requests_cache.install_cache( - cache_name="scryfall_cache", backend="sqlite", expire_after=604800 # 1 week + cache_name="scryfall_cache", backend="sqlite", expire_after=7200 # 2 hours ) if not SESSION.get(None): @@ -54,10 +30,10 @@ def __get_session() -> requests.Session: return SESSION.get() -def __download(scryfall_url: str) -> Dict[str, Any]: +def json_download(scryfall_url: str) -> Dict[str, Any]: """ Get the data from Scryfall in JSON format using our secret keys - :param scryfall_url: URL to __download JSON data from + :param scryfall_url: URL to json_download JSON data from :return: JSON object of the Scryfall data """ session = __get_session() @@ -73,7 +49,7 @@ def download_scryfall_set(set_code: str) -> List[Dict[str, Any]]: :param set_code: Set code :return: Card list """ - set_content: Dict[str, Any] = __download(SCRYFALL_SET_URL.format(set_code)) + set_content: Dict[str, Any] = json_download(SCRYFALL_SET_URL.format(set_code)) if set_content["object"] == "error": print("API download failed for {}: {}".format(set_code, set_content)) return [] @@ -85,9 +61,9 @@ def download_scryfall_set(set_code: str) -> List[Dict[str, Any]]: while download_url: page_downloaded += 1 - cards = __download(download_url) + cards = json_download(download_url) if cards["object"] == "error": - print("Error downloading {0}: {1}".format(set_code, cards)) + print("Set {} has no cards, skipping".format(set_code)) break for card in cards["data"]: @@ -101,7 +77,7 @@ def download_scryfall_set(set_code: str) -> List[Dict[str, Any]]: return sorted(spoiler_cards, key=lambda c: (c["name"], c["collector_number"])) -def build_types(sf_card: Dict[str, Any]) -> Tuple[List[str], List[str], List[str]]: +def build_types(sf_card: Dict[str, Any]) -> Tuple[List[str], str, List[str]]: """ Build the super, type, and sub-types of a given card :param sf_card: Scryfall card @@ -110,7 +86,8 @@ def build_types(sf_card: Dict[str, Any]) -> Tuple[List[str], List[str], List[str all_super_types = ["Legendary", "Snow", "Elite", "Basic", "World", "Ongoing"] # return values - super_types, types, sub_types = [], [], [] + super_types: List[str] = [] + sub_types: List[str] = [] type_line = sf_card["type_line"] @@ -122,14 +99,14 @@ def build_types(sf_card: Dict[str, Any]) -> Tuple[List[str], List[str], List[str if card_type in type_line: super_types.append(card_type) - types = type_line.split(u"—")[0] + types: str = type_line.split(u"—")[0] for card_type in all_super_types: types = types.replace(card_type, "") return super_types, types, sub_types -def convert_scryfall(scryfall_cards: List[Dict[str, Any]]) -> List[Dict[str, Any]]: +def scryfall2mtgjson(scryfall_cards: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ Convert SF cards to MTGJSON format for dispatching :param scryfall_cards: List of Scryfall cards @@ -200,23 +177,23 @@ def convert_scryfall(scryfall_cards: List[Dict[str, Any]]) -> List[Dict[str, Any return trice_cards -def open_header(card_xml_file) -> None: +def open_header(card_xml_file: IO[Any]) -> None: """ Add the header data to the XML file :param card_xml_file: Card file path """ card_xml_file.write( "\n" - "\n" - "\n" - "\n" + + "\nCreated By: Magic-Spoiler project @ https://github.com/Cockatrice/Magic-Spoiler\n-->\n" + + "\n" ) -def fill_header_sets(card_xml_file, set_code, set_name, release_date) -> None: +def fill_header_sets(card_xml_file: IO[Any], set_obj: Dict[str, str]) -> None: """ Add header data for set files :param card_xml_file: Card file path @@ -225,15 +202,15 @@ def fill_header_sets(card_xml_file, set_code, set_name, release_date) -> None: :param release_date: Release Date """ card_xml_file.write( - "\n" + set_code + "\n" - "" + set_name + "\n" + "\n" + set_obj["code"] + "\n" + "" + set_obj["name"] + "\n" "Expansion\n" - "" + release_date + "\n" + "" + set_obj["released_at"] + "\n" "\n" ) -def close_header(card_xml_file) -> None: +def close_header(card_xml_file: IO[Any]) -> None: """ Add closing data to files :param card_xml_file: Card file path @@ -241,12 +218,20 @@ def close_header(card_xml_file) -> None: card_xml_file.write("\n\n") -def close_xml_file(card_xml_file) -> None: +def close_xml_file(card_xml_file: IO[Any]) -> None: """ - Add final touch to files to validate them + Add final touch to files to validate them, + then pretty them :param card_xml_file: Card file path """ card_xml_file.write("\n\n") + card_xml_file.close() + + # Make the files pretty + parser = etree.XMLParser(remove_blank_text=True) + root = etree.parse(card_xml_file.name, parser).getroot() + with pathlib.Path(card_xml_file.name).open("wb") as f: + f.write(etree.tostring(root, pretty_print=True)) def write_cards( @@ -258,16 +243,12 @@ def write_cards( :param trice_dict: List of cards :param set_code: Set code """ - count = 0 - related = 0 - for card in trice_dict: if "names" in card.keys() and card["names"]: if "layout" in card and card["layout"] != "double-faced": if card["name"] == card["names"][1]: continue - count += 1 set_name = card["name"] if "mana_cost" in card.keys(): @@ -277,11 +258,11 @@ def write_cards( if "power" in card.keys() or "toughness" in card.keys(): if card["power"]: - pt = str(card["power"]) + "/" + str(card["toughness"]) + pow_tough = str(card["power"]) + "/" + str(card["toughness"]) else: - pt = 0 + pow_tough = "" else: - pt = 0 + pow_tough = "" if "text" in card.keys(): text = card["text"] @@ -311,14 +292,7 @@ def write_cards( if "names" not in card.keys(): print(card["name"] + ' is double-faced but no "names" key') else: - for dfc_name in card["names"]: - if dfc_name != card["name"]: - related = dfc_name - else: - print( - card["name"] - + " has names, but layout != split, aftermath, or double-faced" - ) + pass else: print(card["name"] + " has multiple names and no 'layout' key") @@ -361,39 +335,32 @@ def write_cards( card_xml_file.write("" + card_type + "\n") - if pt: - card_xml_file.write("" + pt + "\n") + if pow_tough: + card_xml_file.write("" + pow_tough + "\n") if "loyalty" in card.keys(): card_xml_file.write("" + str(card["loyalty"]) + "\n") card_xml_file.write("" + table_row + "\n") card_xml_file.write("" + text + "\n") - - if related: - card_xml_file.write("" + related + "\n") - related = "" - card_xml_file.write("\n") -def write_spoilers_xml(trice_dicts) -> None: +def write_spoilers_xml(trice_dicts: Dict[str, List[Dict[str, Any]]]) -> None: """ Write the spoiler.xml file - :param trice_dicts: Dict of entries + :param trice_dicts: Dict of dict entries """ - pathlib.Path("out").mkdir(exist_ok=True) - card_xml_file = pathlib.Path("out/spoiler.xml").open("w") + pathlib.Path("../out").mkdir(exist_ok=True) + card_xml_file = pathlib.Path("../out/spoiler.xml").open("w") # Fill in set headers open_header(card_xml_file) - for value in SET_INFO_FILE: - fill_header_sets( - card_xml_file, value["code"], value["name"], value["releaseDate"] - ) + for value in SPOILER_SETS.get(): + fill_header_sets(card_xml_file, value) close_header(card_xml_file) # Write in all the cards - for value in SET_INFO_FILE: + for value in SPOILER_SETS.get(): try: write_cards(card_xml_file, trice_dicts[value["code"]], value["code"]) except KeyError: @@ -402,47 +369,61 @@ def write_spoilers_xml(trice_dicts) -> None: close_xml_file(card_xml_file) -def write_set_xml( - trice_dict: List[Dict[str, Any]], set_code: str, set_name: str, release_date: str -) -> None: +def write_set_xml(trice_dict: List[Dict[str, Any]], set_obj: Dict[str, str]) -> None: """ Write out a single magic set to XML format :param trice_dict: Cards to print - :param set_code: Set code - :param set_name: Set name - :param release_date: Set release date + :param set_obj: Set object """ if not trice_dict: return - pathlib.Path("out").mkdir(exist_ok=True) - card_xml_file = pathlib.Path("out/{}.xml".format(set_code)).open("w") + pathlib.Path("../out").mkdir(exist_ok=True) + card_xml_file = pathlib.Path("../out/{}.xml".format(set_obj["code"])).open("w") open_header(card_xml_file) - fill_header_sets(card_xml_file, set_code, set_name, release_date) + fill_header_sets(card_xml_file, set_obj) close_header(card_xml_file) - write_cards(card_xml_file, trice_dict, set_code) + write_cards(card_xml_file, trice_dict, set_obj["code"]) close_xml_file(card_xml_file) +def get_spoiler_sets() -> List[Dict[str, str]]: + """ + Download Sf sets and mark spoiler sets + :return: Spoiler sets + """ + sf_sets = json_download("https://api.scryfall.com/sets/") + if sf_sets["object"] == "error": + print("Unable to download SF correctly: {}".format(sf_sets)) + return [] + + spoiler_sets = [] + for sf_set in sf_sets["data"]: + if sf_set["released_at"] >= time.strftime("%Y-%m-%d %H:%M:%S"): + if sf_set["set_type"] != "token": + sf_set["code"] = sf_set["code"].upper() + spoiler_sets.append(sf_set) + + return spoiler_sets + + def main() -> None: """ Main dispatch thread """ + # Determine what sets have spoiler data + SPOILER_SETS.set(get_spoiler_sets()) + spoiler_xml = {} - for set_info in SET_INFO_FILE: + for set_info in SPOILER_SETS.get(): print("Handling {}".format(set_info["code"])) - if not set_info["scryfallOnly"]: - continue - cards = download_scryfall_set(set_info["code"]) - trice_dict = convert_scryfall(cards) + trice_dict = scryfall2mtgjson(cards) # Write SET.xml - write_set_xml( - trice_dict, set_info["code"], set_info["name"], set_info["releaseDate"] - ) + write_set_xml(trice_dict, set_info) # Save for spoiler.xml spoiler_xml[set_info["code"]] = trice_dict diff --git a/mtgs_scraper.py b/mtgs_scraper.py deleted file mode 100644 index 0bc4b794..00000000 --- a/mtgs_scraper.py +++ /dev/null @@ -1,275 +0,0 @@ -# -*- coding: utf-8 -*- -import requests -import feedparser -import re -import sys -import time -from lxml import html - - -def scrape_mtgs(url): - return requests.get(url, headers={'Cache-Control': 'no-cache', 'Pragma': 'no-cache', 'Expires': 'Thu, 01 Jan 1970 00:00:00 GMT'}).text - - -def parse_mtgs(mtgs, manual_cards=[], card_corrections=[], delete_cards=[], related_cards=[], setinfo={"mtgsurl": ""}): - mtgs = mtgs.replace('utf-16', 'utf-8') - patterns = ['Name: (?P.*?)<', - 'Cost: (?P[X]*\d{0,2}[XWUBRGC]*?)<', - 'Type: (?P.*?)<', - 'Pow/Tgh: (?P.*?)<', - 'Rules Text: (?P.*?)
.*?)<', - 'Set Number: #(?P.*?)/' - ] - d = feedparser.parse(mtgs) - - cards = [] - for entry in d.items()[5][1]: - card = dict(cost='', cmc='', img='', pow='', name='', rules='', type='', - color='', altname='', colorIdentity='', colorArray=[], colorIdentityArray=[], setnumber='', rarity='') - summary = entry['summary'] - for pattern in patterns: - match = re.search(pattern, summary, re.MULTILINE | re.DOTALL) - if match: - dg = match.groupdict() - card[dg.items()[0][0]] = dg.items()[0][1] - cards.append(card) - - gallery_list = list_mtgs_gallery(setinfo['mtgsurl']) - for card in cards: - if card['name'] not in gallery_list: - print ("Removing card scraped from MTGS RSS but not in their gallery: " + card['name']) - cards.remove(card) - - for card in cards: - card['name'] = card['name'].strip() - - # if we didn't find any cards, let's bail out to prevent overwriting good data - if len(cards) < 1: - sys.exit("No cards found, exiting to prevent file overwrite") - - cards2 = [] - for card in cards: - if 'rules' in card: - htmltags = re.compile(r'<.*?>') - card['rules'] = htmltags.sub('', card['rules']) - if '//' in card['name'] or 'Aftermath' in card['rules']: - print ('Splitting up Aftermath card ' + card['name']) - card1 = card.copy() - card2 = dict(cost='', cmc='', img='', pow='', name='', rules='', type='', - color='', altname='', colorIdentity='', colorArray=[], colorIdentityArray=[], setnumber='', rarity='') - if '//' in card['name']: - card['name'] = card['name'].replace(' // ', '//') - card1['name'] = card['name'].split('//')[0] - card2["name"] = card['name'].split('//')[1] - else: - card1['name'] = card['name'] - card2["name"] = card['rules'].split( - '\n\n')[1].strip().split(' {')[0] - card1['rules'] = card['rules'].split('\n\n')[0].strip() - card2["rules"] = "Aftermath" + card['rules'].split('Aftermath')[1] - card2['cost'] = re.findall( - r'{.*}', card['rules'])[0].replace('{', '').replace('}', '').upper() - card2['type'] = re.findall( - r'}\n.*\n', card['rules'])[0].replace('}', '').replace('\n', '') - if 'setnumber' in card: - card1['setnumber'] = card['setnumber'] + 'a' - card2['setnumber'] = card['setnumber'] + 'b' - if 'rarity' in card: - card2['rarity'] = card['rarity'] - card1['layout'] = 'aftermath' - card2['layout'] = 'aftermath' - card1['names'] = [card1['name'], card2['name']] - card2['names'] = [card1['name'], card2['name']] - cards2.append(card1) - cards2.append(card2) - else: - cards2.append(card) - cards = cards2 - - for card in cards: - card['name'] = card['name'].replace(''', '\'') - card['rules'] = card['rules'].replace(''', '\'') \ - .replace('<i>', '') \ - .replace('</i>', '') \ - .replace('"', '"') \ - .replace('blkocking', 'blocking')\ - .replace('&bull;', u'•')\ - .replace('•', u'•')\ - .replace('comes into the', 'enters the')\ - .replace('threeor', 'three or')\ - .replace('[i]', '')\ - .replace('[/i]', '')\ - .replace('Lawlwss', 'Lawless')\ - .replace('Costner', "Counter") - card['type'] = card['type'].replace(' ', ' ')\ - .replace('Crature', 'Creature') - - if card['type'][-1] == ' ': - card['type'] = card['type'][:-1] - - if 'cost' in card and len(card['cost']) > 0: - workingCMC = 0 - stripCost = card['cost'].replace('{', '').replace('}', '') - for manaSymbol in stripCost: - if manaSymbol.isdigit(): - workingCMC += int(manaSymbol) - elif not manaSymbol == 'X': - workingCMC += 1 - card['cmc'] = workingCMC - - for c in 'WUBRG': # figure out card's color - if c not in card['colorIdentity']: - if c in card['cost']: - card['color'] += c - card['colorIdentity'] += c - if (c + '}') in card['rules'] or (str.lower(c) + '}') in card['rules']: - if not (c in card['colorIdentity']): - card['colorIdentity'] += c - - cleanedcards = [] - for card in cards: # let's remove any cards that are named in delete_cards array - if not card['name'] in delete_cards: - cleanedcards.append(card) - cards = cleanedcards - - cardarray = [] - for card in cards: - dupe = False - for dupecheck in cardarray: - if dupecheck['name'] == card['name']: - dupe = True - if dupe == True: - continue - for cid in card['colorIdentity']: - card['colorIdentityArray'].append(cid) - if 'W' in card['color']: - card['colorArray'].append('White') - if 'U' in card['color']: - card['colorArray'].append('Blue') - if 'B' in card['color']: - card['colorArray'].append('Black') - if 'R' in card['color']: - card['colorArray'].append('Red') - if 'G' in card['color']: - card['colorArray'].append('Green') - cardpower = '' - cardtoughness = '' - if len(card['pow'].split('/')) > 1: - cardpower = card['pow'].split('/')[0] - cardtoughness = card['pow'].split('/')[1] - cardnames = [] - cardnumber = card['setnumber'].lstrip('0') - if card['name'] in related_cards: - cardnames.append(card['name']) - cardnames.append(related_cards[card['name']]) - cardnumber += 'a' - card['layout'] = 'double-faced' - for namematch in related_cards: - if card['name'] == related_cards[namematch]: - card['layout'] = 'double-faced' - cardnames.append(namematch) - if not card['name'] in cardnames: - cardnames.append(card['name']) - cardnumber += 'b' - cardnames = [] - - if 'number' in card: - if 'b' in card['number'] or 'a' in card['number']: - if not 'layout' in card: - print (card['name'] + " has a a/b number but no 'layout'") - card['type'] = card['type'].replace('instant', 'Instant').replace( - 'sorcery', 'Sorcery').replace('creature', 'Creature') - if '-' in card['type']: - subtype = card['type'].split(' - ')[1].strip() - else: - subtype = False - if subtype: - subtypes = subtype.split(' ') - else: - subtypes = False - if card['cmc'] == '': - card['cmc'] = 0 - cardjson = {} - #cardjson["id"] = hashlib.sha1(code + card['name'] + str(card['name']).lower()).hexdigest() - cardjson["cmc"] = card['cmc'] - cardjson["manaCost"] = card['cost'] - cardjson["name"] = card['name'] - cardjson["number"] = cardnumber - # not sure if mtgjson has a list of acceptable rarities, but my application does - # so we'll warn me but continue to write a non-standard rarity (timeshifted?) - # may force 'special' in the future - if card['rarity'] not in ['Mythic Rare', 'Rare', 'Uncommon', 'Common', 'Special', 'Basic Land']: - #errors.append({"name": card['name'], "key": "rarity", "value": card['rarity']}) - print (card['name'] + ' has rarity = ' + card['rarity']) - if subtypes: - cardjson['subtypes'] = subtypes - cardjson["rarity"] = card['rarity'] - cardjson["text"] = card['rules'].replace(". ",". ") - cardjson["type"] = card['type'] - - workingtypes = card['type'] - if ' - ' in workingtypes: - workingtypes = card['type'].split(' - ')[0] - cardjson['types'] = workingtypes.replace('Legendary ', '').replace('Snow ', '')\ - .replace('Elite ', '').replace('Basic ', '').replace('World ', '').replace('Ongoing ', '')\ - .strip().split(' ') - cardjson["url"] = card['img'] - - # optional fields - if len(card['colorIdentityArray']) > 0: - cardjson["colorIdentity"] = card['colorIdentityArray'] - if len(card['colorArray']) > 0: - cardjson["colors"] = card['colorArray'] - if len(cardnames) > 1: - cardjson["names"] = cardnames - if 'names' in card: - cardjson['names'] = card['names'] - if cardpower or cardpower == '0': - cardjson["power"] = cardpower - cardjson["toughness"] = cardtoughness - if card.has_key('loyalty'): - cardjson["loyalty"] = card['loyalty'] - if card.has_key('layout'): - cardjson["layout"] = card['layout'] - - cardarray.append(cardjson) - - return {"cards": cardarray} - - -def scrape_mtgs_images(url='http://www.mtgsalvation.com/spoilers/183-hour-of-devastation', mtgscardurl='http://www.mtgsalvation.com/cards/hour-of-devastation/', exemptlist=[]): - page = requests.get(url) - tree = html.fromstring(page.content) - cards = {} - cardstree = tree.xpath('//*[contains(@class, "log-card")]') - for child in cardstree: - if child.text in exemptlist: - continue - childurl = mtgscardurl + child.attrib['data-card-id'] + '-' + child.text.replace( - ' ', '-').replace("'", "").replace(',', '').replace('-//', '') - cardpage = requests.get(childurl) - tree = html.fromstring(cardpage.content) - cardtree = tree.xpath('//img[contains(@class, "card-spoiler-image")]') - try: - cardurl = cardtree[0].attrib['src'] - except: - cardurl = '' - pass - cards[child.text] = { - "url": cardurl - } - time.sleep(.2) - return cards - - -def list_mtgs_gallery(url=''): - if url == '': - return '' - page = requests.get(url) - tree = html.fromstring(page.content) - cards = [] - cardstree = tree.xpath('//*[contains(@class, "log-card")]') - for child in cardstree: - cards.append(child.text) - return cards diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..4f09ee6b --- /dev/null +++ b/mypy.ini @@ -0,0 +1,18 @@ +[mypy] +python_version = 3.7 + +check_untyped_defs = True +disallow_untyped_calls = True +disallow_untyped_defs = True +disallow_subclassing_any = True +follow_imports = normal +incremental = True +ignore_missing_imports = True +strict_optional = True +warn_no_return = True +warn_redundant_casts = True +warn_return_any = True +warn_unused_ignores = True + +[mypy-pkg/generated_code/*] +ignore_errors = True diff --git a/mythic_scraper.py b/mythic_scraper.py deleted file mode 100644 index 82cc4587..00000000 --- a/mythic_scraper.py +++ /dev/null @@ -1,87 +0,0 @@ -# -*- coding: utf-8 -*- -import requests -import time -from bs4 import BeautifulSoup as BS -from bs4 import Comment - - -# mtgjson is optional, will ignore cards found if passed -def get_mythic_cards(url='http://mythicspoiler.com/ixa/', mtgjson=False): - cards = {'cards': []} - r = requests.get(url) - soup = BS(r.text, "html.parser") - cardurls = soup.find_all('a', 'card') - urllist = [] - for cardurl in cardurls: - try: - urllist.append(url + str(cardurl).split("href=\"") - [1].split('"> 0: - for x in range(0, len(full_cost)): - individual_costs.append('{' + str(full_cost[x]).upper() + '}') - return ''.join(individual_costs) - - -def error_check(mtgjson, card_corrections={}): - errors = [] - for card in mtgjson['cards']: - for key in card: - if key == "": - errors.append({"name": card['name'], "key": key, "value": ""}) - requiredKeys = ['name', 'type', 'types'] - for requiredKey in requiredKeys: - if not requiredKey in card: - errors.append( - {"name": card['name'], "key": key, "missing": True}) - if 'text' in card: - card['text'] = card['text'].replace('', '').replace( - '', '').replace('', '').replace(' 0: - if not 'manaCost' in card: - errors.append( - {"name": card['name'], "key": "manaCost", "value": "", "match": card['cmc']}) - else: - if 'manaCost' in card: - errors.append( - {"name": card['name'], "key": "manaCost", "oldvalue": card['manaCost'], "fixed": True}) - del card["manaCost"] - if 'colors' in card: - if not 'colorIdentity' in card: - if 'text' in card: - if not 'devoid' in card['text'].lower(): - errors.append( - {"name": card['name'], "key": "colorIdentity", "value": ""}) - else: - errors.append( - {"name": card['name'], "key": "colorIdentity", "value": ""}) - if 'colorIdentity' in card: - if not 'colors' in card: - # this one will false positive on emerge cards - if not 'Land' in card['type'] and not 'Artifact' in card['type'] and not 'Eldrazi' in card['type']: - if 'text' in card: - if not 'emerge' in card['text'].lower() and not 'devoid' in card['text'].lower(): - errors.append( - {"name": card['name'], "key": "colors", "value": ""}) - else: - errors.append( - {"name": card['name'], "key": "colors", "value": ""}) - # if not 'Land' in card['type'] and not 'Artifact' in card['type'] and not 'Eldrazi' in card['type']: - # errors.append({"name": card['name'], "key": "colors", "value": ""}) - if not 'url' in card: - errors.append({"name": card['name'], "key": "url", "value": ""}) - elif len(card['url']) < 10: - errors.append({"name": card['name'], "key": "url", "value": ""}) - if not 'number' in card: - errors.append({"name": card['name'], "key": "number", "value": ""}) - if not 'types' in card: - errors.append({"name": card['name'], "key": "types", "value": ""}) - else: - for type in card['types']: - if type not in ['Creature', 'Artifact', 'Conspiracy', 'Enchantment', 'Instant', 'Land', 'Phenomenon', 'Plane', 'Planeswalker', 'Scheme', - 'Sorcery', 'Tribal', 'Vanguard']: - errors.append({"name": card['name'], "key": "types", "value":card['types']}) - - # we're going to loop through again and make sure split cards get paired - for card in mtgjson['cards']: - if 'layout' in card: - if card['layout'] == 'split' or card['layout'] == 'meld' or card['layout'] == 'aftermath': - if not 'names' in card: - errors.append( - {"name": card['name'], "key": "names", "value": ""}) - else: - for related_card_name in card['names']: - if related_card_name != card['name']: - related_card = False - for card2 in mtgjson['cards']: - if card2['name'] == related_card_name: - related_card = card2 - if not related_card: - errors.append( - {"name": card['name'], "key": "names", "value": card['names']}) - else: - if 'colors' in related_card: - for color in related_card['colors']: - if not 'colors' in card: - card['colors'] = [color] - elif not color in card['colors']: - card['colors'].append(color) - if 'colorIdentity' in related_card: - for colorIdentity in related_card['colorIdentity']: - if not 'colorIdentity' in card: - card['colorIdentity'] = [ - colorIdentity] - elif not colorIdentity in card['colorIdentity']: - card['colorIdentity'].append( - colorIdentity) - if 'number' in card: - if not 'a' in card['number'] and not 'b' in card['number'] and not 'c' in card['number']: - errors.append( - {"name": card['name'], "key": "number", "value": card['number']}) - - for card in mtgjson['cards']: - for cardCorrection in card_corrections: - if card['name'] == cardCorrection: - for correctionType in card_corrections[cardCorrection]: - # if not correctionType in card and correctionType not in : - # sys.exit("Invalid correction for " + cardCorrection + " of type " + card) - if correctionType == 'number': - card_corrections[cardCorrection]['number'] = str(card_corrections[cardCorrection]['number']) - if not correctionType == 'name': - if correctionType == 'img': - card['url'] = card_corrections[cardCorrection][correctionType] - else: - card[correctionType] = card_corrections[cardCorrection][correctionType] - if 'name' in card_corrections[cardCorrection]: - card['name'] = card_corrections[cardCorrection]['name'] - - return [mtgjson, errors] - - -def remove_corrected_errors(errorlog=[], card_corrections=[], print_fixed=False): - errorlog2 = {} - for error in errorlog: - if not print_fixed: - if 'fixed' in error and error['fixed'] == True: - continue - removeError = False - for correction in card_corrections: - for correction_type in card_corrections[correction]: - if error['name'] == correction: - if error['key'] == correction_type: - removeError = True - if not removeError: - if not error['name'] in errorlog2: - errorlog2[error['name']] = {} - if not 'value' in error: - error['value'] = "" - errorlog2[error['name']][error['key']] = error['value'] - return errorlog2 - - -def get_image_urls(mtgjson, isfullspoil, setinfo=False): - if not 'mythicCode' in setinfo: - setinfo['mythicCode'] = setinfo['code'] - IMAGES = 'https://magic.wizards.com/en/products/' + \ - setinfo['name'].lower().replace(' ', '-') + '/cards' - IMAGES2 = 'http://mythicspoiler.com/newspoilers.html' - IMAGES3 = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + \ - setinfo['name'].lower().replace('of', '').replace(' ', ' ').replace(' ', '-') - - text = requests.get(IMAGES).text - text2 = requests.get(IMAGES2).text - text3 = requests.get(IMAGES3).text - wotcpattern = r'{}.*?' - WOTC = [] - for c in mtgjson['cards']: - if 'names' in c: - cardname = ' // '.join(c['names']) - else: - cardname = c['name'] - match = re.search(wotcpattern.format( - cardname.replace('\'', '’')), text, re.DOTALL) - if match: - c['url'] = match.groupdict()['img'] - else: - match3 = re.search(wotcpattern2.format( - cardname.replace('\'', '’')), text3) - if match3: - c['url'] = match3.groupdict()['img'] - else: - match4 = re.search(wotcpattern.format( - cardname.replace('\'', '’')), text3, re.DOTALL) - if match4: - c['url'] = match4.groupdict()['img'] - else: - match2 = re.search(mythicspoilerpattern.format(cardname.lower().replace(' // ', '').replace( - ' ', '').replace(''', '').replace('-', '').replace('\'', '').replace(',', '')), text2, re.DOTALL) - if match2 and not isfullspoil: - c['url'] = match2.group(0).replace( - ' src="', 'http://mythicspoiler.com/').replace('">', '') - pass - if 'wizards.com' in c['url']: - WOTC.append(c['name']) - if setinfo: - if 'mtgsurl' in setinfo and 'mtgscardpath' in setinfo: - mtgsImages = mtgs_scraper.scrape_mtgs_images( - setinfo['mtgsurl'], setinfo['mtgscardpath'], WOTC) - for card in mtgjson['cards']: - if card['name'] in mtgsImages: - if mtgsImages[card['name']]['url'] != '': - card['url'] = mtgsImages[card['name']]['url'] - - #for card in mtgjson['cards']: - # if len(str(card['url'])) < 10: - # print(card['name'] + ' has no image.') - return mtgjson - - -def write_xml(mtgjson, code, name, releaseDate): - if not 'cards' in mtgjson or not mtgjson['cards'] or mtgjson['cards'] == []: - return - if not os.path.isdir('out/'): - os.makedirs('out/') - cardsxml = open('out/' + code + '.xml', 'w+') - cardsxml.truncate() - count = 0 - dfccount = 0 - newest = '' - related = 0 - cardsxml.write("\n" - "\n" - "\n\n" - + code + - "\n" - "" - + name + - "\n" - "Expansion\n" - "" - + releaseDate + - "\n" - "\n" - "\n" - "\n") - # print (mtgjson) - for card in mtgjson["cards"]: - if 'names' in card: - if 'layout' in card and card['layout'] != 'double-faced': - if card["name"] == card['names'][1]: - continue - if count == 0: - newest = card["name"] - count += 1 - name = card["name"] - if "manaCost" in card.keys(): - manacost = card["manaCost"].replace('{', '').replace('}', '') - else: - manacost = "" - if "power" in card.keys() or "toughness" in card.keys(): - if card["power"]: - pt = str(card["power"]) + "/" + str(card["toughness"]) - else: - pt = 0 - else: - pt = 0 - if "text" in card.keys(): - text = card["text"] - else: - text = "" - cardcmc = str(card['cmc']) - cardtype = card["type"] - if "names" in card.keys(): - if "layout" in card: - if card['layout'] == 'split' or card['layout'] == 'aftermath': - if 'names' in card: - if card['name'] == card['names'][0]: - for jsoncard in mtgjson["cards"]: - if jsoncard['name'] == card['names'][1]: - cardtype += " // " + jsoncard["type"] - newmanacost = "" - if 'manaCost' in jsoncard: - newmanacost = jsoncard['manaCost'] - manacost += " // " + \ - newmanacost.replace( - '{', '').replace('}', '') - cardcmc += " // " + str(jsoncard["cmc"]) - text += "\n---\n" + jsoncard["text"] - name += " // " + jsoncard['name'] - elif card['layout'] == 'double-faced': - if not 'names' in card: - print (card['name'] + ' is double-faced but no "names" key') - else: - for dfcname in card['names']: - if dfcname != card['name']: - related = dfcname - else: - print (card["name"] + " has names, but layout != split, aftermath, or double-faced") - else: - print (card["name"] + " has multiple names and no 'layout' key") - - tablerow = "1" - if "Land" in cardtype: - tablerow = "0" - elif "Sorcery" in cardtype: - tablerow = "3" - elif "Instant" in cardtype: - tablerow = "3" - elif "Creature" in cardtype: - tablerow = "2" - - if 'number' in card: - if 'b' in str(card['number']): - if 'layout' in card: - if card['layout'] == 'split' or card['layout'] == 'aftermath': - # print ("We're skipping " + card['name'] + " because it's the right side of a split card") - continue - - cardsxml.write("\n") - cardsxml.write("" + name + "\n") - cardsxml.write( - '' + code + '\n') - cardsxml.write( - "" + manacost + "\n") - cardsxml.write("" + cardcmc + "\n") - if 'colors' in card.keys(): - colorTranslate = { - "White": "W", - "Blue": "U", - "Black": "B", - "Red": "R", - "Green": "G" - } - for color in card['colors']: - cardsxml.write( - '' + colorTranslate[color] + '\n') - if name + ' enters the battlefield tapped' in text: - cardsxml.write("1\n") - cardsxml.write("" + cardtype + "\n") - if pt: - cardsxml.write("" + pt + "\n") - if 'loyalty' in card.keys(): - cardsxml.write("" + str(card['loyalty']) + "\n") - cardsxml.write("" + tablerow + "\n") - cardsxml.write("" + text + "\n") - if related: - # for relatedname in related: - cardsxml.write( - "" + related + "\n") - related = '' - - cardsxml.write("\n") - - cardsxml.write("\n") - - if count > 0: - print ('XML Stats for ' + code) - print ('Total cards: ' + str(count)) - if dfccount > 0: - print ('DFC: ' + str(dfccount)) - print ('Newest: ' + str(newest)) - else: - print ('Set ' + code + ' has no spoiled cards.') - - -def write_combined_xml(mtgjson, setinfos): - if not os.path.isdir('out/'): - os.makedirs('out/') - cardsxml = open('out/spoiler.xml', 'w+') - cardsxml.truncate() - cardsxml.write("\n" - "\n") - cardsxml.write("\n") - cardsxml.write("\n") - for setcode in mtgjson: - setobj = mtgjson[setcode] - if 'cards' in setobj and len(setobj['cards']) > 0: - cardsxml.write("\n" - + setcode + - "\n" - "" - + setobj['name'] + - "\n" - "" - + setobj['type'].title() + - "\n" - "" - + setobj['releaseDate'] + - "\n" - "\n") - cardsxml.write( - "\n" - "\n") - count = 0 - dfccount = 0 - newest = '' - related = 0 - for setcode in mtgjson: - setobj = mtgjson[setcode] - for card in setobj["cards"]: - if 'layout' in card and (card['layout'] == 'split' or card['layout'] == 'aftermath'): - if 'b' in card["number"]: - continue - if count == 0: - newest = card["name"] - count += 1 - name = card["name"] - if "manaCost" in card.keys(): - manacost = card["manaCost"].replace('{', '').replace('}', '') - else: - manacost = "" - if "power" in card.keys() or "toughness" in card.keys(): - if card["power"]: - pt = str(card["power"]) + "/" + str(card["toughness"]) - else: - pt = 0 - else: - pt = 0 - if "text" in card.keys(): - text = card["text"] - else: - text = "" - cardcmc = str(card['cmc']) - cardtype = card["type"] - if "names" in card.keys(): - if "layout" in card: - if card["layout"] != 'split' and card["layout"] != 'aftermath': - if len(card["names"]) > 1: - if card["names"][0] == card["name"]: - related = card["names"][1] - text += '\n\n(Related: ' + \ - card["names"][1] + ')' - dfccount += 1 - elif card['names'][1] == card['name']: - related = card["names"][0] - text += '\n\n(Related: ' + \ - card["names"][0] + ')' - else: - for cardb in setobj['cards']: - if cardb['name'] == card["names"][1]: - cardtype += " // " + cardb['type'] - manacost += " // " + \ - (cardb["manaCost"]).replace( - '{', '').replace('}', '') - cardcmc += " // " + str(cardb["cmc"]) - text += "\n---\n" + cardb["text"] - name += " // " + cardb['name'] - else: - print (card["name"] + " has multiple names and no 'layout' key") - - tablerow = "1" - if "Land" in cardtype: - tablerow = "0" - elif "Sorcery" in cardtype: - tablerow = "3" - elif "Instant" in cardtype: - tablerow = "3" - elif "Creature" in cardtype: - tablerow = "2" - - if 'number' in card: - if 'b' in card['number']: - if 'layout' in card: - if card['layout'] == 'split' or card['layout'] == 'aftermath': - # print ("We're skipping " + card['name'] + " because it's the right side of a split card") - continue - - cardsxml.write("\n") - cardsxml.write("" + name + "\n") - cardsxml.write( - '' + setcode + '\n') - if 'colors' in card.keys(): - colorTranslate = { - "White": "W", - "Blue": "U", - "Black": "B", - "Red": "R", - "Green": "G" - } - for color in card['colors']: - cardsxml.write( - '' + colorTranslate[color] + '\n') - if related: - # for relatedname in related: - cardsxml.write( - "" + related + "\n") - related = '' - cardsxml.write( - "" + manacost + "\n") - cardsxml.write("" + cardcmc + "\n") - cardsxml.write("" + cardtype + "\n") - if pt: - cardsxml.write("" + pt + "\n") - cardsxml.write("" + tablerow + "\n") - cardsxml.write("" + text + "\n") - if name + ' enters the battlefield tapped' in text: - cardsxml.write("1\n") - if 'loyalty' in card.keys(): - cardsxml.write( - "" + str(card['loyalty']) + "\n") - cardsxml.write("\n") - - cardsxml.write("\n") - - print ('XML COMBINED STATS') - print ('Total cards: ' + str(count)) - if dfccount > 0: - print ('DFC: ' + str(dfccount)) - print ('Newest: ' + str(newest)) - - -def pretty_xml(infile): - # or xml.dom.minidom.parseString(xml_string) - prettyxml = xml.dom.minidom.parse(infile) - pretty_xml_as_string = prettyxml.toprettyxml(newl='') - return pretty_xml_as_string - - -def make_allsets(AllSets, mtgjson, code): - AllSets[code] = mtgjson - return AllSets - - -def scrape_masterpieces(url='http://www.mtgsalvation.com/spoilers/181-amonkhet-invocations', mtgscardurl='http://www.mtgsalvation.com/cards/amonkhet-invocations/'): - page = requests.get(url) - tree = html.fromstring(page.content) - cards = [] - cardstree = tree.xpath('//*[contains(@class, "log-card")]') - for child in cardstree: - childurl = mtgscardurl + \ - child.attrib['data-card-id'] + '-' + child.text.replace(' ', '-') - cardpage = requests.get(childurl) - tree = html.fromstring(cardpage.content) - cardtree = tree.xpath('//img[contains(@class, "card-spoiler-image")]') - try: - cardurl = cardtree[0].attrib['src'] - except: - cardurl = '' - pass - card = { - "name": child.text, - "url": cardurl - } - cards.append(card) - return cards - - -def make_masterpieces(headers, AllSets, spoil): - masterpieces = scrape_masterpieces( - headers['mtgsurl'], headers['mtgscardpath']) - masterpieces2 = [] - for masterpiece in masterpieces: - matched = False - if headers['code'] in AllSets: - for oldMasterpiece in AllSets[headers['code']]['cards']: - if masterpiece['name'] == oldMasterpiece['name']: - matched = True - for set in AllSets: - if not matched: - for oldcard in AllSets[set]['cards']: - if oldcard['name'] == masterpiece['name'] and not matched: - mixcard = oldcard - mixcard['url'] = masterpiece['url'] - mixcard['rarity'] = 'Mythic Rare' - masterpieces2.append(mixcard) - matched = True - break - for spoilcard in spoil['cards']: - if not matched: - if spoilcard['name'] == masterpiece['name']: - mixcard = spoilcard - mixcard['rarity'] = 'Mythic Rare' - mixcard['url'] = masterpiece['url'] - masterpieces2.append(mixcard) - matched = True - break - if not matched: - print ("We couldn't find a card object to assign the data to for masterpiece " + masterpiece['name']) - masterpieces2.append(masterpiece) - mpsjson = { - "name": headers['name'], - "alternativeNames": headers['alternativeNames'], - "code": headers['code'], - "releaseDate": headers['releaseDate'], - "border": "black", - "type": "masterpiece", - "cards": masterpieces2 - } - return mpsjson - - -def set_has_cards(setinfo, manual_cards, mtgjson): - if setinfo['code'] in manual_cards or setinfo['code'] in mtgjson: - return True - for card in manual_cards['cards']: - if set in card: - if set == setinfo['code']: - return True - -def download_file(url): - local_filename = url.split('/')[-1] - headers = {'user-agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko / 20071127 Firefox / 2.0.0.11'} - r = requests.get(url, stream=True, headers=headers) - with open(local_filename, 'wb') as f: - for chunk in r.iter_content(chunk_size=1024): - if chunk: # filter out keep-alive new chunks - f.write(chunk) - return local_filename - -def get_allsets(): - file_location = download_file('https://mtgjson.com/json/AllSets.json.xz') - AllSets = json.loads(lzma.open(file_location).read()) - return AllSets - - -def add_headers(mtgjson, setinfos): - mtgjson2 = { - "border": "black", - "code": setinfos['code'], - "name": setinfos['name'], - "releaseDate": setinfos['releaseDate'], - "type": setinfos['type'], - "cards": mtgjson['cards'] - } - if not 'noBooster' in setinfos: - mtgjson2['booster'] = [ - [ - "rare", - "mythic rare" - ], - "uncommon", - "uncommon", - "uncommon", - "common", - "common", - "common", - "common", - "common", - "common", - "common", - "common", - "common", - "common", - "land", - "marketing" - ], - if 'block' in setinfos: - mtgjson2['block'] = setinfos['block'] - return mtgjson2 diff --git a/tox.ini b/tox.ini new file mode 100644 index 00000000..7a627e84 --- /dev/null +++ b/tox.ini @@ -0,0 +1,56 @@ +[tox] +envlist = isort-inplace, black-inplace, mypy, lint + +[testenv] +basepython = python3.7 +deps = -r{toxinidir}/requirements.txt + -r{toxinidir}/requirements_test.txt +setenv = PYTHONPATH = {toxinidir} +passenv = PYTHONPATH = {toxinidir} + +[testenv:black-inplace] +description = Run black and edit all files in place +skip_install = True +deps = black +commands = black magic_spoiler/ + +# Active Tests +[testenv:yapf-inplace] +description = Run yapf and edit all files in place +skip_install = True +deps = yapf +commands = yapf --in-place --recursive --parallel magic_spoiler/ + +[testenv:mypy] +description = mypy static type checking only +deps = mypy +commands = mypy {posargs:magic_spoiler/} + +[testenv:lint] +description = Run linting tools +deps = pylint +commands = pylint magic_spoiler/ --rcfile=.pylintrc + +# Inactive Tests +[testenv:yapf-check] +description = Dry-run yapf to see if reformatting is needed +skip_install = True +deps = yapf +# TODO make it error exit if there's a diff +commands = yapf --diff --recursive --parallel magic_spoiler/ + +[testenv:isort-check] +description = dry-run isort to see if imports need resorting +deps = isort +commands = isort --check-only + +[testenv:isort-inplace] +description = Sort imports +deps = isort +commands = isort -rc magic_spoiler/ + +[testenv:unit] +description = Run unit tests with coverage and mypy type checking +extras = dev +deps = pytest +commands = pytest --cov=magic_spoiler {posargs:tests/} diff --git a/verify_files.py b/verify_files.py deleted file mode 100644 index fff97227..00000000 --- a/verify_files.py +++ /dev/null @@ -1,24 +0,0 @@ -import yaml -import sys - -def load_file(input_file, lib_to_use): - try: - with open(input_file) as data_file: - if lib_to_use == 'yaml': - output_file = yaml.safe_load(data_file) - elif lib_to_use == 'yaml_multi': - output_file = [] - for doc in yaml.safe_load_all(data_file): - output_file.append(doc) - return output_file - except Exception as ex: - print ("Unable to load file: " + input_file + "\nException information:\n" + str(ex.args)) - sys.exit("Unable to load file: " + input_file) - -if __name__ == '__main__': - setinfos = load_file('set_info.yml','yaml_multi') - manual_sets = load_file('cards_manual.yml','yaml') - card_corrections = load_file('cards_corrections.yml','yaml') - delete_cards = load_file('cards_delete.yml','yaml') - - print ("Pre-flight: All input files loaded successfully.") diff --git a/wizards_scraper.py b/wizards_scraper.py deleted file mode 100644 index eaa0389d..00000000 --- a/wizards_scraper.py +++ /dev/null @@ -1,269 +0,0 @@ -# -*- coding: utf-8 -*- -import requests -from lxml import html -from PIL import Image -import os - - -def scrape_fullspoil(url="http://magic.wizards.com/en/articles/archive/card-image-gallery/hour-devastation", setinfo={"code": "HOU"}, showRarityColors=False, showFrameColors=False, manual_cards=[], delete_cards=[]): - if 'name' in setinfo: - url = 'http://magic.wizards.com/en/articles/archive/card-image-gallery/' + setinfo['name'].lower().replace('of', '').replace( - ' ', ' ').replace(' ', '-') - page = requests.get(url) - tree = html.fromstring(page.content) - cards = [] - cardtree = tree.xpath('//*[@id="content-detail-page-of-an-article"]') - for child in cardtree: - cardElements = child.xpath('//*/p/img') - cardcount = 0 - for cardElement in cardElements: - card = { - "name": cardElement.attrib['alt'].replace(u"\u2019", '\'').split(' /// ')[0], - "img": cardElement.attrib['src'] - } - card["url"] = card["img"] - #card["cmc"] = 0 - #card["manaCost"] = "" - #card["type"] = "" - #card["types"] = [] - #card["text"] = "" - #card["colorIdentity"] = [""] - - # if card['name'] in split_cards: - # card["names"] = [card['name'], split_cards[card['name']]] - # card["layout"] = "split" - #notSplit = True - # for backsplit in split_cards: - # if card['name'] == split_cards[backsplit]: - # notSplit = False - # if not card['name'] in delete_cards: - cards.append(card) - cardcount += 1 - fullspoil = {"cards": cards} - print ("Spoil Gallery has " + str(cardcount) + " cards.") - download_images(fullspoil['cards'], setinfo['code']) - fullspoil = get_rarities_by_symbol(fullspoil, setinfo['code']) - fullspoil = get_mana_symbols(fullspoil, setinfo['code']) - #fullspoil = get_colors_by_frame(fullspoil, setinfo['code']) - return fullspoil - - -def get_rarities_by_symbol(fullspoil, setcode): - symbolPixels = (240, 219, 242, 221) - highVariance = 15 - colorAverages = { - "Common": [30, 27, 28], - "Uncommon": [121, 155, 169], - "Rare": [166, 143, 80], - "Mythic Rare": [201, 85, 14] - } - symbolCount = 0 - for card in fullspoil['cards']: - try: - cardImage = Image.open( - 'images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg') - except: - continue - pass - if '//' in card['name']: - setSymbol = cardImage.crop((240, 138, 242, 140)) - else: - setSymbol = cardImage.crop(symbolPixels) - cardHistogram = setSymbol.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - for color in colorAverages: - colorVariance = 0 - colorVariance = colorVariance + \ - abs(colorAverages[color][0] - reds) - colorVariance = colorVariance + \ - abs(colorAverages[color][1] - greens) - colorVariance = colorVariance + \ - abs(colorAverages[color][2] - blues) - if colorVariance < variance: - variance = colorVariance - card['rarity'] = color - if variance > highVariance: - # if a card isn't close to any of the colors, it's probably a planeswalker? make it mythic. - print (card['name'], 'has high variance of', variance, ', closest rarity is', card['rarity']) - card['rarity'] = "Mythic Rare" - # print (card['name'], '$', reds, greens, blues) - if symbolCount < 10: - setSymbol.save( - 'images/' + card['name'].replace(' // ', '') + '.symbol.jpg') - symbolCount += 1 - return fullspoil - - -def get_colors_by_frame(fullspoil, setcode): - framePixels = (20, 11, 76, 16) - highVariance = 10 - colorAverages = { - "White": [231, 225, 200], - "Blue": [103, 193, 230], - "Black": [58, 61, 54], - "Red": [221, 122, 101], - "Green": [118, 165, 131], - "Multicolor": [219, 200, 138], - "Artifact": [141, 165, 173], - "Colorless": [216, 197, 176], - } - symbolCount = 0 - for card in fullspoil['cards']: - try: - cardImage = Image.open( - 'images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg') - except: - continue - pass - cardColor = cardImage.crop(framePixels) - - cardHistogram = cardColor.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - for color in colorAverages: - colorVariance = 0 - colorVariance = colorVariance + \ - abs(colorAverages[color][0] - reds) - colorVariance = colorVariance + \ - abs(colorAverages[color][1] - greens) - colorVariance = colorVariance + \ - abs(colorAverages[color][2] - blues) - if colorVariance < variance: - variance = colorVariance - card['colors'] = [color] - return fullspoil - - -def get_mana_symbols(fullspoil={}, setcode="HOU"): - manaBoxes = [(234, 23, 244, 33), (220, 23, 230, 33), - (206, 23, 216, 33), (192, 23, 202, 33), (178, 23, 188, 33)] - highVariance = 0 - colorAverages = { - "W": [126, 123, 110], - "U": [115, 140, 151], - "B": [105, 99, 98], - "R": [120, 89, 77], - "G": [65, 78, 69], - "1": [162, 156, 154], - "2": [155, 148, 147], - "3": [160, 153, 152], - "4": [149, 143, 141], - "5": [155, 149, 147], - "6": [151, 145, 143], - "7": [169, 163, 161], - "X": [160, 154, 152] - } - for card in fullspoil['cards']: - try: - cardImage = Image.open( - 'images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg') - except: - continue - pass - card['manaCost'] = "" - for manaBox in manaBoxes: - manaSymbol = cardImage.crop(manaBox) - cardHistogram = manaSymbol.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum(i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum(i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum(i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - for color in colorAverages: - colorVariance = 0 - colorVariance = colorVariance + \ - abs(colorAverages[color][0] - reds) - colorVariance = colorVariance + \ - abs(colorAverages[color][1] - greens) - colorVariance = colorVariance + \ - abs(colorAverages[color][2] - blues) - if colorVariance < variance: - variance = colorVariance - closestColor = color - if variance < 10: - # if card['name'] in ["Mirage Mirror", "Uncage the Menagerie", "Torment of Hailfire"]: - # print (card['name'] + " " + str(reds) + " " + str(greens) + " " + str(blues)) - if closestColor in ["2", "5"]: - twoVSfive = ( - manaBox[0] + 1, manaBox[1] + 4, manaBox[2] - 5, manaBox[3] - 2) - manaSymbol = cardImage.crop(twoVSfive) - cardHistogram = manaSymbol.histogram() - reds = cardHistogram[0:256] - greens = cardHistogram[256:256 * 2] - blues = cardHistogram[256 * 2: 256 * 3] - reds = sum( - i * w for i, w in enumerate(reds)) / sum(reds) - greens = sum( - i * w for i, w in enumerate(greens)) / sum(greens) - blues = sum( - i * w for i, w in enumerate(blues)) / sum(blues) - variance = 768 - colorVariance = 0 - colorVariance = colorVariance + abs(175 - reds) - colorVariance = colorVariance + abs(168 - greens) - colorVariance = colorVariance + abs(166 - blues) - if colorVariance < 10: - closestColor = "2" - elif colorVariance > 110 and colorVariance < 120: - closestColor = "5" - else: - continue - card['manaCost'] = closestColor + card['manaCost'] - return fullspoil - - -def smash_fullspoil(mtgjson, fullspoil): - different_keys = {} - for mtgjson_card in mtgjson['cards']: - for fullspoil_card in fullspoil['cards']: - if mtgjson_card['name'] == fullspoil_card['name']: - for key in fullspoil_card: - if key in mtgjson_card: - if mtgjson_card[key] != fullspoil_card[key] and key != 'colors': - if not fullspoil_card['name'] in different_keys: - different_keys[fullspoil_card['name']] = { - key: fullspoil_card[key]} - else: - different_keys[fullspoil_card['name'] - ][key] = fullspoil_card[key] - for fullspoil_card in fullspoil['cards']: - WOTC_only = [] - match = False - for mtgjson_card in mtgjson['cards']: - if mtgjson_card['name'] == fullspoil_card['name']: - match = True - if not match: - WOTC_only.append(fullspoil_card['name']) - if len(WOTC_only) > 0: - print ("WOTC only cards: ") - print (WOTC_only) - print (different_keys) - - -def download_images(mtgjson, setcode): - if not os.path.isdir('images/' + setcode): - os.makedirs('images/' + setcode) - if 'cards' in mtgjson: - jsoncards = mtgjson['cards'] - else: - jsoncards = mtgjson - for card in jsoncards: - if card['url']: - if os.path.isfile('images/' + setcode + '/' + card['name'].replace(' // ', '') + '.jpg'): - continue - # print ('Downloading ' + card['url'] + ' to images/' + setcode + '/' + card['name'].replace(' // ','') + '.jpg') - requests.get(card['url'], 'images/' + setcode + - '/' + card['name'].replace(' // ', '') + '.jpg')