Skip to content

Commit

Permalink
scrape_docs: refactor heading tag list definitions
Browse files (browse the repository at this point in the history)
  • Loading branch information
JOJ0 committed Sep 10, 2024
1 parent ff8e381 commit 5a853cd
Showing 1 changed file with 6 additions and 4 deletions.
10 changes: 6 additions & 4 deletions scrape_docs.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -22,16 +22,18 @@ def scrape(output, url):
apidoc = requests.get(chapter).text
soup = BeautifulSoup(apidoc, 'html.parser')

any_heading_tag = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']

elements = soup.find_all(
['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'a'],
[*any_heading_tag, 'a'],
)

for e in elements:
if e.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
if output in ['default', 'rst']:
if e.name in any_heading_tag:
if output in ['default']:
print(f'{e.name}: {e.text}')
if e.name == 'a':
if e.parent.name in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
if e.parent.name in any_heading_tag:
link = e['href']
if output == 'default':
print(f'{e.text} {link}')
Expand Down

0 comments on commit 5a853cd

Please sign in to comment.