Skip to content

Commit

Permalink
scrape_docs output formats naming and improvements
Browse files (browse the repository at this point in the history)
Rename the "default" output format to "debug", make it slightly more useful,
and make "csv" the new default output format.
  • Loading branch information
JOJ0 committed Sep 10, 2024
1 parent 8000859 commit 9c762f4
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions scrape_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

@click.command()
@click.option(
'--output', '-o', default='default',
type=click.Choice(['default', 'rst', 'csv']), show_choices=True,
'--output', '-o', default='csv',
type=click.Choice(['debug', 'rst', 'csv']), show_choices=True,
help='''Output format "default" prints human readable on shell, "csv" is a
two-column comma separated value format.''')
@click.argument('URL')
Expand All @@ -26,13 +26,13 @@ def scrape(output, url):
elements = soup.find_all([*any_heading_tag, 'a'],)

for e in elements:
if e.name in any_heading_tag and output == 'default':
if e.name in any_heading_tag and output == 'debug':
print(f'{e.name}: {e.text}')
if e.name == 'a':
if e.parent.name in any_heading_tag:
link = e['href']
if output == 'default':
print(f'{e.text} {link}')
if output == 'debug':
print(f'Element text:\t{e.text}\nLink/Anchor:\t{link}')
if output in ['rst', 'csv']:
parts = chapter.split('/admin_api/')
fulllink = f'{parts[0]}/admin_api/{parts[1]}{link}'
Expand All @@ -50,7 +50,7 @@ def scrape(output, url):
print(f'{left_col},')
elif output == 'rst':
print(rst)
if output == 'default': # Final spacing only with default format
if output == 'debug': # Final spacing only with debug format
print()

# print(soup.prettify())
Expand Down

0 comments on commit 9c762f4

Please sign in to comment.