Skip to content

Commit

Permalink
Merge pull request #3 from jnthn-b/master
Browse files Browse the repository at this point in the history
Multi-Threading!
  • Loading branch information
JoHoop authored Feb 8, 2021
2 parents 3e50c38 + 37782c2 commit ca71f31
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 22 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
.vscode/settings.json
config.ini
.DS_Store
.idea
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ to run for the studip platform of the university of bremen:
https://elearning.uni-bremen.de
But it should be possible to move this to any studip installation by changing
the urls.
This version uses multithreading to download, extract, and clean the
files of all your courses concurrently.

## Setup
It's just a script; there is no GUI or service file yet. To run it you need to have
Expand Down Expand Up @@ -32,3 +34,4 @@ Run the script by double click or from the command line

## Credits
[StudIP_Filename_Cleaner](https://github.com/Schlaurens/StudIP_Filename_Cleaner) courtesy of [Schlaurens](https://github.com/Schlaurens/)
Multi-Threading made by [Jonathan Bröring](https://github.com/jnthn-b)
57 changes: 35 additions & 22 deletions studip-sync.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
import requests, io, zipfile, os, configparser, re
from lxml import html
import configparser
import io
import os
import re
import requests
import zipfile
from bs4 import BeautifulSoup
import threading

login_url = 'https://elearning.uni-bremen.de/index.php?again=yes'
course_url = 'https://elearning.uni-bremen.de/dispatch.php/course/files?cid='
newest_url = 'https://elearning.uni-bremen.de/dispatch.php/course/files/newest_files?cid='

# Load the user settings from the config.ini that sits next to this script.
cfg = configparser.ConfigParser()
script_path = os.path.dirname(os.path.abspath(__file__))
try:
    # cfg.read() silently ignores a missing file but raises on malformed
    # content, so it belongs inside the try together with the lookups.
    cfg.read(os.path.join(script_path, 'config.ini'))
    user = cfg.get('settings', 'user')
    password = cfg.get('settings', 'password')
    path = cfg.get('settings', 'data_folder')
    new_only = cfg.get('settings', 'new_only')
except configparser.Error as e:
    print('error parsing config file')
    print(e)
    # Without these settings every later step would fail with a NameError,
    # so stop here instead of continuing with a half-initialised script.
    raise SystemExit(1)
else:
    print("Config Loaded!")

print('starting session')
session = requests.Session()
Expand All @@ -38,49 +44,56 @@
else:
print('login failed')

for course in cfg.items('courses'):
def getfiles(course):
    """Download, extract and clean the files of a single course.

    Args:
        course: A ``(name, course_id)`` pair as yielded by
            ``cfg.items('courses')``. ``name`` is used as the sub-folder
            below the configured data folder and as the prefix of the log
            output; ``course_id`` is appended to the course URLs as ``cid``.

    Uses the module-level ``session``, ``path``, ``new_only``,
    ``course_url`` and ``newest_url``. Returns ``None``. If the download
    request fails or does not deliver a zip archive, a message is printed
    and the course is skipped.
    """
    file_target = os.path.join(path, course[0])
    download_url = course_url + course[1]
    print('finding course', course[0], '(', course[1], ')')

    download_page = session.get(download_url)

    # NOTE(review): the prettify/re-parse round trip is kept from the
    # original code; confirm whether a single parse would be sufficient.
    parsed_content = BeautifulSoup(download_page.content, 'html.parser')
    pretty_content = BeautifulSoup(parsed_content.prettify(), 'html.parser')

    print(course[0], ': extracting post parameters')
    security_token = pretty_content.find(
        'input', attrs={'type': 'hidden', 'name': 'security_token'}
    ).attrs['value']
    parent_folder_id = pretty_content.find(
        'input', attrs={'type': 'hidden', 'name': 'parent_folder_id'}
    ).attrs['value']
    post_url = pretty_content.find(
        'form',
        attrs={
            'method': 'post',
            'action': re.compile('^https://elearning.uni-bremen.de/dispatch.php/file/bulk/'),
        },
    ).attrs['action']
    checkboxes = pretty_content.find_all(
        'input',
        attrs={
            'class': 'studip-checkbox',
            'type': 'checkbox',
            'name': 'ids[]',
            'id': re.compile('^file_checkbox_'),
        },
    )
    ids = [checkbox.attrs['value'] for checkbox in checkboxes]

    token = {
        'security_token': security_token,
        'parent_folder_id': parent_folder_id,
        'ids[]': ids,
        'download': '',
    }

    if new_only == 'yes':
        # The "newest files" endpoint is posted to without any form data.
        post_url = newest_url + course[1]
        token = {}

    print(course[0], ': requesting download')
    r = session.post(post_url, data=token)
    if r.status_code != 200:
        print(course[0], ': request failed')
        # A failed response carries no archive; do not try to unzip it.
        return

    print(course[0], ': extracting zip')
    try:
        with zipfile.ZipFile(io.BytesIO(r.content)) as z:
            z.extractall(file_target)
    except zipfile.BadZipFile:
        # The server answered with something that is not a zip archive.
        print(course[0], ': response is not a zip archive')
        return

    # Matches the same prefixes as the original pattern '^[+[0-9]+]_'
    # (one or more of '+', '[' or digits, followed by ']_'), written with
    # explicit escapes and compiled once instead of once per file.
    prefix_pattern = re.compile(r'^[+\[0-9]+\]_')
    for root, dirs, files in os.walk(file_target):
        for file in files:
            if file == "archive_filelist.csv":
                os.remove(os.path.join(root, file))
                continue

            cleaned = prefix_pattern.sub('', file)
            if cleaned != file:
                os.replace(os.path.join(root, file), os.path.join(root, cleaned))

    print(course[0], ': cleaned files')

# Start one worker thread per entry of the [courses] config section; each
# worker runs getfiles() for its course.
for course in cfg.items('courses'):
    worker = threading.Thread(target=getfiles, args=(course,))
    worker.start()

0 comments on commit ca71f31

Please sign in to comment.