Skip to content

Commit

Permalink
Release slybot 0.13.3
Browse files Browse the repository at this point in the history
Update slybot to work with python 3.7 and Scrapy 1.6
  • Loading branch information
ruairif committed Jun 18, 2019
1 parent 11e10d5 commit c6f22ae
Show file tree
Hide file tree
Showing 19 changed files with 94 additions and 76 deletions.
7 changes: 3 additions & 4 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,11 @@ script:
- echo "Portia is at:"`git show -s --pretty=%d HEAD`
- git restore-mtime
- shopt -s extglob
- nvm install 6.10.0
- nvm use 6.10.0
- nvm install 10.16.0
- nvm use 10.16.0
- sudo mkdir -p ~/.npm ~/.node-gyp ~/.cache
- sudo chown -R ubuntu ~/.npm ~/.node-gyp ~/.cache
- npm install -g bower ember-cli --cache-min 999999
- ember version
- npm install -g bower ember-cli@2.6.3 --cache-min 999999
- docker/compile-assets.sh
- build_docker_image
- publish_to_dockerhub
Expand Down
47 changes: 17 additions & 30 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
language: python
python: 3.5
sudo: required
dist: xenial
python: 3.7
dist: bionic
services:
- docker
env:
- WHEELHOUSE=$HOME/.cache/wheelhouse PIP_FIND_LINKS=file://$WHEELHOUSE PIP_WHEEL_DIR=$WHEELHOUSE
cache:
Expand All @@ -11,42 +12,28 @@ cache:
- portiaui/node_modules
- portiaui/bower_components

before_install:
- docker build -t scrapinghub/portia .
- docker ps -a

install:
- source ${VIRTUAL_ENV}/bin/activate
- cd docker
- sudo -H ./provision.sh install_splash
- cd ..
- sudo -H pip install tox
- python3.5 -c 'import splash, qt5reactor' # Check it's in the python path
- cd portiaui
- nvm install 6.10.0
- nvm use 6.10.0
- npm install -g bower
- docker run scrapinghub/portia /app/docker/run-tests.sh
- pushd portiaui
- nvm install 10.16.0
- nvm use 10.16.0
- npm install -g bower ember-cli@2.6.3
- npm install
- bower install
- cd ..
- popd

before_script:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
- source ${VIRTUAL_ENV}/bin/activate
- export PYTHONPATH=`pwd`/slybot:`pwd`/slyd
- cd slyd
- python2.7 tests/testserver/server.py 2>&1 | grep -v 'HTTP/1.1" 200' &
- cd ..
- sleep 3 # give xvfb some time to start
script:
- cd ./slybot
- sudo -E tox
- cd ../portia_server
- ./manage.py test portia_orm.tests
- ./manage.py test portia_api.tests
- cd ../portiaui
- pushd portiaui
- npm rebuild node-sass
- npm test
- popd

before_deploy:
- cd ../slybot
- cd slybot
- pip install twine
- sudo chown -R $USER .
deploy:
Expand Down
4 changes: 3 additions & 1 deletion docker/provision.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -e

if [ "x$APP_ROOT" = x ]
then
for dir in "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" /app /vagrant
for dir in "$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" /app /vagrant $(pwd)
do
if [ -d "$dir" ] && [ -d "$dir/slyd" ]
then
Expand Down Expand Up @@ -50,6 +50,8 @@ EOF
SPLASH_SIP_VERSION=${SPLASH_SIP_VERSION:-"4.19.3"}
SPLASH_PYQT_VERSION=${SPLASH_PYQT_VERSION:-"5.9"}
SPLASH_BUILD_PARALLEL_JOBS=${SPLASH_BUILD_PARALLEL_JOBS:-"2"}
QT_MIRROR=${QT_MIRROR:-"http://ftp.fau.de/qtproject/official_releases/qt/5.9/5.9.1/qt-opensource-linux-x64-5.9.1.run"}
export PATH=/opt/qt59/5.9.1/gcc_64/bin:$PATH

# '2' is not supported by this script; allowed values are "3" and "venv" (?).
SPLASH_PYTHON_VERSION=${SPLASH_PYTHON_VERSION:-"3"}
Expand Down
14 changes: 14 additions & 0 deletions docker/run-tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Run the slybot tox suite and the portia_server ORM/API tests.

# Abort on the first failing command. Without this, the script's exit status
# is only that of the final `manage.py test` call, so a failing `pip install`,
# `tox` or portia_orm run would still be reported as success.
set -e

# NOTE(review): PYTHONPATH is built from the caller's working directory while
# the rest of the script uses absolute /app paths -- confirm the script is
# always started from the directory that contains slybot/ and slyd/.
export PYTHONPATH="$(pwd)/slybot:$(pwd)/slyd"
pip install tox

# Start the test HTTP server in the background, dropping log lines for
# requests answered with HTTP 200.
cd /app/slyd
python2.7 tests/testserver/server.py 2>&1 | grep -v 'HTTP/1.1" 200' &
# Presumably gives the background server time to come up before tests run.
sleep 3

cd /app/slybot
tox
cd /app/portia_server
./manage.py test portia_orm.tests
./manage.py test portia_api.tests
4 changes: 1 addition & 3 deletions portiaui/app/storages/cookies.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import StorageObject from 'ember-local-storage/local/object';

const Storage = StorageObject.extend();

export default Storage;
export default StorageObject.extend();
4 changes: 1 addition & 3 deletions portiaui/app/storages/page-loads.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import StorageObject from 'ember-local-storage/local/object';

const Storage = StorageObject.extend();

export default Storage;
export default StorageObject.extend();
4 changes: 1 addition & 3 deletions portiaui/app/storages/ui-state-collapsed-panels.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import StorageObject from 'ember-local-storage/local/object';

const Storage = StorageObject.extend();

export default Storage;
export default StorageObject.extend();
6 changes: 3 additions & 3 deletions portiaui/app/storages/ui-state-selected-tools.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import StorageObject from 'ember-local-storage/local/object';

const Storage = StorageObject.extend({
const ToolStorage = StorageObject.extend({
init() {
this._super(...arguments);

Expand All @@ -11,7 +11,7 @@ const Storage = StorageObject.extend({
}
});

Storage.reopenClass({
ToolStorage.reopenClass({
initialState() {
return {
magicToolActive: true,
Expand All @@ -20,4 +20,4 @@ Storage.reopenClass({
}
});

export default Storage;
export default ToolStorage;
6 changes: 3 additions & 3 deletions portiaui/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@
"ember-cli": "2.6.3",
"ember-cli-app-version": "^1.0.0",
"ember-cli-autoprefixer": "^0.6.0",
"ember-cli-babel": "^5.1.6",
"ember-cli-babel": "^6.6.0",
"ember-cli-content-security-policy": "0.5.0",
"ember-cli-dependency-checker": "^1.2.0",
"ember-cli-deprecation-workflow": "^0.2.3",
"ember-cli-htmlbars": "^1.0.3",
"ember-cli-htmlbars-inline-precompile": "^0.3.1",
"ember-cli-htmlbars-inline-precompile": "^1.0.3",
"ember-cli-inject-live-reload": "^1.4.0",
"ember-cli-jshint": "^1.0.0",
"ember-cli-loading-slider": "^1.3.0",
"ember-cli-qunit": "^1.4.0",
"ember-cli-release": "^0.2.9",
"ember-cli-sass": "^5.3.1",
"ember-cli-sass": "^7.2.0",
"ember-cli-sri": "^2.1.0",
"ember-cli-uglify": "^1.2.0",
"ember-concurrency": "0.7.10",
Expand Down
14 changes: 14 additions & 0 deletions slybot/CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
Slybot 0.13.3

Update slybot to work with python 3.7 and Scrapy 1.6

Slybot 0.13.2

Allow default spidermanager to load zipfiles

Slybot 0.13.1

Choose parent of first extracted repeated item for parent region
Drop empty fields if css extraction fails
Handle requests through Splash in python 3

Slybot 0.13.0

Update slybot to use the most recent libraries
Expand Down
6 changes: 3 additions & 3 deletions slybot/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
tox==2.5.0
tox==3.12.1
nose==1.3.7
nose-timer==0.6.0
nose-timer==0.7.5
doctest-ignore-unicode==0.1.2
setuptools>=36.0.1
setuptools>=41.0.1
14 changes: 7 additions & 7 deletions slybot/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Slybot requirements
numpy==1.14.2
Scrapy==1.5.0
scrapely==0.13.2
numpy==1.16.4
Scrapy==1.6.0
scrapely==0.13.5
loginform==1.2.0
lxml==4.2.0
dateparser==0.7.0
python-dateutil==2.6.0
lxml==4.3.4
dateparser==0.7.1
python-dateutil==2.8.0
jsonschema==2.6.0
six==1.11.0
six==1.12.0
scrapy-splash==0.7.2
page_finder==0.1.8
chardet==3.0.4
2 changes: 1 addition & 1 deletion slybot/slybot/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.13.2'
__version__ = '0.13.3'
4 changes: 2 additions & 2 deletions slybot/slybot/spidermanager.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,15 @@
from zipfile import is_zipfile, ZipFile

from zope.interface import implementer
from scrapy.interfaces import ISpiderManager
from scrapy.interfaces import ISpiderLoader
from scrapy.utils.misc import load_object
from scrapy.utils.project import get_project_settings

from slybot.spider import IblSpider
from slybot.utils import open_project_from_dir, load_plugins


@implementer(ISpiderManager)
@implementer(ISpiderLoader)
class SlybotSpiderManager(object):

def __init__(self, datadir, spider_cls=None, settings=None, **kwargs):
Expand Down
5 changes: 4 additions & 1 deletion slybot/slybot/tests/test_extraction_speed.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
def _next_3(iterable):
i = iter(iterable[1:-3])
while True:
yield SelectorList((next(i), next(i), next(i)))
try:
yield SelectorList((next(i), next(i), next(i)))
except StopIteration:
break
ITERATIONS = int(os.environ.get('SLYBOT_SPEED_TEST_ITERATIONS', 1))
Extractor = namedtuple('Extractor', ['containers', 'selectors', 'group'])
parsel_extractors = {
Expand Down
17 changes: 11 additions & 6 deletions slybot/slybot/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import os
import re
import six
import sys

from collections import OrderedDict, namedtuple
from io import open
from itertools import chain

from scrapely.htmlpage import HtmlPage, HtmlTagType
Expand Down Expand Up @@ -223,12 +225,12 @@ def _quotify(mystr):
to guess if string must be quoted with '"' or "'"
"""
quote = '"'
l = len(mystr)
for i in range(l):
if mystr[i] == "\\" and i + 1 < l and mystr[i + 1] == "'":
length = len(mystr)
for i in range(length):
if mystr[i] == "\\" and i + 1 < length and mystr[i + 1] == "'":
quote = "'"
break
elif mystr[i] == "\\" and i + 1 < l and mystr[i + 1] == '"':
elif mystr[i] == "\\" and i + 1 < length and mystr[i + 1] == '"':
quote = '"'
break
elif mystr[i] == "'":
Expand Down Expand Up @@ -322,7 +324,7 @@ def listdir(self, *args, **kwargs):
def open(self, *args, **kwargs):
"""Open files from filesystem."""
raw = kwargs.pop('raw', False)
with open(self._path(*args), encoding = 'utf-8') as f:
with open(self._path(*args), encoding='utf-8') as f:
return decode(f.read()) if raw else json.load(f)


Expand Down Expand Up @@ -392,7 +394,10 @@ def load_external_templates(self, spec_base, spider_name):
"""
spider_dir = self.storage.rel_path('spiders', spider_name)
if not self.storage.isdir(spider_dir):
raise StopIteration
if sys.version_info < (3, 6):
raise StopIteration
else:
return
for name in self.storage.listdir(spider_dir):
if not name.endswith('.json'):
continue
Expand Down
2 changes: 1 addition & 1 deletion slybot/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# and then run "tox" from this directory.

[tox]
envlist = py27
envlist = py27,py37

[testenv]
deps =
Expand Down
8 changes: 4 additions & 4 deletions slyd/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Slyd requirements
twisted==17.9.0
twisted==19.2.1
pyOpenSSL==17.5.0
service_identity==17.0.0
requests==2.18.4
service_identity==18.1.0
requests==2.20.0
autobahn==18.3.1
six==1.11.0
six==1.12.0
chardet==3.0.4
parse==1.8.2
ndg-httpsclient==0.4.4
Expand Down
2 changes: 1 addition & 1 deletion slyd/slyd/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from six.moves.urllib_parse import urljoin

from scrapely.htmlpage import HtmlTag, HtmlTagType, parse_html
from .splash.css_utils import process_css, wrap_url
from slyd.splash.css_utils import process_css, wrap_url
from slybot.utils import serialize_tag, add_tagids

URI_ATTRIBUTES = ("action", "background", "cite", "classid", "codebase",
Expand Down

0 comments on commit c6f22ae

Please sign in to comment.