From a7a16658e8178e398d521576f821ba88050bf12d Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 15:53:03 -0700 Subject: [PATCH 01/12] chore: bump requirements --- requirements-dev.txt | 2 ++ requirements.txt | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) create mode 100644 requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000..26b77f6 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,2 @@ +-r requirements.txt +pytest diff --git a/requirements.txt b/requirements.txt index af1063a..b7e4f37 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -Flask==2.1.2 -elasticsearch==8.2.0 -requests==2.27.1 +Flask==2.3.2 +elasticsearch==8.8.0 +requests==2.31.0 From 84f0f48b7862bb5a306128da0a4365e223572725 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 15:53:58 -0700 Subject: [PATCH 02/12] test: add pytest.ini --- pytest.ini | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 0000000..a635c5c --- /dev/null +++ b/pytest.ini @@ -0,0 +1,2 @@ +[pytest] +pythonpath = . From a60d3489b5d301e311ce8e3e4a31e196b1186176 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 16:17:15 -0700 Subject: [PATCH 03/12] test: add es docker --- tests/elasticsearch/docker-compose.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 tests/elasticsearch/docker-compose.yaml diff --git a/tests/elasticsearch/docker-compose.yaml b/tests/elasticsearch/docker-compose.yaml new file mode 100644 index 0000000..c6e4c2d --- /dev/null +++ b/tests/elasticsearch/docker-compose.yaml @@ -0,0 +1,11 @@ +version: '2.2' +services: + es01: + image: docker.elastic.co/elasticsearch/elasticsearch:8.1.3 + container_name: es01 + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + ports: + - 9200:9200 From 931381142731905b2ad54a77336be21e26ef0ea1 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 18:06:58 -0700 Subject: [PATCH 04/12] test: views --- tests/conftest.py | 49 ++++++++++++++++++++++++++++ tests/fixtures/tweet_with_photo.json | 1 + tests/fixtures/tweet_with_video.json | 1 + tests/test_views.py | 27 +++++++++++++++ 4 files changed, 78 insertions(+) create mode 100644 tests/conftest.py create mode 100644 tests/fixtures/tweet_with_photo.json create mode 100644 tests/fixtures/tweet_with_video.json create mode 100644 tests/test_views.py diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..3736bc6 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,49 @@ +import os +import json +import time +from pathlib import Path +from datetime import datetime + +import pytest +from elasticsearch import Elasticsearch + +from ash import app + + +class Config: + TESTING = True + T_MEDIA_FROM = 'direct' + + +@pytest.fixture +def client(es_host, es_index): + app.config.from_object(Config) + app.config.update({ + 'T_ES_HOST': es_host, + 'T_ES_INDEX': es_index, + }) + return app.test_client() + + +@pytest.fixture(scope='session') +def es_host() -> str: + return os.environ.get('T_ES_HOST', 'http://localhost:9200') + + +@pytest.fixture(scope='session') +def es_index(es_host: str) -> str: + cluster = Elasticsearch(es_host) + now = datetime.now().strftime('%s') + index = f'pytest-{now}' + here = Path(os.path.abspath(__file__)).parent + tweet_files = [ + here / 'fixtures/tweet_with_photo.json', + here / 'fixtures/tweet_with_video.json', + ] + for tweet_file in tweet_files: + tweet = json.loads(tweet_file.read_text()) + cluster.index(index=index, id=tweet['id'], document=tweet) + + time.sleep(3) + + return index diff --git a/tests/fixtures/tweet_with_photo.json b/tests/fixtures/tweet_with_photo.json new file mode 100644 index 0000000..0a47194 --- /dev/null +++ b/tests/fixtures/tweet_with_photo.json @@ -0,0 +1 @@ +{"@index": "tweets-wzyboy", "@timestamp": "2023-01-17T19:06:46+00:00", "contributors": null, "coordinates": null, "created_at": "Tue Jan 17 19:06:46 +0000 2023", "display_text_range": [0, 75], "entities": {"hashtags": [], "media": [{"display_url": "pic.twitter.com/9dauLWrDZS", "expanded_url": "https://twitter.com/wzyboy/status/1615425412921987074/photo/1", "id": 1615425410095017984, "id_str": "1615425410095017984", "indices": [76, 99], "media_url": "http://pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg", "media_url_https": "https://pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg", "sizes": {"large": {"h": 2048, "resize": "fit", "w": 922}, "medium": {"h": 1200, "resize": "fit", "w": 540}, "small": {"h": 680, "resize": "fit", "w": 306}, "thumb": {"h": 150, "resize": "crop", "w": 150}}, "type": "photo", "url": "https://t.co/9dauLWrDZS"}], "symbols": [], "urls": [], "user_mentions": []}, "extended_entities": {"media": [{"display_url": "pic.twitter.com/9dauLWrDZS", "expanded_url": "https://twitter.com/wzyboy/status/1615425412921987074/photo/1", "id": 1615425410095017984, "id_str": "1615425410095017984", "indices": [76, 99], "media_url": "http://pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg", "media_url_https": "https://pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg", "sizes": {"large": {"h": 2048, "resize": "fit", "w": 922}, "medium": {"h": 1200, "resize": "fit", "w": 540}, "small": {"h": 680, "resize": "fit", "w": 306}, "thumb": {"h": 150, "resize": "crop", "w": 150}}, "type": "photo", "url": "https://t.co/9dauLWrDZS"}]}, "favorite_count": 0, "favorited": false, "full_text": "Todoist: please connect a keyboard to your phone and press F12 to continue. https://t.co/9dauLWrDZS", "geo": null, "id": 1615425412921987074, "id_str": "1615425412921987074", "in_reply_to_screen_name": null, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "is_quote_status": false, "lang": "en", "place": null, "possibly_sensitive": false, "retweet_count": 0, "retweeted": false, "source": "Twitter for Android", "truncated": false, "user": {"contributors_enabled": false, "created_at": "Fri Jun 26 05:13:44 +0000 2009", "default_profile": false, "default_profile_image": false, "description": "Das Leben ist zu kurz, um Deutsch zu lernen. 每天给 @Uucky_Lee 洗碗。欢迎在 Fediverse 里关注我。了解 Fediverse 联邦宇宙: https://t.co/WGCqN97JAz", "entities": {"description": {"urls": [{"display_url": "wzyboy.im/post/1486.html", "expanded_url": "https://wzyboy.im/post/1486.html", "indices": [101, 124], "url": "https://t.co/WGCqN97JAz"}]}, "url": {"urls": [{"display_url": "wzyboy.im", "expanded_url": "https://wzyboy.im/", "indices": [0, 23], "url": "https://t.co/btXCkHdabG"}]}}, "favourites_count": 1608, "follow_request_sent": false, "followers_count": 5072, "following": false, "friends_count": 429, "geo_enabled": true, "has_extended_profile": true, "id": 50932982, "id_str": "50932982", "is_translation_enabled": true, "is_translator": true, "lang": null, "listed_count": 115, "location": "Vancouver, BC", "name": "@wzyboy@dabr.ca", "notifications": false, "profile_background_color": "FFFFFF", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme16/bg.gif", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme16/bg.gif", "profile_background_tile": false, "profile_banner_url": "https://pbs.twimg.com/profile_banners/50932982/1573897758", "profile_image_url": "http://pbs.twimg.com/profile_images/1195639287456391168/IAxCxK39_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1195639287456391168/IAxCxK39_normal.jpg", "profile_link_color": "8CB2BD", "profile_sidebar_border_color": "FFFFFF", "profile_sidebar_fill_color": "424C55", "profile_text_color": "8C94A2", "profile_use_background_image": false, "protected": true, "screen_name": "wzyboy", "statuses_count": 52391, "time_zone": null, "translator_type": "badged", "url": "https://t.co/btXCkHdabG", "utc_offset": null, "verified": false, "withheld_in_countries": []}} \ No newline at end of file diff --git a/tests/fixtures/tweet_with_video.json b/tests/fixtures/tweet_with_video.json new file mode 100644 index 0000000..576daf3 --- /dev/null +++ b/tests/fixtures/tweet_with_video.json @@ -0,0 +1 @@ +{"@index": "tweets-uucky", "@timestamp": "2022-12-03T22:41:03+00:00", "contributors": null, "coordinates": null, "created_at": "Sat Dec 03 22:41:03 +0000 2022", "display_text_range": [0, 118], "entities": {"hashtags": [], "media": [{"display_url": "pic.twitter.com/CZTKmX68wh", "expanded_url": "https://twitter.com/dodo/status/1599161556507365376/video/1", "id": 1598448776582115328, "id_str": "1598448776582115328", "indices": [95, 118], "media_url": "http://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "media_url_https": "https://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "sizes": {"large": {"h": 1080, "resize": "fit", "w": 1080}, "medium": {"h": 1080, "resize": "fit", "w": 1080}, "small": {"h": 680, "resize": "fit", "w": 680}, "thumb": {"h": 150, "resize": "crop", "w": 150}}, "source_status_id": 1599161556507365376, "source_status_id_str": "1599161556507365376", "source_user_id": 1604444052, "source_user_id_str": "1604444052", "type": "photo", "url": "https://t.co/CZTKmX68wh"}], "symbols": [], "urls": [], "user_mentions": [{"id": 1604444052, "id_str": "1604444052", "indices": [3, 8], "name": "The Dodo", "screen_name": "dodo"}, {"id": 780516438113153025, "id_str": "780516438113153025", "indices": [80, 94], "name": "PlayforStrays", "screen_name": "PlayforStrays"}]}, "extended_entities": {"media": [{"additional_media_info": {"description": "", "embeddable": true, "monetizable": true, "source_user": {"contributors_enabled": false, "created_at": "Thu Jul 18 22:19:02 +0000 2013", "default_profile": false, "default_profile_image": false, "description": "For animal people.", "entities": {"description": {"urls": []}, "url": {"urls": [{"display_url": "thedodo.com", "expanded_url": "http://www.thedodo.com", "indices": [0, 23], "url": "https://t.co/vdKLF4z50p"}]}}, "favourites_count": 20883, "follow_request_sent": false, "followers_count": 2632250, "following": false, "friends_count": 4446, "geo_enabled": true, "has_extended_profile": false, "id": 1604444052, "id_str": "1604444052", "is_translation_enabled": true, "is_translator": false, "lang": null, "listed_count": 7063, "location": "New York, NY", "name": "The Dodo", "notifications": false, "profile_background_color": "A0E6F9", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": true, "profile_banner_url": "https://pbs.twimg.com/profile_banners/1604444052/1619619087", "profile_image_url": "http://pbs.twimg.com/profile_images/1542905116168056832/3QZfoNql_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1542905116168056832/3QZfoNql_normal.jpg", "profile_link_color": "FA0011", "profile_sidebar_border_color": "FFFFFF", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": false, "protected": false, "screen_name": "dodo", "statuses_count": 79958, "time_zone": null, "translator_type": "none", "url": "https://t.co/vdKLF4z50p", "utc_offset": null, "verified": true, "withheld_in_countries": []}, "title": " Guy Finds Starving Dog On Deserted Island "}, "display_url": "pic.twitter.com/CZTKmX68wh", "expanded_url": "https://twitter.com/dodo/status/1599161556507365376/video/1", "id": 1598448776582115328, "id_str": "1598448776582115328", "indices": [95, 118], "media_url": "http://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "media_url_https": "https://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "sizes": {"large": {"h": 1080, "resize": "fit", "w": 1080}, "medium": {"h": 1080, "resize": "fit", "w": 1080}, "small": {"h": 680, "resize": "fit", "w": 680}, "thumb": {"h": 150, "resize": "crop", "w": 150}}, "source_status_id": 1599161556507365376, "source_status_id_str": "1599161556507365376", "source_user_id": 1604444052, "source_user_id_str": "1604444052", "type": "video", "url": "https://t.co/CZTKmX68wh", "video_info": {"aspect_ratio": [1, 1], "duration_millis": 212212, "variants": [{"bitrate": 1280000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/720x720/mn336TwuCUiapYKo.mp4?tag=16"}, {"bitrate": 832000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/540x540/c3ysrVHaOS3gS4pV.mp4?tag=16"}, {"bitrate": 8768000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/1080x1080/uj5eNeYYAafmOFL1.mp4?tag=16"}, {"bitrate": 432000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/320x320/2LKcsB380YiG24b8.mp4?tag=16"}, {"content_type": "application/x-mpegURL", "url": "https://video.twimg.com/amplify_video/1598448776582115328/pl/8ThFK3LUK7Z5dwno.m3u8?tag=16&container=fmp4"}]}}]}, "favorite_count": 0, "favorited": false, "full_text": "RT @dodo: This guy found a starving dog on a beach and knew what he had to do 💙 @playforstrays https://t.co/CZTKmX68wh", "geo": null, "id": 1599171888076722176, "id_str": "1599171888076722176", "in_reply_to_screen_name": null, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "is_quote_status": false, "lang": "en", "place": null, "possibly_sensitive": false, "retweet_count": 131, "retweeted": false, "retweeted_status": {"contributors": null, "coordinates": null, "created_at": "Sat Dec 03 22:00:00 +0000 2022", "display_text_range": [0, 84], "entities": {"hashtags": [], "media": [{"display_url": "pic.twitter.com/CZTKmX68wh", "expanded_url": "https://twitter.com/dodo/status/1599161556507365376/video/1", "id": 1598448776582115328, "id_str": "1598448776582115328", "indices": [85, 108], "media_url": "http://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "media_url_https": "https://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "sizes": {"large": {"h": 1080, "resize": "fit", "w": 1080}, "medium": {"h": 1080, "resize": "fit", "w": 1080}, "small": {"h": 680, "resize": "fit", "w": 680}, "thumb": {"h": 150, "resize": "crop", "w": 150}}, "type": "photo", "url": "https://t.co/CZTKmX68wh"}], "symbols": [], "urls": [], "user_mentions": [{"id": 780516438113153025, "id_str": "780516438113153025", "indices": [70, 84], "name": "PlayforStrays", "screen_name": "PlayforStrays"}]}, "extended_entities": {"media": [{"additional_media_info": {"description": "", "embeddable": true, "monetizable": true, "title": " Guy Finds Starving Dog On Deserted Island "}, "display_url": "pic.twitter.com/CZTKmX68wh", "expanded_url": "https://twitter.com/dodo/status/1599161556507365376/video/1", "id": 1598448776582115328, "id_str": "1598448776582115328", "indices": [85, 108], "media_url": "http://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "media_url_https": "https://pbs.twimg.com/media/Fi7VTHnUAAE1wRT.jpg", "sizes": {"large": {"h": 1080, "resize": "fit", "w": 1080}, "medium": {"h": 1080, "resize": "fit", "w": 1080}, "small": {"h": 680, "resize": "fit", "w": 680}, "thumb": {"h": 150, "resize": "crop", "w": 150}}, "type": "video", "url": "https://t.co/CZTKmX68wh", "video_info": {"aspect_ratio": [1, 1], "duration_millis": 212212, "variants": [{"bitrate": 1280000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/720x720/mn336TwuCUiapYKo.mp4?tag=16"}, {"bitrate": 832000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/540x540/c3ysrVHaOS3gS4pV.mp4?tag=16"}, {"bitrate": 8768000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/1080x1080/uj5eNeYYAafmOFL1.mp4?tag=16"}, {"bitrate": 432000, "content_type": "video/mp4", "url": "https://video.twimg.com/amplify_video/1598448776582115328/vid/320x320/2LKcsB380YiG24b8.mp4?tag=16"}, {"content_type": "application/x-mpegURL", "url": "https://video.twimg.com/amplify_video/1598448776582115328/pl/8ThFK3LUK7Z5dwno.m3u8?tag=16&container=fmp4"}]}}]}, "favorite_count": 1102, "favorited": false, "full_text": "This guy found a starving dog on a beach and knew what he had to do 💙 @playforstrays https://t.co/CZTKmX68wh", "geo": null, "id": 1599161556507365376, "id_str": "1599161556507365376", "in_reply_to_screen_name": null, "in_reply_to_status_id": null, "in_reply_to_status_id_str": null, "in_reply_to_user_id": null, "in_reply_to_user_id_str": null, "is_quote_status": false, "lang": "en", "place": null, "possibly_sensitive": false, "retweet_count": 131, "retweeted": false, "source": "Twitter Media Studio", "truncated": false, "user": {"contributors_enabled": false, "created_at": "Thu Jul 18 22:19:02 +0000 2013", "default_profile": false, "default_profile_image": false, "description": "For animal people.", "entities": {"description": {"urls": []}, "url": {"urls": [{"display_url": "thedodo.com", "expanded_url": "http://www.thedodo.com", "indices": [0, 23], "url": "https://t.co/vdKLF4z50p"}]}}, "favourites_count": 20883, "follow_request_sent": false, "followers_count": 2632250, "following": false, "friends_count": 4446, "geo_enabled": true, "has_extended_profile": false, "id": 1604444052, "id_str": "1604444052", "is_translation_enabled": true, "is_translator": false, "lang": null, "listed_count": 7063, "location": "New York, NY", "name": "The Dodo", "notifications": false, "profile_background_color": "A0E6F9", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", "profile_background_tile": true, "profile_banner_url": "https://pbs.twimg.com/profile_banners/1604444052/1619619087", "profile_image_url": "http://pbs.twimg.com/profile_images/1542905116168056832/3QZfoNql_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1542905116168056832/3QZfoNql_normal.jpg", "profile_link_color": "FA0011", "profile_sidebar_border_color": "FFFFFF", "profile_sidebar_fill_color": "DDEEF6", "profile_text_color": "333333", "profile_use_background_image": false, "protected": false, "screen_name": "dodo", "statuses_count": 79958, "time_zone": null, "translator_type": "none", "url": "https://t.co/vdKLF4z50p", "utc_offset": null, "verified": true, "withheld_in_countries": []}}, "source": "Twitter Web App", "truncated": false, "user": {"contributors_enabled": false, "created_at": "Sat Jan 05 08:10:03 +0000 2013", "default_profile": false, "default_profile_image": false, "description": "UX。日常推为主,摸鱼up主、@HondaJOJO 鉴定过的傻、好奇、梦多、颈椎病十级、不耐冻、漆黑意志。喜欢逛超市和公园。每天给 @wzyboy 做饭。", "entities": {"description": {"urls": []}, "url": {"urls": [{"display_url": "uucky.me", "expanded_url": "https://uucky.me", "indices": [0, 23], "url": "https://t.co/j80l9t44tU"}]}}, "favourites_count": 9164, "follow_request_sent": false, "followers_count": 2336, "following": true, "friends_count": 963, "geo_enabled": true, "has_extended_profile": true, "id": 1062473329, "id_str": "1062473329", "is_translation_enabled": false, "is_translator": false, "lang": null, "listed_count": 67, "location": "海女美術大学🍁", "name": "@uucky@o3o.ca", "notifications": true, "profile_background_color": "DBE9ED", "profile_background_image_url": "http://abs.twimg.com/images/themes/theme17/bg.gif", "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme17/bg.gif", "profile_background_tile": false, "profile_banner_url": "https://pbs.twimg.com/profile_banners/1062473329/1434218278", "profile_image_url": "http://pbs.twimg.com/profile_images/1591958226446213126/lOaklion_normal.jpg", "profile_image_url_https": "https://pbs.twimg.com/profile_images/1591958226446213126/lOaklion_normal.jpg", "profile_link_color": "FF0066", "profile_sidebar_border_color": "FFFFFF", "profile_sidebar_fill_color": "E6F6F9", "profile_text_color": "333333", "profile_use_background_image": true, "protected": true, "screen_name": "Uucky_Lee", "statuses_count": 42361, "time_zone": null, "translator_type": "regular", "url": "https://t.co/j80l9t44tU", "utc_offset": null, "verified": false, "withheld_in_countries": []}} \ No newline at end of file diff --git a/tests/test_views.py b/tests/test_views.py new file mode 100644 index 0000000..c9f51f5 --- /dev/null +++ b/tests/test_views.py @@ -0,0 +1,27 @@ +class TestViews: + def test_index(self, client): + resp = client.get('/tweet/') + print(resp.text) + assert 'Twitter Archive' in resp.text + assert 'keyboard' in resp.text + + def test_direct_media(self, client): + client.application.config['T_MEDIA_FROM'] = 'direct' + resp = client.get('/tweet/1615425412921987074.html') + assert 'https://pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg' in resp.text + + def test_mirror_media(self, client): + CF_DOMAIN = 'd1111111111.cloudfront.net' + client.application.config['T_MEDIA_FROM'] = 'mirror' + client.application.config['T_MEDIA_MIRRORS'] = { + 'pbs.twimg.com': f'{CF_DOMAIN}/pbs.twimg.com', + 'video.twimg.com': f'{CF_DOMAIN}/video.twimg.com', + } + resp = client.get('/tweet/1615425412921987074.html') + assert f'https://{CF_DOMAIN}/pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg' in resp.text + + def test_fs_media(self, client): + client.application.config['T_MEDIA_FROM'] = 'filesystem' + client.application.config['T_MEDIA_FS_PATH'] = './media' + resp = client.get('/tweet/1615425412921987074.html') + assert '/tweet/media/pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg' in resp.text From 8e3396adbd94135f8c071a7909ff4c68afea7b8d Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 18:22:04 -0700 Subject: [PATCH 05/12] fix: serve from fs path --- .gitignore | 1 + ash.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 3817a10..d697b3f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ venv/ +media media/ config.py diff --git a/ash.py b/ash.py index 039d401..dc7a973 100644 --- a/ash.py +++ b/ash.py @@ -8,7 +8,7 @@ import itertools from datetime import datetime from functools import lru_cache -from urllib.parse import urlparse +from urllib.parse import urlsplit from collections.abc import Mapping import flask @@ -246,7 +246,7 @@ def format_tweet_text(tweet): # A bare domain would be prepended a scheme but not a path, # while a real URL would always have a path. # https://docs.python.org/3/library/urllib.parse.html#url-parsing - if urlparse(u['expanded_url']).path: + if urlsplit(u['expanded_url']).path: a = f'{u["display_url"]}' else: a = u['display_url'] @@ -320,11 +320,12 @@ def in_reply_to_link(tweet): def replace_media_url(url): - media_key = os.path.basename(url) if app.config['T_MEDIA_FROM'] == 'direct': return url elif app.config['T_MEDIA_FROM'] == 'filesystem': - return flask.url_for('get_media', filename=media_key) + parts = urlsplit(url) + fs_path = f'{parts.netloc}{parts.path}' + return flask.url_for('get_media_from_filesystem', fs_path=fs_path) elif app.config['T_MEDIA_FROM'] == 'mirror': mirrors = app.config.get('T_MEDIA_MIRRORS', {}) for orig, repl in mirrors.items(): @@ -458,9 +459,9 @@ def get_tweet(tweet_id, ext): return resp -@app.route('/tweet/media/') -def get_media(filename): - return flask.send_from_directory(app.config['T_MEDIA_FS_PATH'], filename) +@app.route('/tweet/media/') +def get_media_from_filesystem(fs_path): + return flask.send_from_directory(app.config['T_MEDIA_FS_PATH'], fs_path) @app.route('/tweet/search.') From 054a9541677bcab80fd3a351f02709e1840d36a5 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 18:26:55 -0700 Subject: [PATCH 06/12] test: DIY --- tests/test_views.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/tests/test_views.py b/tests/test_views.py index c9f51f5..2ace47c 100644 --- a/tests/test_views.py +++ b/tests/test_views.py @@ -1,27 +1,24 @@ -class TestViews: - def test_index(self, client): - resp = client.get('/tweet/') - print(resp.text) - assert 'Twitter Archive' in resp.text - assert 'keyboard' in resp.text +class TestMediaReplacement: + tweet_id = '1615425412921987074' + media_filename = 'Fmsk2gHacAAJGL0.jpg' + cf_domain = 'd1111111111.cloudfront.net' def test_direct_media(self, client): client.application.config['T_MEDIA_FROM'] = 'direct' - resp = client.get('/tweet/1615425412921987074.html') - assert 'https://pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg' in resp.text + resp = client.get(f'/tweet/{self.tweet_id}.html') + assert f'https://pbs.twimg.com/media/{self.media_filename}' in resp.text def test_mirror_media(self, client): - CF_DOMAIN = 'd1111111111.cloudfront.net' client.application.config['T_MEDIA_FROM'] = 'mirror' client.application.config['T_MEDIA_MIRRORS'] = { - 'pbs.twimg.com': f'{CF_DOMAIN}/pbs.twimg.com', - 'video.twimg.com': f'{CF_DOMAIN}/video.twimg.com', + 'pbs.twimg.com': f'{self.cf_domain}/pbs.twimg.com', + 'video.twimg.com': f'{self.cf_domain}/video.twimg.com', } - resp = client.get('/tweet/1615425412921987074.html') - assert f'https://{CF_DOMAIN}/pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg' in resp.text + resp = client.get(f'/tweet/{self.tweet_id}.html') + assert f'https://{self.cf_domain}/pbs.twimg.com/media/{self.media_filename}' in resp.text def test_fs_media(self, client): client.application.config['T_MEDIA_FROM'] = 'filesystem' client.application.config['T_MEDIA_FS_PATH'] = './media' - resp = client.get('/tweet/1615425412921987074.html') - assert '/tweet/media/pbs.twimg.com/media/Fmsk2gHacAAJGL0.jpg' in resp.text + resp = client.get(f'/tweet/{self.tweet_id}.html') + assert f'/tweet/media/pbs.twimg.com/media/{self.media_filename}' in resp.text From 87a968ce2640314d71ad974f34cf55063ef9bfe7 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 18:35:02 -0700 Subject: [PATCH 07/12] test: add GHA workflows --- .github/workflows/pytest.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 0000000..c4a4e12 --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,25 @@ +--- +name: pytest +on: push + +jobs: + pytest: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Run pytest + run: | + pip install -r requirements-dev.txt + pytest -v From 430ca22ebc5386ea7b3713ab445973d0d453662c Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sat, 8 Jul 2023 18:46:18 -0700 Subject: [PATCH 08/12] fix: default config not existing --- ash.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ash.py b/ash.py index dc7a973..bb3aae2 100644 --- a/ash.py +++ b/ash.py @@ -2,7 +2,6 @@ A Flask-based web server that serves Twitter Archive. ''' -import os import re import pprint import itertools @@ -16,11 +15,22 @@ from elasticsearch import Elasticsearch +class DefaultConfig: + T_ES_HOST = 'http://localhost:9200' + T_ES_INDEX = 'tweets-*,toots-*' + T_MEDIA_FROM = 'direct' + T_EXTERNAL_TWEETS = False + + app = flask.Flask( __name__, static_url_path='/tweet/static' ) -app.config.from_object('config.Config') +app.config.from_object(DefaultConfig) +try: + app.config.from_object('config.Config') +except ImportError: + pass # Set up external Tweets support From 9862899e7796dc8540fae708013c9e043a89edd0 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sun, 9 Jul 2023 00:17:38 -0700 Subject: [PATCH 09/12] test: setup ES --- .github/workflows/pytest.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index c4a4e12..c6d0fd7 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -19,6 +19,14 @@ jobs: python-version: ${{ matrix.python-version }} cache: pip + - name: Setup Elasticsearch + run: | + pushd tests/elasticsearch + docker compose up -d + # wait until up + docker inspect --format '{{ .NetworkSettings.IPAddress }}:9200' es01 | xargs wget --retry-connrefused --tries=5 -q --wait=3 --spider + popd + - name: Run pytest run: | pip install -r requirements-dev.txt From 1377058c8c8215eb71cb57ed9d99866364ba3dd9 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sun, 9 Jul 2023 00:34:32 -0700 Subject: [PATCH 10/12] test: docker compose wait until healthy --- .github/workflows/pytest.yaml | 4 +--- tests/elasticsearch/docker-compose.yaml | 8 +++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index c6d0fd7..a93b49f 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -22,9 +22,7 @@ jobs: - name: Setup Elasticsearch run: | pushd tests/elasticsearch - docker compose up -d - # wait until up - docker inspect --format '{{ .NetworkSettings.IPAddress }}:9200' es01 | xargs wget --retry-connrefused --tries=5 -q --wait=3 --spider + docker compose up -d --wait popd - name: Run pytest diff --git a/tests/elasticsearch/docker-compose.yaml b/tests/elasticsearch/docker-compose.yaml index c6e4c2d..d0efacb 100644 --- a/tests/elasticsearch/docker-compose.yaml +++ b/tests/elasticsearch/docker-compose.yaml @@ -1,4 +1,4 @@ -version: '2.2' +version: '3' services: es01: image: docker.elastic.co/elasticsearch/elasticsearch:8.1.3 @@ -9,3 +9,9 @@ services: - "ES_JAVA_OPTS=-Xms512m -Xmx512m" ports: - 9200:9200 + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9200"] + interval: 10s + timeout: 1s + retries: 6 + start_period: 20s From 88284fcf169389afcf9981ce760314aa866608a9 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sun, 9 Jul 2023 01:04:16 -0700 Subject: [PATCH 11/12] style: syntax overhaul --- ash.py | 104 ++++++++++++++++++++++++--------------------------------- 1 file changed, 44 insertions(+), 60 deletions(-) diff --git a/ash.py b/ash.py index bb3aae2..205b602 100644 --- a/ash.py +++ b/ash.py @@ -10,6 +10,8 @@ from urllib.parse import urlsplit from collections.abc import Mapping +from collections.abc import Iterator + import flask import requests from elasticsearch import Elasticsearch @@ -22,10 +24,7 @@ class DefaultConfig: T_EXTERNAL_TWEETS = False -app = flask.Flask( - __name__, - static_url_path='/tweet/static' -) +app = flask.Flask(__name__, static_url_path='/tweet/static') app.config.from_object(DefaultConfig) try: app.config.from_object('config.Config') @@ -35,7 +34,6 @@ class DefaultConfig: # Set up external Tweets support if app.config.get('T_EXTERNAL_TWEETS'): - # https://developer.twitter.com/en/docs/basics/authentication/api-reference/token resp = requests.post( 'https://api.twitter.com/oauth2/token', @@ -51,7 +49,7 @@ class DefaultConfig: app.config['T_TWITTER_TOKEN'] = bearer_token -def toot_to_tweet(status): +def toot_to_tweet(status: dict) -> dict: '''Transform toot to be compatible with tweet-interface''' # Status is a tweet if status.get('user'): @@ -82,36 +80,33 @@ def toot_to_tweet(status): class TweetsDatabase(Mapping): - def __init__(self, es_host, es_index): + def __init__(self, es_host: str, es_index: str) -> None: self.es = Elasticsearch(es_host) self.es_index = es_index - def _search(self, **kwargs): + def _search(self, **kwargs) -> Iterator[dict]: if not kwargs.get('index'): kwargs['index'] = self.es_index hits = self.es.search(**kwargs)['hits']['hits'] - tweets = [] for hit in hits: tweet = hit['_source'] tweet['@index'] = hit['_index'] tweet = toot_to_tweet(tweet) - tweets.append(tweet) - return tweets + yield tweet - def __getitem__(self, tweet_id): + def __getitem__(self, tweet_id: str | int) -> dict: resp = self._search( query={ 'term': { '_id': tweet_id } }) - if len(resp) == 0: - raise KeyError(f'Tweet ID {tweet_id} not found') - else: - tweet = resp[0] - return tweet + try: + return next(resp) + except StopIteration: + raise KeyError(f'Tweet ID {tweet_id} not found') from None - def __iter__(self): + def __iter__(self) -> Iterator[int]: resp = self._search( sort=['@timestamp'], #size=1000, @@ -119,7 +114,7 @@ def __iter__(self): for tweet in resp: yield tweet['id'] - def __reversed__(self): + def __reversed__(self) -> Iterator[int]: resp = self._search( sort=[{ '@timestamp': {'order': 'desc'} @@ -129,10 +124,10 @@ def __reversed__(self): for tweet in resp: yield tweet['id'] - def __len__(self): + def __len__(self) -> int: return self.es.count(index=self.es_index)['count'] - def search(self, *, keyword=None, user_screen_name=None, index=None, limit=100): + def search(self, *, keyword=None, user_screen_name=None, index=None, limit=100) -> Iterator[dict]: keyword_query = { 'simple_query_string': { 'query': keyword, @@ -142,7 +137,7 @@ def search(self, *, keyword=None, user_screen_name=None, index=None, limit=100): } if user_screen_name and '@' in user_screen_name: # Mastodon screen_name_field = 'account.fqn.keyword' - else: + else: # Twitter screen_name_field = 'user.screen_name.keyword' user_query = { 'term': { @@ -166,7 +161,7 @@ def search(self, *, keyword=None, user_screen_name=None, index=None, limit=100): ) return resp - def get_users(self): + def get_users(self) -> Iterator[dict]: agg_name_twitter = 'user_screen_names' agg_name_mastodon = 'account_fqn' resp = self.es.search( @@ -186,16 +181,14 @@ def get_users(self): }, ) buckets = resp['aggregations'][agg_name_twitter]['buckets'] + resp['aggregations'][agg_name_mastodon]['buckets'] - users = [ - { + for bucket in buckets: + user = { 'screen_name': bucket['key'], 'tweets_count': bucket['doc_count'] } - for bucket in buckets - ] - return users + yield user - def get_indexes(self): + def get_indexes(self) -> Iterator[dict]: agg_name = 'index_names' resp = self.es.search( index=self.es_index, @@ -208,17 +201,15 @@ def get_indexes(self): } }, ) - indexes = [ - { + for bucket in resp['aggregations'][agg_name]['buckets']: + index = { 'name': bucket['key'], 'tweets_count': bucket['doc_count'] } - for bucket in resp['aggregations'][agg_name]['buckets'] - ] - return indexes + yield index -def get_tdb(): +def get_tdb() -> TweetsDatabase: if not hasattr(flask.g, 'tdb'): flask.g.tdb = TweetsDatabase( app.config['T_ES_HOST'], @@ -228,7 +219,7 @@ def get_tdb(): @app.template_global('get_tweet_link') -def get_tweet_link(screen_name, tweet_id, original_link=False): +def get_tweet_link(screen_name: str, tweet_id: str | int, original_link: bool = False) -> str: if original_link: return f'https://twitter.com/{screen_name}/status/{tweet_id}' else: @@ -236,8 +227,7 @@ def get_tweet_link(screen_name, tweet_id, original_link=False): @app.template_filter('format_tweet_text') -def format_tweet_text(tweet): - +def format_tweet_text(tweet: dict) -> str: try: tweet_text = tweet['full_text'] except KeyError: @@ -286,15 +276,13 @@ def format_tweet_text(tweet): # true and has a valid "retweeted_status". Tweets that are ingested via # Twitter Archive always has "retweeted" set to false (identical to a # "traditional" RT. - retweeted_status = tweet.get('retweeted_status') - if retweeted_status: + if retweeted_status := tweet.get('retweeted_status'): link = get_tweet_link('status', retweeted_status['id']) a = f'RT' tweet_text = tweet_text.replace('RT', a, 1) # Format reblogged toot - reblogged_status = tweet.get('reblog') - if reblogged_status: + if reblogged_status := tweet.get('reblog'): status_link = reblogged_status['url'] author = reblogged_status['account']['fqn'] author_link = reblogged_status['account']['url'] @@ -305,7 +293,7 @@ def format_tweet_text(tweet): @app.template_filter('format_created_at') -def format_created_at(timestamp, fmt): +def format_created_at(timestamp: str, fmt: str) -> str: try: dt = datetime.strptime(timestamp, '%a %b %d %H:%M:%S %z %Y') except ValueError: @@ -317,7 +305,7 @@ def format_created_at(timestamp, fmt): @app.template_filter('in_reply_to_link') -def in_reply_to_link(tweet): +def in_reply_to_link(tweet: dict) -> str: if tweet.get('account'): # Mastodon # If this is a self-thread, return local link if tweet['in_reply_to_account_id'] == tweet['account']['id']: @@ -329,7 +317,7 @@ def in_reply_to_link(tweet): return get_tweet_link('status', tweet['in_reply_to_status_id']) -def replace_media_url(url): +def replace_media_url(url: str) -> str: if app.config['T_MEDIA_FROM'] == 'direct': return url elif app.config['T_MEDIA_FROM'] == 'filesystem': @@ -343,6 +331,8 @@ def replace_media_url(url): return url.replace(orig, repl) else: return url + else: + return url @app.route('/') @@ -352,11 +342,9 @@ def root(): @app.route('/tweet/') def index(): - tdb = get_tdb() total_tweets = len(tdb) - default_user = app.config.get('T_DEFAULT_USER') - if default_user: + if default_user := app.config.get('T_DEFAULT_USER'): latest_tweets = tdb.search(keyword='*', user_screen_name=default_user, limit=10) else: latest_tweets = [tdb[tid] for tid in itertools.islice(reversed(tdb), 10)] @@ -372,10 +360,8 @@ def index(): @lru_cache(maxsize=1024) -def fetch_tweet(tweet_id): - +def fetch_tweet(tweet_id: int | str) -> dict: token = app.config['T_TWITTER_TOKEN'] - resp = requests.get( 'https://api.twitter.com/1.1/statuses/show.json', headers={ @@ -395,7 +381,6 @@ def fetch_tweet(tweet_id): @app.route('/tweet/.') def get_tweet(tweet_id, ext): - if ext not in ('txt', 'json', 'html'): flask.abort(404) @@ -424,7 +409,7 @@ def get_tweet(tweet_id, ext): # HTML output - # Extract list images + # Extract media images = [] videos = [] try: @@ -434,7 +419,7 @@ def get_tweet(tweet_id, ext): entities = tweet['entities'] media = entities.get('media', []) for m in media: - # type = video + # type is video if m.get('type') == 'video': variants = m['video_info']['variants'] hq_variant = max(variants, key=lambda v: v.get('bitrate', -1)) @@ -444,7 +429,7 @@ def get_tweet(tweet_id, ext): videos.append({ 'url': media_url, }) - # type = photo + # type is photo elif m.get('type') == 'photo': media_url = m['media_url_https'] if not _is_external_tweet: @@ -453,7 +438,7 @@ def get_tweet(tweet_id, ext): 'url': media_url, 'description': m.get('description', '') }) - # type = unknown + # type is unknown else: pass @@ -470,12 +455,12 @@ def get_tweet(tweet_id, ext): @app.route('/tweet/media/') -def get_media_from_filesystem(fs_path): +def get_media_from_filesystem(fs_path: str): return flask.send_from_directory(app.config['T_MEDIA_FS_PATH'], fs_path) @app.route('/tweet/search.') -def search_tweet(ext): +def search_tweet(ext: str): if ext not in ('html', 'txt', 'json'): flask.abort(404) @@ -491,9 +476,8 @@ def search_tweet(ext): indexes = tdb.get_indexes() user = flask.request.args.get('u', '') - keyword = flask.request.args.get('q', '') index = flask.request.args.get('i', '') - if keyword: + if keyword := flask.request.args.get('q', ''): tweets = tdb.search( keyword=keyword, user_screen_name=user, From 6dbd2c542ab45772d2a03e5236dd4f92f8690c00 Mon Sep 17 00:00:00 2001 From: Zhuoyun Wei Date: Sun, 9 Jul 2023 01:07:13 -0700 Subject: [PATCH 12/12] fix: Python 3.8 compat --- ash.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ash.py b/ash.py index 205b602..f66dbb0 100644 --- a/ash.py +++ b/ash.py @@ -2,6 +2,8 @@ A Flask-based web server that serves Twitter Archive. ''' +from __future__ import annotations + import re import pprint import itertools