Skip to content

Commit 157bbf3

Browse files
[girlswithmuscle] init
1 parent ad73789 commit 157bbf3

File tree

4 files changed

+281
-0
lines changed

4 files changed

+281
-0
lines changed

docs/supportedsites.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,12 @@ Consider all listed sites to potentially be NSFW.
271271
<td>Favorites, Pools, Posts, Redirects, Tag Searches</td>
272272
<td></td>
273273
</tr>
274+
<tr>
275+
<td>Girls With Muscle</td>
276+
<td>https://www.girlswithmuscle.com/</td>
277+
<td>Posts, Galleries, Search Results, Favorites</td>
278+
<td>Supported</td>
279+
</tr>
274280
<tr>
275281
<td>Gofile</td>
276282
<td>https://gofile.io/</td>

gallery_dl/extractor/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"gelbooru",
5656
"gelbooru_v01",
5757
"gelbooru_v02",
58+
"girlswithmuscle",
5859
"gofile",
5960
"hatenablog",
6061
"hentai2read",
gallery_dl/extractor/girlswithmuscle.py

Lines changed: 218 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,218 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
import re
8+
9+
from .common import Extractor, Message
10+
from .. import text, exception
11+
from ..cache import cache
12+
13+
14+
class GirlswithmuscleExtractor(Extractor):
    """Base class for girlswithmuscle.com extractors (login handling)"""

    def login(self):
        """Log in and apply the resulting cookies if a username is set"""
        username, password = self._get_auth_info()
        if not username:
            return
        cookies = self._login_impl(username, password)
        self.cookies_update(cookies)

    @staticmethod
    def _is_logged_in(page_text: str) -> bool:
        """Return False if 'page_text' contains the text 'Log in'"""
        shows_login = 'Log in' in page_text
        return not shows_login

    @staticmethod
    def _get_csrfmiddlewaretoken(page: str) -> str:
        """Return the value of the 'csrfmiddlewaretoken' field in 'page'"""
        result = text.extract(
            page, 'name="csrfmiddlewaretoken" value="', '"')
        return result[0]

    def _open_login_page(self):
        """Request the login page to obtain the second CSRF token"""
        login_page = self.request(
            "https://www.girlswithmuscle.com/login/?next=/")
        return self._get_csrfmiddlewaretoken(login_page.text)

    def _send_login_request(self, username, password, csrf_mw):
        """Submit the login form

        Raises AuthenticationError if the response reports wrong
        credentials or still looks like a logged-out page.
        """
        form = {
            "csrfmiddlewaretoken": csrf_mw,
            "username": username,
            "password": password,
            "next": "/"
        }

        # Without these two headers the site answers 403 Forbidden
        headers = self.session.headers
        headers['Origin'] = 'https://www.girlswithmuscle.com'
        headers['Referer'] = 'https://www.girlswithmuscle.com/login/?next=/'

        # a successful login updates the session cookies
        response = self.request(
            "https://www.girlswithmuscle.com/login/",
            method="post", data=form)
        body = response.text

        if "Wrong username or password" in body:
            raise exception.AuthenticationError()
        if not self._is_logged_in(body):
            raise exception.AuthenticationError("Account data is missing")

    @cache(maxage=28 * 86400, keyarg=1)
    def _login_impl(self, username, password):
        """Perform the login and return the session cookies as a dict"""
        self.log.info("Logging in as %s", username)

        token = self._open_login_page()
        self._send_login_request(username, password, token)
        return {cookie.name: cookie.value for cookie in self.session.cookies}
68+
69+
70+
class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
    """Extractor for individual posts on girlswithmuscle.com"""
    category = "girlswithmuscle"
    subcategory = "post"
    directory_fmt = ("{category}", "{model}")
    filename_fmt = "{model}_{id}.{extension}"
    archive_fmt = "{type}_{model}_{id}"
    pattern = (r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
               r"/(\d+)/")
    example = "https://www.girlswithmuscle.com/1841638/"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # numeric post ID captured by 'pattern'
        self.id = match.group(1)

    def items(self):
        """Yield the directory and URL messages for a single post

        Raises NotFoundError if the post page is empty or contains
        neither a picture nor a video source.
        """
        self.login()

        # use the same 'www.' host as the login and gallery requests
        url = "https://www.girlswithmuscle.com/{}/".format(self.id)
        page = self.request(url).text

        # NOTE(review): the response text is presumably always a string,
        # so test for an empty body instead of comparing against None
        if not page:
            raise exception.NotFoundError("post")

        url = text.extr(page, 'class="main-image" src="', '"')
        if url:
            content_type = 'picture'
        else:
            url = text.extr(page, '<source src="', '"')
            content_type = 'video'

        # neither marker matched: do not emit a download with an empty URL
        if not url:
            raise exception.NotFoundError("post")

        metadata = self.metadata(page, url, content_type)
        yield Message.Directory, metadata
        yield Message.Url, url, metadata

    def metadata(self, page, url, content_type):
        """Collect post metadata from the HTML in 'page'

        'url' is only used to derive the file extension and
        'content_type' is stored unchanged under the 'type' key.
        """
        info_source_begin = \
            '<div class="image-info" id="info-source" style="display: none">'
        info_source_end = "</div>"
        source = text.remove_html(
            text.extr(page, info_source_begin, info_source_end))

        info_uploader_begin = '<div class="image-info" id="info-uploader">'
        info_uploader_end = "</div>"
        uploader = text.remove_html(
            text.extr(page, info_uploader_begin, info_uploader_end))

        # comma-separated tag list; drop the empty entries produced
        # when the tag element is missing or empty
        tags = text.extr(
            page, 'class="selected-tags">', "</span>", ''
        ).split(', ')
        tags = [tag for tag in tags if tag]

        score = text.parse_int(text.remove_html(
            text.extr(page, 'Score: <b>', '</span', '0')))

        model = self._parse_model(page)

        return {
            'id': self.id,
            'model': model,
            'model_list': self._parse_model_list(model),
            'tags': tags,
            'posted_dt': text.extr(
                page, 'class="hover-time" title="', '"', ''
            ),
            'is_favorite': self._parse_is_favorite(page),
            'source_filename': source,
            'uploader': uploader,
            'score': score,
            'comments': self._parse_comments(page),
            'extension': text.ext_from_url(url),
            'type': content_type,
        }

    @staticmethod
    def _parse_model(page):
        """Return the page title as model name, or 'unknown'

        'unknown' is returned when the title is missing, empty,
        or starts with 'Picture #'.
        """
        # default to '' so a missing <title> cannot raise AttributeError
        model = text.extr(page, '<title>', "</title>", '')
        if not model or model.startswith('Picture #'):
            return 'unknown'
        return model

    @staticmethod
    def _parse_model_list(model):
        """Split a comma-separated 'model' string into a list of names"""
        if model == 'unknown':
            return []
        return [name.strip() for name in model.split(',')]

    @staticmethod
    def _parse_is_favorite(page):
        """Return True/False from the favorite buttons, else None

        None is returned when neither button text is found, which the
        accompanying tests expect for requests made without login.
        """
        fav_button = text.extr(page, 'id="favorite-button">', "</span>", '')
        unfav_button = text.extr(page,
                                 'class="actionbutton unfavorite-button">',
                                 "</span>", '')

        is_favorite = None
        if unfav_button == 'Unfavorite':
            is_favorite = True
        if fav_button == 'Favorite':
            is_favorite = False

        return is_favorite

    @staticmethod
    def _parse_comments(page):
        """Return the stripped contents of all comment body elements"""
        comments = text.extract_iter(page, '<div class="comment-body-inner">',
                                     '</div>')
        return [comment.strip() for comment in comments]
174+
175+
176+
class GirlswithmuscleGalleryExtractor(GirlswithmuscleExtractor):
    """Extractor for galleries on girlswithmuscle.com"""
    category = "girlswithmuscle"
    subcategory = "gallery"
    pattern = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com/images/(.*)"
    example = "https://www.girlswithmuscle.com/images/?name=Samantha%20Jerring"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # everything after '/images/', e.g. '?name=...'
        self.query = match.group(1)

    def pages(self):
        """Yield the HTML of every result page of this gallery

        Raises AuthorizationError if the first request gets redirected.
        """
        url = "https://www.girlswithmuscle.com/images/{}".format(self.query)
        response = self.request(url)
        if url != response.url:
            msg = ('Request was redirected to "{}", try logging in'.
                   format(response.url))
            raise exception.AuthorizationError(msg)
        page = response.text

        yield page

        # Without a 'Page X of Y' indicator there is nothing to paginate;
        # treat the response as the only page instead of crashing on
        # 'None.groups()'
        match = re.search(r"Page (\d+) of (\d+)", page)
        if match is None:
            return
        current, total = match.groups()
        current, total = text.parse_int(current), text.parse_int(total)

        for i in range(current + 1, total + 1):
            url = ("https://www.girlswithmuscle.com/images/{}/{}".
                   format(i, self.query))
            yield self.request(url).text

    def items(self):
        """Queue every image found on the gallery pages as a post URL"""
        self.login()
        for page in self.pages():
            # identical for all images of a page, so parse it only once
            gallery_name = self._parse_gallery_name(page)
            for imgid in text.extract_iter(page, 'id="imgid-', '"'):
                url = "https://www.girlswithmuscle.com/{}/".format(imgid)
                yield Message.Queue, url, {
                    "gallery_name": gallery_name,
                    "_extractor": GirlswithmusclePostExtractor
                }

    @staticmethod
    def _parse_gallery_name(page):
        """Return the contents of the page's <title> element"""
        return text.extr(page, "<title>", "</title>")

test/results/girlswithmuscle.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
# Binds 'gallery_dl' to the top-level package and '_gwm' to the
# extractor module under test
import gallery_dl.extractor.girlswithmuscle

_gwm = gallery_dl.extractor.girlswithmuscle


__tests__ = (
    # picture post
    {
        "#url"     : "https://www.girlswithmuscle.com/2136096/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class"   : _gwm.GirlswithmusclePostExtractor,

        "id"             : "2136096",
        "model"          : str,
        "tags"           : list,
        "posted_dt"      : "2023-12-12 16:04:03.438979+00:00",
        "source_filename": "IMG_8714.png",
        "uploader"       : "toni1991",
        "score"          : int,
        "extension"      : "png",
        "type"           : "picture",
        # These are not available, unless you're logged in
        "is_favorite"    : None,
        "comments"       : list,
    },

    # video post
    {
        "#url"     : "https://www.girlswithmuscle.com/1841638/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class"   : _gwm.GirlswithmusclePostExtractor,

        "id"             : "1841638",
        "model"          : str,
        "tags"           : list,
        "posted_dt"      : "2022-08-16 17:20:16.006855+00:00",
        "source_filename": "Snapinsta_299658611_1185267375661829_6167677658282784059_n.mp4",
        "uploader"       : "BriedFrain",
        "score"          : int,
        "extension"      : "mp4",
        "type"           : "video",
    },

    # gallery selected by model name
    {
        "#url"     : "https://www.girlswithmuscle.com/images/?name=Samantha%20Jerring",
        "#category": ("", "girlswithmuscle", "gallery"),
        "#class"   : _gwm.GirlswithmuscleGalleryExtractor,

        "#count"      : range(300, 3000),
        "gallery_name": str,
    },

)

0 commit comments

Comments
 (0)