Skip to content

Commit 52366d4

Browse files
[girlswithmuscle] init
1 parent ad73789 commit 52366d4

File tree

4 files changed

+279
-0
lines changed

4 files changed

+279
-0
lines changed

docs/supportedsites.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,12 @@ Consider all listed sites to potentially be NSFW.
271271
<td>Favorites, Pools, Posts, Redirects, Tag Searches</td>
272272
<td></td>
273273
</tr>
274+
<tr>
275+
<td>Girls With Muscle</td>
276+
<td>https://www.girlswithmuscle.com/</td>
277+
<td>Posts, Galleries, Search Results, Favorites</td>
278+
<td>Supported</td>
279+
</tr>
274280
<tr>
275281
<td>Gofile</td>
276282
<td>https://gofile.io/</td>

gallery_dl/extractor/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
"gelbooru",
5656
"gelbooru_v01",
5757
"gelbooru_v02",
58+
"girlswithmuscle",
5859
"gofile",
5960
"hatenablog",
6061
"hentai2read",
gallery_dl/extractor/girlswithmuscle.py

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
import re
8+
9+
from .common import Extractor, Message
10+
from .. import text, exception
11+
from ..cache import cache
12+
13+
14+
class GirlswithmuscleExtractor(Extractor):
    """Base class for girlswithmuscle.com extractors

    Provides the username/password login procedure shared by the
    concrete extractors.
    """

    def login(self):
        """Log in if a username is configured

        Credentials come from '_get_auth_info()'. The cookies returned by
        the cached '_login_impl()' are handed to 'cookies_update()'.
        Without a username this is a no-op.
        """
        username, password = self._get_auth_info()
        if username:
            self.cookies_update(self._login_impl(username, password))

    @staticmethod
    def _is_logged_in(page_text: str) -> bool:
        """Return True if 'page_text' does not contain the text 'Log in'"""
        return 'Log in' not in page_text

    @staticmethod
    def _get_csrfmiddlewaretoken(page: str) -> str:
        """Return the value of the 'csrfmiddlewaretoken' form field"""
        return text.extract(
            page,
            'name="csrfmiddlewaretoken" value="',
            '"'
        )[0]

    def _open_login_page(self):
        """Request the login page and return the CSRF token of its form"""
        url = "https://www.girlswithmuscle.com/login/?next=/"
        response = self.request(url)
        return self._get_csrfmiddlewaretoken(response.text)

    def _send_login_request(self, username, password, csrf_mw):
        """Submit the login form

        Raises exception.AuthenticationError if the response reports
        wrong credentials or still contains a 'Log in' text.
        """
        data = {
            "csrfmiddlewaretoken": csrf_mw,
            "username": username,
            "password": password,
            "next": "/"
        }

        # Otherwise will be 403 Forbidden.
        # Sent with this request only, so that they do not stay on the
        # session and leak into every later (non-login) request.
        headers = {
            "Origin": "https://www.girlswithmuscle.com",
            "Referer": "https://www.girlswithmuscle.com/login/?next=/",
        }

        # if successful, will update cookies
        url = "https://www.girlswithmuscle.com/login/"
        response = self.request(
            url, method="post", data=data, headers=headers)

        if "Wrong username or password" in response.text:
            raise exception.AuthenticationError()
        elif not self._is_logged_in(response.text):
            raise exception.AuthenticationError("Account data is missing")

    # maxage is 28 days expressed in seconds;
    # keyarg=1 presumably keys the cache on 'username' - TODO confirm
    @cache(maxage=28 * 86400, keyarg=1)
    def _login_impl(self, username, password):
        """Perform the login and return the session cookies as a dict"""
        self.log.info("Logging in as %s", username)

        csrf_mw = self._open_login_page()
        self._send_login_request(username, password, csrf_mw)
        return {c.name: c.value for c in self.session.cookies}
68+
69+
70+
class GirlswithmusclePostExtractor(GirlswithmuscleExtractor):
    """Extractor for individual posts on girlswithmuscle.com"""
    category = "girlswithmuscle"
    subcategory = "post"
    directory_fmt = ("{category}", "{model}")
    filename_fmt = "{model}_{id}.{extension}"
    archive_fmt = "{type}_{model}_{id}"
    pattern = (r"(?:https?://)?(?:www\.)?girlswithmuscle\.com"
               r"/(\d+)/")

    def __init__(self, match):
        Extractor.__init__(self, match)
        # numeric post ID captured by 'pattern'
        self.id = match.groups()[0]

    def items(self):
        """Yield the directory and URL messages for this post

        Raises exception.NotFoundError if the page contains neither an
        image nor a video source.
        """
        self.login()
        url = "https://girlswithmuscle.com/{}/".format(self.id)
        page = self.request(url).text

        # pictures use a 'main-image' element, videos a <source> element
        url = text.extr(page, 'class="main-image" src="', '"')
        if url:
            content_type = 'picture'
        else:
            url = text.extr(page, '<source src="', '"')
            content_type = 'video'

        if not url:
            # nothing downloadable on this page; do not emit an empty URL
            raise exception.NotFoundError("post")

        metadata = self.metadata(page, url, content_type)
        yield Message.Directory, metadata
        yield Message.Url, url, metadata

    def metadata(self, page, url, content_type):
        """Build the metadata dict for a post

        'page' is the post's HTML, 'url' the media URL found in it and
        'content_type' either 'picture' or 'video'.
        """
        info_source_begin = \
            '<div class="image-info" id="info-source" style="display: none">'
        info_source_end = "</div>"
        source = text.remove_html(
            text.extr(page, info_source_begin, info_source_end))

        info_uploader_begin = '<div class="image-info" id="info-uploader">'
        info_uploader_end = "</div>"
        uploader = text.remove_html(
            text.extr(page, info_uploader_begin, info_uploader_end))

        tags = text.extr(
            page, 'class="selected-tags">', "</span>", ''
        ).split(', ')
        # ''.split(', ') gives [''], so drop empty entries
        tags = [tag for tag in tags if tag]

        score = text.parse_int(text.remove_html(
            text.extr(page, 'Score: <b>', '</span', '0')))

        model = self._parse_model(page)

        return {
            'id': self.id,
            'model': model,
            'model_list': self._parse_model_list(model),
            'tags': tags,
            'posted_dt': text.extr(
                page, 'class="hover-time" title="', '"', ''
            ),
            'is_favorite': self._parse_is_favorite(page),
            'source_filename': source,
            'uploader': uploader,
            'score': score,
            'comments': self._parse_comments(page),
            'extension': text.ext_from_url(url),
            'type': content_type,
        }

    @staticmethod
    def _parse_model(page):
        """Return the page title as model name, or 'unknown'

        'unknown' is returned when the title is missing or empty, or
        when it starts with 'Picture #'.
        """
        model = text.extr(page, '<title>', "</title>", '')
        if not model or model.startswith('Picture #'):
            return 'unknown'
        return model

    @staticmethod
    def _parse_model_list(model):
        """Split a comma-separated model string into a list of names"""
        if model == 'unknown':
            return []
        return [name.strip() for name in model.split(',')]

    @staticmethod
    def _parse_is_favorite(page):
        """Return True/False for the favorite state, None if undetermined"""
        fav_button = text.extr(page, 'id="favorite-button">', "</span>", '')
        unfav_button = text.extr(page,
                                 'class="actionbutton unfavorite-button">',
                                 "</span>", '')

        is_favorite = None
        if unfav_button == 'Unfavorite':
            is_favorite = True
        if fav_button == 'Favorite':
            is_favorite = False

        return is_favorite

    @staticmethod
    def _parse_comments(page):
        """Return the stripped contents of all 'comment-body-inner' divs"""
        comments = text.extract_iter(page, '<div class="comment-body-inner">',
                                     '</div>')
        return [comment.strip() for comment in comments]
173+
174+
175+
class GirlswithmuscleGalleryExtractor(GirlswithmuscleExtractor):
    """Extractor for image listings under girlswithmuscle.com/images/"""
    category = "girlswithmuscle"
    subcategory = "gallery"
    pattern = r"(?:https?://)?(?:www\.)?girlswithmuscle\.com/images/(.*)"

    def __init__(self, match):
        Extractor.__init__(self, match)
        # everything after '/images/', e.g. a '?name=...' query string
        self.query = match.groups()[0]

    def pages(self):
        """Yield the HTML of every result page, starting with the first

        Raises exception.AuthorizationError if the first request gets
        redirected.
        """
        url = "https://www.girlswithmuscle.com/images/{}".format(self.query)
        response = self.request(url)
        # 'history' is only non-empty for an actual redirect; comparing URL
        # strings misfires when the URL gets re-quoted while being sent
        if response.history:
            msg = ('Request was redirected to "{}", try logging in'.
                   format(response.url))
            raise exception.AuthorizationError(msg)
        page = response.text

        yield page

        match = re.search(r"Page (\d+) of (\d+)", page)
        if match is None:
            # no pagination info on the page: nothing more to fetch
            return
        current, total = match.groups()
        current, total = text.parse_int(current), text.parse_int(total)

        for i in range(current + 1, total + 1):
            url = ("https://www.girlswithmuscle.com/images/{}/{}".
                   format(i, self.query))
            yield self.request(url).text

    def items(self):
        """Queue every image found on the result pages as a post URL"""
        self.login()
        for page in self.pages():
            # depends on the page only, so compute it once per page
            gallery_name = self._parse_gallery_name(page)
            for imgid in text.extract_iter(page, 'id="imgid-', '"'):
                url = "https://www.girlswithmuscle.com/{}/".format(imgid)
                yield Message.Queue, url, {
                    "gallery_name": gallery_name,
                    "_extractor": GirlswithmusclePostExtractor
                }

    @staticmethod
    def _parse_gallery_name(page):
        """Return the contents of the page's <title> element"""
        return text.extr(page, "<title>", "</title>")

test/results/girlswithmuscle.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU General Public License version 2 as
5+
# published by the Free Software Foundation.
6+
7+
gallery_dl = __import__("gallery_dl.extractor.girlswithmuscle")
_gwm = getattr(gallery_dl.extractor, "girlswithmuscle")


# Expected results for the girlswithmuscle extractors.
# '#'-prefixed keys describe the test itself, all other keys are
# expected metadata values (or their types).
__tests__ = (
    # picture post
    {
        "#url": "https://www.girlswithmuscle.com/2136096/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class": _gwm.GirlswithmusclePostExtractor,

        "id": "2136096",
        "model": str,
        "tags": list,
        "posted_dt": "2023-12-12 16:04:03.438979+00:00",
        "source_filename": "IMG_8714.png",
        "uploader": "toni1991",
        "score": int,
        "extension": "png",
        "type": "picture",
        # These are not available, unless you're logged in
        "is_favorite": None,
        "comments": list,
    },

    # video post
    {
        "#url": "https://www.girlswithmuscle.com/1841638/",
        "#category": ("", "girlswithmuscle", "post"),
        "#class": _gwm.GirlswithmusclePostExtractor,

        "id": "1841638",
        "model": str,
        "tags": list,
        "posted_dt": "2022-08-16 17:20:16.006855+00:00",
        "source_filename": "Snapinsta_299658611_1185267375661829_6167677658282784059_n.mp4",
        "uploader": "BriedFrain",
        "score": int,
        "extension": "mp4",
        "type": "video",
    },

    # listing selected by model name
    {
        "#url": "https://www.girlswithmuscle.com/images/?name=Samantha%20Jerring",
        "#category": ("", "girlswithmuscle", "gallery"),
        "#class": _gwm.GirlswithmuscleGalleryExtractor,

        "#count": range(300, 3000),
        "gallery_name": str,
    },
)

0 commit comments

Comments
 (0)