Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[hotleaks.tv] add support #5547

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,12 @@ Consider all listed sites to potentially be NSFW.
<td>Categories, Creators, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td>Hotleakstv</td>
<td>https://hotleaks.tv/</td>
<td>Categories, Creators, Posts, Search Results</td>
<td></td>
</tr>
<tr>
<td>Idol Complex</td>
<td>https://idol.sankakucomplex.com/</td>
Expand Down
1 change: 1 addition & 0 deletions gallery_dl/extractor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
"hiperdex",
"hitomi",
"hotleak",
"hotleakstv",
"idolcomplex",
"imagebam",
"imagechest",
Expand Down
180 changes: 180 additions & 0 deletions gallery_dl/extractor/hotleakstv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://hotleaks.tv/"""

from .common import Extractor, Message
from .. import text, exception
import binascii

BASE_PATTERN = r"(?:https?://)?(?:www\.)?hotleaks\.tv"


class HotleakstvExtractor(Extractor):
    """Common functionality shared by all hotleaks.tv extractors"""
    category = "hotleakstv"
    directory_fmt = ("{category}", "{creator}",)
    filename_fmt = "{creator}_{id}.{extension}"
    archive_fmt = "{type}_{creator}_{id}"
    root = "https://hotleaks.tv"

    def items(self):
        """Yield a Directory and a Url message for each post from posts()"""
        expected_status = (404,)
        for metadata in self.posts():
            # mark 404 as an expected status for this file's download
            metadata["_http_expected_status"] = expected_status
            yield Message.Directory, metadata
            yield Message.Url, metadata["url"], metadata

    def posts(self):
        """Return an iterable of post metadata dicts (empty by default)"""
        return ()

    def _pagination(self, url, params):
        """Yield the first link target of every listing entry on every page

        'params' is a query string; its 'page' value (default 1) selects
        the first page to request. Iteration ends at the first response
        that contains no '</article>' closing tag.
        """
        query = text.parse_query(params)
        page_num = text.parse_int(query.get("page"), 1)

        while True:
            query["page"] = page_num
            html = self.request(url, params=query).text
            if "</article>" not in html:
                break

            entries = text.extract_iter(
                html, '<article class="movie-item', '</article>')
            for entry in entries:
                yield text.extr(entry, '<a href="', '"')

            page_num += 1


def decode_video_url(url):
# cut first and last 16 characters, reverse, base64 decode
return binascii.a2b_base64(url[-17:15:-1]).decode()


class HotleakstvPostExtractor(HotleakstvExtractor):
    """Extractor for a single photo or video post on hotleaks.tv"""
    subcategory = "post"
    pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
               r"([^/]+)/(photo|video)/(\d+)")
    example = "https://hotleaks.tv/MODEL/photo/12345"

    def __init__(self, match):
        HotleakstvExtractor.__init__(self, match)
        # URL components: creator name, "photo" or "video", numeric post ID
        self.creator, self.type, self.id = match.groups()

    def posts(self):
        """Fetch the post page and return a 1-tuple with its metadata"""
        post_url = "/".join((self.root, self.creator, self.type, self.id))
        html = text.extr(
            self.request(post_url).text,
            '<div class="movie-image thumb">', '</article>')

        post = {
            "id"     : text.parse_int(self.id),
            "creator": self.creator,
            "type"   : self.type,
        }

        if self.type == "video":
            # the stream address is stored obfuscated in the player config
            encoded = text.extr(text.unescape(html), '"src":"', '"')
            post["url"] = "ytdl:" + decode_video_url(encoded)
            text.nameext_from_url(post["url"], post)
            post["extension"] = "mp4"

        elif self.type == "photo":
            post["url"] = text.extr(html, 'data-src="', '"')
            text.nameext_from_url(post["url"], post)

        return (post,)


class HotleakstvCreatorExtractor(HotleakstvExtractor):
    """Extractor for all posts from a hotleaks.tv creator"""
    subcategory = "creator"
    pattern = (BASE_PATTERN + r"/(?!(?:hot|creators|videos|photos)(?:$|/))"
               r"([^/?#]+)/?$")
    example = "https://hotleaks.tv/MODEL"

    def __init__(self, match):
        HotleakstvExtractor.__init__(self, match)
        self.creator = match.group(1)

    def posts(self):
        """Return a generator over the metadata of all the creator's posts"""
        url = "{}/{}".format(self.root, self.creator)
        return self._pagination(url)

    def _pagination(self, url):
        """Yield one metadata dict per post from the creator's JSON listing

        The creator URL is requested with an 'X-Requested-With' header and
        an increasing 'page' parameter until a page decodes to an empty
        JSON value. A 429 response triggers a wait until the time given by
        its 'X-RateLimit-Reset' header, then the same page is retried; any
        other HttpError propagates to the caller.

        Each yielded dict is a new object, so consumers may keep or modify
        it without affecting later posts. Posts whose 'type' is neither
        0 (photo) nor 1 (video) are skipped.
        """
        headers = {"X-Requested-With": "XMLHttpRequest"}
        params = {"page": 1}

        while True:
            try:
                response = self.request(
                    url, headers=headers, params=params, notfound="creator")
            except exception.HttpError as exc:
                if exc.response.status_code == 429:
                    self.wait(
                        until=exc.response.headers.get("X-RateLimit-Reset"))
                    continue
                raise

            posts = response.json()
            if not posts:
                return

            for post in posts:
                # build a fresh dict per post: reusing one dict would alias
                # every yielded item and leak fields between posts
                data = {
                    "creator": self.creator,
                    "id"     : text.parse_int(post["id"]),
                }

                if post["type"] == 0:
                    data["type"] = "photo"
                    data["url"] = post["player"]
                    text.nameext_from_url(data["url"], data)

                elif post["type"] == 1:
                    data["type"] = "video"
                    data["url"] = "ytdl:" + decode_video_url(
                        post["stream_url_play"])
                    text.nameext_from_url(data["url"], data)
                    data["extension"] = "mp4"

                else:
                    # unknown post type: there is no URL to download, and
                    # items() requires a "url" key, so skip it
                    continue

                yield data
            params["page"] += 1


class HotleakstvCategoryExtractor(HotleakstvExtractor):
    """Extractor for the hot/creators/videos/photos listings on hotleaks.tv"""
    subcategory = "category"
    pattern = BASE_PATTERN + r"/(hot|creators|videos|photos)(?:/?\?([^#]+))?"
    example = "https://hotleaks.tv/photos"

    def __init__(self, match):
        HotleakstvExtractor.__init__(self, match)
        # listing name and the optional query string that follows it
        self._category, self.params = match.groups()

    def items(self):
        """Queue every listing entry for the extractor matching its kind"""
        listing = self._category

        # "hot" and "creators" list creators, "videos" and "photos" list posts
        if listing in ("hot", "creators"):
            data = {"_extractor": HotleakstvCreatorExtractor}
        elif listing in ("videos", "photos"):
            data = {"_extractor": HotleakstvPostExtractor}

        listing_url = "{}/{}".format(self.root, listing)
        for link in self._pagination(listing_url, self.params):
            yield Message.Queue, link, data


class HotleakstvSearchExtractor(HotleakstvExtractor):
    """Extractor for search result listings on hotleaks.tv"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search(?:/?\?([^#]+))"
    example = "https://hotleaks.tv/search?search=QUERY"

    def __init__(self, match):
        HotleakstvExtractor.__init__(self, match)
        # query string following "/search"
        self.params = match.group(1)

    def items(self):
        """Queue every search result for the creator extractor"""
        search_url = self.root + "/search"
        data = {"_extractor": HotleakstvCreatorExtractor}
        for link in self._pagination(search_url, self.params):
            yield Message.Queue, link, data
103 changes: 103 additions & 0 deletions test/results/hotleakstv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

from gallery_dl.extractor import hotleakstv
from gallery_dl import exception

# Expected results for the hotleaks.tv extractors.
# Each entry pairs a "#url" with the extractor class and category tuple it
# should resolve to; remaining "#"-prefixed keys are result expectations
# and plain keys are expected metadata values.
__tests__ = (
# single photo post: direct .jpg URL and full metadata
{
"#url"     : "https://hotleaks.tv/kaiyakawaii/photo/1617145",
"#category": ("", "hotleakstv", "post"),
"#class"   : hotleakstv.HotleakstvPostExtractor,
"#pattern" : r"https://hotleaks\.tv/storage/images/3625/1617145/fefdd5988dfcf6b98cc9e11616018868\.jpg",

"id"       : 1617145,
"creator"  : "kaiyakawaii",
"type"     : "photo",
"filename" : "fefdd5988dfcf6b98cc9e11616018868",
"extension": "jpg",
},

# single video post: "ytdl:"-prefixed m3u8 URL, extension reported as mp4
{
"#url"     : "https://hotleaks.tv/lilmochidoll/video/1625538",
"#category": ("", "hotleakstv", "post"),
"#class"   : hotleakstv.HotleakstvPostExtractor,
"#pattern" : r"ytdl:https://cdn12-leak\.camhdxx\.com/1661/1625538/index\.m3u8",

"id"       : 1625538,
"creator"  : "lilmochidoll",
"type"     : "video",
"filename" : "index",
"extension": "mp4",
},

# creator listing restricted to the first 200 results
{
"#url"     : "https://hotleaks.tv/kaiyakawaii",
"#category": ("", "hotleakstv", "creator"),
"#class"   : hotleakstv.HotleakstvCreatorExtractor,
"#range"   : "1-200",
"#count"   : 200,
},

# creator listing expected to yield more than 600 results
{
"#url"     : "https://hotleaks.tv/stellaviolet",
"#category": ("", "hotleakstv", "creator"),
"#class"   : hotleakstv.HotleakstvCreatorExtractor,
"#count"   : "> 600",
},

# unknown creator: NotFoundError is expected
{
"#url"     : "https://hotleaks.tv/doesnotexist",
"#category": ("", "hotleakstv", "creator"),
"#class"   : hotleakstv.HotleakstvCreatorExtractor,
"#exception": exception.NotFoundError,
},

# photos listing: results must match the post extractor's pattern
{
"#url"     : "https://hotleaks.tv/photos",
"#category": ("", "hotleakstv", "category"),
"#class"   : hotleakstv.HotleakstvCategoryExtractor,
"#pattern" : hotleakstv.HotleakstvPostExtractor.pattern,
"#range"   : "1-50",
"#count"   : 50,
},

# videos listing: only URL, category and class are specified
{
"#url"     : "https://hotleaks.tv/videos",
"#category": ("", "hotleakstv", "category"),
"#class"   : hotleakstv.HotleakstvCategoryExtractor,
},

# creators listing: results must match the creator extractor's pattern
{
"#url"     : "https://hotleaks.tv/creators",
"#category": ("", "hotleakstv", "category"),
"#class"   : hotleakstv.HotleakstvCategoryExtractor,
"#pattern" : hotleakstv.HotleakstvCreatorExtractor.pattern,
"#range"   : "1-50",
"#count"   : 50,
},

# hot listing: only URL, category and class are specified
{
"#url"     : "https://hotleaks.tv/hot",
"#category": ("", "hotleakstv", "category"),
"#class"   : hotleakstv.HotleakstvCategoryExtractor,
},

# search expected to return no results
{
"#url"     : "https://hotleaks.tv/search?search=gallery-dl",
"#category": ("", "hotleakstv", "search"),
"#class"   : hotleakstv.HotleakstvSearchExtractor,
"#count"   : 0,
},

# search expected to return more than 30 results
{
"#url"     : "https://hotleaks.tv/search?search=hannah",
"#category": ("", "hotleakstv", "search"),
"#class"   : hotleakstv.HotleakstvSearchExtractor,
"#count"   : "> 30",
},

)
Loading