Skip to content

Commit

Permalink
Merge branch '2.14.0-Beta'
Browse files Browse the repository at this point in the history
  • Loading branch information
ThioJoe committed Jan 26, 2022
2 parents acd7bb8 + a47a090 commit 70bedd9
Show file tree
Hide file tree
Showing 9 changed files with 446 additions and 177 deletions.
2 changes: 1 addition & 1 deletion Scripts/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def fetch_user():

if config == None:
configMatch = None # Used only if channel ID is set in the config
elif config and config['your_channel_id'] == "ask":
elif config['your_channel_id'] == "ask":
configMatch = None
elif validation.validate_channel_id(config['your_channel_id'])[0] == True:
if config['your_channel_id'] == channelID:
Expand Down
152 changes: 110 additions & 42 deletions Scripts/community_downloader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
# Modified from original at: https://github.com/egbertbouman/youtube-comment-downloader

from __future__ import print_function
from Scripts.shared_imports import *

import argparse
import io
Expand All @@ -14,6 +14,7 @@
import requests

YOUTUBE_VIDEO_URL = 'https://www.youtube.com/post/{youtube_id}'
YOUTUBE_COMMUNITY_TAB_URL = 'https://www.youtube.com/channel/{channel_id}/community'

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'

Expand Down Expand Up @@ -63,6 +64,38 @@ def get_post_channel_url(youtube_id):
except KeyError:
return None

# -----------------------------------------------------------------------------

def fetch_recent_community_posts(channel_id):
    """Collect the posts listed on a channel's community tab.

    Requests the community tab page for ``channel_id``, parses the initial-data
    JSON matched by ``YT_INITIAL_DATA_RE`` and gathers every
    ``backstagePostRenderer`` found under the first ``itemSectionRenderer``
    that ``search_dict`` yields.

    Args:
        channel_id: Channel ID substituted into ``YOUTUBE_COMMUNITY_TAB_URL``.

    Returns:
        A list of single-entry dicts ``{post_id: text_sample}``, in the reverse
        of the order in which the posts were found in the page data. The
        sample is the first text run of the post with surrounding whitespace,
        newlines and carriage returns removed, or "[No Text For This Post]"
        when the post has no usable text run.
    """
    session = requests.Session()
    session.headers['User-Agent'] = USER_AGENT
    community_url = YOUTUBE_COMMUNITY_TAB_URL.format(channel_id=channel_id)
    response = session.get(community_url)

    # NOTE(review): 'uxe=' in the final URL appears to signal a redirect to the
    # cookie-consent page; the request is retried with a consent cookie - confirm.
    if 'uxe=' in response.request.url:
        session.cookies.set('CONSENT', 'YES+cb', domain='.youtube.com')
        response = session.get(community_url)

    html = response.text
    data = json.loads(regex_search(html, YT_INITIAL_DATA_RE, default=''))
    section = next(search_dict(data, 'itemSectionRenderer'), None)

    # Use list to keep in order - each entry maps a post ID to a sample of its text
    recentPostsListofDicts = []
    for post in search_dict(section, 'backstagePostRenderer'):
        post_id = post['postId']
        try:
            text = post['contentText']['runs'][0]['text'].strip().replace('\n', '').replace('\r', '')
        except (KeyError, IndexError):
            # KeyError: no text fields at all; IndexError: 'runs' exists but is empty.
            text = "[No Text For This Post]"
        recentPostsListofDicts.append({post_id: text})

    recentPostsListofDicts.reverse()  # Reverse list so newest posts are first

    return recentPostsListofDicts

# -----------------------------------------------------------------------------

def download_comments(youtube_id, sort_by=SORT_BY_RECENT, language=None, sleep=.1):
session = requests.Session()
session.headers['User-Agent'] = USER_AGENT
Expand All @@ -86,6 +119,7 @@ def download_comments(youtube_id, sort_by=SORT_BY_RECENT, language=None, sleep=.
renderer = next(search_dict(section, 'continuationItemRenderer'), None) if section else None
if not renderer:
# Comments disabled?
print("\nError: 'continuationItemRenderer' not found in page data. Are comments disabled?")
return

needs_sorting = sort_by != SORT_BY_POPULAR
Expand Down Expand Up @@ -118,17 +152,37 @@ def download_comments(youtube_id, sort_by=SORT_BY_RECENT, language=None, sleep=.
# Process the 'Show more replies' button
continuations.append(next(search_dict(item, 'buttonRenderer'))['command'])

# Get total comments amount for post
try:
commentsHeader = list(search_dict(response, 'commentsHeaderRenderer'))
if commentsHeader:
postCommentsText = commentsHeader[0]['countText']['runs'][0]['text'].replace(',', '')
if 'k' in postCommentsText.lower():
totalPostComments = int(postCommentsText.replace('k', ''))*1000
else:
totalPostComments = int(postCommentsText)
else:
totalPostComments = None
except (KeyError, ValueError):
totalPostComments = -1

for comment in reversed(list(search_dict(response, 'commentRenderer'))):
yield {'cid': comment['commentId'],
'text': ''.join([c['text'] for c in comment['contentText'].get('runs', [])]),
'time': comment['publishedTimeText']['runs'][0]['text'],
'author': comment.get('authorText', {}).get('simpleText', ''),
'channel': comment['authorEndpoint']['browseEndpoint'].get('browseId', ''),
'votes': comment.get('voteCount', {}).get('simpleText', '0'),
'photo': comment['authorThumbnail']['thumbnails'][-1]['url'],
'heart': next(search_dict(comment, 'isHearted'), False)}
# Yield instead of return, function called by for loop
yield {
'cid': comment['commentId'],
'text': ''.join([c['text'] for c in comment['contentText'].get('runs', [])]),
'time': comment['publishedTimeText']['runs'][0]['text'],
'author': comment.get('authorText', {}).get('simpleText', ''),
'channel': comment['authorEndpoint']['browseEndpoint'].get('browseId', ''),
'votes': comment.get('voteCount', {}).get('simpleText', '0'),
'photo': comment['authorThumbnail']['thumbnails'][-1]['url'],
'heart': next(search_dict(comment, 'isHearted'), False),

time.sleep(sleep)
# Extra data not specific to comment:
'totalPostComments': totalPostComments
}

#time.sleep(sleep)


def search_dict(partial, search_key):
Expand All @@ -146,38 +200,52 @@ def search_dict(partial, search_key):
stack.append(value)


def main(communityPostID=None, limit=1000, sort=SORT_BY_RECENT, language=None):
    """Download the comments of one community post into a dictionary.

    Args:
        communityPostID: ID of the post whose comments are loaded.
        limit: Stop after this many comments; a falsy value disables the cap.
        sort: Sort mode passed through to ``download_comments``.
        language: Language value passed through to ``download_comments``.

    Returns:
        A dict mapping each comment ID to a dict with the keys
        ``commentText``, ``authorName`` and ``authorChannelID``.

    Any exception (including the ValueError raised for a missing post ID) is
    printed and the process exits with status 1.
    """
    def show_progress(loaded):
        # Rewrite the same console line with the running total.
        sys.stdout.write(' Loaded %d comment(s)\r' % loaded)
        sys.stdout.flush()

    try:
        if not communityPostID:
            raise ValueError('you need to specify a Youtube ID')

        print('\nLoading Youtube comments for post:', communityPostID)
        show_progress(0)
        started_at = time.time()

        collected = {}
        comment_stream = download_comments(communityPostID, sort, language)
        for loaded, entry in enumerate(comment_stream, start=1):
            comment_id = entry['cid']
            comment_text = entry['text']
            author_name = entry['author']
            author_channel = entry['channel']
            collected[comment_id] = {
                'commentText': comment_text,
                'authorName': author_name,
                'authorChannelID': author_channel,
            }

            show_progress(loaded)
            if limit and loaded >= limit:
                break

        print('\n[{:.2f} seconds] Done!'.format(time.time() - started_at))
        return collected

    except Exception as e:
        print('Error:', str(e))
        sys.exit(1)
def main(communityPostID=None, limit=1000, sort=SORT_BY_RECENT, language=None, postScanProgressDict=None, postText=None):
    """Download the comments of one community post, printing progress as it goes.

    Args:
        communityPostID: ID of the post whose comments are loaded.
        limit: Stop after this many comments; a falsy value disables the cap.
        sort: Sort mode passed through to ``download_comments``.
        language: Language value passed through to ``download_comments``.
        postScanProgressDict: Optional dict with ``scanned`` and ``total`` keys,
            shown as ``[scanned/total]`` in the header line.
        postText: Optional post text; its first 90 characters are printed.

    Returns:
        A dict mapping each comment ID to a dict with the keys
        ``commentText``, ``authorName`` and ``authorChannelID``.

    Raises:
        ValueError: If ``communityPostID`` is missing or empty.
    """
    if not communityPostID:
        raise ValueError('you need to specify a Youtube ID')

    if postScanProgressDict:
        i = postScanProgressDict['scanned']
        j = postScanProgressDict['total']
        print(f'\n\n [{i}/{j}] Post ID: {communityPostID}')
    else:
        print(f'\n Loading Comments For Post: {communityPostID}')

    if postText:
        print(f" > {F.LIGHTCYAN_EX}Post Text Sample:{S.R} {postText[0:90]}")

    count = 0
    totalComments = 0
    commentsDict = {}
    for comment in download_comments(communityPostID, sort, language):
        commentID = comment['cid']
        commentText = comment['text']
        authorName = comment['author']
        authorChannelID = comment['channel']
        commentsDict[commentID] = {'commentText': commentText, 'authorName': authorName, 'authorChannelID': authorChannelID}

        # Print Stats
        count += 1

        # Doesn't return a number after first page, so don't update after that
        if comment['totalPostComments']:
            totalComments = comment['totalPostComments']

        # Only show a percentage when a positive total is known. The total stays
        # 0 when no count was reported and is -1 when parsing it failed; dividing
        # by 0 here would raise ZeroDivisionError on the first comment.
        if totalComments > 0:
            percent = (count / totalComments) * 100
            progressStats = f"[ {count} / {totalComments} ]".ljust(15, " ") + f" ({percent:.2f}%)"
            print(f' > Retrieving Post Comments - {progressStats}', end='\r')
        else:
            print(f' > Loaded {F.YELLOW}{count}{S.R} comment(s)', end='\r')

        if limit and count >= limit:
            print(" ")
            break

    print(" ")
    return commentsDict


if __name__ == "__main__":
Expand Down
50 changes: 34 additions & 16 deletions Scripts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ def update_last_checked():
else:
return spamListDict
latestRelease = response.json()["tag_name"]
except OSError as ox:
if silentCheck == True:
return spamListDict
else:
if "WinError 10013" in str(ox):
print(f"{B.RED}{F.WHITE}WinError 10013:{S.R} The OS blocked the connection to GitHub. Check your firewall settings.\n")
return False
except:
if silentCheck == True:
return spamListDict
Expand Down Expand Up @@ -137,34 +144,40 @@ def check_for_update(currentVersion, updateReleaseChannel, silentCheck=False):
if silentCheck == False:
print(f"\n{B.RED}{F.WHITE}Error [U-4]:{S.R} Got an 403 (ratelimit_reached) when attempting to check for update.")
print(f"This means you have been {F.YELLOW}rate limited by github.com{S.R}. Please try again in a while.\n")
return False
else:
return False
print(f"\n{B.RED}{F.WHITE}Error [U-4]:{S.R} Got an 403 (ratelimit_reached) when attempting to check for update.")
return None

else:
if silentCheck == False:
print(f"{B.RED}{F.WHITE}Error [U-3]:{S.R} Got non 200 status code (got: {response.status_code}) when attempting to check for update.\n")
print(f"If this keeps happening, you may want to report the issue here: https://github.com/ThioJoe/YT-Spammer-Purge/issues")
if silentCheck == False:
return False
else:
return False
print(f"{B.RED}{F.WHITE}Error [U-3]:{S.R} Got non 200 status code (got: {response.status_code}) when attempting to check for update.\n")
return None

else:
# assume 200 response
# assume 200 response (good)
if updateReleaseChannel == "stable":
latestVersion = response.json()["name"]
isBeta = False
elif updateReleaseChannel == "all":
latestVersion = response.json()[0]["name"]
isBeta = response.json()[0]["prerelease"]

except OSError as ox:
if "WinError 10013" in str(ox):
print(f"{B.RED}{F.WHITE}WinError 10013:{S.R} The OS blocked the connection to GitHub. Check your firewall settings.\n")
else:
print(f"{B.RED}{F.WHITE}Unknown OSError{S.R} Error occurred while checking for updates\n")
return None
except Exception as e:
if silentCheck == False:
print(e + "\n")
print(f"{B.RED}{F.WHITE}Error [Code U-1]:{S.R} Problem while checking for updates. See above error for more details.\n")
print("If this keeps happening, you may want to report the issue here: https://github.com/ThioJoe/YT-Spammer-Purge/issues")
return False
elif silentCheck == True:
return False
print(f"{B.RED}{F.WHITE}Error [Code U-1]:{S.R} Unknown problem while checking for updates. See above error for more details.\n")
return None

if parse_version(latestVersion) > parse_version(currentVersion):
isUpdateAvailable = True
Expand Down Expand Up @@ -286,7 +299,7 @@ def check_for_update(currentVersion, updateReleaseChannel, silentCheck=False):

else:
# We do this because we pull the .exe for windows, but maybe we could use os.system('git pull')? Because this is a GIT repo, unlike the windows version
print(f"> {F.RED} Error:{S.R} You are using an unsupported os for the autoupdater (macos/linux). \n This updater only supports Windows (right now) Feel free to get the files from github: https://github.com/ThioJoe/YT-Spammer-Purge")
print(f"> {F.RED} Error:{S.R} You are using an unsupported OS for the autoupdater (macos/linux). \n This updater only supports Windows (right now). Feel free to get the files from github: https://github.com/ThioJoe/YT-Spammer-Purge")
return False
elif userChoice == "False" or userChoice == None:
return False
Expand All @@ -297,13 +310,11 @@ def check_for_update(currentVersion, updateReleaseChannel, silentCheck=False):
elif parse_version(latestVersion) == parse_version(currentVersion):
if silentCheck == False:
print(f"\nYou have the {F.LIGHTGREEN_EX}latest{S.R} version: {F.LIGHTGREEN_EX}" + currentVersion)
return False
return False
else:
if silentCheck == False:
print("\nNo newer release available - Your Version: " + currentVersion + " -- Latest Version: " + latestVersion)
return False
elif silentCheck == True:
return isUpdateAvailable
return False


######################### Try To Get Remote File ##########################
Expand Down Expand Up @@ -520,8 +531,15 @@ def list_config_files(relativePath=None):
# Only exact matches, no backups
if file.lower() == "spampurgeconfig" + match + ".ini":
fileList.append(file)
except:
pass
except AttributeError as ax:
if "NoneType" in str(ax):
pass
else:
traceback.print_exc()
print("--------------------------------------------------------------------------------")
print("Something went wrong when getting list of config files. Check your regex.")
input("\nPress Enter to exit...")
sys.exit()

return fileList

Expand Down
4 changes: 2 additions & 2 deletions Scripts/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def print_prepared_comments(current, scanVideoID, comments, j, loggingEnabled, s
add_sample(current, author_id_local, author, text, matchReason)

# Build comment direct link
if scanMode == "communityPost":
if scanMode == "communityPost" or scanMode == "recentCommunityPosts":
directLink = "https://www.youtube.com/post/" + videoID + "?lc=" + comment_id_local
else:
directLink = "https://www.youtube.com/watch?v=" + videoID + "&lc=" + comment_id_local
Expand Down Expand Up @@ -586,7 +586,7 @@ def prepare_logFile_settings(current, config, miscData, jsonSettingsDict, filter

# Set where to put log files
defaultLogPath = "logs"
if config and config['log_path']:
if config['log_path']:
if config['log_path'] == "default": # For backwards compatibility, can remove later on
logPath = defaultLogPath
else:
Expand Down
Loading

0 comments on commit 70bedd9

Please sign in to comment.