Merge branch '2.14.0-Beta'

ThioJoe · Jan 26, 2022 · 70bedd9 · 70bedd9
2 parents acd7bb8 + a47a090
commit 70bedd9
Show file tree

Hide file tree

Showing 9 changed files with 446 additions and 177 deletions.
diff --git a/Scripts/auth.py b/Scripts/auth.py
@@ -155,7 +155,7 @@ def fetch_user():
 
   if config == None:
     configMatch = None # Used only if channel ID is set in the config
-  elif config and config['your_channel_id'] == "ask":
+  elif config['your_channel_id'] == "ask":
     configMatch = None
   elif validation.validate_channel_id(config['your_channel_id'])[0] == True:
     if config['your_channel_id'] == channelID:

diff --git a/Scripts/community_downloader.py b/Scripts/community_downloader.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # Modified from original at: https://github.com/egbertbouman/youtube-comment-downloader
-
 from __future__ import print_function
+from Scripts.shared_imports import *
 
 import argparse
 import io
@@ -14,6 +14,7 @@
 import requests
 
 YOUTUBE_VIDEO_URL = 'https://www.youtube.com/post/{youtube_id}'
+YOUTUBE_COMMUNITY_TAB_URL = 'https://www.youtube.com/channel/{channel_id}/community'
 
 USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
 
@@ -63,6 +64,38 @@ def get_post_channel_url(youtube_id):
     except KeyError:
         return None
 
+# -----------------------------------------------------------------------------
+
+def fetch_recent_community_posts(channel_id):
+    """Fetch the posts listed on a channel's community tab.
+
+    Requests the community tab page, parses the initial-data JSON embedded in
+    the HTML and collects every 'backstagePostRenderer' entry found in the
+    first 'itemSectionRenderer'.
+
+    Args:
+        channel_id: Channel ID substituted into YOUTUBE_COMMUNITY_TAB_URL.
+
+    Returns:
+        A list of single-entry dicts mapping a post ID to a sample of that
+        post's text (first text run, stripped, newlines removed). A list is
+        used so the order of the posts is kept.
+
+    Raises:
+        json.JSONDecodeError: If the text handed to json.loads is not valid
+            JSON (presumably when the initial data is not found and
+            regex_search returns its '' default - TODO confirm).
+    """
+    session = requests.Session()
+    session.headers['User-Agent'] = USER_AGENT
+    communityTabUrl = YOUTUBE_COMMUNITY_TAB_URL.format(channel_id=channel_id)
+    response = session.get(communityTabUrl)
+
+    # NOTE(review): 'uxe=' in the final URL presumably means the request was
+    # redirected to the cookie consent page - set the cookie and retry once.
+    if 'uxe=' in response.request.url:
+        session.cookies.set('CONSENT', 'YES+cb', domain='.youtube.com')
+        response = session.get(communityTabUrl)
+
+    html = response.text
+    data = json.loads(regex_search(html, YT_INITIAL_DATA_RE, default=''))
+    section = next(search_dict(data, 'itemSectionRenderer'), None)
+    rawPosts = list(search_dict(section, 'backstagePostRenderer'))
+
+    recentPostsListofDicts = []
+    # Gets the Post IDs and sample of post text
+    for post in rawPosts:
+        postID = post['postId']
+        try:
+            text = post['contentText']['runs'][0]['text'].strip().replace('\n', '').replace('\r', '')
+        except (KeyError, IndexError):
+            # Missing 'contentText'/'runs'/'text' keys, or an empty 'runs' list
+            text = "[No Text For This Post]"
+        recentPostsListofDicts.append({postID: text})
+
+    recentPostsListofDicts.reverse() # Reverse list so newest posts are first
+
+    return recentPostsListofDicts
+
+# -----------------------------------------------------------------------------        
+
 def download_comments(youtube_id, sort_by=SORT_BY_RECENT, language=None, sleep=.1):
     session = requests.Session()
     session.headers['User-Agent'] = USER_AGENT
@@ -86,6 +119,7 @@ def download_comments(youtube_id, sort_by=SORT_BY_RECENT, language=None, sleep=.
     renderer = next(search_dict(section, 'continuationItemRenderer'), None) if section else None
     if not renderer:
         # Comments disabled?
+        print("\nError: 'continuationItemRenderer' not found in page data. Are comments disabled?")
         return
 
     needs_sorting = sort_by != SORT_BY_POPULAR
@@ -118,17 +152,37 @@ def download_comments(youtube_id, sort_by=SORT_BY_RECENT, language=None, sleep=.
                     # Process the 'Show more replies' button
                     continuations.append(next(search_dict(item, 'buttonRenderer'))['command'])
 
+        # Get total comments amount for post
+        try:
+            commentsHeader = list(search_dict(response, 'commentsHeaderRenderer'))
+            if commentsHeader:
+                postCommentsText = commentsHeader[0]['countText']['runs'][0]['text'].replace(',', '')
+                if 'k' in postCommentsText.lower():
+                    totalPostComments = int(postCommentsText.replace('k', ''))*1000
+                else:
+                    totalPostComments = int(postCommentsText)
+            else:
+                totalPostComments = None
+        except (KeyError, ValueError):
+            totalPostComments = -1
+
         for comment in reversed(list(search_dict(response, 'commentRenderer'))):
-            yield {'cid': comment['commentId'],
-                   'text': ''.join([c['text'] for c in comment['contentText'].get('runs', [])]),
-                   'time': comment['publishedTimeText']['runs'][0]['text'],
-                   'author': comment.get('authorText', {}).get('simpleText', ''),
-                   'channel': comment['authorEndpoint']['browseEndpoint'].get('browseId', ''),
-                   'votes': comment.get('voteCount', {}).get('simpleText', '0'),
-                   'photo': comment['authorThumbnail']['thumbnails'][-1]['url'],
-                   'heart': next(search_dict(comment, 'isHearted'), False)}
+            # Yield instead of return, function called by for loop
+            yield {
+                'cid': comment['commentId'],
+                'text': ''.join([c['text'] for c in comment['contentText'].get('runs', [])]),
+                'time': comment['publishedTimeText']['runs'][0]['text'],
+                'author': comment.get('authorText', {}).get('simpleText', ''),
+                'channel': comment['authorEndpoint']['browseEndpoint'].get('browseId', ''),
+                'votes': comment.get('voteCount', {}).get('simpleText', '0'),
+                'photo': comment['authorThumbnail']['thumbnails'][-1]['url'],
+                'heart': next(search_dict(comment, 'isHearted'), False),
 
-        time.sleep(sleep)
+                # Extra data not specific to comment:
+                'totalPostComments': totalPostComments
+                }
+
+        #time.sleep(sleep)
 
 
 def search_dict(partial, search_key):
@@ -146,38 +200,52 @@ def search_dict(partial, search_key):
                 stack.append(value)
 
 
-def main(communityPostID=None, limit=1000, sort=SORT_BY_RECENT, language=None):
-    try:
-        if not communityPostID:
-            raise ValueError('you need to specify a Youtube ID')
-
-        print('\nLoading Youtube comments for post:', communityPostID)
-        count = 0
-        sys.stdout.write(' Loaded %d comment(s)\r' % count)
-        sys.stdout.flush()
-        start_time = time.time()
-
-        commentsDict = {}
-        for comment in download_comments(communityPostID, sort, language):
-            commentID = comment['cid']
-            commentText = comment['text']
-            authorName = comment['author']
-            authorChannelID = comment['channel']
-            commentsDict[commentID] = {'commentText': commentText, 'authorName':authorName, 'authorChannelID':authorChannelID}
-
-            #comment_json = json.dumps(comment, ensure_ascii=False)
-            count += 1
-            sys.stdout.write(' Loaded %d comment(s)\r' % count)
-            sys.stdout.flush()
-            if limit and count >= limit:
-                break
-        print('\n[{:.2f} seconds] Done!'.format(time.time() - start_time))
-
-        return commentsDict
-
-    except Exception as e:
-        print('Error:', str(e))
-        sys.exit(1)
+def main(communityPostID=None, limit=1000, sort=SORT_BY_RECENT, language=None, postScanProgressDict=None, postText=None):
+    """Download the comments of one community post while printing progress.
+
+    Args:
+        communityPostID: ID of the community post to load comments for.
+        limit: Stop after this many comments; a falsy value disables the limit.
+        sort: Sort mode passed through to download_comments.
+        language: Language passed through to download_comments.
+        postScanProgressDict: Optional dict with 'scanned' and 'total' keys,
+            used only to print an "[i/j]" prefix in the header line.
+        postText: Optional post text; its first 90 characters are printed.
+
+    Returns:
+        A dict mapping each comment ID to a dict with the keys 'commentText',
+        'authorName' and 'authorChannelID'.
+
+    Raises:
+        ValueError: If communityPostID is empty or None.
+    """
+    if not communityPostID:
+        raise ValueError('you need to specify a Youtube ID')
+
+    if postScanProgressDict:
+        i = postScanProgressDict['scanned']
+        j = postScanProgressDict['total']
+        print(f'\n\n [{i}/{j}] Post ID: {communityPostID}')
+    else:
+        print(f'\n Loading Comments For Post: {communityPostID}')
+
+    if postText:
+        print(f"    >  {F.LIGHTCYAN_EX}Post Text Sample:{S.R} {postText[0:90]}")
+
+    count = 0
+    totalComments = 0
+    commentsDict = {}
+    for comment in download_comments(communityPostID, sort, language):
+        commentID = comment['cid']
+        commentText = comment['text']
+        authorName = comment['author']
+        authorChannelID = comment['channel']
+        commentsDict[commentID] = {'commentText': commentText, 'authorName':authorName, 'authorChannelID':authorChannelID}
+
+        # Print Stats
+        count += 1
+
+        # Doesn't return a number after first page, so don't update after that
+        if comment['totalPostComments']:
+            totalComments = comment['totalPostComments']
+
+        # Only show a percentage when a positive total is known. A total of 0
+        # (never reported) would divide by zero, and -1 marks a parse failure.
+        if totalComments > 0:
+            percent = ((count / totalComments) * 100)
+            progressStats = f"[ {count} / {totalComments} ]".ljust(15, " ") + f" ({percent:.2f}%)"
+            print(f'    >  Retrieving Post Comments - {progressStats}', end='\r')
+        else:
+            print(f'    >  Loaded {F.YELLOW}{count}{S.R} comment(s)', end='\r')
+
+        if limit and count >= limit:
+            # Overwrite the carriage-returned progress line before leaving the loop
+            print("                                                                                 ")
+            break
+
+    print("                                                                                 ")
+    return commentsDict
 
 
 if __name__ == "__main__":

diff --git a/Scripts/files.py b/Scripts/files.py
@@ -65,6 +65,13 @@ def update_last_checked():
         else:
           return spamListDict
     latestRelease = response.json()["tag_name"]
+  except OSError as ox:
+    if silentCheck == True:
+      return spamListDict
+    else:
+      if "WinError 10013" in str(ox):
+        print(f"{B.RED}{F.WHITE}WinError 10013:{S.R} The OS blocked the connection to GitHub. Check your firewall settings.\n")
+        return False
   except:
     if silentCheck == True:
       return spamListDict
@@ -137,34 +144,40 @@ def check_for_update(currentVersion, updateReleaseChannel, silentCheck=False):
         if silentCheck == False:
           print(f"\n{B.RED}{F.WHITE}Error [U-4]:{S.R} Got an 403 (ratelimit_reached) when attempting to check for update.")
           print(f"This means you have been {F.YELLOW}rate limited by github.com{S.R}. Please try again in a while.\n")
-          return False
         else:
-          return False
+          print(f"\n{B.RED}{F.WHITE}Error [U-4]:{S.R} Got an 403 (ratelimit_reached) when attempting to check for update.")
+        return None
+
       else:
         if silentCheck == False:
           print(f"{B.RED}{F.WHITE}Error [U-3]:{S.R} Got non 200 status code (got: {response.status_code}) when attempting to check for update.\n")
           print(f"If this keeps happening, you may want to report the issue here: https://github.com/ThioJoe/YT-Spammer-Purge/issues")
-          if silentCheck == False:
-            return False
         else:
-          return False
+          print(f"{B.RED}{F.WHITE}Error [U-3]:{S.R} Got non 200 status code (got: {response.status_code}) when attempting to check for update.\n")
+        return None
+
     else:
-      # assume 200 response
+      # assume 200 response (good)
       if updateReleaseChannel == "stable":
         latestVersion = response.json()["name"]
         isBeta = False
       elif updateReleaseChannel == "all":
         latestVersion = response.json()[0]["name"]
         isBeta = response.json()[0]["prerelease"]
-
+  except OSError as ox:
+    if "WinError 10013" in str(ox):
+      print(f"{B.RED}{F.WHITE}WinError 10013:{S.R} The OS blocked the connection to GitHub. Check your firewall settings.\n")
+    else:
+      print(f"{B.RED}{F.WHITE}Unknown OSError{S.R} Error occurred while checking for updates\n")
+    return None
   except Exception as e:
     if silentCheck == False:
       print(e + "\n")
       print(f"{B.RED}{F.WHITE}Error [Code U-1]:{S.R} Problem while checking for updates. See above error for more details.\n")
       print("If this keeps happening, you may want to report the issue here: https://github.com/ThioJoe/YT-Spammer-Purge/issues")
-      return False
     elif silentCheck == True:
-      return False
+      print(f"{B.RED}{F.WHITE}Error [Code U-1]:{S.R} Unknown problem while checking for updates. See above error for more details.\n")
+    return None
 
   if parse_version(latestVersion) > parse_version(currentVersion):
     isUpdateAvailable = True
@@ -286,7 +299,7 @@ def check_for_update(currentVersion, updateReleaseChannel, silentCheck=False):
 
         else:
           # We do this because we pull the .exe for windows, but maybe we could use os.system('git pull')? Because this is a GIT repo, unlike the windows version
-          print(f"> {F.RED} Error:{S.R} You are using an unsupported os for the autoupdater (macos/linux). \n This updater only supports Windows (right now) Feel free to get the files from github: https://github.com/ThioJoe/YT-Spammer-Purge")
+          print(f"> {F.RED} Error:{S.R} You are using an unsupported OS for the autoupdater (macos/linux). \n This updater only supports Windows (right now). Feel free to get the files from github: https://github.com/ThioJoe/YT-Spammer-Purge")
           return False
       elif userChoice == "False" or userChoice == None:
         return False
@@ -297,13 +310,11 @@ def check_for_update(currentVersion, updateReleaseChannel, silentCheck=False):
   elif parse_version(latestVersion) == parse_version(currentVersion):
     if silentCheck == False:
       print(f"\nYou have the {F.LIGHTGREEN_EX}latest{S.R} version: {F.LIGHTGREEN_EX}" + currentVersion)
-      return False
+    return False
   else:
     if silentCheck == False:
       print("\nNo newer release available - Your Version: " + currentVersion + "  --  Latest Version: " + latestVersion)
-      return False
-    elif silentCheck == True:
-      return isUpdateAvailable
+    return False
 
 
 ######################### Try To Get Remote File ##########################
@@ -520,8 +531,15 @@ def list_config_files(relativePath=None):
       # Only exact matches, no backups
       if file.lower() == "spampurgeconfig" + match + ".ini":
         fileList.append(file)
-    except:
-      pass
+    except AttributeError as ax:
+      if "NoneType" in str(ax):
+        pass
+      else:
+        traceback.print_exc()
+        print("--------------------------------------------------------------------------------")
+        print("Something went wrong when getting list of config files. Check your regex.")
+        input("\nPress Enter to exit...")
+        sys.exit()
 
   return fileList
 

diff --git a/Scripts/logging.py b/Scripts/logging.py
@@ -127,7 +127,7 @@ def print_prepared_comments(current, scanVideoID, comments, j, loggingEnabled, s
       add_sample(current, author_id_local, author, text, matchReason)
 
     # Build comment direct link
-    if scanMode == "communityPost":
+    if scanMode == "communityPost" or scanMode == "recentCommunityPosts":
       directLink = "https://www.youtube.com/post/" + videoID + "?lc=" + comment_id_local
     else:
       directLink = "https://www.youtube.com/watch?v=" + videoID + "&lc=" + comment_id_local
@@ -586,7 +586,7 @@ def prepare_logFile_settings(current, config, miscData, jsonSettingsDict, filter
 
   # Set where to put log files
   defaultLogPath = "logs"
-  if config and config['log_path']:
+  if config['log_path']:
     if config['log_path'] == "default": # For backwards compatibility, can remove later on
       logPath = defaultLogPath
     else: