Skip to content

Commit

Permalink
Fix spam thread checking behavior
Browse files: browse the repository at this point in the history
The program's false-positive check was interfering with its ability to catch many spam bot threads. This commit remedies that by running a dedicated spam-thread regex check before the other checks.
  • Loading branch information
ThioJoe committed Oct 26, 2022
1 parent 0d8a037 commit c4b4f6a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Scripts/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,7 @@ def check_against_filter(current, filtersDict, miscData, config, currentCommentD

# Spam Lists
spamListCombinedRegex = smartFilter['spamListCombinedRegex']
spamThreadsRegex = smartFilter['spamThreadsRegex']

# if debugSingleComment == True:
# if input("Sensitive True/False: ").lower() == 'true': sensitive = True
Expand Down Expand Up @@ -937,6 +938,10 @@ def multiVarDetect(text, username):
processedText = commentText
upLowTextSet = set(processedText)

# Run Spam Thread specific check first
if spamThreadsRegex.search(commentTextNormalized.lower()):
add_spam(current, config, miscData, currentCommentDict, videoID)

# Run Checks
if authorChannelID == parentAuthorChannelID:
pass
Expand Down
6 changes: 5 additions & 1 deletion Scripts/prepare_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,16 +400,19 @@ def prepare_filter_mode_smart(scanMode, config, miscData, sensitive=False):
print(" Loading Filters [=================== ]", end="\r")

spamListExpressionsList = []
spamThreadsExpressionsList = []
# Prepare spam domain regex
for domain in spamDomainsList:
spamListExpressionsList.append(confusable_regex(domain.upper().replace(".", "⚫"), include_character_padding=False).replace("(?:⚫)", "(?:[^a-zA-Z0-9 ]{1,2})"))
for account in spamAccountsList:
spamListExpressionsList.append(confusable_regex(account.upper(), include_character_padding=True).replace(m, a))
for spamName in spamThreadsList:
#spamListExpressionsList.append(confusable_regex(thread.upper(), include_character_padding=True).replace(m, a)) #With Confusables
spamListExpressionsList.append(re.escape(spamName.lower())) #Exact lowercase match
spamThreadsExpressionsList.append(re.escape(spamName.lower())) #Exact lowercase match
print(" Loading Filters [====================== ]", end="\r")
spamListCombinedRegex = re.compile('|'.join(spamListExpressionsList))
print(" Loading Filters [========================= ]", end="\r")
spamThreadsRegex = re.compile('|'.join(spamThreadsExpressionsList))

# Prepare Multi Language Detection
turkish = 'Ç窺Ğğİ'
Expand Down Expand Up @@ -445,6 +448,7 @@ def prepare_filter_mode_smart(scanMode, config, miscData, sensitive=False):
'sensitiveRootDomainRegex': sensitiveRootDomainRegex,
'unicodeCategoriesStrip': unicodeCategoriesStrip,
'spamListCombinedRegex': spamListCombinedRegex,
'spamThreadsRegex': spamThreadsRegex,
'threadFiltersDict': threadFiltersDict,
'accompanyingLinkSpamDict': accompanyingLinkSpamDict,
'comboDict': filter.comboDict
Expand Down

0 comments on commit c4b4f6a

Please sign in to comment.