Filtered some kinds of spam.

author: Cyrille Bagard <nocbos@gmail.com> 2017-11-25 10:55:08 (GMT)
committer: Cyrille Bagard <nocbos@gmail.com> 2017-11-25 10:55:08 (GMT)
commit: b5c4d532c34ed4d3ac03622b1e249a5d323afd33 (patch)
tree: ed89f8d886ee5d6542acecc31336c07f0412c0d7
parent: 8c7ba43ddcf5ac6264bd5e85b457025af9fb506a (diff)
2 files changed, 26 insertions, 1 deletions
diff --git a/config.py b/config.py
index 4d6f75d..daaff2f 100644
--- a/config.py
+++ b/config.py
@@ -35,3 +35,6 @@ banned_accounts = 'cnn'
 banned_accounts_re = '.*bot .*Bot .*career.* .*Career.* .*_jobs .*_Jobs'
 
 banned_titles_re = '.*Parts.* .*Jobs.*'
+
+# Maximum percentage of uppercased words in a Tweet; above it, the Tweet is spam
+sensitive_ratio = 50.0
diff --git a/taste.py b/taste.py
index 95139b4..069e273 100644
--- a/taste.py
+++ b/taste.py
@@ -7,6 +7,7 @@ import sys
 import tweepy
 from config import accepted_languages, white_kwds, cs_white_kwds, black_kwds, cs_black_kwds
 from config import banned_accounts, banned_accounts_re, banned_titles_re
+from config import sensitive_ratio
 from random import randint
 from time import sleep
 
@@ -70,6 +71,25 @@ def is_blacklisted(username, displayed):
     return result
 
 
+def is_spam(content):
+    """Tell if a given content looks like spam (too many uppercased words).
+
+    The content is spam when the percentage of its words written in
+    uppercase exceeds sensitive_ratio. Words without any cased character
+    (numbers, punctuation, emojis) never count as uppercased, and content
+    holding no word at all is not spam.
+    """
+
+    keywords = content.split()
+
+    if not keywords:
+        return False
+
+    uc_counter = sum(1 for kw in keywords if kw.isupper())
+
+    return (uc_counter * 100.0) / len(keywords) > sensitive_ratio
+
+
 def analyse(sid, username, displayed, lang, content, api, memory):
     """Analyse a Tweet content."""
 
@@ -77,7 +97,9 @@ def analyse(sid, username, displayed, lang, content, api, memory):
 
     liked = False
 
-    if not is_blacklisted(username, displayed) and (lang in accepted_languages.split(' ')):
+    if not is_blacklisted(username, displayed) \
+       and not is_spam(content) \
+       and (lang in accepted_languages.split(' ')):
 
         like = False
author	Cyrille Bagard <nocbos@gmail.com>	2017-11-25 10:55:08 (GMT)
committer	Cyrille Bagard <nocbos@gmail.com>	2017-11-25 10:55:08 (GMT)
commit	b5c4d532c34ed4d3ac03622b1e249a5d323afd33 (patch)
tree	ed89f8d886ee5d6542acecc31336c07f0412c0d7
parent	8c7ba43ddcf5ac6264bd5e85b457025af9fb506a (diff)