summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--config.py3
-rw-r--r--taste.py24
2 files changed, 26 insertions, 1 deletions
diff --git a/config.py b/config.py
index 4d6f75d..daaff2f 100644
--- a/config.py
+++ b/config.py
@@ -35,3 +35,6 @@ banned_accounts = 'cnn'
banned_accounts_re = '.*bot .*Bot .*career.* .*Career.* .*_jobs .*_Jobs'
banned_titles_re = '.*Parts.* .*Jobs.*'
+
+# Maximum percentage (0-100) of uppercase words in a Tweet; above it the content is considered spam
+sensitive_ratio = 50.0
diff --git a/taste.py b/taste.py
index 95139b4..069e273 100644
--- a/taste.py
+++ b/taste.py
@@ -7,6 +7,7 @@ import sys
import tweepy
from config import accepted_languages, white_kwds, cs_white_kwds, black_kwds, cs_black_kwds
from config import banned_accounts, banned_accounts_re, banned_titles_re
+from config import sensitive_ratio
from random import randint
from time import sleep
@@ -70,6 +71,25 @@ def is_blacklisted(username, displayed):
return result
+def is_spam(content):
+    """Tell whether a content is mostly written in uppercase words.
+
+    The content is split on any whitespace; a word counts as uppercase
+    only if it has at least one cased character and all of them are
+    uppercase, so numbers, emoji and punctuation are not counted.
+    Return True when the percentage of such words is strictly greater
+    than `sensitive_ratio`; a content without any word is never spam.
+    """
+    keywords = content.split()
+    if not keywords:
+        return False
+
+    uc_counter = sum(1 for kw in keywords if kw.isupper())
+    ratio = (uc_counter * 100.0) / len(keywords)
+
+    return ratio > sensitive_ratio
+
+
def analyse(sid, username, displayed, lang, content, api, memory):
"""Analyse a Tweet content."""
@@ -77,7 +97,9 @@ def analyse(sid, username, displayed, lang, content, api, memory):
liked = False
- if not is_blacklisted(username, displayed) and (lang in accepted_languages.split(' ')):
+ if not is_blacklisted(username, displayed) \
+ and not is_spam(content) \
+ and (lang in accepted_languages.split(' ')):
like = False