diff options
Diffstat (limited to 'users.py')
-rw-r--r--[-rwxr-xr-x] | users.py | 116 |
1 files changed, 77 insertions, 39 deletions
@@ -4,11 +4,12 @@ import tweepy from config import accounts -from db import open_db, LikeMemory, TrackMemory +from db import LikeMemory, TrackMemory from taste import analyse import os import pickle -from random import shuffle +from random import randint, shuffle +from time import sleep CACHE_FILENAME = 'ids.cache' @@ -24,8 +25,6 @@ class UsersListener(): self._api = api - open_db() - self._memory = LikeMemory(api) self._tracker = TrackMemory() @@ -65,15 +64,14 @@ class UsersListener(): print('[!] Error while receiving followers for %s...' % master) - - pickle.dump(ids, open(CACHE_FILENAME, 'wb')) - print('[i] Loaded %u accounts' % len(ids)) ids = list(set(ids)) print('[i] Kept %u accounts' % len(ids)) + pickle.dump(ids, open(CACHE_FILENAME, 'wb')) + # Remove all account natively followed already = [] @@ -93,54 +91,94 @@ class UsersListener(): def start(self, auth): """Start the listener.""" - while True: + accounts_reviewed = 0 + tweets_reviewed = 0 + tweets_liked = 0 - for uid in self._ids: + for uid in self._ids[:900]: - since = self._tracker.get_last_seen_for(uid) + since = self._tracker.get_last_seen_for(uid) - last = [] + last = [] - try: + try: + + last = self._api.user_timeline(uid, since) + + print('[*] Current search: %u account%s visited, %u tweet%s analyzed, %u tweet%s liked' \ + % (accounts_reviewed, 's' if accounts_reviewed > 1 else '', \ + tweets_reviewed, 's' if tweets_reviewed > 1 else '', \ + tweets_liked, 's' if tweets_liked > 1 else '')) + + + # Do not be so aggressive! + #tempo = randint(2, 7) + #print('[*] Enjoying a small break for %u seconds... %u account%s visited, %u tweet%s analyzed' \ + # % (tempo, accounts_reviewed, 's' if accounts_reviewed > 1 else '', \ + # tweets_reviewed, 's' if tweets_reviewed > 1 else '')) + #sleep(tempo) + + except tweepy.error.TweepError as e: + + # Private account + # tweepy.error.TweepError: Not authorized. + if e.response.status_code == 401: + pass + + # Blocked ! + # tweepy.error.TweepError: [{'message': 'To protect our users from spam and other malicious activity, this account is temporarily locked. Please log in to https://twitter.com to unlock your account.', 'code': 326}] + elif e.response.status_code == 403: + print(e, e.response.status_code) + assert(False) + + # Nothing new! + # tweepy.error.TweepError: [{'message': 'Sorry, that page does not exist.', 'code': 34}] + elif e.response.status_code == 404: + pass - last = self._api.user_timeline(uid, since) + # ??? + # tweepy.error.TweepError: [{'message': 'Internal error', 'code': 131}] + elif e.response.status_code == 500: + print('[!] Twitter internal error for uid=%u' % uid) - except tweepy.error.TweepError as e: + else: + print(e, e.response.status_code) + assert(False) - # Private account - # tweepy.error.TweepError: Not authorized. - if e.response.status_code == 401: - pass + first = None - # Nothing new! - # tweepy.error.TweepError: [{'message': 'Sorry, that page does not exist.', 'code': 34}] - elif e.response.status_code == 404: - pass + for status in last: - else: - print(e, e.response.status_code) - assert(False) + sid = status.id + uid = status.author.id + username = status.author.screen_name + displayed = status.author.name - first = None + if hasattr(status, 'lang'): + lang = status.lang + else: + lang = 'unknown' - for status in last: + while hasattr(status, 'retweeted_status'): + status = status.retweeted_status - sid = status.id - uid = status.author.id - username = status.author.screen_name + liked = analyse(sid, username, displayed, lang, status.text, self._api, self._memory) - while hasattr(status, 'retweeted_status'): - status = status.retweeted_status + if first is None: + first = uid, username, sid - analyse(sid, username, status.text, self._api, self._memory) + tweets_reviewed += 1 - if first is None: - first = uid, username, sid + # Do not spam users! + if liked: + tweets_liked += 1 + break - if not(first is None): - uid, username, sid = first - self._tracker.set_last_seen_for(uid, username, sid) + if not(first is None): + uid, username, sid = first + self._tracker.set_last_seen_for(uid, username, sid) + accounts_reviewed += 1 def listen_to_users(auth, api): @@ -171,7 +209,7 @@ def listen_to_users(auth, api): listener = UsersListener(api) - ####listener.start(auth) + listener.start(auth) #stream = Stream(auth, listener) #stream.filter(follow=new) |