diff options
Diffstat (limited to 'users.py')
-rwxr-xr-x | users.py | 177 |
1 files changed, 177 insertions, 0 deletions
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Poll the timelines of the followers of a set of configured accounts."""

import os
import pickle
from random import shuffle

import tweepy

from config import accounts
from db import open_db, LikeMemory, TrackMemory
from taste import analyse


CACHE_FILENAME = 'ids.cache'


class UsersListener():
    """Poll the timelines of tracked accounts and pass each tweet to `analyse`."""

    def __init__(self, api):
        """Build the listener and compute the list of accounts to track.

        Args:
            api: tweepy API object used for every request made by the listener.
        """
        super().__init__()

        self._api = api

        open_db()

        self._memory = LikeMemory(api)
        self._tracker = TrackMemory()

        self._compute_ids_to_follow(True)

    def _compute_ids_to_follow(self, cached):
        """Get the list of accounts to track and store it in ``self._ids``.

        The candidates are the followers of every screen name listed in the
        whitespace-separated `accounts` setting, minus the accounts that
        already follow the authenticated user. The result is shuffled.

        Args:
            cached: when true and CACHE_FILENAME exists, reload the raw list
                of ids from that file instead of querying the API again.
        """
        if cached and os.path.isfile(CACHE_FILENAME):
            # NOTE(security): pickle executes arbitrary code on load. This is
            # acceptable only because the cache file is written by this very
            # method; never point CACHE_FILENAME at untrusted data.
            with open(CACHE_FILENAME, 'rb') as cache:
                ids = pickle.load(cache)

            print('[i] Reloaded %u accounts' % len(ids))

        else:
            ids = []

            # split() (rather than split(' ')) ignores repeated spaces and an
            # empty setting instead of issuing requests for empty names.
            for master in accounts.split():
                count = 0

                try:
                    pages = tweepy.Cursor(self._api.followers_ids, screen_name=master).pages()
                    for page in pages:
                        count += len(page)
                        ids.extend(page)

                    print('[i] Got %u accounts following %s' % (count, master))

                except Exception:
                    # Best effort: pages received before the failure are
                    # kept, and the remaining masters are still processed.
                    print('[!] Error while receiving followers for %s...' % master)

            with open(CACHE_FILENAME, 'wb') as cache:
                pickle.dump(ids, cache)

            print('[i] Loaded %u accounts' % len(ids))

        # Drop the duplicates (accounts following several masters).
        ids = list(set(ids))

        print('[i] Kept %u accounts' % len(ids))

        # Remove every account that already follows the authenticated user.
        # The lookup must use the handle (screen_name), not the display name.
        me = self._api.me()
        already = set()

        for page in tweepy.Cursor(self._api.followers_ids, screen_name=me.screen_name).pages():
            already.update(page)

        print('[i] I am followed by %u accounts' % len(already))

        # `already` is a set so that each membership test is O(1).
        self._ids = [x for x in ids if x not in already]

        shuffle(self._ids)

        print('[i] Tracking %u accounts...' % len(self._ids))

    def start(self, auth):
        """Start the listener: poll every tracked timeline, forever.

        Args:
            auth: not used by this method; kept so that callers passing it
                keep working.

        Raises:
            tweepy.error.TweepError: on any API error other than an HTTP 401
                (private account) or 404 (nothing to fetch) response.
        """
        while True:

            for uid in self._ids:

                # Presumably the id of the last status recorded through
                # set_last_seen_for() for this user -- confirm in db.py.
                since = self._tracker.get_last_seen_for(uid)

                try:
                    # Keyword arguments are required here: tweepy binds
                    # positional arguments in its own parameter order, where
                    # the second slot is `user_id`, not `since_id`.
                    last = self._api.user_timeline(user_id=uid, since_id=since)

                except tweepy.error.TweepError as e:
                    # The error may carry no HTTP response at all.
                    status_code = getattr(e.response, 'status_code', None)

                    # 401 -- private account:
                    #   tweepy.error.TweepError: Not authorized.
                    # 404 -- nothing new:
                    #   tweepy.error.TweepError: [{'message': 'Sorry, that page does not exist.', 'code': 34}]
                    if status_code in (401, 404):
                        continue

                    print(e, status_code)
                    raise

                first = None

                for status in last:

                    sid = status.id
                    author_id = status.author.id
                    username = status.author.screen_name

                    # For a retweet, analyse the text of the original status.
                    while hasattr(status, 'retweeted_status'):
                        status = status.retweeted_status

                    analyse(sid, username, status.text, self._api, self._memory)

                    # Only the first status of the returned timeline is
                    # remembered as the "last seen" marker.
                    if first is None:
                        first = author_id, username, sid

                if first is not None:
                    self._tracker.set_last_seen_for(*first)


def listen_to_users(auth, api):
    """Track all tweets written by users.

    Prints the rate-limit status of every API endpoint (entries whose
    remaining quota differs from the limit are flagged with '!!'), then
    builds a UsersListener.

    Args:
        auth: credentials object; only needed by the currently disabled
            polling loop.
        api: tweepy API object handed to the listener.
    """
    data = api.rate_limit_status()

    for name, category in data['resources'].items():

        print('%s' % name)

        for endpoint, props in category.items():

            changed = props['remaining'] != props['limit']

            print(' %s %s: %d / %d' % ('!!' if changed else ' ', endpoint, props['remaining'], props['limit']))

    listener = UsersListener(api)

    # NOTE: the polling loop is intentionally left disabled, as in the
    # original code; uncomment the next line to start it.
    # listener.start(auth)