summaryrefslogtreecommitdiff
path: root/users.py
diff options
context:
space:
mode:
Diffstat (limited to 'users.py')
-rwxr-xr-xusers.py177
1 files changed, 177 insertions, 0 deletions
diff --git a/users.py b/users.py
new file mode 100755
index 0000000..e61d3c2
--- /dev/null
+++ b/users.py
@@ -0,0 +1,177 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+
+import tweepy
+from config import accounts
+from db import open_db, LikeMemory, TrackMemory
+from taste import analyse
+import os
+import pickle
+from random import shuffle
+
+
+CACHE_FILENAME = 'ids.cache'
+
+
class UsersListener:
    """Poll the timelines of a set of accounts and analyse their tweets.

    The candidate accounts are the followers of every screen name listed in
    ``accounts`` (whitespace separated), minus the accounts which already
    follow the authenticated user.
    """

    def __init__(self, api):
        """Open the database, build the memories and compute the tracked ids.

        Args:
            api: authenticated tweepy API object used for every request.
        """
        super().__init__()

        self._api = api

        open_db()

        self._memory = LikeMemory(api)
        self._tracker = TrackMemory()

        self._compute_ids_to_follow(True)

    def _compute_ids_to_follow(self, cached):
        """Build ``self._ids``, the shuffled list of account ids to track.

        Args:
            cached: when true and the cache file exists, the candidate ids
                are reloaded from it; otherwise they are downloaded again
                and the cache file is rewritten.
        """
        if cached and os.path.isfile(CACHE_FILENAME):
            ids = self._load_cached_ids()
            print('[i] Reloaded %u accounts' % len(ids))
        else:
            ids = self._download_candidate_ids()
            with open(CACHE_FILENAME, 'wb') as stream:
                pickle.dump(ids, stream)
            print('[i] Loaded %u accounts' % len(ids))

        # The same account may follow several masters: keep it only once.
        ids = list(set(ids))
        print('[i] Kept %u accounts' % len(ids))

        # Remove all accounts which already follow the authenticated user.
        # The lookup needs the screen name (handle), not the display name.
        already = []
        for page in self._follower_pages(self._api.me().screen_name):
            already.extend(page)
        print('[i] I am followed by %u accounts' % len(already))

        # A set keeps the filtering linear instead of quadratic.
        followers = set(already)
        self._ids = [x for x in ids if x not in followers]

        shuffle(self._ids)

        print('[i] Tracking %u accounts...' % len(self._ids))

    def _load_cached_ids(self):
        """Return the candidate ids stored in the cache file."""
        # NOTE: unpickling can run arbitrary code; the cache file must only
        # ever be written by this program.
        with open(CACHE_FILENAME, 'rb') as stream:
            return pickle.load(stream)

    def _download_candidate_ids(self):
        """Return the ids of the followers of every configured account.

        A failure for one account is reported and skipped; the pages
        received before the failure are kept.
        """
        ids = []

        for master in accounts.split():
            count = 0
            try:
                for page in self._follower_pages(master):
                    count += len(page)
                    ids.extend(page)
            except tweepy.error.TweepError:
                print('[!] Error while receiving followers for %s...' % master)
            else:
                print('[i] Got %u accounts following %s' % (count, master))

        return ids

    def _follower_pages(self, screen_name):
        """Return an iterator over the pages of ids following *screen_name*."""
        cursor = tweepy.Cursor(self._api.followers_ids, screen_name=screen_name)
        return cursor.pages()

    def start(self, auth):
        """Poll every tracked account in turn, forever.

        Args:
            auth: unused; kept so that existing callers keep working.

        Raises:
            tweepy.error.TweepError: on any API error other than a 401 or
                404 answer for a timeline.
        """
        while True:
            for uid in self._ids:
                self._poll_user(uid)

    def _poll_user(self, uid):
        """Analyse the new tweets of *uid* and remember the first one seen."""
        first = None

        for status in self._fetch_new_statuses(uid):
            sid = status.id
            author_id = status.author.id
            username = status.author.screen_name

            # The analysis runs on the text of the original tweet, but it is
            # filed under the id and author of the status from the timeline.
            text = self._unwrap_retweet(status).text
            analyse(sid, username, text, self._api, self._memory)

            if first is None:
                first = author_id, username, sid

        if first is not None:
            # Only the first status returned by the API is recorded.
            self._tracker.set_last_seen_for(*first)

    def _fetch_new_statuses(self, uid):
        """Return the statuses of *uid* newer than the last one recorded.

        An empty list is returned when the API answers 401 or 404; any other
        API error is printed and re-raised.
        """
        since = self._tracker.get_last_seen_for(uid)

        try:
            # Keywords are required: positional arguments would be bound to
            # the first API parameters in order, not to since_id.
            return self._api.user_timeline(user_id=uid, since_id=since)

        except tweepy.error.TweepError as err:
            # The response is missing when the request itself failed.
            code = getattr(err.response, 'status_code', None)

            # 401: private account ("Not authorized.").
            # 404: nothing new ("Sorry, that page does not exist.", code 34).
            if code in (401, 404):
                return []

            print(err, code)
            raise

    @staticmethod
    def _unwrap_retweet(status):
        """Follow the ``retweeted_status`` chain down to the original tweet."""
        while hasattr(status, 'retweeted_status'):
            status = status.retweeted_status
        return status
+
+
+
def listen_to_users(auth, api):
    """Print the API rate-limit status, then build the users listener.

    Every resource category returned by ``api.rate_limit_status()`` is
    printed, followed by one line per endpoint; an endpoint whose remaining
    quota differs from its limit is flagged with ``!!``.

    Args:
        auth: not used by the active code.
        api: API object queried for the rate limits and handed over to
            the listener.
    """
    resources = api.rate_limit_status()['resources']

    for family, endpoints in resources.items():

        print('%s' % family)

        for endpoint, quota in endpoints.items():

            remaining = quota['remaining']
            limit = quota['limit']
            marker = ' ' if remaining == limit else '!!'

            print(' %s %s: %d / %d' % (marker, endpoint, remaining, limit))

    # NOTE: only the construction runs for now; the call to
    # listener.start(auth) is currently disabled.
    listener = UsersListener(api)