diff options
| -rw-r--r-- | .gitignore | 10 | ||||
| -rw-r--r-- | config.py | 5 | ||||
| -rw-r--r-- | db.py | 103 | ||||
| -rwxr-xr-x | htt.py | 40 | ||||
| -rw-r--r-- | taste.py | 76 | ||||
| -rwxr-xr-x | users.py | 177 | 
6 files changed, 392 insertions, 19 deletions
| diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..af9f4aa --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# -*- mode: sh -*- + +# Emacs +*~ + +# Python +__pycache__ + +# Misc +ids.cache @@ -5,8 +5,11 @@  # List of space-separated hashtags to follow, for instance #python #bot  hashtags = '#python #bot' -# Keywords to find in Tweets we want to highlight +# Keywords to find in Tweets we want to highlight; underscores will be replaced by spaces.  white_kwds = 'you got the idea'  # Age of old Tweets to get purged in days  max_age = 14 + +# List of space-separated accounts to follow +accounts = 'laughing_bit' @@ -9,15 +9,25 @@ import time  import tweepy +_db = None + +def open_db(): +    """Open the database.""" + +    global _db + +    _db = sqlite3.connect('HTT.db', detect_types=sqlite3.PARSE_DECLTYPES) + +  class LikeMemory():      """Track all liked Tweets."""      def __init__(self, api):          """Build the Python object.""" -        self._api = api +        global _db -        self._db = sqlite3.connect('HTT.db', detect_types=sqlite3.PARSE_DECLTYPES) +        self._api = api          sqlite3.register_adapter(bool, int)          sqlite3.register_converter("BOOLEAN", lambda v: bool(int(v))) @@ -32,9 +42,9 @@ class LikeMemory():              )          ''' -        cursor = self._db.cursor() +        cursor = _db.cursor()          cursor.execute(sql) -        self._db.commit() +        _db.commit()      def _compute_content_fingerprint(self, content): @@ -78,17 +88,19 @@ class LikeMemory():              if pos != -1:                  base = base[:pos] -        return hashlib.md5(base.rstrip(' ').encode('utf-8')).hexdigest() +        return hashlib.md5(base.rstrip(' ').lower().encode('utf-8')).hexdigest()      def is_original_content(self, content):          """Ensure that a given content has never been seen.""" +        global _db +          fingerprint = self._compute_content_fingerprint(content)          values = (fingerprint, ) -        cursor = self._db.cursor() +        cursor = _db.cursor()          cursor.execute('SELECT sid FROM LikedTweets WHERE fingerprint = ?', values)          found = cursor.fetchone() @@ -99,25 +111,29 @@ class LikeMemory():      def save_liked_status(self, sid, username, content):          """Remember a given liked status.""" +        global _db +          fingerprint = self._compute_content_fingerprint(content)          timestamp = int(time.time())          values = (sid, username, fingerprint, timestamp, False) -        cursor = self._db.cursor() +        cursor = _db.cursor()          cursor.execute('INSERT INTO LikedTweets VALUES (?, ?, ?, ?, ?)', values) -        self._db.commit() +        _db.commit()      def purge_old_status(self):          """Purge old seen statuses.""" +        global _db +          timestamp = int(time.time()) - max_age * 24 * 60 * 60          values = (timestamp, False) -        cursor = self._db.cursor() +        cursor = _db.cursor()          cursor.execute('SELECT sid FROM LikedTweets WHERE timestamp < ? AND purged = ?', values)          rows = cursor.fetchall() @@ -137,9 +153,74 @@ class LikeMemory():              values = (True, sid) -            cursor = self._db.cursor() +            cursor = _db.cursor()              cursor.execute('UPDATE LikedTweets SET purged = ? WHERE sid = ?', values) -        self._db.commit() +        _db.commit()          print('Purged %d liked Tweet%s!' % (len(rows), '' if len(rows) <= 1 else 's')) + + +class TrackMemory(): +    """Remember last seen Tweet for users.""" + +    def __init__(self): +        """Build the Python object.""" + +        global _db + +        sql = ''' +            CREATE TABLE IF NOT EXISTS TrackedUsers( +                uid INTEGER PRIMARY KEY, +                username TEXT, +                last INTEGER +            ) +        ''' + +        cursor = _db.cursor() +        cursor.execute(sql) +        _db.commit() + + +    def get_last_seen_for(self, uid): +        """Get the status id of the last Tweet for a given user.""" + +        global _db + +        values = (uid, ) + +        cursor = _db.cursor() +        cursor.execute('SELECT last FROM TrackedUsers WHERE uid = ?', values) + +        found = cursor.fetchone() + +        if found is None: +            last = None +        else: +            last = found[0] + +        return last + + +    def set_last_seen_for(self, uid, name, sid): +        """Set the status id of the last seen Tweet for a given user.""" + +        global _db + +        since = self.get_last_seen_for(uid) + +        if since is None: + +            values = (uid, name, sid) + +            cursor = _db.cursor() +            cursor.execute('INSERT INTO TrackedUsers VALUES (?, ?, ?)', values) + +        else: + +            values = (sid, uid) + +            cursor = _db.cursor() +            cursor.execute('UPDATE TrackedUsers SET last = ? WHERE uid = ?', values) + +        _db.commit() @@ -9,6 +9,7 @@ from tweepy.streaming import StreamListener  from auth import *  from config import hashtags, white_kwds  from db import LikeMemory +from users import listen_to_users  import json  import sys @@ -16,7 +17,7 @@ import sys  class StdOutListener(StreamListener):      """A listener handles tweets are the received from the stream.""" -    def __init__(self, api, memory): +    def __init__(self, api, memory, tempo):          """Build the Python object."""          super().__init__() @@ -24,6 +25,9 @@ class StdOutListener(StreamListener):          self._api = api          self._memory = memory +        self._tempo = tempo +        self._previous = None +          self._white = [ s.lower() for s in white_kwds.split(' ') ] @@ -52,8 +56,17 @@ class StdOutListener(StreamListener):          decoded = json.loads(data)          if 'retweeted_status' in decoded: + +            if not ('id' in decoded['retweeted_status']): +                print(decoded) +              sid, username, content = self.get_status_info(decoded['retweeted_status']) +          else: + +            if not ('id' in decoded): +                print(decoded) +              sid, username, content = self.get_status_info(decoded)          like = False @@ -76,8 +89,19 @@ class StdOutListener(StreamListener):                  try: -                    self._api.create_favorite(sid) +                    if self._tempo: +                        if self._previous != None: + +                            self._api.create_favorite(self._previous) + +                        self._previous = sid + +                    else: + +                        self._api.create_favorite(sid) + +                    # Save even pending statuses to remember them when looking for original content                      self._memory.save_liked_status(sid, username, content)                      print('@%s: "%s" (id=%d)' % (username, content, sid)) @@ -114,8 +138,8 @@ if __name__ == '__main__':      auth = OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)      auth.set_access_token(ACCESS_KEY, ACCESS_SECRET) -    api = tweepy.API(auth) -    memory = LikeMemory(api) +    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True) +    #memory = LikeMemory(api)      if len(sys.argv) > 1 and sys.argv[1] == '--purge': @@ -123,7 +147,9 @@ if __name__ == '__main__':      else: -        listener = StdOutListener(api, memory) +        listen_to_users(auth, api) + +        #listener = StdOutListener(api, memory, True) -        stream = Stream(auth, listener) -        stream.filter(track=hashtags.split(' ')) +        #stream = Stream(auth, listener) +        #stream.filter(track=hashtags.split(' ')) diff --git a/taste.py b/taste.py new file mode 100644 index 0000000..8471473 --- /dev/null +++ b/taste.py @@ -0,0 +1,76 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + + +import tweepy +from config import white_kwds + + +COLOR_RESET = "\033[0m" + +COLOR_REJECTED = "\033[1;31m" +COLOR_ACCEPTED = "\033[1;32m" +COLOR_ALREADY  = "\033[1;33m" + + +def get_displayable_content(orig, margin): +    """Format content to get it displayable.""" + +    padding = '\n' + ' ' * margin + +    useful = [ l for l in orig.split('\n') if len(l) > 0 ] + +    result = padding.join(useful) + +    return result + + +def analyse(sid, username, content, api, memory): +    """Analyse a Tweet content.""" + +    like = False + +    words = content.split(' ') + +    white = [ s.lower().replace('_', ' ') for s in white_kwds.split(' ') ] + +    for kwd in white: + +            for w in words: +                if w.lower() == kwd: +                    like = True +                    break + +            if like: +                break + +    if like: + +        if memory.is_original_content(content): + +            try: + +                api.create_favorite(sid) + +                memory.save_liked_status(sid, username, content) + +                displayable = get_displayable_content(content, len('Liking') + len('  @%s: "' % username)) + +                print(COLOR_ACCEPTED + 'Liking' + COLOR_RESET + ' @%s: "%s"' % (username, displayable)) +                print(' -> https://twitter.com/%s/status/%d' % (username, sid)) + +            except tweepy.error.TweepError: + +                pass + +        else: + +            displayable = get_displayable_content(content, len('Already seen "')) + +            print(COLOR_ALREADY + 'Already seen' + COLOR_RESET + ' "%s"' % displayable) + +    else: + +        displayable = get_displayable_content(content, len('Reject') + len('  @%s: "' % username)) + +        print(COLOR_REJECTED + 'Reject' + COLOR_RESET + ' @%s: "%s"' % (username, displayable)) diff --git a/users.py b/users.py new file mode 100755 index 0000000..e61d3c2 --- /dev/null +++ b/users.py @@ -0,0 +1,177 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + + +import tweepy +from config import accounts +from db import open_db, LikeMemory, TrackMemory +from taste import analyse +import os +import pickle +from random import shuffle + + +CACHE_FILENAME = 'ids.cache' + + +class UsersListener(): +    """A listener handles tweets are the received from the stream.""" + +    def __init__(self, api): +        """Build the Python object.""" + +        super().__init__() + +        self._api = api + +        open_db() + +        self._memory = LikeMemory(api) +        self._tracker = TrackMemory() + +        self._compute_ids_to_follow(True) + + +    def _compute_ids_to_follow(self, cached): +        """Get the list of accounts to track.""" + +        if not os.path.isfile(CACHE_FILENAME): +            cached = False + +        if cached: + +            ids = pickle.load(open(CACHE_FILENAME, 'rb')) + +            print('[i] Reloaded %u accounts' % len(ids)) + +        else: + +            ids = [] + +            for master in accounts.split(' '): + +                count = 0 + +                try: + +                    for page in tweepy.Cursor(self._api.followers_ids, screen_name=master).pages(): + +                        count += len(page) +                        ids.extend(page) + +                    print('[i] Got %u accounts following %s' % (count, master)) + +                except: + +                    print('[!] Error while receiving followers for %s...' % master) + + +            pickle.dump(ids, open(CACHE_FILENAME, 'wb')) + +            print('[i] Loaded %u accounts' % len(ids)) + +            ids = list(set(ids)) + +            print('[i] Kept %u accounts' % len(ids)) + +        # Remove all account natively followed + +        already = [] + +        for page in tweepy.Cursor(self._api.followers_ids, screen_name=self._api.me().name).pages(): +            already.extend(page) + +        print('[i] I am followed by %u accounts' % len(already)) + +        self._ids = [ x for x in ids if x not in already ] + +        shuffle(self._ids) + +        print('[i] Tracking %u accounts...' % len(self._ids)) + + +    def start(self, auth): +        """Start the listener.""" + +        while True: + +            for uid in self._ids: + +                since = self._tracker.get_last_seen_for(uid) + +                last = [] + +                try: + +                    last = self._api.user_timeline(uid, since) + +                except tweepy.error.TweepError as e: + +                    # Private account +                    # tweepy.error.TweepError: Not authorized. +                    if e.response.status_code == 401: +                        pass + +                    # Nothing new! +                    # tweepy.error.TweepError: [{'message': 'Sorry, that page does not exist.', 'code': 34}] +                    elif e.response.status_code == 404: +                        pass + +                    else: +                        print(e, e.response.status_code) +                        assert(False) + +                first = None + +                for status in last: + +                    sid = status.id +                    uid = status.author.id +                    username = status.author.screen_name + +                    while hasattr(status, 'retweeted_status'): +                        status = status.retweeted_status + +                    analyse(sid, username, status.text, self._api, self._memory) + +                    if first is None: +                        first = uid, username, sid + +                if not(first is None): +                    uid, username, sid = first +                    self._tracker.set_last_seen_for(uid, username, sid) + + + +def listen_to_users(auth, api): +    """Track all tweets written by users.""" + + +    data = api.rate_limit_status() + +    for c in data['resources'].keys(): + +        print('%s' % c) + +        category = data['resources'][c] + +        for p in category.keys(): + +            props = category[p] +            changed = props['remaining'] != props['limit'] + +            print(' %s %s: %d / %d' % ('!!' if changed else '  ', p, props['remaining'], props['limit'])) + + + + + + +    if True: + +        listener = UsersListener(api) + +        ####listener.start(auth) + +        #stream = Stream(auth, listener) +        #stream.filter(follow=new) | 
