From 350f2bca3f525d81dc849fe4c3244b1e53950db2 Mon Sep 17 00:00:00 2001 From: Hartmut Goebel Date: Sat, 30 Mar 2019 22:01:28 +0100 Subject: [PATCH] Add detection of feed generator and pass it to get_entry(). This allows generator-specific handling of e.g. the URL. For example, in WordPress `id` is an ugly URL, while the human-readable permalink is stored in an alternate link --- feediverse.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/feediverse.py b/feediverse.py index 54a9f8f..1e6afac 100755 --- a/feediverse.py +++ b/feediverse.py @@ -67,6 +67,14 @@ def read_config(config_file): config['updated'] = datetime.now(tz=timezone.utc) return config +def detect_generator(feed): + # For RSS the generator tag holds the URL, while for ATOM it holds the name + if "/wordpress.org/" in feed.feed.generator: + return "wordpress" + elif "wordpress" == feed.feed.generator.lower(): + return "wordpress" + return None + def get_feed(feed_url, last_update): new_entries = 0 feed = feedparser.parse(feed_url) @@ -76,9 +84,10 @@ def get_feed(feed_url, last_update): else: entries = feed.entries entries.sort(key=lambda e: e.published_parsed) + generator = detect_generator(feed) for entry in entries: new_entries += 1 - yield get_entry(entry) + yield get_entry(entry, generator) return new_entries def collect_images(entry): @@ -115,7 +124,7 @@ def collect_images(entry): return images -def get_entry(entry): +def get_entry(entry, generator=None): hashtags = [] for tag in entry.get('tags', []): for t in tag['term'].split():