Add retrieving images from RSS & posting them.

Collects image urls from summary, content and enclosures
(attachments).

This adds urllib3 as a requirement.
This commit is contained in:
Hartmut Goebel
2019-03-30 21:17:24 +01:00
parent 13d1dd2623
commit 83ed532680
2 changed files with 49 additions and 3 deletions

View File

@ -10,10 +10,13 @@ from bs4 import BeautifulSoup
from mastodon import Mastodon from mastodon import Mastodon
from datetime import datetime, timezone from datetime import datetime, timezone
import urllib3
DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse") DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")
http = urllib3.PoolManager(cert_reqs='CERT_REQUIRED',)
def main(): def main():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("config_file", nargs="?", metavar="CONFIG-FILE", parser.add_argument("config_file", nargs="?", metavar="CONFIG-FILE",
@ -37,8 +40,15 @@ def main():
for feed in config['feeds']: for feed in config['feeds']:
for entry in get_feed(feed['url'], config['updated']): for entry in get_feed(feed['url'], config['updated']):
masto.status_post(feed['template'].format(**entry)[0:49999999999]) media_ids = []
for img in entry.get("images", []):
media = masto.media_post(img, img.headers['content-type'])
img.release_conn() # deferred from collect_images()
if not 'error' in media:
media_ids.append(media)
entry.pop("images", None)
masto.status_post(feed['template'].format(**entry)[:49999999999],
media_ids=media_ids)
save_config(config, config_file) save_config(config, config_file)
def save_config(config, config_file): def save_config(config, config_file):
@ -71,6 +81,40 @@ def get_feed(feed_url, last_update):
yield get_entry(entry) yield get_entry(entry)
return new_entries return new_entries
def collect_images(entry):
    """Fetch the image/video resources referenced by a feed entry.

    Candidate URLs are gathered, in order and without duplicates, from:

    * ``<a href>`` and ``<img src>`` tags in the entry's ``summary``,
    * the same tags in every item of the entry's ``content``,
    * the entry's enclosures and links with ``rel == "enclosure"`` whose
      declared type starts with ``image/`` or ``video/``.

    Every candidate is requested with the module-level ``http`` pool and
    kept only if the response's Content-Type starts with ``image/`` or
    ``video/``.

    Returns:
        A list of open (not preloaded) responses.  The caller MUST call
        ``release_conn()`` on each of them once the body has been consumed.
    """

    def find_urls(part):
        """Append URLs found in the HTML fragment *part* to ``urls``."""
        if not part:
            return
        soup = BeautifulSoup(part, 'html.parser')
        for tag in soup.find_all(["a", "img"]):
            # Use .get(): anchors without href (e.g. <a name="...">) and
            # images without src would raise KeyError with tag[...].
            url = tag.get("href" if tag.name == "a" else "src")
            if url and url not in urls:
                urls.append(url)

    urls = []
    find_urls(entry.get("summary", ""))
    for c in entry.get("content", []):
        find_urls(c.value)
    for e in (entry.enclosures
              + [l for l in entry.links if l.get("rel") == "enclosure"]):
        # Neither "type" nor "href" is guaranteed to be present; treat a
        # missing one as "not a media enclosure" instead of crashing.
        href = e.get("href")
        media_type = e.get("type") or ""
        if (href and media_type.startswith(("image/", "video/"))
                and href not in urls):
            urls.append(href)

    images = []
    for url in urls:
        resp = http.request('GET', url, preload_content=False)
        # A response without a Content-Type header is simply not usable
        # as media; fall through to release_conn() rather than raising.
        content_type = resp.headers.get('content-type') or ""
        if content_type.startswith(("image/", "video/")):
            images.append(resp)
            # IMPORTANT: Need to release_conn() later!
        else:
            resp.release_conn()
    return images
def get_entry(entry): def get_entry(entry):
hashtags = [] hashtags = []
for tag in entry.get('tags', []): for tag in entry.get('tags', []):
@ -83,6 +127,7 @@ def get_entry(entry):
'summary': BeautifulSoup(summary, 'html.parser').get_text(), 'summary': BeautifulSoup(summary, 'html.parser').get_text(),
'hashtags': ' '.join(hashtags), 'hashtags': ' '.join(hashtags),
'updated': dateutil.parser.parse(entry['updated']), 'updated': dateutil.parser.parse(entry['updated']),
'images': collect_images(entry),
} }
def setup(config_file): def setup(config_file):

View File

@ -18,6 +18,7 @@ setup(
'feedparser', 'feedparser',
'mastodon.py', 'mastodon.py',
'python-dateutil', 'python-dateutil',
'pyyaml'], 'pyyaml',
'urllib3[secure]'],
entry_points={'console_scripts': ['feediverse = feediverse:main']} entry_points={'console_scripts': ['feediverse = feediverse:main']}
) )