Remove HTML tags from content.

Do this early, while processing the entry, so that later steps can rely
on it (especially when counting characters).

Also add a new requirement: beautifulsoup4.
This commit is contained in:
Hartmut Goebel
2019-03-29 22:54:30 +01:00
parent fc56be6d70
commit 8886fd5d2d
2 changed files with 10 additions and 3 deletions

View File

@ -6,10 +6,12 @@ import argparse
import yaml import yaml
import dateutil import dateutil
import feedparser import feedparser
from bs4 import BeautifulSoup
from mastodon import Mastodon from mastodon import Mastodon
from datetime import datetime, timezone from datetime import datetime, timezone
DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse") DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")
def main(): def main():
@ -74,10 +76,11 @@ def get_entry(entry):
for tag in entry.get('tags', []): for tag in entry.get('tags', []):
for t in tag['term'].split(' '): for t in tag['term'].split(' '):
hashtags.append('#{}'.format(t)) hashtags.append('#{}'.format(t))
summary = entry.get('summary', '')
return { return {
'url': entry.id, 'url': entry.id,
'title': entry.title, 'title': BeautifulSoup(entry.title, 'html.parser').get_text(),
'summary': entry.get('summary', ''), 'summary': BeautifulSoup(summary, 'html.parser').get_text(),
'hashtags': ' '.join(hashtags), 'hashtags': ' '.join(hashtags),
'updated': dateutil.parser.parse(entry['updated']), 'updated': dateutil.parser.parse(entry['updated']),
} }

View File

@ -14,6 +14,10 @@ setup(
description='Connect an RSS Feed to Mastodon', description='Connect an RSS Feed to Mastodon',
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
install_requires=['feedparser', 'mastodon.py', 'python-dateutil', 'pyyaml'], install_requires=['beautifulsoup4',
'feedparser',
'mastodon.py',
'python-dateutil',
'pyyaml'],
entry_points={'console_scripts': ['feediverse = feediverse:main']} entry_points={'console_scripts': ['feediverse = feediverse:main']}
) )