Remove HTML tags from content.

Do this as early as possible, while the entry is being processed, so that
later steps can rely on it (especially when counting characters).

Also add a new requirement: beautifulsoup4.
This commit is contained in:
Hartmut Goebel
2019-03-29 22:54:30 +01:00
parent fc56be6d70
commit 8886fd5d2d
2 changed files with 10 additions and 3 deletions

View File

@@ -6,10 +6,12 @@ import argparse
import yaml
import dateutil
import feedparser
from bs4 import BeautifulSoup
from mastodon import Mastodon
from datetime import datetime, timezone
DEFAULT_CONFIG_FILE = os.path.join("~", ".feediverse")
def main():
@@ -74,10 +76,11 @@ def get_entry(entry):
for tag in entry.get('tags', []):
for t in tag['term'].split(' '):
hashtags.append('#{}'.format(t))
summary = entry.get('summary', '')
return {
'url': entry.id,
'title': entry.title,
'summary': entry.get('summary', ''),
'title': BeautifulSoup(entry.title, 'html.parser').get_text(),
'summary': BeautifulSoup(summary, 'html.parser').get_text(),
'hashtags': ' '.join(hashtags),
'updated': dateutil.parser.parse(entry['updated']),
}

View File

@@ -14,6 +14,10 @@ setup(
description='Connect an RSS Feed to Mastodon',
long_description=long_description,
long_description_content_type="text/markdown",
install_requires=['feedparser', 'mastodon.py', 'python-dateutil', 'pyyaml'],
install_requires=['beautifulsoup4',
'feedparser',
'mastodon.py',
'python-dateutil',
'pyyaml'],
entry_points={'console_scripts': ['feediverse = feediverse:main']}
)