Improve error reporting and fix hang when an account fails to fetch

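- better error reporting: accounts that fail are recorded, listed on stderr at the end, and the program exits with status 1
- fix the program hanging at the end if any of the accounts failed to fetch (the per-account completion event is now always set, including on failure)
- remove the trailing "; " separator from the end of the user agent string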
This commit is contained in:
io 2021-08-13 09:40:20 +00:00
parent 9b60540d07
commit 97a2f5de00

View file

@ -15,9 +15,9 @@ from functools import partial
from third_party.utils import extract_post_content from third_party.utils import extract_post_content
USER_AGENT = ( USER_AGENT = (
'fedi-ebooks; ' 'pleroma-ebooks; '
f'{aiohttp.__version__}; ' f'{aiohttp.__version__}; '
f'{platform.python_implementation()}/{platform.python_version()}; ' f'{platform.python_implementation()}/{platform.python_version()}'
) )
UTC = pendulum.timezone('UTC') UTC = pendulum.timezone('UTC')
@ -27,6 +27,7 @@ ACTIVITYPUB_CONTENT_TYPE = 'application/activity+json'
class PostFetcher: class PostFetcher:
def __init__(self, *, config): def __init__(self, *, config):
self.config = config self.config = config
self.erroneous_accounts = []
async def __aenter__(self): async def __aenter__(self):
stack = contextlib.AsyncExitStack() stack = contextlib.AsyncExitStack()
@ -80,13 +81,14 @@ class PostFetcher:
# LOL sqlite error handling is so bad # LOL sqlite error handling is so bad
if exc.args[0].startswith('UNIQUE constraint failed: '): if exc.args[0].startswith('UNIQUE constraint failed: '):
# this means we've encountered an item we already have saved # this means we've encountered an item we already have saved
done_ev.set()
break break
self.erroneous_accounts.append(account['fqn'])
raise raise
finally: finally:
print('COMMIT') print('Saving posts from', account['fqn'], 'to the DB')
await self._db.commit() await self._db.commit()
done_ev.set()
async def _insert_activity(self, activity): async def _insert_activity(self, activity):
if activity['type'] != 'Create': if activity['type'] != 'Create':
@ -109,6 +111,7 @@ class PostFetcher:
), ),
) )
# TODO figure out why i put shield here lol
@shield @shield
async def _fetch_account(self, tx, account): async def _fetch_account(self, tx, account):
done_ev = self._completed_accounts[account['fqn']] done_ev = self._completed_accounts[account['fqn']]
@ -118,15 +121,16 @@ class PostFetcher:
except Exception as exc: except Exception as exc:
import traceback import traceback
traceback.print_exception(type(exc), exc, exc.__traceback__) traceback.print_exception(type(exc), exc, exc.__traceback__)
done_ev.set()
self.erroneous_accounts.append(account['fqn'])
return return
print(f'Fetching posts for {account["acct"]}...') print(f'Fetching posts for {account["acct"]}...')
next_page_url = outbox['first'] next_page_url = outbox['first']
while True: while True:
print(f'Fetching {next_page_url}... ', end='', flush=True) print(f'Fetching {next_page_url}... ')
async with self._http.get(next_page_url) as resp: page = await resp.json() async with self._http.get(next_page_url) as resp: page = await resp.json()
print('done.')
for activity in page['orderedItems']: for activity in page['orderedItems']:
try: try:
@ -204,6 +208,13 @@ async def amain():
args = utils.arg_parser_factory(description='Fetch posts from all followed accounts').parse_args() args = utils.arg_parser_factory(description='Fetch posts from all followed accounts').parse_args()
config = utils.load_config(args.cfg) config = utils.load_config(args.cfg)
async with PostFetcher(config=config) as fetcher: await fetcher.fetch_all() async with PostFetcher(config=config) as fetcher: await fetcher.fetch_all()
if (accs := fetcher.erroneous_accounts):
print(
'Exiting unsuccessfully due to previous errors in these accounts:',
', '.join(accs),
file=sys.stderr,
)
sys.exit(1)
def main(): def main():
try: try: