scrubber/2_filter.py

67 lines
1.7 KiB
Python
Raw Normal View History

2024-07-26 10:36:56 -06:00
from dataclasses import dataclass
from pathlib import Path
from typing import Callable, List
import psycopg
2024-09-04 05:47:13 -06:00
from com import FilterableNote, Visibility, eval_config, parse_graph, progressbar
2024-07-26 10:36:56 -06:00
2024-09-04 05:47:13 -06:00
# Evaluate the scrubber configuration via the `com` helper; the "connect" and
# "criteria" entries of the result are read just below.
config = eval_config()
2024-07-26 10:36:56 -06:00
# Database connection, obtained by calling the factory stored under "connect".
conn: psycopg.Connection = config["connect"]()
# Predicate applied to each transformed root note; roots for which it returns
# True have their id written to the output list.
criteria: Callable[[FilterableNote], bool] = config["criteria"]
2024-09-04 05:47:13 -06:00
# Parsed note graph. Used in this script as a mapping whose values are dict
# entries carrying "id", "flags", "replies" and "quotes" keys; the items of
# "replies"/"quotes" are used as keys back into this mapping.
intermediate = parse_graph()
2024-07-26 10:36:56 -06:00
def transform(entry: dict) -> "FilterableNote | None":
    """Build the FilterableNote tree for ``entry`` and everything beneath it.

    Fetches the note's current row from the database, then recurses into
    every reply and quote listed on the graph entry.

    Args:
        entry: One node of the parsed graph. Must provide the ``id``,
            ``flags``, ``replies`` and ``quotes`` keys; the items of
            ``replies`` and ``quotes`` are keys into ``intermediate``.

    Returns:
        The populated FilterableNote, or None when this note or any note
        below it no longer exists in the database.
    """
    note = conn.execute(
        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
        [entry["id"]],
    ).fetchone()
    if note is None:
        return None  # part of thread disappeared during processing
    when, reactions, renotes, visibility = note

    # Walk the children one at a time so the first missing descendant aborts
    # the whole subtree immediately, instead of querying every remaining
    # reply/quote only to discard the result.
    replies = []
    for reply in entry["replies"]:
        child = transform(intermediate[reply])
        if child is None:
            return None  # bubble up, buttercup
        replies.append(child)

    quotes = []
    for quote in entry["quotes"]:
        child = transform(intermediate[quote])
        if child is None:
            return None  # bubble up, buttercup
        quotes.append(child)

    return FilterableNote(
        entry["id"],
        "self" in entry["flags"],
        replies,
        quotes,
        # NOTE(review): presumably a timezone-aware datetime from the driver;
        # astimezone() with no argument converts it to the local zone.
        when.astimezone(),
        # Total across all values of the reactions mapping (assumed to be
        # per-reaction counts -- confirm against the schema).
        sum(reactions.values()),
        renotes,
        Visibility.from_db(visibility),
    )
# Number of graph entries flagged as thread roots; this is the progress bar's
# upper bound.
root_count = sum(1 for node in intermediate.values() if "root" in node["flags"])

pb = progressbar.ProgressBar(0, root_count, prefix="processing ")
# Evaluate every root thread against the configured criteria and collect the
# ids of the roots that match.
targets = []
for entry in intermediate.values():
    if "root" not in entry["flags"]:
        continue
    transformed = transform(entry)
    # A None result means part of the thread vanished mid-run; skip it
    # (we'll get to it next cycle) but still advance the bar, because this
    # root was counted in the bar's maximum.
    if transformed is not None and criteria(transformed):
        targets.append(entry["id"])
    pb.increment()
pb.finish()

# One matching root id per line.
Path("filtered.list").write_text("\n".join(targets))