mirror of
https://git.mia.jetzt/scrubber
synced 2024-11-21 21:17:23 -07:00
85 lines
2.1 KiB
Python
85 lines
2.1 KiB
Python
|
from dataclasses import dataclass
|
||
|
from pathlib import Path
|
||
|
from typing import Callable, List
|
||
|
|
||
|
import psycopg
|
||
|
|
||
|
from ty import FilterableNote, Visibility
|
||
|
|
||
|
try:
|
||
|
import progressbar2 as progressbar
|
||
|
except ImportError:
|
||
|
import progressbar
|
||
|
|
||
|
|
||
|
print("configuring")

# config.py is ordinary Python that is run with exec(); every top-level name it
# defines ends up as a key of this dict. It executes with the full privileges
# of this script, so it must be a trusted, locally-authored file.
config = {}
config_source = Path("config.py").read_text()
exec(config_source, config)

# config.py must define a zero-argument "connect" callable; its return value
# is the database connection used for every query below.
conn: psycopg.Connection = config["connect"]()

# config.py must also define "criteria", the predicate that is applied to each
# transformed root note to decide whether its id is written to the output.
criteria: Callable[[FilterableNote], bool] = config["criteria"]
|
||
|
|
||
|
# Maps a note id to its raw graph entry: the id itself plus lists of reply
# ids, quote ids and flags, exactly as read from graph.db.
intermediate = {}

print("parsing")
# Every line of graph.db carries four tab-separated fields:
#   id <TAB> replies <TAB> quotes <TAB> flags
# where the last three are comma-separated lists.
for line in Path("graph.db").read_text().splitlines():
    # Named note_id rather than id so the builtin id() is not shadowed at
    # module scope for the rest of the script.
    note_id, replies, quotes, flags = line.split("\t")
    # "".split(",") yields [""] rather than [], so an empty field has to be
    # mapped to an empty list explicitly.
    intermediate[note_id] = {
        "id": note_id,
        "replies": replies.split(",") if replies else [],
        "quotes": quotes.split(",") if quotes else [],
        "flags": flags.split(",") if flags else [],
    }
|
||
|
|
||
|
|
||
|
def transform(entry: dict) -> "FilterableNote | None":
    """Build the FilterableNote tree rooted at ``entry``.

    Looks the note up in the ``note`` table by ``entry["id"]`` and then
    recursively transforms every id listed under ``entry["replies"]`` and
    ``entry["quotes"]`` (resolved through the module-level ``intermediate``
    mapping).

    Args:
        entry: One value of ``intermediate``: a dict with the keys ``"id"``,
            ``"replies"``, ``"quotes"`` and ``"flags"``.

    Returns:
        The assembled ``FilterableNote``, or ``None`` when this note or any
        note below it no longer has a row in the database.

    Raises:
        KeyError: If a reply or quote id has no entry in ``intermediate``.

    Note:
        The function recurses once per nesting level, so the recursion depth
        grows with the depth of the thread.
    """
    note = conn.execute(
        'select "createdAt", reactions, "renoteCount", visibility from note where id = %s',
        [entry["id"]],
    ).fetchone()
    if note is None:
        return None  # part of thread disappeared during processing
    when, reactions, renotes, visibility = note

    # Stop at the first missing child instead of transforming the remaining
    # siblings: the result is None either way, and every further transform()
    # call would cost at least one more query.
    replies = []
    for reply_id in entry["replies"]:
        reply = transform(intermediate[reply_id])
        if reply is None:
            return None  # bubble up, buttercup
        replies.append(reply)

    quotes = []
    for quote_id in entry["quotes"]:
        quote = transform(intermediate[quote_id])
        if quote is None:
            return None  # bubble up, buttercup
        quotes.append(quote)

    return FilterableNote(
        entry["id"],
        "self" in entry["flags"],
        replies,
        quotes,
        # presumably a datetime; astimezone() with no argument converts it to
        # the system's local timezone -- TODO confirm the column type
        when.astimezone(),
        # presumably a mapping of reaction -> count, summed into one total
        sum(reactions.values()),
        renotes,
        Visibility.from_db(visibility),
    )
|
||
|
|
||
|
|
||
|
# Count the entries flagged "root" up front so the progress bar knows its
# upper bound before any database work starts.
root_count = sum(1 for entry in intermediate.values() if "root" in entry["flags"])

pb = progressbar.ProgressBar(0, root_count, prefix="processing ")

# Ids of the root notes whose transformed tree satisfied criteria().
targets = []
for entry in intermediate.values():
    if "root" in entry["flags"]:
        transformed = transform(entry)
        # A None result means part of the thread was missing from the
        # database; we'll get to it next cycle. Such roots are skipped
        # entirely and do not advance the progress bar.
        if transformed is not None:
            if criteria(transformed):
                targets.append(entry["id"])
            pb.increment()
pb.finish()

# One id per line, with no trailing newline after the last one.
Path("filtered.list").write_text("\n".join(targets))
|