desktop changes

This commit is contained in:
mia 2024-09-04 04:47:13 -07:00
parent 81071e8fee
commit bb8a48fd4d
11 changed files with 306 additions and 30 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
/__pycache__/
/graph.db
/filtered.list
/out/
/sec.py

View file

@ -3,24 +3,20 @@ import sys
from collections import namedtuple from collections import namedtuple
from functools import cache from functools import cache
from pathlib import Path from pathlib import Path
from typing import Optional
import psycopg import psycopg
try: from com import eval_config, progressbar
import progressbar2 as progressbar
except ImportError:
import progressbar
Note = namedtuple("Note", ["renote_id", "reply_id", "user_id"]) Note = namedtuple("Note", ["renote_id", "reply_id", "user_id"])
Tree = namedtuple("Tree", ["id", "replies", "renotes"]) Tree = namedtuple("Tree", ["id", "replies", "renotes"])
print("configuring") config = eval_config()
config = {}
exec(Path("config.py").read_text(), config)
conn: psycopg.Connection = config["connect"]() conn: psycopg.Connection = config["connect"]()
user_id: str = config["user_id"] user_id: str = config["user_id"]
early_exit = config.get("early_exit") early_exit: Optional[int] = config.get("early_exit")
print("fetching note ids", file=sys.stderr) print("fetching note ids", file=sys.stderr)

View file

@ -4,32 +4,14 @@ from typing import Callable, List
import psycopg import psycopg
from ty import FilterableNote, Visibility from com import FilterableNote, Visibility, eval_config, parse_graph, progressbar
try:
import progressbar2 as progressbar
except ImportError:
import progressbar
print("configuring") config = eval_config()
config = {}
exec(Path("config.py").read_text(), config)
conn: psycopg.Connection = config["connect"]() conn: psycopg.Connection = config["connect"]()
criteria: Callable[[FilterableNote], bool] = config["criteria"] criteria: Callable[[FilterableNote], bool] = config["criteria"]
intermediate = {} intermediate = parse_graph()
print("parsing")
for line in Path("graph.db").read_text().splitlines():
id, replies, quotes, flags = line.split("\t")
intermediate[id] = {
"id": id,
"replies": replies.split(",") if len(replies) > 0 else [],
"quotes": quotes.split(",") if len(quotes) > 0 else [],
"flags": flags.split(",") if len(flags) > 0 else [],
}
def transform(entry: dict) -> FilterableNote: def transform(entry: dict) -> FilterableNote:
note = conn.execute( note = conn.execute(

144
3_archive.py Normal file
View file

@ -0,0 +1,144 @@
import json
from http.client import HTTPResponse
from pathlib import Path
from shutil import copyfileobj
from urllib.request import urlopen
import brotli
import msgpack
import psycopg
from com import Visibility, eval_config, parse_graph, progressbar
# Load the configuration script named on the command line and open the
# database connection it provides.
config = eval_config()
conn: psycopg.Connection = config["connect"]()
# Reply/quote graph keyed by note id, parsed from graph.db.
graph = parse_graph()
print("reading filterlist")
# One note id per line of filtered.list; each one is passed to collect_note.
filtered = Path("filtered.list").read_text().strip().splitlines()
# User records gathered so far, keyed by user id (filled in by collect_user).
collected_users = {}
def collect_user(id: str):
    """Look up user `id` once and store its record in `collected_users`.

    Does nothing when the user has already been collected or when no row
    with that id exists. A missing profile row yields an empty description
    and an empty field list. Always returns None; the result is the entry
    added to `collected_users`.
    """
    if id in collected_users:
        return None

    account = conn.execute('select username, host, "avatarUrl" from "user" where id = %s', [id]).fetchone()
    if account is None:
        return None
    username, host, avatar_url = account

    bio_row = conn.execute('select description, fields from user_profile where "userId" = %s', [id]).fetchone()
    description, fields = bio_row or ("", [])

    collected_users[id] = {
        "id": id,
        "username": username,
        "host": host,
        "description": description,
        "fields": fields,
        "avatar_url": avatar_url,
    }
# (note id, note record) pairs appended by the collection loop and written out later.
collected_notes = []
# (file id, url) pairs for attachments that collect_note marks for download.
files_to_collect = []
def collect_note(id: str):
    """Build the archive record for note `id`, recursing into replies and quotes.

    The author is registered through `collect_user`. Children are taken from
    the `graph` entry for `id`; children that no longer exist in the database
    are dropped. Attachments of notes flagged "self" are queued in
    `files_to_collect` for download and stored with a null url; attachments
    of other notes keep their remote url.

    Returns the record as a dict, or None when no note row with that id
    exists. The "attachments", "replies" and "quotes" keys are only present
    when they are non-empty.
    """
    row = conn.execute('select text, "userId", "createdAt", "updatedAt", reactions, "renoteCount", visibility, "fileIds" from note where id = %s', [id]).fetchone()
    if row is None:
        return None
    text, user_id, created_at, updated_at, reactions, renotes, visibility, file_ids = row

    collect_user(user_id)

    output = {
        "id": id,
        "text": text,
        "user_id": user_id,
        # astimezone(tz=None) converts to the system local timezone.
        "created_at": created_at.astimezone(tz=None).isoformat(),
        "updated_at": None if updated_at is None else updated_at.astimezone(tz=None).isoformat(),
        "reactions": reactions,
        "renotes": renotes,
        "visibility": Visibility.from_db(visibility).code(),
    }

    node = graph[id]
    replies = [
        reply
        for reply in (collect_note(child) for child in node["replies"])
        if reply is not None
    ]
    quotes = [
        quote
        for quote in (collect_note(child) for child in node["quotes"])
        if quote is not None
    ]

    is_own = "self" in node["flags"]
    attachments = []
    for file_id in file_ids:
        file_row = conn.execute('select name, type, comment, url from drive_file where id = %s', [file_id]).fetchone()
        if file_row is None:
            # The note references a drive file whose row is gone; there is
            # nothing to describe or download.
            continue
        _name, type_, comment, url = file_row
        attachment = {
            "id": file_id,
            "type": type_,
            "comment": comment,
        }
        if is_own:  # archive own attachments
            files_to_collect.append((file_id, url))
            attachment["url"] = None
        else:
            attachment["url"] = url
        # Previously the dict was built but never stored, so every record
        # lost its attachment list.
        attachments.append(attachment)

    # Empty collections are omitted to keep the serialized record small.
    if attachments:
        output["attachments"] = attachments
    if replies:
        output["replies"] = replies
    if quotes:
        output["quotes"] = quotes
    return output
# Collect every filtered note (and, recursively, its replies and quotes).
pb = progressbar.ProgressBar(
    0,
    len(filtered),
    prefix="collecting data ",
)
missing = []
for id in filtered:
    note = collect_note(id)
    if note is None:
        # collect_note returns None when the note row no longer exists.
        # Queuing it would later write an archive file containing only nil.
        missing.append(id)
    else:
        collected_notes.append((id, note))
    pb.increment()
pb.finish()
if missing:
    # Reported after the bar finishes so the message does not garble it.
    print(f"skipped {len(missing)} filtered notes that are missing from the database")
# Output layout: out/note, out/user and out/file.
outdir = Path("out")
for subdir in ("note", "user", "file"):
    # parents=True also creates `out` itself; exist_ok=True makes reruns a
    # no-op and avoids the check-then-create race of exists() + mkdir().
    (outdir / subdir).mkdir(parents=True, exist_ok=True)
# Write every collected note and user as brotli-compressed msgpack.
pb = progressbar.ProgressBar(
    0,
    len(collected_notes) + len(collected_users),
    prefix="writing data ",
)
for id, note in collected_notes:
    outfile = outdir / "note" / f"{id}.mpk.br"
    outfile.write_bytes(brotli.compress(msgpack.dumps(note)))
    pb.increment()
for id, user in collected_users.items():
    outfile = outdir / "user" / f"{id}.mpk.br"
    # Serialize the user record. This loop used to dump `note`, the variable
    # left over from the loop above, so every user file held the last note.
    outfile.write_bytes(brotli.compress(msgpack.dumps(user)))
    pb.increment()
pb.finish()
# Download the attachments queued by collect_note into out/file/<file id>.
pb = progressbar.ProgressBar(
    0,
    len(files_to_collect),
    prefix="downloading attachments ",
)
for (id, url) in files_to_collect:
    outfile = outdir / "file" / id
    # The request is opened first so a failed request does not leave an
    # empty file behind. Both handles are closed even if the copy raises.
    with urlopen(url) as response, outfile.open("wb") as f:
        copyfileobj(response, f)
    pb.increment()
pb.finish()

33
4_delete.py Normal file
View file

@ -0,0 +1,33 @@
from pathlib import Path
import httpx
import psycopg
from com import eval_config, parse_graph, progressbar
# Load the configuration script named on the command line and open the
# database connection it provides.
config = eval_config()
conn: psycopg.Connection = config["connect"]()
# API credentials and base URL from the config; the code visible in this
# script does not use them yet (the HTTP client below is commented out).
token: str = config["token"]
api: str = config["api"]
# Reply/quote graph keyed by note id, parsed from graph.db.
graph = parse_graph()
print("reading filterlist")
# One note id per line of filtered.list; each is used as a key into `graph`.
filtered = Path("filtered.list").read_text().strip().splitlines()
# (note id, file ids) pairs appended by enqueue.
queue = []
def enqueue(note):
    """Append `note` and its descendants to `queue`, children first.

    Replies are visited before quotes, and a note is queued only after all
    of its children. Only notes flagged "self" are queued, each as an
    (id, file ids) pair read from the database.
    """
    for child_id in note["replies"] + note["quotes"]:
        enqueue(graph[child_id])

    if "self" not in note["flags"]:
        return

    note_id = note["id"]
    row = conn.execute('select "fileIds" from note where id = %s', [note_id]).fetchone()
    queue.append((note_id, row[0]))
# Queue every filtered thread, then show the resulting work list.
for root_id in filtered:
    root_node = graph[root_id]
    enqueue(root_node)
print(queue)
# client = httpx.Client()

View file

@ -1,7 +1,15 @@
import sys
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Callable
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from pathlib import Path
from typing import Callable, Dict, List
try:
import progressbar2 as progressbar
except ImportError:
import progressbar
class Visibility(Enum): class Visibility(Enum):
public = 1 public = 1
@ -18,6 +26,13 @@ class Visibility(Enum):
case "specified": return cls.direct case "specified": return cls.direct
case _: raise ValueError(f"unknown visibility `{raw}`") case _: raise ValueError(f"unknown visibility `{raw}`")
def code(self) -> str:
    """Return the one-letter code for this visibility.

    "p" for public, "u" for unlisted, "f" for followers, "d" for direct.
    Falls through and returns None when none of the four compares equal.
    """
    letters = (
        ("public", "p"),
        ("unlisted", "u"),
        ("followers", "f"),
        ("direct", "d"),
    )
    # Candidates are looked up lazily and compared in the listed order.
    for member_name, letter in letters:
        if self == getattr(self, member_name):
            return letter
    return None
@dataclass @dataclass
class FilterableNote: class FilterableNote:
@ -59,3 +74,24 @@ class FilterableNote:
"reactions": self.reactions, "reactions": self.reactions,
"renotes": self.renotes, "renotes": self.renotes,
} }
def eval_config() -> dict:
    """Execute the config script named by the first command-line argument.

    Returns:
        The namespace dict the script was executed in, so every top-level
        name the script defines is available as a key.

    Raises:
        SystemExit: when no config path was given on the command line.
        OSError: when the config file cannot be read.
    """
    print("configuring")
    if len(sys.argv) < 2:
        raise SystemExit("usage: <script> <config.py>")
    path = Path(sys.argv[1])
    # NOTE(review): exec runs the file with full privileges; only point this
    # at config scripts you trust.
    # Compiling with the real filename makes tracebacks from inside the
    # config point at the file instead of "<string>". Python source is
    # UTF-8 by default, so it is read as such regardless of locale.
    compiled = compile(path.read_text(encoding="utf-8"), str(path), "exec")
    config: dict = {}
    exec(compiled, config)
    return config
def parse_graph() -> Dict[str, dict]:
    """Read graph.db from the working directory into a dict keyed by note id.

    Each line holds four tab-separated fields: the note id followed by
    comma-separated lists of replies, quotes and flags. An empty field
    becomes an empty list. Every value is a dict with the keys "id",
    "replies", "quotes" and "flags".
    """

    def as_list(field: str) -> list:
        # "".split(",") would give [""], so empty fields are special-cased.
        return field.split(",") if field else []

    print("parsing graph")
    rows = (line.split("\t") for line in Path("graph.db").read_text().splitlines())
    return {
        note_id: {
            "id": note_id,
            "replies": as_list(reply_field),
            "quotes": as_list(quote_field),
            "flags": as_list(flag_field),
        }
        for note_id, reply_field, quote_field, flag_field in rows
    }

46
conf_mia.py Normal file
View file

@ -0,0 +1,46 @@
import math
from datetime import UTC, datetime, timedelta
from com import FilterableNote, Visibility
from sec import connect, tokens
# Account this configuration applies to; the graph script reads it as config["user_id"].
user_id = "9gf2ev4ex5dflllo"
# API token and base URL; the delete script reads them as config["token"] and config["api"].
token = tokens["mia"]
api = "https://void.rehab/api/"
# Optional integer read by the graph script via config.get("early_exit").
# NOTE(review): how the limit is applied is not visible here — confirm in 1_graph.py.
early_exit = 0xFFF
# Captured once when the config is loaded, so every criteria() call uses the same "now".
now = datetime.now(UTC)
# criteria() returns True when a thread's weighted score is below this value.
threshold = 0.1
def criteria(root: FilterableNote) -> bool:
    """Decide whether the thread rooted at `root` matches the deletion criteria.

    Returns False (keep the thread) when:
      * the thread contains a direct message newer than two months,
      * the newest note in the thread is younger than 14 days, or
      * the thread contains none of my own posts to score.
    Otherwise my best post's score (reactions + 5 * renotes) is divided by
    the square root of my newest post's age in days, and the thread matches
    when that weighted score is below `threshold`.
    """
    thread = list(root.thread())
    thread_self = list(root.thread_self())

    # if there are dms involved...
    # Direct notes are selected explicitly rather than via min() over
    # visibility.value: that only finds a DM when `direct` has the lowest
    # enum value, and otherwise fires only when every note is a DM.
    direct_notes = [note for note in thread if note.visibility == Visibility.direct]
    if direct_notes:
        most_recent_direct = max(direct_notes, key=lambda note: note.when)
        # ...and the dms are younger than two months...
        if now - most_recent_direct.when < timedelta(days=30 * 2):
            # ...do not delete the thread
            return False

    # get the most recent post...
    others_recency = max(thread, key=lambda note: note.when)
    # ...and bail if it's too new
    if now - others_recency.when < timedelta(days=14):
        return False

    if not thread_self:
        # Nothing of mine to score; keeping the thread is the safe answer.
        return False

    def score(note) -> float:
        return note.reactions + note.renotes * 5

    # get my...
    most_recent_post = max(thread_self, key=lambda note: note.when)  # ...most recent post...
    high_score_post = max(thread_self, key=score)  # ...highest scoring post...

    # ...and their values...
    most_recent_age = now - most_recent_post.when
    high_score = score(high_score_post)

    # ...weigh it...
    # Clamp to one day so a same-day post can never divide by zero.
    weighted_score = high_score / math.sqrt(max(most_recent_age.days, 1))

    # ...and check it against a threshold
    return weighted_score < threshold

14
conf_pain.py Normal file
View file

@ -0,0 +1,14 @@
import math
from datetime import UTC, datetime, timedelta
from com import FilterableNote
from sec import connect, tokens
# Account this configuration applies to; the graph script reads it as config["user_id"].
user_id = "9gszslkcdfnomssj"
# API token and base URL; the delete script reads them as config["token"] and config["api"].
token = tokens["pain"]
api = "https://void.rehab/api/"
def criteria(root: FilterableNote) -> bool:
# if it's more than two months old, delete
# return (datetime.now(UTC) - root.when).days > 60
return (datetime.now(UTC) - root.when).days > (12 * 30)

13
go.sh Executable file
View file

@ -0,0 +1,13 @@
#!/bin/sh
# Run the pipeline for one account: `go.sh <name>` uses conf_<name>.py.
set -ex

# Fail with a usage message instead of silently running with "conf_.py".
: "${1:?usage: go.sh <account>}"
conf="conf_$1.py"

# Start from a clean slate; -f makes removal a no-op when the artefact is absent.
rm -f graph.db filtered.list
rm -rf out

python3 1_graph.py "$conf"
python3 2_filter.py "$conf"
# python3 3_archive.py conf_$1.py
# echo uploading to memorial
# rsync -r -e 'ssh -p23' --progress out/ memorial:fediverse/$1/
# python3 4_delete.py conf_$1.py

2
proxy.sh Executable file
View file

@ -0,0 +1,2 @@
#!/bin/sh
# Open an SSH tunnel through the host `vr`: connections to local port 5432 are
# forwarded to localhost:5432 as seen from that host. -N runs no remote
# command, so the process only holds the tunnel open.
# NOTE(review): 5432 is presumably the PostgreSQL server the scripts connect to — confirm.
exec ssh -NL 5432:localhost:5432 vr

View file

@ -0,0 +1,5 @@
httpx
progressbar2
psycopg
brotli
msgpack