desktop changes

This commit is contained in:
mia 2024-09-04 04:47:13 -07:00
parent 81071e8fee
commit bb8a48fd4d
11 changed files with 306 additions and 30 deletions

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
/__pycache__/
/graph.db
/filtered.list
/out/
/sec.py

View file

@ -3,24 +3,20 @@ import sys
from collections import namedtuple
from functools import cache
from pathlib import Path
from typing import Optional
import psycopg
try:
import progressbar2 as progressbar
except ImportError:
import progressbar
from com import eval_config, progressbar
Note = namedtuple("Note", ["renote_id", "reply_id", "user_id"])
Tree = namedtuple("Tree", ["id", "replies", "renotes"])
print("configuring")
config = {}
exec(Path("config.py").read_text(), config)
config = eval_config()
conn: psycopg.Connection = config["connect"]()
user_id: str = config["user_id"]
early_exit = config.get("early_exit")
early_exit: Optional[int] = config.get("early_exit")
print("fetching note ids", file=sys.stderr)

View file

@ -4,32 +4,14 @@ from typing import Callable, List
import psycopg
from ty import FilterableNote, Visibility
try:
import progressbar2 as progressbar
except ImportError:
import progressbar
from com import FilterableNote, Visibility, eval_config, parse_graph, progressbar
print("configuring")
config = {}
exec(Path("config.py").read_text(), config)
config = eval_config()
conn: psycopg.Connection = config["connect"]()
criteria: Callable[[FilterableNote], bool] = config["criteria"]
intermediate = {}
print("parsing")
for line in Path("graph.db").read_text().splitlines():
id, replies, quotes, flags = line.split("\t")
intermediate[id] = {
"id": id,
"replies": replies.split(",") if len(replies) > 0 else [],
"quotes": quotes.split(",") if len(quotes) > 0 else [],
"flags": flags.split(",") if len(flags) > 0 else [],
}
intermediate = parse_graph()
def transform(entry: dict) -> FilterableNote:
note = conn.execute(

144
3_archive.py Normal file
View file

@ -0,0 +1,144 @@
import json
from http.client import HTTPResponse
from pathlib import Path
from shutil import copyfileobj
from urllib.request import urlopen
import brotli
import msgpack
import psycopg
from com import Visibility, eval_config, parse_graph, progressbar
# Load the configuration script named on the command line and open the
# database connection it provides.
config = eval_config()
conn: psycopg.Connection = config["connect"]()
# Reply/quote graph keyed by note id, parsed from graph.db.
graph = parse_graph()
print("reading filterlist")
# One note id per line; each id is passed to collect_note() further down.
filtered = Path("filtered.list").read_text().strip().splitlines()
# Archive records of users referenced by collected notes, keyed by user id.
# Filled in by collect_user().
collected_users = {}
def collect_user(id: str):
    """Store the archive record for user `id` in `collected_users`.

    Does nothing when the user has already been collected or when no row
    for it exists in the "user" table. A missing profile row is recorded as
    an empty description and an empty field list. Always returns None.
    """
    if id in collected_users:
        return

    user_row = conn.execute(
        'select username, host, "avatarUrl" from "user" where id = %s', [id]
    ).fetchone()
    if user_row is None:
        return None
    username, host, avatar_url = user_row

    profile_row = conn.execute(
        'select description, fields from user_profile where "userId" = %s', [id]
    ).fetchone()
    description, fields = profile_row if profile_row else ("", [])

    collected_users[id] = {
        "id": id,
        "username": username,
        "host": host,
        "description": description,
        "fields": fields,
        "avatar_url": avatar_url,
    }
# (note id, archive record) pairs for every root note in the filter list.
collected_notes = []
# (file id, url) pairs of own attachments that must be downloaded.
files_to_collect = []


def collect_note(id: str):
    """Build the archive record for note `id`, including replies and quotes.

    The note's author is registered through collect_user(). Replies and
    quotes listed in `graph` are collected recursively; children whose row
    is missing from the database are dropped. Attachments of notes flagged
    "self" in the graph are scheduled for download in `files_to_collect`
    and stored with a null url; other attachments keep their remote url.

    Returns the record as a dict, or None when the note has no database
    row. The "attachments", "replies" and "quotes" keys are only present
    when they are non-empty.
    """
    row = conn.execute('select text, "userId", "createdAt", "updatedAt", reactions, "renoteCount", visibility, "fileIds" from note where id = %s', [id]).fetchone()
    if row is None:
        return None
    text, user_id, created_at, updated_at, reactions, renotes, visibility, file_ids = row
    collect_user(user_id)

    output = {
        "id": id,
        "text": text,
        "user_id": user_id,
        "created_at": created_at.astimezone(tz=None).isoformat(),
        "updated_at": (
            updated_at.astimezone(tz=None).isoformat()
            if updated_at is not None
            else None
        ),
        "reactions": reactions,
        "renotes": renotes,
        "visibility": Visibility.from_db(visibility).code(),
    }

    node = graph[id]
    # Children are collected before this note's attachments, so the order
    # of files_to_collect stays children-first.
    replies = [
        reply
        for reply in (collect_note(reply_id) for reply_id in node["replies"])
        if reply is not None
    ]
    quotes = [
        quote
        for quote in (collect_note(quote_id) for quote_id in node["quotes"])
        if quote is not None
    ]

    is_own = "self" in node["flags"]
    attachments = []
    for file_id in file_ids or ():
        file_row = conn.execute('select name, type, comment, url from drive_file where id = %s', [file_id]).fetchone()
        if file_row is None:
            # The note references a file with no drive_file row; skip it
            # instead of crashing on the tuple unpack.
            continue
        _name, type_, comment, url = file_row
        if is_own:  # archive own attachments
            files_to_collect.append((file_id, url))
            url = None
        # The attachment was previously built but never stored, which left
        # every archived note without attachments.
        attachments.append({
            "id": file_id,
            "type": type_,
            "comment": comment,
            "url": url,
        })

    # Empty collections are omitted from the record.
    if attachments:
        output["attachments"] = attachments
    if replies:
        output["replies"] = replies
    if quotes:
        output["quotes"] = quotes
    return output
# Build the archive record of every root note named in the filter list.
pb = progressbar.ProgressBar(0, len(filtered), prefix="collecting data ")
for root_id in filtered:
    collected_notes.append((root_id, collect_note(root_id)))
    pb.increment()
pb.finish()
# Create the output tree: out/note, out/user and out/file.
# mkdir(parents=True, exist_ok=True) is idempotent, so no exists() check is
# needed and there is no window between the check and the creation.
outdir = Path("out")
for subdir in ("note", "user", "file"):
    (outdir / subdir).mkdir(parents=True, exist_ok=True)
# Serialise every collected note and user as brotli-compressed msgpack,
# one file per record.
pb = progressbar.ProgressBar(
    0,
    len(collected_notes) + len(collected_users),
    prefix="writing data ",
)
for note_id, note in collected_notes:
    outfile = outdir / "note" / f"{note_id}.mpk.br"
    outfile.write_bytes(brotli.compress(msgpack.dumps(note)))
    pb.increment()
for user_id, user in collected_users.items():
    outfile = outdir / "user" / f"{user_id}.mpk.br"
    # Write the user record itself; this previously serialised the last
    # note from the loop above into every user file.
    outfile.write_bytes(brotli.compress(msgpack.dumps(user)))
    pb.increment()
pb.finish()
# Download every own attachment scheduled by collect_note() into out/file/<id>.
pb = progressbar.ProgressBar(
    0,
    len(files_to_collect),
    prefix="downloading attachments ",
)
for file_id, url in files_to_collect:
    outfile = outdir / "file" / file_id
    response: HTTPResponse
    # Both context managers close their resource even when opening the
    # output file or copying the body fails part-way.
    with urlopen(url) as response, outfile.open("wb") as f:
        copyfileobj(response, f)
    pb.increment()
pb.finish()

33
4_delete.py Normal file
View file

@ -0,0 +1,33 @@
from pathlib import Path
import httpx
import psycopg
from com import eval_config, parse_graph, progressbar
# Load the configuration script named on the command line and open the
# database connection it provides.
config = eval_config()
conn: psycopg.Connection = config["connect"]()
# API credentials and base URL from the config. Nothing below uses them
# yet: the HTTP client at the end of the script is still commented out.
token: str = config["token"]
api: str = config["api"]
# Reply/quote graph keyed by note id, parsed from graph.db.
graph = parse_graph()
print("reading filterlist")
# One root note id per line.
filtered = Path("filtered.list").read_text().strip().splitlines()
# (note id, file ids) pairs in the order enqueue() appends them.
queue = []
def enqueue(note):
    """Append `note` and its whole reply/quote subtree to `queue`.

    `note` is a graph entry (a dict with "id", "replies", "quotes" and
    "flags"). Children are queued before their parent. Only notes flagged
    "self" are queued, each as a `(note id, file ids)` pair. A flagged note
    whose row has disappeared from the database is skipped.
    """
    for reply in note["replies"]:
        enqueue(graph[reply])
    for quote in note["quotes"]:
        enqueue(graph[quote])

    if "self" not in note["flags"]:
        return

    row = conn.execute('select "fileIds" from note where id = %s', [note["id"]]).fetchone()
    if row is None:
        # The graph was built earlier; if the note row is gone by now,
        # skip it rather than fail on `None[0]`.
        return
    queue.append((note["id"], row[0]))
# Queue every root note from the filter list along with its subtree, then
# show the resulting queue. Nothing is sent to the API yet.
for root_id in filtered:
    enqueue(graph[root_id])

print(queue)

# client = httpx.Client()

View file

@ -1,7 +1,15 @@
import sys
from dataclasses import dataclass
from typing import List, Callable
from datetime import datetime
from enum import Enum
from pathlib import Path
from typing import Callable, Dict, List
try:
import progressbar2 as progressbar
except ImportError:
import progressbar
class Visibility(Enum):
public = 1
@ -17,6 +25,13 @@ class Visibility(Enum):
case "followers": return cls.followers
case "specified": return cls.direct
case _: raise ValueError(f"unknown visibility `{raw}`")
def code(self) -> str:
match self:
case self.public: return "p"
case self.unlisted: return "u"
case self.followers: return "f"
case self.direct: return "d"
@dataclass
@ -59,3 +74,24 @@ class FilterableNote:
"reactions": self.reactions,
"renotes": self.renotes,
}
def eval_config() -> dict:
    """Execute the config script named by the first CLI argument.

    Returns the globals dict the script ran in, so every top-level name it
    defines is available as a key (exec also adds `__builtins__`).

    Raises SystemExit with a usage message when no config path was given.
    """
    print("configuring")
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "script"
        raise SystemExit(f"usage: {prog} <config.py>")
    config = {}
    # NOTE: exec runs the file as arbitrary Python code; only pass config
    # files you trust.
    exec(Path(sys.argv[1]).read_text(encoding="utf-8"), config)
    return config
def parse_graph(path: str = "graph.db") -> Dict[str, dict]:
    """Parse the tab-separated graph file into a dict keyed by note id.

    Each non-empty line holds four tab-separated fields: the note id, then
    comma-separated lists of reply ids, quote ids and flags. An empty list
    field becomes `[]`. Blank lines are ignored.

    Args:
        path: File to read. Defaults to "graph.db" in the working directory.

    Returns:
        A mapping from note id to
        `{"id": ..., "replies": [...], "quotes": [...], "flags": [...]}`.

    Raises:
        ValueError: If a non-empty line does not have exactly four fields.
    """

    def split_list(field: str) -> list:
        # "".split(",") would yield [""], so map the empty field to [].
        return field.split(",") if field else []

    print("parsing graph")
    graph = {}
    for line in Path(path).read_text().splitlines():
        if not line:
            continue
        note_id, replies, quotes, flags = line.split("\t")
        graph[note_id] = {
            "id": note_id,
            "replies": split_list(replies),
            "quotes": split_list(quotes),
            "flags": split_list(flags),
        }
    return graph

46
conf_mia.py Normal file
View file

@ -0,0 +1,46 @@
import math
from datetime import UTC, datetime, timedelta
from com import FilterableNote, Visibility
from sec import connect, tokens
# Id of the account this configuration belongs to (presumably the user
# whose notes are processed; confirm against the graph script).
user_id = "9gf2ev4ex5dflllo"
# API token for this account, looked up in the local `sec` module.
token = tokens["mia"]
# Base URL of the instance API.
api = "https://void.rehab/api/"
# Optional integer read via config.get("early_exit"); its exact effect is
# not visible from this file.
early_exit = 0xFFF
# Timezone-aware reference time for every age calculation in criteria().
now = datetime.now(UTC)
# criteria() returns True when a thread's weighted score is below this value.
threshold = 0.1
def criteria(root: FilterableNote) -> bool:
    """Return True when the thread rooted at `root` should be deleted.

    The thread is kept (False) when:
      * its least-visible note is a direct message and the newest direct
        message is less than 60 days old, or
      * the newest note anywhere in the thread is less than 14 days old.

    Otherwise the best score among the account's own notes
    (reactions + 5 * renotes) is divided by the square root of the age in
    days of the newest own note, and the thread is deleted when that
    weighted score is below `threshold`.
    """

    def posted_at(note):
        return note.when

    def engagement(note):
        return note.reactions + note.renotes*5

    whole_thread = root.thread()
    own_notes = root.thread_self()

    # DM check: only applies when the note with the smallest visibility
    # value in the thread is a direct message.
    least_visible = min(whole_thread, key=lambda note: note.visibility.value)
    if least_visible.visibility == Visibility.direct:
        direct_notes = (
            note for note in whole_thread
            if note.visibility == Visibility.direct
        )
        newest_direct = max(direct_notes, key=posted_at)
        # Recent DMs protect the whole thread.
        if now - newest_direct.when < timedelta(days=30 * 2):
            return False

    # Any activity within the last two weeks also protects the thread.
    newest_any = max(whole_thread, key=posted_at)
    if now - newest_any.when < timedelta(days=14):
        return False

    # Weigh the best-scoring own note against the age of the newest own note.
    newest_own = max(own_notes, key=posted_at)
    best_own = max(own_notes, key=engagement)
    own_age = now - newest_own.when
    weighted_score = engagement(best_own) / math.sqrt(own_age.days)
    return weighted_score < threshold

14
conf_pain.py Normal file
View file

@ -0,0 +1,14 @@
import math
from datetime import UTC, datetime, timedelta
from com import FilterableNote
from sec import connect, tokens
# Id of the account this configuration belongs to (presumably the user
# whose notes are processed; confirm against the graph script).
user_id = "9gszslkcdfnomssj"
# API token for this account, looked up in the local `sec` module.
token = tokens["pain"]
# Base URL of the instance API.
api = "https://void.rehab/api/"
def criteria(root: FilterableNote) -> bool:
    """Return True when `root` is more than 12 * 30 (360) whole days old."""
    # A 60-day cutoff (`.days > 60`) exists as a disabled alternative.
    age = datetime.now(UTC) - root.when
    return age.days > (12 * 30)

13
go.sh Executable file
View file

@ -0,0 +1,13 @@
#!/bin/sh
# Run the pipeline for one account: rebuild the graph, then filter it.
# Usage: ./go.sh <name>    (reads conf_<name>.py)
set -ex

# Abort with a usage message instead of silently running against "conf_.py".
: "${1:?usage: $0 <name>}"
conf="conf_$1.py"

# Drop the outputs of a previous run; -f makes a missing path a no-op.
rm -f graph.db filtered.list
rm -rf out

python3 1_graph.py "$conf"
python3 2_filter.py "$conf"
# python3 3_archive.py "$conf"
# echo uploading to memorial
# rsync -r -e 'ssh -p23' --progress out/ "memorial:fediverse/$1/"
# python3 4_delete.py "$conf"

2
proxy.sh Executable file
View file

@ -0,0 +1,2 @@
#!/bin/sh
# Forward local port 5432 to localhost:5432 as seen from the ssh host "vr".
# -N runs no remote command; exec replaces this shell with the ssh process.
# NOTE(review): 5432 is presumably the PostgreSQL port used by the config's
# connect() - confirm.
exec ssh -NL 5432:localhost:5432 vr

View file

@ -0,0 +1,5 @@
httpx
progressbar2
psycopg
brotli
msgpack