scrubber/4_delete.py

156 lines
4.6 KiB
Python
Raw Permalink Normal View History

2024-10-04 16:43:40 -06:00
import sys
import time
2024-09-04 05:47:13 -06:00
from pathlib import Path
import httpx
2024-11-20 07:52:59 -07:00
import psutil
2024-09-04 05:47:13 -06:00
import psycopg
2024-11-20 07:52:59 -07:00
from com import FilterAction, eval_config, parse_graph, progressbar
2024-09-04 05:47:13 -06:00
# Load the scrubber configuration; it supplies a database connection factory
# ("connect"), the API token ("token") and the API base URL ("api").
config = eval_config()
# NOTE(review): `conn` is not referenced again in this script and is never
# closed here - confirm whether opening it is still needed at this stage.
conn: psycopg.Connection = config["connect"]()
token: str = config["token"]
api: str = config["api"]
graph = parse_graph()

print("reading filterlist")
# Every entry of filtered.list is consumed later as an "<note id> <action>"
# pair. Blank lines are skipped and fields are split on any run of
# whitespace, so stray spacing in the file no longer breaks that unpacking.
filtered = [
    line.split()
    for line in Path("filtered.list").read_text().splitlines()
    if line.strip()
]

print("building queue")
# (note id, action) tuples in the order the deletions will be submitted.
queue = []
2024-10-04 16:43:40 -06:00
# Ids of every note already visited by enqueue(), shared across calls so a
# note reachable through several paths is only ever queued once.
_enqueued_ids = set()


def enqueue(note, action):
    """Queue *note* and everything hanging off it for deletion.

    Walks the replies and then the quotes of *note* (looked up in the
    module-level ``graph``) and appends ``(note id, action)`` to the
    module-level ``queue`` for every visited note flagged ``"self"``.
    Descendants are always queued before the note they hang off.

    The walk is iterative, so very long reply chains cannot exhaust the
    interpreter's recursion limit. Each note is visited at most once: a note
    reachable both as a reply and as a quote, or through more than one filter
    entry, is queued a single time with the action of the first visit.

    Args:
        note: A graph entry with ``"id"``, ``"flags"``, ``"replies"`` and
            ``"quotes"`` keys.
        action: The action to record for every queued note.

    Raises:
        KeyError: If a referenced reply or quote id is missing from ``graph``.
    """
    # Each stack entry is (note, expanded); a note is emitted only on its
    # second pop, after all of its descendants have been handled.
    stack = [(note, False)]
    while stack:
        current, expanded = stack.pop()
        if expanded:
            if "self" in current["flags"]:
                queue.append((current["id"], action))
            continue
        if current["id"] in _enqueued_ids:
            continue
        _enqueued_ids.add(current["id"])
        stack.append((current, True))
        # Pushed in reverse so replies are processed before quotes, each in
        # their listed order.
        children = [*current["replies"], *current["quotes"]]
        for child_id in reversed(children):
            stack.append((graph[child_id], False))
2024-09-04 05:47:13 -06:00
2024-10-04 16:43:40 -06:00
# Expand every filter-list entry into queue items. The loop variable is
# deliberately not called `id`, which would shadow the builtin for the rest
# of the module.
for note_id, action_name in filtered:
    enqueue(graph[note_id], FilterAction(action_name))
2024-09-04 05:47:13 -06:00
2024-10-04 16:43:40 -06:00
class CustomETA(progressbar.ETA):
    """ETA widget whose estimate uses only the most recent progress changes.

    The elapsed time is recorded each time the progress value changes, and
    only the last ``WINDOW`` of those timestamps are kept. The per-item
    duration is derived from that window, so the estimate follows the
    current pace rather than the average over the whole run.
    """

    # Number of recent progress timestamps kept for the estimate.
    WINDOW = 10

    def __init__(self, *args, **kwargs):
        # Elapsed-seconds timestamps of the most recent value changes.
        self.history = []
        # Progress value seen on the previous call; None before the first.
        self.lastval = None
        super().__init__(*args, **kwargs)

    def _calculate_eta(self, progress, data, value, elapsed):
        """Return the estimated number of seconds left, never below zero.

        Args:
            progress: The progress bar; only ``max_value`` is read.
            data: Unused.
            value: Current progress value.
            elapsed: Time since the bar started, as a ``timedelta``.
        """
        now = elapsed.total_seconds()
        if self.lastval != value:
            self.history = [*self.history[-(self.WINDOW - 1):], now]
            self.lastval = value
        # N timestamps bound N - 1 completed steps (assuming the value
        # advances one step per change); dividing by N would underestimate
        # the per-item time. A single timestamp gives a span of zero, so the
        # divisor is clamped to avoid dividing by zero.
        intervals = max(len(self.history) - 1, 1)
        per_item = (self.history[-1] - self.history[0]) / intervals
        remaining = (progress.max_value - value) * per_item
        # Time already spent on the current item counts against the estimate.
        spent = now - self.history[-1]
        return max(remaining - spent, 0)
# Layout of the status line: the free-form "message" variable, percentage,
# bar, "done/total" counter and the windowed ETA, separated by single spaces.
_gap = " "
_bar_widgets = [
    progressbar.Variable("message", format="{formatted_value}"),
    _gap,
    progressbar.Percentage(),
    _gap,
    progressbar.Bar(),
    _gap,
    progressbar.SimpleProgress("%(value_s)s/%(max_value_s)s"),
    _gap,
    CustomETA(),
]

# The bar runs from 0 to the number of queued deletions.
# NOTE(review): the registered variable is "status", but no widget above
# displays it - the visible text comes from "message". Confirm which name
# is intended.
pb = progressbar.ProgressBar(
    0,
    len(queue),
    widgets=_bar_widgets,
    variables={"status": "work"},
)

# force initial display
pb.update(0)
# Minimum spacing between two delete requests, in seconds. Lower this if the
# server tolerates a faster pace; 429 responses are retried regardless.
MIN_REQUEST_INTERVAL = 30
# Back-pressure limits: new deletions are held back while either server
# queue has at least this many waiting jobs.
MAX_DELIVER_WAITING = 100
MAX_OBLITERATE_WAITING = 50000
# New deletions are held back until more than this much memory is available.
MIN_AVAILABLE_MEMORY = 512 * 1024 * 1024
# Unexpected API errors tolerated per note before the script gives up.
MAX_DELETE_ERRORS = 3


def _post_until_up(client, endpoint, payload):
    """POST *payload* to *endpoint*, retrying every second while it answers 502."""
    while True:
        resp = client.post(f"{api}/{endpoint}", json=payload)
        if resp.status_code != 502:
            return resp
        pb.update(message="down")
        time.sleep(1)


# Set when the server reports QUEUE_FULL; stops the run instead of
# continuing to push deletions at an overloaded server.
queue_full = False
seeking = False
last_req = 0
# The context manager closes the HTTP connection pool on every exit path.
with httpx.Client(timeout=60) as client:
    for note, action in queue:
        # seek through queue
        # helps prevent rate limits on resumed deletions
        if seeking:
            resp = _post_until_up(client, "notes/show", {
                "i": token,
                "noteId": note,
            })
            if resp.status_code == 404:
                # Already gone: count it as done and keep seeking.
                pb.increment(message="seeking")
                continue
            seeking = False

        # wait for queue to empty
        while True:
            resp = _post_until_up(client, "admin/queue/stats", {"i": token})
            # Keep the latest stats response on disk for troubleshooting.
            Path('queue-stats.dump').write_text(f"status:{resp.status_code}\nbody:\n{resp.text}")
            stats = resp.json()
            deliver_waiting = stats["deliver"]["waiting"]
            obliterate_waiting = stats["obliterate"]["waiting"]
            if deliver_waiting < MAX_DELIVER_WAITING and obliterate_waiting < MAX_OBLITERATE_WAITING:
                break
            pb.update(message=f"queue ({deliver_waiting}/{obliterate_waiting})")
            time.sleep(10)

        # make sure there's enough memory for new jobs
        while psutil.virtual_memory().available <= MIN_AVAILABLE_MEMORY:
            pb.update(message="memory")
            time.sleep(10)

        # prevent api rate limiting: sleep for the part of the interval that
        # has not passed yet (not for the time that already has).
        since_last = time.time() - last_req
        if since_last < MIN_REQUEST_INTERVAL:
            pb.update(message="delaying")
            time.sleep(MIN_REQUEST_INTERVAL - since_last)

        # queue new deletions
        err = 0
        while True:
            resp = client.post(f"{api}/notes/delete", json={
                "i": token,
                "noteId": note,
                "obliterate": action == FilterAction.Obliterate,
            })
            if resp.status_code in (429, 502):
                # Rate limited or server down: show it on the bar and retry.
                pb.update(message="limit" if resp.status_code == 429 else "down")
                time.sleep(1)
                continue
            if resp.status_code >= 400:
                body = resp.json()
                code = body["error"]["code"]
                if code == "NO_SUCH_NOTE":
                    # Note is already gone; skip ahead through the queue.
                    pb.increment(message="seeking")
                    seeking = True
                    break
                if code == "QUEUE_FULL":
                    print("\nobliterate queue overflowed, exiting to save server")
                    queue_full = True
                    break
                err += 1
                if err > MAX_DELETE_ERRORS:
                    raise RuntimeError(f"{code}: {body['error']['message']}")
                sys.stdout.write("\r")
                print(f"err {code} {body['error']['message']} ")
                time.sleep(30)
                # Retry the same note rather than counting the failure as a
                # completed deletion.
                continue
            pb.increment(message="deleting")
            last_req = time.time()
            break

        if queue_full:
            break

pb.finish()
if queue_full:
    # Non-zero exit so wrappers can tell the run stopped early.
    sys.exit(1)