Merge pull request 'Clean up warnings (+fallback metrics)' (#685) from Oneric/akkoma:metrics into develop
Reviewed-on: https://akkoma.dev/AkkomaGang/akkoma/pulls/685
This commit is contained in:
commit
755c75d8a4
3 changed files with 109 additions and 9 deletions
|
@ -15,8 +15,19 @@ defmodule Pleroma.JobQueueMonitor do
|
|||
|
||||
@impl true
|
||||
def init(state) do
|
||||
:telemetry.attach("oban-monitor-failure", [:oban, :job, :exception], &handle_event/4, nil)
|
||||
:telemetry.attach("oban-monitor-success", [:oban, :job, :stop], &handle_event/4, nil)
|
||||
:telemetry.attach(
|
||||
"oban-monitor-failure",
|
||||
[:oban, :job, :exception],
|
||||
&Pleroma.JobQueueMonitor.handle_event/4,
|
||||
nil
|
||||
)
|
||||
|
||||
:telemetry.attach(
|
||||
"oban-monitor-success",
|
||||
[:oban, :job, :stop],
|
||||
&Pleroma.JobQueueMonitor.handle_event/4,
|
||||
nil
|
||||
)
|
||||
|
||||
{:ok, state}
|
||||
end
|
||||
|
|
|
@ -101,7 +101,8 @@ defmodule Pleroma.Web.Telemetry do
|
|||
]
|
||||
end
|
||||
|
||||
defp summary_metrics do
|
||||
# Summary metrics are currently not (yet) supported by the prometheus exporter
|
||||
defp summary_metrics(byte_unit) do
|
||||
[
|
||||
# Phoenix Metrics
|
||||
summary("phoenix.endpoint.stop.duration",
|
||||
|
@ -118,10 +119,98 @@ defmodule Pleroma.Web.Telemetry do
|
|||
summary("pleroma.repo.query.idle_time", unit: {:native, :millisecond}),
|
||||
|
||||
# VM Metrics
|
||||
summary("vm.memory.total", unit: {:byte, :kilobyte}),
|
||||
summary("vm.memory.total", unit: {:byte, byte_unit}),
|
||||
summary("vm.total_run_queue_lengths.total"),
|
||||
summary("vm.total_run_queue_lengths.cpu"),
|
||||
summary("vm.total_run_queue_lengths.io"),
|
||||
summary("vm.total_run_queue_lengths.io")
|
||||
]
|
||||
end
|
||||
|
||||
defp sum_counter_pair(basename, opts) do
|
||||
[
|
||||
sum(basename <> ".psum", opts),
|
||||
counter(basename <> ".pcount", opts)
|
||||
]
|
||||
end
|
||||
|
||||
# Prometheus exporter doesn't support summaries, so provide fallbacks
|
||||
defp summary_fallback_metrics(byte_unit \\ :byte) do
|
||||
# Summary metrics are not supported by the Prometheus exporter
|
||||
# https://github.com/beam-telemetry/telemetry_metrics_prometheus_core/issues/11
|
||||
# and sum metrics currently only work with integers
|
||||
# https://github.com/beam-telemetry/telemetry_metrics_prometheus_core/issues/35
|
||||
#
|
||||
# For VM metrics this is kindof ok as they appear to always be integers
|
||||
# and we can use sum + counter to get the average between polls from their change
|
||||
# But for repo query times we need to use a full distribution
|
||||
|
||||
simple_buckets = [0, 1, 2, 4, 8, 16]
|
||||
simple_buckets_quick = for t <- simple_buckets, do: t / 100.0
|
||||
|
||||
# Already included in distribution metrics anyway:
|
||||
# phoenix.router_dispatch.stop.duration
|
||||
# pleroma.repo.query.total_time
|
||||
# pleroma.repo.query.queue_time
|
||||
dist_metrics =
|
||||
[
|
||||
distribution("phoenix.endpoint.stop.duration.fdist",
|
||||
event_name: [:phoenix, :endpoint, :stop],
|
||||
measurement: :duration,
|
||||
unit: {:native, :millisecond},
|
||||
reporter_options: [
|
||||
buckets: simple_buckets
|
||||
]
|
||||
),
|
||||
distribution("pleroma.repo.query.decode_time.fdist",
|
||||
event_name: [:pleroma, :repo, :query],
|
||||
measurement: :decode_time,
|
||||
unit: {:native, :millisecond},
|
||||
reporter_options: [
|
||||
buckets: simple_buckets_quick
|
||||
]
|
||||
),
|
||||
distribution("pleroma.repo.query.query_time.fdist",
|
||||
event_name: [:pleroma, :repo, :query],
|
||||
measurement: :query_time,
|
||||
unit: {:native, :millisecond},
|
||||
reporter_options: [
|
||||
buckets: simple_buckets
|
||||
]
|
||||
),
|
||||
distribution("pleroma.repo.query.idle_time.fdist",
|
||||
event_name: [:pleroma, :repo, :query],
|
||||
measurement: :idle_time,
|
||||
unit: {:native, :millisecond},
|
||||
reporter_options: [
|
||||
buckets: simple_buckets
|
||||
]
|
||||
)
|
||||
]
|
||||
|
||||
vm_metrics =
|
||||
sum_counter_pair("vm.memory.total",
|
||||
event_name: [:vm, :memory],
|
||||
measurement: :total,
|
||||
unit: {:byte, byte_unit}
|
||||
) ++
|
||||
sum_counter_pair("vm.total_run_queue_lengths.total",
|
||||
event_name: [:vm, :total_run_queue_lengths],
|
||||
measurement: :total
|
||||
) ++
|
||||
sum_counter_pair("vm.total_run_queue_lengths.cpu",
|
||||
event_name: [:vm, :total_run_queue_lengths],
|
||||
measurement: :cpu
|
||||
) ++
|
||||
sum_counter_pair("vm.total_run_queue_lengths.io.fsum",
|
||||
event_name: [:vm, :total_run_queue_lengths],
|
||||
measurement: :io
|
||||
)
|
||||
|
||||
dist_metrics ++ vm_metrics
|
||||
end
|
||||
|
||||
defp common_metrics do
|
||||
[
|
||||
last_value("pleroma.local_users.total"),
|
||||
last_value("pleroma.domains.total"),
|
||||
last_value("pleroma.local_statuses.total"),
|
||||
|
@ -129,8 +218,10 @@ defmodule Pleroma.Web.Telemetry do
|
|||
]
|
||||
end
|
||||
|
||||
def prometheus_metrics, do: summary_metrics() ++ distribution_metrics()
|
||||
def live_dashboard_metrics, do: summary_metrics()
|
||||
def prometheus_metrics,
|
||||
do: common_metrics() ++ distribution_metrics() ++ summary_fallback_metrics()
|
||||
|
||||
def live_dashboard_metrics, do: common_metrics() ++ summary_metrics(:megabyte)
|
||||
|
||||
defp periodic_measurements do
|
||||
[
|
||||
|
|
|
@ -128,6 +128,4 @@ defmodule Pleroma.HTML.Scrubber.Default do
|
|||
Meta.allow_tag_with_these_attributes(:small, [])
|
||||
|
||||
Meta.strip_everything_not_covered()
|
||||
|
||||
defp scrub_css(value), do: value
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue