Add documentation for ES search
This commit is contained in:
parent
635a3c223a
commit
bc9e76cce7
4 changed files with 278 additions and 0 deletions
|
@ -3472,5 +3472,90 @@ config :pleroma, :config_description, [
|
|||
suggestion: [100_000]
|
||||
}
|
||||
]
|
||||
},
|
||||
%{
|
||||
group: :pleroma,
|
||||
key: Pleroma.Search.Elasticsearch.Cluster,
|
||||
type: :group,
|
||||
description: "Elasticsearch settings.",
|
||||
children: [
|
||||
%{
|
||||
key: :url,
|
||||
type: :string,
|
||||
description: "Elasticsearch URL.",
|
||||
suggestion: ["http://127.0.0.1:9200/"]
|
||||
},
|
||||
%{
|
||||
key: :username,
|
||||
type: :string,
|
||||
description: "Username to connect to ES. Set to nil if your cluster is unauthenticated.",
|
||||
suggestion: ["elastic"]
|
||||
},
|
||||
%{
|
||||
key: :password,
|
||||
type: :string,
|
||||
description: "Password to connect to ES. Set to nil if your cluster is unauthenticated.",
|
||||
suggestion: ["changeme"]
|
||||
},
|
||||
%{
|
||||
key: :api,
|
||||
type: :module,
|
||||
description:
|
||||
"The API module used by Elasticsearch. Should always be Elasticsearch.API.HTTP",
|
||||
suggestion: [Elasticsearch.API.HTTP]
|
||||
},
|
||||
%{
|
||||
key: :json_library,
|
||||
type: :module,
|
||||
description:
|
||||
"The JSON module used to encode/decode when communicating with Elasticsearch",
|
||||
suggestion: [Jason]
|
||||
},
|
||||
%{
|
||||
key: :indexes,
|
||||
type: :map,
|
||||
description: "The indices to set up in Elasticsearch",
|
||||
children: [
|
||||
%{
|
||||
key: :activities,
|
||||
type: :map,
|
||||
description: "Config for the index to use for activities",
|
||||
children: [
|
||||
%{
|
||||
key: :settings,
|
||||
type: :string,
|
||||
description:
|
||||
"Path to the file containing index settings for the activities index. Should contain a mapping.",
|
||||
suggestion: ["priv/es-mappings/activity.json"]
|
||||
},
|
||||
%{
|
||||
key: :store,
|
||||
type: :module,
|
||||
description: "The internal store module",
|
||||
suggestion: [Pleroma.Search.Elasticsearch.Store]
|
||||
},
|
||||
%{
|
||||
key: :sources,
|
||||
type: {:list, :module},
|
||||
description: "The internal types to use for this index",
|
||||
suggestion: [[Pleroma.Activity]]
|
||||
},
|
||||
%{
|
||||
key: :bulk_page_size,
|
||||
type: :int,
|
||||
description: "Size for bulk put requests, mostly used on building the index",
|
||||
suggestion: [5000]
|
||||
},
|
||||
%{
|
||||
key: :bulk_wait_interval,
|
||||
type: :int,
|
||||
description: "Time to wait between bulk put requests (in ms)",
|
||||
suggestion: [15_000]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -121,3 +121,43 @@ This will clear **all** the posts from the search index. Note, that deleted post
|
|||
there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information
|
||||
that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size
|
||||
depends on the amount of text in posts.
|
||||
|
||||
## Elasticsearch
|
||||
|
||||
As with meilisearch, this can be rather memory-hungry, but it is very good at what it does.
|
||||
|
||||
To use [elasticsearch](https://www.elastic.co/), set the search module to `Pleroma.Search.Elasticsearch`:
|
||||
|
||||
> config :pleroma, Pleroma.Search, module: Pleroma.Search.Elasticsearch
|
||||
|
||||
You then need to set the URL and authentication credentials if relevant.
|
||||
|
||||
> config :pleroma, Pleroma.Search.Elasticsearch.Cluster,
|
||||
> url: "http://127.0.0.1:9200/",
|
||||
> username: "elastic",
|
||||
> password: "changeme"
|
||||
|
||||
### Initial indexing
|
||||
|
||||
After setting up the configuration, you'll want to index all of your already existing posts. Only public posts are indexed. You'll only
|
||||
have to do it one time, but it might take a while, depending on the amount of posts your instance has seen.
|
||||
|
||||
The sequence of actions is as follows:
|
||||
|
||||
1. First, change the configuration to use `Pleroma.Search.Elasticsearch` as the search backend
|
||||
2. Restart your instance. At this point it can be used while the search indexing is running, though search won't return anything
|
||||
3. Start the initial indexing process (as described below with `index`),
|
||||
and wait until the task says it sent everything from the database to index
|
||||
4. Wait until the index task exits
|
||||
|
||||
To start the initial indexing, run the `index` command (OTP releases) or the `elasticsearch.build` task (from source):
|
||||
|
||||
=== "OTP"
|
||||
```sh
|
||||
./bin/pleroma_ctl search.elasticsearch index activities --cluster Pleroma.Search.Elasticsearch.Cluster
|
||||
```
|
||||
|
||||
=== "From Source"
|
||||
```sh
|
||||
mix elasticsearch.build activities --cluster Pleroma.Search.Elasticsearch.Cluster
|
||||
```
|
9
lib/mix/tasks/pleroma/search/elasticsearch.ex
Normal file
9
lib/mix/tasks/pleroma/search/elasticsearch.ex
Normal file
|
@ -0,0 +1,9 @@
|
|||
defmodule Mix.Tasks.Pleroma.Search.Elasticsearch do
  @moduledoc """
  Task entry point for the Elasticsearch search backend.

  The only supported invocation is `run(["index" | args])`, which starts
  Pleroma and then hands `args` over to `Mix.Tasks.Elasticsearch.Build.run/1`.
  """

  import Mix.Pleroma

  alias Mix.Tasks.Elasticsearch.Build, as: IndexBuilder

  @doc """
  Runs the `index` sub-command.

  Everything after the leading `"index"` argument is passed unchanged to
  `Mix.Tasks.Elasticsearch.Build.run/1`, and that call's result is returned.
  Any other argument list fails with a `FunctionClauseError`.
  """
  def run(["index" | build_args]) do
    # Build.run/1 is only called once Pleroma has been started.
    start_pleroma()

    IndexBuilder.run(build_args)
  end
end
|
144
lib/mix/tasks/pleroma/search/meilisearch.ex
Normal file
144
lib/mix/tasks/pleroma/search/meilisearch.ex
Normal file
|
@ -0,0 +1,144 @@
|
|||
# Pleroma: A lightweight social networking server
|
||||
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
|
||||
# SPDX-License-Identifier: AGPL-3.0-only
|
||||
|
||||
defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
  @moduledoc """
  Maintenance commands for the Meilisearch search backend.

  Supported argument lists for `run/1`:

    * `["index"]` - configures the `objects` index and pushes objects from the
      database into it
    * `["clear"]` - deletes all documents from the `objects` index
    * `["show-keys", master_key]` - prints the keys returned by the `/keys`
      endpoint, authenticating with `master_key`
    * `["stats"]` - prints the document count and indexing state of the
      `objects` index
  """

  require Pleroma.Constants

  import Mix.Pleroma
  import Ecto.Query

  import Pleroma.Search.Meilisearch,
    only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete!: 1]

  # Oldest Meilisearch release accepted by the `index` command. The version
  # requirement and the error message are both derived from this value so they
  # cannot drift apart.
  @minimum_version "0.25.0"

  @doc """
  Dispatches on the argument list; see the module documentation for the
  accepted commands.

  Raises a `RuntimeError` from `["index"]` when the server reports a version
  older than #{@minimum_version}. Unexpected replies from the version, settings
  and stats endpoints fail with a `MatchError`.
  """
  def run(["index"]) do
    start_pleroma()

    ensure_supported_version!()
    configure_index()

    IO.puts("Created indices. Starting to insert posts.")

    chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size])

    # Repo.stream/2 is consumed inside the transaction; both the transaction
    # and the stream run without a timeout.
    Pleroma.Repo.transaction(fn -> index_objects(chunk_size) end, timeout: :infinity)

    IO.write("\n")
  end

  def run(["clear"]) do
    start_pleroma()

    meili_delete!("/indexes/objects/documents")
  end

  def run(["show-keys", master_key]) do
    start_pleroma()

    endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])

    {:ok, response} =
      Pleroma.HTTP.get(
        Path.join(endpoint, "/keys"),
        [{"Authorization", "Bearer #{master_key}"}]
      )

    decoded = Jason.decode!(response.body)

    if keys = decoded["results"] do
      Enum.each(keys, fn %{"description" => desc, "key" => key} ->
        IO.puts("#{desc}: #{key}")
      end)
    else
      IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}")
    end
  end

  def run(["stats"]) do
    start_pleroma()

    {:ok, stats} = meili_get("/indexes/objects/stats")
    IO.puts("Number of entries: #{stats["numberOfDocuments"]}")
    IO.puts("Indexing? #{stats["isIndexing"]}")
  end

  # Raises unless the server's reported `pkgVersion` satisfies the minimum version.
  defp ensure_supported_version!() do
    {:ok, version_info} = meili_get("/version")

    # The ranking rule syntax was changed but nothing about that is mentioned in the changelog
    if not Version.match?(version_info["pkgVersion"], ">= #{@minimum_version}") do
      raise "Meilisearch <#{@minimum_version} not supported"
    end
  end

  # Posts the ranking rules and searchable attributes for the `objects` index.
  defp configure_index() do
    {:ok, _} =
      meili_post(
        "/indexes/objects/settings/ranking-rules",
        [
          "published:desc",
          "words",
          "exactness",
          "proximity",
          "typo",
          "attribute",
          "sort"
        ]
      )

    {:ok, _} =
      meili_post(
        "/indexes/objects/settings/searchable-attributes",
        [
          "content"
        ]
      )
  end

  # Streams the matching objects out of the database and uploads them in
  # chunks of `chunk_size`, printing a running total on a single line.
  defp index_objects(chunk_size) do
    query =
      from(Pleroma.Object,
        # Only notes whose `to` or `cc` contains `Pleroma.Constants.as_public()`
        # are selected, newest first.
        where:
          fragment("data->>'type' = 'Note'") and
            (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or
               fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())),
        order_by: [desc: fragment("data->'published'")]
      )

    count = Pleroma.Repo.aggregate(query, :count, :data)
    IO.puts("Entries to index: #{count}")

    query
    |> Pleroma.Repo.stream(timeout: :infinity)
    |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
    # Objects for which object_to_search_data/1 returned nil are skipped.
    |> Stream.reject(&is_nil/1)
    |> Stream.chunk_every(chunk_size)
    |> Stream.transform(0, fn objects, indexed_so_far ->
      total = indexed_so_far + length(objects)

      # Reset to the beginning of the line and rewrite it
      IO.write("\r")
      IO.write("Indexed #{total} entries")

      {[objects], total}
    end)
    |> Stream.each(&put_documents/1)
    |> Stream.run()
  end

  # Uploads one chunk of documents. Failures are reported on stdout and do not
  # abort the run, so the remaining chunks are still attempted.
  defp put_documents(objects) do
    case meili_put("/indexes/objects/documents", objects) do
      {:ok, %{"uid" => _}} ->
        :ok

      # An :ok reply without a "uid" key is reported as a failed upload.
      {:ok, _} = result ->
        IO.puts("\nFailed to index: #{inspect(result)}")

      error ->
        IO.puts("\nFailed to index due to network error: #{inspect(error)}")
    end
  end
end
|
Loading…
Reference in a new issue