Merge pull request 'MeiliSearch Integration' (#10204) from PrivateGER/calckey:feat/meilisearch-integration into develop

Reviewed-on: https://codeberg.org/calckey/calckey/pulls/10204
This commit is contained in:
Kainoa Kanter 2023-05-29 03:31:38 +00:00
commit 43f5cdbcc9
15 changed files with 545 additions and 27 deletions

View file

@ -82,6 +82,14 @@ redis:
# user:
# pass:
# ┌───────────────────────────┐
#───┘ Meilisearch configuration └─────────────────────────────────────
#meilisearch:
# host: meilisearch
# port: 7700
# ssl: false
# apiKey:
# ┌───────────────┐
#───┘ ID generation └───────────────────────────────────────────

View file

@ -8,7 +8,7 @@ services:
depends_on:
- db
- redis
- sonic
- meilisearch
ports:
- "3000:3000"
networks:
@ -40,17 +40,33 @@ services:
volumes:
- ./db:/var/lib/postgresql/data
sonic:
restart: unless-stopped
image: docker.io/valeriansaliou/sonic:v1.4.0
networks:
- calcnet
volumes:
- ./sonic:/var/lib/sonic/store
- ./sonic/config.cfg:/etc/sonic.cfg
### Only one of the below should be used.
### Meilisearch is better overall, but resource-intensive. Sonic is a very light full text search engine.
# meilisearch:
# container_name: meilisearch
# image: getmeili/meilisearch:v1.1.1
# environment:
# - MEILI_ENV=${MEILI_ENV:-development}
# ports:
# - "7700:7700"
# networks:
# - calcnet
# volumes:
# - ./meili_data:/meili_data
# restart: unless-stopped
# sonic:
# restart: unless-stopped
# image: docker.io/valeriansaliou/sonic:v1.4.0
# networks:
# - calcnet
# volumes:
# - ./sonic:/var/lib/sonic/store
# - ./sonic/config.cfg:/etc/sonic.cfg
networks:
calcnet:
# web:
# external:
# name: web
# web:
# external:
# name: web

View file

@ -5,6 +5,7 @@ introMisskey: "Welcome! Calckey is an open source, decentralized social media pl
\ that's free forever! \U0001F680"
monthAndDay: "{month}/{day}"
search: "Search"
search_placeholder: "Enter search terms..."
notifications: "Notifications"
username: "Username"
password: "Password"
@ -1551,6 +1552,10 @@ _widgets:
serverInfo: "Server Info"
_userList:
chooseList: "Select a list"
meiliStatus: "Server Status"
meiliSize: "Index size"
meiliIndexCount: "Indexed posts"
_cw:
hide: "Hide"
show: "Show content"

View file

@ -85,6 +85,7 @@
"koa-send": "5.0.1",
"koa-slow": "2.1.0",
"koa-views": "7.0.2",
"meilisearch": "0.32.4",
"mfm-js": "0.23.3",
"mime-types": "2.1.35",
"multer": "1.4.4-lts.1",

View file

@ -39,6 +39,12 @@ export type Source = {
collection?: string;
bucket?: string;
};
meilisearch: {
host: string;
port: number;
apiKey?: string;
ssl: boolean;
};
proxy?: string;
proxySmtp?: string;

View file

@ -1,6 +1,7 @@
import si from "systeminformation";
import Xev from "xev";
import * as osUtils from "os-utils";
import meilisearch from "../db/meilisearch.js";
const ev = new Xev();
@ -24,6 +25,7 @@ export default function () {
const memStats = await mem();
const netStats = await net();
const fsStats = await fs();
const meilisearchStats = await meilisearchStatus();
const stats = {
cpu: roundCpu(cpu),
@ -39,6 +41,7 @@ export default function () {
r: round(Math.max(0, fsStats.rIO_sec ?? 0)),
w: round(Math.max(0, fsStats.wIO_sec ?? 0)),
},
meilisearch: meilisearchStats,
};
ev.emit("serverStats", stats);
log.unshift(stats);
@ -77,3 +80,15 @@ async function fs() {
const data = await si.disksIO().catch(() => ({ rIO_sec: 0, wIO_sec: 0 }));
return data || { rIO_sec: 0, wIO_sec: 0 };
}
/**
 * Collect the Meilisearch figures that are attached to every server-stats tick.
 *
 * - When the Meilisearch module exports no client, an "unconfigured"
 *   placeholder is returned.
 * - When querying Meilisearch fails, a zeroed "unavailable" record is returned
 *   instead of rejecting, in the same way fs() falls back to zeroed disk
 *   stats. A search backend outage therefore does not stop the other stats
 *   from being emitted.
 *
 * @returns an object with `health`, `size` and `indexed_count`.
 */
async function meilisearchStatus() {
	if (!meilisearch) {
		return {
			health: "unconfigured",
			size: 0,
			indexed_count: 0,
		};
	}

	try {
		return await meilisearch.serverStats();
	} catch {
		// Best effort: report the backend as unavailable rather than failing the tick.
		return {
			health: "unavailable",
			size: 0,
			indexed_count: 0,
		};
	}
}

View file

@ -0,0 +1,266 @@
import {Health, MeiliSearch, Stats} from "meilisearch";
import {dbLogger} from "./logger.js";
import config from "@/config/index.js";
import {Note} from "@/models/entities/note.js";
import * as url from "url";
import {ILocalUser, User} from "@/models/entities/user.js";
import {Followings, Users} from "@/models/index.js";
const logger = dbLogger.createSubLogger("meilisearch", "gray", false);

// Meilisearch counts as configured as soon as any connection setting is present.
const hasConfig =
	config.meilisearch &&
	(config.meilisearch.host ||
		config.meilisearch.port ||
		config.meilisearch.apiKey);

// Both handles are assigned only when Meilisearch is configured. The default
// export of this module is null otherwise, so nothing dereferences them then.
let client: MeiliSearch;
let posts: ReturnType<MeiliSearch["index"]>;

if (hasConfig) {
	// Only touch Meilisearch when it is configured; without this guard a client
	// would be built from an empty host and port 0 and settings requests would
	// be issued against it on every start-up.
	const host = config.meilisearch.host ?? "localhost";
	const port = config.meilisearch.port ?? 7700;
	const auth = config.meilisearch.apiKey ?? "";
	const ssl = config.meilisearch.ssl ?? false;

	logger.info("Connecting to MeiliSearch");

	client = new MeiliSearch({
		host: `${ssl ? "https" : "http"}://${host}:${port}`,
		apiKey: auth,
	});

	posts = client.index("posts");

	// The settings updates below are fire-and-forget: failures are logged but
	// do not prevent the module from loading.
	posts
		.updateSearchableAttributes(["text"])
		.catch((e) =>
			logger.error(`Setting searchable attr failed, searches won't work: ${e}`),
		);

	posts
		.updateFilterableAttributes([
			"userName",
			"userHost",
			"mediaAttachment",
			"createdAt",
			"userId",
		])
		.catch((e) =>
			logger.error(
				`Setting filterable attr failed, advanced searches won't work: ${e}`,
			),
		);

	posts
		.updateSortableAttributes(["createdAt"])
		.catch((e) =>
			logger.error(
				`Setting sortable attr failed, placeholder searches won't sort properly: ${e}`,
			),
		);

	// NOTE(review): this is logged without waiting for the settings requests above.
	logger.info("Connected to MeiliSearch");
}
/**
 * Shape of a note document as stored in the Meilisearch "posts" index.
 */
export type MeilisearchNote = {
// Note id as a string; used as the primary key of the index.
id: string;
// Note text, or an empty string when the note has none. The only searchable attribute.
text: string;
// Id of the author (filterable).
userId: string;
// Host of the author (filterable).
userHost: string;
// Username of the author (filterable); "UNKNOWN" when the user could not be resolved.
userName: string;
// Channel the note belongs to, or an empty string.
channelId: string;
// Coarse type of the first attachment: "image", "video", "audio", "text",
// "file" for anything else, or an empty string when nothing is attached.
mediaAttachment: string;
// Creation time in seconds since the Unix epoch (filterable and sortable).
createdAt: number;
};
/**
 * Wrap a user-supplied value in double quotes for a Meilisearch filter
 * expression, escaping backslashes and double quotes so the value cannot
 * terminate the string early or leave the expression malformed.
 */
function quoteFilterValue(value: string): string {
	const escaped = value.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
	return `"${escaped}"`;
}

/**
 * Meilisearch access layer, or null when Meilisearch is not configured.
 *
 * - `search`      runs a query (with optional advanced operators) against the
 *                 "posts" index.
 * - `ingestNote`  converts one or many notes into index documents and submits
 *                 them for indexing.
 * - `serverStats` reports health, database size and indexed post count.
 */
export default hasConfig
	? {
			/**
			 * Search the "posts" index.
			 *
			 * Advanced search syntax (operators are removed from the text query):
			 *   from:user                        => filter by user name
			 *   has:image/video/audio/text/file  => filter by attachment type
			 *   domain:domain.com                => filter by domain
			 *   before:Date                      => show posts made before Date
			 *   after:Date                       => show posts made after Date
			 *   "text"                           => get posts with exact text between quotes
			 *   filter:following                 => show results only from users you follow
			 *   filter:followers                 => show results only from followers
			 *
			 * @param query   raw, space separated search input
			 * @param limit   maximum number of hits to return
			 * @param offset  number of hits to skip
			 * @param userCtx searching user; needed for the filter:* operators
			 * @returns the Meilisearch search response
			 */
			search: async (
				query: string,
				limit: number,
				offset: number,
				userCtx: ILocalUser | null,
			) => {
				const constructedFilters: string[] = [];
				const filteredSearchTerms: string[] = [];

				// Detect search operators and keep them out of the actual query
				for (const term of query.split(" ")) {
					if (term.startsWith("has:")) {
						constructedFilters.push(
							`mediaAttachment = ${quoteFilterValue(term.slice(4))}`,
						);
					} else if (term.startsWith("from:")) {
						// Quoted so that an empty or unusual value still yields a valid filter
						constructedFilters.push(
							`userName = ${quoteFilterValue(term.slice(5))}`,
						);
					} else if (term.startsWith("domain:")) {
						constructedFilters.push(
							`userHost = ${quoteFilterValue(term.slice(7))}`,
						);
					} else if (term.startsWith("after:")) {
						// Unparseable dates are dropped silently
						const date = Date.parse(term.slice(6));
						if (!Number.isNaN(date)) {
							constructedFilters.push(`createdAt > ${date / 1000}`);
						}
					} else if (term.startsWith("before:")) {
						const date = Date.parse(term.slice(7));
						if (!Number.isNaN(date)) {
							constructedFilters.push(`createdAt < ${date / 1000}`);
						}
					} else if (term.startsWith("filter:following")) {
						if (userCtx) {
							// Fetch the users the searching user follows
							const followings = await Followings.find({
								where: {
									followerId: userCtx.id,
								},
								select: {
									followeeId: true,
								},
							});
							const followIDs = followings.map(
								(following) => following.followeeId,
							);
							if (followIDs.length > 0) {
								constructedFilters.push(
									`userId IN [${followIDs.join(",")}]`,
								);
							}
						} else {
							logger.warn(
								"search filtered to follows called without user context",
							);
						}
					} else if (term.startsWith("filter:followers")) {
						if (userCtx) {
							// Fetch the users that follow the searching user
							const followings = await Followings.find({
								where: {
									followeeId: userCtx.id,
								},
								select: {
									followerId: true,
								},
							});
							const followIDs = followings.map(
								(following) => following.followerId,
							);
							if (followIDs.length > 0) {
								constructedFilters.push(
									`userId IN [${followIDs.join(",")}]`,
								);
							}
						} else {
							logger.warn(
								"search filtered to followers called without user context",
							);
						}
					} else {
						filteredSearchTerms.push(term);
					}
				}

				const sortRules: string[] = [];

				// An empty search term with defined filters means we have a placeholder search => https://www.meilisearch.com/docs/reference/api/search#placeholder-search
				// These have to be ordered manually, otherwise the *oldest* posts are returned first, which we don't want
				if (filteredSearchTerms.length === 0 && constructedFilters.length > 0) {
					sortRules.push("createdAt:desc");
				}

				logger.info(`Searching for ${filteredSearchTerms.join(" ")}`);
				logger.info(`Limit: ${limit}`);
				logger.info(`Offset: ${offset}`);
				logger.info(`Filters: ${constructedFilters}`);
				logger.info(`Ordering: ${sortRules}`);

				return posts.search(filteredSearchTerms.join(" "), {
					limit: limit,
					offset: offset,
					filter: constructedFilters,
					sort: sortRules,
				});
			},

			/**
			 * Convert notes into index documents and submit them for indexing.
			 *
			 * @param ingestNotes a single note or an array of notes
			 */
			ingestNote: async (ingestNotes: Note | Note[]) => {
				const notes = Array.isArray(ingestNotes) ? ingestNotes : [ingestNotes];
				const indexingBatch: MeilisearchNote[] = [];

				for (const note of notes) {
					// Resolve the author when the relation was not loaded with the note
					if (note.user == null) {
						note.user = await Users.findOne({
							where: {
								id: note.userId,
							},
						});
					}

					// Reduce the first attachment's MIME type to one of the
					// values accepted by the has: operator
					let attachmentType = "";
					if (note.attachedFileTypes.length > 0) {
						attachmentType = note.attachedFileTypes[0].split("/")[0];
						switch (attachmentType) {
							case "image":
							case "video":
							case "audio":
							case "text":
								break;
							default:
								attachmentType = "file";
								break;
						}
					}

					indexingBatch.push({
						id: note.id.toString(),
						text: note.text ? note.text : "",
						userId: note.userId,
						// A missing host (null, undefined or empty) is treated as a local note.
						// NOTE(review): url.parse() only yields a host when config.host
						// is a full URL - confirm against the config loader.
						userHost: note.userHost
							? note.userHost
							: url.parse(config.host).host,
						channelId: note.channelId ? note.channelId : "",
						mediaAttachment: attachmentType,
						userName: note.user?.username ?? "UNKNOWN",
						createdAt: note.createdAt.getTime() / 1000, // division by 1000 is necessary because Node returns in ms-accuracy
					} as MeilisearchNote);
				}

				await posts.addDocuments(indexingBatch, {
					primaryKey: "id",
				});
				logger.info(`sent ${indexingBatch.length} posts for indexing`);
			},

			/**
			 * Report Meilisearch health, database size and the number of
			 * documents in the "posts" index (0 when the index has no stats yet).
			 */
			serverStats: async () => {
				const health: Health = await client.health();
				const stats: Stats = await client.getStats();

				return {
					health: health.status,
					size: stats.databaseSize,
					indexed_count: stats.indexes["posts"]?.numberOfDocuments ?? 0,
				};
			},
	  }
	: null;

View file

@ -1,10 +1,11 @@
import type Bull from "bull";
import { queueLogger } from "../../logger.js";
import { Notes } from "@/models/index.js";
import { MoreThan } from "typeorm";
import { index } from "@/services/note/create.js";
import { Note } from "@/models/entities/note.js";
import {queueLogger} from "../../logger.js";
import {Notes} from "@/models/index.js";
import {MoreThan} from "typeorm";
import {index} from "@/services/note/create.js";
import {Note} from "@/models/entities/note.js";
import meilisearch from "../../../db/meilisearch.js";
const logger = queueLogger.createSubLogger("index-all-notes");
@ -32,12 +33,13 @@ export default async function indexAllNotes(
try {
notes = await Notes.find({
where: {
...(cursor ? { id: MoreThan(cursor) } : {}),
...(cursor ? {id: MoreThan(cursor)} : {}),
},
take: take,
order: {
id: 1,
},
relations: ["user"],
});
} catch (e) {
logger.error(`Failed to query notes ${e}`);
@ -58,11 +60,16 @@ export default async function indexAllNotes(
for (let i = 0; i < notes.length; i += batch) {
const chunk = notes.slice(i, i + batch);
await Promise.all(chunk.map((note) => index(note)));
if (meilisearch) {
await meilisearch.ingestNote(chunk);
}
await Promise.all(chunk.map((note) => index(note, true)));
indexedCount += chunk.length;
const pct = (indexedCount / total) * 100;
job.update({ indexedCount, cursor, total });
job.update({indexedCount, cursor, total});
job.progress(+pct.toFixed(1));
logger.info(`Indexed notes ${indexedCount}/${total ? total : "?"}`);
}

View file

@ -4,6 +4,7 @@ import { Note } from "@/models/entities/note.js";
import config from "@/config/index.js";
import es from "../../../../db/elasticsearch.js";
import sonic from "../../../../db/sonic.js";
import meilisearch, {MeilisearchNote} from "../../../../db/meilisearch.js";
import define from "../../define.js";
import { makePaginationQuery } from "../../common/make-pagination-query.js";
import { generateVisibilityQuery } from "../../common/generate-visibility-query.js";
@ -62,7 +63,7 @@ export const paramDef = {
} as const;
export default define(meta, paramDef, async (ps, me) => {
if (es == null && sonic == null) {
if (es == null && sonic == null && meilisearch == null) {
const query = makePaginationQuery(
Notes.createQueryBuilder("note"),
ps.sinceId,
@ -170,6 +171,70 @@ export default define(meta, paramDef, async (ps, me) => {
found.length = ps.limit;
}
return found;
} else if (meilisearch) {
let start = 0;
const chunkSize = 100;
// Use meilisearch to fetch and step through all search results that could match the requirements
const ids = [];
while (true) {
const results = await meilisearch.search(ps.query, chunkSize, start, me);
start += chunkSize;
if (results.hits.length === 0) {
break;
}
const res = results.hits
.filter((key: MeilisearchNote) => {
if (ps.userId && key.userId !== ps.userId) {
return false;
}
if (ps.channelId && key.channelId !== ps.channelId) {
return false;
}
if (ps.sinceId && key.id <= ps.sinceId) {
return false;
}
if (ps.untilId && key.id >= ps.untilId) {
return false;
}
return true;
})
.map((key) => key.id);
ids.push(...res);
}
// Sort all the results by note id DESC (newest first)
ids.sort((a, b) => b - a);
// Fetch the notes from the database until we have enough to satisfy the limit
start = 0;
const found = [];
while (found.length < ps.limit && start < ids.length) {
const chunk = ids.slice(start, start + chunkSize);
const notes: Note[] = await Notes.find({
where: {
id: In(chunk),
},
order: {
id: "DESC",
},
});
// The notes are checked for visibility and muted/blocked users when packed
found.push(...(await Notes.packMany(notes, me)));
start += chunkSize;
}
// If we have more results than the limit, trim them
if (found.length > ps.limit) {
found.length = ps.limit;
}
return found;
} else {
const userQuery =

View file

@ -1,6 +1,7 @@
import * as os from "node:os";
import si from "systeminformation";
import define from "../define.js";
import meilisearch from "../../../db/meilisearch.js";
export const meta = {
requireCredential: false,
@ -18,6 +19,7 @@ export const paramDef = {
export default define(meta, paramDef, async () => {
const memStats = await si.mem();
const fsStats = await si.fsSize();
const meilisearchStats = await meilisearchStatus();
return {
machine: os.hostname(),
@ -34,3 +36,15 @@ export default define(meta, paramDef, async () => {
},
};
});
/**
 * Collect the Meilisearch figures for the server-info endpoint.
 *
 * - When the Meilisearch module exports no client, an "unconfigured"
 *   placeholder is returned.
 * - When querying Meilisearch fails, a zeroed "unavailable" record is returned
 *   instead of rejecting, so the endpoint handler that awaits this function
 *   does not fail because the search backend is down.
 *
 * @returns an object with `health`, `size` and `indexed_count`.
 */
async function meilisearchStatus() {
	if (!meilisearch) {
		return {
			health: "unconfigured",
			size: 0,
			indexed_count: 0,
		};
	}

	try {
		return await meilisearch.serverStats();
	} catch {
		// Best effort: report the backend as unavailable rather than failing the request.
		return {
			health: "unavailable",
			size: 0,
			indexed_count: 0,
		};
	}
}

View file

@ -67,6 +67,7 @@ import type { UserProfile } from "@/models/entities/user-profile.js";
import { db } from "@/db/postgre.js";
import { getActiveWebhooks } from "@/misc/webhook-cache.js";
import { shouldSilenceInstance } from "@/misc/should-block-instance.js";
import meilisearch from "../../db/meilisearch.js";
const mutedWordsCache = new Cache<
{ userId: UserProfile["userId"]; mutedWords: UserProfile["mutedWords"] }[]
@ -748,7 +749,7 @@ async function insertNote(
}
}
export async function index(note: Note): Promise<void> {
export async function index(note: Note, reindexing: boolean): Promise<void> {
if (!note.text) return;
if (config.elasticsearch && es) {
@ -776,6 +777,10 @@ export async function index(note: Note): Promise<void> {
note.text,
);
}
if (meilisearch && !reindexing) {
await meilisearch.ingestNote(note);
}
}
async function notifyToWatchersOfRenotee(

View file

@ -5,6 +5,17 @@ import { mainRouter } from "@/router";
export async function search() {
const { canceled, result: query } = await os.inputText({
title: i18n.ts.search,
placeholder: i18n.ts.search_placeholder,
text:
"Advanced search operators\n" +
"from:user => filter by user\n" +
"has:image/video/audio/text/file => filter by attachment types\n" +
"domain:domain.com => filter by domain\n" +
"before:Date => show posts made before Date\n" +
"after:Date => show posts made after Date\n" +
'"text" => get posts with exact text between quotes\n' +
"filter:following => show results only from users you follow\n" +
"filter:followers => show results only from followers\n",
});
if (canceled || query == null || query === "") return;

View file

@ -38,6 +38,11 @@
:connection="connection"
:meta="meta"
/>
<XMeili
v-else-if="widgetProps.view === 5"
:connection="connection"
:meta="meta"
/>
</div>
</MkContainer>
</template>
@ -56,6 +61,7 @@ import XNet from "./net.vue";
import XCpu from "./cpu.vue";
import XMemory from "./mem.vue";
import XDisk from "./disk.vue";
import XMeili from "./meilisearch.vue";
import MkContainer from "@/components/MkContainer.vue";
import { GetFormResultType } from "@/scripts/form";
import * as os from "@/os";
@ -102,7 +108,7 @@ os.api("server-info", {}).then((res) => {
});
const toggleView = () => {
if (widgetProps.view === 4) {
if (widgetProps.view === 5) {
widgetProps.view = 0;
} else {
widgetProps.view++;

View file

@ -0,0 +1,62 @@
<template>
<div class="ms_stats">
<div>
<p><i class="ph-file-search ph-bold ph-lg"></i>MeiliSearch</p>
<p>{{ i18n.ts._widgets.meiliStatus }}: {{ available }}</p>
<p>{{ i18n.ts._widgets.meiliSize }}: {{ bytes(total_size, 2) }}</p>
<p>{{ i18n.ts._widgets.meiliIndexCount }}: {{ index_count }}</p>
</div>
</div>
<br />
</template>
<script lang="ts" setup>
// Widget pane that displays Meilisearch health, index size and indexed post
// count, updated from "stats" events received on the connection prop.
import {onBeforeUnmount, onMounted} from "vue";
import bytes from "@/filters/bytes";
import {i18n} from "@/i18n";
// connection: event source providing "stats" events (subscribed via on/off below).
// meta: accepted as a prop but not read anywhere in this component.
const props = defineProps<{
connection: any;
meta: any;
}>();
// Reactive display state; these defaults are shown until the first "stats" event arrives.
let total_size: number = $ref(0);
let index_count: number = $ref(0);
let available: string = $ref("unavailable");
// Copy the meilisearch section of a stats payload into the display state.
function onStats(stats) {
total_size = stats.meilisearch.size;
index_count = stats.meilisearch.indexed_count;
available = stats.meilisearch.health;
}
// Subscribe while the component is mounted and unsubscribe before it is torn
// down, so the handler is not left registered on the connection.
onMounted(() => {
props.connection.on("stats", onStats);
});
onBeforeUnmount(() => {
props.connection.off("stats", onStats);
});
</script>
<style lang="scss" scoped>
.ms_stats {
padding: 16px;
> div {
> p {
margin: 0;
font-size: 0.8em;
&:first-child {
font-weight: bold;
margin-bottom: 4px;
> i {
margin-right: 4px;
}
}
}
}
}
</style>

View file

@ -263,6 +263,9 @@ importers:
koa-views:
specifier: 7.0.2
version: 7.0.2(@types/koa@2.13.5)(ejs@3.1.8)(pug@3.0.2)
meilisearch:
specifier: ^0.32.4
version: 0.32.4
mfm-js:
specifier: 0.23.3
version: 0.23.3
@ -2819,7 +2822,7 @@ packages:
'@types/webgl-ext': 0.0.30
'@webgpu/types': 0.1.16
long: 4.0.0
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
transitivePeerDependencies:
- encoding
@ -2835,7 +2838,7 @@ packages:
'@types/webgl-ext': 0.0.30
'@webgpu/types': 0.1.21
long: 4.0.0
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
transitivePeerDependencies:
- encoding
@ -2849,7 +2852,7 @@ packages:
dependencies:
'@tensorflow/tfjs-core': 3.21.0
'@types/node-fetch': 2.6.2
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
string_decoder: 1.3.0
transitivePeerDependencies:
@ -2864,7 +2867,7 @@ packages:
dependencies:
'@tensorflow/tfjs-core': 4.2.0
'@types/node-fetch': 2.6.2
node-fetch: 2.6.8
node-fetch: 2.6.11
seedrandom: 3.0.5
string_decoder: 1.3.0
transitivePeerDependencies:
@ -5938,6 +5941,14 @@ packages:
- encoding
dev: true
/cross-fetch@3.1.6:
resolution: {integrity: sha512-riRvo06crlE8HiqOwIpQhxwdOk4fOeR7FVM/wXoxchFEqMNUjvbs3bfo4OTgMEMHzppd4DxFBDbyySj8Cv781g==}
dependencies:
node-fetch: 2.6.11
transitivePeerDependencies:
- encoding
dev: false
/cross-spawn@5.1.0:
resolution: {integrity: sha512-pTgQJ5KC0d2hcY8eyL1IzlBPYjTkyH72XRZPnLyKus2mBfNjQs3klqbJU2VILqZryAZUt9JOb3h/mWMy23/f5A==}
dependencies:
@ -10386,6 +10397,14 @@ packages:
engines: {node: '>= 0.6'}
dev: false
/meilisearch@0.32.4:
resolution: {integrity: sha512-QvPtQ6F2TaqAT9fw072/MDjSCMpQifdtUBFeIk3M5jSnFpeSiv1iwfJWNfP6ByaCgR/s++K1Cqtf9vjcZe7prg==}
dependencies:
cross-fetch: 3.1.6
transitivePeerDependencies:
- encoding
dev: false
/meow@9.0.0:
resolution: {integrity: sha512-+obSblOQmRhcyBt62furQqRAQpNyWXo8BuQ5bN7dG8wmwQ+vwHKp/rCFD4CrTP8CsDQD1sjoZ94K417XEUk8IQ==}
engines: {node: '>=10'}
@ -10854,6 +10873,18 @@ packages:
resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==}
engines: {node: '>=10.5.0'}
/node-fetch@2.6.11:
resolution: {integrity: sha512-4I6pdBY1EthSqDmJkiNk3JIT8cswwR9nfeW/cPdUagJYEQG7R95WRH74wpz7ma8Gh/9dI9FP+OU+0E4FvtA55w==}
engines: {node: 4.x || >=6.0.0}
peerDependencies:
encoding: ^0.1.0
peerDependenciesMeta:
encoding:
optional: true
dependencies:
whatwg-url: 5.0.0
dev: false
/node-fetch@2.6.7:
resolution: {integrity: sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==}
engines: {node: 4.x || >=6.0.0}