From 768165776d566a773224888ba51231f8c2064a05 Mon Sep 17 00:00:00 2001 From: Laura Hausmann Date: Sat, 18 Nov 2023 20:21:45 +0100 Subject: [PATCH] [backend] Allow case sensitive and word search for postgres FTS --- packages/backend/src/misc/sql-regex-escape.ts | 3 ++ .../server/api/common/generate-fts-query.ts | 34 ++++++++++++++----- 2 files changed, 29 insertions(+), 8 deletions(-) create mode 100644 packages/backend/src/misc/sql-regex-escape.ts diff --git a/packages/backend/src/misc/sql-regex-escape.ts b/packages/backend/src/misc/sql-regex-escape.ts new file mode 100644 index 000000000..abeec5ee4 --- /dev/null +++ b/packages/backend/src/misc/sql-regex-escape.ts @@ -0,0 +1,3 @@ +export function sqlRegexEscape(s: string) { + return s.replace(/([!$()*+.:<=>?[\\\]^{|}-])/g, "\\$1"); +} diff --git a/packages/backend/src/server/api/common/generate-fts-query.ts b/packages/backend/src/server/api/common/generate-fts-query.ts index 331e2c013..43644e202 100644 --- a/packages/backend/src/server/api/common/generate-fts-query.ts +++ b/packages/backend/src/server/api/common/generate-fts-query.ts @@ -1,7 +1,7 @@ -import { Brackets, IsNull, Not, SelectQueryBuilder } from "typeorm"; +import { Brackets, SelectQueryBuilder, WhereExpressionBuilder } from "typeorm"; import { sqlLikeEscape } from "@/misc/sql-like-escape.js"; +import { sqlRegexEscape } from "@/misc/sql-regex-escape.js"; import { Followings, Users } from "@/models/index.js"; -import { FILE_TYPE_BROWSERSAFE } from "@/const.js"; const filters = { "from": fromFilter, @@ -26,17 +26,24 @@ const filters = { } as Record, search: string, id: number) => any> //TODO: editing the query should be possible, clicking search again resets it (it should be a twitter-like top of the page kind of deal) +//TODO: UI and guide are missing for search|match: and case: filters export function generateFtsQuery(query: SelectQueryBuilder, q: string): void { const components = q.trim().split(" "); const terms: string[] = []; let finalTerms: 
string[] = []; let counter = 0; + let caseSensitive = false; + let matchWords = false; for (const component of components) { const split = component.split(":"); if (split.length > 1 && filters[split[0]] !== undefined) filters[split[0]](query, split.slice(1).join(":"), counter++); + else if(split.length > 1 && (split[0] === "search" || split[0] === "match")) + matchWords = split[1] === 'word' || split[1] === 'words'; + else if(split.length > 1 && split[0] === "case") + caseSensitive = split[1] === 'sensitive'; else terms.push(component); } @@ -62,8 +69,7 @@ export function generateFtsQuery(query: SelectQueryBuilder, q: string): voi query.andWhere(new Brackets(qb => { for (const term of extractToken(terms, idx, i).split(' OR ')) { const id = counter++; - qb.orWhere(`note.text ILIKE :q_${id}`); - query.setParameter(`q_${id}`, `%${sqlLikeEscape(term)}%`); + appendSearchQuery(term, 'or', query, qb, id, term.startsWith('-'), matchWords, caseSensitive); } })); state = 'idle'; @@ -76,10 +82,7 @@ export function generateFtsQuery(query: SelectQueryBuilder, q: string): voi for (const term of finalTerms) { const id = counter++; - if (term.startsWith('-')) query.andWhere(`note.text NOT ILIKE :q_${id}`); - else query.andWhere(`note.text ILIKE :q_${id}`); - - query.setParameter(`q_${id}`, `%${sqlLikeEscape(term.substring(term.startsWith('-') ? 
1 : 0))}%`); + appendSearchQuery(term, 'and', query, query, id, term.startsWith('-'), matchWords, caseSensitive); } } @@ -218,3 +221,18 @@ function extractToken(array: string[], start: number, end: number, trim: boolean function trimStartAndEnd(str: string) { return str.substring(1, str.length - 1); } + +function appendSearchQuery(term: string, mode: 'and' | 'or', query: SelectQueryBuilder, qb: SelectQueryBuilder | WhereExpressionBuilder, id: number, negate: boolean, matchWords: boolean, caseSensitive: boolean) { /* Adds one note.text condition for term to qb (andWhere or orWhere, chosen by mode) and binds the escaped term as parameter q_<id> on query. */ + const sql = `note.text ${getSearchMatchOperator(negate, matchWords, caseSensitive)} :q_${id}`; + if (mode === 'and') qb.andWhere(sql); + else if (mode === 'or') qb.orWhere(sql); + query.setParameter(`q_${id}`, escapeSqlSearchParam(term.substring(negate ? 1 : 0), matchWords)); /* when negate is set the first character of term (the leading minus sign the callers test for) is dropped before escaping */ +} + +function getSearchMatchOperator(negate: boolean, matchWords: boolean, caseSensitive: boolean) { /* Returns the SQL comparison operator: a POSIX regex operator when matchWords is set, LIKE or ILIKE otherwise; caseSensitive selects the case-sensitive variant. */ + return matchWords ? `${negate ? '!' : ''}${caseSensitive ? '~' : '~*'}` : `${negate ? 'NOT ' : ''}${caseSensitive ? 'LIKE' : 'ILIKE'}`; /* PostgreSQL has no "NOT ~" form: regex negation is spelled !~ and !~*, only LIKE and ILIKE take the NOT prefix. */ +} + +function escapeSqlSearchParam(param: string, matchWords: boolean) { /* Builds the bound value: in word mode the regex-escaped term wrapped in \y (PostgreSQL word-boundary escape) on both sides, otherwise a LIKE-escaped %term% substring pattern. */ + return matchWords ? `\\y${sqlRegexEscape(param)}\\y` : `%${sqlLikeEscape(param)}%`; +}