Support meilisearch

This commit is contained in:
journey-ad 2024-07-29 20:50:36 +08:00
parent 2e634df92d
commit ac038e9a11
7 changed files with 454 additions and 92 deletions

View File

@ -18,7 +18,7 @@ A more modern magnet search website program, developed using [Next.js 14](https:
The most convenient way to deploy is using Docker Compose. Refer to the [docker-compose.yml](./docker-compose.yml)
### Running with docker run
#### Run Docker Container Manually
If not using Docker Compose, you can run each container separately using the following commands:
@ -74,6 +74,79 @@ CREATE INDEX idx_torrents_name_1 ON torrents USING gin (name gin_trgm_ops);
CREATE INDEX idx_torrent_files_path_1 ON torrent_files USING gin (path gin_trgm_ops);
```
### (Optional) Enhanced Search with Meilisearch
After running bitmagnet for several months, the database size may reach tens of millions of records, making standard gin indexing less effective. To improve query performance, consider using [Meilisearch](https://github.com/meilisearch/meilisearch) as a full-text search engine. Properly configured, it can respond to queries across tens of millions of records within a few hundred milliseconds.
Refer to the Meilisearch [installation guide](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation) for deployment. For data synchronization, see the official meilisync PostgreSQL [guide](https://www.meilisearch.com/docs/guides/database/meilisync_postgresql).
> [!NOTE]
> meilisync requires the `wal2json` plugin for PostgreSQL and `wal_level=logical` logging. See the [Dockerfile](https://gist.github.com/journey-ad/77096356f2d65ecd6259b8546f39a1d6) for reference.
>
> If bitmagnet has been running for a while, it is recommended to pause crawler tasks and perform a full data sync, which may take some time. Without pausing, transactions during the full sync will be recorded in the wal logs, possibly consuming significant disk space.
To enable search filtering and sorting, set `filterableAttributes` for:
- `created_at`
- `size`
And `sortableAttributes` for:
- `created_at`
- `files_count`
- `size`
Finally, configure the following environment variables in Bitmagnet-Next-Web to enable Meilisearch enhanced search:
- `MEILISEARCH_API_URL`: Meilisearch instance URL
- `MEILISEARCH_API_KEY`: Meilisearch instance API Key
#### Meilisearch Configuration Reference
```json
{
...
"filterableAttributes": [
"created_at",
"size"
],
"sortableAttributes": [
"created_at",
"files_count",
"size"
],
...
}
```
#### meilisync Configuration Reference
```yaml
debug: false
meilisearch:
api_url: http://meilisearch:7700/ # Meilisearch instance URL
api_key: 'master_key' # Meilisearch instance master_key
insert_size: 1000
insert_interval: 10
progress:
type: file
path: './progress.json' # Save sync progress, create an empty JSON file in the specified directory beforehand, or meilisync will error
source:
type: postgres # Specify database type
host: postgres # Database host
port: 5432 # Database port
database: bitmagnet # Database name
user: postgres # Connection username
password: postgres # Connection password
sync:
- table: torrents # Sync torrents table to Meilisearch
pk: info_hash # Set primary key to info_hash
full: true # Enable full sync
fields: # Fields to sync
info_hash:
name:
size:
files_count:
extension:
created_at:
updated_at:
```
## Development Guide
Before starting development, create a `.env.local` file in the project root directory and fill in the environment variables:

View File

@ -18,7 +18,7 @@
最方便的部署方式是用 Docker Compose，参考 [docker-compose.yml](./docker-compose.yml) 配置
#### 使用 docker run 运行
#### 手动运行 docker 容器
如果不使用 Docker Compose，可以使用以下命令分别运行各个容器
@ -74,6 +74,79 @@ CREATE INDEX idx_torrents_name_1 ON torrents USING gin (name gin_trgm_ops);
CREATE INDEX idx_torrent_files_path_1 ON torrent_files USING gin (path gin_trgm_ops);
```
### (可选)使用 Meilisearch 增强搜索性能
在 bitmagnet 运行数月后,数据库规模可能达到千万级别,普通的 gin 索引可能会力不从心,为了进一步提升查询性能,可以考虑使用 [Meilisearch](https://github.com/meilisearch/meilisearch) 作为全文搜索引擎,配置好后可实现千万数据任意搜索词数百毫秒内响应
参考[Meilisearch 安装指引](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation)进行部署,同步数据可参考官网的[meilisync同步PostgreSQL教程](https://www.meilisearch.com/docs/guides/database/meilisync_postgresql)
> [!NOTE]
> meilisync 需要 PostgreSQL 安装 `wal2json` 插件,并启用 `wal_level=logical` 日志级别后才可以使用同步功能,参考 [Dockerfile](https://gist.github.com/journey-ad/77096356f2d65ecd6259b8546f39a1d6)
>
> 如果 bitmagnet 已经运行了一段时间,建议先暂停爬虫任务进行一次全量同步,全量同步耗时比较久需要耐心等待。若未暂停爬虫任务,全量同步期间的事务将被记录在 wal 日志中,可能会产生大量磁盘空间占用
为实现搜索筛选和排序功能,需要启用以下字段的 `filterableAttributes` 属性:
- `created_at`
- `size`
和以下字段的 `sortableAttributes` 属性:
- `created_at`
- `files_count`
- `size`
最后在 Bitmagnet-Next-Web 部署时配置以下环境变量即可启用 Meilisearch 增强搜索:
- `MEILISEARCH_API_URL`：Meilisearch 实例地址
- `MEILISEARCH_API_KEY`：Meilisearch 实例的 API Key
#### Meilisearch 配置参考
```json
{
...
"filterableAttributes": [
"created_at",
"size"
],
"sortableAttributes": [
"created_at",
"files_count",
"size"
],
...
}
```
#### meilisync 配置参考
```yaml
debug: false
meilisearch:
api_url: http://meilisearch:7700/ # Meilisearch 实例地址
api_key: 'master_key' # Meilisearch 实例的 master_key
insert_size: 1000
insert_interval: 10
progress:
type: file
path: './progress.json' # 保存同步进度需要提前在对应目录创建一个空JSON文件否则 meilisync 会报错
source:
type: postgres # 指定数据库类型
host: postgres # 数据库host
port: 5432 # 数据库端口
database: bitmagnet # 数据库名
user: postgres # 连接用户名
password: postgres # 连接密码
sync:
- table: torrents # 同步 torrents 表到 Meilisearch
pk: info_hash # 设置主键为 info_hash
full: true # 启用全量同步
fields: # 需要同步的字段
info_hash:
name:
size:
files_count:
extension:
created_at:
updated_at:
```
## 开发指引
开发之前,需要先在项目根目录创建一个 `.env.local` 文件,并填写环境变量:

View File

@ -1,6 +1,8 @@
import { query } from "@/lib/pgdb";
import { jiebaCut } from "@/lib/jieba";
import meiliClient from "@/lib/meilisearch";
import { SEARCH_KEYWORD_SPLIT_REGEX } from "@/config/constant";
import { getTimestamp } from "@/utils/index";
type Torrent = {
info_hash: Buffer; // The hash info of the torrent
@ -73,7 +75,7 @@ export function formatTorrent(row: Torrent) {
}
// Utility functions for query building
const buildOrderBy = (sortType: keyof typeof orderByMap) => {
const buildOrderBy = (sortType: "size" | "count" | "date") => {
const orderByMap = {
size: "torrents.size DESC",
count: "COALESCE(torrents.files_count, 0) DESC",
@ -83,7 +85,9 @@ const buildOrderBy = (sortType: keyof typeof orderByMap) => {
return orderByMap[sortType] || "torrents.created_at DESC";
};
const buildTimeFilter = (filterTime: keyof typeof timeFilterMap) => {
const buildTimeFilter = (
filterTime: "gt-1day" | "gt-7day" | "gt-31day" | "gt-365day",
) => {
const timeFilterMap = {
"gt-1day": "AND torrents.created_at > now() - interval '1 day'",
"gt-7day": "AND torrents.created_at > now() - interval '1 week'",
@ -94,7 +98,14 @@ const buildTimeFilter = (filterTime: keyof typeof timeFilterMap) => {
return timeFilterMap[filterTime] || "";
};
const buildSizeFilter = (filterSize: keyof typeof sizeFilterMap) => {
const buildSizeFilter = (
filterSize:
| "lt100mb"
| "gt100mb-lt500mb"
| "gt500mb-lt1gb"
| "gt1gb-lt5gb"
| "gt5gb",
) => {
const sizeFilterMap = {
lt100mb: "AND torrents.size < 100 * 1024 * 1024::bigint",
"gt100mb-lt500mb":
@ -109,6 +120,74 @@ const buildSizeFilter = (filterSize: keyof typeof sizeFilterMap) => {
return sizeFilterMap[filterSize] || "";
};
// Build Meili Sort
// Maps the UI sort option onto a Meilisearch `sort` clause array.
// Unknown values fall back to newest-first, mirroring buildOrderBy's
// SQL fallback of `torrents.created_at DESC`.
const buildMeiliSort = (sortType: "size" | "count" | "date") => {
  const sortMap = {
    size: "size:desc",
    count: "files_count:desc",
    // NOTE(review): "date" sorts ascending (oldest first) while the
    // fallback below is descending — confirm this asymmetry is intended.
    date: "created_at:asc",
  };

  return [sortMap[sortType] || "created_at:desc"];
};
// Build Meili Filter
// Translates the UI time/size filter selections into a Meilisearch `filter`
// expression. Returns an empty array when no filter is selected.
const buildMeiliFilter = (queryInput: any) => {
  const { filterTime, filterSize } = queryInput;
  // Byte-size units, hoisted so the case arms read as "100 * MB" etc.
  const MB = 1024 * 1024;
  const GB = 1024 * MB;
  const filterList: string[] = [];

  // created_at in the index is compared against unix timestamps (seconds).
  switch (filterTime) {
    case "gt-1day":
      filterList.push(`created_at > ${getTimestamp(-1, "day")}`);
      break;
    case "gt-7day":
      filterList.push(`created_at > ${getTimestamp(-7, "day")}`);
      break;
    case "gt-31day":
      filterList.push(`created_at > ${getTimestamp(-1, "month")}`);
      break;
    case "gt-365day":
      filterList.push(`created_at > ${getTimestamp(-1, "year")}`);
      break;
  }

  switch (filterSize) {
    case "lt100mb":
      filterList.push(`size < ${100 * MB}`);
      break;
    case "gt100mb-lt500mb":
      filterList.push(`size >= ${100 * MB} AND size < ${500 * MB}`);
      break;
    case "gt500mb-lt1gb":
      filterList.push(`size >= ${500 * MB} AND size < ${1 * GB}`);
      break;
    case "gt1gb-lt5gb":
      filterList.push(`size >= ${1 * GB} AND size < ${5 * GB}`);
      break;
    case "gt5gb":
      filterList.push(`size >= ${5 * GB}`);
      break;
  }

  if (!filterList.length) return [];

  // Join with AND so the whole filter reads as a single clause.
  return [filterList.join(" AND ")];
};
const QUOTED_KEYWORD_REGEX = /"([^"]+)"/g;
const extractKeywords = (
keyword: string,
@ -159,78 +238,42 @@ const extractKeywords = (
return keywords;
};
export async function search(_: any, { queryInput }: any) {
try {
console.info("-".repeat(50));
console.info("search params", queryInput);
const dbsearch = async ({ queryInput }: any) => {
// Build SQL conditions and parameters
const orderBy = buildOrderBy(queryInput.sortType);
const timeFilter = buildTimeFilter(queryInput.filterTime);
const sizeFilter = buildSizeFilter(queryInput.filterSize);
// trim keyword
queryInput.keyword = queryInput.keyword.trim();
const keywords = extractKeywords(queryInput.keyword);
const no_result = {
keywords: [queryInput.keyword],
torrents: [],
total_count: 0,
has_more: false,
};
// Construct the keyword filter condition
const requiredKeywords: string[] = [];
const optionalKeywords: string[] = [];
// Return an empty result if no keywords are provided
if (queryInput.keyword.length < 2) {
return no_result;
keywords.forEach(({ required }, i) => {
const condition = `torrents.name ILIKE $${i + 1}`;
if (required) {
requiredKeywords.push(condition);
} else {
optionalKeywords.push(condition);
}
});
const REGEX_HASH = /^[a-f0-9]{40}$/;
const fullConditions = [...requiredKeywords];
if (REGEX_HASH.test(queryInput.keyword)) {
const torrent = await torrentByHash(_, { hash: queryInput.keyword });
if (optionalKeywords.length > 0) {
optionalKeywords.push("TRUE");
fullConditions.push(`(${optionalKeywords.join(" OR ")})`);
}
if (torrent) {
return {
keywords: [queryInput.keyword],
torrents: [torrent],
total_count: 1,
has_more: false,
};
}
const keywordFilter = fullConditions.join(" AND ");
return no_result;
}
const keywordsParams = keywords.map(({ keyword }) => `%${keyword}%`);
const keywordsPlain = keywords.map(({ keyword }) => keyword);
// Build SQL conditions and parameters
const orderBy = buildOrderBy(queryInput.sortType);
const timeFilter = buildTimeFilter(queryInput.filterTime);
const sizeFilter = buildSizeFilter(queryInput.filterSize);
const keywords = extractKeywords(queryInput.keyword);
// Construct the keyword filter condition
const requiredKeywords: string[] = [];
const optionalKeywords: string[] = [];
keywords.forEach(({ required }, i) => {
const condition = `torrents.name ILIKE $${i + 1}`;
if (required) {
requiredKeywords.push(condition);
} else {
optionalKeywords.push(condition);
}
});
const fullConditions = [...requiredKeywords];
if (optionalKeywords.length > 0) {
optionalKeywords.push("TRUE");
fullConditions.push(`(${optionalKeywords.join(" OR ")})`);
}
const keywordFilter = fullConditions.join(" AND ");
const keywordsParams = keywords.map(({ keyword }) => `%${keyword}%`);
const keywordsPlain = keywords.map(({ keyword }) => keyword);
// SQL query to fetch filtered torrent data and files information
const sql = `
// SQL query to fetch filtered torrent data and files information
const sql = `
--
WITH filtered AS (
SELECT
@ -277,19 +320,19 @@ FROM
filtered; --
`;
const params = [...keywordsParams, queryInput.limit, queryInput.offset];
const params = [...keywordsParams, queryInput.limit, queryInput.offset];
console.debug("SQL:", sql, params);
console.debug(
"keywords:",
keywords.map((item, i) => ({ _: `$${i + 1}`, ...item })),
);
console.debug("SQL:", sql, params);
console.debug(
"keywords:",
keywords.map((item, i) => ({ _: `$${i + 1}`, ...item })),
);
const queryArr = [query(sql, params)];
const queryArr = [query(sql, params)];
// SQL query to get the total count if requested
if (queryInput.withTotalCount) {
const countSql = `
// SQL query to get the total count if requested
if (queryInput.withTotalCount) {
const countSql = `
SELECT COUNT(*) AS total
FROM (
SELECT 1
@ -300,25 +343,125 @@ FROM (
${sizeFilter}
) AS limited_total;
`;
const countParams = [...keywordsParams];
const countParams = [...keywordsParams];
queryArr.push(query(countSql, countParams));
} else {
queryArr.push(Promise.resolve({ rows: [{ total: 0 }] }) as any);
queryArr.push(query(countSql, countParams));
} else {
queryArr.push(Promise.resolve({ rows: [{ total: 0 }] }) as any);
}
// Execute queries and process results
const [{ rows: torrentsResp }, { rows: countResp }] =
await Promise.all(queryArr);
const torrents = torrentsResp.map(formatTorrent);
const total_count = countResp[0].total;
const has_more =
queryInput.withTotalCount &&
queryInput.offset + queryInput.limit < total_count;
return { keywords: keywordsPlain, torrents, total_count, has_more };
};
// invisible characters for highlighting
// (zero-width marker pairs: they survive the round trip through the
// Meilisearch highlighter without being visible in the rendered name)
const _MARK_TAG = ["\u200b\u200c", "\u200b\u200d"];
// regex to extract keywords from name
const _MARK_TAG_RE = new RegExp(`${_MARK_TAG[0]}(.*?)${_MARK_TAG[1]}`, "g");

// Search resolver backed by Meilisearch: query the `torrents` index, then
// hydrate full rows from Postgres via torrentByHashBatch so the response
// shape matches the SQL (dbsearch) path.
const meilisearch = async ({ queryInput }: any) => {
  const { keyword, limit, offset, sortType } = queryInput;

  // Search only the `name` attribute; retrieve just enough to re-fetch the
  // rows from Postgres, and wrap matched fragments in the zero-width tags.
  const search = await meiliClient.torrents.search(keyword, {
    offset,
    limit,
    sort: buildMeiliSort(sortType),
    filter: buildMeiliFilter(queryInput),
    attributesToSearchOn: ["name"],
    attributesToRetrieve: ["info_hash", "name"],
    attributesToHighlight: ["name"],
    highlightPreTag: _MARK_TAG[0],
    highlightPostTag: _MARK_TAG[1],
  });

  // NOTE(review): estimatedTotalHits is an approximation and may be
  // undefined in some client/server combinations — confirm acceptable here
  // (an undefined total makes the has_more comparison below false).
  const { hits, estimatedTotalHits: total_count } = search;

  // Hydrate full torrent rows in the relevance order Meilisearch returned.
  const hashes = hits.map((item: any) => item.info_hash as string);
  const torrents = await torrentByHashBatch(null, { hashes });

  const has_more =
    queryInput.withTotalCount &&
    queryInput.offset + queryInput.limit < total_count;

  // Recover the effective keyword list from the highlighted fragments in
  // `_formatted.name`; single ASCII alphanumerics are dropped as noise.
  const keywordsSet = hits.reduce(
    (acc: Set<string>, item) => {
      const { name } = item._formatted || {};

      // extract keywords from name
      if (name) {
        [...name.matchAll(_MARK_TAG_RE)].forEach((match) => {
          const [_, keyword] = match;

          if (keyword.length > 1 || !/[a-zA-Z0-9]/.test(keyword)) {
            acc.add(keyword);
          }
        });
      }

      return acc;
    },
    new Set<string>([keyword]),
  );

  return { keywords: [...keywordsSet], torrents, total_count, has_more };
};
// Dispatch to the Meilisearch-backed resolver when the client is configured
// (MEILISEARCH_API_URL set), otherwise fall back to the Postgres query path.
const searchResolver = async ({ queryInput }: any) =>
  meiliClient.enabled ? meilisearch({ queryInput }) : dbsearch({ queryInput });
export async function search(_: any, { queryInput }: any) {
try {
console.info("-".repeat(50));
console.info("search params", queryInput);
// trim keyword
queryInput.keyword = queryInput.keyword.trim();
const no_result = {
keywords: [queryInput.keyword],
torrents: [],
total_count: 0,
has_more: false,
};
// Return an empty result if no keywords are provided
if (queryInput.keyword.length < 2) {
return no_result;
}
// Execute queries and process results
const [{ rows: torrentsResp }, { rows: countResp }] =
await Promise.all(queryArr);
const REGEX_HASH = /^[a-f0-9]{40}$/;
const torrents = torrentsResp.map(formatTorrent);
const total_count = countResp[0].total;
if (REGEX_HASH.test(queryInput.keyword)) {
const torrent = await torrentByHash(_, { hash: queryInput.keyword });
const has_more =
queryInput.withTotalCount &&
queryInput.offset + queryInput.limit < total_count;
if (torrent) {
return {
keywords: [queryInput.keyword],
torrents: [torrent],
total_count: 1,
has_more: false,
};
}
return { keywords: keywordsPlain, torrents, total_count, has_more };
return no_result;
}
return searchResolver({ queryInput });
} catch (error) {
console.error("Error in search resolver:", error);
throw new Error("Failed to execute search query");
@ -364,6 +507,51 @@ GROUP BY t.info_hash, t.name, t.size, t.created_at, t.updated_at, t.files_count;
}
}
/**
 * Fetch multiple torrents by their hex info-hashes in a single query.
 *
 * @param _ GraphQL parent (unused)
 * @param hashes hex-encoded info_hash strings; results preserve this order
 * @returns formatted torrent rows (same shape as formatTorrent output)
 * @throws Error when the database query fails
 */
export async function torrentByHashBatch(
  _: any,
  { hashes }: { hashes: string[] },
) {
  try {
    // info_hash is stored as bytea, so convert the hex strings to Buffers.
    const byteaHashes = hashes.map((hash) => Buffer.from(hash, "hex"));

    // SQL query to fetch torrent data and files information by hash
    const sql = `
    SELECT
      t.info_hash,
      t.name,
      t.size,
      t.created_at,
      t.updated_at,
      t.files_count,
      (
        SELECT json_agg(json_build_object(
          'index', f.index,
          'path', f.path,
          'size', f.size,
          'extension', f.extension
        ))
        FROM torrent_files f
        WHERE f.info_hash = t.info_hash
      ) AS files
    FROM torrents t
    WHERE t.info_hash = ANY($1)
    GROUP BY t.info_hash;
    `;
    const params = [byteaHashes];
    const { rows } = await query(sql, params);

    // Re-order rows to match the caller-supplied hash order (e.g. the
    // relevance order from Meilisearch). A rank map keeps this O(n log n)
    // and gives a consistent comparator — the previous
    // `indexOf(a) > indexOf(b) ? 1 : -1` never returned 0 for equal ranks
    // (undefined behavior per the sort comparator contract) and rescanned
    // the array on every comparison.
    const rank = new Map(hashes.map((hash, i) => [hash, i]));
    const torrents = rows
      .map(formatTorrent)
      .sort((a, b) => (rank.get(a.hash) ?? 0) - (rank.get(b.hash) ?? 0));

    return torrents;
  } catch (error) {
    console.error("Error in torrentByHashBatch resolver:", error);
    throw new Error("Failed to fetch torrent by hash");
  }
}
export async function statsInfo() {
try {
const sql = `

21
lib/meilisearch.ts Normal file
View File

@ -0,0 +1,21 @@
import { Index, MeiliSearch } from "meilisearch";
const MEILISEARCH_API_URL = process.env.MEILISEARCH_API_URL;
const MEILISEARCH_API_KEY = process.env.MEILISEARCH_API_KEY;

/**
 * Shared Meilisearch handle. `enabled` is true only when the
 * MEILISEARCH_API_URL environment variable is set; `client` and `torrents`
 * are populated only in that case and must not be used otherwise.
 */
const meiliClient = {
  enabled: MEILISEARCH_API_URL !== undefined,
  client: null as unknown as MeiliSearch,
  torrents: null as unknown as Index,
};

if (meiliClient.enabled) {
  meiliClient.client = new MeiliSearch({
    host: MEILISEARCH_API_URL as string,
    apiKey: MEILISEARCH_API_KEY,
  });
  meiliClient.torrents = meiliClient.client.index("torrents");
}

export default meiliClient;

View File

@ -29,6 +29,7 @@
"graphql-tag": "^2.12.6",
"intl-messageformat": "^10.5.0",
"js-cookie": "^3.0.5",
"meilisearch": "^0.41.0",
"next": "14.2.3",
"next-intl": "^3.14.1",
"next-themes": "^0.2.1",

View File

@ -1,6 +1,6 @@
{
"compilerOptions": {
"target": "es5",
"target": "es2015",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,

View File

@ -1,3 +1,5 @@
import type { ManipulateType } from "dayjs";
import dayjs from "dayjs";
import Cookie from "js-cookie";
@ -47,6 +49,10 @@ export function formatDate(
return dateStr;
}
/**
 * Unix timestamp (seconds) offset from the current time,
 * e.g. getTimestamp(-7, "day") is the timestamp one week ago.
 */
export function getTimestamp(diff = 0, unit: ManipulateType = "second") {
  const shifted = dayjs().add(diff, unit);

  return shifted.unix();
}
export function getSizeColor(size: number | string) {
size = Number(size);