From 6c60ca4a17bd2282622b2cd39d4f0b155cd44dd1 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sun, 11 Aug 2024 12:58:24 -0700 Subject: [PATCH] chore(index): skip web documents if body is empty (#2831) * chore(index): skip web documents if body is empty * Update .changes/unreleased/Fixed and Improvements-20240811-124728.yaml --- .../Fixed and Improvements-20240811-124728.yaml | 3 +++ crates/tabby-index/src/doc/public.rs | 11 +++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 .changes/unreleased/Fixed and Improvements-20240811-124728.yaml diff --git a/.changes/unreleased/Fixed and Improvements-20240811-124728.yaml b/.changes/unreleased/Fixed and Improvements-20240811-124728.yaml new file mode 100644 index 000000000..5a43fe30e --- /dev/null +++ b/.changes/unreleased/Fixed and Improvements-20240811-124728.yaml @@ -0,0 +1,3 @@ +kind: Fixed and Improvements +body: Skip web documents if body is empty +time: 2024-08-11T12:47:28.678694-07:00 diff --git a/crates/tabby-index/src/doc/public.rs b/crates/tabby-index/src/doc/public.rs index a5b6af378..f871bbe4f 100644 --- a/crates/tabby-index/src/doc/public.rs +++ b/crates/tabby-index/src/doc/public.rs @@ -47,15 +47,18 @@ impl DocIndexer { }; stream! { + let is_document_empty = document.body.trim().is_empty(); let (id, s) = self.builder.build(document).await; self.indexer.delete(&id); - for await doc in s.buffer_unordered(std::cmp::max(std::thread::available_parallelism().unwrap().get() * 2, 32)) { - if let Ok(Some(doc)) = doc { - self.indexer.add(doc).await; + + if !is_document_empty { + for await doc in s.buffer_unordered(std::cmp::max(std::thread::available_parallelism().unwrap().get() * 2, 32)) { + if let Ok(Some(doc)) = doc { + self.indexer.add(doc).await; + } } } }.count().await; - true }