diff --git a/.changes/unreleased/Fixed and Improvements-20240811-124728.yaml b/.changes/unreleased/Fixed and Improvements-20240811-124728.yaml new file mode 100644 index 000000000..5a43fe30e --- /dev/null +++ b/.changes/unreleased/Fixed and Improvements-20240811-124728.yaml @@ -0,0 +1,3 @@ +kind: Fixed and Improvements +body: Skip web documents if body is empty +time: 2024-08-11T12:47:28.678694-07:00 diff --git a/crates/tabby-index/src/doc/public.rs b/crates/tabby-index/src/doc/public.rs index a5b6af378..f871bbe4f 100644 --- a/crates/tabby-index/src/doc/public.rs +++ b/crates/tabby-index/src/doc/public.rs @@ -47,15 +47,18 @@ impl DocIndexer { }; stream! { + let is_document_empty = document.body.trim().is_empty(); let (id, s) = self.builder.build(document).await; self.indexer.delete(&id); - for await doc in s.buffer_unordered(std::cmp::max(std::thread::available_parallelism().unwrap().get() * 2, 32)) { - if let Ok(Some(doc)) = doc { - self.indexer.add(doc).await; + + if !is_document_empty { + for await doc in s.buffer_unordered(std::cmp::max(std::thread::available_parallelism().unwrap().get() * 2, 32)) { + if let Ok(Some(doc)) = doc { + self.indexer.add(doc).await; + } } } }.count().await; - true }