From 141c20966af95a49cf95e6cb815c4e7a7168a66e Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Sun, 23 Jun 2024 13:57:30 +0800 Subject: [PATCH] chore(scheduler): ignore katana request without status code (#2481) --- crates/tabby-scheduler/src/crawl/mod.rs | 8 ++++---- crates/tabby-scheduler/src/crawl/types.rs | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/tabby-scheduler/src/crawl/mod.rs b/crates/tabby-scheduler/src/crawl/mod.rs index 0e7b0670f..9a887ba64 100644 --- a/crates/tabby-scheduler/src/crawl/mod.rs +++ b/crates/tabby-scheduler/src/crawl/mod.rs @@ -46,7 +46,7 @@ async fn crawl_url(start_url: &str) -> anyhow::Result Option { let (html, metadata) = { let (node, metadata) = Readability::new() .base_url(Url::parse(&data.request.endpoint).ok()?) - .parse(&data.response.body); + .parse(&data.response.body?); let mut html_bytes = vec![]; node.serialize(&mut html_bytes).ok()?; @@ -134,9 +134,9 @@ mod tests { raw: "GET / HTTP/1.1\nHost: example.com\n".to_owned(), }, response: types::KatanaResponse { - status_code: 200, + status_code: Some(200), headers, - body: "

Hello, World!

".to_owned(), + body: Some("

Hello, World!

".to_owned()), technologies: Default::default(), raw: "HTTP/1.1 200 OK\nContent-Type: text/html\n".to_owned(), }, diff --git a/crates/tabby-scheduler/src/crawl/types.rs b/crates/tabby-scheduler/src/crawl/types.rs index f52586a0c..78eeeb4eb 100644 --- a/crates/tabby-scheduler/src/crawl/types.rs +++ b/crates/tabby-scheduler/src/crawl/types.rs @@ -18,9 +18,9 @@ pub struct KatanaRequest { #[derive(Deserialize, Debug)] pub struct KatanaResponse { - pub status_code: u16, + pub status_code: Option<u16>, pub headers: HashMap, - pub body: String, + pub body: Option<String>, pub technologies: Vec, pub raw: String, }