mirror of
https://github.com/TabbyML/tabby
synced 2024-11-22 08:21:59 +00:00
chore(scheduler): ignore katana request without status code (#2481)
This commit is contained in:
parent
cb9091289f
commit
141c20966a
@ -46,7 +46,7 @@ async fn crawl_url(start_url: &str) -> anyhow::Result<impl Stream<Item = KatanaR
|
||||
};
|
||||
|
||||
// Skip if the status code is not 200
|
||||
if data.response.status_code != 200 {
|
||||
if data.response.status_code != Some(200) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ fn to_document(data: KatanaRequestResponse) -> Option<CrawledDocument> {
|
||||
let (html, metadata) = {
|
||||
let (node, metadata) = Readability::new()
|
||||
.base_url(Url::parse(&data.request.endpoint).ok()?)
|
||||
.parse(&data.response.body);
|
||||
.parse(&data.response.body?);
|
||||
|
||||
let mut html_bytes = vec![];
|
||||
node.serialize(&mut html_bytes).ok()?;
|
||||
@ -134,9 +134,9 @@ mod tests {
|
||||
raw: "GET / HTTP/1.1\nHost: example.com\n".to_owned(),
|
||||
},
|
||||
response: types::KatanaResponse {
|
||||
status_code: 200,
|
||||
status_code: Some(200),
|
||||
headers,
|
||||
body: "<p>Hello, World!</p>".to_owned(),
|
||||
body: Some("<p>Hello, World!</p>".to_owned()),
|
||||
technologies: Default::default(),
|
||||
raw: "HTTP/1.1 200 OK\nContent-Type: text/html\n".to_owned(),
|
||||
},
|
||||
|
@ -18,9 +18,9 @@ pub struct KatanaRequest {
|
||||
|
||||
#[derive(Deserialize, Debug)]
|
||||
pub struct KatanaResponse {
|
||||
pub status_code: u16,
|
||||
pub status_code: Option<u16>,
|
||||
pub headers: HashMap<String, String>,
|
||||
pub body: String,
|
||||
pub body: Option<String>,
|
||||
pub technologies: Vec<String>,
|
||||
pub raw: String,
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user