From 21b4ded487b553feed7243add744a9b04c3a2d1a Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Sat, 27 Jul 2024 09:37:01 -0700
Subject: [PATCH] fix: when connecting to localhost endpoint, do not use proxy
 settings (#2736)

* fix: when connecting to localhost endpoint, do not use proxy settings

* update

* update
---
 .../Fixed and Improvements-20240726-202912.yaml  |  3 +++
 crates/http-api-bindings/src/chat/mod.rs         |  7 ++++++-
 crates/http-api-bindings/src/completion/llama.rs |  4 +++-
 crates/http-api-bindings/src/embedding/llama.rs  |  4 +++-
 crates/http-api-bindings/src/lib.rs              | 14 ++++++++++++++
 crates/llama-cpp-server/src/supervisor.rs        |  2 +-
 6 files changed, 30 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Fixed and Improvements-20240726-202912.yaml

diff --git a/.changes/unreleased/Fixed and Improvements-20240726-202912.yaml b/.changes/unreleased/Fixed and Improvements-20240726-202912.yaml
new file mode 100644
index 000000000..c597bb7b2
--- /dev/null
+++ b/.changes/unreleased/Fixed and Improvements-20240726-202912.yaml
@@ -0,0 +1,3 @@
+kind: Fixed and Improvements
+body: When connecting to localhost model servers, skip the proxy settings
+time: 2024-07-26T20:29:12.300644-07:00
diff --git a/crates/http-api-bindings/src/chat/mod.rs b/crates/http-api-bindings/src/chat/mod.rs
index 8cc02ac65..ed55a8d39 100644
--- a/crates/http-api-bindings/src/chat/mod.rs
+++ b/crates/http-api-bindings/src/chat/mod.rs
@@ -4,6 +4,8 @@ use async_openai::config::OpenAIConfig;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig};
 
+use crate::create_reqwest_client;
+
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
     let config = OpenAIConfig::default()
         .with_api_base(model.api_endpoint.clone())
@@ -24,5 +26,8 @@ pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
 
     let config = builder.build().expect("Failed to build config");
 
-    Arc::new(async_openai::Client::with_config(config))
+    Arc::new(
+        async_openai::Client::with_config(config)
+            .with_http_client(create_reqwest_client(&model.api_endpoint)),
+    )
 }
diff --git a/crates/http-api-bindings/src/completion/llama.rs b/crates/http-api-bindings/src/completion/llama.rs
index 4eed2fc74..617e840a3 100644
--- a/crates/http-api-bindings/src/completion/llama.rs
+++ b/crates/http-api-bindings/src/completion/llama.rs
@@ -5,6 +5,8 @@ use reqwest_eventsource::{Event, EventSource};
 use serde::{Deserialize, Serialize};
 use tabby_inference::{CompletionOptions, CompletionStream};
 
+use crate::create_reqwest_client;
+
 pub struct LlamaCppEngine {
     client: reqwest::Client,
     api_endpoint: String,
@@ -13,7 +15,7 @@ pub struct LlamaCppEngine {
 
 impl LlamaCppEngine {
     pub fn create(api_endpoint: &str, api_key: Option<String>) -> Self {
-        let client = reqwest::Client::new();
+        let client = create_reqwest_client(api_endpoint);
 
         Self {
             client,
diff --git a/crates/http-api-bindings/src/embedding/llama.rs b/crates/http-api-bindings/src/embedding/llama.rs
index 638142b48..2925517ca 100644
--- a/crates/http-api-bindings/src/embedding/llama.rs
+++ b/crates/http-api-bindings/src/embedding/llama.rs
@@ -2,6 +2,8 @@ use async_trait::async_trait;
 use serde::{Deserialize, Serialize};
 use tabby_inference::Embedding;
 
+use crate::create_reqwest_client;
+
 pub struct LlamaCppEngine {
     client: reqwest::Client,
     api_endpoint: String,
@@ -10,7 +12,7 @@ pub struct LlamaCppEngine {
 
 impl LlamaCppEngine {
     pub fn create(api_endpoint: &str, api_key: Option<String>) -> Self {
-        let client = reqwest::Client::new();
+        let client = create_reqwest_client(api_endpoint);
 
         Self {
             client,
diff --git a/crates/http-api-bindings/src/lib.rs b/crates/http-api-bindings/src/lib.rs
index 2e67569c8..41e781142 100644
--- a/crates/http-api-bindings/src/lib.rs
+++ b/crates/http-api-bindings/src/lib.rs
@@ -5,3 +5,17 @@ mod embedding;
 pub use chat::create as create_chat;
 pub use completion::{build_completion_prompt, create};
 pub use embedding::create as create_embedding;
+
+fn create_reqwest_client(api_endpoint: &str) -> reqwest::Client {
+    let builder = reqwest::Client::builder();
+
+    let is_localhost = api_endpoint.starts_with("http://localhost")
+        || api_endpoint.starts_with("http://127.0.0.1");
+    let builder = if is_localhost {
+        builder.no_proxy()
+    } else {
+        builder
+    };
+
+    builder.build().unwrap()
+}
diff --git a/crates/llama-cpp-server/src/supervisor.rs b/crates/llama-cpp-server/src/supervisor.rs
index 05732ae76..fe13738e4 100644
--- a/crates/llama-cpp-server/src/supervisor.rs
+++ b/crates/llama-cpp-server/src/supervisor.rs
@@ -122,7 +122,7 @@ impl LlamaCppSupervisor {
     pub async fn start(&self) {
         debug!("Waiting for llama-server <{}> to start...", self.name);
-        let client = reqwest::Client::new();
+        let client = reqwest::Client::builder().no_proxy().build().unwrap();
         loop {
             let Ok(resp) = client.get(api_endpoint(self.port) + "/health").send().await else {
                 continue;
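
Note for reviewers (not part of the patch itself): the core of the change is building the reqwest client with no_proxy() whenever the endpoint points at loopback, so HTTP_PROXY/HTTPS_PROXY/ALL_PROXY settings never intercept local model-server traffic. Below is a minimal standalone sketch of that idea; the endpoint value, the tokio main wrapper, and the client_for name are illustrative assumptions, only the no_proxy() builder call comes from this patch.

// Standalone sketch (assumes reqwest and tokio are available as dependencies).
// It mirrors the create_reqwest_client helper added in crates/http-api-bindings/src/lib.rs.
fn client_for(api_endpoint: &str) -> reqwest::Client {
    // Treat plain-HTTP loopback endpoints as local and skip any configured proxy.
    let is_localhost = api_endpoint.starts_with("http://localhost")
        || api_endpoint.starts_with("http://127.0.0.1");

    let builder = reqwest::Client::builder();
    let builder = if is_localhost {
        // no_proxy() disables system proxies and proxy environment variables
        // for this client only.
        builder.no_proxy()
    } else {
        builder
    };
    builder.build().expect("failed to build reqwest client")
}

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // Hypothetical local model server; adjust the port to match your setup.
    let endpoint = "http://localhost:8080";
    let status = client_for(endpoint)
        .get(format!("{endpoint}/health"))
        .send()
        .await?
        .status();
    println!("health check returned {status}");
    Ok(())
}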