fix: when connecting to localhost endpoint, do not use proxy settings (#2736)

* fix: when connecting to localhost endpoint, do not use proxy settings

* update

* update
Meng Zhang 2024-07-27 09:37:01 -07:00 committed by GitHub
parent 53c028ec9b
commit 21b4ded487
6 changed files with 30 additions and 4 deletions

View File

@@ -0,0 +1,3 @@
+kind: Fixed and Improvements
+body: When connecting to localhost model servers, skip the proxy settings
+time: 2024-07-26T20:29:12.300644-07:00

View File

@@ -4,6 +4,8 @@ use async_openai::config::OpenAIConfig;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig};
 
+use crate::create_reqwest_client;
+
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
     let config = OpenAIConfig::default()
         .with_api_base(model.api_endpoint.clone())
@@ -24,5 +26,8 @@ pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
     let config = builder.build().expect("Failed to build config");
 
-    Arc::new(async_openai::Client::with_config(config))
+    Arc::new(
+        async_openai::Client::with_config(config)
+            .with_http_client(create_reqwest_client(&model.api_endpoint)),
+    )
 }
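Note: async_openai's Client accepts a custom reqwest::Client via with_http_client, which is what lets the binding above swap in the proxy-aware client. A minimal standalone sketch of the same pattern; the endpoint URL and function name below are illustrative, not from this commit:

use async_openai::{config::OpenAIConfig, Client};

fn build_chat_client() -> Client<OpenAIConfig> {
    // Skip system/env proxy settings, mirroring what create_reqwest_client
    // does for local servers. The endpoint is a placeholder.
    let http_client = reqwest::Client::builder()
        .no_proxy()
        .build()
        .expect("failed to build reqwest client");

    let config = OpenAIConfig::default().with_api_base("http://localhost:8080/v1");
    Client::with_config(config).with_http_client(http_client)
}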

View File

@@ -5,6 +5,8 @@ use reqwest_eventsource::{Event, EventSource};
 use serde::{Deserialize, Serialize};
 use tabby_inference::{CompletionOptions, CompletionStream};
 
+use crate::create_reqwest_client;
+
 pub struct LlamaCppEngine {
     client: reqwest::Client,
     api_endpoint: String,
@@ -13,7 +15,7 @@ pub struct LlamaCppEngine {
 impl LlamaCppEngine {
     pub fn create(api_endpoint: &str, api_key: Option<String>) -> Self {
-        let client = reqwest::Client::new();
+        let client = create_reqwest_client(api_endpoint);
 
         Self {
             client,
View File

@@ -2,6 +2,8 @@ use async_trait::async_trait;
 use serde::{Deserialize, Serialize};
 use tabby_inference::Embedding;
 
+use crate::create_reqwest_client;
+
 pub struct LlamaCppEngine {
     client: reqwest::Client,
     api_endpoint: String,
@@ -10,7 +12,7 @@ pub struct LlamaCppEngine {
 impl LlamaCppEngine {
     pub fn create(api_endpoint: &str, api_key: Option<String>) -> Self {
-        let client = reqwest::Client::new();
+        let client = create_reqwest_client(api_endpoint);
 
         Self {
             client,

View File

@@ -5,3 +5,17 @@ mod embedding;
 pub use chat::create as create_chat;
 pub use completion::{build_completion_prompt, create};
 pub use embedding::create as create_embedding;
+
+fn create_reqwest_client(api_endpoint: &str) -> reqwest::Client {
+    let builder = reqwest::Client::builder();
+
+    let is_localhost = api_endpoint.starts_with("http://localhost")
+        || api_endpoint.starts_with("http://127.0.0.1");
+    let builder = if is_localhost {
+        builder.no_proxy()
+    } else {
+        builder
+    };
+
+    builder.build().unwrap()
+}
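The helper only treats plain-HTTP localhost and 127.0.0.1 endpoints as local. A sketch of a unit test pinning that behavior down (not part of the commit; the predicate is copied out of create_reqwest_client for illustration):

#[cfg(test)]
mod proxy_tests {
    // Mirror of the check inside create_reqwest_client.
    fn is_localhost(endpoint: &str) -> bool {
        endpoint.starts_with("http://localhost") || endpoint.starts_with("http://127.0.0.1")
    }

    #[test]
    fn detects_local_endpoints() {
        assert!(is_localhost("http://localhost:8080"));
        assert!(is_localhost("http://127.0.0.1:11434/v1"));
        // https://, [::1], and remote hosts still go through the proxy.
        assert!(!is_localhost("https://localhost:8080"));
        assert!(!is_localhost("http://[::1]:8080"));
        assert!(!is_localhost("https://api.example.com/v1"));
    }
}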

View File

@@ -122,7 +122,7 @@ impl LlamaCppSupervisor {
     pub async fn start(&self) {
         debug!("Waiting for llama-server <{}> to start...", self.name);
-        let client = reqwest::Client::new();
+        let client = reqwest::Client::builder().no_proxy().build().unwrap();
         loop {
             let Ok(resp) = client.get(api_endpoint(self.port) + "/health").send().await else {
                 continue;
             };
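By default reqwest honors proxy settings from the environment (HTTP_PROXY, HTTPS_PROXY, and system proxies), so without no_proxy() the health probe against the local llama-server could be routed through a corporate proxy and never reach 127.0.0.1. A sketch of the polling pattern above, assuming a tokio runtime; the port, path, and function name are illustrative, and the real loop lives in LlamaCppSupervisor::start:

// Sketch only: poll a local health endpoint, bypassing any proxy.
async fn wait_for_local_server(port: u16) {
    let client = reqwest::Client::builder()
        .no_proxy() // ignore HTTP(S)_PROXY and system proxy settings
        .build()
        .unwrap();

    loop {
        if let Ok(resp) = client
            .get(format!("http://127.0.0.1:{port}/health"))
            .send()
            .await
        {
            if resp.status().is_success() {
                break;
            }
        }
        tokio::time::sleep(std::time::Duration::from_millis(500)).await;
    }
}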