From 21b4ded487b553feed7243add744a9b04c3a2d1a Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Sat, 27 Jul 2024 09:37:01 -0700
Subject: [PATCH] fix: when connecting to localhost endpoint, do not use proxy
 settings (#2736)

* fix: when connecting to localhost endpoint, do not use proxy settings

* update

* update
---
 .../Fixed and Improvements-20240726-202912.yaml  |  3 +++
 crates/http-api-bindings/src/chat/mod.rs         |  7 ++++++-
 crates/http-api-bindings/src/completion/llama.rs |  4 +++-
 crates/http-api-bindings/src/embedding/llama.rs  |  4 +++-
 crates/http-api-bindings/src/lib.rs              | 14 ++++++++++++++
 crates/llama-cpp-server/src/supervisor.rs        |  2 +-
 6 files changed, 30 insertions(+), 4 deletions(-)
 create mode 100644 .changes/unreleased/Fixed and Improvements-20240726-202912.yaml

diff --git a/.changes/unreleased/Fixed and Improvements-20240726-202912.yaml b/.changes/unreleased/Fixed and Improvements-20240726-202912.yaml
new file mode 100644
index 000000000..c597bb7b2
--- /dev/null
+++ b/.changes/unreleased/Fixed and Improvements-20240726-202912.yaml
@@ -0,0 +1,3 @@
+kind: Fixed and Improvements
+body: When connecting to localhost model servers, skip the proxy settings
+time: 2024-07-26T20:29:12.300644-07:00
diff --git a/crates/http-api-bindings/src/chat/mod.rs b/crates/http-api-bindings/src/chat/mod.rs
index 8cc02ac65..ed55a8d39 100644
--- a/crates/http-api-bindings/src/chat/mod.rs
+++ b/crates/http-api-bindings/src/chat/mod.rs
@@ -4,6 +4,8 @@ use async_openai::config::OpenAIConfig;
 use tabby_common::config::HttpModelConfig;
 use tabby_inference::{ChatCompletionStream, ExtendedOpenAIConfig};
 
+use crate::create_reqwest_client;
+
 pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
     let config = OpenAIConfig::default()
         .with_api_base(model.api_endpoint.clone())
@@ -24,5 +26,8 @@ pub async fn create(model: &HttpModelConfig) -> Arc<dyn ChatCompletionStream> {
 
     let config = builder.build().expect("Failed to build config");
 
-    Arc::new(async_openai::Client::with_config(config))
+    Arc::new(
+        async_openai::Client::with_config(config)
+            .with_http_client(create_reqwest_client(&model.api_endpoint)),
+    )
 }
diff --git a/crates/http-api-bindings/src/completion/llama.rs b/crates/http-api-bindings/src/completion/llama.rs
index 4eed2fc74..617e840a3 100644
--- a/crates/http-api-bindings/src/completion/llama.rs
+++ b/crates/http-api-bindings/src/completion/llama.rs
@@ -5,6 +5,8 @@ use reqwest_eventsource::{Event, EventSource};
 use serde::{Deserialize, Serialize};
 use tabby_inference::{CompletionOptions, CompletionStream};
 
+use crate::create_reqwest_client;
+
 pub struct LlamaCppEngine {
     client: reqwest::Client,
     api_endpoint: String,
@@ -13,7 +15,7 @@ pub struct LlamaCppEngine {
 
 impl LlamaCppEngine {
     pub fn create(api_endpoint: &str, api_key: Option<String>) -> Self {
-        let client = reqwest::Client::new();
+        let client = create_reqwest_client(api_endpoint);
 
         Self {
             client,
diff --git a/crates/http-api-bindings/src/embedding/llama.rs b/crates/http-api-bindings/src/embedding/llama.rs
index 638142b48..2925517ca 100644
--- a/crates/http-api-bindings/src/embedding/llama.rs
+++ b/crates/http-api-bindings/src/embedding/llama.rs
@@ -2,6 +2,8 @@ use async_trait::async_trait;
 use serde::{Deserialize, Serialize};
 use tabby_inference::Embedding;
 
+use crate::create_reqwest_client;
+
 pub struct LlamaCppEngine {
     client: reqwest::Client,
     api_endpoint: String,
@@ -10,7 +12,7 @@ pub struct LlamaCppEngine {
 
 impl LlamaCppEngine {
     pub fn create(api_endpoint: &str, api_key: Option<String>) -> Self {
-        let client = reqwest::Client::new();
+        let client = create_reqwest_client(api_endpoint);
 
         Self {
             client,
diff --git a/crates/http-api-bindings/src/lib.rs b/crates/http-api-bindings/src/lib.rs
index 2e67569c8..41e781142 100644
--- a/crates/http-api-bindings/src/lib.rs
+++ b/crates/http-api-bindings/src/lib.rs
@@ -5,3 +5,17 @@ mod embedding;
 pub use chat::create as create_chat;
 pub use completion::{build_completion_prompt, create};
 pub use embedding::create as create_embedding;
+
+fn create_reqwest_client(api_endpoint: &str) -> reqwest::Client {
+    let builder = reqwest::Client::builder();
+
+    let is_localhost = api_endpoint.starts_with("http://localhost")
+        || api_endpoint.starts_with("http://127.0.0.1");
+    let builder = if is_localhost {
+        builder.no_proxy()
+    } else {
+        builder
+    };
+
+    builder.build().unwrap()
+}
diff --git a/crates/llama-cpp-server/src/supervisor.rs b/crates/llama-cpp-server/src/supervisor.rs
index 05732ae76..fe13738e4 100644
--- a/crates/llama-cpp-server/src/supervisor.rs
+++ b/crates/llama-cpp-server/src/supervisor.rs
@@ -122,7 +122,7 @@ impl LlamaCppSupervisor {
     pub async fn start(&self) {
         debug!("Waiting for llama-server <{}> to start...", self.name);
-        let client = reqwest::Client::new();
+        let client = reqwest::Client::builder().no_proxy().build().unwrap();
         loop {
             let Ok(resp) = client.get(api_endpoint(self.port) + "/health").send().await else {
                 continue;
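
Note for reviewers (not part of the patch itself): the core of the change is building the reqwest client with no_proxy() whenever the endpoint points at loopback, so HTTP_PROXY/HTTPS_PROXY/ALL_PROXY settings never intercept local model-server traffic. Below is a minimal standalone sketch of that idea; the endpoint value, the tokio main wrapper, and the client_for name are illustrative assumptions, only the no_proxy() builder call comes from this patch.

// Standalone sketch (assumes reqwest and tokio are available as dependencies).
// It mirrors the create_reqwest_client helper added in crates/http-api-bindings/src/lib.rs.
fn client_for(api_endpoint: &str) -> reqwest::Client {
    // Treat plain-HTTP loopback endpoints as local and skip any configured proxy.
    let is_localhost = api_endpoint.starts_with("http://localhost")
        || api_endpoint.starts_with("http://127.0.0.1");

    let builder = reqwest::Client::builder();
    let builder = if is_localhost {
        // no_proxy() disables system proxies and proxy environment variables
        // for this client only.
        builder.no_proxy()
    } else {
        builder
    };
    builder.build().expect("failed to build reqwest client")
}

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    // Hypothetical local model server; adjust the port to match your setup.
    let endpoint = "http://localhost:8080";
    let status = client_for(endpoint)
        .get(format!("{endpoint}/health"))
        .send()
        .await?
        .status();
    println!("health check returned {status}");
    Ok(())
}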