feat(git): support query with quoted string for repository_grep (#2784)

* feat(git): support query with quoted string for repository_grep

* [autofix.ci] apply automated fixes

---------

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
Meng Zhang 2024-08-06 12:49:45 -07:00 committed by GitHub
parent eaec8605a0
commit 0438091b33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 96 additions and 14 deletions

View File

@ -10,7 +10,7 @@ use futures::Stream;
use git2::TreeWalkResult;
pub use query::GrepQuery;
use searcher::GrepSearcher;
use tracing::warn;
use tracing::{debug, warn};
use super::{bytes2path, rev_to_commit};
@ -53,6 +53,7 @@ pub fn grep(
let rev = rev.map(|s| s.to_owned());
let query = query.clone();
debug!("{:?}", query);
let searcher = query.searcher()?;
let task =
tokio::task::spawn_blocking(move || grep_impl(repository, rev.as_deref(), searcher, tx));

View File

@ -9,7 +9,7 @@ use ignore::types::TypesBuilder;
use super::searcher::GrepSearcher;
#[derive(Default, Clone)]
#[derive(Default, Clone, Debug)]
pub struct GrepQuery {
patterns: Vec<String>,
negative_patterns: Vec<String>,
@ -123,19 +123,23 @@ impl FromStr for GrepQuery {
fn from_str(s: &str) -> Result<Self, Self::Err> {
let mut builder = GrepQueryBuilder::default();
for part in s.split_whitespace() {
if part.starts_with('-') {
builder = match part {
_ if part.starts_with("-lang:") => builder.negative_file_type(&part[6..]),
_ if part.starts_with("-f:") => builder.negative_file_pattern(&part[3..]),
_ => builder.negative_pattern(&part[1..]),
};
for (negative, part) in tokenize_query(s) {
if negative {
match part {
_ if part.starts_with("lang:") => {
builder = builder.negative_file_type(&part[5..])
}
_ if part.starts_with("f:") => {
builder = builder.negative_file_pattern(&part[2..])
}
_ => builder = builder.negative_pattern(part),
}
} else {
builder = match part {
_ if part.starts_with("lang:") => builder.file_type(&part[5..]),
_ if part.starts_with("f:") => builder.file_pattern(&part[2..]),
_ => builder.pattern(part),
};
match part {
_ if part.starts_with("lang:") => builder = builder.file_type(&part[5..]),
_ if part.starts_with("f:") => builder = builder.file_pattern(&part[2..]),
_ => builder = builder.pattern(part),
}
}
}
@ -200,6 +204,63 @@ fn has_uppercase_literal(expr: &str) -> bool {
expr.chars().any(|c| c.is_ascii_uppercase())
}
/// Tokenize a query string, respecting quoted strings.
///
/// Tokens are separated by spaces. Text between double quotes forms a single
/// token and may contain spaces and `-` characters. A backslash makes the
/// following character literal, which allows `\"` inside a quoted string.
///
/// A `-` at the very start of a token (including directly before an opening
/// quote) marks that token as a negative pattern. A `-` anywhere else in a
/// token, e.g. `foo-bar` or `f:my-file.rs`, is an ordinary character and does
/// not split the token.
///
/// Returns the tokens in input order, each paired with a flag that is `true`
/// when the token is a negative pattern. The leading `-` and the surrounding
/// quotes are not part of the returned token text.
///
/// NOTE(review): the negative flag is only reset when a space terminates a
/// non-empty token. After a quoted negative token such as `-"a b"`, the flag
/// therefore carries over to the next token. The existing unit test pins this
/// behavior, so it is preserved here; confirm whether it is intended.
fn tokenize_query(query: &str) -> Vec<(bool, String)> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut negative = false;
    let mut quoted = false;
    let mut escaped = false;

    for c in query.chars() {
        // The character following a backslash is always taken literally.
        if escaped {
            current.push(c);
            escaped = false;
            continue;
        }

        match c {
            ' ' if !quoted => {
                if !current.is_empty() {
                    tokens.push((negative, std::mem::take(&mut current)));
                    negative = false;
                }
            }
            // Only a leading `-` negates; inside a token it falls through to
            // the catch-all arm and is kept as part of the token text.
            '-' if !quoted && current.is_empty() => negative = true,
            '"' => {
                // A closing quote always ends the token, even an empty one.
                if quoted {
                    tokens.push((negative, std::mem::take(&mut current)));
                }
                quoted = !quoted;
            }
            '\\' => escaped = true,
            _ => current.push(c),
        }
    }

    // Flush a trailing token that was not terminated by a space or a quote.
    if !current.is_empty() {
        tokens.push((negative, current));
    }
    tokens
}
#[cfg(test)]
mod tests {
use super::*;
@ -212,4 +273,24 @@ mod tests {
assert_eq!(query.negative_file_patterns, vec!["*.rs"]);
assert_eq!(query.file_types, vec!["rust"]);
}
#[test]
fn test_tokenize_query() {
    // Covers prefix filters, plain words, negated words, quoted phrases,
    // a negated quoted phrase, and escaped quotes inside a quoted phrase.
    let input = r#"lang:rust -f:*.rs foo bar -baz "qux quux", -"corge grault" "\"abc\" dd""#;

    // The final token is reported as negative: no space-terminated token
    // follows `-"corge grault"`, so the negative flag is still set when the
    // last quoted phrase is emitted.
    let expected: Vec<(bool, String)> = [
        (false, "lang:rust"),
        (true, "f:*.rs"),
        (false, "foo"),
        (false, "bar"),
        (true, "baz"),
        (false, "qux quux"),
        (false, ","),
        (true, "corge grault"),
        (true, "\"abc\" dd"),
    ]
    .iter()
    .map(|&(is_negative, text)| (is_negative, text.to_owned()))
    .collect();

    assert_eq!(tokenize_query(input), expected);
}
}