Skip to content

Commit f1169f9

Browse files
author
Gal Ben David
committed
using Aho-Corasick to search for file path patterns
1 parent 87ade55 commit f1169f9

File tree

2 files changed

+35
-20
lines changed

2 files changed

+35
-20
lines changed

Cargo.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,12 @@ name = "pyrepscan"
3535
crate-type = ["cdylib"]
3636

3737
[dependencies]
38-
regex = "1.6.0"
38+
aho-corasick = "0.7.18"
3939
chrono = "0.4.19"
40-
num_cpus = "1.13.1"
41-
parking_lot = "0.12.1"
4240
crossbeam = "0.8.1"
4341
crossbeam-utils = "0.8.10"
42+
parking_lot = "0.12.1"
43+
regex = "1.6.0"
4444

4545
[dependencies.libgit2-sys]
4646
version = "0.13.4"

src/rules_manager.rs

+32-17
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
use pyo3::exceptions::PyRuntimeError;
2-
use pyo3::prelude::*;
3-
use regex::Regex;
1+
use std::path::Path;
42
use std::collections::{HashMap, HashSet};
3+
use regex::Regex;
4+
use pyo3::prelude::*;
5+
use pyo3::exceptions::PyRuntimeError;
6+
use aho_corasick::AhoCorasick;
57

68
struct ContentRule {
79
name: String,
@@ -16,19 +18,31 @@ struct FilePathRule {
1618
}
1719

1820
#[pyclass]
19-
#[derive(Default)]
2021
pub struct RulesManager {
2122
file_extensions_to_skip: HashSet<String>,
22-
file_paths_to_skip: HashSet<String>,
23+
file_paths_to_skip: Vec<String>,
24+
file_paths_to_skip_ac: Option<AhoCorasick>,
2325
content_rules: Vec<ContentRule>,
2426
file_path_rules: Vec<FilePathRule>,
2527
}
2628

29+
impl Default for RulesManager {
30+
fn default() -> Self {
31+
Self::new()
32+
}
33+
}
34+
2735
#[pymethods]
2836
impl RulesManager {
2937
#[new]
3038
pub fn new() -> Self {
31-
RulesManager::default()
39+
RulesManager {
40+
file_extensions_to_skip: HashSet::default(),
41+
file_paths_to_skip: Vec::default(),
42+
file_paths_to_skip_ac: None,
43+
content_rules: Vec::default(),
44+
file_path_rules: Vec::default(),
45+
}
3246
}
3347

3448
pub fn add_content_rule(
@@ -168,7 +182,12 @@ impl RulesManager {
168182
PyRuntimeError::new_err("File path can not be empty")
169183
)
170184
}
171-
self.file_paths_to_skip.insert(file_path.to_ascii_lowercase());
185+
self.file_paths_to_skip.push(file_path.to_ascii_lowercase());
186+
self.file_paths_to_skip_ac = Some(
187+
AhoCorasick::new_auto_configured(
188+
self.file_paths_to_skip.as_slice()
189+
)
190+
);
172191

173192
Ok(())
174193
}
@@ -177,20 +196,16 @@ impl RulesManager {
177196
&self,
178197
file_path: &str,
179198
) -> bool {
180-
if self.file_extensions_to_skip.iter().any(
181-
|file_extension_to_skip| {
182-
file_path.ends_with(file_extension_to_skip)
199+
if let Some(file_extension) = Path::new(file_path).extension() {
200+
if self.file_extensions_to_skip.contains(file_extension.to_string_lossy().as_ref()) {
201+
return false;
183202
}
184-
) {
185-
return false;
186203
}
187204

188-
if self.file_paths_to_skip.iter().any(
189-
|file_path_to_skip| {
190-
file_path.contains(file_path_to_skip)
205+
if let Some(file_paths_to_skip_patterns) = &self.file_paths_to_skip_ac {
206+
if file_paths_to_skip_patterns.is_match(file_path) {
207+
return false;
191208
}
192-
) {
193-
return false;
194209
}
195210

196211
true

0 commit comments

Comments
 (0)