Skip to content

Commit c5302cd

Browse files
prevalence db update
1 parent dc399c3 commit c5302cd

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

assets/rules_prevalence.json.gz

5.91 KB
Binary file not shown.

capa/render/default.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
77
# See the License for the specific language governing permissions and limitations under the License.
88

9+
import gzip
910
import json
1011
import collections
12+
from typing import Dict
1113
from pathlib import Path
1214

1315
import tabulate
@@ -85,16 +87,18 @@ def render_capabilities(doc: rd.ResultDocument, ostream: StringIO):
8587
+-------------------------------------------------------+-------------------------------------------------+
8688
"""
8789

88-
def load_rules_prevalence(file: Path) -> dict:
90+
def load_rules_prevalence(file: Path) -> Dict[str, str]:
8991
try:
90-
return json.load(file.open("r"))
92+
with gzip.open(file, "rb") as gzfile:
93+
return json.loads(gzfile.read().decode("utf-8"))
9194
except FileNotFoundError:
9295
raise FileNotFoundError(f"File '{file}' not found.")
9396
except Exception as e:
9497
raise RuntimeError(f"An error occurred while loading '{file}': {e}")
9598

9699
subrule_matches = find_subrule_matches(doc)
97-
rules_prevalence = load_rules_prevalence(Path("./assets/rules_prevalence.json"))
100+
CD = Path(__file__).resolve().parent.parent.parent
101+
rules_prevalence = load_rules_prevalence(CD / "assets" / "rules_prevalence.json.gz")
98102

99103
# separate rules based on their prevalence
100104
common = []
@@ -106,17 +110,14 @@ def load_rules_prevalence(file: Path) -> dict:
106110
count = len(rule.matches)
107111
matches = f"({count} matches)" if count > 1 else ""
108112

109-
rule_prevalence = float(rules_prevalence.get(rule.meta.name, 0))
110-
if rule_prevalence < 0:
111-
raise ValueError("Match probability cannot be negative")
113+
prevalence = rules_prevalence.get(rule.meta.name, None)
112114

113-
prevalences = [rutils.bold("rare"), rutils.bold("common"), "unknown"]
114-
115-
if rule_prevalence == 0 or rule_prevalence >= 0.05:
116-
prevalence = prevalences[2] if rule_prevalence == 0 else prevalences[1]
117-
common.append((rule.meta.namespace, rule.meta.name, matches, prevalence))
115+
if prevalence == "rare":
116+
rare.append((rule.meta.namespace, rule.meta.name, matches, rutils.bold(prevalence)))
117+
elif prevalence == "common":
118+
common.append((rule.meta.namespace, rule.meta.name, matches, rutils.bold(prevalence)))
118119
else:
119-
rare.append((rule.meta.namespace, rule.meta.name, matches, prevalences[0]))
120+
common.append((rule.meta.namespace, rule.meta.name, matches, "unknown"))
120121

121122
rows = []
122123

0 commit comments

Comments
 (0)