@@ -555,6 +555,8 @@ type sca_match = {
555
555
(* Note that in addition to "reachable" there are also the notions of
556
556
* "vulnerable" and "exploitable".
557
557
* coupling: see also SCA_match.ml
558
+ * TODO? have a Direct of xxx and Transitive of sca_transitive_match_kind?
559
+ * better so can be reused in other types such as tr_cache_result?
558
560
*)
559
561
type sca_match_kind = [
560
562
(* This is used for "parity" or "upgrade-only" rules. transitivity
@@ -1839,6 +1841,68 @@ type scan_config = {
1839
1841
?ci_config_from_cloud: ci_config_from_cloud option;
1840
1842
}
1841
1843
1844
+ (* ------------------------------------------- *)
1845
+ (* Transitive reachability (TR) caching comms  *)
1846
+ (* ------------------------------------------- *)
1847
+ (* We want essentially to cache semgrep computation on third party packages
1848
+ * to quickly know (rule_id x package_version) -> sca_transitive_match_kind
1849
+ * to avoid downloading and recomputing each time the same thing.
1850
+ *)
1851
+
1852
+ (* The "key".
1853
+ * The rule_id and resolved_url should form a valid key for our TR cache
1854
+ * database table. Indeed, semgrep should always return the same result when
1855
+ * using the same rule and same resolved_url package. The content at the
1856
+ * URL should hopefully not change (we could md5sum it just in case) and
1857
+ * the content of the rule_id should also not change (could md5sum it maybe too).
1858
+ * I've added engine_version below just in case we want to invalidate past
1859
+ * cached entries (e.g., the semgrep engine itself changed enough that
1860
+ * some past cached results might be wrong and should be recomputed)
1861
+ *)
1862
+ type tr_cache_key = {
1863
+ rule_id: rule_id;
1864
+ (* this can be the checksum of the content of the rule (JSON or YAML form) *)
1865
+ rule_version: string;
1866
+ (* does not have to match the Semgrep CLI version; can be bumped only
1867
+ * when we think the match should be recomputed
1868
+ * TODO: to be set in Transitive_reachability.ml tr_version constant
1869
+ *)
1870
+ engine_version: int;
1871
+ (* ex: http://some-website/hello-world.0.1.2.tgz like in found_dependency
1872
+ * 'resolved_url' field, but could be anything to describe a particular
1873
+ * package. We could rely on https://github.com/package-url/purl-spec
1874
+ *)
1875
+ package_url: string;
1876
+ (* extra key just in case (e.g., "prod" vs "dev") *)
1877
+ extra: string;
1878
+ }
1879
+
1880
+ (* The "value" *)
1881
+ type tr_cache_match_result = {
1882
+ (* alt: cache just sca_match? or sca_match_kind? or even define a separate
1883
+ * sca_transitive_match type? which would be smaller than storing
1884
+ * the whole set of matches
1885
+ * alt: cache the whole cli_output? (which also contains the errors)
1886
+ *)
1887
+ matches: cli_match list;
1888
+ }
1889
+
1890
+ (* Sent by the CLI to the POST /api/???? *)
1891
+ type tr_query_cache_request = {
1892
+ entries: tr_cache_key list;
1893
+ }
1894
+
1895
+ (* Response by the backend to the POST /api/???? *)
1896
+ type tr_query_cache_response = {
1897
+ cached: (tr_cache_key * tr_cache_match_result) list;
1898
+ }
1899
+
1900
+ (* Sent by the CLI to the POST /api/??? *)
1901
+ type tr_add_cache_request = {
1902
+ new_entries: (tr_cache_key * tr_cache_match_result) list;
1903
+ }
1904
+ (* TODO: tr_add_cache_response: string result (Ok | Error) *)
1905
+
1842
1906
(* ----------------------------- *)
1843
1907
(* TODO a better CI config from cloud *)
1844
1908
(* ----------------------------- *)
@@ -2407,6 +2471,10 @@ type resolution_result = [
2407
2471
| ResolutionError of resolution_error_kind list
2408
2472
]
2409
2473
2474
+ (* ----------------------------- *)
2475
+ (* SCA transitive reachability *)
2476
+ (* ----------------------------- *)
2477
+
2410
2478
type transitive_finding = {
2411
2479
(* the important part is the sca_match in core_match_extra that
2412
2480
* we need to adjust and especially the sca_match_kind.
@@ -2424,7 +2492,7 @@ type transitive_reachability_filter_params = {
2424
2492
}
2425
2493
2426
2494
(* ----------------------------- *)
2427
- (* SCA part 4: Symbol analysis *)
2495
+ (* Symbol analysis *)
2428
2496
(* ----------------------------- *)
2429
2497
2430
2498
(* "Symbol analysis" is about determining the third-party functions which
0 commit comments