awslabs · victornicolet · Oct 28, 2024 · Oct 24, 2024 · Oct 24, 2024 · Oct 25, 2024
diff --git a/analysis/annotations/annotations.go b/analysis/annotations/annotations.go
@@ -17,7 +17,9 @@ package annotations
 import (
 	"fmt"
 	"go/ast"
+	"go/token"
 	"regexp"
+	"strconv"
 	"strings"
 
 	"github.com/awslabs/ar-go-tools/analysis/config"
@@ -41,6 +43,8 @@ const (
 	Sanitizer
 	// SetOptions is the kind of SetOptions(...) annotations
 	SetOptions
+	// Ignore is an empty annotation for a line
+	Ignore
 )
 
 // AnyTag is the special tag used to match any other tag
@@ -98,6 +102,24 @@ func (a Annotation) IsMatchingAnnotation(kind AnnotationKind, tag string) bool {
 	return a.Kind == kind && (tag == AnyTag || (len(a.Tags) > 0 && a.Tags[0] == AnyTag) || slices.Contains(a.Tags, tag))
 }
 
+// LinePos is a simple line-file position indicator.
+type LinePos struct {
+	Line int
+	File string
+}
+
+// NewLinePos returns a LinePos from a token position. The column and offset are abstracted away.
+func NewLinePos(pos token.Position) LinePos {
+	return LinePos{
+		Line: pos.Line,
+		File: pos.Filename,
+	}
+}
+
+func (l LinePos) String() string {
+	return l.File + ":" + strconv.Itoa(l.Line)
+}
+
 // A FunctionAnnotation groups the annotations relative to a function into main annotations for the entire function
 // and parameter annotations for each parameter
 type FunctionAnnotation struct {
@@ -152,6 +174,17 @@ type ProgramAnnotations struct {
 	Consts map[*ssa.NamedConst][]Annotation
 	// Globals is the map of global variable annotations (TODO: implementation)
 	Globals map[*ssa.Global][]Annotation
+	// Positional is the map of line-file location to annotations that are not attached to a specific construct.
+	// There can be only one annotation per line.
+	Positional map[LinePos]Annotation
+}
+
+// IsIgnoredPos returns true when the given position is on the same line as an //argot:ignore annotation.
+func (pa ProgramAnnotations) IsIgnoredPos(pos token.Position, tag string) bool {
+	if posAnnot, hasPosAnnot := pa.Positional[NewLinePos(pos)]; hasPosAnnot {
+		return posAnnot.Kind == Ignore && (posAnnot.Tags[0] == tag || posAnnot.Tags[0] == AnyTag)
+	}
+	return false
 }
 
 // Count returns the total number of annotations in the program
@@ -178,19 +211,20 @@ func (pa ProgramAnnotations) Iter(fx func(a Annotation)) {
 	for _, cst := range pa.Globals {
 		funcutil.Iter(cst, fx)
 	}
+	for _, cst := range pa.Positional {
+		fx(cst)
+	}
 }
 
 // CompleteFromSyntax takes a set of program annotations and adds additional non-ssa linked annotations
 // to the annotations
-func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []*packages.Package) {
-	for _, pkg := range pkgs {
-		for _, astFile := range pkg.Syntax {
-			pa.loadPackageDocAnnotations(astFile.Doc)
-			for _, comments := range astFile.Comments {
-				for _, comment := range comments.List {
-					if annotationContents := extractAnnotation(comment); annotationContents != nil {
-						pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()).String())
-					}
+func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkg *packages.Package) {
+	for _, astFile := range pkg.Syntax {
+		pa.loadPackageDocAnnotations(astFile.Doc)
+		for _, comments := range astFile.Comments {
+			for _, comment := range comments.List {
+				if annotationContents := extractAnnotation(comment); annotationContents != nil {
+					pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()))
 				}
 			}
 		}
@@ -205,11 +239,12 @@ func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []
 // will also print warnings when some syntactic components of the comments look like they should be an annotation.
 func LoadAnnotations(logger *config.LogGroup, packages []*ssa.Package) (ProgramAnnotations, error) {
 	annotations := ProgramAnnotations{
-		Configs: map[string]map[string]string{},
-		Funcs:   map[*ssa.Function]FunctionAnnotation{},
-		Types:   map[*ssa.Type][]Annotation{},
-		Consts:  map[*ssa.NamedConst][]Annotation{},
-		Globals: map[*ssa.Global][]Annotation{},
+		Configs:    map[string]map[string]string{},
+		Funcs:      map[*ssa.Function]FunctionAnnotation{},
+		Types:      map[*ssa.Type][]Annotation{},
+		Consts:     map[*ssa.NamedConst][]Annotation{},
+		Globals:    map[*ssa.Global][]Annotation{},
+		Positional: map[LinePos]Annotation{},
 	}
 	for _, pkg := range packages {
 		for _, m := range pkg.Members {
@@ -287,21 +322,31 @@ func (pa ProgramAnnotations) loadPackageDocAnnotations(doc *ast.CommentGroup) {
 // loadFileAnnotations loads the annotation that are not tied to a specific ssa node. This includes:
 // - config annotations
 // - positional annotations
-func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position string) {
+func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position token.Position) {
 	if len(annotationContents) <= 1 {
 		logger.Warnf("ignoring argot annotation with no arguments at %s", position)
 		return
 	}
 	switch annotationContents[0] {
 	case ConfigTarget:
 		pa.loadConfigTargetAnnotation(logger, annotationContents, position)
+	case IgnoreTarget:
+		pa.addIgnoreLineAnnotation(position, annotationContents)
+	}
+}
+
+// addIgnoreLineAnnotation adds a Ignore annotation at the position's line
+func (pa ProgramAnnotations) addIgnoreLineAnnotation(position token.Position, annotationContents []string) {
+	pa.Positional[NewLinePos(position)] = Annotation{
+		Kind: Ignore,
+		Tags: []string{annotationContents[1]}, // only the tag is used
 	}
 }
 
 // loadConfigTargetAnnotation loads a config annotation. Config annotations look like
 // "//argot:config tag SetOptions(option-name-1=value1,option-name-2=value2)" and are always linked to a specific problem
 // tag.
-func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position string) {
+func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position token.Position) {
 	if len(annotationContents) <= 2 {
 		logger.Warnf("argot:config expects a target tag and one or more SetOptions")
 		logger.Warnf("a comment is likely missing something at %s", position)

diff --git a/analysis/annotations/annotations_test.go b/analysis/annotations/annotations_test.go
@@ -36,9 +36,35 @@ func TestLoadAnnotations(t *testing.T) {
 	testProgram.Prog.Build()
 	logger := config.NewLogGroup(testProgram.Config)
 	a, err := annotations.LoadAnnotations(logger, testProgram.Prog.AllPackages())
+	a.CompleteFromSyntax(logger, testProgram.Pkgs[0])
 	if err != nil {
 		t.Errorf("error loading annotations: %s", err)
 	}
+
+	// Positional annotations like //argot:ignore
+	t.Logf("%d positional annotations", len(a.Positional))
+	if !funcutil.ExistsInMap(a.Positional, func(k annotations.LinePos, _ annotations.Annotation) bool {
+		return k.Line == 43
+	}) {
+		t.Errorf("Expected annotation at line 43.")
+	}
+
+	for pos, annot := range a.Positional {
+		if pos.Line == 43 && (annot.Kind != annotations.Ignore || !funcutil.Contains(annot.Tags, "_")) {
+			t.Errorf("Expected annotation at line 43 to be //argot:ignore _ but its %v", annot)
+		}
+	}
+
+	// Config annotations like //argot:config
+	t.Logf("%d config annotations", len(a.Configs))
+	if _, hasOpt := a.Configs["_"]; !hasOpt {
+		t.Errorf("expected a config option for _")
+	}
+	if a.Configs["_"]["log-level"] != "3" {
+		t.Errorf("Expected config option log-level set to 3 by annotation.")
+	}
+
+	// Function annotations like //argot:function and //argot:param
 	t.Logf("%d functions scanned", len(a.Funcs))
 	for ssaFunc, functionAnnotation := range a.Funcs {
 		switch ssaFunc.Name() {

diff --git a/analysis/annotations/testdata/main.go b/analysis/annotations/testdata/main.go
@@ -16,6 +16,8 @@ package main
 
 import "fmt"
 
+//argot:config _ @SetOptions(log-level=3) configures log-level for all problems
+
 // sink and source annotation are relative to data categories (e.g. in this example bar, io and html)
 // Sink(_) means it is always a sink
 
@@ -38,7 +40,7 @@ func foo() string {
 //
 //argot:function Sink(_)
 func superSensitiveFunction(iWillPrintYouUnencrypted string) {
-	fmt.Println(iWillPrintYouUnencrypted)
+	fmt.Println(iWillPrintYouUnencrypted) //argot:ignore _
 }
 
 // sanitizerOfIo

diff --git a/analysis/dataflow/inter_procedural.go b/analysis/dataflow/inter_procedural.go
@@ -312,12 +312,15 @@ func (g *InterProceduralFlowGraph) RunVisitorOnEntryPoints(visitor Visitor,
 	// entrypoints at once, but this would require a finer context-tracking mechanism than what the NodeWithCallStack
 	// implements.
 	// If the maximum number of alarms has been reached, stop early.
+	i := 0
 	for _, entry := range entryPoints {
 		if !g.AnalyzerState.TestAlarmCount() {
-			g.AnalyzerState.Logger.Warnf("Some entrypoints are skipped, max number of alarms reached.")
+			g.AnalyzerState.Logger.Warnf("%d entrypoints are skipped, max number of alarms reached.",
+				len(entryPoints)-i)
 			return
 		}
 		visitor.Visit(g.AnalyzerState, entry)
+		i++
 	}
 }
 

diff --git a/analysis/dataflow/state.go b/analysis/dataflow/state.go
@@ -25,6 +25,7 @@ import (
 	"github.com/awslabs/ar-go-tools/analysis/config"
 	"github.com/awslabs/ar-go-tools/analysis/lang"
 	"github.com/awslabs/ar-go-tools/analysis/summaries"
+	"github.com/awslabs/ar-go-tools/internal/analysisutil"
 	"github.com/awslabs/ar-go-tools/internal/pointer"
 	"golang.org/x/tools/go/callgraph"
 	"golang.org/x/tools/go/callgraph/cha"
@@ -38,7 +39,7 @@ type AnalyzerState struct {
 	// Annotations contains all the annotations of the program
 	Annotations annotations.ProgramAnnotations
 
-	// The logger used during the analysis (can be used to control output.
+	// The logger used during the analysis (can be used to control output).
 	Logger *config.LogGroup
 
 	// The configuration file for the analysis
@@ -120,10 +121,23 @@ func NewAnalyzerState(p *ssa.Program, pkgs []*packages.Package, l *config.LogGro
 	steps []func(*AnalyzerState)) (*AnalyzerState, error) {
 	var allContracts []Contract
 
-	// Load annotations
+	// Load annotations by scanning all packages' syntax
 	pa, err := annotations.LoadAnnotations(l, p.AllPackages())
+
 	if pkgs != nil {
-		pa.CompleteFromSyntax(l, pkgs)
+		for _, pkg := range pkgs {
+			analysisutil.VisitPackages(pkg, func(p *packages.Package) bool {
+				// Don't scan stdlib for annotations!
+				if summaries.IsStdPackageName(p.Name) {
+					return false
+				}
+				// TODO: find a way to not scan dependencies if there is demand. Currently, it is unlikely that some
+				// dependencies will contain argot annotations.
+				l.Debugf("Scan %s for annotations.\n", p.PkgPath)
+				pa.CompleteFromSyntax(l, p)
+				return true
+			})
+		}
 	}
 
 	if err != nil {
@@ -217,6 +231,7 @@ func NewDefaultAnalyzer(p *ssa.Program, pkgs []*packages.Package) (*AnalyzerStat
 // CopyTo copies pointers in receiver into argument (shallow copy of everything except mutex).
 // Do not use two copies in separate routines.
 func (s *AnalyzerState) CopyTo(b *AnalyzerState) {
+	b.Annotations = s.Annotations
 	b.BoundingInfo = s.BoundingInfo
 	b.Config = s.Config
 	b.EscapeAnalysisState = s.EscapeAnalysisState

diff --git a/analysis/taint/dataflow_visitor.go b/analysis/taint/dataflow_visitor.go
@@ -140,21 +140,28 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) {
 		// you are tracking flows to logging but don't care about integers and booleans for example).
 		if isFiltered(s, v.taintSpec, cur.Node) {
 			if _, isIf := cur.Node.(*df.IfNode); !isIf || v.taintSpec.FailOnImplicitFlow {
-				logger.Infof("Filtered value: %s\n", cur.Node.String())
-				logger.Infof("At: %s\n", cur.Node.Position(s))
+				// Filtered values logged at debug level -- there can be many of those.
+				logger.Debugf("Filtered value: %s\n", cur.Node.String())
+				logger.Debugf("At: %s\n", cur.Node.Position(s))
 			}
 			continue
 		}
 
 		// If node is sink, then we reached a sink from a source, and we must log the taint flow.
 		if isSink(s, v.taintSpec, cur.Node) && cur.Status.Kind == df.DefaultTracing {
-			if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) {
-				reportTaintFlow(s, v.currentSource, cur)
-			}
-			// Stop if there is a limit on number of alarms, and it has been reached.
-			if !s.IncrementAndTestAlarms() {
-				logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms)
-				return
+			// Don't report taint flow if the sink location is annotated with //argot:ignore
+			if s.Annotations.IsIgnoredPos(cur.Node.Position(s), v.taintSpec.Tag) {
+				s.Logger.Infof("//argot:ignore taint flow to %s",
+					cur.Node.Position(s))
+			} else {
+				if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) {
+					reportTaintFlow(s, v.currentSource, cur)
+				}
+				// Stop if there is a limit on number of alarms, and it has been reached.
+				if !s.IncrementAndTestAlarms() {
+					logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms)
+					return
+				}
 			}
 			// A sink does not have successors in the taint flow analysis (but other sinks can be reached
 			// as there are still values flowing).

diff --git a/analysis/taint/report.go b/analysis/taint/report.go
@@ -76,7 +76,8 @@ func reportCoverage(coverage map[string]bool, coverageWriter io.StringWriter) {
 }
 
 // reportTaintFlow reports a taint flow by writing to a file if the configuration has the ReportPaths flag set,
-// and writing in the logger
+// and writing in the logger.
+// The function checks the annotations to suppress reports where there are //argot:ignore annotations.
 func reportTaintFlow(c *dataflow.AnalyzerState, source dataflow.NodeWithTrace, sink *dataflow.VisitorNode) {
 	c.Logger.Infof(" !!!! TAINT FLOW !!!!")
 	c.Logger.Infof(" 💀 Sink reached at %s\n", formatutil.Red(sink.Node.Position(c)))

diff --git a/analysis/taint/taint.go b/analysis/taint/taint.go
@@ -140,6 +140,7 @@ func Analyze(cfg *config.Config, prog *ssa.Program, pkgs []*packages.Package) (A
 			}
 		}
 		visitor := NewVisitor(&taintSpec)
+		state.Logger.Infof("Analyzing taint-tracking problem %s", taintSpec.Tag)
 		analysis.RunInterProcedural(state, visitor, analysis.InterProceduralParams{
 			// The entry points are specific to each taint tracking problem (unlike in the intra-procedural pass)
 			IsEntrypoint: func(node ssa.Node) bool { return IsSourceNode(state, &taintSpec, node) },

diff --git a/analysis/taint/testdata/annotations/main.go b/analysis/taint/testdata/annotations/main.go
@@ -22,7 +22,7 @@ import (
 )
 
 // Specific problem-level config options can be set in annotations
-//argot:config lim SetOptions(max-alarms=2,unsafe-max-depth=2)
+//argot:config lim SetOptions(max-alarms=12,unsafe-max-depth=2)
 
 // bar
 // It is also a source for the problem hybridDef also defined in the config.json
@@ -79,7 +79,7 @@ func main() {
 	sink(s)                          //  @Sink(ex1)
 	sinkOnSecondArg(s, "ok")         // only second argument of this is a sink
 	sinkOnSecondArg("ok", s)         // @Sink(ex2)
-	sink(sanitizeSecondArg(s, "ok")) // @Sink(ex1)
+	sink(sanitizeSecondArg(s, "ok")) //argot:ignore _ (but should be an ex1 sink)
 	sink(sanitizeSecondArg("ok", s))
 	sink(sanitizer(s))
 	fmt.Println(s)            // @Sink(hybridDef)

diff --git a/doc/01_taint.md b/doc/01_taint.md
@@ -202,6 +202,17 @@ options:
     max-alarms: 2
 ```
 
+#### Finding Suppression
+
+You may encounter false positives in the taint analysis, some of which cannot be easily resolved by making the configuration more precise or by changing the code.
+When you are confident the finding is a false positive, you can suppress the findings of the taint analysis on a specific line by using the `//argot:ignore problem-tag` annotation. 
+For example:
+```go
+...
+   callSink(notReaalyTaintedData) //argot:ignore _ 
+```
+Will suppress findings for all taint problems. Taint problems can be associated with a `tag: tagName` in the configuration, and you can suppress findings specifically for `tagName` by using `//argot:ignore tagName`. 
+
 #### Warning Suppression 
 The use can set the setting `warn: false` to suppress warnings during the analysis. This means that if the analysis encounters program constructs that make it unsound, those will not be reported. This setting does not affect the soundness of the analysis, but it will cause the tool to not report when your program falls beyond the soundness guarantees.
 

diff --git a/internal/analysisutil/iterators.go b/internal/analysisutil/iterators.go
@@ -0,0 +1,39 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package analysisutil
+
+import (
+	"golang.org/x/tools/go/packages"
+)
+
+// VisitPackages calls f recursively on the root package provided, and then all the imports provided f returns
+// true for that package. If f returns false, the imports will not be visited.
+func VisitPackages(root *packages.Package, f func(p *packages.Package) bool) {
+	seen := map[*packages.Package]bool{}
+	queue := []*packages.Package{root}
+	for len(queue) > 0 {
+		cur := queue[0]
+		queue = queue[1:]
+		if seen[cur] {
+			continue
+		}
+		if f(cur) {
+			for _, importedPkg := range cur.Imports {
+				queue = append(queue, importedPkg)
+			}
+		}
+		seen[cur] = true
+	}
+}
diff --git a/internal/funcutil/collections.go b/internal/funcutil/collections.go
@@ -134,6 +134,16 @@ func Exists[T any](a []T, f func(T) bool) bool {
 	return false
 }
 
+// ExistsInMap returns true when there exists some k,x in map a such that f(k,x), otherwise false.
+func ExistsInMap[K comparable, T any](a map[K]T, f func(K, T) bool) bool {
+	for k, x := range a {
+		if f(k, x) {
+			return true
+		}
+	}
+	return false
+}
+
 // FindMap returns Some(f(x)) when there exists some x in slice a such that p(f(x)), otherwise None.
 func FindMap[T any, R any](a []T, f func(T) R, p func(R) bool) Optional[R] {
 	for _, x := range a {