Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Using argot:ignore annotations in the taint analysis. #101

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 61 additions & 16 deletions analysis/annotations/annotations.go
Original file line number Diff line number Diff line change
@@ -17,7 +17,9 @@ package annotations
import (
"fmt"
"go/ast"
"go/token"
"regexp"
"strconv"
"strings"

"github.com/awslabs/ar-go-tools/analysis/config"
@@ -41,6 +43,8 @@ const (
Sanitizer
// SetOptions is the kind of SetOptions(...) annotations
SetOptions
// Ignore is an empty annotation for a line
Ignore
)

// AnyTag is the special tag used to match any other tag
@@ -98,6 +102,24 @@ func (a Annotation) IsMatchingAnnotation(kind AnnotationKind, tag string) bool {
return a.Kind == kind && (tag == AnyTag || (len(a.Tags) > 0 && a.Tags[0] == AnyTag) || slices.Contains(a.Tags, tag))
}

// LinePos is a simple line-file position indicator.
type LinePos struct {
Line int
File string
}

// NewLinePos returns a LinePos from a token position. The column and offset are abstracted away.
func NewLinePos(pos token.Position) LinePos {
return LinePos{
Line: pos.Line,
File: pos.Filename,
}
}

func (l LinePos) String() string {
return l.File + ":" + strconv.Itoa(l.Line)
}

// A FunctionAnnotation groups the annotations relative to a function into main annotations for the entire function
// and parameter annotations for each parameter
type FunctionAnnotation struct {
@@ -152,6 +174,17 @@ type ProgramAnnotations struct {
Consts map[*ssa.NamedConst][]Annotation
// Globals is the map of global variable annotations (TODO: implementation)
Globals map[*ssa.Global][]Annotation
// Positional is the map of line-file location to annotations that are not attached to a specific construct.
// There can be only one annotation per line.
Positional map[LinePos]Annotation
}

// IsIgnoredPos returns true when the given position is on the same line as an //argot:ignore annotation.
func (pa ProgramAnnotations) IsIgnoredPos(pos token.Position, tag string) bool {
if posAnnot, hasPosAnnot := pa.Positional[NewLinePos(pos)]; hasPosAnnot {
return posAnnot.Kind == Ignore && (posAnnot.Tags[0] == tag || posAnnot.Tags[0] == AnyTag)
}
return false
}

// Count returns the total number of annotations in the program
@@ -178,19 +211,20 @@ func (pa ProgramAnnotations) Iter(fx func(a Annotation)) {
for _, cst := range pa.Globals {
funcutil.Iter(cst, fx)
}
for _, cst := range pa.Positional {
fx(cst)
}
}

// CompleteFromSyntax takes a set of program annotations and adds additional non-ssa linked annotations
// to the annotations
func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []*packages.Package) {
for _, pkg := range pkgs {
for _, astFile := range pkg.Syntax {
pa.loadPackageDocAnnotations(astFile.Doc)
for _, comments := range astFile.Comments {
for _, comment := range comments.List {
if annotationContents := extractAnnotation(comment); annotationContents != nil {
pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()).String())
}
func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkg *packages.Package) {
for _, astFile := range pkg.Syntax {
pa.loadPackageDocAnnotations(astFile.Doc)
for _, comments := range astFile.Comments {
for _, comment := range comments.List {
if annotationContents := extractAnnotation(comment); annotationContents != nil {
pa.loadFileAnnotations(logger, annotationContents, pkg.Fset.Position(comment.Pos()))
}
}
}
@@ -205,11 +239,12 @@ func (pa ProgramAnnotations) CompleteFromSyntax(logger *config.LogGroup, pkgs []
// will also print warnings when some syntactic components of the comments look like they should be an annotation.
func LoadAnnotations(logger *config.LogGroup, packages []*ssa.Package) (ProgramAnnotations, error) {
annotations := ProgramAnnotations{
Configs: map[string]map[string]string{},
Funcs: map[*ssa.Function]FunctionAnnotation{},
Types: map[*ssa.Type][]Annotation{},
Consts: map[*ssa.NamedConst][]Annotation{},
Globals: map[*ssa.Global][]Annotation{},
Configs: map[string]map[string]string{},
Funcs: map[*ssa.Function]FunctionAnnotation{},
Types: map[*ssa.Type][]Annotation{},
Consts: map[*ssa.NamedConst][]Annotation{},
Globals: map[*ssa.Global][]Annotation{},
Positional: map[LinePos]Annotation{},
}
for _, pkg := range packages {
for _, m := range pkg.Members {
@@ -287,21 +322,31 @@ func (pa ProgramAnnotations) loadPackageDocAnnotations(doc *ast.CommentGroup) {
// loadFileAnnotations loads the annotation that are not tied to a specific ssa node. This includes:
// - config annotations
// - positional annotations
func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position string) {
func (pa ProgramAnnotations) loadFileAnnotations(logger *config.LogGroup, annotationContents []string, position token.Position) {
if len(annotationContents) <= 1 {
logger.Warnf("ignoring argot annotation with no arguments at %s", position)
return
}
switch annotationContents[0] {
case ConfigTarget:
pa.loadConfigTargetAnnotation(logger, annotationContents, position)
case IgnoreTarget:
pa.addIgnoreLineAnnotation(position, annotationContents)
}
}

// addIgnoreLineAnnotation adds a Ignore annotation at the position's line
func (pa ProgramAnnotations) addIgnoreLineAnnotation(position token.Position, annotationContents []string) {
pa.Positional[NewLinePos(position)] = Annotation{
Kind: Ignore,
Tags: []string{annotationContents[1]}, // only the tag is used
}
}

// loadConfigTargetAnnotation loads a config annotation. Config annotations look like
// "//argot:config tag SetOptions(option-name-1=value1,option-name-2=value2)" and are always linked to a specific problem
// tag.
func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position string) {
func (pa ProgramAnnotations) loadConfigTargetAnnotation(logger *config.LogGroup, annotationContents []string, position token.Position) {
if len(annotationContents) <= 2 {
logger.Warnf("argot:config expects a target tag and one or more SetOptions")
logger.Warnf("a comment is likely missing something at %s", position)
26 changes: 26 additions & 0 deletions analysis/annotations/annotations_test.go
Original file line number Diff line number Diff line change
@@ -36,9 +36,35 @@ func TestLoadAnnotations(t *testing.T) {
testProgram.Prog.Build()
logger := config.NewLogGroup(testProgram.Config)
a, err := annotations.LoadAnnotations(logger, testProgram.Prog.AllPackages())
a.CompleteFromSyntax(logger, testProgram.Pkgs[0])
if err != nil {
t.Errorf("error loading annotations: %s", err)
}

// Positional annotations like //argot:ignore
t.Logf("%d positional annotations", len(a.Positional))
if !funcutil.ExistsInMap(a.Positional, func(k annotations.LinePos, _ annotations.Annotation) bool {
return k.Line == 43
}) {
t.Errorf("Expected annotation at line 43.")
}

for pos, annot := range a.Positional {
if pos.Line == 43 && (annot.Kind != annotations.Ignore || !funcutil.Contains(annot.Tags, "_")) {
t.Errorf("Expected annotation at line 43 to be //argot:ignore _ but its %v", annot)
}
}

// Config annotations like //argot:config
t.Logf("%d config annotations", len(a.Configs))
if _, hasOpt := a.Configs["_"]; !hasOpt {
t.Errorf("expected a config option for _")
}
if a.Configs["_"]["log-level"] != "3" {
t.Errorf("Expected config option log-level set to 3 by annotation.")
}

// Function annotations like //argot:function and //argot:param
t.Logf("%d functions scanned", len(a.Funcs))
for ssaFunc, functionAnnotation := range a.Funcs {
switch ssaFunc.Name() {
4 changes: 3 additions & 1 deletion analysis/annotations/testdata/main.go
Original file line number Diff line number Diff line change
@@ -16,6 +16,8 @@ package main

import "fmt"

//argot:config _ @SetOptions(log-level=3) configures log-level for all problems

// sink and source annotation are relative to data categories (e.g. in this example bar, io and html)
// Sink(_) means it is always a sink

@@ -38,7 +40,7 @@ func foo() string {
//
//argot:function Sink(_)
func superSensitiveFunction(iWillPrintYouUnencrypted string) {
fmt.Println(iWillPrintYouUnencrypted)
fmt.Println(iWillPrintYouUnencrypted) //argot:ignore _
}

// sanitizerOfIo
5 changes: 4 additions & 1 deletion analysis/dataflow/inter_procedural.go
Original file line number Diff line number Diff line change
@@ -312,12 +312,15 @@ func (g *InterProceduralFlowGraph) RunVisitorOnEntryPoints(visitor Visitor,
// entrypoints at once, but this would require a finer context-tracking mechanism than what the NodeWithCallStack
// implements.
// If the maximum number of alarms has been reached, stop early.
i := 0
for _, entry := range entryPoints {
if !g.AnalyzerState.TestAlarmCount() {
g.AnalyzerState.Logger.Warnf("Some entrypoints are skipped, max number of alarms reached.")
g.AnalyzerState.Logger.Warnf("%d entrypoints are skipped, max number of alarms reached.",
len(entryPoints)-i)
return
}
visitor.Visit(g.AnalyzerState, entry)
i++
}
}

21 changes: 18 additions & 3 deletions analysis/dataflow/state.go
Original file line number Diff line number Diff line change
@@ -25,6 +25,7 @@ import (
"github.com/awslabs/ar-go-tools/analysis/config"
"github.com/awslabs/ar-go-tools/analysis/lang"
"github.com/awslabs/ar-go-tools/analysis/summaries"
"github.com/awslabs/ar-go-tools/internal/analysisutil"
"github.com/awslabs/ar-go-tools/internal/pointer"
"golang.org/x/tools/go/callgraph"
"golang.org/x/tools/go/callgraph/cha"
@@ -38,7 +39,7 @@ type AnalyzerState struct {
// Annotations contains all the annotations of the program
Annotations annotations.ProgramAnnotations

// The logger used during the analysis (can be used to control output.
// The logger used during the analysis (can be used to control output).
Logger *config.LogGroup

// The configuration file for the analysis
@@ -120,10 +121,23 @@ func NewAnalyzerState(p *ssa.Program, pkgs []*packages.Package, l *config.LogGro
steps []func(*AnalyzerState)) (*AnalyzerState, error) {
var allContracts []Contract

// Load annotations
// Load annotations by scanning all packages' syntax
pa, err := annotations.LoadAnnotations(l, p.AllPackages())

if pkgs != nil {
pa.CompleteFromSyntax(l, pkgs)
for _, pkg := range pkgs {
analysisutil.VisitPackages(pkg, func(p *packages.Package) bool {
// Don't scan stdlib for annotations!
if summaries.IsStdPackageName(p.Name) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also avoid scanning dependencies? Not sure how to identify them though

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll leave a TODO and make an issue. Based on demand we can figure out how to implement it, but early on it's unlikely the dependencies will have annotations, and scanning is relatively fast compared to all the other analysis steps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue #102 for tracking this.

return false
}
// TODO: find a way to not scan dependencies if there is demand. Currently, it is unlikely that some
// dependencies will contain argot annotations.
l.Debugf("Scan %s for annotations.\n", p.PkgPath)
pa.CompleteFromSyntax(l, p)
return true
})
}
}

if err != nil {
@@ -217,6 +231,7 @@ func NewDefaultAnalyzer(p *ssa.Program, pkgs []*packages.Package) (*AnalyzerStat
// CopyTo copies pointers in receiver into argument (shallow copy of everything except mutex).
// Do not use two copies in separate routines.
func (s *AnalyzerState) CopyTo(b *AnalyzerState) {
b.Annotations = s.Annotations
b.BoundingInfo = s.BoundingInfo
b.Config = s.Config
b.EscapeAnalysisState = s.EscapeAnalysisState
25 changes: 16 additions & 9 deletions analysis/taint/dataflow_visitor.go
Original file line number Diff line number Diff line change
@@ -140,21 +140,28 @@ func (v *Visitor) Visit(s *df.AnalyzerState, source df.NodeWithTrace) {
// you are tracking flows to logging but don't care about integers and booleans for example).
if isFiltered(s, v.taintSpec, cur.Node) {
if _, isIf := cur.Node.(*df.IfNode); !isIf || v.taintSpec.FailOnImplicitFlow {
logger.Infof("Filtered value: %s\n", cur.Node.String())
logger.Infof("At: %s\n", cur.Node.Position(s))
// Filtered values logged at debug level -- there can be many of those.
logger.Debugf("Filtered value: %s\n", cur.Node.String())
logger.Debugf("At: %s\n", cur.Node.Position(s))
}
continue
}

// If node is sink, then we reached a sink from a source, and we must log the taint flow.
if isSink(s, v.taintSpec, cur.Node) && cur.Status.Kind == df.DefaultTracing {
if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) {
reportTaintFlow(s, v.currentSource, cur)
}
// Stop if there is a limit on number of alarms, and it has been reached.
if !s.IncrementAndTestAlarms() {
logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms)
return
// Don't report taint flow if the sink location is annotated with //argot:ignore
if s.Annotations.IsIgnoredPos(cur.Node.Position(s), v.taintSpec.Tag) {
s.Logger.Infof("//argot:ignore taint flow to %s",
cur.Node.Position(s))
} else {
if v.taints.addNewPathCandidate(NewFlowNode(v.currentSource), NewFlowNode(cur.NodeWithTrace)) {
reportTaintFlow(s, v.currentSource, cur)
}
// Stop if there is a limit on number of alarms, and it has been reached.
if !s.IncrementAndTestAlarms() {
logger.Warnf("Reached the limit of %d alarms.", s.Config.MaxAlarms)
return
}
}
// A sink does not have successors in the taint flow analysis (but other sinks can be reached
// as there are still values flowing).
3 changes: 2 additions & 1 deletion analysis/taint/report.go
Original file line number Diff line number Diff line change
@@ -76,7 +76,8 @@ func reportCoverage(coverage map[string]bool, coverageWriter io.StringWriter) {
}

// reportTaintFlow reports a taint flow by writing to a file if the configuration has the ReportPaths flag set,
// and writing in the logger
// and writing in the logger.
// The function checks the annotations to suppress reports where there are //argot:ignore annotations.
func reportTaintFlow(c *dataflow.AnalyzerState, source dataflow.NodeWithTrace, sink *dataflow.VisitorNode) {
c.Logger.Infof(" !!!! TAINT FLOW !!!!")
c.Logger.Infof(" 💀 Sink reached at %s\n", formatutil.Red(sink.Node.Position(c)))
1 change: 1 addition & 0 deletions analysis/taint/taint.go
Original file line number Diff line number Diff line change
@@ -140,6 +140,7 @@ func Analyze(cfg *config.Config, prog *ssa.Program, pkgs []*packages.Package) (A
}
}
visitor := NewVisitor(&taintSpec)
state.Logger.Infof("Analyzing taint-tracking problem %s", taintSpec.Tag)
analysis.RunInterProcedural(state, visitor, analysis.InterProceduralParams{
// The entry points are specific to each taint tracking problem (unlike in the intra-procedural pass)
IsEntrypoint: func(node ssa.Node) bool { return IsSourceNode(state, &taintSpec, node) },
4 changes: 2 additions & 2 deletions analysis/taint/testdata/annotations/main.go
Original file line number Diff line number Diff line change
@@ -22,7 +22,7 @@ import (
)

// Specific problem-level config options can be set in annotations
//argot:config lim SetOptions(max-alarms=2,unsafe-max-depth=2)
//argot:config lim SetOptions(max-alarms=12,unsafe-max-depth=2)

// bar
// It is also a source for the problem hybridDef also defined in the config.json
@@ -79,7 +79,7 @@ func main() {
sink(s) // @Sink(ex1)
sinkOnSecondArg(s, "ok") // only second argument of this is a sink
sinkOnSecondArg("ok", s) // @Sink(ex2)
sink(sanitizeSecondArg(s, "ok")) // @Sink(ex1)
sink(sanitizeSecondArg(s, "ok")) //argot:ignore _ (but should be an ex1 sink)
sink(sanitizeSecondArg("ok", s))
sink(sanitizer(s))
fmt.Println(s) // @Sink(hybridDef)
11 changes: 11 additions & 0 deletions doc/01_taint.md
Original file line number Diff line number Diff line change
@@ -202,6 +202,17 @@ options:
max-alarms: 2
```

#### Finding Suppression

You may encounter false positives in the taint analysis, some of which cannot be easily resolved by making the configuration more precise or by changing the code.
When you are confident the finding is a false positive, you can suppress the findings of the taint analysis on a specific line by using the `//argot:ignore problem-tag` annotation.
For example:
```go
...
callSink(notReaalyTaintedData) //argot:ignore _
```
Will suppress findings for all taint problems. Taint problems can be associated with a `tag: tagName` in the configuration, and you can suppress findings specifically for `tagName` by using `//argot:ignore tagName`.

#### Warning Suppression
The use can set the setting `warn: false` to suppress warnings during the analysis. This means that if the analysis encounters program constructs that make it unsound, those will not be reported. This setting does not affect the soundness of the analysis, but it will cause the tool to not report when your program falls beyond the soundness guarantees.

39 changes: 39 additions & 0 deletions internal/analysisutil/iterators.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package analysisutil

import (
"golang.org/x/tools/go/packages"
)

// VisitPackages calls f recursively on the root package provided, and then all the imports provided f returns
// true for that package. If f returns false, the imports will not be visited.
func VisitPackages(root *packages.Package, f func(p *packages.Package) bool) {
seen := map[*packages.Package]bool{}
queue := []*packages.Package{root}
for len(queue) > 0 {
cur := queue[0]
queue = queue[1:]
if seen[cur] {
continue
}
if f(cur) {
for _, importedPkg := range cur.Imports {
queue = append(queue, importedPkg)
}
}
seen[cur] = true
}
}
10 changes: 10 additions & 0 deletions internal/funcutil/collections.go
Original file line number Diff line number Diff line change
@@ -134,6 +134,16 @@ func Exists[T any](a []T, f func(T) bool) bool {
return false
}

// ExistsInMap returns true when there exists some k,x in map a such that f(k,x), otherwise false.
func ExistsInMap[K comparable, T any](a map[K]T, f func(K, T) bool) bool {
for k, x := range a {
if f(k, x) {
return true
}
}
return false
}

// FindMap returns Some(f(x)) when there exists some x in slice a such that p(f(x)), otherwise None.
func FindMap[T any, R any](a []T, f func(T) R, p func(R) bool) Optional[R] {
for _, x := range a {