Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(checks): improve argument handling in AVD-DS-0001 #326

Merged
merged 1 commit into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 76 additions & 62 deletions checks/docker/latest_tag.rego
Original file line number Diff line number Diff line change
Expand Up @@ -19,90 +19,104 @@ import rego.v1

import data.lib.docker

# returns element after AS
get_alias(values) := alias if {
"as" == lower(values[i])
alias = values[i + 1]
}
resolve_alias(values) := values[i + 1] if "as" == lower(values[i])

get_aliases contains aliases if {
from_cmd := docker.from[_]
aliases := get_alias(from_cmd.Value)
}
all_aliases := [resolve_alias(from.Value) | some from in docker.from]

is_alias(img) if {
img == get_aliases[_]
}
is_alias(img) if img in all_aliases

# image_names returns the image in FROM statement.
image_names contains image_name if {
from := docker.from[_]
image_name := from.Value[0]
}
image_names := [from.Value[0] | some from in docker.from]

# parse_tag returns the image and tag.
parse_tag(name) := [img, tag] if {
[img, tag] = split(name, ":")
}
parse_tag(name) := [img, tag] if [img, tag] = split(name, ":")

# parse_tag returns the image and "latest" if a tag is not specified.
parse_tag(img) := [img, tag] if {
tag := "latest"
not contains(img, ":")
parse_tag(img) := [img, "latest"] if not contains(img, ":")

# parses the image and tag if the reference does not reference any variables
parse_image_and_tag(from, _) := [img, tag] if {
reference := from.Value[0]
not contains(reference, "$")
[img, tag] = parse_tag(reference)
}

#base scenario
image_tags contains output if {
from := docker.from[_]
name := from.Value[0]
not startswith(name, "$")
[img, tag] = parse_tag(name)
output := {
"img": img,
"tag": tag,
"cmd": from,
}
global_vars := stage_args(input.Stages[0])

# see https://docs.docker.com/build/building/variables/#scoping
# stage_vars returns the variables available to the stage at index stage_num.
# For any index other than 0 the result is global_vars merged with the ARGs
# declared in that stage (object.union, so stage-level values take precedence
# on key conflicts). Otherwise it falls back to global_vars alone.
stage_vars(stage_num) := object.union(
global_vars,
stage_args(input.Stages[stage_num]),
) if {
stage_num != 0
} else := global_vars

# stage_args builds an object mapping ARG names to their default values for
# the given stage. Only commands whose Cmd is "arg" are considered, and only
# the first element of each command's Value is parsed (via parse_arg).
stage_args(stage) := {name: def_value |
some instruction in stage.Commands
instruction.Cmd == "arg"
[name, def_value] = parse_arg(instruction.Value[0])
}

# If a variable is used with FROM then its value should contain a tag
image_tags contains output if {
some i, j, k, l
from := docker.from[i]
name := from.Value[0]
# parse_arg splits a raw ARG definition on "=" (surrounding whitespace allowed)
# into [name, default value]; double quotes are trimmed from both ends of the value.
# When the split does not produce exactly two parts (for example, no "=" is present),
# the whole input is returned as the name with an empty default value.
# NOTE(review): a default value that itself contains "=" splits into more than two
# parts and therefore also takes the else branch - confirm this is intended.
parse_arg(raw) := [name, trim(def_value, "\"")] if {
[name, def_value] := regex.split(`\s*=\s*`, raw)
} else := [raw, ""]

cmd_obj := input.Stages[j].Commands[k]
# variable_pattern matches a variable reference in either braced form (${...})
# or bare form ($ followed by an identifier made of letters, digits and underscores).
variable_pattern := `\$\{[^}]+\}|\$[a-zA-Z_][a-zA-Z0-9_]*`

possibilities := {"arg", "env"}
cmd_obj.Cmd == possibilities[l]
# find_var_refs returns every substring of s that matches variable_pattern
# (the -1 argument asks regex.find_n for all matches).
find_var_refs(s) := regex.find_n(variable_pattern, s, -1)

startswith(name, "$")
# eval_string substitutes every variable reference found in s with its value
# from vars. A reference whose name is not a key of vars is replaced with an
# empty string.
eval_string(s, vars) := strings.replace_n(patterns, s) if {
# maps each literal reference (e.g. "${TAG}" or "$TAG") to its replacement text
patterns := {var_ref: variable |
some var_ref in find_var_refs(s)
variable := object.get(vars, extract_var_name(var_ref), "")
}
}

bare_var := trim_prefix(name, "$")
# braced form: drops the leading "${" and the trailing "}" to obtain the variable name
extract_var_name(s) := substring(s, 2, count(s) - 3) if startswith(s, "${")

startswith(cmd_obj.Value[0], bare_var)
# bare form: drops the first character (the "$") to obtain the variable name
extract_var_name(s) := substring(s, 1, count(s) - 1) if not startswith(s, "${")

[_, bare_image_name] := regex.split(`\s*=\s*`, cmd_obj.Value[0])
# parses the image and tag if the evaluated reference does not end with a variable
# and does not contain a tag part. Example: ${REGISTRY}/foo
parse_image_and_tag(from, vars) := [img, tag] if {
reference := from.Value[0]
contains(reference, "$")

[img, tag] = parse_tag(bare_image_name)
output := {
"img": img,
"tag": tag,
"cmd": from,
}
res := eval_string(reference, vars)
not contains(res, ":")
not is_string_ending_with_var(res)

[img, tag] = parse_tag(res)
}

# fail_latest is true if image is not scratch
# and image is not an alias
# and tag is latest.
fail_latest contains output if {
output := image_tags[_]
output.img != "scratch"
not is_alias(output.img)
output.tag == "latest"
# checks if the string ends with a variable.
# True when at least one variable reference found by find_var_refs
# is a suffix of the given string.
is_string_ending_with_var(reference) if {
some var_ref in find_var_refs(reference)
endswith(reference, var_ref)
}

# parses the image and tag if the evaluated reference contains a tag
# that does not reference any variable. Example: ${REGISTRY}/foo:bar
# Applies only to FROM references that contain "$"; the reference is first
# evaluated with eval_string using the supplied vars.
# NOTE(review): the split must yield exactly two parts, so an evaluated
# reference with more than one ":" (e.g. a registry host with a port) does
# not match this rule - confirm this is acceptable.
parse_image_and_tag(from, vars) := [img, tag] if {
reference := from.Value[0]
contains(reference, "$")

res := eval_string(reference, vars)
[img, tag] := split(res, ":")
not contains(tag, "$")
}

deny contains res if {
output := fail_latest[_]
msg := sprintf("Specify a tag in the 'FROM' statement for image '%s'", [output.img])
res := result.new(msg, output.cmd)
some instruction in input.Stages[i].Commands
instruction.Cmd == "from"

vars := stage_vars(i)
[img, tag] := parse_image_and_tag(instruction, vars)

img != "scratch"
not is_alias(img)
tag == "latest"

msg := sprintf("Specify a tag in the 'FROM' statement for image '%s'", [img])
res := result.new(msg, instruction)
}
121 changes: 121 additions & 0 deletions checks/docker/latest_tag_test.rego
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,124 @@ test_multi_stage_no_tag_denied if {
count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image 'alpine'"
}

# A global ARG TAG whose (double-quoted) default value is latest, referenced as
# the tag in FROM foo:${TAG}, must produce exactly one denial for image 'foo'.
test_deny_latest_tag_ref_to_global_arg_with_default_value if {
r := deny with input as {"Stages": [
{"Name": "", "Commands": [{
"Cmd": "arg",
"Value": ["TAG=\"latest\""],
}]},
{"Name": "foo:${TAG}", "Commands": [{
"Cmd": "from",
"Value": ["foo:${TAG}"],
}]},
]}

count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image 'foo'"
}

test_allow_tag_ref_to_global_arg_without_default_value if {
r := deny with input as {"Stages": [
{"Name": "", "Commands": [{
"Cmd": "arg",
"Value": ["TAG"],
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the tag doesn't have a value, doesn't it default to latest? Or is this test covering another case?

Copy link
Contributor Author

@nikpivkin nikpivkin Jan 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the image ref is alpine:$TAG and the user has not specified an argument, then by default the argument will be an empty string and the image ref will be the invalid string "alpine:", so we can't say that the latest tag is being used.

❯ cat Dockerfile
ARG TAG
FROM alpine:$TAG

❯ docker build --no-cache -t test-img . -f Dockerfile
...
--------------------
   1 |     ARG TAG
   2 | >>> FROM alpine:$TAG
--------------------
ERROR: failed to solve: failed to parse stage name "alpine:": invalid reference format

But if the image ref is alpine$TAG and the user has not specified an argument, then the ref will be equal to alpine, which is a valid reference, and the tag will be equal to latest.

❯ cat Dockerfile
ARG TAG
FROM alpine$TAG%

❯ docker build --no-cache -t test-img . -f Dockerfile
[+] Building 0.0s (5/5) FINISHED                                                                                            docker:default
 => [internal] load build definition from Dockerfile                                                                                  0.0s
 => => transferring dockerfile: 60B                                                                                                   0.0s
 => [internal] load metadata for docker.io/library/alpine:latest                                                                      0.0s
 => [internal] load .dockerignore                                                                                                     0.0s
 => => transferring context: 2B                                                                                                       0.0s
 => CACHED [1/1] FROM docker.io/library/alpine:latest

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see the difference now. Thanks!

}]},
{"Name": "foo:${TAG}", "Commands": [{
"Cmd": "from",
"Value": ["foo:${TAG}"],
}]},
]}

count(r) == 0
}

# A global ARG REGISTRY without a default value is used as a prefix in
# FROM ${REGISTRY}/ubuntu. The reference has no tag part, so one denial is
# expected; the reported image name is '/ubuntu' because the variable
# evaluates to an empty string.
test_deny_image_ref_to_global_arg_without_default_value if {
r := deny with input as {"Stages": [
{"Name": "", "Commands": [{
"Cmd": "arg",
"Value": ["REGISTRY"],
}]},
{"Name": "${REGISTRY}/ubuntu", "Commands": [{
"Cmd": "from",
"Value": ["${REGISTRY}/ubuntu"],
}]},
]}

count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image '/ubuntu'"
}

# The global ARG TAG defaults to "test". The stage with FROM foo:${TAG}
# redeclares TAG=latest, while the stage with FROM bar:${TAG} declares nothing
# of its own. Exactly one denial is expected, and it must be for 'foo'.
test_deny_global_arg_is_overrided_to_latest if {
r := deny with input as {"Stages": [
{"Name": "", "Commands": [{
"Cmd": "arg",
"Value": ["TAG=test"],
}]},
{"Name": "foo:${TAG}", "Commands": [
{
"Cmd": "arg",
"Value": ["TAG=latest"],
},
{
"Cmd": "from",
"Value": ["foo:${TAG}"],
},
]},
{"Name": "bar:${TAG}", "Commands": [{
"Cmd": "from",
"Value": ["bar:${TAG}"],
}]},
]}

count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image 'foo'"
}

# The image path is assembled from two global ARGs (REPO and IMAGE) while the
# tag is the literal "latest". One denial is expected, reporting the
# evaluated image name 'repo/image'.
test_deny_image_ref_to_multiple_args_but_tag_latest if {
r := deny with input as {"Stages": [
{"Name": "", "Commands": [
{
"Cmd": "arg",
"Value": ["REPO=repo"],
},
{
"Cmd": "arg",
"Value": ["IMAGE=image"],
},
]},
{"Name": "$REPO/$IMAGE:latest", "Commands": [{
"Cmd": "from",
"Value": ["$REPO/$IMAGE:latest"],
}]},
]}

count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image 'repo/image'"
}

# A global ARG TAG without a default value is appended directly to the image
# name (FROM alpine$TAG, no ":" separator). One denial is expected for
# image 'alpine'.
test_deny_empty_tag_arg if {
r := deny with input as {"Stages": [
{"Name": "", "Commands": [{
"Cmd": "arg",
"Value": ["TAG"],
}]},
{"Name": "alpine$TAG", "Commands": [{
"Cmd": "from",
"Value": ["alpine$TAG"],
}]},
]}

count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image 'alpine'"
}

# FROM alpine$TAG where no ARG named TAG is declared anywhere in the input.
# One denial is expected for image 'alpine'.
test_deny_missing_tag_arg if {
r := deny with input as {"Stages": [{"Name": "alpine$TAG", "Commands": [{
"Cmd": "from",
"Value": ["alpine$TAG"],
}]}]}

count(r) == 1
r[_].msg == "Specify a tag in the 'FROM' statement for image 'alpine'"
}
Loading