Skip to content

Commit

Permalink
Add logic for automated hashing of CSM Mesh name (#42)
Browse files Browse the repository at this point in the history
  • Loading branch information
arvindbr8 authored Sep 19, 2023
1 parent 57a5b7b commit 6f96a8f
Show file tree
Hide file tree
Showing 6 changed files with 471 additions and 12 deletions.
92 changes: 92 additions & 0 deletions csmnamer/hash.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// DO NOT EDIT: This is a sync of services_platform/thetis/common/gke_net/naming.go
// and should not be modified to maintain functional consistency.

package csmnamer

import (
"crypto/sha256"
"strconv"
)

// table maps an index in [0, 36) to a one-character string: "0".."9" followed
// by "a".."z". Hash uses it to turn digest bytes into name-safe characters.
var table []string

// init populates table with the ten decimal digits and then the twenty-six
// lowercase ASCII letters, in that order.
func init() {
	const (
		digitCount  = 10
		letterCount = 26
	)
	table = make([]string, 0, digitCount+letterCount)
	for d := 0; d < digitCount; d++ {
		table = append(table, strconv.Itoa(d))
	}
	for offset := 0; offset < letterCount; offset++ {
		letter := 'a' + rune(offset)
		table = append(table, string(letter))
	}
}

// Hash returns a content hash of s that is at most n characters long. Each
// output character is taken from table (digits and lowercase letters) by
// reducing one byte of the SHA-256 digest of s modulo len(table), so the result
// is never longer than the 32-byte digest; a non-positive n yields "".
//
// Because 256 is not a multiple of 36, the first four table entries are
// slightly more likely (8/256 = 3.125%) than the remaining ones
// (7/256 = 2.734375%). The per-character collision chance is therefore
// (4 * ((8/256)^2) + (36-4) * ((7/256)^2)) rather than (1 / 36). For an
// 8 character hash string this works out to 3.600e-13 instead of 3.545e-13,
// a negligible increase.
func Hash(s string, n int) string {
	digest := sha256.Sum256([]byte(s))

	// Never read past the end of the digest, whatever n was requested.
	count := len(digest)
	if n < count {
		count = n
	}

	out := ""
	for pos := 0; pos < count; pos++ {
		out += table[int(digest[pos])%len(table)]
	}
	return out
}

// TrimFieldsEvenly trims fields so that their combined length is at most max.
// Truncation is spread in proportion to each field's original length, so
// shorter fields lose fewer characters than longer ones. Fields are always cut
// from the end.
//
// The input slice is returned unchanged when max <= 0 or when the fields
// already fit within max. Otherwise a new slice with one (possibly empty) entry
// per input field is returned, and the combined length equals max.
//
// Lengths are measured in bytes, so a field containing multi-byte UTF-8
// characters may be cut in the middle of a character.
func TrimFieldsEvenly(max int, fields ...string) []string {
	if max <= 0 {
		return fields
	}
	total := 0
	for _, s := range fields {
		total += len(s)
	}
	if total <= max {
		return fields
	}

	// Distribute truncation evenly among the fields.
	excess := total - max
	remaining := max
	lengths := make([]int, len(fields))
	for i, s := range fields {
		// Scale truncation to shorten longer fields more than ones that are
		// already short. The extra -1 deliberately under-allocates; the
		// leftover budget is handed back below.
		l := len(s) - len(s)*excess/total - 1
		if l < 0 {
			// Only an empty field reaches this point. Without the clamp it
			// would keep a length of -1 and the final slicing would panic.
			l = 0
		}
		lengths[i] = l
		remaining -= l
	}
	// Hand the space lost to rounding back to the leading fields, one byte
	// each, skipping fields that are already kept in full (empty fields).
	for i := 0; i < len(lengths) && remaining > 0; i++ {
		if lengths[i] < len(fields[i]) {
			lengths[i]++
			remaining--
		}
	}

	ret := make([]string, len(fields))
	for i, l := range lengths {
		ret[i] = fields[i][:l]
	}
	return ret
}
100 changes: 100 additions & 0 deletions csmnamer/hasher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// DO NOT EDIT: This is a sync of services_platform/thetis/common/gke_net/naming_test.go
// and should not be modified to maintain functional consistency.

package csmnamer

import "testing"

// TestTrimFieldsEvenly checks, for a table of inputs, that TrimFieldsEvenly
// returns exactly the expected trimmed fields and that their combined length
// does not exceed the requested maximum.
func TestTrimFieldsEvenly(t *testing.T) {
	longString := "01234567890123456789012345678901234567890123456789"
	cases := []struct {
		desc   string
		fields []string
		want   []string
		max    int
	}{
		{
			desc:   "no-change",
			fields: []string{longString},
			want:   []string{longString},
			max:    100,
		},
		{
			desc:   "equal-to-max-and-no-change",
			fields: []string{longString, longString},
			want:   []string{longString, longString},
			max:    100,
		},
		{
			desc:   "equally-trimmed-to-half",
			fields: []string{longString, longString},
			want:   []string{longString[:25], longString[:25]},
			max:    50,
		},
		{
			desc:   "trimmed-to-only-10",
			fields: []string{longString, longString, longString},
			want:   []string{longString[:4], longString[:3], longString[:3]},
			max:    10,
		},
		{
			desc:   "trimmed-to-only-3",
			fields: []string{longString, longString, longString},
			want:   []string{longString[:1], longString[:1], longString[:1]},
			max:    3,
		},
		{
			desc:   "one-long-field-with-one-short-field",
			fields: []string{longString, longString[:10]},
			want:   []string{"01234567890123456", "012"},
			max:    20,
		},
		{
			desc:   "one-long-field-with-one-short-field-and-trimmed-to-1",
			fields: []string{longString, longString[:1]},
			want:   []string{longString[:1], ""},
			max:    1,
		},
		{
			desc:   "one-long-field-with-one-short-field-and-trimmed-to-5",
			fields: []string{longString, longString[:1]},
			want:   []string{longString[:5], ""},
			max:    5,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {
			got := TrimFieldsEvenly(tc.max, tc.fields...)
			if len(got) != len(tc.want) {
				t.Fatalf("TrimFieldsEvenly(): got length %d, want %d", len(got), len(tc.want))
			}

			totalLen := 0
			for i := range got {
				totalLen += len(got[i])
				if got[i] != tc.want[i] {
					t.Errorf("TrimFieldsEvenly(): got the %d field to be %q, want %q", i, got[i], tc.want[i])
				}
			}

			// A total equal to max is allowed; only exceeding it is a failure.
			if tc.max < totalLen {
				t.Errorf("TrimFieldsEvenly(): got total length %d, want at most %d", totalLen, tc.max)
			}
		})
	}
}
103 changes: 103 additions & 0 deletions csmnamer/namer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// DO NOT EDIT: This code is a subset of services_platform/thetis/gateway/core/v1alpha2/common/appnettranslator/gsm/namer.go
// and should not be modified to maintain functional consistency.

package csmnamer

import (
"fmt"
"strings"
"unicode"
)

const (
// nHashLen is the length limit for the hash that is created from the fields
// that uniquely identify a GCE resource and appended as a suffix to the
// resource name.
nHashLen = 12
// resourceNameMaxLen is the max length of a GCE resource name.
resourceNameMaxLen = 63
// clusterUIDLen is the length of the cluster UID, a hash that
// readableResourceName appends to csmMeshPrefix to form the prefix of GCE
// resource names created by GAMMA mesh.
clusterUIDLen = 4
// csmMeshPrefix is the prefix override used in the CSMMesh use cases.
csmMeshPrefix = "gsmmesh"
)

// MeshNamer holds the values from which GenerateMeshId builds a mesh ID.
type MeshNamer struct {
// ClusterName is the first component passed to readableResourceName.
ClusterName string
// Location is the second component passed to readableResourceName.
Location string
}

// GenerateMeshId returns the mesh ID for this namer: the readable resource
// name built from ClusterName followed by Location.
func (m *MeshNamer) GenerateMeshId() string {
	components := []string{m.ClusterName, m.Location}
	return readableResourceName(components...)
}

// Returns a readable resource name in the following format
// {prefix}-{component#0}-{component#1}...-{hash}
// The length of the returned resource name is guarantee to be within
// resourceNameLen which is the maximum length of a GCE resource. A component
// will only be included explicitly in the resource name if it doesn't have an
// invalid character (any character that is not a letter, digit or '-').
// Components in the resource name maybe trimmed to fit the maximum length
// requirement. {hash} uniquely identifies the component set.
func readableResourceName(components ...string) string {
// clusterHash enforces uniqueness of resources of different clusters in
// the same project.
clusterHash := Hash(strings.Join(components, ";"), clusterUIDLen)
prefix := csmMeshPrefix + "-" + clusterHash
// resourceHash enforces uniqueness of resources of the same cluster.
resourceHash := Hash(strings.Join(components, ";"), nHashLen)
// Ideally we explicitly include all components in the GCP resource name, so
// it's easier to be related to the corresponding k8s resource(s). However,
// only certain characters are allowed in a GCP resource name(e.g. a common
// character '.' in hostnames is not allowed in GCP resource name).
var explicitComponents []string
for _, c := range components {
// Only explicitly include a component in GCP resource name if all
// characters in it are allowed. Omitting a component here is okay since
// the resourceHash already represents the full component set.
if allCharAllowedInResourceName(c) {
explicitComponents = append(explicitComponents, c)
}
}
// The maximum total length of components is determined by subtracting length
// of the following substring from the maximum length of resource name:
// * prefix
// * separators "-". There will be len(explicitComponents) + 1 of them.
// * hash
componentsMaxLen := resourceNameMaxLen - len(prefix) - (len(explicitComponents) + 1) - len(resourceHash)
// Drop components from the resource name if the allowed maximum total length
// of them is less them the total number of components. (This happens when
// there are too many components)
if componentsMaxLen < len(explicitComponents) {
return fmt.Sprintf("%s-%s", prefix, resourceHash)
}
// Trim components to fit the allowed maximum total length.
trimmed := TrimFieldsEvenly(componentsMaxLen, explicitComponents...)
return fmt.Sprintf("%s-%s-%s", prefix, strings.Join(trimmed, "-"), resourceHash)
}

// allCharAllowedInResourceName reports whether s is non-empty and consists
// solely of letters, digits and '-'. Letters and digits are classified with
// unicode.IsLetter and unicode.IsDigit, so uppercase and non-ASCII letters and
// digits are accepted as well.
func allCharAllowedInResourceName(s string) bool {
	if s == "" {
		return false
	}
	for _, ch := range s {
		switch {
		case ch == '-', unicode.IsLetter(ch), unicode.IsDigit(ch):
			// Allowed character; keep scanning.
		default:
			return false
		}
	}
	return true
}
Loading

0 comments on commit 6f96a8f

Please sign in to comment.