Skip to content

Commit 1aa5192

Browse files
authored
fix: GH Actions BGESmall (#3)
* fix: bge-small actions * fix: handle model_max_length overflow * chore: log maxLen * fix: math.Abs() on the overflowed value * fix: coercing to int, then abs() * fix: min(MaxInt32, model_max_length)
1 parent 8d48594 commit 1aa5192

File tree

2 files changed

+24
-22
lines changed

2 files changed

+24
-22
lines changed

fastembed.go

+4-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,10 @@ func loadTokenizer(modelPath string, maxLength int) (*tokenizer.Tokenizer, error
334334
return nil, err
335335
}
336336

337-
maxLength = min(maxLength, int(tokenizerConfig["model_max_length"].(float64)))
337+
// Clamp model_max_length to math.MaxInt32 before converting to int, so an out-of-range float64 cannot overflow the conversion.
338+
modelMaxLen := int(math.Min(float64(math.MaxInt32), math.Abs(tokenizerConfig["model_max_length"].(float64))))
339+
maxLength = min(maxLength, modelMaxLen)
340+
338341
tknzer.WithTruncation(&tokenizer.TruncationParams{
339342
MaxLength: maxLength,
340343
Strategy: tokenizer.LongestFirst,

fastembed_test.go

+20-21
Original file line numberDiff line numberDiff line change
@@ -47,31 +47,30 @@ func TestEmbedAllMiniLML6V2(t *testing.T) {
4747
}
4848
}
4949

50-
//
5150
// Breaks on GH Actions
5251
// --- FAIL: TestEmbedBGESmallEN (2.29s)
53-
// fastembed_test.go:63: Expected no error, got The tensor's shape ([1 512]) requires 512 elements, but only 8 were provided
5452
//
55-
// func TestEmbedBGESmallEN(t *testing.T) {
56-
// // Test with a single input
57-
// fe, err := NewFlagEmbedding(&InitOptions{
58-
// Model: BGESmallEN,
59-
// })
60-
// defer fe.Destroy()
61-
// if err != nil {
62-
// t.Fatalf("Expected no error, got %v", err)
63-
// }
64-
// input := []string{"Is the world doing okay?"}
65-
// result, err := fe.Embed(input, 1)
66-
// if err != nil {
67-
// t.Fatalf("Expected no error, got %v", err)
68-
// }
53+
// fastembed_test.go:63: Expected no error, got The tensor's shape ([1 512]) requires 512 elements, but only 8 were provided
54+
func TestEmbedBGESmallEN(t *testing.T) {
55+
// Test with a single input
56+
fe, err := NewFlagEmbedding(&InitOptions{
57+
Model: BGESmallEN,
58+
})
59+
defer fe.Destroy()
60+
if err != nil {
61+
t.Fatalf("Expected no error, got %v", err)
62+
}
63+
input := []string{"Is the world doing okay?"}
64+
result, err := fe.Embed(input, 1)
65+
if err != nil {
66+
t.Fatalf("Expected no error, got %v", err)
67+
}
6968

70-
// fmt.Printf("result: %v\n", result[0][0:10])
71-
// if len(result) != len(input) {
72-
// t.Errorf("Expected result length %v, got %v", len(input), len(result))
73-
// }
74-
// }
69+
fmt.Printf("result: %v\n", result[0][0:10])
70+
if len(result) != len(input) {
71+
t.Errorf("Expected result length %v, got %v", len(input), len(result))
72+
}
73+
}
7574

7675
// A model type "Unigram" is not yet supported by the tokenizer
7776
// Ref: https://github.com/sugarme/tokenizer/blob/448e79b1ed65947b8c6343bf9aa39e78364f45c8/pretrained/model.go#L152

0 commit comments

Comments
 (0)