Skip to content

Commit fb9de19

Browse files
authored
Revert "Use px/agent_status_diagnostics script within px cli to detect missing kernel headers (#2065)" (#2081)
Summary: Revert "Use `px/agent_status_diagnostics` script within px cli to detect missing kernel headers (#2065)" This reverts commit 3c9c4bd. While testing the latest cloud release, I noticed that the functionality added in #2065 can cause `px deploy` to hang indefinitely at the "Wait for healthcheck" step. The deploy will finish successfully, but it's a poor user experience. There seems to be a goroutine blocking issue that is dependent on cluster size. On 1 and 2 node clusters that I tested #2065 on, the issue doesn't surface. However, the issue reproduces reliably on the larger clusters that have pixie deployed to. Let's revert and release a new cli version once this is tracked down. Relevant Issues: #2051 Type of change: /kind bugfix Test Plan: N/A Changelog Message: Reverted the recent advanced diagnostics added during `px deploy` as in some cases it can cause that caused `px deploy` to hang Signed-off-by: Dom Del Nano <[email protected]>
1 parent 5ea9e6e commit fb9de19

File tree

8 files changed

+146
-329
lines changed

8 files changed

+146
-329
lines changed

src/pixie_cli/pkg/cmd/collect_logs.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ import (
2727
"github.com/spf13/viper"
2828

2929
"px.dev/pixie/src/pixie_cli/pkg/utils"
30-
"px.dev/pixie/src/pixie_cli/pkg/vizier"
30+
"px.dev/pixie/src/utils/shared/k8s"
3131
)
3232

3333
func init() {
@@ -42,7 +42,7 @@ var CollectLogsCmd = &cobra.Command{
4242
viper.BindPFlag("namespace", cmd.Flags().Lookup("namespace"))
4343
},
4444
Run: func(cmd *cobra.Command, args []string) {
45-
c := vizier.NewLogCollector(mustCreateBundleReader(), viper.GetString("cloud_addr"))
45+
c := k8s.NewLogCollector()
4646
fName := fmt.Sprintf("pixie_logs_%s.zip", time.Now().Format("20060102150405"))
4747
err := c.CollectPixieLogs(fName)
4848
if err != nil {

src/pixie_cli/pkg/cmd/deploy.go

+64-19
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"context"
2323
"errors"
2424
"fmt"
25+
"io"
2526
"os"
2627
"strings"
2728
"time"
@@ -71,7 +72,6 @@ var BlockListedLabels = []string{
7172
}
7273

7374
func init() {
74-
DeployCmd.Flags().StringP("bundle", "b", "", "Path/URL to bundle file")
7575
DeployCmd.Flags().StringP("extract_yaml", "e", "", "Directory to extract the Pixie yamls to")
7676
DeployCmd.Flags().StringP("vizier_version", "v", "", "Pixie version to deploy")
7777
DeployCmd.Flags().BoolP("check", "c", true, "Check whether the cluster can run Pixie")
@@ -106,7 +106,6 @@ var DeployCmd = &cobra.Command{
106106
Use: "deploy",
107107
Short: "Deploys Pixie on the current K8s cluster",
108108
PreRun: func(cmd *cobra.Command, args []string) {
109-
viper.BindPFlag("bundle", cmd.Flags().Lookup("bundle"))
110109
viper.BindPFlag("extract_yaml", cmd.Flags().Lookup("extract_yaml"))
111110
viper.BindPFlag("vizier_version", cmd.Flags().Lookup("vizier_version"))
112111
viper.BindPFlag("check", cmd.Flags().Lookup("check"))
@@ -605,6 +604,61 @@ func deploy(cloudConn *grpc.ClientConn, clientset *kubernetes.Clientset, vzClien
605604
return clusterID
606605
}
607606

607+
func runSimpleHealthCheckScript(cloudAddr string, clusterID uuid.UUID) error {
608+
v, err := vizier.ConnectionToVizierByID(cloudAddr, clusterID)
609+
br := mustCreateBundleReader()
610+
if err != nil {
611+
return err
612+
}
613+
execScript := br.MustGetScript(script.AgentStatusScript)
614+
615+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
616+
defer cancel()
617+
618+
resp, err := v.ExecuteScriptStream(ctx, execScript, nil)
619+
if err != nil {
620+
return err
621+
}
622+
623+
// TODO(zasgar): Make this use the Null output. We can't right now
624+
// because of fatal message on vizier failure.
625+
errCh := make(chan error)
626+
// Eat all responses.
627+
go func() {
628+
for {
629+
select {
630+
case <-ctx.Done():
631+
if ctx.Err() != nil {
632+
errCh <- ctx.Err()
633+
return
634+
}
635+
errCh <- nil
636+
return
637+
case msg := <-resp:
638+
if msg == nil {
639+
errCh <- nil
640+
return
641+
}
642+
if msg.Err != nil {
643+
if msg.Err == io.EOF {
644+
errCh <- nil
645+
return
646+
}
647+
errCh <- msg.Err
648+
return
649+
}
650+
if msg.Resp.Status != nil && msg.Resp.Status.Code != 0 {
651+
errCh <- errors.New(msg.Resp.Status.Message)
652+
}
653+
// Eat messages.
654+
}
655+
}
656+
}()
657+
658+
err = <-errCh
659+
return err
660+
}
661+
608662
func waitForHealthCheckTaskGenerator(cloudAddr string, clusterID uuid.UUID) func() error {
609663
return func() error {
610664
timeout := time.NewTimer(5 * time.Minute)
@@ -614,15 +668,10 @@ func waitForHealthCheckTaskGenerator(cloudAddr string, clusterID uuid.UUID) func
614668
case <-timeout.C:
615669
return errors.New("timeout waiting for healthcheck (it is possible that Pixie stabilized after the healthcheck timeout. To check if Pixie successfully deployed, run `px debug pods`)")
616670
default:
617-
_, err := vizier.RunSimpleHealthCheckScript(mustCreateBundleReader(), cloudAddr, clusterID)
671+
err := runSimpleHealthCheckScript(cloudAddr, clusterID)
618672
if err == nil {
619673
return nil
620674
}
621-
// The health check warning error indicates the cluster successfully deployed, but there are some warnings.
622-
// Return the error to end the polling and show the warnings.
623-
if _, ok := err.(*vizier.HealthCheckWarning); ok {
624-
return err
625-
}
626675
time.Sleep(5 * time.Second)
627676
}
628677
}
@@ -642,17 +691,13 @@ func waitForHealthCheck(cloudAddr string, clusterID uuid.UUID, clientset *kubern
642691
hc := utils.NewSerialTaskRunner(healthCheckJobs)
643692
err := hc.RunAndMonitor()
644693
if err != nil {
645-
if _, ok := err.(*vizier.HealthCheckWarning); ok {
646-
utils.WithError(err).Error("Pixie healthcheck detected the following warnings:")
647-
} else {
648-
_ = pxanalytics.Client().Enqueue(&analytics.Track{
649-
UserId: pxconfig.Cfg().UniqueClientID,
650-
Event: "Deploy Healthcheck Failed",
651-
Properties: analytics.NewProperties().
652-
Set("err", err.Error()),
653-
})
654-
utils.WithError(err).Fatal("Failed Pixie healthcheck")
655-
}
694+
_ = pxanalytics.Client().Enqueue(&analytics.Track{
695+
UserId: pxconfig.Cfg().UniqueClientID,
696+
Event: "Deploy Healthcheck Failed",
697+
Properties: analytics.NewProperties().
698+
Set("err", err.Error()),
699+
})
700+
utils.WithError(err).Fatal("Failed Pixie healthcheck")
656701
}
657702
_ = pxanalytics.Client().Enqueue(&analytics.Track{
658703
UserId: pxconfig.Cfg().UniqueClientID,

src/pixie_cli/pkg/cmd/root.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ var RootCmd = &cobra.Command{
203203

204204
// Name a variable to store a slice of commands that don't require cloudAddr
205205
var cmdsCloudAddrNotReqd = []*cobra.Command{
206+
CollectLogsCmd,
206207
VersionCmd,
207208
}
208209

@@ -244,7 +245,7 @@ func checkAuthForCmd(c *cobra.Command) {
244245
os.Exit(1)
245246
}
246247
switch c {
247-
case CollectLogsCmd, DeployCmd, UpdateCmd, GetCmd, DeployKeyCmd, APIKeyCmd:
248+
case DeployCmd, UpdateCmd, GetCmd, DeployKeyCmd, APIKeyCmd:
248249
utils.Errorf("These commands are unsupported in Direct Vizier mode.")
249250
os.Exit(1)
250251
default:
@@ -253,7 +254,7 @@ func checkAuthForCmd(c *cobra.Command) {
253254
}
254255

255256
switch c {
256-
case CollectLogsCmd, DeployCmd, UpdateCmd, RunCmd, LiveCmd, GetCmd, ScriptCmd, DeployKeyCmd, APIKeyCmd:
257+
case DeployCmd, UpdateCmd, RunCmd, LiveCmd, GetCmd, ScriptCmd, DeployKeyCmd, APIKeyCmd:
257258
authenticated := auth.IsAuthenticated(viper.GetString("cloud_addr"))
258259
if !authenticated {
259260
utils.Errorf("Failed to authenticate. Please retry `px auth login`.")

src/pixie_cli/pkg/vizier/BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ go_library(
2525
"data_formatter.go",
2626
"errors.go",
2727
"lister.go",
28-
"logs.go",
2928
"script.go",
3029
"stream_adapter.go",
3130
"utils.go",

src/pixie_cli/pkg/vizier/logs.go

-144
This file was deleted.

0 commit comments

Comments
 (0)