Support localhost connection to k8s api server #1077
base: main
@@ -0,0 +1,55 @@
package util

import (
    "fmt"
    "net/http"
    "strings"

    "k8s.io/client-go/rest"
    ctrl "sigs.k8s.io/controller-runtime"
)

const (
    localhostIP   = "127.0.0.1"
    localhostIPv6 = "::1"
    localhostPort = "6443"
)
Review comment: Do you know if we can get the K8s config, like the port, from an ENV parameter?
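A possible answer, as a minimal sketch (the helper name and the fallback behavior are this sketch's assumptions, not part of the PR): the kubelet injects KUBERNETES_SERVICE_HOST and KUBERNETES_SERVICE_PORT into every pod, and rest.InClusterConfig already reads them, so the port could come from the environment instead of being hard-coded:

import "os"

// localhostPortFromEnv (hypothetical) prefers the API server port that the
// kubelet injects into every pod over the hard-coded constant.
func localhostPortFromEnv() string {
    if port := os.Getenv("KUBERNETES_SERVICE_PORT"); port != "" {
        return port
    }
    return localhostPort // fall back to the constant above (6443)
}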

// GetConfig returns the controller-runtime rest config, falling back to the
// loopback address when the configured API server is unreachable.
func GetConfig() *rest.Config {
    cfg := ctrl.GetConfigOrDie()
    cfg.Timeout = TCPReadTimeout
    if !getHealth(cfg) {
        // cfg.Host is in the form https://host:port; more than two colons
        // means the host is an IPv6 address.
        hosts := strings.Split(cfg.Host, ":")
        if len(hosts) > 3 {
            cfg.Host = fmt.Sprintf("https://[%s]:%s", localhostIPv6, localhostPort)
        } else {
            cfg.Host = fmt.Sprintf("https://%s:%s", localhostIP, localhostPort)
        }
        log.Info("Failed to connect to configured Kubernetes API server, falling back to loopback address", "host", cfg.Host)
    }
    return cfg
}

// getHealth reports whether the API server configured in cfg responds to
// /healthz with 200 OK.
func getHealth(cfg *rest.Config) bool {
    client, err := rest.HTTPClientFor(cfg)
    if err != nil {
        log.Error(err, "Failed to create client for config", "config", cfg)
        return false
    }

    healthURL := cfg.Host + "/healthz"
    resp, err := client.Get(healthURL)
    if err != nil {
        log.Error(err, "Failed to connect to Kubernetes API server", "url", healthURL)
        return false
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        log.Error(nil, "Kubernetes healthz check failed", "status", resp.Status)
Review comment: Is there a way to distinguish a TCP connection error from a TLS handshake/validation error?
        return false
    }
    log.Debug("Connection is healthy", "url", healthURL)
    return true
}
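On the review question above about telling error classes apart: a sketch, assuming Go 1.20+ (which added tls.CertificateVerificationError); classifyHealthErr is a hypothetical helper, not part of this PR. client.Get returns a *url.Error, and errors.As unwraps through it to the underlying cause:

import (
    "crypto/tls"
    "errors"
    "net"
)

// classifyHealthErr (hypothetical) buckets the error returned by client.Get:
// a TLS certificate verification failure means the TCP connection succeeded
// but the handshake was rejected; a *net.OpError means the dial itself failed.
func classifyHealthErr(err error) string {
    var certErr *tls.CertificateVerificationError
    var opErr *net.OpError
    switch {
    case errors.As(err, &certErr):
        return "tls-verification"
    case errors.As(err, &opErr):
        return "tcp-connection"
    default:
        return "unknown"
    }
}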
@@ -0,0 +1,78 @@
package util

import (
    "bytes"
    "errors"
    "io"
    "net/http"
    "testing"

    "github.com/agiledragon/gomonkey/v2"
    "github.com/stretchr/testify/assert"
    "k8s.io/client-go/rest"
    ctrl "sigs.k8s.io/controller-runtime"
)

func TestGetConfig(t *testing.T) {
    patches := gomonkey.ApplyFunc(ctrl.GetConfigOrDie, func() *rest.Config {
        return &rest.Config{
            Host: "https://10.0.0.1:443",
        }
    })
    defer patches.Reset()

    tests := []struct {
        name         string
        preparedFunc func() *gomonkey.Patches
        expectedHost string
    }{
        {
            name: "healthyTraffic",
            preparedFunc: func() *gomonkey.Patches {
                return gomonkey.ApplyFunc((*http.Client).Get, func(c *http.Client, url string) (resp *http.Response, err error) {
                    return &http.Response{
                        StatusCode: http.StatusOK,
                        Body:       io.NopCloser(bytes.NewBufferString(`{"ok": true}`)),
                    }, nil
                })
            },
            expectedHost: "https://10.0.0.1:443",
        },
        {
            name: "unhealthyTrafficAndIPv4",
            preparedFunc: func() *gomonkey.Patches {
                return gomonkey.ApplyFunc((*http.Client).Get, func(c *http.Client, url string) (resp *http.Response, err error) {
                    return &http.Response{
                        StatusCode: http.StatusNotFound,
                        Body:       io.NopCloser(bytes.NewBufferString(`{"ok": false}`)),
                    }, nil
                })
            },
            expectedHost: "https://127.0.0.1:6443",
        },
        {
            name: "errorTrafficAndIPv6",
            preparedFunc: func() *gomonkey.Patches {
                patches := gomonkey.ApplyFunc(ctrl.GetConfigOrDie, func() *rest.Config {
                    return &rest.Config{
                        Host: "https://aa:bb:cc:dd:ee:ff:443",
                    }
                })
                patches.ApplyFunc((*http.Client).Get, func(c *http.Client, url string) (resp *http.Response, err error) {
                    return nil, errors.New("mock get failure")
                })
                return patches
            },
            expectedHost: "https://[::1]:6443",
        },
    }

    for _, tt := range tests {
        t.Run(tt.name, func(t *testing.T) {
            patches := tt.preparedFunc()
            defer patches.Reset()
            cfg := GetConfig()
            assert.Equal(t, tt.expectedHost, cfg.Host)
        })
    }
}
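One note on running these tests: gomonkey rewrites function entry points at runtime, so the Go compiler's inliner must be disabled for the patches to apply reliably (the package path is repo-specific, so `./...` is used here):

go test -gcflags=all=-l ./...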
Review comment: So the API server address switch can only happen at startup, right? Then what happens if eth1 goes down while the NSX Operator is running?
Reply: Yes, it only occurs at startup.
The current case is: with WCP enabled between backup and restore, CPVM eth1 goes down after an NSX restore, and we rely on NSX Operator to recover it. In that case NSX Operator will keep restarting anyway, because the NSX connection is down due to the restore. In other cases where eth1 may go down, we should expect NSX or the WCP side to bring it back, and it is probably acceptable that NSX Operator does not work during that window.
If there is a use case where NSX Operator should switch from the cluster IP to localhost at runtime, we could leverage the liveness probe to force the operator to restart. We actually need to refactor the liveness probe in a follow-up PR, as it currently checks eth1, i.e. it calls an API like http://172.26.0.3:8384/healthz.
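For reference, a sketch of what that liveness refactor could look like using controller-runtime's healthz hook; addAPIServerHealthz and the "apiserver" check name are illustrative, not part of this PR:

import (
    "fmt"
    "net/http"

    "k8s.io/client-go/rest"
    ctrl "sigs.k8s.io/controller-runtime"
)

// addAPIServerHealthz (hypothetical) registers a liveness check that probes
// the API server itself rather than the eth1 address, so Kubernetes restarts
// the operator on lost connectivity and the startup fallback in GetConfig
// gets a chance to run again.
func addAPIServerHealthz(mgr ctrl.Manager, cfg *rest.Config) error {
    return mgr.AddHealthzCheck("apiserver", func(_ *http.Request) error {
        client, err := rest.HTTPClientFor(cfg)
        if err != nil {
            return err
        }
        resp, err := client.Get(cfg.Host + "/healthz")
        if err != nil {
            return err
        }
        defer resp.Body.Close()
        if resp.StatusCode != http.StatusOK {
            return fmt.Errorf("healthz returned %s", resp.Status)
        }
        return nil
    })
}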
Reply: I've checked this in HA mode and found the operator restarts automatically after eth1 goes down, because the lease renewal fails.
Updated: but in non-HA mode the operator will not restart; instead, API server calls fail with errors like
{"error": "Put \"https://172.24.0.1:443/apis/crd.nsx.vmware.com/v1alpha1/namespaces/ns-1/subnetsets/pod-default/status\": http2: client connection lost"}