diff --git a/calico-vpp-agent/cmd/calico_vpp_dataplane.go b/calico-vpp-agent/cmd/calico_vpp_dataplane.go index 1c6eb271..841a8491 100644 --- a/calico-vpp-agent/cmd/calico_vpp_dataplane.go +++ b/calico-vpp-agent/cmd/calico_vpp_dataplane.go @@ -154,11 +154,9 @@ func main() { serviceServer := services.NewServiceServer(vpp, k8sclient, log.WithFields(logrus.Fields{"component": "services"})) prometheusServer := prometheus.NewPrometheusServer(vpp, log.WithFields(logrus.Fields{"component": "prometheus"})) localSIDWatcher := watchers.NewLocalSIDWatcher(vpp, clientv3, log.WithFields(logrus.Fields{"subcomponent": "localsid-watcher"})) - felixServer, err := felix.NewFelixServer(vpp, log.WithFields(logrus.Fields{"component": "policy"})) - if err != nil { - log.Fatalf("Failed to create policy server %s", err) - } - err = felix.InstallFelixPlugin() + felixServer := felix.NewFelixServer(vpp, clientv3, log.WithFields(logrus.Fields{"component": "policy"})) + felixWatcher := watchers.NewFelixWatcher(felixServer.GetFelixServerEventChan(), log.WithFields(logrus.Fields{"component": "felix watcher"})) + err = watchers.InstallFelixPlugin() if err != nil { log.Fatalf("could not install felix plugin: %s", err) } @@ -175,8 +173,10 @@ func main() { peerWatcher.SetBGPConf(bgpConf) routingServer.SetBGPConf(bgpConf) serviceServer.SetBGPConf(bgpConf) + felixServer.SetBGPConf(bgpConf) Go(felixServer.ServeFelix) + Go(felixWatcher.WatchFelix) /* * Mark as unhealthy while waiting for Felix config @@ -188,19 +188,17 @@ func main() { ticker := time.NewTicker(10 * time.Second) defer ticker.Stop() - var felixConfig interface{} - var ourBGPSpec interface{} + var felixConfig *felixconfig.Config + var ourBGPSpec *common.LocalNodeSpec felixConfigReceived := false bgpSpecReceived := false for !felixConfigReceived || !bgpSpecReceived { select { - case value := <-felixServer.FelixConfigChan: - felixConfig = value + case felixConfig = <-felixServer.FelixConfigChan: felixConfigReceived = true 
log.Info("FelixConfig received from calico pod") - case value := <-felixServer.GotOurNodeBGPchan: - ourBGPSpec = value + case ourBGPSpec = <-felixServer.GotOurNodeBGPchan(): bgpSpecReceived = true log.Info("BGP spec received from node add") case <-t.Dying(): @@ -220,19 +218,13 @@ func main() { healthServer.SetComponentStatus(health.ComponentFelix, true, "Felix config received") log.Info("Felix configuration received") - if ourBGPSpec != nil { - bgpSpec, ok := ourBGPSpec.(*common.LocalNodeSpec) - if !ok { - panic("ourBGPSpec is not *common.LocalNodeSpec") - } - prefixWatcher.SetOurBGPSpec(bgpSpec) - connectivityServer.SetOurBGPSpec(bgpSpec) - routingServer.SetOurBGPSpec(bgpSpec) - serviceServer.SetOurBGPSpec(bgpSpec) - localSIDWatcher.SetOurBGPSpec(bgpSpec) - netWatcher.SetOurBGPSpec(bgpSpec) - cniServer.SetOurBGPSpec(bgpSpec) - } + prefixWatcher.SetOurBGPSpec(ourBGPSpec) + connectivityServer.SetOurBGPSpec(ourBGPSpec) + routingServer.SetOurBGPSpec(ourBGPSpec) + serviceServer.SetOurBGPSpec(ourBGPSpec) + localSIDWatcher.SetOurBGPSpec(ourBGPSpec) + netWatcher.SetOurBGPSpec(ourBGPSpec) + cniServer.SetOurBGPSpec(ourBGPSpec) if *config.GetCalicoVppFeatureGates().MultinetEnabled { Go(netWatcher.WatchNetworks) @@ -246,14 +238,8 @@ func main() { } } - if felixConfig != nil { - felixCfg, ok := felixConfig.(*felixconfig.Config) - if !ok { - panic("ourBGPSpec is not *felixconfig.Config") - } - cniServer.SetFelixConfig(felixCfg) - connectivityServer.SetFelixConfig(felixCfg) - } + cniServer.SetFelixConfig(felixConfig) + connectivityServer.SetFelixConfig(felixConfig) Go(routeWatcher.WatchRoutes) Go(linkWatcher.WatchLinks) diff --git a/calico-vpp-agent/cni/cni_pod_test.go b/calico-vpp-agent/cni/cni_pod_test.go index 9a4517b0..e59e1661 100644 --- a/calico-vpp-agent/cni/cni_pod_test.go +++ b/calico-vpp-agent/cni/cni_pod_test.go @@ -35,7 +35,6 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" 
"github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/tests/mocks" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/testutils" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" "github.com/projectcalico/vpp-dataplane/v3/config" "github.com/projectcalico/vpp-dataplane/v3/vpplink" "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" @@ -323,7 +322,7 @@ var _ = Describe("Pod-related functionality of CNI", func() { Context("With MultiNet configuration (and multinet VRF and loopback already configured)", func() { var ( - networkDefinition *watchers.NetworkDefinition + networkDefinition *common.NetworkDefinition pubSubHandlerMock *mocks.PubSubHandlerMock ) @@ -355,9 +354,9 @@ var _ = Describe("Pod-related functionality of CNI", func() { } // NetworkDefinition CRD information caught by NetWatcher and send with additional information // (VRF and loopback created by watcher) to the cni server as common.NetAdded CalicoVPPEvent - networkDefinition = &watchers.NetworkDefinition{ - VRF: watchers.VRF{Tables: tables}, - PodVRF: watchers.VRF{Tables: podTables}, + networkDefinition = &common.NetworkDefinition{ + VRF: common.VRF{Tables: tables}, + PodVRF: common.VRF{Tables: podTables}, Vni: uint32(0), // important only for VXLAN tunnel going out of node Name: networkName, Range: "10.1.1.0/24", // IP range for secondary network defined by multinet diff --git a/calico-vpp-agent/cni/cni_server.go b/calico-vpp-agent/cni/cni_server.go index 0b259350..b1b9ce9e 100644 --- a/calico-vpp-agent/cni/cni_server.go +++ b/calico-vpp-agent/cni/cni_server.go @@ -36,7 +36,6 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/cni/model" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/cni/podinterface" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" "github.com/projectcalico/vpp-dataplane/v3/config" 
"github.com/projectcalico/vpp-dataplane/v3/vpplink" "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" @@ -53,7 +52,7 @@ type Server struct { podInterfaceMap map[string]model.LocalPodSpec lock sync.Mutex /* protects Add/DelVppInterace/RescanState */ - cniEventChan chan common.CalicoVppEvent + cniEventChan chan any memifDriver *podinterface.MemifPodInterfaceDriver tuntapDriver *podinterface.TunTapPodInterfaceDriver @@ -65,7 +64,7 @@ type Server struct { RedirectToHostClassifyTableIndex uint32 networkDefinitions sync.Map - cniMultinetEventChan chan common.CalicoVppEvent + cniMultinetEventChan chan any nodeBGPSpec *common.LocalNodeSpec } @@ -96,9 +95,9 @@ func (s *Server) Add(ctx context.Context, request *cniproto.AddRequest) (*cnipro if !ok { return nil, fmt.Errorf("trying to create a pod in an unexisting network %s", podSpec.NetworkName) } else { - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("Value is not of type *watchers.NetworkDefinition") + panic("Value is not of type *common.NetworkDefinition") } _, route, err := net.ParseCIDR(networkDefinition.Range) if err == nil { @@ -292,7 +291,7 @@ func NewCNIServer(vpp *vpplink.VppLink, felixServerIpam common.FelixServerIpam, log: log, felixServerIpam: felixServerIpam, - cniEventChan: make(chan common.CalicoVppEvent, common.ChanSize), + cniEventChan: make(chan any, common.ChanSize), grpcServer: grpc.NewServer(), podInterfaceMap: make(map[string]model.LocalPodSpec), @@ -301,7 +300,7 @@ func NewCNIServer(vpp *vpplink.VppLink, felixServerIpam common.FelixServerIpam, vclDriver: podinterface.NewVclPodInterfaceDriver(vpp, log, felixServerIpam), loopbackDriver: podinterface.NewLoopbackPodInterfaceDriver(vpp, log, felixServerIpam), - cniMultinetEventChan: make(chan common.CalicoVppEvent, common.ChanSize), + cniMultinetEventChan: make(chan any, common.ChanSize), } reg := 
common.RegisterHandler(server.cniEventChan, "CNI server events") reg.ExpectEvents( @@ -322,7 +321,11 @@ forloop: select { case <-t.Dying(): break forloop - case evt := <-s.cniEventChan: + case msg := <-s.cniEventChan: + evt, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } switch evt.Type { case common.FelixConfChanged: if new, _ := evt.New.(*felixConfig.Config); new != nil { @@ -437,21 +440,25 @@ func (s *Server) ServeCNI(t *tomb.Tomb) error { case <-t.Dying(): s.log.Warn("Cni server asked to exit") return - case event := <-s.cniMultinetEventChan: + case msg := <-s.cniMultinetEventChan: + event, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } switch event.Type { case common.NetsSynced: netsSynced <- true case common.NetAddedOrUpdated: - netDef, ok := event.New.(*watchers.NetworkDefinition) + netDef, ok := event.New.(*common.NetworkDefinition) if !ok { - s.log.Errorf("event.New is not a *watchers.NetworkDefinition %v", event.New) + s.log.Errorf("event.New is not a *common.NetworkDefinition %v", event.New) continue } s.networkDefinitions.Store(netDef.Name, netDef) case common.NetDeleted: - netDef, ok := event.Old.(*watchers.NetworkDefinition) + netDef, ok := event.Old.(*common.NetworkDefinition) if !ok { - s.log.Errorf("event.Old is not a *watchers.NetworkDefinition %v", event.Old) + s.log.Errorf("event.Old is not a *common.NetworkDefinition %v", event.Old) continue } s.networkDefinitions.Delete(netDef.Name) @@ -491,6 +498,6 @@ func (s *Server) ServeCNI(t *tomb.Tomb) error { // ForceAddingNetworkDefinition will add another NetworkDefinition to this CNI server. // The usage is mainly for testing purposes. 
-func (s *Server) ForceAddingNetworkDefinition(networkDefinition *watchers.NetworkDefinition) { +func (s *Server) ForceAddingNetworkDefinition(networkDefinition *common.NetworkDefinition) { s.networkDefinitions.Store(networkDefinition.Name, networkDefinition) } diff --git a/calico-vpp-agent/cni/network_vpp.go b/calico-vpp-agent/cni/network_vpp.go index 88c7319c..eb8f8934 100644 --- a/calico-vpp-agent/cni/network_vpp.go +++ b/calico-vpp-agent/cni/network_vpp.go @@ -24,7 +24,6 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/cni/model" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" "github.com/projectcalico/vpp-dataplane/v3/config" "github.com/projectcalico/vpp-dataplane/v3/vpplink" "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" @@ -248,9 +247,9 @@ func (s *Server) AddVppInterface(podSpec *model.LocalPodSpec, doHostSideConf boo if !ok { s.log.Errorf("network not found %s", podSpec.NetworkName) } else { - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("networkDefinition not of type *watchers.NetworkDefinition") + panic("networkDefinition not of type *common.NetworkDefinition") } vni = networkDefinition.Vni } @@ -320,9 +319,9 @@ func (s *Server) DelVppInterface(podSpec *model.LocalPodSpec) { if !ok { deleteLocalPodAddress = false } else { - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("networkDefinition not of type *watchers.NetworkDefinition") + panic("networkDefinition not of type *common.NetworkDefinition") } vni = networkDefinition.Vni } diff --git a/calico-vpp-agent/cni/network_vpp_routes.go b/calico-vpp-agent/cni/network_vpp_routes.go index df7aed2a..8c8c6438 100644 --- 
a/calico-vpp-agent/cni/network_vpp_routes.go +++ b/calico-vpp-agent/cni/network_vpp_routes.go @@ -20,7 +20,6 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/cni/model" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" "github.com/projectcalico/vpp-dataplane/v3/vpplink" "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" ) @@ -37,9 +36,9 @@ func (s *Server) RoutePodInterface(podSpec *model.LocalPodSpec, stack *vpplink.C if !ok { s.log.Errorf("network not found %s", podSpec.NetworkName) } else { - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("networkDefinition not of type *watchers.NetworkDefinition") + panic("networkDefinition not of type *common.NetworkDefinition") } table = networkDefinition.VRF.Tables[idx] } @@ -88,9 +87,9 @@ func (s *Server) UnroutePodInterface(podSpec *model.LocalPodSpec, swIfIndex uint if !ok { s.log.Errorf("network not found %s", podSpec.NetworkName) } else { - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("networkDefinition not of type *watchers.NetworkDefinition") + panic("networkDefinition not of type *common.NetworkDefinition") } table = networkDefinition.VRF.Tables[idx] } @@ -242,9 +241,9 @@ func (s *Server) CreatePodVRF(podSpec *model.LocalPodSpec, stack *vpplink.Cleanu if !ok { return errors.Errorf("network not found %s", podSpec.NetworkName) } - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("networkDefinition not of type *watchers.NetworkDefinition") + panic("networkDefinition not of type *common.NetworkDefinition") } vrfIndex = 
networkDefinition.PodVRF.Tables[idx] } @@ -402,9 +401,9 @@ func (s *Server) DeletePodVRF(podSpec *model.LocalPodSpec) { if !ok { s.log.Errorf("network not found %s", podSpec.NetworkName) } else { - networkDefinition, ok := value.(*watchers.NetworkDefinition) + networkDefinition, ok := value.(*common.NetworkDefinition) if !ok || networkDefinition == nil { - panic("networkDefinition not of type *watchers.NetworkDefinition") + panic("networkDefinition not of type *common.NetworkDefinition") } vrfIndex = networkDefinition.PodVRF.Tables[idx] } diff --git a/calico-vpp-agent/common/pubsub.go b/calico-vpp-agent/common/pubsub.go index daef558c..1ceb7248 100644 --- a/calico-vpp-agent/common/pubsub.go +++ b/calico-vpp-agent/common/pubsub.go @@ -85,18 +85,15 @@ type PubSubHandlerRegistration struct { /* Name for the registration, for logging & debugging */ name string /* Channel where to send events */ - channel chan CalicoVppEvent + channel chan any - /* Receive only these events. If empty we'll receive all */ + /* Receive only these events. If empty we'll receive none */ expectedEvents map[CalicoVppEventType]bool - /* Receive all events */ - expectAllEvents bool } func (reg *PubSubHandlerRegistration) ExpectEvents(eventTypes ...CalicoVppEventType) { for _, eventType := range eventTypes { reg.expectedEvents[eventType] = true } - reg.expectAllEvents = false } type PubSub struct { @@ -104,12 +101,11 @@ type PubSub struct { pubSubHandlerRegistrations []*PubSubHandlerRegistration } -func RegisterHandler(channel chan CalicoVppEvent, name string) *PubSubHandlerRegistration { +func RegisterHandler(channel chan any, name string) *PubSubHandlerRegistration { reg := &PubSubHandlerRegistration{ - channel: channel, - name: name, - expectedEvents: make(map[CalicoVppEventType]bool), - expectAllEvents: true, /* By default receive everything, unless we ask for a filter */ + channel: channel, + name: name, + expectedEvents: make(map[CalicoVppEventType]bool), } ThePubSub.pubSubHandlerRegistrations = append(ThePubSub.pubSubHandlerRegistrations, reg) return 
reg @@ -128,7 +124,7 @@ func redactPassword(event CalicoVppEvent) string { func SendEvent(event CalicoVppEvent) { ThePubSub.log.Debugf("Broadcasting event %s", redactPassword(event)) for _, reg := range ThePubSub.pubSubHandlerRegistrations { - if reg.expectAllEvents || reg.expectedEvents[event.Type] { + if reg.expectedEvents[event.Type] { reg.channel <- event } } diff --git a/calico-vpp-agent/common/types.go b/calico-vpp-agent/common/types.go new file mode 100644 index 00000000..a451de97 --- /dev/null +++ b/calico-vpp-agent/common/types.go @@ -0,0 +1,53 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package common + +type VRF struct { + Tables [2]uint32 // one for ipv4, one for ipv6 +} + +type NetworkDefinition struct { + // VRF is the main table used for the corresponding physical network + VRF VRF + // PodVRF is the table used for the pods in the corresponding physical network + PodVRF VRF + Vni uint32 + PhysicalNetworkName string + Name string + Range string + NetAttachDefs string +} + +// FelixSocketSyncState describes the status of the +// felix socket connection. It applies mostly to policies +type FelixSocketSyncState int + +const ( + StateDisconnected FelixSocketSyncState = iota + StateConnected + StateSyncing + StateInSync +) + +func (state FelixSocketSyncState) IsPending() bool { + return state != StateInSync +} + +// FelixSocketStateChanged is emitted when the state +// of the socket changed. 
Typically connection and disconnection. +type FelixSocketStateChanged struct { + NewState FelixSocketSyncState +} diff --git a/calico-vpp-agent/connectivity/connectivity_server.go b/calico-vpp-agent/connectivity/connectivity_server.go index 34849720..3012e1e7 100644 --- a/calico-vpp-agent/connectivity/connectivity_server.go +++ b/calico-vpp-agent/connectivity/connectivity_server.go @@ -27,7 +27,6 @@ import ( "gopkg.in/tomb.v2" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" "github.com/projectcalico/vpp-dataplane/v3/config" "github.com/projectcalico/vpp-dataplane/v3/vpplink" ) @@ -45,9 +44,9 @@ type ConnectivityServer struct { felixConfig *felixConfig.Config nodeByAddr map[string]common.LocalNodeSpec - connectivityEventChan chan common.CalicoVppEvent + connectivityEventChan chan any - networks map[uint32]watchers.NetworkDefinition + networks map[uint32]common.NetworkDefinition } type change uint8 @@ -73,9 +72,9 @@ func NewConnectivityServer(vpp *vpplink.VppLink, felixServerIpam common.FelixSer felixServerIpam: felixServerIpam, Clientv3: clientv3, connectivityMap: make(map[string]common.NodeConnectivity), - connectivityEventChan: make(chan common.CalicoVppEvent, common.ChanSize), + connectivityEventChan: make(chan any, common.ChanSize), nodeByAddr: make(map[string]common.LocalNodeSpec), - networks: make(map[uint32]watchers.NetworkDefinition), + networks: make(map[uint32]common.NetworkDefinition), } reg := common.RegisterHandler(server.connectivityEventChan, "connectivity server events") @@ -149,19 +148,23 @@ func (s *ConnectivityServer) ServeConnectivity(t *tomb.Tomb) error { case <-t.Dying(): s.log.Warn("Connectivity Server asked to stop") return nil - case evt := <-s.connectivityEventChan: + case msg := <-s.connectivityEventChan: /* Note: we will only receive events we ask for when registering the chan */ + evt, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } 
switch evt.Type { case common.NetAddedOrUpdated: - new, ok := evt.New.(*watchers.NetworkDefinition) + new, ok := evt.New.(*common.NetworkDefinition) if !ok { - s.log.Errorf("evt.New is not a *watchers.NetworkDefinition %v", evt.New) + s.log.Errorf("evt.New is not a *common.NetworkDefinition %v", evt.New) } s.networks[new.Vni] = *new case common.NetDeleted: - old, ok := evt.Old.(*watchers.NetworkDefinition) + old, ok := evt.Old.(*common.NetworkDefinition) if !ok { - s.log.Errorf("evt.Old is not a *watchers.NetworkDefinition %v", evt.Old) + s.log.Errorf("evt.Old is not a *common.NetworkDefinition %v", evt.Old) } delete(s.networks, old.Vni) case common.ConnectivityAdded: diff --git a/calico-vpp-agent/felix/cache/cache.go b/calico-vpp-agent/felix/cache/cache.go new file mode 100644 index 00000000..35765231 --- /dev/null +++ b/calico-vpp-agent/felix/cache/cache.go @@ -0,0 +1,98 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package cache + +import ( + "net" + + felixConfig "github.com/projectcalico/calico/felix/config" + "github.com/projectcalico/calico/felix/proto" + "github.com/sirupsen/logrus" + + calicov3 "github.com/projectcalico/api/pkg/apis/projectcalico/v3" + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" + "github.com/projectcalico/vpp-dataplane/v3/config" +) + +type Cache struct { + log *logrus.Entry + + FelixConfig *felixConfig.Config + NodeByAddr map[string]common.LocalNodeSpec + Networks map[uint32]*common.NetworkDefinition + NetworkDefinitions map[string]*common.NetworkDefinition + IPPoolMap map[string]*proto.IPAMPool + NodeStatesByName map[string]*common.LocalNodeSpec + BGPConf *calicov3.BGPConfigurationSpec +} + +func NewCache(log *logrus.Entry) *Cache { + return &Cache{ + log: log, + NodeByAddr: make(map[string]common.LocalNodeSpec), + FelixConfig: felixConfig.New(), + Networks: make(map[uint32]*common.NetworkDefinition), + NetworkDefinitions: make(map[string]*common.NetworkDefinition), + IPPoolMap: make(map[string]*proto.IPAMPool), + NodeStatesByName: make(map[string]*common.LocalNodeSpec), + } +} + +// GetPrefixIPPool checks whether we have an IP pool which contains the given prefix. +// If we have, it returns the pool. 
+func (cache *Cache) GetPrefixIPPool(prefix *net.IPNet) *proto.IPAMPool { + for _, pool := range cache.IPPoolMap { + in, err := ipamPoolContains(pool, prefix) + if err != nil { + cache.log.Warnf("ipamPoolContains errored: %v", err) + continue + } + if in { + return pool + } + } + cache.log.Warnf("No pool found for %s", prefix) + for k, pool := range cache.IPPoolMap { + cache.log.Debugf("Available %s=%v", k, pool) + } + return nil +} + +// ipamPoolContains returns true if the IPPool contains 'prefix' +func ipamPoolContains(pool *proto.IPAMPool, prefix *net.IPNet) (bool, error) { + _, poolCIDR, _ := net.ParseCIDR(pool.GetCidr()) // this field is validated so this should never error + poolCIDRLen, poolCIDRBits := poolCIDR.Mask.Size() + prefixLen, prefixBits := prefix.Mask.Size() + return poolCIDRBits == prefixBits && poolCIDR.Contains(prefix.IP) && prefixLen >= poolCIDRLen, nil +} + +func (cache *Cache) GetNodeIP4() *net.IP { + if spec, found := cache.NodeStatesByName[*config.NodeName]; found { + if spec.IPv4Address != nil { + return &spec.IPv4Address.IP + } + } + return nil +} + +func (cache *Cache) GetNodeIP6() *net.IP { + if spec, found := cache.NodeStatesByName[*config.NodeName]; found { + if spec.IPv6Address != nil { + return &spec.IPv6Address.IP + } + } + return nil +} diff --git a/calico-vpp-agent/felix/felix_server.go b/calico-vpp-agent/felix/felix_server.go index 211fe133..d5c3a415 100644 --- a/calico-vpp-agent/felix/felix_server.go +++ b/calico-vpp-agent/felix/felix_server.go @@ -16,135 +16,54 @@ package felix import ( - "encoding/json" "fmt" - "io" "net" - "os" - "reflect" - "regexp" - "strings" "sync" "github.com/pkg/errors" + calicov3 "github.com/projectcalico/api/pkg/apis/projectcalico/v3" felixConfig "github.com/projectcalico/calico/felix/config" + "github.com/projectcalico/calico/felix/proto" + calicov3cli "github.com/projectcalico/calico/libcalico-go/lib/clientv3" "github.com/sirupsen/logrus" "gopkg.in/tomb.v2" - nettypes 
"github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" - "github.com/projectcalico/calico/felix/proto" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/cni/model" "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" - "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/watchers" - "github.com/projectcalico/vpp-dataplane/v3/config" + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/felix/cache" + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/felix/policies" "github.com/projectcalico/vpp-dataplane/v3/vpplink" - "github.com/projectcalico/vpp-dataplane/v3/vpplink/generated/bindings/npol" - "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" -) - -const ( - FelixPluginSrcPath = "/bin/felix-api-proxy" - FelixPluginDstPath = "/var/lib/calico/felix-plugins/felix-api-proxy" -) - -type SyncState int - -const ( - StateDisconnected SyncState = iota - StateConnected - StateSyncing - StateInSync ) -type NodeWatcherRestartError struct{} - -func (e NodeWatcherRestartError) Error() string { - return "node configuration changed, restarting" -} - // Server holds all the data required to configure the policies defined by felix in VPP type Server struct { - log *logrus.Entry - vpp *vpplink.VppLink - - state SyncState - nextSeqNumber uint64 - - endpointsLock sync.Mutex - endpointsInterfaces map[WorkloadEndpointID]map[string]uint32 - - configuredState *PolicyState - pendingState *PolicyState - - /* failSafe policies allow traffic on some ports irrespective of the policy */ - failSafePolicy *Policy - /* workloadToHost may drop traffic that goes from the pods to the host */ - workloadsToHostPolicy *Policy - defaultTap0IngressConf []uint32 - defaultTap0EgressConf []uint32 - /* always allow traffic coming from host to the pods (for healthchecks and so on) */ - // AllowFromHostPolicy persists the policy allowing host --> pod communications. 
- // See CreateAllowFromHostPolicy definition - AllowFromHostPolicy *Policy - // allPodsIpset persists the ipset containing all the workload endpoints (pods) addresses - allPodsIpset *IPSet - /* allow traffic between uplink/tunnels and tap interfaces */ - allowToHostPolicy *Policy - /* deny all policy for heps with no policies defined */ - ip4 *net.IP - ip6 *net.IP - interfacesMap map[string]interfaceDetails - - felixServerEventChan chan common.CalicoVppEvent - networkDefinitions map[string]*watchers.NetworkDefinition + log *logrus.Entry + vpp *vpplink.VppLink + cache *cache.Cache - tunnelSwIfIndexes map[uint32]bool - tunnelSwIfIndexesLock sync.Mutex + felixServerEventChan chan any felixConfigReceived bool - FelixConfigChan chan interface{} - felixConfig *felixConfig.Config + FelixConfigChan chan *felixConfig.Config - ippoolmap map[string]*proto.IPAMPool - ippoolLock sync.RWMutex - - nodeStatesByName map[string]*common.LocalNodeSpec - nodeByWGPublicKey map[string]string - - GotOurNodeBGPchan chan interface{} - GotOurNodeBGPchanOnce sync.Once + ippoolLock sync.RWMutex + policiesHandler *policies.PoliciesHandler } // NewFelixServer creates a felix server -func NewFelixServer(vpp *vpplink.VppLink, log *logrus.Entry) (*Server, error) { - var err error - +func NewFelixServer(vpp *vpplink.VppLink, clientv3 calicov3cli.Interface, log *logrus.Entry) *Server { + cache := cache.NewCache(log) server := &Server{ log: log, vpp: vpp, - state: StateDisconnected, - nextSeqNumber: 0, - - endpointsInterfaces: make(map[WorkloadEndpointID]map[string]uint32), + felixServerEventChan: make(chan any, common.ChanSize), - configuredState: NewPolicyState(), - pendingState: NewPolicyState(), - - felixServerEventChan: make(chan common.CalicoVppEvent, common.ChanSize), - - networkDefinitions: make(map[string]*watchers.NetworkDefinition), - - tunnelSwIfIndexes: make(map[uint32]bool), felixConfigReceived: false, - FelixConfigChan: make(chan interface{}), - felixConfig: felixConfig.New(), - - 
ippoolmap: make(map[string]*proto.IPAMPool), + FelixConfigChan: make(chan *felixConfig.Config), - nodeStatesByName: make(map[string]*common.LocalNodeSpec), - GotOurNodeBGPchan: make(chan interface{}), + cache: cache, + policiesHandler: policies.NewPoliciesHandler(vpp, cache, clientv3, log), } reg := common.RegisterHandler(server.felixServerEventChan, "felix server events") @@ -155,1240 +74,35 @@ func NewFelixServer(vpp *vpplink.VppLink, log *logrus.Entry) (*Server, error) { common.TunnelDeleted, common.NetAddedOrUpdated, common.NetDeleted, + common.ConnectivityAdded, + common.ConnectivityDeleted, + common.SRv6PolicyAdded, + common.SRv6PolicyDeleted, ) - server.interfacesMap, err = server.mapTagToInterfaceDetails() - if err != nil { - return nil, errors.Wrapf(err, "error in mapping uplink to tap interfaces") - } - - // Cleanup potentially left over socket - err = os.RemoveAll(config.FelixDataplaneSocket) - if err != nil { - return nil, errors.Wrapf(err, "Could not delete socket %s", config.FelixDataplaneSocket) - } - - return server, nil -} - -type interfaceDetails struct { - tapIndex uint32 - uplinkIndex uint32 - addresses []string -} - -func (s *Server) mapTagToInterfaceDetails() (tagIfDetails map[string]interfaceDetails, err error) { - tagIfDetails = make(map[string]interfaceDetails) - uplinkSwifindexes, err := s.vpp.SearchInterfacesWithTagPrefix("main-") - if err != nil { - return nil, err - } - tapSwifindexes, err := s.vpp.SearchInterfacesWithTagPrefix("host-") - if err != nil { - return nil, err - } - for intf, uplink := range uplinkSwifindexes { - tap, found := tapSwifindexes["host-"+intf[5:]] - if found { - ip4adds, err := s.vpp.AddrList(uplink, false) - if err != nil { - return nil, err - } - ip6adds, err := s.vpp.AddrList(uplink, true) - if err != nil { - return nil, err - } - adds := append(ip4adds, ip6adds...) 
- addresses := []string{} - for _, add := range adds { - addresses = append(addresses, add.IPNet.IP.String()) - } - tagIfDetails[intf[5:]] = interfaceDetails{tap, uplink, addresses} - } else { - return nil, errors.Errorf("uplink interface %d not corresponding to a tap interface", uplink) - } - } - return tagIfDetails, nil -} - -func InstallFelixPlugin() (err error) { - err = os.RemoveAll(FelixPluginDstPath) - if err != nil { - logrus.Warnf("Could not delete %s: %v", FelixPluginDstPath, err) - } - - in, err := os.Open(FelixPluginSrcPath) - if err != nil { - return errors.Wrap(err, "cannot open felix plugin to copy") - } - defer in.Close() - - out, err := os.OpenFile(FelixPluginDstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755) - if err != nil { - return errors.Wrap(err, "cannot open felix plugin to write") - } - defer func() { - cerr := out.Close() - if err == nil { - err = errors.Wrap(cerr, "cannot close felix plugin file") - } - }() - if _, err = io.Copy(out, in); err != nil { - return errors.Wrap(err, "cannot copy data") - } - err = out.Sync() - return errors.Wrapf(err, "could not sync felix plugin changes") -} - -func (s *Server) getEndpointToHostAction() types.RuleAction { - if strings.ToUpper(s.felixConfig.DefaultEndpointToHostAction) == "ACCEPT" { - return types.ActionAllow - } - return types.ActionDeny -} - -// workloadAdded is called by the CNI server when a container interface is created, -// either during startup when reconnecting the interfaces, or when a new pod is created -func (s *Server) workloadAdded(id *WorkloadEndpointID, swIfIndex uint32, ifName string, containerIPs []*net.IPNet) { - // TODO: Send WorkloadEndpointStatusUpdate to felix - s.endpointsLock.Lock() - defer s.endpointsLock.Unlock() - - intf, existing := s.endpointsInterfaces[*id] - - if existing { - for _, exInt := range intf { - if swIfIndex == exInt { - return - } - } - // VPP restarted and interfaces are being reconnected - s.log.Warnf("workload endpoint changed interfaces, did VPP 
restart? %v %v -> %d", id, intf, swIfIndex) - s.endpointsInterfaces[*id][ifName] = swIfIndex - } - - s.log.Infof("policy(add) Workload id=%v swIfIndex=%d", id, swIfIndex) - if s.endpointsInterfaces[*id] == nil { - s.endpointsInterfaces[*id] = map[string]uint32{ifName: swIfIndex} - } else { - s.endpointsInterfaces[*id][ifName] = swIfIndex - } - - if s.state == StateInSync { - wep, ok := s.configuredState.WorkloadEndpoints[*id] - if !ok { - s.log.Infof("not creating wep in workloadadded") - // Nothing to configure - } else { - s.log.Infof("creating wep in workloadadded") - err := wep.Create(s.vpp, []uint32{swIfIndex}, s.configuredState, id.Network) - if err != nil { - s.log.Errorf("Error processing workload addition: %s", err) - } - } - } - // EndpointToHostAction - allMembers := []string{} - for _, containerIP := range containerIPs { - allMembers = append(allMembers, containerIP.IP.String()) - } - err := s.allPodsIpset.AddMembers(allMembers, true, s.vpp) - if err != nil { - s.log.Errorf("Error processing workload addition: %s", err) - } -} - -// WorkloadRemoved is called by the CNI server when the interface of a pod is deleted -func (s *Server) WorkloadRemoved(id *WorkloadEndpointID, containerIPs []*net.IPNet) { - // TODO: Send WorkloadEndpointStatusRemove to felix - s.endpointsLock.Lock() - defer s.endpointsLock.Unlock() - - _, existing := s.endpointsInterfaces[*id] - if !existing { - s.log.Warnf("nonexistent workload endpoint removed %v", id) - return - } - s.log.Infof("policy(del) workload id=%v", id) - - if s.state == StateInSync { - wep, ok := s.configuredState.WorkloadEndpoints[*id] - if !ok { - // Nothing to clean up - } else { - err := wep.Delete(s.vpp) - if err != nil { - s.log.Errorf("Error processing workload removal: %s", err) - } - } - } - delete(s.endpointsInterfaces, *id) - // EndpointToHostAction - allMembers := []string{} - for _, containerIP := range containerIPs { - allMembers = append(allMembers, containerIP.IP.String()) - } - err := 
s.allPodsIpset.RemoveMembers(allMembers, true, s.vpp) - if err != nil { - s.log.Errorf("Error processing workload remove: %s", err) - } -} - -func (s *Server) handleFelixServerEvents(evt common.CalicoVppEvent) error { - /* Note: we will only receive events we ask for when registering the chan */ - switch evt.Type { - case common.NetAddedOrUpdated: - netDef, ok := evt.New.(*watchers.NetworkDefinition) - if !ok { - return fmt.Errorf("evt.New is not a (*watchers.NetworkDefinition) %v", evt.New) - } - s.networkDefinitions[netDef.Name] = netDef - case common.NetDeleted: - netDef, ok := evt.Old.(*watchers.NetworkDefinition) - if !ok { - return fmt.Errorf("evt.Old is not a (*watchers.NetworkDefinition) %v", evt.Old) - } - delete(s.networkDefinitions, netDef.Name) - case common.PodAdded: - podSpec, ok := evt.New.(*model.LocalPodSpec) - if !ok { - return fmt.Errorf("evt.New is not a (*model.LocalPodSpec) %v", evt.New) - } - swIfIndex := podSpec.TunTapSwIfIndex - if swIfIndex == vpplink.InvalidID { - swIfIndex = podSpec.MemifSwIfIndex - } - s.workloadAdded(&WorkloadEndpointID{ - OrchestratorID: podSpec.OrchestratorID, - WorkloadID: podSpec.WorkloadID, - EndpointID: podSpec.EndpointID, - Network: podSpec.NetworkName, - }, swIfIndex, podSpec.InterfaceName, podSpec.GetContainerIPs()) - case common.PodDeleted: - podSpec, ok := evt.Old.(*model.LocalPodSpec) - if !ok { - return fmt.Errorf("evt.Old is not a (*model.LocalPodSpec) %v", evt.Old) - } - if podSpec != nil { - s.WorkloadRemoved(&WorkloadEndpointID{ - OrchestratorID: podSpec.OrchestratorID, - WorkloadID: podSpec.WorkloadID, - EndpointID: podSpec.EndpointID, - Network: podSpec.NetworkName, - }, podSpec.GetContainerIPs()) - } - case common.TunnelAdded: - swIfIndex, ok := evt.New.(uint32) - if !ok { - return fmt.Errorf("evt.New not a uint32 %v", evt.New) - } - - s.tunnelSwIfIndexesLock.Lock() - s.tunnelSwIfIndexes[swIfIndex] = true - s.tunnelSwIfIndexesLock.Unlock() - - pending := true - switch s.state { - case StateSyncing, 
StateConnected: - case StateInSync: - pending = false - default: - return fmt.Errorf("got tunnel %d add but not in syncing or synced state", swIfIndex) - } - state := s.currentState(pending) - for _, h := range state.HostEndpoints { - err := h.handleTunnelChange(swIfIndex, true /* isAdd */, pending) - if err != nil { - return err - } - } - case common.TunnelDeleted: - swIfIndex, ok := evt.Old.(uint32) - if !ok { - return fmt.Errorf("evt.Old not a uint32 %v", evt.Old) - } - - s.tunnelSwIfIndexesLock.Lock() - delete(s.tunnelSwIfIndexes, swIfIndex) - s.tunnelSwIfIndexesLock.Unlock() - - pending := true - switch s.state { - case StateSyncing, StateConnected: - case StateInSync: - pending = false - default: - return fmt.Errorf("got tunnel %d del but not in syncing or synced state", swIfIndex) - } - state := s.currentState(pending) - for _, h := range state.HostEndpoints { - err := h.handleTunnelChange(swIfIndex, false /* isAdd */, pending) - if err != nil { - return err - } - } - } - return nil -} - -// Serve runs the felix server -func (s *Server) ServeFelix(t *tomb.Tomb) error { - s.log.Info("Starting felix server") - - listener, err := net.Listen("unix", config.FelixDataplaneSocket) - if err != nil { - return errors.Wrapf(err, "Could not bind to unix://%s", config.FelixDataplaneSocket) - } - defer func() { - listener.Close() - os.RemoveAll(config.FelixDataplaneSocket) - }() - err = s.createAllPodsIpset() - if err != nil { - return errors.Wrap(err, "Error in createallPodsIpset") - } - err = s.createEndpointToHostPolicy() - if err != nil { - return errors.Wrap(err, "Error in createEndpointToHostPolicy") - } - err = s.createAllowFromHostPolicy() - if err != nil { - return errors.Wrap(err, "Error in creating AllowFromHostPolicy") - } - err = s.createAllowToHostPolicy() - if err != nil { - return errors.Wrap(err, "Error in createAllowToHostPolicy") - } - err = s.createFailSafePolicies() - if err != nil { - return errors.Wrap(err, "Error in createFailSafePolicies") - } - 
for { - s.state = StateDisconnected - // Accept only one connection - conn, err := listener.Accept() - if err != nil { - return errors.Wrap(err, "cannot accept felix client connection") - } - s.log.Infof("Accepted connection from felix") - s.state = StateConnected - - felixUpdates := s.MessageReader(conn) - innerLoop: - for { - select { - case <-t.Dying(): - s.log.Warn("Felix server exiting") - err = conn.Close() - if err != nil { - s.log.WithError(err).Warn("Error closing unix connection to felix API proxy") - } - s.log.Infof("Waiting for SyncFelix to stop...") - return nil - case evt := <-s.felixServerEventChan: - err = s.handleFelixServerEvents(evt) - if err != nil { - s.log.WithError(err).Warn("Error handling FelixServerEvents") - } - // <-felixUpdates & handleFelixUpdate does the bulk of the policy sync job. It starts by reconciling the current - // configured state in VPP (empty at first) with what is sent by felix, and once both are in - // sync, it keeps processing felix updates. It also sends endpoint updates to felix when the - // CNI component adds or deletes container interfaces. - case msg, ok := <-felixUpdates: - if !ok { - s.log.Infof("Felix MessageReader closed") - break innerLoop - } - err = s.handleFelixUpdate(msg) - if err != nil { - switch err.(type) { - case NodeWatcherRestartError: - return err - default: - s.log.WithError(err).Error("Error processing update from felix, restarting") - // TODO: Restart VPP as well? State is left over there... 
- break innerLoop - } - } - } - } - err = conn.Close() - if err != nil { - s.log.WithError(err).Warn("Error closing unix connection to felix API proxy") - } - s.log.Infof("SyncFelix exited, reconnecting to felix") - } -} - -func (s *Server) handleFelixUpdate(msg interface{}) (err error) { - s.log.Debugf("Got message from felix: %#v", msg) - switch m := msg.(type) { - case *proto.ConfigUpdate: - err = s.handleConfigUpdate(m) - case *proto.InSync: - err = s.handleInSync(m) - default: - pending := true - switch s.state { - case StateSyncing: - case StateInSync: - pending = false - default: - return fmt.Errorf("got message %#v but not in syncing or synced state", m) - } - switch m := msg.(type) { - case *proto.IPSetUpdate: - err = s.handleIpsetUpdate(m, pending) - case *proto.IPSetDeltaUpdate: - err = s.handleIpsetDeltaUpdate(m, pending) - case *proto.IPSetRemove: - err = s.handleIpsetRemove(m, pending) - case *proto.ActivePolicyUpdate: - err = s.handleActivePolicyUpdate(m, pending) - case *proto.ActivePolicyRemove: - err = s.handleActivePolicyRemove(m, pending) - case *proto.ActiveProfileUpdate: - err = s.handleActiveProfileUpdate(m, pending) - case *proto.ActiveProfileRemove: - err = s.handleActiveProfileRemove(m, pending) - case *proto.HostEndpointUpdate: - err = s.handleHostEndpointUpdate(m, pending) - case *proto.HostEndpointRemove: - err = s.handleHostEndpointRemove(m, pending) - case *proto.WorkloadEndpointUpdate: - err = s.handleWorkloadEndpointUpdate(m, pending) - case *proto.WorkloadEndpointRemove: - err = s.handleWorkloadEndpointRemove(m, pending) - case *proto.HostMetadataUpdate: - err = s.handleHostMetadataUpdate(m, pending) - case *proto.HostMetadataRemove: - err = s.handleHostMetadataRemove(m, pending) - case *proto.HostMetadataV4V6Update: - err = s.handleHostMetadataV4V6Update(m, pending) - case *proto.HostMetadataV4V6Remove: - err = s.handleHostMetadataV4V6Remove(m, pending) - case *proto.IPAMPoolUpdate: - err = s.handleIpamPoolUpdate(m, pending) - 
case *proto.IPAMPoolRemove: - err = s.handleIpamPoolRemove(m, pending) - case *proto.ServiceAccountUpdate: - err = s.handleServiceAccountUpdate(m, pending) - case *proto.ServiceAccountRemove: - err = s.handleServiceAccountRemove(m, pending) - case *proto.NamespaceUpdate: - err = s.handleNamespaceUpdate(m, pending) - case *proto.NamespaceRemove: - err = s.handleNamespaceRemove(m, pending) - case *proto.GlobalBGPConfigUpdate: - err = s.handleGlobalBGPConfigUpdate(m, pending) - case *proto.WireguardEndpointUpdate: - err = s.handleWireguardEndpointUpdate(m, pending) - case *proto.WireguardEndpointRemove: - err = s.handleWireguardEndpointRemove(m, pending) - default: - s.log.Warnf("Unhandled message from felix: %v", m) - } - } - return err -} - -func (s *Server) currentState(pending bool) *PolicyState { - if pending { - return s.pendingState - } - return s.configuredState -} - -/** - * remove add the fields of type `file` we dont need and for which the - * parsing will fail - * - * This logic is extracted from `loadParams` in [0] - * [0] projectcalico/felix/config/config_params.go:Config - * it applies the regex only on the reflected struct definition, - * not on the live data. - * - **/ -func removeFelixConfigFileField(rawData map[string]string) { - config := felixConfig.Config{} - kind := reflect.TypeOf(config) - metaRegexp := regexp.MustCompile(`^([^;(]+)(?:\(([^)]*)\))?;` + - `([^;]*)(?:;` + - `([^;]*))?$`) - for ii := 0; ii < kind.NumField(); ii++ { - field := kind.Field(ii) - tag := field.Tag.Get("config") - if tag == "" { - continue - } - captures := metaRegexp.FindStringSubmatch(tag) - kind := captures[1] // Type: "int|oneof|bool|port-list|..." 
- if kind == "file" { - delete(rawData, field.Name) - } - } -} - -// the msg.Config map[string]string is the serialized object -// projectcalico/felix/config/config_params.go:Config -func (s *Server) handleConfigUpdate(msg *proto.ConfigUpdate) (err error) { - if s.state != StateConnected { - return fmt.Errorf("received ConfigUpdate but server is not in Connected state! state: %v", s.state) - } - s.log.Infof("Got config from felix: %+v", msg) - s.state = StateSyncing - - oldFelixConfig := s.felixConfig - removeFelixConfigFileField(msg.Config) - s.felixConfig = felixConfig.New() - _, err = s.felixConfig.UpdateFrom(msg.Config, felixConfig.InternalOverride) - if err != nil { - return err - } - changed := !reflect.DeepEqual(oldFelixConfig.RawValues(), s.felixConfig.RawValues()) - - // Note: This function will be called each time the Felix config changes. - // If we start handling config settings that require agent restart, - // we'll need to add a mechanism for that - if !s.felixConfigReceived { - s.felixConfigReceived = true - s.FelixConfigChan <- s.felixConfig - } - - if !changed { - return nil - } - - common.SendEvent(common.CalicoVppEvent{ - Type: common.FelixConfChanged, - New: s.felixConfig, - Old: oldFelixConfig, - }) - - if s.felixConfig.DefaultEndpointToHostAction != oldFelixConfig.DefaultEndpointToHostAction { - s.log.Infof("Change in EndpointToHostAction to %+v", s.getEndpointToHostAction()) - workloadsToHostAllowRule := &Rule{ - VppID: types.InvalidID, - Rule: &types.Rule{ - Action: s.getEndpointToHostAction(), - }, - SrcIPSetNames: []string{"calico-vpp-wep-addr-ipset"}, - } - policy := s.workloadsToHostPolicy.DeepCopy() - policy.InboundRules = []*Rule{workloadsToHostAllowRule} - err := s.workloadsToHostPolicy.Update(s.vpp, policy, - &PolicyState{IPSets: map[string]*IPSet{"calico-vpp-wep-addr-ipset": s.allPodsIpset}}) - if err != nil { - return errors.Wrap(err, "error updating workloadsToHostPolicy") - } - } - if 
!protoPortListEqual(s.felixConfig.FailsafeInboundHostPorts, oldFelixConfig.FailsafeInboundHostPorts) || - !protoPortListEqual(s.felixConfig.FailsafeOutboundHostPorts, oldFelixConfig.FailsafeOutboundHostPorts) { - err = s.createFailSafePolicies() - if err != nil { - return errors.Wrap(err, "error updating FailSafePolicies") - } - } - - return nil -} - -func protoPortListEqual(a, b []felixConfig.ProtoPort) bool { - if len(a) != len(b) { - return false - } - for i, elemA := range a { - elemB := b[i] - if elemA.Net != elemB.Net { - return false - } - if elemA.Protocol != elemB.Protocol { - return false - } - if elemA.Port != elemB.Port { - return false - } - } - return true -} - -func (s *Server) handleInSync(msg *proto.InSync) (err error) { - if s.state != StateSyncing { - return fmt.Errorf("received InSync but state was not syncing") - } - s.endpointsLock.Lock() - defer s.endpointsLock.Unlock() - - s.state = StateInSync - s.log.Infof("Policies now in sync") - return s.applyPendingState() -} - -func (s *Server) handleIpsetUpdate(msg *proto.IPSetUpdate, pending bool) (err error) { - ips, err := fromIPSetUpdate(msg) - if err != nil { - return errors.Wrap(err, "cannot process IPSetUpdate") - } - state := s.currentState(pending) - _, ok := state.IPSets[msg.GetId()] - if ok { - return fmt.Errorf("received new ipset for ID %s that already exists", msg.GetId()) - } - if !pending { - err = ips.Create(s.vpp) - if err != nil { - return errors.Wrapf(err, "cannot create ipset %s", msg.GetId()) - } - } - state.IPSets[msg.GetId()] = ips - s.log.Debugf("Handled Ipset Update pending=%t id=%s %s", pending, msg.GetId(), ips) - return nil -} - -func (s *Server) handleIpsetDeltaUpdate(msg *proto.IPSetDeltaUpdate, pending bool) (err error) { - ips, ok := s.currentState(pending).IPSets[msg.GetId()] - if !ok { - return fmt.Errorf("received delta update for non-existent ipset") - } - err = ips.AddMembers(msg.GetAddedMembers(), !pending, s.vpp) - if err != nil { - return errors.Wrap(err, 
"cannot process ipset delta update") - } - err = ips.RemoveMembers(msg.GetRemovedMembers(), !pending, s.vpp) - if err != nil { - return errors.Wrap(err, "cannot process ipset delta update") - } - s.log.Debugf("Handled Ipset delta Update pending=%t id=%s %s", pending, msg.GetId(), ips) - return nil -} - -func (s *Server) handleIpsetRemove(msg *proto.IPSetRemove, pending bool) (err error) { - state := s.currentState(pending) - ips, ok := state.IPSets[msg.GetId()] - if !ok { - s.log.Warnf("Received ipset delete for ID %s that doesn't exists", msg.GetId()) - return nil - } - if !pending { - err = ips.Delete(s.vpp) - if err != nil { - return errors.Wrapf(err, "cannot delete ipset %s", msg.GetId()) - } - } - s.log.Debugf("Handled Ipset remove pending=%t id=%s %s", pending, msg.GetId(), ips) - delete(state.IPSets, msg.GetId()) - return nil -} - -func (s *Server) handleActivePolicyUpdate(msg *proto.ActivePolicyUpdate, pending bool) (err error) { - state := s.currentState(pending) - id := PolicyID{ - Tier: msg.Id.Tier, - Name: msg.Id.Name, - } - p, err := fromProtoPolicy(msg.Policy, "") - if err != nil { - return errors.Wrapf(err, "cannot process policy update") - } - - s.log.Infof("Handling ActivePolicyUpdate pending=%t id=%s %s", pending, id, p) - existing, ok := state.Policies[id] - if ok { // Policy with this ID already exists - if pending { - // Just replace policy in pending state - state.Policies[id] = p - } else { - err := existing.Update(s.vpp, p, state) - if err != nil { - return errors.Wrap(err, "cannot update policy") - } - } - } else { - // Create it in state - state.Policies[id] = p - if !pending { - err := p.Create(s.vpp, state) - if err != nil { - return errors.Wrap(err, "cannot create policy") - } - } - } - - for network := range s.networkDefinitions { - id := PolicyID{ - Tier: msg.Id.Tier, - Name: msg.Id.Name, - Network: network, - } - p, err := fromProtoPolicy(msg.Policy, network) - if err != nil { - return errors.Wrapf(err, "cannot process policy 
update") - } - - s.log.Infof("Handling ActivePolicyUpdate pending=%t id=%s %s", pending, id, p) - - existing, ok := state.Policies[id] - if ok { // Policy with this ID already exists - if pending { - // Just replace policy in pending state - state.Policies[id] = p - } else { - err := existing.Update(s.vpp, p, state) - if err != nil { - return errors.Wrap(err, "cannot update policy") - } - } - } else { - // Create it in state - state.Policies[id] = p - if !pending { - err := p.Create(s.vpp, state) - if err != nil { - return errors.Wrap(err, "cannot create policy") - } - } - } - - } - return nil -} - -func (s *Server) handleActivePolicyRemove(msg *proto.ActivePolicyRemove, pending bool) (err error) { - state := s.currentState(pending) - id := PolicyID{ - Tier: msg.Id.Tier, - Name: msg.Id.Name, - } - s.log.Infof("policy(del) Handling ActivePolicyRemove pending=%t id=%s", pending, id) - - for policyID := range state.Policies { - if policyID.Name == id.Name && policyID.Tier == id.Tier { - existing, ok := state.Policies[policyID] - if !ok { - s.log.Warnf("Received policy delete for Tier %s Name %s that doesn't exists", id.Tier, id.Name) - return nil - } - if !pending { - err = existing.Delete(s.vpp, state) - if err != nil { - return errors.Wrap(err, "error deleting policy") - } - } - delete(state.Policies, policyID) - } - } - return nil -} - -func (s *Server) handleActiveProfileUpdate(msg *proto.ActiveProfileUpdate, pending bool) (err error) { - state := s.currentState(pending) - id := msg.Id.Name - p, err := fromProtoProfile(msg.Profile) - if err != nil { - return errors.Wrapf(err, "cannot process profile update") - } - - existing, ok := state.Profiles[id] - if ok { // Policy with this ID already exists - if pending { - // Just replace policy in pending state - state.Profiles[id] = p - } else { - err := existing.Update(s.vpp, p, state) - if err != nil { - return errors.Wrap(err, "cannot update profile") - } - } - } else { - // Create it in state - state.Profiles[id] = p 
- if !pending { - err := p.Create(s.vpp, state) - if err != nil { - return errors.Wrap(err, "cannot create profile") - } - } - } - s.log.Infof("policy(upd) Handled Profile Update pending=%t id=%s existing=%s new=%s", pending, id, existing, p) - return nil -} - -func (s *Server) handleActiveProfileRemove(msg *proto.ActiveProfileRemove, pending bool) (err error) { - state := s.currentState(pending) - id := msg.Id.Name - existing, ok := state.Profiles[id] - if !ok { - s.log.Warnf("Received profile delete for Name %s that doesn't exists", id) - return nil - } - if !pending { - err = existing.Delete(s.vpp, state) - if err != nil { - return errors.Wrap(err, "error deleting profile") - } - } - s.log.Infof("policy(del) Handled Profile Remove pending=%t id=%s policy=%s", pending, id, existing) - delete(state.Profiles, id) - return nil + return server } -func (s *Server) getAllTunnelSwIfIndexes() (swIfIndexes []uint32) { - s.tunnelSwIfIndexesLock.Lock() - defer s.tunnelSwIfIndexesLock.Unlock() - - swIfIndexes = make([]uint32, 0) - for k := range s.tunnelSwIfIndexes { - swIfIndexes = append(swIfIndexes, k) - } - return swIfIndexes +func (s *Server) GetFelixServerEventChan() chan any { + return s.felixServerEventChan } -func (s *Server) handleHostEndpointUpdate(msg *proto.HostEndpointUpdate, pending bool) (err error) { - state := s.currentState(pending) - id := fromProtoHostEndpointID(msg.Id) - hep := fromProtoHostEndpoint(msg.Endpoint, s) - if hep.InterfaceName != "" && hep.InterfaceName != "*" { - interfaceDetails, found := s.interfacesMap[hep.InterfaceName] - if found { - hep.UplinkSwIfIndexes = append(hep.UplinkSwIfIndexes, interfaceDetails.uplinkIndex) - hep.TapSwIfIndexes = append(hep.TapSwIfIndexes, interfaceDetails.tapIndex) - } else { - // we are not supposed to fallback to expectedIPs if interfaceName doesn't match - // this is the current behavior in calico linux - s.log.Errorf("cannot find host endpoint: interface named %s does not exist", hep.InterfaceName) - } - 
} else if hep.InterfaceName == "" && hep.expectedIPs != nil { - for _, existingIf := range s.interfacesMap { - interfaceFound: - for _, address := range existingIf.addresses { - for _, expectedIP := range hep.expectedIPs { - if address == expectedIP { - hep.UplinkSwIfIndexes = append(hep.UplinkSwIfIndexes, existingIf.uplinkIndex) - hep.TapSwIfIndexes = append(hep.TapSwIfIndexes, existingIf.tapIndex) - break interfaceFound - } - } - } - } - } else if hep.InterfaceName == "*" { - for _, interfaceDetails := range s.interfacesMap { - hep.UplinkSwIfIndexes = append(hep.UplinkSwIfIndexes, interfaceDetails.uplinkIndex) - hep.TapSwIfIndexes = append(hep.TapSwIfIndexes, interfaceDetails.tapIndex) - } - } - hep.TunnelSwIfIndexes = s.getAllTunnelSwIfIndexes() - if len(hep.UplinkSwIfIndexes) == 0 || len(hep.TapSwIfIndexes) == 0 { - s.log.Warnf("No interface in vpp for host endpoint id=%s hep=%s", id.EndpointID, hep.String()) - return nil - } - - existing, found := state.HostEndpoints[*id] - if found { - if pending { - hep.currentForwardConf = existing.currentForwardConf - state.HostEndpoints[*id] = hep - } else { - err := existing.Update(s.vpp, hep, state) - if err != nil { - return errors.Wrap(err, "cannot update host endpoint") - } - } - s.log.Infof("policy(upd) Updating host endpoint id=%s found=%t existing=%s new=%s", *id, found, existing, hep) - } else { - state.HostEndpoints[*id] = hep - if !pending { - err := hep.Create(s.vpp, state) - if err != nil { - return errors.Wrap(err, "cannot create host endpoint") - } - } - s.log.Infof("policy(add) Updating host endpoint id=%s found=%t new=%s", *id, found, hep) - } - return nil +func (s *Server) GotOurNodeBGPchan() chan *common.LocalNodeSpec { + return s.policiesHandler.GotOurNodeBGPchan } -func (s *Server) handleHostEndpointRemove(msg *proto.HostEndpointRemove, pending bool) (err error) { - state := s.currentState(pending) - id := fromProtoHostEndpointID(msg.Id) - existing, ok := state.HostEndpoints[*id] - if !ok { - 
s.log.Warnf("Received host endpoint delete for id=%s that doesn't exists", id) - return nil - } - if !pending && len(existing.UplinkSwIfIndexes) != 0 { - err = existing.Delete(s.vpp, s.configuredState) - if err != nil { - return errors.Wrap(err, "error deleting host endpoint") - } - } - s.log.Infof("policy(del) Handled Host Endpoint Remove pending=%t id=%s %s", pending, id, existing) - delete(state.HostEndpoints, *id) - return nil -} - -func (s *Server) getAllWorkloadEndpointIdsFromUpdate(msg *proto.WorkloadEndpointUpdate) []*WorkloadEndpointID { - id := fromProtoEndpointID(msg.Id) - idsNetworks := []*WorkloadEndpointID{id} - netStatusesJSON, found := msg.Endpoint.Annotations["k8s.v1.cni.cncf.io/network-status"] - if !found { - s.log.Infof("no network status for pod, no multiple networks") - } else { - var netStatuses []nettypes.NetworkStatus - err := json.Unmarshal([]byte(netStatusesJSON), &netStatuses) - if err != nil { - s.log.Error(err) - } - for _, networkStatus := range netStatuses { - for netDefName, netDef := range s.networkDefinitions { - if networkStatus.Name == netDef.NetAttachDefs { - id := &WorkloadEndpointID{OrchestratorID: id.OrchestratorID, WorkloadID: id.WorkloadID, EndpointID: id.EndpointID, Network: netDefName} - idsNetworks = append(idsNetworks, id) - } - } - } - } - return idsNetworks -} - -func (s *Server) handleWorkloadEndpointUpdate(msg *proto.WorkloadEndpointUpdate, pending bool) (err error) { - s.endpointsLock.Lock() - defer s.endpointsLock.Unlock() - - state := s.currentState(pending) - idsNetworks := s.getAllWorkloadEndpointIdsFromUpdate(msg) - for _, id := range idsNetworks { - wep := fromProtoWorkload(msg.Endpoint, s) - existing, found := state.WorkloadEndpoints[*id] - swIfIndexMap, swIfIndexFound := s.endpointsInterfaces[*id] - - if found { - if pending || !swIfIndexFound { - state.WorkloadEndpoints[*id] = wep - s.log.Infof("policy(upd) Workload Endpoint Update pending=%t id=%s existing=%s new=%s swIf=??", pending, *id, existing, wep) 
- } else { - err := existing.Update(s.vpp, wep, state, id.Network) - if err != nil { - return errors.Wrap(err, "cannot update workload endpoint") - } - s.log.Infof("policy(upd) Workload Endpoint Update pending=%t id=%s existing=%s new=%s swIf=%v", pending, *id, existing, wep, swIfIndexMap) - } - } else { - state.WorkloadEndpoints[*id] = wep - if !pending && swIfIndexFound { - swIfIndexList := []uint32{} - for _, idx := range swIfIndexMap { - swIfIndexList = append(swIfIndexList, idx) - } - err := wep.Create(s.vpp, swIfIndexList, state, id.Network) - if err != nil { - return errors.Wrap(err, "cannot create workload endpoint") - } - s.log.Infof("policy(add) Workload Endpoint add pending=%t id=%s new=%s swIf=%v", pending, *id, wep, swIfIndexMap) - } else { - s.log.Infof("policy(add) Workload Endpoint add pending=%t id=%s new=%s swIf=??", pending, *id, wep) - } - } - } - return nil -} - -func (s *Server) handleWorkloadEndpointRemove(msg *proto.WorkloadEndpointRemove, pending bool) (err error) { - s.endpointsLock.Lock() - defer s.endpointsLock.Unlock() - - state := s.currentState(pending) - id := fromProtoEndpointID(msg.Id) - existing, ok := state.WorkloadEndpoints[*id] - if !ok { - s.log.Warnf("Received workload endpoint delete for %v that doesn't exists", id) - return nil - } - if !pending && len(existing.SwIfIndex) != 0 { - err = existing.Delete(s.vpp) - if err != nil { - return errors.Wrap(err, "error deleting workload endpoint") - } - } - s.log.Infof("policy(del) Handled Workload Endpoint Remove pending=%t id=%s existing=%s", pending, *id, existing) - delete(state.WorkloadEndpoints, *id) - for existingID := range state.WorkloadEndpoints { - if existingID.OrchestratorID == id.OrchestratorID && existingID.WorkloadID == id.WorkloadID { - if !pending && len(existing.SwIfIndex) != 0 { - err = existing.Delete(s.vpp) - if err != nil { - return errors.Wrap(err, "error deleting workload endpoint") - } - } - s.log.Infof("policy(del) Handled Workload Endpoint Remove 
pending=%t id=%s existing=%s", pending, existingID, existing) - delete(state.WorkloadEndpoints, existingID) - } - } - return nil -} - -func (s *Server) handleHostMetadataUpdate(msg *proto.HostMetadataUpdate, pending bool) (err error) { - s.log.Debugf("Ignoring HostMetadataUpdate") - return nil -} - -func (s *Server) handleHostMetadataRemove(msg *proto.HostMetadataRemove, pending bool) (err error) { - s.log.Debugf("Ignoring HostMetadataRemove") - return nil -} - -func (s *Server) handleWireguardEndpointUpdate(msg *proto.WireguardEndpointUpdate, pending bool) (err error) { - s.log.Infof("Received wireguard public key %+v", msg) - var old *common.NodeWireguardPublicKey - _, ok := s.nodeByWGPublicKey[msg.Hostname] - if ok { - old = &common.NodeWireguardPublicKey{Name: msg.Hostname, WireguardPublicKey: s.nodeByWGPublicKey[msg.Hostname]} - } else { - old = &common.NodeWireguardPublicKey{Name: msg.Hostname} - } - new := &common.NodeWireguardPublicKey{Name: msg.Hostname, WireguardPublicKey: msg.PublicKey} - common.SendEvent(common.CalicoVppEvent{ - Type: common.WireguardPublicKeyChanged, - Old: old, - New: new, - }) - return nil -} - -func (s *Server) handleWireguardEndpointRemove(msg *proto.WireguardEndpointRemove, pending bool) (err error) { - return nil -} - -func (s *Server) handleHostMetadataV4V6Update(msg *proto.HostMetadataV4V6Update, pending bool) (err error) { - localNodeSpec, err := common.NewLocalNodeSpec(msg) - if err != nil { - return errors.Wrapf(err, "handleHostMetadataV4V6Update errored") - } - old, found := s.nodeStatesByName[localNodeSpec.Name] - - if localNodeSpec.Name == *config.NodeName && - (localNodeSpec.IPv4Address != nil || localNodeSpec.IPv6Address != nil) { - /* We found a BGP Spec that seems valid enough */ - s.GotOurNodeBGPchanOnce.Do(func() { - s.GotOurNodeBGPchan <- localNodeSpec - }) - if localNodeSpec.IPv4Address != nil { - s.ip4 = &localNodeSpec.IPv4Address.IP - } - if localNodeSpec.IPv6Address != nil { - s.ip6 = 
&localNodeSpec.IPv6Address.IP - } - err = s.createAllowFromHostPolicy() - if err != nil { - return errors.Wrap(err, "Error in creating AllowFromHostPolicy") - } - err = s.createAllowToHostPolicy() - if err != nil { - return errors.Wrap(err, "Error in createAllowToHostPolicy") - } - } - - // This is used by the routing server to process Wireguard key updates - // As a result we only send an event when a node is updated, not when it is added or deleted - common.SendEvent(common.CalicoVppEvent{ - Type: common.PeerNodeStateChanged, - Old: old, - New: localNodeSpec, - }) - - if !found { - s.configureRemoteNodeSnat(localNodeSpec, true /* isAdd */) - } else { - change := common.GetIPNetChangeType(old.IPv4Address, localNodeSpec.IPv4Address) | common.GetIPNetChangeType(old.IPv6Address, localNodeSpec.IPv6Address) - if change&(common.ChangeDeleted|common.ChangeUpdated) != 0 && localNodeSpec.Name == *config.NodeName { - // restart if our BGP config changed - return NodeWatcherRestartError{} - } - if change != common.ChangeSame { - s.configureRemoteNodeSnat(old, false /* isAdd */) - s.configureRemoteNodeSnat(localNodeSpec, true /* isAdd */) - } - } - - s.nodeStatesByName[localNodeSpec.Name] = localNodeSpec - return nil -} - -func (s *Server) configureRemoteNodeSnat(node *common.LocalNodeSpec, isAdd bool) { - if node.IPv4Address != nil { - err := s.vpp.CnatAddDelSnatPrefix(common.ToMaxLenCIDR(node.IPv4Address.IP), isAdd) - if err != nil { - s.log.Errorf("error configuring snat prefix for current node (%v): %v", node.IPv4Address.IP, err) - } - } - if node.IPv6Address != nil { - err := s.vpp.CnatAddDelSnatPrefix(common.ToMaxLenCIDR(node.IPv6Address.IP), isAdd) - if err != nil { - s.log.Errorf("error configuring snat prefix for current node (%v): %v", node.IPv6Address.IP, err) - } - } -} - -func (s *Server) handleHostMetadataV4V6Remove(msg *proto.HostMetadataV4V6Remove, pending bool) (err error) { - old, found := s.nodeStatesByName[msg.Hostname] - if !found { - return 
fmt.Errorf("node %s to delete not found", msg.Hostname) - } - - common.SendEvent(common.CalicoVppEvent{ - Type: common.PeerNodeStateChanged, - Old: old, - }) - if old.Name == *config.NodeName { - // restart if our BGP config changed - return NodeWatcherRestartError{} - } - - s.configureRemoteNodeSnat(old, false /* isAdd */) - return nil -} - -func (s *Server) handleIpamPoolUpdate(msg *proto.IPAMPoolUpdate, pending bool) (err error) { - if msg.GetId() == "" { - s.log.Debugf("Empty pool") - return nil - } - s.ippoolLock.Lock() - defer s.ippoolLock.Unlock() - - newIpamPool := msg.GetPool() - oldIpamPool, found := s.ippoolmap[msg.GetId()] - if found && ipamPoolEquals(newIpamPool, oldIpamPool) { - s.log.Infof("Unchanged pool: %s, nat:%t", msg.GetId(), newIpamPool.GetMasquerade()) - return nil - } else if found { - s.log.Infof("Updating pool: %s, nat:%t", msg.GetId(), newIpamPool.GetMasquerade()) - s.ippoolmap[msg.GetId()] = newIpamPool - if newIpamPool.GetCidr() != oldIpamPool.GetCidr() || - newIpamPool.GetMasquerade() != oldIpamPool.GetMasquerade() { - var err, err2 error - err = s.addDelSnatPrefix(oldIpamPool, false /* isAdd */) - err2 = s.addDelSnatPrefix(newIpamPool, true /* isAdd */) - if err != nil || err2 != nil { - return errors.Errorf("error updating snat prefix del:%s, add:%s", err, err2) - } - common.SendEvent(common.CalicoVppEvent{ - Type: common.IpamConfChanged, - Old: ipamPoolCopy(oldIpamPool), - New: ipamPoolCopy(newIpamPool), - }) - } - } else { - s.log.Infof("Adding pool: %s, nat:%t", msg.GetId(), newIpamPool.GetMasquerade()) - s.ippoolmap[msg.GetId()] = newIpamPool - s.log.Debugf("Pool %v Added, handler called", msg) - err = s.addDelSnatPrefix(newIpamPool, true /* isAdd */) - if err != nil { - return errors.Wrap(err, "error handling ipam add") - } - common.SendEvent(common.CalicoVppEvent{ - Type: common.IpamConfChanged, - Old: nil, - New: ipamPoolCopy(newIpamPool), - }) - } - return nil -} - -func (s *Server) handleIpamPoolRemove(msg 
*proto.IPAMPoolRemove, pending bool) (err error) { - if msg.GetId() == "" { - s.log.Debugf("Empty pool") - return nil - } - - s.ippoolLock.Lock() - defer s.ippoolLock.Unlock() - oldIpamPool, found := s.ippoolmap[msg.GetId()] - if found { - delete(s.ippoolmap, msg.GetId()) - s.log.Infof("Deleting pool: %s", msg.GetId()) - s.log.Debugf("Pool %s deleted, handler called", oldIpamPool.Cidr) - err = s.addDelSnatPrefix(oldIpamPool, false /* isAdd */) - if err != nil { - return errors.Wrap(err, "error handling ipam deletion") - } - common.SendEvent(common.CalicoVppEvent{ - Type: common.IpamConfChanged, - Old: ipamPoolCopy(oldIpamPool), - New: nil, - }) - } else { - s.log.Warnf("Deleting unknown ippool") - return nil - } - return nil -} - -func ipamPoolCopy(ipamPool *proto.IPAMPool) *proto.IPAMPool { - if ipamPool != nil { - return &proto.IPAMPool{ - Cidr: ipamPool.Cidr, - Masquerade: ipamPool.Masquerade, - IpipMode: ipamPool.IpipMode, - VxlanMode: ipamPool.VxlanMode, - } - } - return nil -} - -// Compare only the fields that make a difference for this agent i.e. the fields that have an impact on routing -func ipamPoolEquals(a *proto.IPAMPool, b *proto.IPAMPool) bool { - if (a == nil || b == nil) && a != b { - return false - } - if a.Cidr != b.Cidr { - return false - } - if a.IpipMode != b.IpipMode { - return false - } - if a.VxlanMode != b.VxlanMode { - return false - } - return true +func (s *Server) GetCache() *cache.Cache { + return s.cache } -// addDelSnatPrefix configures IP Pool prefixes so that we don't source-NAT the packets going -// to these addresses. 
All the IP Pools prefixes are configured that way so that pod <-> pod -// communications are never source-nated in the cluster -// Note(aloaugus) - I think the iptables dataplane behaves differently and uses the k8s level -// pod CIDR for this rather than the individual pool prefixes -func (s *Server) addDelSnatPrefix(pool *proto.IPAMPool, isAdd bool) (err error) { - _, ipNet, err := net.ParseCIDR(pool.GetCidr()) - if err != nil { - return errors.Wrapf(err, "Couldn't parse pool CIDR %s", pool.Cidr) - } - err = s.vpp.CnatAddDelSnatPrefix(ipNet, isAdd) - if err != nil { - return errors.Wrapf(err, "Couldn't configure SNAT prefix") - } - return nil +func (s *Server) SetBGPConf(bgpConf *calicov3.BGPConfigurationSpec) { + s.cache.BGPConf = bgpConf } -// match checks whether we have an IP pool which contains the given prefix. -// If we have, it returns the pool. func (s *Server) GetPrefixIPPool(prefix *net.IPNet) *proto.IPAMPool { s.ippoolLock.RLock() defer s.ippoolLock.RUnlock() - for _, pool := range s.ippoolmap { - in, err := ipamPoolContains(pool, prefix) - if err != nil { - s.log.Warnf("ipamPoolContains errored: %v", err) - continue - } - if in { - return pool - } - } - s.log.Warnf("No pool found for %s", prefix) - for k, pool := range s.ippoolmap { - s.log.Debugf("Available %s=%v", k, pool) - } - return nil + return s.cache.GetPrefixIPPool(prefix) } func (s *Server) IPNetNeedsSNAT(prefix *net.IPNet) bool { @@ -1400,360 +114,150 @@ func (s *Server) IPNetNeedsSNAT(prefix *net.IPNet) bool { } } -// ipamPoolContains returns true if the IPPool contains 'prefix' -func ipamPoolContains(pool *proto.IPAMPool, prefix *net.IPNet) (bool, error) { - _, poolCIDR, _ := net.ParseCIDR(pool.GetCidr()) // this field is validated so this should never error - poolCIDRLen, poolCIDRBits := poolCIDR.Mask.Size() - prefixLen, prefixBits := prefix.Mask.Size() - return poolCIDRBits == prefixBits && poolCIDR.Contains(prefix.IP) && prefixLen >= poolCIDRLen, nil -} - -func (s *Server) 
handleServiceAccountUpdate(msg *proto.ServiceAccountUpdate, pending bool) (err error) { - s.log.Debugf("Ignoring ServiceAccountUpdate") - return nil -} - -func (s *Server) handleServiceAccountRemove(msg *proto.ServiceAccountRemove, pending bool) (err error) { - s.log.Debugf("Ignoring ServiceAccountRemove") - return nil -} - -func (s *Server) handleNamespaceUpdate(msg *proto.NamespaceUpdate, pending bool) (err error) { - s.log.Debugf("Ignoring NamespaceUpdate") - return nil -} - -func (s *Server) handleNamespaceRemove(msg *proto.NamespaceRemove, pending bool) (err error) { - s.log.Debugf("Ignoring NamespaceRemove") - return nil -} - -func (s *Server) handleGlobalBGPConfigUpdate(msg *proto.GlobalBGPConfigUpdate, pending bool) (err error) { - s.log.Infof("Got GlobalBGPConfigUpdate") - common.SendEvent(common.CalicoVppEvent{ - Type: common.BGPConfChanged, - }) - return nil -} - -// Reconciles the pending state with the configured state -func (s *Server) applyPendingState() (err error) { - s.log.Infof("Reconciliating pending policy state with configured state") - // Stupid algorithm for now, delete all that is in configured state, and then recreate everything - for _, wep := range s.configuredState.WorkloadEndpoints { - if len(wep.SwIfIndex) != 0 { - err = wep.Delete(s.vpp) - if err != nil { - return errors.Wrap(err, "cannot cleanup workload endpoint") - } - } - } - for _, policy := range s.configuredState.Policies { - err = policy.Delete(s.vpp, s.configuredState) - if err != nil { - s.log.Warnf("error deleting policy: %v", err) - } - } - for _, profile := range s.configuredState.Profiles { - err = profile.Delete(s.vpp, s.configuredState) - if err != nil { - s.log.Warnf("error deleting profile: %v", err) - } - } - for _, ipset := range s.configuredState.IPSets { - err = ipset.Delete(s.vpp) - if err != nil { - s.log.Warnf("error deleting ipset: %v", err) - } - } - for _, hep := range s.configuredState.HostEndpoints { - if len(hep.UplinkSwIfIndexes) != 0 { - err = 
hep.Delete(s.vpp, s.configuredState) - if err != nil { - s.log.Warnf("error deleting hostendpoint : %v", err) - } - } - } +// Serve runs the felix server +// it does the bulk of the policy sync job. It starts by reconciling the current +// configured state in VPP (empty at first) with what is sent by felix, and once both are in +// sync, it keeps processing felix updates. It also sends endpoint updates to felix when the +// CNI component adds or deletes container interfaces. +func (s *Server) ServeFelix(t *tomb.Tomb) error { + s.log.Info("Starting felix server") - s.configuredState = s.pendingState - s.pendingState = NewPolicyState() - for _, ipset := range s.configuredState.IPSets { - err = ipset.Create(s.vpp) - if err != nil { - return errors.Wrap(err, "error creating ipset") - } - } - for _, profile := range s.configuredState.Profiles { - err = profile.Create(s.vpp, s.configuredState) - if err != nil { - return errors.Wrap(err, "error creating profile") - } - } - for _, policy := range s.configuredState.Policies { - err = policy.Create(s.vpp, s.configuredState) - if err != nil { - return errors.Wrap(err, "error creating policy") - } + err := s.policiesHandler.PoliciesHandlerInit() + if err != nil { + return errors.Wrap(err, "Error in PoliciesHandlerInit") } - for id, wep := range s.configuredState.WorkloadEndpoints { - intf, intfFound := s.endpointsInterfaces[id] - if intfFound { - swIfIndexList := []uint32{} - for _, idx := range intf { - swIfIndexList = append(swIfIndexList, idx) - } - err = wep.Create(s.vpp, swIfIndexList, s.configuredState, id.Network) + for { + select { + case <-t.Dying(): + s.log.Warn("Felix server exiting") + return nil + case msg := <-s.felixServerEventChan: + err = s.handleFelixServerEvents(msg) if err != nil { - return errors.Wrap(err, "cannot configure workload endpoint") + return errors.Wrapf(err, "Error handling FelixServerEvents") } } } - for _, hep := range s.configuredState.HostEndpoints { - err = hep.Create(s.vpp, 
s.configuredState) - if err != nil { - return errors.Wrap(err, "cannot create host endpoint") - } - } - s.log.Infof("Reconciliation done") - return nil } -func (s *Server) createAllowToHostPolicy() (err error) { - s.log.Infof("Creating policy to allow traffic to host that is applied on uplink") - ruleIn := &Rule{ - VppID: types.InvalidID, - RuleID: "calicovpp-internal-allowtohost", - Rule: &types.Rule{ - Action: types.ActionAllow, - DstNet: []net.IPNet{}, - }, - } - ruleOut := &Rule{ - VppID: types.InvalidID, - RuleID: "calicovpp-internal-allowtohost", - Rule: &types.Rule{ - Action: types.ActionAllow, - SrcNet: []net.IPNet{}, - }, - } - if s.ip4 != nil { - ruleIn.DstNet = append(ruleIn.DstNet, *common.FullyQualified(*s.ip4)) - ruleOut.SrcNet = append(ruleOut.SrcNet, *common.FullyQualified(*s.ip4)) - } - if s.ip6 != nil { - ruleIn.DstNet = append(ruleIn.DstNet, *common.FullyQualified(*s.ip6)) - ruleOut.SrcNet = append(ruleOut.SrcNet, *common.FullyQualified(*s.ip6)) - } - - allowToHostPolicy := &Policy{ - Policy: &types.Policy{}, - VppID: types.InvalidID, - } - allowToHostPolicy.InboundRules = append(allowToHostPolicy.InboundRules, ruleIn) - allowToHostPolicy.OutboundRules = append(allowToHostPolicy.OutboundRules, ruleOut) - if s.allowToHostPolicy == nil { - err = allowToHostPolicy.Create(s.vpp, nil) - } else { - allowToHostPolicy.VppID = s.allowToHostPolicy.VppID - err = s.allowToHostPolicy.Update(s.vpp, allowToHostPolicy, nil) - } - s.allowToHostPolicy = allowToHostPolicy - if err != nil { - return errors.Wrap(err, "cannot create policy to allow traffic to host") - } - s.log.Infof("Created policy to allow traffic to host with ID: %+v", s.allowToHostPolicy.VppID) - return nil -} - -func (s *Server) createAllPodsIpset() (err error) { - ipset := NewIPSet() - err = ipset.Create(s.vpp) - if err != nil { - return err - } - s.allPodsIpset = ipset - return nil -} - -// createAllowFromHostPolicy creates a policy allowing host->pod communications. 
This is needed -// to maintain vanilla Calico's behavior where the host can always reach pods. -// This policy is applied in Egress on the host endpoint tap (i.e. linux -> VPP) -// and on the Ingress of Workload endpoints (i.e. VPP -> pod) -func (s *Server) createAllowFromHostPolicy() (err error) { - s.log.Infof("Creating rules to allow traffic from host to pods with egress policies") - ruleOut := &Rule{ - VppID: types.InvalidID, - RuleID: "calicovpp-internal-egressallowfromhost", - Rule: &types.Rule{ - Action: types.ActionAllow, - }, - DstIPSetNames: []string{"calico-vpp-wep-addr-ipset"}, - } - ps := PolicyState{IPSets: map[string]*IPSet{"calico-vpp-wep-addr-ipset": s.allPodsIpset}} - s.log.Infof("Creating rules to allow traffic from host to pods with ingress policies") - ruleIn := &Rule{ - VppID: types.InvalidID, - RuleID: "calicovpp-internal-ingressallowfromhost", - Rule: &types.Rule{ - Action: types.ActionAllow, - SrcNet: []net.IPNet{}, - }, - } - if s.ip4 != nil { - ruleIn.SrcNet = append(ruleIn.SrcNet, *common.FullyQualified(*s.ip4)) - } - if s.ip6 != nil { - ruleIn.SrcNet = append(ruleIn.SrcNet, *common.FullyQualified(*s.ip6)) - } - - allowFromHostPolicy := &Policy{ - Policy: &types.Policy{}, - VppID: types.InvalidID, - } - allowFromHostPolicy.OutboundRules = append(allowFromHostPolicy.OutboundRules, ruleOut) - allowFromHostPolicy.InboundRules = append(allowFromHostPolicy.InboundRules, ruleIn) - if s.AllowFromHostPolicy == nil { - err = allowFromHostPolicy.Create(s.vpp, &ps) - } else { - allowFromHostPolicy.VppID = s.AllowFromHostPolicy.VppID - err = s.AllowFromHostPolicy.Update(s.vpp, allowFromHostPolicy, &ps) - } - s.AllowFromHostPolicy = allowFromHostPolicy - if err != nil { - return errors.Wrap(err, "cannot create policy to allow traffic from host to pods") - } - s.log.Infof("Created allow from host to pods traffic with ID: %+v", s.AllowFromHostPolicy.VppID) - return nil -} - -func (s *Server) createEndpointToHostPolicy( /*may be return*/ ) (err error) { 
- workloadsToHostPolicy := &Policy{ - Policy: &types.Policy{}, - VppID: types.InvalidID, - } - workloadsToHostRule := &Rule{ - VppID: types.InvalidID, - Rule: &types.Rule{ - Action: s.getEndpointToHostAction(), - }, - SrcIPSetNames: []string{"calico-vpp-wep-addr-ipset"}, - } - ps := PolicyState{IPSets: map[string]*IPSet{"calico-vpp-wep-addr-ipset": s.allPodsIpset}} - workloadsToHostPolicy.InboundRules = append(workloadsToHostPolicy.InboundRules, workloadsToHostRule) - - err = workloadsToHostPolicy.Create(s.vpp, &ps) - if err != nil { - return err - } - s.workloadsToHostPolicy = workloadsToHostPolicy - - conf := types.NewInterfaceConfig() - conf.IngressPolicyIDs = append(conf.IngressPolicyIDs, s.workloadsToHostPolicy.VppID) - conf.PolicyDefaultTx = npol.NPOL_DEFAULT_ALLOW - conf.PolicyDefaultRx = npol.NPOL_DEFAULT_ALLOW - swifindexes, err := s.vpp.SearchInterfacesWithTagPrefix("host-") // tap0 interfaces - if err != nil { - s.log.Error(err) - } - for _, swifindex := range swifindexes { - err = s.vpp.ConfigurePolicies(uint32(swifindex), conf, 0) - if err != nil { - s.log.Error("cannot create policy to drop traffic to host") - } - } - s.defaultTap0IngressConf = conf.IngressPolicyIDs - s.defaultTap0EgressConf = conf.EgressPolicyIDs - return nil -} - -// createFailSafePolicies ensures the failsafe policies defined in the Felixconfiguration exist in VPP. -// check https://github.com/projectcalico/calico/blob/master/felix/rules/static.go :: failsafeInChain for the linux implementation -// To be noted. This does not implement the doNotTrack case as we do not yet support doNotTrack policies. 
-func (s *Server) createFailSafePolicies() (err error) { - failSafePol := &Policy{ - Policy: &types.Policy{}, - VppID: types.InvalidID, - } - - if len(s.felixConfig.FailsafeInboundHostPorts) != 0 { - for _, protoPort := range s.felixConfig.FailsafeInboundHostPorts { - protocol, err := parseProtocol(&proto.Protocol{NumberOrName: &proto.Protocol_Name{Name: protoPort.Protocol}}) - if err != nil { - s.log.WithError(err).Error("Failed to parse protocol in inbound failsafe rule. Skipping failsafe rule") - continue +func (s *Server) handleFelixServerEvents(msg interface{}) (err error) { + s.log.Debugf("Got message from felix: %#v", msg) + switch evt := msg.(type) { + case *proto.ConfigUpdate: + err = s.handleConfigUpdate(evt) + case *proto.InSync: + err = s.policiesHandler.OnInSync(evt) + case *common.FelixSocketStateChanged: + s.policiesHandler.OnFelixSocketStateChanged(evt) + case *proto.IPSetUpdate: + err = s.policiesHandler.OnIpsetUpdate(evt) + case *proto.IPSetDeltaUpdate: + err = s.policiesHandler.OnIpsetDeltaUpdate(evt) + case *proto.IPSetRemove: + err = s.policiesHandler.OnIpsetRemove(evt) + case *proto.ActivePolicyUpdate: + err = s.policiesHandler.OnActivePolicyUpdate(evt) + case *proto.ActivePolicyRemove: + err = s.policiesHandler.OnActivePolicyRemove(evt) + case *proto.ActiveProfileUpdate: + err = s.policiesHandler.OnActiveProfileUpdate(evt) + case *proto.ActiveProfileRemove: + err = s.policiesHandler.OnActiveProfileRemove(evt) + case *proto.HostEndpointUpdate: + err = s.policiesHandler.OnHostEndpointUpdate(evt) + case *proto.HostEndpointRemove: + err = s.policiesHandler.OnHostEndpointRemove(evt) + case *proto.WorkloadEndpointUpdate: + err = s.policiesHandler.OnWorkloadEndpointUpdate(evt) + case *proto.WorkloadEndpointRemove: + err = s.policiesHandler.OnWorkloadEndpointRemove(evt) + case *proto.HostMetadataUpdate: + s.log.Debugf("Ignoring HostMetadataUpdate") + case *proto.HostMetadataRemove: + s.log.Debugf("Ignoring HostMetadataRemove") + case 
*proto.HostMetadataV4V6Update: + err = s.policiesHandler.OnHostMetadataV4V6Update(evt) + case *proto.HostMetadataV4V6Remove: + err = s.policiesHandler.OnHostMetadataV4V6Remove(evt) + case *proto.IPAMPoolUpdate: + err = s.handleIpamPoolUpdate(evt) + case *proto.IPAMPoolRemove: + err = s.handleIpamPoolRemove(evt) + case *proto.ServiceAccountUpdate: + s.log.Debugf("Ignoring ServiceAccountUpdate") + case *proto.ServiceAccountRemove: + s.log.Debugf("Ignoring ServiceAccountRemove") + case *proto.NamespaceUpdate: + s.log.Debugf("Ignoring NamespaceUpdate") + case *proto.NamespaceRemove: + s.log.Debugf("Ignoring NamespaceRemove") + case *proto.GlobalBGPConfigUpdate: + s.log.Infof("Got GlobalBGPConfigUpdate") + common.SendEvent(common.CalicoVppEvent{ + Type: common.BGPConfChanged, + }) + case common.CalicoVppEvent: + /* Note: we will only receive events we ask for when registering the chan */ + switch evt.Type { + case common.NetAddedOrUpdated: + new, ok := evt.New.(*common.NetworkDefinition) + if !ok { + return fmt.Errorf("evt.New is not a (*common.NetworkDefinition) %v", evt.New) } - rule := &Rule{ - VppID: types.InvalidID, - RuleID: fmt.Sprintf("failsafe-in-%s-%s-%d", protoPort.Net, protoPort.Protocol, protoPort.Port), - Rule: &types.Rule{ - Action: types.ActionAllow, - // Ports are always filtered on the destination of packets - DstPortRange: []types.PortRange{{First: protoPort.Port, Last: protoPort.Port}}, - Filters: []types.RuleFilter{{ - ShouldMatch: true, - Type: types.NpolFilterProto, - Value: int(protocol), - }}, - }, + s.cache.NetworkDefinitions[new.Name] = new + s.cache.Networks[new.Vni] = new + case common.NetDeleted: + netDef, ok := evt.Old.(*common.NetworkDefinition) + if !ok { + return fmt.Errorf("evt.Old is not a (*common.NetworkDefinition) %v", evt.Old) } - if protoPort.Net != "" { - _, protoPortNet, err := net.ParseCIDR(protoPort.Net) - if err != nil { - s.log.WithError(err).Error("Failed to parse CIDR in inbound failsafe rule. 
Skipping failsafe rule") - continue - } - // Inbound packets are checked for where they come FROM - rule.SrcNet = append(rule.SrcNet, *protoPortNet) + delete(s.cache.NetworkDefinitions, netDef.Name) + delete(s.cache.Networks, netDef.Vni) + case common.PodAdded: + podSpec, ok := evt.New.(*model.LocalPodSpec) + if !ok { + return fmt.Errorf("evt.New is not a (*model.LocalPodSpec) %v", evt.New) } - failSafePol.InboundRules = append(failSafePol.InboundRules, rule) - } - } - - if len(s.felixConfig.FailsafeOutboundHostPorts) != 0 { - for _, protoPort := range s.felixConfig.FailsafeOutboundHostPorts { - protocol, err := parseProtocol(&proto.Protocol{NumberOrName: &proto.Protocol_Name{Name: protoPort.Protocol}}) - if err != nil { - s.log.WithError(err).Error("Failed to parse protocol in outbound failsafe rule. Skipping failsafe rule") - continue + swIfIndex := podSpec.TunTapSwIfIndex + if swIfIndex == vpplink.InvalidID { + swIfIndex = podSpec.MemifSwIfIndex } - rule := &Rule{ - VppID: types.InvalidID, - RuleID: fmt.Sprintf("failsafe-out-%s-%s-%d", protoPort.Net, protoPort.Protocol, protoPort.Port), - Rule: &types.Rule{ - Action: types.ActionAllow, - // Ports are always filtered on the destination of packets - DstPortRange: []types.PortRange{{First: protoPort.Port, Last: protoPort.Port}}, - Filters: []types.RuleFilter{{ - ShouldMatch: true, - Type: types.NpolFilterProto, - Value: int(protocol), - }}, - }, + s.policiesHandler.OnWorkloadAdded(&policies.WorkloadEndpointID{ + OrchestratorID: podSpec.OrchestratorID, + WorkloadID: podSpec.WorkloadID, + EndpointID: podSpec.EndpointID, + Network: podSpec.NetworkName, + }, swIfIndex, podSpec.InterfaceName, podSpec.GetContainerIPs()) + case common.PodDeleted: + podSpec, ok := evt.Old.(*model.LocalPodSpec) + if !ok { + return fmt.Errorf("evt.Old is not a (*model.LocalPodSpec) %v", evt.Old) + } + if podSpec != nil { + s.policiesHandler.OnWorkloadRemoved(&policies.WorkloadEndpointID{ + OrchestratorID: podSpec.OrchestratorID, + 
WorkloadID: podSpec.WorkloadID, + EndpointID: podSpec.EndpointID, + Network: podSpec.NetworkName, + }, podSpec.GetContainerIPs()) + } + case common.TunnelAdded: + swIfIndex, ok := evt.New.(uint32) + if !ok { + return fmt.Errorf("evt.New not a uint32 %v", evt.New) } - if protoPort.Net != "" { - _, protoPortNet, err := net.ParseCIDR(protoPort.Net) - if err != nil { - s.log.WithError(err).Error("Failed to parse CIDR in outbound failsafe rule. Skipping failsafe rule") - continue - } - // Outbound packets are checked for where they go TO - rule.DstNet = append(rule.DstNet, *protoPortNet) + s.policiesHandler.OnTunnelAdded(swIfIndex) + case common.TunnelDeleted: + swIfIndex, ok := evt.Old.(uint32) + if !ok { + return fmt.Errorf("evt.Old not a uint32 %v", evt.Old) } - failSafePol.OutboundRules = append(failSafePol.OutboundRules, rule) + s.policiesHandler.OnTunnelDelete(swIfIndex) + default: + s.log.Warnf("Unhandled CalicoVppEvent.Type: %s", evt.Type) } + default: + s.log.Warnf("Unhandled message from felix: %v", evt) } - - if s.failSafePolicy == nil { - err = failSafePol.Create(s.vpp, nil) - - } else { - failSafePol.VppID = s.failSafePolicy.VppID - err = s.failSafePolicy.Update(s.vpp, failSafePol, nil) - } - if err != nil { - return err - } - s.failSafePolicy = failSafePol - s.log.Infof("Created failsafe policy with ID %+v", s.failSafePolicy.VppID) - return nil + return err } diff --git a/calico-vpp-agent/felix/felixconfig.go b/calico-vpp-agent/felix/felixconfig.go new file mode 100644 index 00000000..1b650f8a --- /dev/null +++ b/calico-vpp-agent/felix/felixconfig.go @@ -0,0 +1,89 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package felix + +import ( + "reflect" + "regexp" + + felixConfig "github.com/projectcalico/calico/felix/config" + + "github.com/projectcalico/calico/felix/proto" +) + +/** + * remove add the fields of type `file` we dont need and for which the + * parsing will fail + * + * This logic is extracted from `loadParams` in [0] + * [0] projectcalico/felix/config/config_params.go:Config + * it applies the regex only on the reflected struct definition, + * not on the live data. + * + **/ +func removeFelixConfigFileField(rawData map[string]string) { + config := felixConfig.Config{} + kind := reflect.TypeOf(config) + metaRegexp := regexp.MustCompile(`^([^;(]+)(?:\(([^)]*)\))?;` + + `([^;]*)(?:;` + + `([^;]*))?$`) + for ii := 0; ii < kind.NumField(); ii++ { + field := kind.Field(ii) + tag := field.Tag.Get("config") + if tag == "" { + continue + } + captures := metaRegexp.FindStringSubmatch(tag) + kind := captures[1] // Type: "int|oneof|bool|port-list|..." 
+ if kind == "file" { + delete(rawData, field.Name) + } + } +} + +// the msg.Config map[string]string is the serialized object +// projectcalico/felix/config/config_params.go:Config +func (s *Server) handleConfigUpdate(msg *proto.ConfigUpdate) (err error) { + s.log.Infof("Got config from felix: %+v", msg) + + oldFelixConfig := s.cache.FelixConfig + removeFelixConfigFileField(msg.Config) + s.cache.FelixConfig = felixConfig.New() + _, err = s.cache.FelixConfig.UpdateFrom(msg.Config, felixConfig.InternalOverride) + if err != nil { + return err + } + changed := !reflect.DeepEqual( + oldFelixConfig.RawValues(), + s.cache.FelixConfig.RawValues(), + ) + + // Note: This function will be called each time the Felix config changes. + // If we start handling config settings that require agent restart, + // we'll need to add a mechanism for that + if !s.felixConfigReceived { + s.felixConfigReceived = true + s.FelixConfigChan <- s.cache.FelixConfig + } + + if !changed { + return nil + } + + s.policiesHandler.OnFelixConfChanged(oldFelixConfig, s.cache.FelixConfig) + + return nil +} diff --git a/calico-vpp-agent/felix/ipam.go b/calico-vpp-agent/felix/ipam.go new file mode 100644 index 00000000..9d5b26e1 --- /dev/null +++ b/calico-vpp-agent/felix/ipam.go @@ -0,0 +1,146 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package felix + +import ( + "net" + + "github.com/pkg/errors" + + "github.com/projectcalico/calico/felix/proto" + + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" +) + +func (s *Server) handleIpamPoolUpdate(msg *proto.IPAMPoolUpdate) (err error) { + if msg.GetId() == "" { + s.log.Debugf("Empty pool") + return nil + } + s.ippoolLock.Lock() + defer s.ippoolLock.Unlock() + + newIpamPool := msg.GetPool() + oldIpamPool, found := s.cache.IPPoolMap[msg.GetId()] + if found && ipamPoolEquals(newIpamPool, oldIpamPool) { + s.log.Infof("Unchanged pool: %s, nat:%t", msg.GetId(), newIpamPool.GetMasquerade()) + return nil + } else if found { + s.log.Infof("Updating pool: %s, nat:%t", msg.GetId(), newIpamPool.GetMasquerade()) + s.cache.IPPoolMap[msg.GetId()] = newIpamPool + if newIpamPool.GetCidr() != oldIpamPool.GetCidr() || + newIpamPool.GetMasquerade() != oldIpamPool.GetMasquerade() { + var err, err2 error + err = s.addDelSnatPrefix(oldIpamPool, false /* isAdd */) + err2 = s.addDelSnatPrefix(newIpamPool, true /* isAdd */) + if err != nil || err2 != nil { + return errors.Errorf("error updating snat prefix del:%s, add:%s", err, err2) + } + common.SendEvent(common.CalicoVppEvent{ + Type: common.IpamConfChanged, + Old: ipamPoolCopy(oldIpamPool), + New: ipamPoolCopy(newIpamPool), + }) + } + } else { + s.log.Infof("Adding pool: %s, nat:%t", msg.GetId(), newIpamPool.GetMasquerade()) + s.cache.IPPoolMap[msg.GetId()] = newIpamPool + s.log.Debugf("Pool %v Added, handler called", msg) + err = s.addDelSnatPrefix(newIpamPool, true /* isAdd */) + if err != nil { + return errors.Wrap(err, "error handling ipam add") + } + common.SendEvent(common.CalicoVppEvent{ + Type: common.IpamConfChanged, + New: ipamPoolCopy(newIpamPool), + }) + } + return nil +} + +func (s *Server) handleIpamPoolRemove(msg *proto.IPAMPoolRemove) (err error) { + if msg.GetId() == "" { + s.log.Debugf("Empty pool") + return nil + } + s.ippoolLock.Lock() + defer s.ippoolLock.Unlock() + oldIpamPool, 
found := s.cache.IPPoolMap[msg.GetId()] + if found { + delete(s.cache.IPPoolMap, msg.GetId()) + s.log.Infof("Deleting pool: %s", msg.GetId()) + s.log.Debugf("Pool %s deleted, handler called", oldIpamPool.Cidr) + err = s.addDelSnatPrefix(oldIpamPool, false /* isAdd */) + if err != nil { + return errors.Wrap(err, "error handling ipam deletion") + } + common.SendEvent(common.CalicoVppEvent{ + Type: common.IpamConfChanged, + Old: ipamPoolCopy(oldIpamPool), + New: nil, + }) + } else { + s.log.Warnf("Deleting unknown ippool") + return nil + } + return nil +} + +func ipamPoolCopy(ipamPool *proto.IPAMPool) *proto.IPAMPool { + if ipamPool != nil { + return &proto.IPAMPool{ + Cidr: ipamPool.Cidr, + Masquerade: ipamPool.Masquerade, + IpipMode: ipamPool.IpipMode, + VxlanMode: ipamPool.VxlanMode, + } + } + return nil +} + +// Compare only the fields that make a difference for this agent i.e. the fields that have an impact on routing +func ipamPoolEquals(a *proto.IPAMPool, b *proto.IPAMPool) bool { + if (a == nil || b == nil) && a != b { + return false + } + if a.Cidr != b.Cidr { + return false + } + if a.IpipMode != b.IpipMode { + return false + } + if a.VxlanMode != b.VxlanMode { + return false + } + return true +} + +// addDelSnatPrefix configures IP Pool prefixes so that we don't source-NAT the packets going +// to these addresses. 
All the IP Pools prefixes are configured that way so that pod <-> pod +// communications are never source-nated in the cluster +// Note(aloaugus) - I think the iptables dataplane behaves differently and uses the k8s level +// pod CIDR for this rather than the individual pool prefixes +func (s *Server) addDelSnatPrefix(pool *proto.IPAMPool, isAdd bool) (err error) { + _, ipNet, err := net.ParseCIDR(pool.GetCidr()) + if err != nil { + return errors.Wrapf(err, "Couldn't parse pool CIDR %s", pool.Cidr) + } + err = s.vpp.CnatAddDelSnatPrefix(ipNet, isAdd) + if err != nil { + return errors.Wrapf(err, "Couldn't configure SNAT prefix") + } + return nil +} diff --git a/calico-vpp-agent/felix/host_endpoint.go b/calico-vpp-agent/felix/policies/host_endpoint.go similarity index 51% rename from calico-vpp-agent/felix/host_endpoint.go rename to calico-vpp-agent/felix/policies/host_endpoint.go index 3cf93d06..3e9e46e5 100644 --- a/calico-vpp-agent/felix/host_endpoint.go +++ b/calico-vpp-agent/felix/policies/host_endpoint.go @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package felix +package policies import ( "fmt" @@ -21,7 +21,6 @@ import ( "github.com/pkg/errors" "github.com/projectcalico/calico/felix/proto" - "github.com/projectcalico/vpp-dataplane/v3/vpplink" "github.com/projectcalico/vpp-dataplane/v3/vpplink/generated/bindings/npol" "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" ) @@ -41,16 +40,15 @@ type HostEndpoint struct { Profiles []string Tiers []Tier ForwardTiers []Tier - server *Server InterfaceName string - expectedIPs []string + ExpectedIPs []string - currentForwardConf *types.InterfaceConfig + CurrentForwardConf *types.InterfaceConfig } func (h *HostEndpoint) String() string { s := fmt.Sprintf("ifName=%s", h.InterfaceName) - s += types.StrListToString(" expectedIPs=", h.expectedIPs) + s += types.StrListToString(" ExpectedIPs=", h.ExpectedIPs) s += types.IntListToString(" uplink=", h.UplinkSwIfIndexes) s += types.IntListToString(" tap=", h.TapSwIfIndexes) s += types.IntListToString(" tunnel=", h.TunnelSwIfIndexes) @@ -60,23 +58,22 @@ func (h *HostEndpoint) String() string { return s } -func fromProtoHostEndpointID(ep *proto.HostEndpointID) *HostEndpointID { +func FromProtoHostEndpointID(ep *proto.HostEndpointID) *HostEndpointID { return &HostEndpointID{ EndpointID: ep.EndpointId, } } -func fromProtoHostEndpoint(hep *proto.HostEndpoint, server *Server) *HostEndpoint { +func FromProtoHostEndpoint(hep *proto.HostEndpoint) (*HostEndpoint, error) { r := &HostEndpoint{ Profiles: hep.ProfileIds, - server: server, UplinkSwIfIndexes: []uint32{}, TapSwIfIndexes: []uint32{}, TunnelSwIfIndexes: []uint32{}, InterfaceName: hep.Name, Tiers: make([]Tier, 0), ForwardTiers: make([]Tier, 0), - expectedIPs: append(hep.ExpectedIpv4Addrs, hep.ExpectedIpv6Addrs...), + ExpectedIPs: append(hep.ExpectedIpv4Addrs, hep.ExpectedIpv6Addrs...), } for _, tier := range hep.Tiers { r.Tiers = append(r.Tiers, Tier{ @@ -94,45 +91,15 @@ func fromProtoHostEndpoint(hep *proto.HostEndpoint, server *Server) *HostEndpoin } for _, tier := range 
hep.PreDnatTiers { if tier != nil { - server.log.Error("Existing PreDnatTiers, not implemented") + return nil, fmt.Errorf("existing PreDnatTiers, not implemented") } } for _, tier := range hep.UntrackedTiers { if tier != nil { - server.log.Error("Existing UntrackedTiers, not implemented") + return nil, fmt.Errorf("existing UntrackedTiers, not implemented") } } - return r -} - -func (h *HostEndpoint) handleTunnelChange(swIfIndex uint32, isAdd bool, pending bool) (err error) { - if isAdd { - newTunnel := true - for _, v := range h.TunnelSwIfIndexes { - if v == swIfIndex { - newTunnel = false - } - } - if newTunnel { - h.TunnelSwIfIndexes = append(h.TunnelSwIfIndexes, swIfIndex) - h.server.log.Infof("Configuring policies on added tunnel [%d]", swIfIndex) - if !pending { - h.server.log.Infof("policy(upd) interface swif=%d", swIfIndex) - err = h.server.vpp.ConfigurePolicies(swIfIndex, h.currentForwardConf, 1 /*invertRxTx*/) - if err != nil { - return errors.Wrapf(err, "cannot configure policies on tunnel interface %d", swIfIndex) - } - } - } - } else { // delete case - for index, existingSwifindex := range h.TunnelSwIfIndexes { - if existingSwifindex == swIfIndex { - // we don't delete the policies because they are auto-deleted when interfaces are removed - h.TunnelSwIfIndexes = append(h.TunnelSwIfIndexes[:index], h.TunnelSwIfIndexes[index+1:]...) - } - } - } - return err + return r, nil } func (h *HostEndpoint) getUserDefinedPolicies(state *PolicyState, tiers []Tier) (conf *types.InterfaceConfig, err error) { @@ -172,18 +139,16 @@ func (h *HostEndpoint) getUserDefinedPolicies(state *PolicyState, tiers []Tier) return conf, nil } -/* - This function creates the interface configuration for the host, applied on the vpptap0 - interface i.e. 
the tap interface from VPP to the host - that we use as controlpoint for HostEndpoint implementation - We have an implicit workloadsToHostPolicy policy that controls the traffic from - workloads to their host: it is defined by felixConfig.DefaultEndpointToHostAction - We have an implicit failsafe rules policy defined by felixConfig as well. - - If there are no policies the default should be pass to profiles - If there are policies the default should be deny (profiles are ignored) -*/ -func (h *HostEndpoint) getTapPolicies(state *PolicyState) (conf *types.InterfaceConfig, err error) { +// This function creates the interface configuration for the host, applied on the vpptap0 +// interface i.e. the tap interface from VPP to the host +// that we use as controlpoint for HostEndpoint implementation +// We have an implicit workloadsToHostPolicy policy that controls the traffic from +// workloads to their host: it is defined by felixConfig.DefaultEndpointToHostAction +// We have an implicit failsafe rules policy defined by felixConfig as well. +// +// If there are no policies the default should be pass to profiles +// If there are policies the default should be deny (profiles are ignored) +func (s *PoliciesHandler) getTapPolicies(h *HostEndpoint, state *PolicyState) (conf *types.InterfaceConfig, err error) { conf, err = h.getUserDefinedPolicies(state, h.Tiers) if err != nil { return nil, errors.Wrap(err, "cannot create host policies for TapConf") @@ -194,7 +159,7 @@ func (h *HostEndpoint) getTapPolicies(state *PolicyState) (conf *types.Interface // (except for traffic allowed by failsafe rules). 
// note: this applies to ingress and egress separately, so if you don't have // ingress only you drop ingress - conf.IngressPolicyIDs = []uint32{h.server.workloadsToHostPolicy.VppID, h.server.failSafePolicy.VppID} + conf.IngressPolicyIDs = []uint32{s.workloadsToHostPolicy.VppID, s.failSafePolicy.VppID} conf.PolicyDefaultTx = npol.NPOL_DEFAULT_DENY } else { if len(conf.IngressPolicyIDs) > 0 { @@ -202,11 +167,11 @@ func (h *HostEndpoint) getTapPolicies(state *PolicyState) (conf *types.Interface } else if len(conf.ProfileIDs) > 0 { conf.PolicyDefaultTx = npol.NPOL_DEFAULT_PASS } - conf.IngressPolicyIDs = append([]uint32{h.server.failSafePolicy.VppID}, conf.IngressPolicyIDs...) - conf.IngressPolicyIDs = append([]uint32{h.server.workloadsToHostPolicy.VppID}, conf.IngressPolicyIDs...) + conf.IngressPolicyIDs = append([]uint32{s.failSafePolicy.VppID}, conf.IngressPolicyIDs...) + conf.IngressPolicyIDs = append([]uint32{s.workloadsToHostPolicy.VppID}, conf.IngressPolicyIDs...) } if len(conf.EgressPolicyIDs) == 0 && len(conf.ProfileIDs) == 0 { - conf.EgressPolicyIDs = []uint32{h.server.AllowFromHostPolicy.VppID, h.server.failSafePolicy.VppID} + conf.EgressPolicyIDs = []uint32{s.AllowFromHostPolicy.VppID, s.failSafePolicy.VppID} conf.PolicyDefaultRx = npol.NPOL_DEFAULT_DENY } else { if len(conf.EgressPolicyIDs) > 0 { @@ -214,25 +179,25 @@ func (h *HostEndpoint) getTapPolicies(state *PolicyState) (conf *types.Interface } else if len(conf.ProfileIDs) > 0 { conf.PolicyDefaultRx = npol.NPOL_DEFAULT_PASS } - conf.EgressPolicyIDs = append([]uint32{h.server.failSafePolicy.VppID}, conf.EgressPolicyIDs...) - conf.EgressPolicyIDs = append([]uint32{h.server.AllowFromHostPolicy.VppID}, conf.EgressPolicyIDs...) + conf.EgressPolicyIDs = append([]uint32{s.failSafePolicy.VppID}, conf.EgressPolicyIDs...) + conf.EgressPolicyIDs = append([]uint32{s.AllowFromHostPolicy.VppID}, conf.EgressPolicyIDs...) 
} return conf, nil } -func (h *HostEndpoint) getForwardPolicies(state *PolicyState) (conf *types.InterfaceConfig, err error) { +func (s *PoliciesHandler) getForwardPolicies(h *HostEndpoint, state *PolicyState) (conf *types.InterfaceConfig, err error) { conf, err = h.getUserDefinedPolicies(state, h.ForwardTiers) if err != nil { return nil, errors.Wrap(err, "cannot create host policies for forwardConf") } if len(conf.EgressPolicyIDs) > 0 { - conf.EgressPolicyIDs = append([]uint32{h.server.allowToHostPolicy.VppID}, conf.EgressPolicyIDs...) + conf.EgressPolicyIDs = append([]uint32{s.allowToHostPolicy.VppID}, conf.EgressPolicyIDs...) conf.PolicyDefaultRx = npol.NPOL_DEFAULT_DENY } else if len(conf.ProfileIDs) > 0 { conf.PolicyDefaultRx = npol.NPOL_DEFAULT_PASS } if len(conf.IngressPolicyIDs) > 0 { - conf.IngressPolicyIDs = append([]uint32{h.server.allowToHostPolicy.VppID}, conf.IngressPolicyIDs...) + conf.IngressPolicyIDs = append([]uint32{s.allowToHostPolicy.VppID}, conf.IngressPolicyIDs...) conf.PolicyDefaultTx = npol.NPOL_DEFAULT_DENY } else if len(conf.ProfileIDs) > 0 { conf.PolicyDefaultTx = npol.NPOL_DEFAULT_PASS @@ -240,26 +205,26 @@ func (h *HostEndpoint) getForwardPolicies(state *PolicyState) (conf *types.Inter return conf, nil } -func (h *HostEndpoint) Create(vpp *vpplink.VppLink, state *PolicyState) (err error) { - forwardConf, err := h.getForwardPolicies(state) +func (s *PoliciesHandler) CreateHostEndpoint(h *HostEndpoint, state *PolicyState) (err error) { + forwardConf, err := s.getForwardPolicies(h, state) if err != nil { return err } for _, swIfIndex := range append(h.UplinkSwIfIndexes, h.TunnelSwIfIndexes...) 
{ - h.server.log.Infof("policy(add) interface swif=%d conf=%v", swIfIndex, forwardConf) - err = vpp.ConfigurePolicies(swIfIndex, forwardConf, 1 /*invertRxTx*/) + s.log.Infof("policy(add) interface swif=%d conf=%v", swIfIndex, forwardConf) + err = s.vpp.ConfigurePolicies(swIfIndex, forwardConf, 1 /*invertRxTx*/) if err != nil { return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) } } - h.currentForwardConf = forwardConf - tapConf, err := h.getTapPolicies(state) + h.CurrentForwardConf = forwardConf + tapConf, err := s.getTapPolicies(h, state) if err != nil { return err } for _, swIfIndex := range h.TapSwIfIndexes { - h.server.log.Infof("policy(add) interface swif=%d conf=%v", swIfIndex, tapConf) - err = vpp.ConfigurePolicies(swIfIndex, tapConf, 0) + s.log.Infof("policy(add) interface swif=%d conf=%v", swIfIndex, tapConf) + err = s.vpp.ConfigurePolicies(swIfIndex, tapConf, 0) if err != nil { return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) } @@ -267,26 +232,26 @@ func (h *HostEndpoint) Create(vpp *vpplink.VppLink, state *PolicyState) (err err return nil } -func (h *HostEndpoint) Update(vpp *vpplink.VppLink, new *HostEndpoint, state *PolicyState) (err error) { - forwardConf, err := new.getForwardPolicies(state) +func (s *PoliciesHandler) UpdateHostEndpoint(h *HostEndpoint, new *HostEndpoint, state *PolicyState) (err error) { + forwardConf, err := s.getForwardPolicies(new, state) if err != nil { return err } for _, swIfIndex := range append(h.UplinkSwIfIndexes, h.TunnelSwIfIndexes...) 
{ - h.server.log.Infof("policy(upd) interface swif=%d conf=%v", swIfIndex, forwardConf) - err = vpp.ConfigurePolicies(swIfIndex, forwardConf, 1 /* invertRxTx */) + s.log.Infof("policy(upd) interface swif=%d conf=%v", swIfIndex, forwardConf) + err = s.vpp.ConfigurePolicies(swIfIndex, forwardConf, 1 /* invertRxTx */) if err != nil { return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) } } - h.currentForwardConf = forwardConf - tapConf, err := new.getTapPolicies(state) + h.CurrentForwardConf = forwardConf + tapConf, err := s.getTapPolicies(new, state) if err != nil { return err } for _, swIfIndex := range h.TapSwIfIndexes { - h.server.log.Infof("policy(upd) interface swif=%d conf=%v", swIfIndex, tapConf) - err = vpp.ConfigurePolicies(swIfIndex, tapConf, 0) + s.log.Infof("policy(upd) interface swif=%d conf=%v", swIfIndex, tapConf) + err = s.vpp.ConfigurePolicies(swIfIndex, tapConf, 0) if err != nil { return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) } @@ -298,22 +263,22 @@ func (h *HostEndpoint) Update(vpp *vpplink.VppLink, new *HostEndpoint, state *Po return nil } -func (h *HostEndpoint) Delete(vpp *vpplink.VppLink, state *PolicyState) (err error) { +func (s *PoliciesHandler) DeleteHostEndpoint(h *HostEndpoint, state *PolicyState) (err error) { for _, swIfIndex := range append(h.UplinkSwIfIndexes, h.TunnelSwIfIndexes...) 
{ // Unconfigure forward policies - h.server.log.Infof("policy(del) interface swif=%d", swIfIndex) - err = vpp.ConfigurePolicies(swIfIndex, types.NewInterfaceConfig(), 0) + s.log.Infof("policy(del) interface swif=%d", swIfIndex) + err = s.vpp.ConfigurePolicies(swIfIndex, types.NewInterfaceConfig(), 0) if err != nil { return errors.Wrapf(err, "cannot unconfigure policies on interface %d", swIfIndex) } } for _, swIfIndex := range h.TapSwIfIndexes { // Unconfigure tap0 policies - h.server.log.Infof("policy(del) interface swif=%d", swIfIndex) + s.log.Infof("policy(del) interface swif=%d", swIfIndex) conf := types.NewInterfaceConfig() - conf.IngressPolicyIDs = h.server.defaultTap0IngressConf - conf.EgressPolicyIDs = h.server.defaultTap0EgressConf - err = vpp.ConfigurePolicies(swIfIndex, conf, 0) + conf.IngressPolicyIDs = s.defaultTap0IngressConf + conf.EgressPolicyIDs = s.defaultTap0EgressConf + err = s.vpp.ConfigurePolicies(swIfIndex, conf, 0) if err != nil { return errors.Wrapf(err, "cannot unconfigure policies on interface %d", swIfIndex) } @@ -323,3 +288,97 @@ func (h *HostEndpoint) Delete(vpp *vpplink.VppLink, state *PolicyState) (err err h.TunnelSwIfIndexes = []uint32{} return nil } + +func (s *PoliciesHandler) getAllTunnelSwIfIndexes() (swIfIndexes []uint32) { + swIfIndexes = make([]uint32, 0) + for k := range s.tunnelSwIfIndexes { + swIfIndexes = append(swIfIndexes, k) + } + return swIfIndexes +} + +func (s *PoliciesHandler) OnHostEndpointUpdate(msg *proto.HostEndpointUpdate) (err error) { + state := s.GetState() + id := FromProtoHostEndpointID(msg.Id) + hep, err := FromProtoHostEndpoint(msg.Endpoint) + if err != nil { + return err + } + if hep.InterfaceName != "" && hep.InterfaceName != "*" { + interfaceDetails, found := s.interfacesMap[hep.InterfaceName] + if found { + hep.UplinkSwIfIndexes = append(hep.UplinkSwIfIndexes, interfaceDetails.uplinkIndex) + hep.TapSwIfIndexes = append(hep.TapSwIfIndexes, interfaceDetails.tapIndex) + } else { + // we are not 
supposed to fallback to expectedIPs if interfaceName doesn't match + // this is the current behavior in calico linux + s.log.Errorf("cannot find host endpoint: interface named %s does not exist", hep.InterfaceName) + } + } else if hep.InterfaceName == "" && hep.ExpectedIPs != nil { + for _, existingIf := range s.interfacesMap { + interfaceFound: + for _, address := range existingIf.addresses { + for _, expectedIP := range hep.ExpectedIPs { + if address == expectedIP { + hep.UplinkSwIfIndexes = append(hep.UplinkSwIfIndexes, existingIf.uplinkIndex) + hep.TapSwIfIndexes = append(hep.TapSwIfIndexes, existingIf.tapIndex) + break interfaceFound + } + } + } + } + } else if hep.InterfaceName == "*" { + for _, interfaceDetails := range s.interfacesMap { + hep.UplinkSwIfIndexes = append(hep.UplinkSwIfIndexes, interfaceDetails.uplinkIndex) + hep.TapSwIfIndexes = append(hep.TapSwIfIndexes, interfaceDetails.tapIndex) + } + } + hep.TunnelSwIfIndexes = s.getAllTunnelSwIfIndexes() + if len(hep.UplinkSwIfIndexes) == 0 || len(hep.TapSwIfIndexes) == 0 { + s.log.Warnf("No interface in vpp for host endpoint id=%s hep=%s", id.EndpointID, hep.String()) + return nil + } + + existing, found := state.HostEndpoints[*id] + if found { + if s.state.IsPending() { + hep.CurrentForwardConf = existing.CurrentForwardConf + state.HostEndpoints[*id] = hep + } else { + err := s.UpdateHostEndpoint(existing, hep, state) + if err != nil { + return errors.Wrap(err, "cannot update host endpoint") + } + } + s.log.Infof("policy(upd) Updating host endpoint id=%s found=%t existing=%s new=%s", *id, found, existing, hep) + } else { + state.HostEndpoints[*id] = hep + if !s.state.IsPending() { + err := s.CreateHostEndpoint(hep, state) + if err != nil { + return errors.Wrap(err, "cannot create host endpoint") + } + } + s.log.Infof("policy(add) Updating host endpoint id=%s found=%t new=%s", *id, found, hep) + } + return nil +} + +func (s *PoliciesHandler) OnHostEndpointRemove(msg *proto.HostEndpointRemove) (err 
error) { + state := s.GetState() + id := FromProtoHostEndpointID(msg.Id) + existing, ok := state.HostEndpoints[*id] + if !ok { + s.log.Warnf("Received host endpoint delete for id=%s that doesn't exists", id) + return nil + } + if !s.state.IsPending() && len(existing.UplinkSwIfIndexes) != 0 { + err = s.DeleteHostEndpoint(existing, s.configuredState) + if err != nil { + return errors.Wrap(err, "error deleting host endpoint") + } + } + s.log.Infof("policy(del) Handled Host Endpoint Remove pending=%t id=%s %s", s.state.IsPending(), id, existing) + delete(state.HostEndpoints, *id) + return nil +} diff --git a/calico-vpp-agent/felix/policies/hostmetadata.go b/calico-vpp-agent/felix/policies/hostmetadata.go new file mode 100644 index 00000000..f2f29df9 --- /dev/null +++ b/calico-vpp-agent/felix/policies/hostmetadata.go @@ -0,0 +1,115 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package policies + +import ( + "fmt" + + "github.com/pkg/errors" + "github.com/projectcalico/calico/felix/proto" + + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" + "github.com/projectcalico/vpp-dataplane/v3/config" +) + +type NodeWatcherRestartError struct{} + +func (e NodeWatcherRestartError) Error() string { + return "node configuration changed, restarting" +} + +func (s *PoliciesHandler) OnHostMetadataV4V6Update(msg *proto.HostMetadataV4V6Update) (err error) { + localNodeSpec, err := common.NewLocalNodeSpec(msg) + if err != nil { + return errors.Wrapf(err, "OnHostMetadataV4V6Update errored") + } + old, found := s.cache.NodeStatesByName[localNodeSpec.Name] + + if localNodeSpec.Name == *config.NodeName && + (localNodeSpec.IPv4Address != nil || localNodeSpec.IPv6Address != nil) { + /* We found a BGP Spec that seems valid enough */ + s.GotOurNodeBGPchanOnce.Do(func() { + s.GotOurNodeBGPchan <- localNodeSpec + }) + err = s.createAllowFromHostPolicy() + if err != nil { + return errors.Wrap(err, "Error in creating AllowFromHostPolicy") + } + err = s.createAllowToHostPolicy() + if err != nil { + return errors.Wrap(err, "Error in createAllowToHostPolicy") + } + } + + // This is used by the routing server to process Wireguard key updates + // As a result we only send an event when a node is updated, not when it is added or deleted + common.SendEvent(common.CalicoVppEvent{ + Type: common.PeerNodeStateChanged, + Old: old, + New: localNodeSpec, + }) + + if !found { + s.configureRemoteNodeSnat(localNodeSpec, true /* isAdd */) + } else { + change := common.GetIPNetChangeType(old.IPv4Address, localNodeSpec.IPv4Address) | common.GetIPNetChangeType(old.IPv6Address, localNodeSpec.IPv6Address) + if change&(common.ChangeDeleted|common.ChangeUpdated) != 0 && localNodeSpec.Name == *config.NodeName { + // restart if our BGP config changed + return NodeWatcherRestartError{} + } + if change != common.ChangeSame { + s.configureRemoteNodeSnat(old, false /* isAdd 
*/) + s.configureRemoteNodeSnat(localNodeSpec, true /* isAdd */) + } + } + + s.cache.NodeStatesByName[localNodeSpec.Name] = localNodeSpec + return nil +} + +func (s *PoliciesHandler) OnHostMetadataV4V6Remove(msg *proto.HostMetadataV4V6Remove) (err error) { + old, found := s.cache.NodeStatesByName[msg.Hostname] + if !found { + return fmt.Errorf("node %s to delete not found", msg.Hostname) + } + + common.SendEvent(common.CalicoVppEvent{ + Type: common.PeerNodeStateChanged, + Old: old, + }) + if old.Name == *config.NodeName { + // restart if our BGP config changed + return NodeWatcherRestartError{} + } + + s.configureRemoteNodeSnat(old, false /* isAdd */) + return nil +} + +func (s *PoliciesHandler) configureRemoteNodeSnat(node *common.LocalNodeSpec, isAdd bool) { + if node.IPv4Address != nil { + err := s.vpp.CnatAddDelSnatPrefix(common.ToMaxLenCIDR(node.IPv4Address.IP), isAdd) + if err != nil { + s.log.Errorf("error configuring snat prefix for current node (%v): %v", node.IPv4Address.IP, err) + } + } + if node.IPv6Address != nil { + err := s.vpp.CnatAddDelSnatPrefix(common.ToMaxLenCIDR(node.IPv6Address.IP), isAdd) + if err != nil { + s.log.Errorf("error configuring snat prefix for current node (%v): %v", node.IPv6Address.IP, err) + } + } +} diff --git a/calico-vpp-agent/felix/ipset.go b/calico-vpp-agent/felix/policies/ipset.go similarity index 77% rename from calico-vpp-agent/felix/ipset.go rename to calico-vpp-agent/felix/policies/ipset.go index 1dd8de70..74859240 100644 --- a/calico-vpp-agent/felix/ipset.go +++ b/calico-vpp-agent/felix/policies/ipset.go @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package felix +package policies import ( "fmt" @@ -153,7 +153,7 @@ func toNetArray(addrs map[string]*net.IPNet) []*net.IPNet { return array } -func fromIPSetUpdate(ips *proto.IPSetUpdate) (i *IPSet, err error) { +func FromIPSetUpdate(ips *proto.IPSetUpdate) (i *IPSet, err error) { i = NewIPSet() switch ips.GetType() { case proto.IPSetUpdate_IP: @@ -292,3 +292,59 @@ func (i *IPSet) RemoveMembers(members []string, apply bool, vpp *vpplink.VppLink } return err } + +func (s *PoliciesHandler) OnIpsetUpdate(msg *proto.IPSetUpdate) (err error) { + ips, err := FromIPSetUpdate(msg) + if err != nil { + return errors.Wrap(err, "cannot process IPSetUpdate") + } + state := s.GetState() + _, ok := state.IPSets[msg.GetId()] + if ok { + return fmt.Errorf("received new ipset for ID %s that already exists", msg.GetId()) + } + if !s.state.IsPending() { + err = ips.Create(s.vpp) + if err != nil { + return errors.Wrapf(err, "cannot create ipset %s", msg.GetId()) + } + } + state.IPSets[msg.GetId()] = ips + s.log.Debugf("Handled Ipset Update pending=%t id=%s %s", s.state.IsPending(), msg.GetId(), ips) + return nil +} + +func (s *PoliciesHandler) OnIpsetDeltaUpdate(msg *proto.IPSetDeltaUpdate) (err error) { + ips, ok := s.GetState().IPSets[msg.GetId()] + if !ok { + return fmt.Errorf("received delta update for non-existent ipset") + } + err = ips.AddMembers(msg.GetAddedMembers(), !s.state.IsPending(), s.vpp) + if err != nil { + return errors.Wrap(err, "cannot process ipset delta update") + } + err = ips.RemoveMembers(msg.GetRemovedMembers(), !s.state.IsPending(), s.vpp) + if err != nil { + return errors.Wrap(err, "cannot process ipset delta update") + } + s.log.Debugf("Handled Ipset delta Update pending=%t id=%s %s", s.state.IsPending(), msg.GetId(), ips) + return nil +} + +func (s *PoliciesHandler) OnIpsetRemove(msg *proto.IPSetRemove) (err error) { + state := s.GetState() + ips, ok := state.IPSets[msg.GetId()] + if !ok { + s.log.Warnf("Received ipset delete for ID %s that doesn't 
exists", msg.GetId()) + return nil + } + if !s.state.IsPending() { + err = ips.Delete(s.vpp) + if err != nil { + return errors.Wrapf(err, "cannot delete ipset %s", msg.GetId()) + } + } + s.log.Debugf("Handled Ipset remove pending=%t id=%s %s", s.state.IsPending(), msg.GetId(), ips) + delete(state.IPSets, msg.GetId()) + return nil +} diff --git a/calico-vpp-agent/felix/policies/policies_handler.go b/calico-vpp-agent/felix/policies/policies_handler.go new file mode 100644 index 00000000..1f59af53 --- /dev/null +++ b/calico-vpp-agent/felix/policies/policies_handler.go @@ -0,0 +1,366 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package policies + +import ( + "fmt" + "net" + "strings" + "sync" + + "github.com/pkg/errors" + felixConfig "github.com/projectcalico/calico/felix/config" + "github.com/projectcalico/calico/felix/proto" + calicov3cli "github.com/projectcalico/calico/libcalico-go/lib/clientv3" + "github.com/sirupsen/logrus" + + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/felix/cache" + "github.com/projectcalico/vpp-dataplane/v3/config" + "github.com/projectcalico/vpp-dataplane/v3/vpplink" + "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" +) + +// Server holds all the data required to configure the policies defined by felix in VPP +type PoliciesHandler struct { + log *logrus.Entry + vpp *vpplink.VppLink + cache *cache.Cache + + endpointsInterfaces map[WorkloadEndpointID]map[string]uint32 + tunnelSwIfIndexes map[uint32]bool + interfacesMap map[string]interfaceDetails + + configuredState *PolicyState + pendingState *PolicyState + + state common.FelixSocketSyncState + + /* failSafe policies allow traffic on some ports irrespective of the policy */ + failSafePolicy *Policy + /* workloadToHost may drop traffic that goes from the pods to the host */ + workloadsToHostPolicy *Policy + defaultTap0IngressConf []uint32 + defaultTap0EgressConf []uint32 + /* always allow traffic coming from host to the pods (for healthchecks and so on) */ + // AllowFromHostPolicy persists the policy allowing host --> pod communications. 
+ // See CreateAllowFromHostPolicy definition + AllowFromHostPolicy *Policy + // allPodsIpset persists the ipset containing all the workload endpoints (pods) addresses + allPodsIpset *IPSet + /* allow traffic between uplink/tunnels and tap interfaces */ + allowToHostPolicy *Policy + + GotOurNodeBGPchan chan *common.LocalNodeSpec + GotOurNodeBGPchanOnce sync.Once +} + +func NewPoliciesHandler(vpp *vpplink.VppLink, cache *cache.Cache, clientv3 calicov3cli.Interface, log *logrus.Entry) *PoliciesHandler { + return &PoliciesHandler{ + log: log, + vpp: vpp, + cache: cache, + endpointsInterfaces: make(map[WorkloadEndpointID]map[string]uint32), + tunnelSwIfIndexes: make(map[uint32]bool), + + configuredState: NewPolicyState(), + pendingState: NewPolicyState(), + state: common.StateDisconnected, + + GotOurNodeBGPchan: make(chan *common.LocalNodeSpec), + } +} + +func (s *PoliciesHandler) GetState() *PolicyState { + if s.state.IsPending() { + return s.pendingState + } + return s.configuredState +} + +func (s *PoliciesHandler) OnInSync(msg *proto.InSync) (err error) { + if s.state != common.StateSyncing { + return fmt.Errorf("received InSync but state was not syncing") + } + + s.state = common.StateInSync + s.log.Infof("Policies now in sync") + return s.applyPendingState() +} + +// workloadAdded is called by the CNI server when a container interface is created, +// either during startup when reconnecting the interfaces, or when a new pod is created +func (s *PoliciesHandler) OnWorkloadAdded(id *WorkloadEndpointID, swIfIndex uint32, ifName string, containerIPs []*net.IPNet) { + // TODO: Send WorkloadEndpointStatusUpdate to felix + + intf, existing := s.endpointsInterfaces[*id] + + if existing { + for _, exInt := range intf { + if swIfIndex == exInt { + return + } + } + // VPP restarted and interfaces are being reconnected + s.log.Warnf("workload endpoint changed interfaces, did VPP restart? 
%v %v -> %d", id, intf, swIfIndex) + s.endpointsInterfaces[*id][ifName] = swIfIndex + } + + s.log.Infof("policy(add) Workload id=%v swIfIndex=%d", id, swIfIndex) + if s.endpointsInterfaces[*id] == nil { + s.endpointsInterfaces[*id] = map[string]uint32{ifName: swIfIndex} + } else { + s.endpointsInterfaces[*id][ifName] = swIfIndex + } + + if s.state == common.StateInSync { + wep, ok := s.configuredState.WorkloadEndpoints[*id] + if !ok { + s.log.Infof("not creating wep in workloadadded") + // Nothing to configure + } else { + s.log.Infof("creating wep in workloadadded") + err := s.CreateWorkloadEndpoint(wep, []uint32{swIfIndex}, s.configuredState, id.Network) + if err != nil { + s.log.Errorf("Error processing workload addition: %s", err) + } + } + } + // EndpointToHostAction + allMembers := []string{} + for _, containerIP := range containerIPs { + allMembers = append(allMembers, containerIP.IP.String()) + } + err := s.allPodsIpset.AddMembers(allMembers, true, s.vpp) + if err != nil { + s.log.Errorf("Error processing workload addition: %s", err) + } +} + +// WorkloadRemoved is called by the CNI server when the interface of a pod is deleted +func (s *PoliciesHandler) OnWorkloadRemoved(id *WorkloadEndpointID, containerIPs []*net.IPNet) { + // TODO: Send WorkloadEndpointStatusRemove to felix + + _, existing := s.endpointsInterfaces[*id] + if !existing { + s.log.Warnf("nonexistent workload endpoint removed %v", id) + return + } + s.log.Infof("policy(del) workload id=%v", id) + + if s.state == common.StateInSync { + wep, ok := s.configuredState.WorkloadEndpoints[*id] + if !ok { + // Nothing to clean up + } else { + err := s.DeleteWorkloadEndpoint(wep) + if err != nil { + s.log.Errorf("Error processing workload removal: %s", err) + } + } + } + delete(s.endpointsInterfaces, *id) + // EndpointToHostAction + allMembers := []string{} + for _, containerIP := range containerIPs { + allMembers = append(allMembers, containerIP.IP.String()) + } + err := 
s.allPodsIpset.RemoveMembers(allMembers, true, s.vpp) + if err != nil { + s.log.Errorf("Error processing workload remove: %s", err) + } +} + +func (s *PoliciesHandler) OnTunnelAdded(swIfIndex uint32) { + s.tunnelSwIfIndexes[swIfIndex] = true + for _, h := range s.GetState().HostEndpoints { + newTunnel := true + for _, v := range h.TunnelSwIfIndexes { + if v == swIfIndex { + newTunnel = false + } + } + if newTunnel { + h.TunnelSwIfIndexes = append(h.TunnelSwIfIndexes, swIfIndex) + s.log.Infof("Configuring policies on added tunnel [%d]", swIfIndex) + if !s.state.IsPending() { + s.log.Infof("policy(upd) interface swif=%d", swIfIndex) + err := s.vpp.ConfigurePolicies(swIfIndex, h.CurrentForwardConf, 1 /*invertRxTx*/) + if err != nil { + s.log.WithError(err).Errorf("OnTunnelAdded: cannot configure policies on tunnel interface %d", swIfIndex) + } + } + } + } +} +func (s *PoliciesHandler) OnTunnelDelete(swIfIndex uint32) { + delete(s.tunnelSwIfIndexes, swIfIndex) + state := s.GetState() + for _, h := range state.HostEndpoints { + for index, existingSwifindex := range h.TunnelSwIfIndexes { + if existingSwifindex == swIfIndex { + // we don't delete the policies because they are auto-deleted when interfaces are removed + h.TunnelSwIfIndexes = append(h.TunnelSwIfIndexes[:index], h.TunnelSwIfIndexes[index+1:]...) + } + } + } +} + +func (s *PoliciesHandler) OnFelixSocketStateChanged(evt *common.FelixSocketStateChanged) { + s.state = evt.NewState +} + +func (s *PoliciesHandler) OnFelixConfChanged(old, new *felixConfig.Config) { + if s.state != common.StateConnected { + s.log.Errorf("received ConfigUpdate but server is not in Connected state! 
state: %v", s.state) + return + } + s.state = common.StateSyncing + if s.cache.FelixConfig.DefaultEndpointToHostAction != old.DefaultEndpointToHostAction { + s.log.Infof("Change in EndpointToHostAction to %+v", s.getEndpointToHostAction()) + workloadsToHostAllowRule := &Rule{ + VppID: types.InvalidID, + Rule: &types.Rule{ + Action: s.getEndpointToHostAction(), + }, + SrcIPSetNames: []string{"calico-vpp-wep-addr-ipset"}, + } + policy := s.workloadsToHostPolicy.DeepCopy() + policy.InboundRules = []*Rule{workloadsToHostAllowRule} + err := s.workloadsToHostPolicy.Update(s.vpp, policy, + &PolicyState{ + IPSets: map[string]*IPSet{ + "calico-vpp-wep-addr-ipset": s.allPodsIpset, + }, + }) + if err != nil { + s.log.Errorf("error updating workloadsToHostPolicy %v", err) + return + } + } + if !protoPortListEqual(s.cache.FelixConfig.FailsafeInboundHostPorts, old.FailsafeInboundHostPorts) || + !protoPortListEqual(s.cache.FelixConfig.FailsafeOutboundHostPorts, old.FailsafeOutboundHostPorts) { + err := s.createFailSafePolicies() + if err != nil { + s.log.Errorf("error updating FailSafePolicies %v", err) + return + } + } +} + +// Reconciles the pending state with the configured state +func (s *PoliciesHandler) applyPendingState() (err error) { + s.log.Infof("Reconciliating pending policy state with configured state") + // Stupid algorithm for now, delete all that is in configured state, and then recreate everything + for _, wep := range s.configuredState.WorkloadEndpoints { + if len(wep.SwIfIndex) != 0 { + err = s.DeleteWorkloadEndpoint(wep) + if err != nil { + return errors.Wrap(err, "cannot cleanup workload endpoint") + } + } + } + for _, policy := range s.configuredState.Policies { + err = policy.Delete(s.vpp, s.configuredState) + if err != nil { + s.log.Warnf("error deleting policy: %v", err) + } + } + for _, profile := range s.configuredState.Profiles { + err = profile.Delete(s.vpp, s.configuredState) + if err != nil { + s.log.Warnf("error deleting profile: %v", err) + } + } 
+ for _, ipset := range s.configuredState.IPSets { + err = ipset.Delete(s.vpp) + if err != nil { + s.log.Warnf("error deleting ipset: %v", err) + } + } + for _, hep := range s.configuredState.HostEndpoints { + if len(hep.UplinkSwIfIndexes) != 0 { + err = s.DeleteHostEndpoint(hep, s.configuredState) + if err != nil { + s.log.Warnf("error deleting hostendpoint : %v", err) + } + } + } + + s.configuredState = s.pendingState + s.pendingState = NewPolicyState() + for _, ipset := range s.configuredState.IPSets { + err = ipset.Create(s.vpp) + if err != nil { + return errors.Wrap(err, "error creating ipset") + } + } + for _, profile := range s.configuredState.Profiles { + err = profile.Create(s.vpp, s.configuredState) + if err != nil { + return errors.Wrap(err, "error creating profile") + } + } + for _, policy := range s.configuredState.Policies { + err = policy.Create(s.vpp, s.configuredState) + if err != nil { + return errors.Wrap(err, "error creating policy") + } + } + for id, wep := range s.configuredState.WorkloadEndpoints { + intf, intfFound := s.endpointsInterfaces[id] + if intfFound { + swIfIndexList := []uint32{} + for _, idx := range intf { + swIfIndexList = append(swIfIndexList, idx) + } + err = s.CreateWorkloadEndpoint(wep, swIfIndexList, s.configuredState, id.Network) + if err != nil { + return errors.Wrap(err, "cannot configure workload endpoint") + } + } + } + for _, hep := range s.configuredState.HostEndpoints { + err = s.CreateHostEndpoint(hep, s.configuredState) + if err != nil { + return errors.Wrap(err, "cannot create host endpoint") + } + } + s.log.Infof("Reconciliation done") + return nil +} + +func (s *PoliciesHandler) OnNodeAddUpdate(node *common.LocalNodeSpec) { + if node.Name == *config.NodeName { + err := s.createAllowFromHostPolicy() + if err != nil { + s.log.Errorf("Error in creating AllowFromHostPolicy %v", err) + return + } + err = s.createAllowToHostPolicy() + if err != nil { + s.log.Errorf("Error in createAllowToHostPolicy %v", err) + return 
+ } + } +} + +func (s *PoliciesHandler) getEndpointToHostAction() types.RuleAction { + if strings.ToUpper(s.cache.FelixConfig.DefaultEndpointToHostAction) == "ACCEPT" { + return types.ActionAllow + } + return types.ActionDeny +} diff --git a/calico-vpp-agent/felix/policies/policies_init.go b/calico-vpp-agent/felix/policies/policies_init.go new file mode 100644 index 00000000..59160380 --- /dev/null +++ b/calico-vpp-agent/felix/policies/policies_init.go @@ -0,0 +1,300 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package policies + +import ( + "fmt" + "net" + + "github.com/pkg/errors" + "github.com/projectcalico/calico/felix/proto" + + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" + "github.com/projectcalico/vpp-dataplane/v3/vpplink/generated/bindings/npol" + "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" +) + +func (s *PoliciesHandler) createAllPodsIpset() (err error) { + ipset := NewIPSet() + err = ipset.Create(s.vpp) + if err != nil { + return err + } + s.allPodsIpset = ipset + return nil +} + +// createAllowFromHostPolicy creates a policy allowing host->pod communications. This is needed +// to maintain vanilla Calico's behavior where the host can always reach pods. +// This policy is applied in Egress on the host endpoint tap (i.e. linux -> VPP) +// and on the Ingress of Workload endpoints (i.e. 
VPP -> pod) +func (s *PoliciesHandler) createAllowFromHostPolicy() (err error) { + s.log.Infof("Creating rules to allow traffic from host to pods with egress policies") + ruleOut := &Rule{ + VppID: types.InvalidID, + RuleID: "calicovpp-internal-egressallowfromhost", + Rule: &types.Rule{ + Action: types.ActionAllow, + }, + DstIPSetNames: []string{"calico-vpp-wep-addr-ipset"}, + } + ps := PolicyState{IPSets: map[string]*IPSet{"calico-vpp-wep-addr-ipset": s.allPodsIpset}} + s.log.Infof("Creating rules to allow traffic from host to pods with ingress policies") + ruleIn := &Rule{ + VppID: types.InvalidID, + RuleID: "calicovpp-internal-ingressallowfromhost", + Rule: &types.Rule{ + Action: types.ActionAllow, + SrcNet: []net.IPNet{}, + }, + } + if s.cache.GetNodeIP4() != nil { + ruleIn.SrcNet = append(ruleIn.SrcNet, *common.FullyQualified(*s.cache.GetNodeIP4())) + } + if s.cache.GetNodeIP6() != nil { + ruleIn.SrcNet = append(ruleIn.SrcNet, *common.FullyQualified(*s.cache.GetNodeIP6())) + } + + allowFromHostPolicy := &Policy{ + Policy: &types.Policy{}, + VppID: types.InvalidID, + } + allowFromHostPolicy.OutboundRules = append(allowFromHostPolicy.OutboundRules, ruleOut) + allowFromHostPolicy.InboundRules = append(allowFromHostPolicy.InboundRules, ruleIn) + if s.AllowFromHostPolicy == nil { + err = allowFromHostPolicy.Create(s.vpp, &ps) + } else { + allowFromHostPolicy.VppID = s.AllowFromHostPolicy.VppID + err = s.AllowFromHostPolicy.Update(s.vpp, allowFromHostPolicy, &ps) + } + s.AllowFromHostPolicy = allowFromHostPolicy + if err != nil { + return errors.Wrap(err, "cannot create policy to allow traffic from host to pods") + } + s.log.Infof("Created allow from host to pods traffic with ID: %+v", s.AllowFromHostPolicy.VppID) + return nil +} + +func (s *PoliciesHandler) createEndpointToHostPolicy( /*may be return*/ ) (err error) { + workloadsToHostPolicy := &Policy{ + Policy: &types.Policy{}, + VppID: types.InvalidID, + } + workloadsToHostRule := &Rule{ + VppID: 
types.InvalidID, + Rule: &types.Rule{ + Action: s.getEndpointToHostAction(), + }, + SrcIPSetNames: []string{"calico-vpp-wep-addr-ipset"}, + } + ps := PolicyState{ + IPSets: map[string]*IPSet{ + "calico-vpp-wep-addr-ipset": s.allPodsIpset, + }, + } + workloadsToHostPolicy.InboundRules = append(workloadsToHostPolicy.InboundRules, workloadsToHostRule) + + err = workloadsToHostPolicy.Create(s.vpp, &ps) + if err != nil { + return err + } + s.workloadsToHostPolicy = workloadsToHostPolicy + + conf := types.NewInterfaceConfig() + conf.IngressPolicyIDs = append(conf.IngressPolicyIDs, s.workloadsToHostPolicy.VppID) + conf.PolicyDefaultTx = npol.NPOL_DEFAULT_ALLOW + conf.PolicyDefaultRx = npol.NPOL_DEFAULT_ALLOW + swifindexes, err := s.vpp.SearchInterfacesWithTagPrefix("host-") // tap0 interfaces + if err != nil { + s.log.Error(err) + } + for _, swifindex := range swifindexes { + err = s.vpp.ConfigurePolicies(uint32(swifindex), conf, 0) + if err != nil { + s.log.Error("cannot create policy to drop traffic to host") + } + } + s.defaultTap0IngressConf = conf.IngressPolicyIDs + s.defaultTap0EgressConf = conf.EgressPolicyIDs + return nil +} + +// createFailSafePolicies ensures the failsafe policies defined in the Felixconfiguration exist in VPP. +// check https://github.com/projectcalico/calico/blob/master/felix/rules/static.go :: failsafeInChain for the linux implementation +// To be noted. This does not implement the doNotTrack case as we do not yet support doNotTrack +func (s *PoliciesHandler) createFailSafePolicies() (err error) { + failSafePol := &Policy{ + Policy: &types.Policy{}, + VppID: types.InvalidID, + } + + if len(s.cache.FelixConfig.FailsafeInboundHostPorts) != 0 { + for _, protoPort := range s.cache.FelixConfig.FailsafeInboundHostPorts { + protocol, err := ParseProtocol(&proto.Protocol{NumberOrName: &proto.Protocol_Name{Name: protoPort.Protocol}}) + if err != nil { + s.log.WithError(err).Error("Failed to parse protocol in inbound failsafe rule. 
Skipping failsafe rule") + continue + } + rule := &Rule{ + VppID: types.InvalidID, + RuleID: fmt.Sprintf("failsafe-in-%s-%s-%d", protoPort.Net, protoPort.Protocol, protoPort.Port), + Rule: &types.Rule{ + Action: types.ActionAllow, + // Ports are always filtered on the destination of packets + DstPortRange: []types.PortRange{{First: protoPort.Port, Last: protoPort.Port}}, + Filters: []types.RuleFilter{{ + ShouldMatch: true, + Type: types.NpolFilterProto, + Value: int(protocol), + }}, + }, + } + if protoPort.Net != "" { + _, protoPortNet, err := net.ParseCIDR(protoPort.Net) + if err != nil { + s.log.WithError(err).Error("Failed to parse CIDR in inbound failsafe rule. Skipping failsafe rule") + continue + } + // Inbound packets are checked for where they come FROM + rule.SrcNet = append(rule.SrcNet, *protoPortNet) + } + failSafePol.InboundRules = append(failSafePol.InboundRules, rule) + } + } + + if len(s.cache.FelixConfig.FailsafeOutboundHostPorts) != 0 { + for _, protoPort := range s.cache.FelixConfig.FailsafeOutboundHostPorts { + protocol, err := ParseProtocol(&proto.Protocol{NumberOrName: &proto.Protocol_Name{Name: protoPort.Protocol}}) + if err != nil { + s.log.WithError(err).Error("Failed to parse protocol in outbound failsafe rule. Skipping failsafe rule") + continue + } + rule := &Rule{ + VppID: types.InvalidID, + RuleID: fmt.Sprintf("failsafe-out-%s-%s-%d", protoPort.Net, protoPort.Protocol, protoPort.Port), + Rule: &types.Rule{ + Action: types.ActionAllow, + // Ports are always filtered on the destination of packets + DstPortRange: []types.PortRange{{First: protoPort.Port, Last: protoPort.Port}}, + Filters: []types.RuleFilter{{ + ShouldMatch: true, + Type: types.NpolFilterProto, + Value: int(protocol), + }}, + }, + } + if protoPort.Net != "" { + _, protoPortNet, err := net.ParseCIDR(protoPort.Net) + if err != nil { + s.log.WithError(err).Error("Failed to parse CIDR in outbound failsafe rule. 
Skipping failsafe rule") + continue + } + // Outbound packets are checked for where they go TO + rule.DstNet = append(rule.DstNet, *protoPortNet) + } + failSafePol.OutboundRules = append(failSafePol.OutboundRules, rule) + } + } + + if s.failSafePolicy == nil { + err = failSafePol.Create(s.vpp, nil) + + } else { + failSafePol.VppID = s.failSafePolicy.VppID + err = s.failSafePolicy.Update(s.vpp, failSafePol, nil) + } + if err != nil { + return err + } + s.failSafePolicy = failSafePol + s.log.Infof("Created failsafe policy with ID %+v", s.failSafePolicy.VppID) + return nil +} + +func (s *PoliciesHandler) createAllowToHostPolicy() (err error) { + s.log.Infof("Creating policy to allow traffic to host that is applied on uplink") + ruleIn := &Rule{ + VppID: types.InvalidID, + RuleID: "calicovpp-internal-allowtohost", + Rule: &types.Rule{ + Action: types.ActionAllow, + DstNet: []net.IPNet{}, + }, + } + ruleOut := &Rule{ + VppID: types.InvalidID, + RuleID: "calicovpp-internal-allowtohost", + Rule: &types.Rule{ + Action: types.ActionAllow, + SrcNet: []net.IPNet{}, + }, + } + if s.cache.GetNodeIP4() != nil { + ruleIn.DstNet = append(ruleIn.DstNet, *common.FullyQualified(*s.cache.GetNodeIP4())) + ruleOut.SrcNet = append(ruleOut.SrcNet, *common.FullyQualified(*s.cache.GetNodeIP4())) + } + if s.cache.GetNodeIP6() != nil { + ruleIn.DstNet = append(ruleIn.DstNet, *common.FullyQualified(*s.cache.GetNodeIP6())) + ruleOut.SrcNet = append(ruleOut.SrcNet, *common.FullyQualified(*s.cache.GetNodeIP6())) + } + + allowToHostPolicy := &Policy{ + Policy: &types.Policy{}, + VppID: types.InvalidID, + } + allowToHostPolicy.InboundRules = append(allowToHostPolicy.InboundRules, ruleIn) + allowToHostPolicy.OutboundRules = append(allowToHostPolicy.OutboundRules, ruleOut) + if s.allowToHostPolicy == nil { + err = allowToHostPolicy.Create(s.vpp, nil) + } else { + allowToHostPolicy.VppID = s.allowToHostPolicy.VppID + err = s.allowToHostPolicy.Update(s.vpp, allowToHostPolicy, nil) + } + 
s.allowToHostPolicy = allowToHostPolicy + if err != nil { + return errors.Wrap(err, "cannot create policy to allow traffic to host") + } + s.log.Infof("Created policy to allow traffic to host with ID: %+v", s.allowToHostPolicy.VppID) + return nil +} + +func (s *PoliciesHandler) PoliciesHandlerInit() error { + err := s.createAllPodsIpset() + if err != nil { + return errors.Wrap(err, "Error in createallPodsIpset") + } + err = s.createEndpointToHostPolicy() + if err != nil { + return errors.Wrap(err, "Error in createEndpointToHostPolicy") + } + err = s.createAllowFromHostPolicy() + if err != nil { + return errors.Wrap(err, "Error in creating AllowFromHostPolicy") + } + err = s.createAllowToHostPolicy() + if err != nil { + return errors.Wrap(err, "Error in createAllowToHostPolicy") + } + err = s.createFailSafePolicies() + if err != nil { + return errors.Wrap(err, "Error in createFailSafePolicies") + } + s.interfacesMap, err = mapTagToInterfaceDetails(s.vpp) + if err != nil { + return errors.Wrap(err, "Error in mapping uplink to tap interfaces") + } + return nil +} diff --git a/calico-vpp-agent/felix/policy.go b/calico-vpp-agent/felix/policies/policy.go similarity index 58% rename from calico-vpp-agent/felix/policy.go rename to calico-vpp-agent/felix/policies/policy.go index f0f46e61..6f4e1ce9 100644 --- a/calico-vpp-agent/felix/policy.go +++ b/calico-vpp-agent/felix/policies/policy.go @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package felix +package policies import ( "fmt" @@ -80,7 +80,7 @@ func ruleInNetwork(r *proto.Rule, network string) bool { return network == "" } -func fromProtoPolicy(p *proto.Policy, network string) (policy *Policy, err error) { +func FromProtoPolicy(p *proto.Policy, network string) (policy *Policy, err error) { policy = &Policy{ Policy: &types.Policy{}, VppID: types.InvalidID, @@ -118,7 +118,7 @@ func fromProtoPolicy(p *proto.Policy, network string) (policy *Policy, err error return policy, nil } -func fromProtoProfile(p *proto.Profile) (profile *Policy, err error) { +func FromProtoProfile(p *proto.Profile) (profile *Policy, err error) { profile = &Policy{ Policy: &types.Policy{}, VppID: types.InvalidID, @@ -233,3 +233,155 @@ func (p *Policy) Delete(vpp *vpplink.VppLink, state *PolicyState) (err error) { p.VppID = types.InvalidID return nil } + +func (s *PoliciesHandler) OnActivePolicyUpdate(msg *proto.ActivePolicyUpdate) (err error) { + state := s.GetState() + id := PolicyID{ + Tier: msg.Id.Tier, + Name: msg.Id.Name, + } + p, err := FromProtoPolicy(msg.Policy, "") + if err != nil { + return errors.Wrapf(err, "cannot process policy update") + } + + s.log.Infof("Handling ActivePolicyUpdate pending=%t id=%s %s", s.state.IsPending(), id, p) + existing, ok := state.Policies[id] + if ok { // Policy with this ID already exists + if s.state.IsPending() { + // Just replace policy in pending state + state.Policies[id] = p + } else { + err := existing.Update(s.vpp, p, state) + if err != nil { + return errors.Wrap(err, "cannot update policy") + } + } + } else { + // Create it in state + state.Policies[id] = p + if !s.state.IsPending() { + err := p.Create(s.vpp, state) + if err != nil { + return errors.Wrap(err, "cannot create policy") + } + } + } + + for network := range s.cache.NetworkDefinitions { + id := PolicyID{ + Tier: msg.Id.Tier, + Name: msg.Id.Name, + Network: network, + } + p, err := FromProtoPolicy(msg.Policy, network) + if err != nil { + return 
errors.Wrapf(err, "cannot process policy update") + } + + s.log.Infof("Handling ActivePolicyUpdate pending=%t id=%s %s", s.state.IsPending(), id, p) + + existing, ok := state.Policies[id] + if ok { // Policy with this ID already exists + if s.state.IsPending() { + // Just replace policy in pending state + state.Policies[id] = p + } else { + err := existing.Update(s.vpp, p, state) + if err != nil { + return errors.Wrap(err, "cannot update policy") + } + } + } else { + // Create it in state + state.Policies[id] = p + if !s.state.IsPending() { + err := p.Create(s.vpp, state) + if err != nil { + return errors.Wrap(err, "cannot create policy") + } + } + } + + } + return nil +} + +func (s *PoliciesHandler) OnActivePolicyRemove(msg *proto.ActivePolicyRemove) (err error) { + state := s.GetState() + id := PolicyID{ + Tier: msg.Id.Tier, + Name: msg.Id.Name, + } + s.log.Infof("policy(del) Handling ActivePolicyRemove pending=%t id=%s", s.state.IsPending(), id) + + for policyID := range state.Policies { + if policyID.Name == id.Name && policyID.Tier == id.Tier { + existing, ok := state.Policies[policyID] + if !ok { + s.log.Warnf("Received policy delete for Tier %s Name %s that doesn't exists", id.Tier, id.Name) + return nil + } + if !s.state.IsPending() { + err = existing.Delete(s.vpp, state) + if err != nil { + return errors.Wrap(err, "error deleting policy") + } + } + delete(state.Policies, policyID) + } + } + return nil +} + +func (s *PoliciesHandler) OnActiveProfileUpdate(msg *proto.ActiveProfileUpdate) (err error) { + state := s.GetState() + id := msg.Id.Name + p, err := FromProtoProfile(msg.Profile) + if err != nil { + return errors.Wrapf(err, "cannot process profile update") + } + + existing, ok := state.Profiles[id] + if ok { // Policy with this ID already exists + if s.state.IsPending() { + // Just replace policy in pending state + state.Profiles[id] = p + } else { + err := existing.Update(s.vpp, p, state) + if err != nil { + return errors.Wrap(err, "cannot update 
profile") + } + } + } else { + // Create it in state + state.Profiles[id] = p + if !s.state.IsPending() { + err := p.Create(s.vpp, state) + if err != nil { + return errors.Wrap(err, "cannot create profile") + } + } + } + s.log.Infof("policy(upd) Handled Profile Update pending=%t id=%s existing=%s new=%s", s.state.IsPending(), id, existing, p) + return nil +} + +func (s *PoliciesHandler) OnActiveProfileRemove(msg *proto.ActiveProfileRemove) (err error) { + state := s.GetState() + id := msg.Id.Name + existing, ok := state.Profiles[id] + if !ok { + s.log.Warnf("Received profile delete for Name %s that doesn't exists", id) + return nil + } + if !s.state.IsPending() { + err = existing.Delete(s.vpp, state) + if err != nil { + return errors.Wrap(err, "error deleting profile") + } + } + s.log.Infof("policy(del) Handled Profile Remove pending=%t id=%s policy=%s", s.state.IsPending(), id, existing) + delete(state.Profiles, id) + return nil +} diff --git a/calico-vpp-agent/felix/policy_state.go b/calico-vpp-agent/felix/policies/policy_state.go similarity index 98% rename from calico-vpp-agent/felix/policy_state.go rename to calico-vpp-agent/felix/policies/policy_state.go index 1d38741f..ec097522 100644 --- a/calico-vpp-agent/felix/policy_state.go +++ b/calico-vpp-agent/felix/policies/policy_state.go @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package felix +package policies type PolicyState struct { IPSets map[string]*IPSet diff --git a/calico-vpp-agent/felix/rule.go b/calico-vpp-agent/felix/policies/rule.go similarity index 98% rename from calico-vpp-agent/felix/rule.go rename to calico-vpp-agent/felix/policies/rule.go index 9d9fd2e8..3577cae2 100644 --- a/calico-vpp-agent/felix/rule.go +++ b/calico-vpp-agent/felix/policies/rule.go @@ -13,7 +13,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package felix +package policies import ( "fmt" @@ -127,7 +127,7 @@ func fromProtoRule(r *proto.Rule) (rule *Rule, err error) { if r.NotProtocol != nil { return nil, fmt.Errorf("protocol and NotProtocol specified in Rule") } - proto, err := parseProtocol(r.Protocol) + proto, err := ParseProtocol(r.Protocol) if err != nil { return nil, err } @@ -138,7 +138,7 @@ func fromProtoRule(r *proto.Rule) (rule *Rule, err error) { }) } if r.NotProtocol != nil { - proto, err := parseProtocol(r.NotProtocol) + proto, err := ParseProtocol(r.NotProtocol) if err != nil { return nil, err } @@ -220,7 +220,7 @@ func fromProtoRule(r *proto.Rule) (rule *Rule, err error) { return rule, nil } -func parseProtocol(pr *proto.Protocol) (types.IPProto, error) { +func ParseProtocol(pr *proto.Protocol) (types.IPProto, error) { switch u := pr.NumberOrName.(type) { case *proto.Protocol_Name: switch strings.ToLower(u.Name) { diff --git a/calico-vpp-agent/felix/policies/utils.go b/calico-vpp-agent/felix/policies/utils.go new file mode 100644 index 00000000..2a34fe46 --- /dev/null +++ b/calico-vpp-agent/felix/policies/utils.go @@ -0,0 +1,82 @@ +// Copyright (C) 2025 Cisco Systems Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package policies + +import ( + "github.com/pkg/errors" + felixConfig "github.com/projectcalico/calico/felix/config" + + "github.com/projectcalico/vpp-dataplane/v3/vpplink" +) + +func protoPortListEqual(a, b []felixConfig.ProtoPort) bool { + if len(a) != len(b) { + return false + } + for i, elemA := range a { + elemB := b[i] + if elemA.Net != elemB.Net { + return false + } + if elemA.Protocol != elemB.Protocol { + return false + } + if elemA.Port != elemB.Port { + return false + } + } + return true +} + +type interfaceDetails struct { + tapIndex uint32 + uplinkIndex uint32 + addresses []string +} + +func mapTagToInterfaceDetails(vpp *vpplink.VppLink) (tagIfDetails map[string]interfaceDetails, err error) { + tagIfDetails = make(map[string]interfaceDetails) + uplinkSwifindexes, err := vpp.SearchInterfacesWithTagPrefix("main-") + if err != nil { + return nil, err + } + tapSwifindexes, err := vpp.SearchInterfacesWithTagPrefix("host-") + if err != nil { + return nil, err + } + for intf, uplink := range uplinkSwifindexes { + tap, found := tapSwifindexes["host-"+intf[5:]] + if found { + ip4adds, err := vpp.AddrList(uplink, false) + if err != nil { + return nil, err + } + ip6adds, err := vpp.AddrList(uplink, true) + if err != nil { + return nil, err + } + adds := append(ip4adds, ip6adds...) + addresses := []string{} + for _, add := range adds { + addresses = append(addresses, add.IPNet.IP.String()) + } + tagIfDetails[intf[5:]] = interfaceDetails{tap, uplink, addresses} + } else { + return nil, errors.Errorf("uplink interface %d not corresponding to a tap interface", uplink) + } + } + return tagIfDetails, nil +} diff --git a/calico-vpp-agent/felix/policies/workload_endpoint.go b/calico-vpp-agent/felix/policies/workload_endpoint.go new file mode 100644 index 00000000..f573080d --- /dev/null +++ b/calico-vpp-agent/felix/policies/workload_endpoint.go @@ -0,0 +1,288 @@ +// Copyright (C) 2020 Cisco Systems Inc. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package policies + +import ( + "encoding/json" + "fmt" + + nettypes "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" + "github.com/pkg/errors" + "github.com/projectcalico/calico/felix/proto" + + "github.com/projectcalico/vpp-dataplane/v3/vpplink/generated/bindings/npol" + "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" +) + +type WorkloadEndpointID struct { + OrchestratorID string + WorkloadID string + EndpointID string + Network string +} + +func (wi *WorkloadEndpointID) String() string { + return fmt.Sprintf("%s:%s:%s:%s", wi.OrchestratorID, wi.WorkloadID, wi.EndpointID, wi.Network) +} + +type Tier struct { + Name string + IngressPolicies []string + EgressPolicies []string +} + +func (tr *Tier) String() string { + s := fmt.Sprintf("name=%s", tr.Name) + s += types.StrListToString(" IngressPolicies=", tr.IngressPolicies) + s += types.StrListToString(" EgressPolicies=", tr.EgressPolicies) + return s +} + +type WorkloadEndpoint struct { + SwIfIndex []uint32 + Profiles []string + Tiers []Tier +} + +func (w *WorkloadEndpoint) String() string { + s := fmt.Sprintf("if=%d profiles=%s tiers=%s", w.SwIfIndex, w.Profiles, w.Tiers) + s += types.StrListToString(" Profiles=", w.Profiles) + s += types.StrableListToString(" Tiers=", w.Tiers) + return s +} + +func FromProtoEndpointID(ep *proto.WorkloadEndpointID) *WorkloadEndpointID { + return 
&WorkloadEndpointID{ + OrchestratorID: ep.OrchestratorId, + WorkloadID: ep.WorkloadId, + EndpointID: ep.EndpointId, + } +} + +func FromProtoWorkload(wep *proto.WorkloadEndpoint) *WorkloadEndpoint { + r := &WorkloadEndpoint{ + SwIfIndex: []uint32{}, + Profiles: wep.ProfileIds, + } + for _, tier := range wep.Tiers { + r.Tiers = append(r.Tiers, Tier{ + Name: tier.Name, + IngressPolicies: tier.IngressPolicies, + EgressPolicies: tier.EgressPolicies, + }) + } + return r +} + +func (s *PoliciesHandler) getWepUserDefinedPolicies(w *WorkloadEndpoint, state *PolicyState, network string) (conf *types.InterfaceConfig, err error) { + conf = types.NewInterfaceConfig() + for _, tier := range w.Tiers { + for _, polName := range tier.IngressPolicies { + pol, ok := state.Policies[PolicyID{Tier: tier.Name, Name: polName, Network: network}] + if !ok { + return nil, fmt.Errorf("in policy %s tier %s not found for workload endpoint", polName, tier.Name) + } + if pol.VppID == types.InvalidID { + return nil, fmt.Errorf("in policy %s tier %s not yet created in VPP", polName, tier.Name) + } + conf.IngressPolicyIDs = append(conf.IngressPolicyIDs, pol.VppID) + } + for _, polName := range tier.EgressPolicies { + pol, ok := state.Policies[PolicyID{Tier: tier.Name, Name: polName, Network: network}] + if !ok { + return nil, fmt.Errorf("out policy %s tier %s not found for workload endpoint", polName, tier.Name) + } + if pol.VppID == types.InvalidID { + return nil, fmt.Errorf("out policy %s tier %s not yet created in VPP", polName, tier.Name) + } + conf.EgressPolicyIDs = append(conf.EgressPolicyIDs, pol.VppID) + } + } + for _, profileName := range w.Profiles { + prof, ok := state.Profiles[profileName] + if !ok { + return nil, fmt.Errorf("profile %s not found for workload endpoint", profileName) + } + if prof.VppID == types.InvalidID { + return nil, fmt.Errorf("profile %s not yet created in VPP", profileName) + } + conf.ProfileIDs = append(conf.ProfileIDs, prof.VppID) + } + if 
len(conf.IngressPolicyIDs) > 0 { + conf.IngressPolicyIDs = append([]uint32{s.AllowFromHostPolicy.VppID}, conf.IngressPolicyIDs...) + } + return conf, nil +} + +// getWorkloadPolicies creates the interface configuration for a workload (pod) interface +// We have an implicit ingress policy that allows traffic coming from the host +// see createAllowFromHostPolicy() +// If there are no policies the default should be pass to profiles +// If there are policies the default should be deny (profiles are ignored) +func (s *PoliciesHandler) getWorkloadPolicies(w *WorkloadEndpoint, state *PolicyState, network string) (conf *types.InterfaceConfig, err error) { + conf, err = s.getWepUserDefinedPolicies(w, state, network) + if err != nil { + return nil, errors.Wrap(err, "cannot create workload policies") + } + if len(conf.IngressPolicyIDs) > 0 { + conf.IngressPolicyIDs = append([]uint32{s.AllowFromHostPolicy.VppID}, conf.IngressPolicyIDs...) + conf.PolicyDefaultTx = npol.NPOL_DEFAULT_DENY + } else if len(conf.ProfileIDs) > 0 { + conf.PolicyDefaultTx = npol.NPOL_DEFAULT_PASS + } + if len(conf.EgressPolicyIDs) > 0 { + conf.PolicyDefaultRx = npol.NPOL_DEFAULT_DENY + } else if len(conf.ProfileIDs) > 0 { + conf.PolicyDefaultRx = npol.NPOL_DEFAULT_PASS + } + return conf, nil +} + +func (s *PoliciesHandler) CreateWorkloadEndpoint(w *WorkloadEndpoint, swIfIndexes []uint32, state *PolicyState, network string) (err error) { + conf, err := s.getWorkloadPolicies(w, state, network) + if err != nil { + return err + } + for _, swIfIndex := range swIfIndexes { + err = s.vpp.ConfigurePolicies(swIfIndex, conf, 0) + if err != nil { + return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) + } + } + + w.SwIfIndex = append(w.SwIfIndex, swIfIndexes...) 
+ return nil +} + +func (s *PoliciesHandler) updateWorkloadEndpoint(w *WorkloadEndpoint, new *WorkloadEndpoint, state *PolicyState, network string) (err error) { + conf, err := s.getWorkloadPolicies(new, state, network) + if err != nil { + return err + } + for _, swIfIndex := range w.SwIfIndex { + err = s.vpp.ConfigurePolicies(swIfIndex, conf, 0) + if err != nil { + return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) + } + } + // Update local policy with new data + w.Profiles = new.Profiles + w.Tiers = new.Tiers + return nil +} + +func (s *PoliciesHandler) DeleteWorkloadEndpoint(w *WorkloadEndpoint) (err error) { + if len(w.SwIfIndex) == 0 { + return fmt.Errorf("deleting unconfigured wep") + } + // Nothing to do in VPP, policies are cleared when the interface is removed + w.SwIfIndex = []uint32{} + return nil +} + +func (s *PoliciesHandler) getAllWorkloadEndpointIdsFromUpdate(msg *proto.WorkloadEndpointUpdate) []*WorkloadEndpointID { + id := FromProtoEndpointID(msg.Id) + idsNetworks := []*WorkloadEndpointID{id} + netStatusesJSON, found := msg.Endpoint.Annotations["k8s.v1.cni.cncf.io/network-status"] + if !found { + s.log.Infof("no network status for pod, no multiple networks") + } else { + var netStatuses []nettypes.NetworkStatus + err := json.Unmarshal([]byte(netStatusesJSON), &netStatuses) + if err != nil { + s.log.Error(err) + } + for _, networkStatus := range netStatuses { + for netDefName, netDef := range s.cache.NetworkDefinitions { + if networkStatus.Name == netDef.NetAttachDefs { + id := &WorkloadEndpointID{OrchestratorID: id.OrchestratorID, WorkloadID: id.WorkloadID, EndpointID: id.EndpointID, Network: netDefName} + idsNetworks = append(idsNetworks, id) + } + } + } + } + return idsNetworks +} + +func (s *PoliciesHandler) OnWorkloadEndpointUpdate(msg *proto.WorkloadEndpointUpdate) (err error) { + state := s.GetState() + idsNetworks := s.getAllWorkloadEndpointIdsFromUpdate(msg) + for _, id := range idsNetworks { + wep := 
FromProtoWorkload(msg.Endpoint) + existing, found := state.WorkloadEndpoints[*id] + swIfIndexMap, swIfIndexFound := s.endpointsInterfaces[*id] + + if found { + if s.state.IsPending() || !swIfIndexFound { + state.WorkloadEndpoints[*id] = wep + s.log.Infof("policy(upd) Workload Endpoint Update pending=%t id=%s existing=%s new=%s swIf=??", s.state.IsPending(), *id, existing, wep) + } else { + err := s.updateWorkloadEndpoint(existing, wep, state, id.Network) + if err != nil { + return errors.Wrap(err, "cannot update workload endpoint") + } + s.log.Infof("policy(upd) Workload Endpoint Update pending=%t id=%s existing=%s new=%s swIf=%v", s.state.IsPending(), *id, existing, wep, swIfIndexMap) + } + } else { + state.WorkloadEndpoints[*id] = wep + if !s.state.IsPending() && swIfIndexFound { + swIfIndexList := []uint32{} + for _, idx := range swIfIndexMap { + swIfIndexList = append(swIfIndexList, idx) + } + err := s.CreateWorkloadEndpoint(wep, swIfIndexList, state, id.Network) + if err != nil { + return errors.Wrap(err, "cannot create workload endpoint") + } + s.log.Infof("policy(add) Workload Endpoint add pending=%t id=%s new=%s swIf=%v", s.state.IsPending(), *id, wep, swIfIndexMap) + } else { + s.log.Infof("policy(add) Workload Endpoint add pending=%t id=%s new=%s swIf=??", s.state.IsPending(), *id, wep) + } + } + } + return nil +} + +func (s *PoliciesHandler) OnWorkloadEndpointRemove(msg *proto.WorkloadEndpointRemove) (err error) { + state := s.GetState() + id := FromProtoEndpointID(msg.Id) + existing, ok := state.WorkloadEndpoints[*id] + if !ok { + s.log.Warnf("Received workload endpoint delete for %v that doesn't exists", id) + return nil + } + if !s.state.IsPending() && len(existing.SwIfIndex) != 0 { + err = s.DeleteWorkloadEndpoint(existing) + if err != nil { + return errors.Wrap(err, "error deleting workload endpoint") + } + } + s.log.Infof("policy(del) Handled Workload Endpoint Remove pending=%t id=%s existing=%s", s.state.IsPending(), *id, existing) + 
delete(state.WorkloadEndpoints, *id) + for existingID := range state.WorkloadEndpoints { + if existingID.OrchestratorID == id.OrchestratorID && existingID.WorkloadID == id.WorkloadID { + if !s.state.IsPending() && len(existing.SwIfIndex) != 0 { + err = s.DeleteWorkloadEndpoint(existing) + if err != nil { + return errors.Wrap(err, "error deleting workload endpoint") + } + } + s.log.Infof("policy(del) Handled Workload Endpoint Remove pending=%t id=%s existing=%s", s.state.IsPending(), existingID, existing) + delete(state.WorkloadEndpoints, existingID) + } + } + return nil +} diff --git a/calico-vpp-agent/felix/workload_endpoint.go b/calico-vpp-agent/felix/workload_endpoint.go deleted file mode 100644 index cb0e4f89..00000000 --- a/calico-vpp-agent/felix/workload_endpoint.go +++ /dev/null @@ -1,194 +0,0 @@ -// Copyright (C) 2020 Cisco Systems Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -// implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package felix - -import ( - "fmt" - - "github.com/pkg/errors" - "github.com/projectcalico/calico/felix/proto" - - "github.com/projectcalico/vpp-dataplane/v3/vpplink" - "github.com/projectcalico/vpp-dataplane/v3/vpplink/generated/bindings/npol" - "github.com/projectcalico/vpp-dataplane/v3/vpplink/types" -) - -type WorkloadEndpointID struct { - OrchestratorID string - WorkloadID string - EndpointID string - Network string -} - -func (wi *WorkloadEndpointID) String() string { - return fmt.Sprintf("%s:%s:%s:%s", wi.OrchestratorID, wi.WorkloadID, wi.EndpointID, wi.Network) -} - -type Tier struct { - Name string - IngressPolicies []string - EgressPolicies []string -} - -func (tr *Tier) String() string { - s := fmt.Sprintf("name=%s", tr.Name) - s += types.StrListToString(" IngressPolicies=", tr.IngressPolicies) - s += types.StrListToString(" EgressPolicies=", tr.EgressPolicies) - return s -} - -type WorkloadEndpoint struct { - SwIfIndex []uint32 - Profiles []string - Tiers []Tier - server *Server -} - -func (w *WorkloadEndpoint) String() string { - s := fmt.Sprintf("if=%d profiles=%s tiers=%s", w.SwIfIndex, w.Profiles, w.Tiers) - s += types.StrListToString(" Profiles=", w.Profiles) - s += types.StrableListToString(" Tiers=", w.Tiers) - return s -} - -func fromProtoEndpointID(ep *proto.WorkloadEndpointID) *WorkloadEndpointID { - return &WorkloadEndpointID{ - OrchestratorID: ep.OrchestratorId, - WorkloadID: ep.WorkloadId, - EndpointID: ep.EndpointId, - } -} - -func fromProtoWorkload(wep *proto.WorkloadEndpoint, server *Server) *WorkloadEndpoint { - r := &WorkloadEndpoint{ - SwIfIndex: []uint32{}, - Profiles: wep.ProfileIds, - server: server, - } - for _, tier := range wep.Tiers { - r.Tiers = append(r.Tiers, Tier{ - Name: tier.Name, - IngressPolicies: tier.IngressPolicies, - EgressPolicies: tier.EgressPolicies, - }) - } - return r -} - -func (w *WorkloadEndpoint) getUserDefinedPolicies(state *PolicyState, network string) (conf *types.InterfaceConfig, err error) { - conf = 
types.NewInterfaceConfig() - for _, tier := range w.Tiers { - for _, polName := range tier.IngressPolicies { - pol, ok := state.Policies[PolicyID{Tier: tier.Name, Name: polName, Network: network}] - if !ok { - return nil, fmt.Errorf("in policy %s tier %s not found for workload endpoint", polName, tier.Name) - } - if pol.VppID == types.InvalidID { - return nil, fmt.Errorf("in policy %s tier %s not yet created in VPP", polName, tier.Name) - } - conf.IngressPolicyIDs = append(conf.IngressPolicyIDs, pol.VppID) - } - for _, polName := range tier.EgressPolicies { - pol, ok := state.Policies[PolicyID{Tier: tier.Name, Name: polName, Network: network}] - if !ok { - return nil, fmt.Errorf("out policy %s tier %s not found for workload endpoint", polName, tier.Name) - } - if pol.VppID == types.InvalidID { - return nil, fmt.Errorf("out policy %s tier %s not yet created in VPP", polName, tier.Name) - } - conf.EgressPolicyIDs = append(conf.EgressPolicyIDs, pol.VppID) - } - } - for _, profileName := range w.Profiles { - prof, ok := state.Profiles[profileName] - if !ok { - return nil, fmt.Errorf("profile %s not found for workload endpoint", profileName) - } - if prof.VppID == types.InvalidID { - return nil, fmt.Errorf("profile %s not yet created in VPP", profileName) - } - conf.ProfileIDs = append(conf.ProfileIDs, prof.VppID) - } - return conf, nil -} - -/* - This function creates the interface configuration for a workload (pod) interface - We have an implicit ingress policy that allows traffic coming from the host - see createAllowFromHostPolicy() - If there are no policies the default should be pass to profiles - If there are policies the default should be deny (profiles are ignored) -*/ -func (w *WorkloadEndpoint) getWorkloadPolicies(state *PolicyState, network string) (conf *types.InterfaceConfig, err error) { - conf, err = w.getUserDefinedPolicies(state, network) - if err != nil { - return nil, errors.Wrap(err, "cannot create workload policies") - } - if 
len(conf.IngressPolicyIDs) > 0 { - conf.IngressPolicyIDs = append([]uint32{w.server.AllowFromHostPolicy.VppID}, conf.IngressPolicyIDs...) - conf.PolicyDefaultTx = npol.NPOL_DEFAULT_DENY - } else if len(conf.ProfileIDs) > 0 { - conf.PolicyDefaultTx = npol.NPOL_DEFAULT_PASS - } - if len(conf.EgressPolicyIDs) > 0 { - conf.PolicyDefaultRx = npol.NPOL_DEFAULT_DENY - } else if len(conf.ProfileIDs) > 0 { - conf.PolicyDefaultRx = npol.NPOL_DEFAULT_PASS - } - return conf, nil -} - -func (w *WorkloadEndpoint) Create(vpp *vpplink.VppLink, swIfIndexes []uint32, state *PolicyState, network string) (err error) { - conf, err := w.getWorkloadPolicies(state, network) - if err != nil { - return err - } - for _, swIfIndex := range swIfIndexes { - err = vpp.ConfigurePolicies(swIfIndex, conf, 0) - if err != nil { - return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) - } - } - - w.SwIfIndex = append(w.SwIfIndex, swIfIndexes...) - return nil -} - -func (w *WorkloadEndpoint) Update(vpp *vpplink.VppLink, new *WorkloadEndpoint, state *PolicyState, network string) (err error) { - conf, err := new.getWorkloadPolicies(state, network) - if err != nil { - return err - } - for _, swIfIndex := range w.SwIfIndex { - err = vpp.ConfigurePolicies(swIfIndex, conf, 0) - if err != nil { - return errors.Wrapf(err, "cannot configure policies on interface %d", swIfIndex) - } - } - // Update local policy with new data - w.Profiles = new.Profiles - w.Tiers = new.Tiers - return nil -} - -func (w *WorkloadEndpoint) Delete(vpp *vpplink.VppLink) (err error) { - if len(w.SwIfIndex) == 0 { - return fmt.Errorf("deleting unconfigured wep") - } - // Nothing to do in VPP, policies are cleared when the interface is removed - w.SwIfIndex = []uint32{} - return nil -} diff --git a/calico-vpp-agent/prometheus/prometheus.go b/calico-vpp-agent/prometheus/prometheus.go index e74eade5..f6ef9e63 100644 --- a/calico-vpp-agent/prometheus/prometheus.go +++ b/calico-vpp-agent/prometheus/prometheus.go @@ 
-49,7 +49,7 @@ type PrometheusServer struct { podInterfacesDetailsBySwifIndex map[uint32]podInterfaceDetails podInterfacesByKey map[string]model.LocalPodSpec statsclient *statsclient.StatsClient - channel chan common.CalicoVppEvent + channel chan any lock sync.Mutex httpServer *http.Server exporter *prometheusExporter.Exporter @@ -65,7 +65,7 @@ func NewPrometheusServer(vpp *vpplink.VppLink, log *logrus.Entry) *PrometheusSer server := &PrometheusServer{ log: log, vpp: vpp, - channel: make(chan common.CalicoVppEvent, 10), + channel: make(chan any, 10), podInterfacesByKey: make(map[string]model.LocalPodSpec), podInterfacesDetailsBySwifIndex: make(map[uint32]podInterfaceDetails), statsclient: statsclient.NewStatsClient("" /* default socket name */), @@ -415,7 +415,11 @@ func (p *PrometheusServer) ServePrometheus(t *tomb.Tomb) error { go func() { for t.Alive() { /* Note: we will only receive events we ask for when registering the chan */ - evt := <-p.channel + msg := <-p.channel + evt, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } switch evt.Type { case common.PodAdded: podSpec, ok := evt.New.(*model.LocalPodSpec) diff --git a/calico-vpp-agent/routing/bgp_watcher.go b/calico-vpp-agent/routing/bgp_watcher.go index 56e69103..78f1f760 100644 --- a/calico-vpp-agent/routing/bgp_watcher.go +++ b/calico-vpp-agent/routing/bgp_watcher.go @@ -466,7 +466,11 @@ func (s *Server) WatchBGPPath(t *tomb.Tomb) error { stopBGPMonitoring() s.log.Infof("Routing Server asked to stop") return nil - case evt := <-s.routingServerEventChan: + case msg := <-s.routingServerEventChan: + evt, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } /* Note: we will only receive events we ask for when registering the chan */ switch evt.Type { case common.LocalPodAddressAdded: diff --git a/calico-vpp-agent/routing/routing_server.go b/calico-vpp-agent/routing/routing_server.go index 046e1a28..89a5081b 100644 --- a/calico-vpp-agent/routing/routing_server.go +++ 
b/calico-vpp-agent/routing/routing_server.go @@ -55,7 +55,7 @@ type Server struct { bgpFilters map[string]*calicov3.BGPFilter bgpPeers map[string]*watchers.LocalBGPPeer - routingServerEventChan chan common.CalicoVppEvent + routingServerEventChan chan any nodeBGPSpec *common.LocalNodeSpec } @@ -82,7 +82,7 @@ func NewRoutingServer(vpp *vpplink.VppLink, bgpServer *bgpserver.BgpServer, log BGPServer: bgpServer, localAddressMap: make(map[string]localAddress), - routingServerEventChan: make(chan common.CalicoVppEvent, common.ChanSize), + routingServerEventChan: make(chan any, common.ChanSize), bgpFilters: make(map[string]*calicov3.BGPFilter), bgpPeers: make(map[string]*watchers.LocalBGPPeer), } diff --git a/calico-vpp-agent/tests/mocks/pubsub_handler.go b/calico-vpp-agent/tests/mocks/pubsub_handler.go index 61ee919b..4027c4e4 100644 --- a/calico-vpp-agent/tests/mocks/pubsub_handler.go +++ b/calico-vpp-agent/tests/mocks/pubsub_handler.go @@ -21,7 +21,7 @@ import ( // PubSubHandlerMock is mocking the handlers registering to common.ThePubSub type PubSubHandlerMock struct { - eventChan chan common.CalicoVppEvent + eventChan chan any ReceivedEvents []common.CalicoVppEvent expectedEventTypes []common.CalicoVppEventType t tomb.Tomb @@ -30,7 +30,7 @@ type PubSubHandlerMock struct { // NewPubSubHandlerMock creates new instance of PubSubHandlerMock func NewPubSubHandlerMock(expectedEventTypes ...common.CalicoVppEventType) *PubSubHandlerMock { handler := &PubSubHandlerMock{ - eventChan: make(chan common.CalicoVppEvent, common.ChanSize), + eventChan: make(chan any, common.ChanSize), ReceivedEvents: make([]common.CalicoVppEvent, 0, 10), expectedEventTypes: expectedEventTypes, } @@ -56,7 +56,11 @@ func (m *PubSubHandlerMock) receiveLoop() error { case <-m.t.Dying(): close(m.eventChan) return nil - case event := <-m.eventChan: + case msg := <-m.eventChan: + event, ok := msg.(common.CalicoVppEvent) + if !ok { + panic("expected CalicoVppEventType") + } m.ReceivedEvents = 
append(m.ReceivedEvents, event) } } diff --git a/calico-vpp-agent/testutils/testutils.go b/calico-vpp-agent/testutils/testutils.go index e619c618..fad0023a 100644 --- a/calico-vpp-agent/testutils/testutils.go +++ b/calico-vpp-agent/testutils/testutils.go @@ -236,7 +236,8 @@ func DpoNetworkNameFieldName() string { // InterfaceTagForLocalTunTunnel constructs the tag for the VPP side of the tap tunnel the same way as cni server func InterfaceTagForLocalTunTunnel(interfaceName, netns string) string { - return InterfaceTagForLocalTunnel(podinterface.NewTunTapPodInterfaceDriver(nil, nil, nil).Name, + return InterfaceTagForLocalTunnel( + podinterface.NewTunTapPodInterfaceDriver(nil, nil, nil).Name, interfaceName, netns) } diff --git a/calico-vpp-agent/watchers/bgp_configuration_watcher.go b/calico-vpp-agent/watchers/bgp_configuration_watcher.go index c23e8f8a..c6996a82 100644 --- a/calico-vpp-agent/watchers/bgp_configuration_watcher.go +++ b/calico-vpp-agent/watchers/bgp_configuration_watcher.go @@ -35,7 +35,7 @@ import ( type BGPConfigurationWatcher struct { log *logrus.Entry clientv3 calicov3cli.Interface - BGPConfigurationWatcherEventChan chan common.CalicoVppEvent + BGPConfigurationWatcherEventChan chan any BGPConf *calicov3.BGPConfigurationSpec } @@ -43,7 +43,7 @@ func NewBGPConfigurationWatcher(clientv3 calicov3cli.Interface, log *logrus.Entr w := BGPConfigurationWatcher{ log: log, clientv3: clientv3, - BGPConfigurationWatcherEventChan: make(chan common.CalicoVppEvent, common.ChanSize), + BGPConfigurationWatcherEventChan: make(chan any, common.ChanSize), } reg := common.RegisterHandler(w.BGPConfigurationWatcherEventChan, "BGP Config watcher events") reg.ExpectEvents(common.BGPConfChanged) @@ -132,7 +132,11 @@ func (w *BGPConfigurationWatcher) WatchBGPConfiguration(t *tomb.Tomb) error { case <-t.Dying(): w.log.Warn("BGPConf watcher stopped") return nil - case evt := <-w.BGPConfigurationWatcherEventChan: + case msg := <-w.BGPConfigurationWatcherEventChan: + evt, ok := 
msg.(common.CalicoVppEvent) + if !ok { + continue + } switch evt.Type { case common.BGPConfChanged: oldBGPConf := w.BGPConf diff --git a/calico-vpp-agent/felix/messages.go b/calico-vpp-agent/watchers/felix.go similarity index 51% rename from calico-vpp-agent/felix/messages.go rename to calico-vpp-agent/watchers/felix.go index ff2a5846..cdb411f4 100644 --- a/calico-vpp-agent/felix/messages.go +++ b/calico-vpp-agent/watchers/felix.go @@ -1,4 +1,4 @@ -// Copyright (C) 2020 Cisco Systems Inc. +// Copyright (C) 2025 Cisco Systems Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -13,30 +13,114 @@ // See the License for the specific language governing permissions and // limitations under the License. -package felix +package watchers import ( "bytes" "encoding/binary" - "errors" + goerr "errors" "io" "net" + "os" + "github.com/pkg/errors" "github.com/projectcalico/calico/felix/proto" + "github.com/sirupsen/logrus" pb "google.golang.org/protobuf/proto" + "gopkg.in/tomb.v2" + + "github.com/projectcalico/vpp-dataplane/v3/calico-vpp-agent/common" + "github.com/projectcalico/vpp-dataplane/v3/config" ) -func (s *Server) MessageReader(conn net.Conn) <-chan interface{} { - ch := make(chan interface{}) +type FelixWatcher struct { + log *logrus.Entry + nextSeqNumber uint64 + felixServerEventChan chan any +} + +func NewFelixWatcher(felixServerEventChan chan any, log *logrus.Entry) *FelixWatcher { + return &FelixWatcher{ + log: log, + nextSeqNumber: 0, + felixServerEventChan: felixServerEventChan, + } +} + +// Serve runs the felix server +func (fw *FelixWatcher) WatchFelix(t *tomb.Tomb) error { + fw.log.Info("Starting felix Watcher") + // Cleanup potentially left over socket + err := os.RemoveAll(config.FelixDataplaneSocket) + if err != nil { + return errors.Wrapf(err, "Could not delete socket %s", config.FelixDataplaneSocket) + } + + listener, err := net.Listen("unix", 
config.FelixDataplaneSocket) + if err != nil { + return errors.Wrapf(err, "Could not bind to unix://%s", config.FelixDataplaneSocket) + } + defer func() { + listener.Close() + os.RemoveAll(config.FelixDataplaneSocket) + }() + for { + fw.felixServerEventChan <- &common.FelixSocketStateChanged{ + NewState: common.StateDisconnected, + } + // Accept only one connection + conn, err := listener.Accept() + if err != nil { + return errors.Wrap(err, "cannot accept felix client connection") + } + fw.log.Infof("Accepted connection from felix") + fw.felixServerEventChan <- &common.FelixSocketStateChanged{ + NewState: common.StateConnected, + } + + felixUpdates := fw.MessageReader(conn) + innerLoop: + for { + select { + case <-t.Dying(): + fw.log.Warn("Felix server exiting") + err = conn.Close() + if err != nil { + fw.log.WithError(err).Warn("Error closing unix connection to felix API proxy") + } + fw.log.Infof("Waiting for SyncFelix to stop...") + return nil + // <-felixUpdates & handleFelixUpdate does the bulk of the policy sync job. It starts by reconciling the current + // configured state in VPP (empty at first) with what is sent by felix, and once both are in + // sync, it keeps processing felix updates. It also sends endpoint updates to felix when the + // CNI component adds or deletes container interfaces. 
+ case msg, ok := <-felixUpdates: + if !ok { + fw.log.Infof("Felix MessageReader closed") + break innerLoop + } + fw.felixServerEventChan <- msg + } + } + err = conn.Close() + if err != nil { + fw.log.WithError(err).Warn("Error closing unix connection to felix API proxy") + } + fw.log.Infof("SyncFelix exited, reconnecting to felix") + } +} + +func (fw *FelixWatcher) MessageReader(conn net.Conn) <-chan any { + ch := make(chan any) go func() { for { - msg, err := s.RecvMessage(conn) + msg, err := fw.RecvMessage(conn) if err != nil { - if errors.Is(err, io.EOF) && msg == nil { - s.log.Debug("EOF on felix-dataplane.sock") + if goerr.Is(err, io.EOF) && msg == nil { + fw.log.Debug("EOF on felix-dataplane.sock") } else { - s.log.Errorf("Error on felix-dataplane.sock err=%v msg=%v", err, msg) + fw.log.Errorf("Error on felix-dataplane.sock err=%v msg=%v", err, msg) } break } @@ -51,7 +135,7 @@ func (s *Server) MessageReader(conn net.Conn) <-chan interface{} { return ch } -func (s *Server) RecvMessage(conn net.Conn) (msg interface{}, err error) { +func (fw *FelixWatcher) RecvMessage(conn net.Conn) (msg interface{}, err error) { buf := make([]byte, 8) _, err = io.ReadFull(conn, buf) if err != nil { @@ -126,22 +210,22 @@ func (s *Server) RecvMessage(conn net.Conn) (msg interface{}, err error) { msg = payload.GlobalBgpConfigUpdate default: - s.log.WithField("payload", payload).Debug("Ignoring unknown message from felix") + fw.log.WithField("payload", payload).Debug("Ignoring unknown message from felix") } - s.log.WithField("msg", msg).Debug("Received message from dataplane.") + fw.log.WithField("msg", msg).Debug("Received message from dataplane.") return } -func (s *Server) SendMessage(conn net.Conn, msg interface{}) (err error) { - s.log.Debugf("Writing msg (%v) to felix: %#v", s.nextSeqNumber, msg) +func (fw *FelixWatcher) SendMessage(conn net.Conn, msg interface{}) (err error) { + fw.log.Debugf("Writing msg (%v) to felix: %#v", fw.nextSeqNumber, msg) // Wrap the payload 
message in an envelope so that protobuf takes care of deserialising // it as the correct type. envelope := &proto.FromDataplane{ - SequenceNumber: s.nextSeqNumber, + SequenceNumber: fw.nextSeqNumber, } - s.nextSeqNumber++ + fw.nextSeqNumber++ switch msg := msg.(type) { case *proto.ProcessStatusUpdate: envelope.Payload = &proto.FromDataplane_ProcessStatusUpdate{ProcessStatusUpdate: msg} @@ -156,13 +240,11 @@ func (s *Server) SendMessage(conn net.Conn, msg interface{}) (err error) { case *proto.WireguardStatusUpdate: envelope.Payload = &proto.FromDataplane_WireguardStatusUpdate{WireguardStatusUpdate: msg} default: - s.log.WithField("msg", msg).Panic("Unknown message type") + fw.log.WithField("msg", msg).Panic("Unknown message type") } data, err := pb.Marshal(envelope) - if err != nil { - s.log.WithError(err).WithField("msg", msg).Panic( - "Failed to marshal data") + fw.log.WithError(err).WithField("msg", msg).Panic("Failed to marshal data") } lengthBytes := make([]byte, 8) @@ -173,14 +255,43 @@ func (s *Server) SendMessage(conn net.Conn, msg interface{}) (err error) { for { _, err := messageBuf.WriteTo(conn) if err == io.ErrShortWrite { - s.log.Warn("Short write to felix; buffer full?") + fw.log.Warn("Short write to felix; buffer full?") continue } if err != nil { return err } - s.log.Debug("Wrote message to felix") + fw.log.Debug("Wrote message to felix") break } return nil } + +func InstallFelixPlugin() (err error) { + err = os.RemoveAll(config.FelixPluginDstPath) + if err != nil { + logrus.Warnf("Could not delete %s: %v", config.FelixPluginDstPath, err) + } + + in, err := os.Open(config.FelixPluginSrcPath) + if err != nil { + return errors.Wrap(err, "cannot open felix plugin to copy") + } + defer in.Close() + + out, err := os.OpenFile(config.FelixPluginDstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0755) + if err != nil { + return errors.Wrap(err, "cannot open felix plugin to write") + } + defer func() { + cerr := out.Close() + if err == nil { + err = 
errors.Wrap(cerr, "cannot close felix plugin file") + } + }() + if _, err = io.Copy(out, in); err != nil { + return errors.Wrap(err, "cannot copy data") + } + err = out.Sync() + return errors.Wrapf(err, "could not sync felix plugin changes") +} diff --git a/calico-vpp-agent/watchers/net_watcher.go b/calico-vpp-agent/watchers/net_watcher.go index 0c58071e..d185d20d 100644 --- a/calico-vpp-agent/watchers/net_watcher.go +++ b/calico-vpp-agent/watchers/net_watcher.go @@ -36,28 +36,12 @@ import ( "github.com/projectcalico/vpp-dataplane/v3/vpplink" ) -type VRF struct { - Tables [2]uint32 // one for ipv4, one for ipv6 -} - -type NetworkDefinition struct { - // VRF is the main table used for the corresponding physical network - VRF VRF - // PodVRF is the table used for the pods in the corresponding physical network - PodVRF VRF - Vni uint32 - PhysicalNetworkName string - Name string - Range string - NetAttachDefs string -} - type NetWatcher struct { log *logrus.Entry vpp *vpplink.VppLink client client.WithWatch stop chan struct{} - networkDefinitions map[string]*NetworkDefinition + networkDefinitions map[string]*common.NetworkDefinition nads map[string]string InSync chan interface{} nodeBGPSpec *common.LocalNodeSpec @@ -78,7 +62,7 @@ func NewNetWatcher(vpp *vpplink.VppLink, log *logrus.Entry) *NetWatcher { vpp: vpp, client: *kubernetesClient, stop: make(chan struct{}), - networkDefinitions: make(map[string]*NetworkDefinition), + networkDefinitions: make(map[string]*common.NetworkDefinition), nads: make(map[string]string), InSync: make(chan interface{}), } @@ -319,7 +303,7 @@ func (w *NetWatcher) OnNetDeleted(netName string) error { return nil } -func (w *NetWatcher) CreateNetwork(networkName string, networkVni uint32, netRange string, phyNet string) (netDef *NetworkDefinition, err error) { +func (w *NetWatcher) CreateNetwork(networkName string, networkVni uint32, netRange string, phyNet string) (netDef *common.NetworkDefinition, err error) { /* Create and Setup the 
per-network VRF */ if _, ok := w.networkDefinitions[networkName]; ok { return w.networkDefinitions[networkName], nil @@ -327,9 +311,9 @@ func (w *NetWatcher) CreateNetwork(networkName string, networkVni uint32, netRan w.log.Infof("adding network %s", networkName) vrfID := common.VppManagerInfo.PhysicalNets[phyNet].VrfID podVrfID := common.VppManagerInfo.PhysicalNets[phyNet].PodVrfID - netDef = &NetworkDefinition{ - VRF: VRF{Tables: [2]uint32{vrfID, vrfID}}, - PodVRF: VRF{Tables: [2]uint32{podVrfID, podVrfID}}, + netDef = &common.NetworkDefinition{ + VRF: common.VRF{Tables: [2]uint32{vrfID, vrfID}}, + PodVRF: common.VRF{Tables: [2]uint32{podVrfID, podVrfID}}, Vni: uint32(networkVni), PhysicalNetworkName: phyNet, Name: networkName, @@ -338,7 +322,7 @@ func (w *NetWatcher) CreateNetwork(networkName string, networkVni uint32, netRan return netDef, nil } -func (w *NetWatcher) DeleteNetwork(networkName string) (*NetworkDefinition, error) { +func (w *NetWatcher) DeleteNetwork(networkName string) (*common.NetworkDefinition, error) { if _, ok := w.networkDefinitions[networkName]; !ok { return nil, errors.Errorf("non-existent network deleted: %s", networkName) } diff --git a/calico-vpp-agent/watchers/peers_watcher.go b/calico-vpp-agent/watchers/peers_watcher.go index 5c3d54f5..281e9551 100644 --- a/calico-vpp-agent/watchers/peers_watcher.go +++ b/calico-vpp-agent/watchers/peers_watcher.go @@ -68,7 +68,7 @@ type PeerWatcher struct { secretWatcher *secretWatcher nodeStatesByName map[string]common.LocalNodeSpec - peerWatcherEventChan chan common.CalicoVppEvent + peerWatcherEventChan chan any BGPConf *calicov3.BGPConfigurationSpec watcher watch.Interface currentWatchRevision string @@ -180,7 +180,11 @@ func (w *PeerWatcher) WatchBGPPeers(t *tomb.Tomb) error { default: w.log.Info("Peers updated, reevaluating peerings") } - case evt := <-w.peerWatcherEventChan: + case msg := <-w.peerWatcherEventChan: + evt, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } /* Note: we 
will only receive events we ask for when registering the chan */ switch evt.Type { case common.PeerNodeStateChanged: @@ -541,7 +545,7 @@ func NewPeerWatcher(clientv3 calicov3cli.Interface, k8sclient *kubernetes.Client clientv3: clientv3, nodeStatesByName: make(map[string]common.LocalNodeSpec), log: log, - peerWatcherEventChan: make(chan common.CalicoVppEvent, common.ChanSize), + peerWatcherEventChan: make(chan any, common.ChanSize), } w.secretWatcher, err = NewSecretWatcher(&w, k8sclient) if err != nil { diff --git a/calico-vpp-agent/watchers/uplink_route_watcher.go b/calico-vpp-agent/watchers/uplink_route_watcher.go index 8432cdac..048642fd 100644 --- a/calico-vpp-agent/watchers/uplink_route_watcher.go +++ b/calico-vpp-agent/watchers/uplink_route_watcher.go @@ -41,13 +41,13 @@ type RouteWatcher struct { addrNetlinkFailed chan struct{} addrUpdate chan struct{} closeLock sync.Mutex - eventChan chan common.CalicoVppEvent + eventChan chan any log *log.Entry } func NewRouteWatcher(log *log.Entry) *RouteWatcher { routeWatcher := &RouteWatcher{ - eventChan: make(chan common.CalicoVppEvent, common.ChanSize), + eventChan: make(chan any, common.ChanSize), log: log, } reg := common.RegisterHandler(routeWatcher.eventChan, "route watcher events") @@ -237,12 +237,16 @@ func (r *RouteWatcher) WatchRoutes(t *tomb.Tomb) error { } r.log.Warn("Route watcher stopped") return nil - case event := <-r.eventChan: + case msg := <-r.eventChan: + event, ok := msg.(common.CalicoVppEvent) + if !ok { + continue + } switch event.Type { case common.NetDeleted: - netDef, ok := event.Old.(*NetworkDefinition) + netDef, ok := event.Old.(*common.NetworkDefinition) if !ok { - r.log.Errorf("event.Old is not a (*NetworkDefinition) %v", event.Old) + r.log.Errorf("event.Old is not a (*common.NetworkDefinition) %v", event.Old) goto restart } key := netDef.Range @@ -259,9 +263,9 @@ func (r *RouteWatcher) WatchRoutes(t *tomb.Tomb) error { } } case common.NetAddedOrUpdated: - netDef, ok := 
event.New.(*NetworkDefinition) + netDef, ok := event.New.(*common.NetworkDefinition) if !ok { - r.log.Errorf("event.New is not a (*NetworkDefinition) %v", event.New) + r.log.Errorf("event.New is not a (*common.NetworkDefinition) %v", event.New) goto restart } key := netDef.Range diff --git a/config/config.go b/config/config.go index 2c584085..8720eb0f 100644 --- a/config/config.go +++ b/config/config.go @@ -45,6 +45,9 @@ const ( CalicoVppPidFile = "/var/run/vpp/calico_vpp.pid" CalicoVppVersionFile = "/etc/calicovppversion" + FelixPluginSrcPath = "/bin/felix-api-proxy" + FelixPluginDstPath = "/var/lib/calico/felix-plugins/felix-api-proxy" + DefaultVXLANVni = 4096 DefaultVXLANPort = 4789 DefaultWireguardPort = 51820