From 46ce82a0853c0d0125ca4d59486b28552d38a772 Mon Sep 17 00:00:00 2001 From: Mikhail Peshkov Date: Mon, 8 Dec 2025 12:39:32 +0700 Subject: [PATCH 1/6] Add stats pull scheme for nonreplicated partitions --- .../core/disk_registry_based_part_counters.h | 28 ++ .../partition_nonrepl/part_mirror_actor.cpp | 19 +- .../partition_nonrepl/part_mirror_actor.h | 20 ++ .../part_mirror_actor_stats.cpp | 202 +++++++++++--- .../part_mirror_resync_actor.cpp | 16 +- .../part_mirror_resync_actor.h | 21 ++ .../part_mirror_resync_actor_stats.cpp | 173 ++++++++++-- .../partition_nonrepl/part_nonrepl_actor.cpp | 12 +- .../partition_nonrepl/part_nonrepl_actor.h | 9 + .../part_nonrepl_actor_stats.cpp | 65 +++-- .../part_nonrepl_events_private.h | 63 +++++ .../part_nonrepl_migration_common_actor.cpp | 17 +- .../part_nonrepl_migration_common_actor.h | 21 ++ ...t_nonrepl_migration_common_actor_stats.cpp | 201 ++++++++++++-- .../part_nonrepl_rdma_actor.cpp | 12 +- .../part_nonrepl_rdma_actor.h | 8 + .../part_nonrepl_rdma_actor_stats.cpp | 53 +++- ...d_partition_statistics_collector_actor.cpp | 114 ++++++++ ...sed_partition_statistics_collector_actor.h | 50 ++++ .../libs/storage/volume/actors/ya.make | 2 + .../libs/storage/volume/volume_actor.cpp | 14 +- .../libs/storage/volume/volume_actor.h | 36 +++ .../storage/volume/volume_actor_stats.cpp | 120 ++++++-- .../libs/storage/volume/volume_ut_stats.cpp | 261 ++++++++++++++++++ 24 files changed, 1401 insertions(+), 136 deletions(-) create mode 100644 cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h create mode 100644 cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp create mode 100644 cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h diff --git a/cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h b/cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h new file mode 100644 index 00000000000..f18db5d13ef --- /dev/null +++ b/cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h @@ -0,0 +1,28 @@ +#pragma once + +#include "disk_counters.h" + +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +struct TPartNonreplCountersData +{ + ui64 NetworkBytes; + TDuration CpuUsage; + TPartitionDiskCountersPtr DiskCounters; + + TPartNonreplCountersData( + ui64 networkBytes, + TDuration cpuUsage, + TPartitionDiskCountersPtr diskCounters) + : NetworkBytes(networkBytes) + , CpuUsage(cpuUsage) + , DiskCounters(std::move(diskCounters)) + {} +}; + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp index f457450d01f..610b9b68e8a 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp @@ -86,8 +86,9 @@ TMirrorPartitionActor::~TMirrorPartitionActor() = default; void TMirrorPartitionActor::Bootstrap(const TActorContext& ctx) { SetupPartitions(ctx); - ScheduleCountersUpdate(ctx); - + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } if (Config->GetDataScrubbingEnabled() && !ResyncActorId) { StartScrubbingRange(ctx, 0); } @@ -887,6 +888,15 @@ STFUNC(TMirrorPartitionActor::StateWork) HFunc(TEvPartition::TEvReleaseRange, HandleReleaseRange); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined, + HandleDiskRegistryBasedPartCountersCombined); + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); IgnoreFunc(TEvents::TEvPoisonTaken); @@ -945,6 +955,11 @@ STFUNC(TMirrorPartitionActor::StateZombie) IgnoreFunc(TEvPartition::TEvReleaseRange); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined); + IgnoreFunc(TEvents::TEvPoisonPill); HFunc(TEvents::TEvPoisonTaken, HandlePoisonTaken); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h index 9fd8d22500e..11a6f335d5f 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -113,6 +114,9 @@ class TMirrorPartitionActor final const size_t MultiAgentWriteRequestSizeThreshold = 0; size_t MultiAgentWriteRoundRobinSeed = 0; + TRequestInfoPtr StatisticRequestInfo; + ui64 StatisticSeqNo = 0; + public: TMirrorPartitionActor( TStorageConfigPtr config, @@ -150,6 +154,12 @@ class TMirrorPartitionActor final EWriteRequestType SuggestWriteRequestType( const NActors::TActorContext& ctx, TBlockRange64 range); + TPartNonreplCountersData ExtractPartCounters( + const NActors::TActorContext& ctx); + void UpdateCounters( + const NActors::TActorContext& ctx, + const NActors::TActorId& sender, + TPartNonreplCountersData partCountersData); private: STFUNC(StateWork); @@ -228,6 +238,16 @@ class TMirrorPartitionActor final const NActors::TEvents::TEvPoisonTaken::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const NActors::TActorContext& ctx); + template void MirrorRequest( const typename TMethod::TRequest::TPtr& ev, diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp index 058abadfe6f..6bd85e942e9 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NCloud::NBlockStore::NStorage { @@ -9,21 +10,23 @@ using namespace NActors; //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionActor::HandlePartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TMirrorPartitionActor::UpdateCounters( + const TActorContext& ctx, + const TActorId& sender, + TPartNonreplCountersData partCountersData) { - auto* msg = ev->Get(); - - const ui32 replicaIndex = State.GetReplicaIndex(ev->Sender); + const ui32 replicaIndex = State.GetReplicaIndex(sender); if (replicaIndex < ReplicaCounters.size()) { - ReplicaCounters[replicaIndex] = std::move(msg->DiskCounters); - NetworkBytes += msg->NetworkBytes; - CpuUsage += CpuUsage; + ReplicaCounters[replicaIndex] = + std::move(partCountersData.DiskCounters); + NetworkBytes += partCountersData.NetworkBytes; + CpuUsage += partCountersData.CpuUsage; } else { - LOG_INFO(ctx, TBlockStoreComponents::PARTITION, + LOG_INFO( + ctx, + TBlockStoreComponents::PARTITION, "Partition %s for disk %s counters not found", - ToString(ev->Sender).c_str(), + ToString(sender).c_str(), State.GetReplicaInfos()[0].Config->GetName().Quote().c_str()); Y_DEBUG_ABORT_UNLESS(0); @@ -32,12 +35,25 @@ void TMirrorPartitionActor::HandlePartCounters( //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionActor::SendStats(const TActorContext& ctx) +void TMirrorPartitionActor::HandlePartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, + const TActorContext& ctx) { - if (!StatActorId) { - return; - } + auto* msg = ev->Get(); + + TPartNonreplCountersData partCountersData( + msg->NetworkBytes, + msg->CpuUsage, + std::move(msg->DiskCounters)); + + UpdateCounters(ctx, ev->Sender, std::move(partCountersData)); +} + +//////////////////////////////////////////////////////////////////////////////// +TPartNonreplCountersData TMirrorPartitionActor::ExtractPartCounters( + const TActorContext& ctx) +{ auto stats = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); @@ -54,12 +70,12 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) stats->Simple.IORequestsInFlight.Reset(); for (const auto& counters: ReplicaCounters) { if (counters) { - stats->Simple.BytesCount.Value = Max( - stats->Simple.BytesCount.Value, - counters->Simple.BytesCount.Value); - stats->Simple.IORequestsInFlight.Value = Max( - stats->Simple.IORequestsInFlight.Value, - counters->Simple.IORequestsInFlight.Value); + stats->Simple.BytesCount.Value = + Max(stats->Simple.BytesCount.Value, + counters->Simple.BytesCount.Value); + stats->Simple.IORequestsInFlight.Value = + Max(stats->Simple.IORequestsInFlight.Value, + counters->Simple.IORequestsInFlight.Value); } } @@ -67,23 +83,16 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) stats->Simple.ScrubbingProgress.Value = 100 * GetScrubbingRange().Start / State.GetBlockCount(); stats->Cumulative.ScrubbingThroughput.Value = ScrubbingThroughput; - auto request = - std::make_unique( - MakeIntrusive(), - std::move(stats), - DiskId, - NetworkBytes, - CpuUsage); + + TPartNonreplCountersData partCounters( + NetworkBytes, + CpuUsage, + std::move(stats)); NetworkBytes = 0; CpuUsage = {}; ScrubbingThroughput = 0; - NCloud::Send( - ctx, - StatActorId, - std::move(request)); - const bool scrubbingEnabled = Config->GetDataScrubbingEnabled() && !ResyncActorId; auto scrubberCounters = std::make_unique( @@ -95,6 +104,133 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) std::move(Fixed), std::move(FixedPartial)); NCloud::Send(ctx, StatActorId, std::move(scrubberCounters)); + + return partCounters; +} + +void TMirrorPartitionActor::SendStats(const TActorContext& ctx) +{ + if (!StatActorId) { + return; + } + + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); + + auto request = + std::make_unique( + MakeIntrusive(), + std::move(diskCounters), + DiskId, + networkBytes, + cpuUsage); + + NCloud::Send( + ctx, + StatActorId, + std::move(request)); + +} + +//////////////////////////////////////////////////////////////////////////////// + +void TMirrorPartitionActor::HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (StatisticRequestInfo) { + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + MakeError(E_REJECTED, "Mirror actor got new request"), + CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig + ->GetHistogramCounterOptions()), // diskCounters + 0, // networkBytes + TDuration{}, // cpuUsage + SelfId(), + DiskId)); + StatisticRequestInfo.Reset(); + } + + auto statActorIds = State.GetReplicaActorsBypassingProxies(); + + if (statActorIds.empty()) { + auto&& [networkBytes, cpuUsage, diskCounters] = + ExtractPartCounters(ctx); + + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError(E_INVALID_STATE, "Mirror actor hasn't replicas"), + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + DiskId)); + return; + } + + StatisticRequestInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + + NCloud::Register( + ctx, + SelfId(), + std::move(statActorIds), + ++StatisticSeqNo); +} + +void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate::TEvDiskRegistryBasedPartCountersCombined:: + TPtr& ev, + const TActorContext& ctx) +{ + if (!StatisticRequestInfo) { + LOG_ERROR( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to empty " + "StatisticRequestInfo.", + DiskId.Quote().c_str()); + return; + } + + auto* msg = ev->Get(); + + if (msg->SeqNo < StatisticSeqNo) { + return; + } + + for (auto& counters: msg->Counters) { + TPartNonreplCountersData partCountersData( + counters.NetworkBytes, + counters.CpuUsage, + std::move(counters.DiskCounters)); + + UpdateCounters(ctx, counters.ActorId, std::move(partCountersData)); + } + + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); + + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + msg->Error, + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + DiskId)); + + StatisticRequestInfo.Reset();; } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp index e3e5c857ab2..d703da47b40 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp @@ -62,7 +62,9 @@ TMirrorPartitionResyncActor::~TMirrorPartitionResyncActor() = default; void TMirrorPartitionResyncActor::Bootstrap(const TActorContext& ctx) { SetupPartitions(ctx); - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } ContinueResyncIfNeeded(ctx); Become(&TThis::StateWork); @@ -310,6 +312,14 @@ STFUNC(TMirrorPartitionResyncActor::StateWork) TEvVolume::TEvDiskRegistryBasedPartitionCounters, HandlePartCounters); HFunc(TEvVolume::TEvScrubberCounters, HandleScrubberCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined, + HandleDiskRegistryBasedPartCountersCombined); HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); @@ -357,6 +367,10 @@ STFUNC(TMirrorPartitionResyncActor::StateZombie) IgnoreFunc(TEvVolume::TEvRWClientIdChanged); IgnoreFunc(TEvVolume::TEvDiskRegistryBasedPartitionCounters); IgnoreFunc(TEvVolume::TEvScrubberCounters); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined); IgnoreFunc(TEvents::TEvPoisonPill); HFunc(TEvents::TEvPoisonTaken, HandlePoisonTaken); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h index 01ecf12d107..adcb458209f 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -93,6 +94,9 @@ class TMirrorPartitionResyncActor final TBackoffDelayProvider BackoffProvider; + TRequestInfoPtr StatisticRequestInfo; + ui64 StatisticSeqNo = 0; + public: TMirrorPartitionResyncActor( TStorageConfigPtr config, @@ -135,6 +139,13 @@ class TMirrorPartitionResyncActor final bool IsAnybodyAlive() const; void ReplyAndDie(const NActors::TActorContext& ctx); + void UpdateCounters( + const NActors::TActorContext& ctx, + const NActors::TActorId& sender, + TPartNonreplCountersData partCountersData); + + TPartNonreplCountersData ExtractPartCounters(); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -227,6 +238,16 @@ class TMirrorPartitionResyncActor final const TEvNonreplPartitionPrivate::TEvGetDeviceForRangeRequest::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const NActors::TActorContext& ctx); + BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocks, TEvService); BLOCKSTORE_IMPLEMENT_REQUEST(WriteBlocks, TEvService); BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocksLocal, TEvService); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp index 85cc4346c11..4aef888984a 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NCloud::NBlockStore::NStorage { @@ -9,15 +10,14 @@ using namespace NActors; //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionResyncActor::HandlePartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TMirrorPartitionResyncActor::UpdateCounters( + const TActorContext& ctx, + const TActorId& sender, + TPartNonreplCountersData partCountersData) { - auto* msg = ev->Get(); - - bool knownSender = ev->Sender == MirrorActorId; + bool knownSender = sender == MirrorActorId; for (const auto& replica: Replicas) { - knownSender |= replica.ActorId == ev->Sender; + knownSender |= replica.ActorId == sender; } if (!knownSender) { @@ -25,7 +25,7 @@ void TMirrorPartitionResyncActor::HandlePartCounters( ctx, TBlockStoreComponents::PARTITION, "Partition %s for disk %s counters not found", - ToString(ev->Sender).c_str(), + ToString(sender).c_str(), PartConfig->GetName().Quote().c_str()); Y_DEBUG_ABORT_UNLESS(0); @@ -33,13 +33,29 @@ void TMirrorPartitionResyncActor::HandlePartCounters( } if (!MirrorCounters) { - MirrorCounters = std::move(msg->DiskCounters); + MirrorCounters = std::move(partCountersData.DiskCounters); } else { - MirrorCounters->AggregateWith(*msg->DiskCounters); + MirrorCounters->AggregateWith(*partCountersData.DiskCounters); } - NetworkBytes += msg->NetworkBytes; - CpuUsage += msg->CpuUsage; + NetworkBytes += partCountersData.NetworkBytes; + CpuUsage += partCountersData.CpuUsage; +} + +//////////////////////////////////////////////////////////////////////////////// + +void TMirrorPartitionResyncActor::HandlePartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + + TPartNonreplCountersData partCountersData( + msg->NetworkBytes, + msg->CpuUsage, + std::move(msg->DiskCounters)); + + UpdateCounters(ctx, ev->Sender, std::move(partCountersData)); } void TMirrorPartitionResyncActor::HandleScrubberCounters( @@ -49,9 +65,119 @@ void TMirrorPartitionResyncActor::HandleScrubberCounters( ForwardMessageToActor(ev, ctx, StatActorId); } +void TMirrorPartitionResyncActor::HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (StatisticRequestInfo) { + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + MakeError(E_REJECTED, "Mirror resync actor got new request"), + CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig + ->GetHistogramCounterOptions()), // diskCounters + 0, // networkBytes + TDuration{}, // cpuUsage + SelfId(), + PartConfig->GetName())); + StatisticRequestInfo.Reset(); + } + + if (!MirrorActorId && Replicas.empty()) { + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError( + E_INVALID_STATE, + "Part mirror resync actor hasn't replicas and mirror " + "actor"), + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + PartConfig->GetName())); + return; + } + + TVector statActorIds; + + if (MirrorActorId) { + statActorIds.push_back(MirrorActorId); + } + + for (const auto& replica: Replicas) { + statActorIds.push_back(replica.ActorId); + } + + StatisticRequestInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + + NCloud::Register( + ctx, + SelfId(), + std::move(statActorIds), + ++StatisticSeqNo); +} + +void TMirrorPartitionResyncActor::HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate::TEvDiskRegistryBasedPartCountersCombined:: + TPtr& ev, + const TActorContext& ctx) +{ + if (!StatisticRequestInfo) { + LOG_ERROR( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror resync actor statistics due to empty " + "StatisticRequestInfo.", + PartConfig->GetName().Quote().c_str()); + return; + } + + auto* msg = ev->Get(); + + if (msg->SeqNo < StatisticSeqNo) { + return; + } + + for (auto& counters: msg->Counters) { + TPartNonreplCountersData partCountersData( + counters.NetworkBytes, + counters.CpuUsage, + std::move(counters.DiskCounters)); + + UpdateCounters(ctx, counters.ActorId, std::move(partCountersData)); + } + + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + msg->Error, + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + PartConfig->GetName())); + + StatisticRequestInfo.Reset();; +} + //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) +TPartNonreplCountersData TMirrorPartitionResyncActor::ExtractPartCounters() { auto stats = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, @@ -62,18 +188,27 @@ void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) MirrorCounters.reset(); } + TPartNonreplCountersData counters(NetworkBytes, CpuUsage, std::move(stats)); + + NetworkBytes = 0; + CpuUsage = TDuration(); + + return counters; +} + +void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) +{ + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + auto request = std::make_unique( MakeIntrusive(), - std::move(stats), + std::move(diskCounters), PartConfig->GetName(), - NetworkBytes, - CpuUsage); + networkBytes, + cpuUsage); NCloud::Send(ctx, StatActorId, std::move(request)); - - NetworkBytes = 0; - CpuUsage = TDuration(); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp index cd266f6754d..d264fafd2fc 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp @@ -188,7 +188,9 @@ TRequestTimeoutPolicy TNonreplicatedPartitionActor::MakeTimeoutPolicyForRequest( void TNonreplicatedPartitionActor::Bootstrap(const TActorContext& ctx) { Become(&TThis::StateWork); - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } } bool TNonreplicatedPartitionActor::CheckReadWriteBlockRange(const TBlockRange64& range) const @@ -731,6 +733,11 @@ STFUNC(TNonreplicatedPartitionActor::StateWork) HFunc(TEvNonreplPartitionPrivate::TEvAgentIsUnavailable, HandleAgentIsUnavailable); HFunc(TEvNonreplPartitionPrivate::TEvAgentIsBackOnline, HandleAgentIsBackOnline); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); IgnoreFunc(TEvVolume::TEvRWClientIdChanged); @@ -790,6 +797,9 @@ STFUNC(TNonreplicatedPartitionActor::StateZombie) IgnoreFunc(TEvVolumePrivate::TEvDeviceTimedOutResponse); IgnoreFunc(TEvVolume::TEvRWClientIdChanged); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + default: if (!HandleRequests(ev)) { HandleUnexpectedEvent( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h index a4d8daa4569..c7eff47fcbf 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -111,6 +112,9 @@ class TNonreplicatedPartitionActor final void ReplyAndDie(const NActors::TActorContext& ctx); + TPartNonreplCountersData ExtractPartCounters( + const NActors::TActorContext& ctx); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -198,6 +202,11 @@ class TNonreplicatedPartitionActor final const TEvNonreplPartitionPrivate::TEvAgentIsBackOnline::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + bool HandleRequests(STFUNC_SIG); BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocks, TEvService); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp index aecc53df0bc..e670218855e 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp @@ -21,31 +21,23 @@ void TNonreplicatedPartitionActor::UpdateStats( //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) +TPartNonreplCountersData TNonreplicatedPartitionActor::ExtractPartCounters( + const TActorContext& ctx) { - if (!StatActorId) { - return; - } - PartCounters->Simple.IORequestsInFlight.Set( - RequestsInProgress.GetRequestCount() - ); + RequestsInProgress.GetRequestCount()); PartCounters->Simple.BytesCount.Set( - PartConfig->GetBlockCount() * PartConfig->GetBlockSize() - ); + PartConfig->GetBlockCount() * PartConfig->GetBlockSize()); PartCounters->Simple.HasBrokenDevice.Set( CalculateHasBrokenDeviceCounterValue(ctx, false)); PartCounters->Simple.HasBrokenDeviceSilent.Set( CalculateHasBrokenDeviceCounterValue(ctx, true)); - auto request = - std::make_unique( - MakeIntrusive(), - std::move(PartCounters), - PartConfig->GetName(), - NetworkBytes, - CpuUsage); + TPartNonreplCountersData counters( + NetworkBytes, + CpuUsage, + std::move(PartCounters)); NetworkBytes = 0; CpuUsage = {}; @@ -54,7 +46,48 @@ void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); + return counters; +} + +void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) +{ + if (!StatActorId) { + return; + } + + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); + + auto request = + std::make_unique( + MakeIntrusive(), + std::move(diskCounters), + PartConfig->GetName(), + networkBytes, + cpuUsage); + NCloud::Send(ctx, StatActorId, std::move(request)); } +//////////////////////////////////////////////////////////////////////////////// + +void TNonreplicatedPartitionActor:: + HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); + + NCloud::Reply( + ctx, + *ev, + std::make_unique( + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + PartConfig->GetName())); +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h index f1d0f41ab79..87ebfe99f1d 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -390,6 +391,52 @@ struct TEvNonreplPartitionPrivate {} }; + // + // GetDiskRegistryBasedPartCounters + // + + struct TGetDiskRegistryBasedPartCountersRequest + { + TGetDiskRegistryBasedPartCountersRequest() = default; + }; + + struct TGetDiskRegistryBasedPartCountersResponse + { + TPartitionDiskCountersPtr DiskCounters; + ui64 NetworkBytes; + TDuration CpuUsage; + NActors::TActorId ActorId; + TString DiskId; + + TGetDiskRegistryBasedPartCountersResponse( + TPartitionDiskCountersPtr diskCounters, + ui64 networkBytes, + TDuration cpuUsage, + const NActors::TActorId& actorId, + TString diskId) + : DiskCounters(std::move(diskCounters)) + , NetworkBytes(networkBytes) + , CpuUsage(cpuUsage) + , ActorId(actorId) + , DiskId(std::move(diskId)) + {} + }; + + // + // DiskRegistryBasedPartCountersCombined + // + + struct TDiskRegistryBasedPartCountersCombined + { + const ui64 SeqNo; + + TVector Counters; + + explicit TDiskRegistryBasedPartCountersCombined(ui64 seqNo) + : SeqNo(seqNo) + {} + }; + // // Events declaration // @@ -423,6 +470,10 @@ struct TEvNonreplPartitionPrivate EvLaggingMigrationDisabled, EvLaggingMigrationEnabled, EvInconsistentDiskAgent, + EvGetDiskRegistryBasedPartCountersRequest, + EvGetDiskRegistryBasedPartCountersResponse, + EvDiskRegistryBasedPartCountersCombined, + BLOCKSTORE_PARTITION_NONREPL_REQUESTS_PRIVATE(BLOCKSTORE_DECLARE_EVENT_IDS) @@ -524,6 +575,18 @@ struct TEvNonreplPartitionPrivate using TEvInconsistentDiskAgent = TRequestEvent; + using TEvGetDiskRegistryBasedPartCountersRequest = TRequestEvent< + TGetDiskRegistryBasedPartCountersRequest, + EvGetDiskRegistryBasedPartCountersRequest>; + + using TEvGetDiskRegistryBasedPartCountersResponse = TResponseEvent< + TGetDiskRegistryBasedPartCountersResponse, + EvGetDiskRegistryBasedPartCountersResponse>; + + using TEvDiskRegistryBasedPartCountersCombined = TResponseEvent< + TDiskRegistryBasedPartCountersCombined, + EvDiskRegistryBasedPartCountersCombined>; + BLOCKSTORE_PARTITION_NONREPL_REQUESTS_PRIVATE(BLOCKSTORE_DECLARE_PROTO_EVENTS) }; diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp index 2933e1a899f..696d2884cda 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp @@ -96,7 +96,9 @@ TNonreplicatedPartitionMigrationCommonActor:: void TNonreplicatedPartitionMigrationCommonActor::Bootstrap( const TActorContext& ctx) { - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } Become(&TThis::StateWork); @@ -306,6 +308,15 @@ STFUNC(TNonreplicatedPartitionMigrationCommonActor::StateWork) TEvVolume::TEvDeleteCheckpointDataRequest, HandleDeleteCheckpointData); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined, + HandleDiskRegistryBasedPartCountersCombined); + HFunc(NActors::TEvents::TEvWakeup, HandleWakeup); HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); @@ -365,6 +376,10 @@ STFUNC(TNonreplicatedPartitionMigrationCommonActor::StateZombie) IgnoreFunc(TEvVolume::TEvMigrationStateUpdated); IgnoreFunc(TEvVolume::TEvRWClientIdChanged); IgnoreFunc(TEvVolume::TEvDiskRegistryBasedPartitionCounters); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined); IgnoreFunc(TEvStatsServicePrivate::TEvRegisterTrafficSourceResponse); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h index fe00000b8e9..0c50f8d7807 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -187,6 +188,9 @@ class TNonreplicatedPartitionMigrationCommonActor TBackoffDelayProvider BackoffProvider; + TRequestInfoPtr StatisticRequestInfo; + ui64 StatisticSeqNo = 0; + protected: // Derived class that wishes to handle wakeup messages should make its own // enum which starts with `WR_REASON_COUNT` value. @@ -300,6 +304,13 @@ class TNonreplicatedPartitionMigrationCommonActor void OnMigrationNonRetriableError(const NActors::TActorContext& ctx); + void UpdateCounters( + const NActors::TActorContext& ctx, + const NActors::TActorId& sender, + TPartNonreplCountersData partCountersData); + + TPartNonreplCountersData ExtractPartCounters(); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -336,6 +347,16 @@ class TNonreplicatedPartitionMigrationCommonActor const NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const NActors::TActorContext& ctx); + template void MirrorRequest( const typename TMethod::TRequest::TPtr& ev, diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp index 6b4ec4e3edd..eea3ebead71 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp @@ -1,6 +1,7 @@ #include "part_nonrepl_migration_common_actor.h" #include +#include namespace NCloud::NBlockStore::NStorage { @@ -8,37 +9,50 @@ using namespace NActors; //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionMigrationCommonActor::HandlePartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TNonreplicatedPartitionMigrationCommonActor::UpdateCounters( + const TActorContext& ctx, + const TActorId& sender, + TPartNonreplCountersData partCountersData) { - auto* msg = ev->Get(); - - if (ev->Sender == SrcActorId) { - SrcCounters = std::move(msg->DiskCounters); - } else if (ev->Sender == DstActorId) { - DstCounters = std::move(msg->DiskCounters); + if (sender == SrcActorId) { + SrcCounters = std::move(partCountersData.DiskCounters); + } else if (sender == DstActorId) { + DstCounters = std::move(partCountersData.DiskCounters); } else { - LOG_INFO(ctx, TBlockStoreComponents::PARTITION, + LOG_INFO( + ctx, + TBlockStoreComponents::PARTITION, "Partition %s for disk %s counters not found", - ToString(ev->Sender).c_str(), + ToString(sender).c_str(), DiskId.Quote().c_str()); Y_DEBUG_ABORT_UNLESS(0); } - NetworkBytes += msg->NetworkBytes; - CpuUsage += msg->CpuUsage; + NetworkBytes += partCountersData.NetworkBytes; + CpuUsage += partCountersData.CpuUsage; } //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionMigrationCommonActor::SendStats( +void TNonreplicatedPartitionMigrationCommonActor::HandlePartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, const TActorContext& ctx) { - if (!StatActorId) { - return; - } + auto* msg = ev->Get(); + + TPartNonreplCountersData partCountersData( + msg->NetworkBytes, + msg->CpuUsage, + std::move(msg->DiskCounters)); + + UpdateCounters(ctx, ev->Sender, std::move(partCountersData)); +} + +//////////////////////////////////////////////////////////////////////////////// +TPartNonreplCountersData +TNonreplicatedPartitionMigrationCommonActor::ExtractPartCounters() +{ auto stats = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); @@ -54,12 +68,12 @@ void TNonreplicatedPartitionMigrationCommonActor::SendStats( if (SrcCounters && DstActorId && DstCounters) { // for some counters default AggregateWith logic is suboptimal for // mirrored partitions - stats->Simple.BytesCount.Value = Max( - SrcCounters->Simple.BytesCount.Value, - DstCounters->Simple.BytesCount.Value); - stats->Simple.IORequestsInFlight.Value = Max( - SrcCounters->Simple.IORequestsInFlight.Value, - DstCounters->Simple.IORequestsInFlight.Value); + stats->Simple.BytesCount.Value = + Max(SrcCounters->Simple.BytesCount.Value, + DstCounters->Simple.BytesCount.Value); + stats->Simple.IORequestsInFlight.Value = + Max(SrcCounters->Simple.IORequestsInFlight.Value, + DstCounters->Simple.IORequestsInFlight.Value); } stats->AggregateWith(*MigrationCounters); @@ -67,18 +81,147 @@ void TNonreplicatedPartitionMigrationCommonActor::SendStats( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); + TPartNonreplCountersData counters(NetworkBytes, CpuUsage, std::move(stats)); + + NetworkBytes = 0; + CpuUsage = {}; + + return counters; +} + +void TNonreplicatedPartitionMigrationCommonActor::SendStats( + const TActorContext& ctx) +{ + if (!StatActorId) { + return; + } + + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + auto request = std::make_unique( MakeIntrusive(), - std::move(stats), + std::move(diskCounters), DiskId, - NetworkBytes, - CpuUsage); - - NetworkBytes = 0; - CpuUsage = {}; + networkBytes, + cpuUsage); NCloud::Send(ctx, StatActorId, std::move(request)); } +//////////////////////////////////////////////////////////////////////////////// + +void TNonreplicatedPartitionMigrationCommonActor:: + HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (StatisticRequestInfo) { + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + MakeError(E_REJECTED, "Migration actor got new request"), + CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig + ->GetHistogramCounterOptions()), // diskCounters + 0, // networkBytes + TDuration{}, // cpuUsage + SelfId(), + DiskId)); + StatisticRequestInfo.Reset(); + } + + TVector statActorIds; + + if (SrcActorId) { + statActorIds.push_back(SrcActorId); + } + + if (DstActorId) { + statActorIds.push_back(DstActorId); + } + + if (statActorIds.empty()) { + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError( + E_INVALID_STATE, + "Nonreplicated migration actor hasn't src and dst " + "actors"), + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + DiskId)); + + return; + } + + StatisticRequestInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + + NCloud::Register( + ctx, + SelfId(), + std::move(statActorIds), + ++StatisticSeqNo); +} + +void TNonreplicatedPartitionMigrationCommonActor:: + HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const TActorContext& ctx) +{ + if (!StatisticRequestInfo) { + LOG_ERROR( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send migration actor statistics due to empty " + "StatisticRequestInfo.", + DiskId.Quote().c_str()); + return; + } + + auto* msg = ev->Get(); + + if (msg->SeqNo < StatisticSeqNo) { + return; + } + + for (auto& counters: msg->Counters) { + TPartNonreplCountersData partCountersData( + counters.NetworkBytes, + counters.CpuUsage, + std::move(counters.DiskCounters)); + + UpdateCounters(ctx, counters.ActorId, std::move(partCountersData)); + } + + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + msg->Error, + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + DiskId)); + + StatisticRequestInfo.Reset(); +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp index 998f2a01eb6..652484d769c 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp @@ -75,7 +75,9 @@ void TNonreplicatedPartitionRdmaActor::Bootstrap(const TActorContext& ctx) } Become(&TThis::StateWork); - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } ctx.Schedule( Config->GetNonReplicatedMinRequestTimeoutSSD(), new TEvents::TEvWakeup()); @@ -905,6 +907,11 @@ STFUNC(TNonreplicatedPartitionRdmaActor::StateWork) TEvVolumePrivate::TEvDeviceTimedOutResponse, HandleDeviceTimedOutResponse); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + default: if (!HandleRequests(ev)) { HandleUnexpectedEvent( @@ -963,6 +970,9 @@ STFUNC(TNonreplicatedPartitionRdmaActor::StateZombie) IgnoreFunc(TEvVolume::TEvRWClientIdChanged); IgnoreFunc(TEvVolumePrivate::TEvDeviceTimedOutResponse); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + default: if (!HandleRequests(ev)) { HandleUnexpectedEvent( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h index bbae963e208..c5190c66c5b 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -133,6 +134,8 @@ class TNonreplicatedPartitionRdmaActor final void ReplyAndDie(const NActors::TActorContext& ctx); + TPartNonreplCountersData ExtractPartCounters(); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -207,6 +210,11 @@ class TNonreplicatedPartitionRdmaActor final const TEvVolumePrivate::TEvDeviceTimedOutResponse::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + bool HandleRequests(STFUNC_SIG); BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocks, TEvService); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp index 610348d0838..8dfeb28de15 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp @@ -21,21 +21,17 @@ void TNonreplicatedPartitionRdmaActor::UpdateStats( //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionRdmaActor::SendStats(const TActorContext& ctx) +TPartNonreplCountersData TNonreplicatedPartitionRdmaActor::ExtractPartCounters() { PartCounters->Simple.IORequestsInFlight.Set( RequestsInProgress.GetRequestCount()); PartCounters->Simple.BytesCount.Set( - PartConfig->GetBlockCount() * PartConfig->GetBlockSize() - ); + PartConfig->GetBlockCount() * PartConfig->GetBlockSize()); - auto request = - std::make_unique( - MakeIntrusive(), - std::move(PartCounters), - PartConfig->GetName(), - NetworkBytes, - CpuUsage); + TPartNonreplCountersData counters( + NetworkBytes, + CpuUsage, + std::move(PartCounters)); NetworkBytes = 0; CpuUsage = {}; @@ -44,7 +40,44 @@ void TNonreplicatedPartitionRdmaActor::SendStats(const TActorContext& ctx) EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); + return counters; +} + +void TNonreplicatedPartitionRdmaActor::SendStats(const TActorContext& ctx) +{ + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + + auto request = + std::make_unique( + MakeIntrusive(), + std::move(diskCounters), + PartConfig->GetName(), + networkBytes, + cpuUsage); + NCloud::Send(ctx, StatActorId, std::move(request)); } +//////////////////////////////////////////////////////////////////////////////// + +void TNonreplicatedPartitionRdmaActor:: + HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); + + NCloud::Reply( + ctx, + *ev, + std::make_unique( + std::move(diskCounters), + networkBytes, + cpuUsage, + SelfId(), + PartConfig->GetName())); +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp new file mode 100644 index 00000000000..d7fb1dbdf46 --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp @@ -0,0 +1,114 @@ +#include "disk_registry_based_partition_statistics_collector_actor.h" + +#include +#include + +#include + +using namespace NActors; + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +TDiskRegistryBasedPartitionStatisticsCollectorActor:: + TDiskRegistryBasedPartitionStatisticsCollectorActor( + const TActorId& owner, + TVector statActorIds, + ui64 seqNo) + : Owner(owner) + , StatActorIds(std::move(statActorIds)) + , Response(seqNo) +{} + +void TDiskRegistryBasedPartitionStatisticsCollectorActor::Bootstrap( + const TActorContext& ctx) +{ + Become(&TThis::StateWork); + + for (const auto& statActorId: StatActorIds) { + NCloud::Send( + ctx, + statActorId, + std::make_unique()); + } + + ctx.Schedule(UpdateCountersInterval, new TEvents::TEvWakeup()); +} + +void TDiskRegistryBasedPartitionStatisticsCollectorActor::SendStatistics( + const TActorContext& ctx) +{ + NCloud::Send( + ctx, + Owner, + std::make_unique( + std::move(LastError), + std::move(Response))); + + Die(ctx); +} + +//////////////////////////////////////////////////////////////////////////////// + +void TDiskRegistryBasedPartitionStatisticsCollectorActor::HandleTimeout( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + + NCloud::Send( + ctx, + Owner, + std::make_unique( + MakeError( + E_TIMEOUT, + "Failed to update disk registry based partition counters."), + std::move(Response))); + + Die(ctx); +} + +void TDiskRegistryBasedPartitionStatisticsCollectorActor:: + HandleGetDiskRegistryBasedPartCountersResponse( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + + if (HasError(msg->Error)) { + LastError = msg->Error; + } + + Response.Counters.push_back(std::move(*msg)); + + if (Response.Counters.size() == StatActorIds.size()) { + SendStatistics(ctx); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +STFUNC(TDiskRegistryBasedPartitionStatisticsCollectorActor::StateWork) +{ + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvWakeup, HandleTimeout); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse, + HandleGetDiskRegistryBasedPartCountersResponse) + + default: + HandleUnexpectedEvent( + ev, + TBlockStoreComponents::VOLUME, + __PRETTY_FUNCTION__); + break; + } +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h new file mode 100644 index 00000000000..3908734ec95 --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h @@ -0,0 +1,50 @@ +#pragma once + +#include + +#include +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +/////////////////////////////////////////////////////////////////////////////// + +class TDiskRegistryBasedPartitionStatisticsCollectorActor final + : public NActors::TActorBootstrapped< + TDiskRegistryBasedPartitionStatisticsCollectorActor> +{ +private: + const NActors::TActorId Owner; + + const TVector StatActorIds; + + TEvNonreplPartitionPrivate::TDiskRegistryBasedPartCountersCombined Response; + + NProto::TError LastError; + +public: + TDiskRegistryBasedPartitionStatisticsCollectorActor( + const NActors::TActorId& owner, + TVector statActorIds, + ui64 seqNo); + + void Bootstrap(const NActors::TActorContext& ctx); + +private: + void SendStatistics(const NActors::TActorContext& ctx); + +private: + STFUNC(StateWork); + + void HandleTimeout( + const NActors::TEvents::TEvWakeup::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleGetDiskRegistryBasedPartCountersResponse( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const NActors::TActorContext& ctx); +}; + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/actors/ya.make b/cloud/blockstore/libs/storage/volume/actors/ya.make index 3110e8bc38a..992dd61f3d6 100644 --- a/cloud/blockstore/libs/storage/volume/actors/ya.make +++ b/cloud/blockstore/libs/storage/volume/actors/ya.make @@ -14,6 +14,8 @@ SRCS( release_devices_actor.cpp shadow_disk_actor.cpp volume_as_partition_actor.cpp + partition_statistics_collector_actor.cpp + disk_registry_based_partition_statistics_collector_actor.cpp ) PEERDIR( diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.cpp b/cloud/blockstore/libs/storage/volume/volume_actor.cpp index 900fa0e96b9..8099d0a82f8 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor.cpp @@ -680,12 +680,13 @@ void TVolumeActor::HandleUpdateCounters( // if we use pull scheme we must send request to the partitions // to collect statistics - if (Config->GetUsePullSchemeForVolumeStatistics() && - State && !State->IsDiskRegistryMediaKind() && + if (Config->GetUsePullSchemeForVolumeStatistics() && State && GetVolumeStatus() != EStatus::STATUS_INACTIVE) { ScheduleRegularUpdates(ctx); - SendStatisticRequests(ctx); + State->IsDiskRegistryMediaKind() + ? SendStatisticRequestForDiskRegistryBasedPartition(ctx) + : SendStatisticRequests(ctx); return; } @@ -1128,6 +1129,11 @@ STFUNC(TVolumeActor::StateWork) HFunc( TEvVolumePrivate::TEvDiskRegistryDeviceOperationFinished, HandleDiskRegistryDeviceOperationFinished); + + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse, + HandleGetDiskRegistryBasedPartCountersResponse); IgnoreFunc(TEvLocal::TEvTabletMetrics); @@ -1197,6 +1203,8 @@ STFUNC(TVolumeActor::StateZombie) IgnoreFunc(TEvPartitionCommonPrivate::TEvPartCountersCombined); IgnoreFunc(TEvService::TEvDestroyVolumeResponse); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse); IgnoreFunc(TEvVolumePrivate::TEvDiskRegistryDeviceOperationStarted); IgnoreFunc(TEvVolumePrivate::TEvDiskRegistryDeviceOperationFinished); diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.h b/cloud/blockstore/libs/storage/volume/volume_actor.h index 56e4a1cc6d8..eeb41a34891 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.h +++ b/cloud/blockstore/libs/storage/volume/volume_actor.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include #include +#include #include #include @@ -434,6 +436,28 @@ class TVolumeActor final {} }; + struct TDataForUpdatingDiskRegistryBasedPartCounters + { + const NActors::TActorId Sender; + const ui64 Cookie; + const TString DiskId; + TCallContextPtr CallContext; + TPartNonreplCountersData PartCountersData; + + TDataForUpdatingDiskRegistryBasedPartCounters( + const NActors::TActorId& sender, + ui64 cookie, + TString diskId, + TCallContextPtr callContext, + TPartNonreplCountersData partCountersData) + : Sender(sender) + , Cookie(cookie) + , DiskId(std::move(diskId)) + , CallContext(std::move(callContext)) + , PartCountersData(std::move(partCountersData)) + {} + }; + public: TVolumeActor( const NActors::TActorId& owner, @@ -674,12 +698,19 @@ class TVolumeActor final void SendStatisticRequests(const NActors::TActorContext& ctx); + void SendStatisticRequestForDiskRegistryBasedPartition( + const NActors::TActorContext& ctx); + void CleanupHistory( const NActors::TActorContext& ctx, const NActors::TActorId& sender, ui64 cookie, TCallContextPtr callContext); + void UpdateDiskRegistryBasedPartCounters( + const NActors::TActorContext& ctx, + TDataForUpdatingDiskRegistryBasedPartCounters data); + const TString& GetDiskId() const; private: @@ -1303,6 +1334,11 @@ class TVolumeActor final TPoisonCallback onPartitionStopped); void StartPartitionsImpl(const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCountersResponse( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const NActors::TActorContext& ctx); + BLOCKSTORE_VOLUME_REQUESTS(BLOCKSTORE_IMPLEMENT_REQUEST, TEvVolume) BLOCKSTORE_VOLUME_REQUESTS_PRIVATE(BLOCKSTORE_IMPLEMENT_REQUEST, TEvVolumePrivate) BLOCKSTORE_VOLUME_REQUESTS_FWD_SERVICE(BLOCKSTORE_IMPLEMENT_REQUEST, TEvService) diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp index 23329bc8c73..90d9668de50 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp @@ -220,24 +220,24 @@ void TVolumeActor::HandleScrubberCounters( State->UpdateScrubberCounters(std::move(scrubbingInfo)); } -void TVolumeActor::HandleDiskRegistryBasedPartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TVolumeActor::UpdateDiskRegistryBasedPartCounters( + const TActorContext& ctx, + TDataForUpdatingDiskRegistryBasedPartCounters data) { Y_DEBUG_ABORT_UNLESS(State->IsDiskRegistryMediaKind()); - auto* msg = ev->Get(); - if (auto* resourceMetrics = GetResourceMetrics(); resourceMetrics) { bool changed = false; - if (msg->CpuUsage) { + if (data.PartCountersData.CpuUsage) { resourceMetrics->CPU.Increment( - msg->CpuUsage.MicroSeconds(), + data.PartCountersData.CpuUsage.MicroSeconds(), ctx.Now()); changed = true; } - if (msg->NetworkBytes) { - resourceMetrics->Network.Increment(msg->NetworkBytes, ctx.Now()); + if (data.PartCountersData.NetworkBytes) { + resourceMetrics->Network.Increment( + data.PartCountersData.NetworkBytes, + ctx.Now()); changed = true; } @@ -246,13 +246,10 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( } } - auto requestInfo = CreateRequestInfo( - ev->Sender, - ev->Cookie, - msg->CallContext - ); + auto requestInfo = + CreateRequestInfo(data.Sender, data.Cookie, data.CallContext); - auto* statInfo = State->GetPartitionStatByDiskId(msg->DiskId); + auto* statInfo = State->GetPartitionStatByDiskId(data.DiskId); if (!statInfo) { LOG_INFO( @@ -260,8 +257,8 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( TBlockStoreComponents::VOLUME, "%s Counters from partition %s (%s) do not belong to disk", LogTitle.GetWithTime().c_str(), - ToString(ev->Sender).c_str(), - msg->DiskId.Quote().c_str()); + ToString(data.Sender).c_str(), + data.DiskId.Quote().c_str()); return; } @@ -271,11 +268,13 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( DiagnosticsConfig->GetHistogramCounterOptions()); } - statInfo->LastCounters->Add(*msg->DiskCounters); + statInfo->LastCounters->Add(*data.PartCountersData.DiskCounters); - UpdateCachedStats(*msg->DiskCounters, statInfo->CachedCounters); + UpdateCachedStats( + *data.PartCountersData.DiskCounters, + statInfo->CachedCounters); CopyPartCountersToCachedStats( - *msg->DiskCounters, + *data.PartCountersData.DiskCounters, statInfo->CachedCountersProto); TVolumeDatabase::TPartStats partStats; @@ -287,6 +286,59 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( std::move(partStats)); } +void TVolumeActor::HandleDiskRegistryBasedPartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, + const TActorContext& ctx) +{ + + auto* msg = ev->Get(); + + TPartNonreplCountersData partCountersData( + msg->NetworkBytes, + msg->CpuUsage, + std::move(msg->DiskCounters)); + + TDataForUpdatingDiskRegistryBasedPartCounters data( + ev->Sender, + ev->Cookie, + std::move(msg->DiskId), + std::move(msg->CallContext), + std::move(partCountersData)); + + UpdateDiskRegistryBasedPartCounters(ctx, std::move(data)); +} + +void TVolumeActor::HandleGetDiskRegistryBasedPartCountersResponse( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + + if (HasError(msg->Error)) { + LOG_ERROR( + ctx, + TBlockStoreComponents::VOLUME, + "%s Failed to update disk registry based part counters. Error: %s", + LogTitle.GetWithTime().c_str(), + FormatError(msg->Error).c_str()); + } + + TPartNonreplCountersData partCountersData( + msg->NetworkBytes, + msg->CpuUsage, + std::move(msg->DiskCounters)); + + TDataForUpdatingDiskRegistryBasedPartCounters data( + msg->ActorId, + ev->Cookie, + std::move(msg->DiskId), + MakeIntrusive(), + std::move(partCountersData)); + + UpdateDiskRegistryBasedPartCounters(ctx, std::move(data)); +} + std::optional TVolumeActor::UpdatePartCounters( const TActorContext& ctx, TPartCountersData& partCountersData) @@ -454,6 +506,18 @@ void TVolumeActor::CompleteSavePartStats( LogTitle.GetWithTime().c_str(), args.PartStats.TabletId); + if (Config->GetUsePullSchemeForVolumeStatistics() && + State->IsDiskRegistryMediaKind()) + { + UpdateCounters(ctx); + CleanupHistory( + ctx, + SelfId(), // sender + 0, // cookie + MakeIntrusive() // callContext + ); + } + NCloud::Send( ctx, SelfId(), @@ -829,4 +893,20 @@ void TVolumeActor::CleanupHistory( Config->GetVolumeHistoryCleanupItemCount()); } +void TVolumeActor::SendStatisticRequestForDiskRegistryBasedPartition( + const TActorContext& ctx) +{ + STORAGE_VERIFY_C( + State->GetDiskRegistryBasedPartitionActor(), + TWellKnownEntityTypes::TABLET, + TabletID(), + "Empty disk registry based partition actor"); + + NCloud::Send( + ctx, + State->GetDiskRegistryBasedPartitionActor(), + std::make_unique()); +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp index e36429484ed..dffaa4cbe94 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp @@ -1508,6 +1508,267 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) 1, 0); } + + Y_UNIT_TEST(ShouldPullStatisticsFromDiskRegistryBasedPartitionWithResync) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetAcquireNonReplicatedDevices(true); + config.SetUseMirrorResync(true); + config.SetForceMirrorResync(true); + + auto state = MakeIntrusive(); + state->ReplicaCount = 2; + + auto runtime = PrepareTestActorRuntime(config, state); + + bool statUpdated = false; + auto _ = runtime->AddObserver( + [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) + { + Y_UNUSED(ev); + statUpdated = true; + }); + + TVolumeClient volume(*runtime); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(TDuration::Seconds(1)); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } + + Y_UNIT_TEST( + ShouldPullStatisticsFromDiskRegistryBasedPartitionWithMigration) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetMaxMigrationBandwidth(999'999'999); + + auto state = MakeIntrusive(); + state->MigrationMode = EMigrationMode::InProgress; + state->ReplicaCount = 2; + auto runtime = PrepareTestActorRuntime(config, state); + + bool statUpdated = false; + auto _ = runtime->AddObserver( + [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) + { + Y_UNUSED(ev); + statUpdated = true; + }); + + TVolumeClient volume(*runtime); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(TDuration::Seconds(1)); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } + + Y_UNIT_TEST( + ShouldPullStatisticsFromDiskRegistryBasedPartitionWithFreshDevices) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetMaxMigrationBandwidth(999'999'999); + + auto state = MakeIntrusive(); + state->MigrationMode = EMigrationMode::InProgress; + state->DeviceReplacementUUIDs = {"uuid1"}; + state->ReplicaCount = 2; + + auto runtime = PrepareTestActorRuntime(config, state); + + bool statUpdated = false; + auto _ = runtime->AddObserver( + [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) + { + Y_UNUSED(ev); + statUpdated = true; + }); + + TVolumeClient volume(*runtime); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(TDuration::Seconds(1)); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } + + Y_UNIT_TEST(ShouldPullStatisticsFromDiskRegistryBasedPartitionWithTimeout) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetAcquireNonReplicatedDevices(true); + + auto state = MakeIntrusive(); + state->ReplicaCount = 2; + + auto runtime = PrepareTestActorRuntime(config, state); + TVolumeClient volume(*runtime); + + TActorId volumeId; + bool answerSeen = false; + bool statUpdated = false; + bool firstRequest = false; + bool isGrabResponse = false; + + runtime->SetEventFilter( + [&](TTestActorRuntimeBase& runtime, TAutoPtr& ev) + { + Y_UNUSED(runtime); + + if (ev->GetTypeRewrite() == + TEvNonreplPartitionPrivate:: + EvGetDiskRegistryBasedPartCountersRequest && + !firstRequest) + { + firstRequest = true; + volumeId = ev->Sender; + return false; + } + + if (ev->GetTypeRewrite() == + TEvNonreplPartitionPrivate:: + EvGetDiskRegistryBasedPartCountersResponse && + !isGrabResponse) + { + isGrabResponse = true; + return true; + } + + if (ev->GetTypeRewrite() == + TEvNonreplPartitionPrivate:: + EvGetDiskRegistryBasedPartCountersResponse && + ev->Recipient == volumeId) + { + answerSeen = true; + auto* msg = ev->Get< + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse>(); + UNIT_ASSERT(HasError(msg->Error)); + + return false; + } + + if (ev->GetTypeRewrite() == TEvVolumePrivate::EvPartStatsSaved) + { + statUpdated = true; + } + + return false; + }); + + runtime->SetScheduledEventFilter( + [&](TTestActorRuntimeBase& runtime, + TAutoPtr& ev, + TDuration delay, + TInstant& deadline) + { + Y_UNUSED(runtime); + Y_UNUSED(delay); + Y_UNUSED(deadline); + if (ev->GetTypeRewrite() == TEvVolumePrivate::EvUpdateCounters) + { + return true; + } + return false; + }); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(UpdateCountersInterval); + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvVolumePrivate::EvPartStatsSaved); + runtime->DispatchEvents(options); + + // Check that we grab response to do error timeout + UNIT_ASSERT(isGrabResponse); + + // Check that we see answer with error + UNIT_ASSERT(answerSeen); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } } } // namespace NCloud::NBlockStore::NStorage From 43889cd19450ccc17671330926a2c3d1353de48f Mon Sep 17 00:00:00 2001 From: Pavel Misko Date: Mon, 15 Dec 2025 21:56:40 +0100 Subject: [PATCH 2/6] xxx --- cloud/blockstore/libs/storage/api/volume.h | 14 ++-- .../libs/storage/core/disk_counters.h | 22 ++++-- .../core/disk_registry_based_part_counters.h | 28 -------- .../partition_nonrepl/part_mirror_actor.h | 1 - .../part_mirror_actor_stats.cpp | 71 ++++++------------- .../part_mirror_resync_actor.h | 1 - .../part_mirror_resync_actor_stats.cpp | 60 +++++----------- .../partition_nonrepl/part_nonrepl_actor.h | 1 - .../part_nonrepl_actor_stats.cpp | 30 +++----- .../part_nonrepl_events_private.h | 16 ++--- .../part_nonrepl_migration_common_actor.h | 1 - ...t_nonrepl_migration_common_actor_stats.cpp | 60 +++++----------- .../part_nonrepl_rdma_actor.h | 1 - .../part_nonrepl_rdma_actor_stats.cpp | 30 +++----- .../libs/storage/partition_nonrepl/ut_env.h | 4 +- .../libs/storage/volume/actors/ya.make | 1 - .../libs/storage/volume/volume_actor.h | 1 - .../storage/volume/volume_actor_stats.cpp | 15 +--- .../libs/storage/volume/volume_ut_stats.cpp | 23 +++--- 19 files changed, 117 insertions(+), 263 deletions(-) delete mode 100644 cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h diff --git a/cloud/blockstore/libs/storage/api/volume.h b/cloud/blockstore/libs/storage/api/volume.h index bcd8d422b9d..1898e32648c 100644 --- a/cloud/blockstore/libs/storage/api/volume.h +++ b/cloud/blockstore/libs/storage/api/volume.h @@ -143,20 +143,14 @@ struct TEvVolume struct TDiskRegistryBasedPartitionCounters { - TPartitionDiskCountersPtr DiskCounters; TString DiskId; - ui64 NetworkBytes = 0; - TDuration CpuUsage; + TPartNonreplCountersData CountersData; TDiskRegistryBasedPartitionCounters( - TPartitionDiskCountersPtr diskCounters, TString diskId, - ui64 networkBytes, - TDuration cpuUsage) - : DiskCounters(std::move(diskCounters)) - , DiskId(std::move(diskId)) - , NetworkBytes(networkBytes) - , CpuUsage(cpuUsage) + TPartNonreplCountersData countersData) + : DiskId(std::move(diskId)) + , CountersData(std::move(countersData)) {} }; diff --git a/cloud/blockstore/libs/storage/core/disk_counters.h b/cloud/blockstore/libs/storage/core/disk_counters.h index 54a148d67ec..ba9f590ea64 100644 --- a/cloud/blockstore/libs/storage/core/disk_counters.h +++ b/cloud/blockstore/libs/storage/core/disk_counters.h @@ -9,6 +9,14 @@ namespace NCloud::NBlockStore::NStorage { //////////////////////////////////////////////////////////////////////////////// +struct TPartitionDiskCounters; +struct TVolumeSelfCounters; + +using TPartitionDiskCountersPtr = std::unique_ptr; +using TVolumeSelfCountersPtr = std::unique_ptr; + +//////////////////////////////////////////////////////////////////////////////// + enum class EPublishingPolicy { All, @@ -835,6 +843,15 @@ struct TPartitionDiskCounters //////////////////////////////////////////////////////////////////////////////// +struct TPartNonreplCountersData +{ + TPartitionDiskCountersPtr DiskCounters; + ui64 NetworkBytes = 0; + TDuration CpuUsage; +}; + +//////////////////////////////////////////////////////////////////////////////// + struct TVolumeSelfCounters { TVolumeSelfSimpleCounters Simple; @@ -861,11 +878,6 @@ struct TVolumeSelfCounters //////////////////////////////////////////////////////////////////////////////// -using TPartitionDiskCountersPtr = std::unique_ptr; -using TVolumeSelfCountersPtr = std::unique_ptr; - -//////////////////////////////////////////////////////////////////////////////// - TPartitionDiskCountersPtr CreatePartitionDiskCounters( EPublishingPolicy policy, EHistogramCounterOptions histCounterOptions); diff --git a/cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h b/cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h deleted file mode 100644 index f18db5d13ef..00000000000 --- a/cloud/blockstore/libs/storage/core/disk_registry_based_part_counters.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once - -#include "disk_counters.h" - -#include -#include - -namespace NCloud::NBlockStore::NStorage { - -//////////////////////////////////////////////////////////////////////////////// - -struct TPartNonreplCountersData -{ - ui64 NetworkBytes; - TDuration CpuUsage; - TPartitionDiskCountersPtr DiskCounters; - - TPartNonreplCountersData( - ui64 networkBytes, - TDuration cpuUsage, - TPartitionDiskCountersPtr diskCounters) - : NetworkBytes(networkBytes) - , CpuUsage(cpuUsage) - , DiskCounters(std::move(diskCounters)) - {} -}; - -} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h index 11a6f335d5f..68870e5b784 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp index 6bd85e942e9..144cd182028 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp @@ -40,13 +40,7 @@ void TMirrorPartitionActor::HandlePartCounters( const TActorContext& ctx) { auto* msg = ev->Get(); - - TPartNonreplCountersData partCountersData( - msg->NetworkBytes, - msg->CpuUsage, - std::move(msg->DiskCounters)); - - UpdateCounters(ctx, ev->Sender, std::move(partCountersData)); + UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); } //////////////////////////////////////////////////////////////////////////////// @@ -82,16 +76,8 @@ TPartNonreplCountersData TMirrorPartitionActor::ExtractPartCounters( stats->Simple.ChecksumMismatches.Value = ChecksumMismatches; stats->Simple.ScrubbingProgress.Value = 100 * GetScrubbingRange().Start / State.GetBlockCount(); - stats->Cumulative.ScrubbingThroughput.Value = ScrubbingThroughput; - - TPartNonreplCountersData partCounters( - NetworkBytes, - CpuUsage, - std::move(stats)); - - NetworkBytes = 0; - CpuUsage = {}; - ScrubbingThroughput = 0; + stats->Cumulative.ScrubbingThroughput.Value = + std::exchange(ScrubbingThroughput, {}); const bool scrubbingEnabled = Config->GetDataScrubbingEnabled() && !ResyncActorId; @@ -105,7 +91,11 @@ TPartNonreplCountersData TMirrorPartitionActor::ExtractPartCounters( std::move(FixedPartial)); NCloud::Send(ctx, StatActorId, std::move(scrubberCounters)); - return partCounters; + return { + .DiskCounters = std::move(stats), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; } void TMirrorPartitionActor::SendStats(const TActorContext& ctx) @@ -114,15 +104,11 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) return; } - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); - auto request = std::make_unique( MakeIntrusive(), - std::move(diskCounters), DiskId, - networkBytes, - cpuUsage); + ExtractPartCounters(ctx)); NCloud::Send( ctx, @@ -145,34 +131,28 @@ void TMirrorPartitionActor::HandleGetDiskRegistryBasedPartCounters( std::make_unique( MakeError(E_REJECTED, "Mirror actor got new request"), - CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig - ->GetHistogramCounterOptions()), // diskCounters - 0, // networkBytes - TDuration{}, // cpuUsage SelfId(), - DiskId)); + DiskId, + TPartNonreplCountersData{ + .DiskCounters = CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig->GetHistogramCounterOptions()), + })); StatisticRequestInfo.Reset(); } auto statActorIds = State.GetReplicaActorsBypassingProxies(); if (statActorIds.empty()) { - auto&& [networkBytes, cpuUsage, diskCounters] = - ExtractPartCounters(ctx); - NCloud::Reply( ctx, *ev, std::make_unique( MakeError(E_INVALID_STATE, "Mirror actor hasn't replicas"), - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - DiskId)); + DiskId, + ExtractPartCounters(ctx))); return; } @@ -208,29 +188,20 @@ void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( } for (auto& counters: msg->Counters) { - TPartNonreplCountersData partCountersData( - counters.NetworkBytes, - counters.CpuUsage, - std::move(counters.DiskCounters)); - - UpdateCounters(ctx, counters.ActorId, std::move(partCountersData)); + UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); - NCloud::Reply( ctx, *StatisticRequestInfo, std::make_unique( msg->Error, - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - DiskId)); + DiskId, + ExtractPartCounters(ctx))); - StatisticRequestInfo.Reset();; + StatisticRequestInfo.Reset(); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h index adcb458209f..c595bb8ec28 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp index 4aef888984a..705e73564f4 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp @@ -49,13 +49,7 @@ void TMirrorPartitionResyncActor::HandlePartCounters( const TActorContext& ctx) { auto* msg = ev->Get(); - - TPartNonreplCountersData partCountersData( - msg->NetworkBytes, - msg->CpuUsage, - std::move(msg->DiskCounters)); - - UpdateCounters(ctx, ev->Sender, std::move(partCountersData)); + UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); } void TMirrorPartitionResyncActor::HandleScrubberCounters( @@ -77,20 +71,16 @@ void TMirrorPartitionResyncActor::HandleGetDiskRegistryBasedPartCounters( std::make_unique( MakeError(E_REJECTED, "Mirror resync actor got new request"), - CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig - ->GetHistogramCounterOptions()), // diskCounters - 0, // networkBytes - TDuration{}, // cpuUsage SelfId(), - PartConfig->GetName())); + PartConfig->GetName(), + TPartNonreplCountersData{ + .DiskCounters = CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig->GetHistogramCounterOptions())})); StatisticRequestInfo.Reset(); } if (!MirrorActorId && Replicas.empty()) { - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - NCloud::Reply( ctx, *ev, @@ -100,11 +90,9 @@ void TMirrorPartitionResyncActor::HandleGetDiskRegistryBasedPartCounters( E_INVALID_STATE, "Part mirror resync actor hasn't replicas and mirror " "actor"), - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - PartConfig->GetName())); + PartConfig->GetName(), + ExtractPartCounters())); return; } @@ -150,27 +138,18 @@ void TMirrorPartitionResyncActor::HandleDiskRegistryBasedPartCountersCombined( } for (auto& counters: msg->Counters) { - TPartNonreplCountersData partCountersData( - counters.NetworkBytes, - counters.CpuUsage, - std::move(counters.DiskCounters)); - - UpdateCounters(ctx, counters.ActorId, std::move(partCountersData)); + UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - NCloud::Reply( ctx, *StatisticRequestInfo, std::make_unique( msg->Error, - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - PartConfig->GetName())); + PartConfig->GetName(), + ExtractPartCounters())); StatisticRequestInfo.Reset();; } @@ -188,25 +167,20 @@ TPartNonreplCountersData TMirrorPartitionResyncActor::ExtractPartCounters() MirrorCounters.reset(); } - TPartNonreplCountersData counters(NetworkBytes, CpuUsage, std::move(stats)); - - NetworkBytes = 0; - CpuUsage = TDuration(); - - return counters; + return { + .DiskCounters = std::exchange(stats, {}), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; } void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) { - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - auto request = std::make_unique( MakeIntrusive(), - std::move(diskCounters), PartConfig->GetName(), - networkBytes, - cpuUsage); + ExtractPartCounters()); NCloud::Send(ctx, StatActorId, std::move(request)); } diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h index c7eff47fcbf..ace899275b8 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp index e670218855e..0f40f0a8273 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp @@ -34,19 +34,15 @@ TPartNonreplCountersData TNonreplicatedPartitionActor::ExtractPartCounters( PartCounters->Simple.HasBrokenDeviceSilent.Set( CalculateHasBrokenDeviceCounterValue(ctx, true)); - TPartNonreplCountersData counters( - NetworkBytes, - CpuUsage, - std::move(PartCounters)); - - NetworkBytes = 0; - CpuUsage = {}; - - PartCounters = CreatePartitionDiskCounters( + auto partCounters = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); - return counters; + return { + .DiskCounters = std::exchange(PartCounters, std::move(partCounters)), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; } void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) @@ -55,15 +51,11 @@ void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) return; } - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); - auto request = std::make_unique( MakeIntrusive(), - std::move(diskCounters), PartConfig->GetName(), - networkBytes, - cpuUsage); + ExtractPartCounters(ctx)); NCloud::Send(ctx, StatActorId, std::move(request)); } @@ -76,18 +68,14 @@ void TNonreplicatedPartitionActor:: TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, const TActorContext& ctx) { - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(ctx); - NCloud::Reply( ctx, *ev, std::make_unique( - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - PartConfig->GetName())); + PartConfig->GetName(), + ExtractPartCounters(ctx))); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h index 87ebfe99f1d..066e7dc62b7 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h @@ -402,23 +402,17 @@ struct TEvNonreplPartitionPrivate struct TGetDiskRegistryBasedPartCountersResponse { - TPartitionDiskCountersPtr DiskCounters; - ui64 NetworkBytes; - TDuration CpuUsage; NActors::TActorId ActorId; TString DiskId; + TPartNonreplCountersData CountersData; TGetDiskRegistryBasedPartCountersResponse( - TPartitionDiskCountersPtr diskCounters, - ui64 networkBytes, - TDuration cpuUsage, const NActors::TActorId& actorId, - TString diskId) - : DiskCounters(std::move(diskCounters)) - , NetworkBytes(networkBytes) - , CpuUsage(cpuUsage) - , ActorId(actorId) + TString diskId, + TPartNonreplCountersData countersData) + : ActorId(actorId) , DiskId(std::move(diskId)) + , CountersData(std::move(countersData)) {} }; diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h index 0c50f8d7807..21e978eaa67 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp index eea3ebead71..17c502994f7 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp @@ -39,13 +39,7 @@ void TNonreplicatedPartitionMigrationCommonActor::HandlePartCounters( const TActorContext& ctx) { auto* msg = ev->Get(); - - TPartNonreplCountersData partCountersData( - msg->NetworkBytes, - msg->CpuUsage, - std::move(msg->DiskCounters)); - - UpdateCounters(ctx, ev->Sender, std::move(partCountersData)); + UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); } //////////////////////////////////////////////////////////////////////////////// @@ -81,12 +75,11 @@ TNonreplicatedPartitionMigrationCommonActor::ExtractPartCounters() EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); - TPartNonreplCountersData counters(NetworkBytes, CpuUsage, std::move(stats)); - - NetworkBytes = 0; - CpuUsage = {}; - - return counters; + return { + .DiskCounters = std::move(stats), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}) + }; } void TNonreplicatedPartitionMigrationCommonActor::SendStats( @@ -96,15 +89,11 @@ void TNonreplicatedPartitionMigrationCommonActor::SendStats( return; } - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - auto request = std::make_unique( MakeIntrusive(), - std::move(diskCounters), DiskId, - networkBytes, - cpuUsage); + ExtractPartCounters()); NCloud::Send(ctx, StatActorId, std::move(request)); } @@ -124,14 +113,12 @@ void TNonreplicatedPartitionMigrationCommonActor:: std::make_unique( MakeError(E_REJECTED, "Migration actor got new request"), - CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig - ->GetHistogramCounterOptions()), // diskCounters - 0, // networkBytes - TDuration{}, // cpuUsage SelfId(), - DiskId)); + DiskId, + TPartNonreplCountersData{ + .DiskCounters = CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig->GetHistogramCounterOptions())})); StatisticRequestInfo.Reset(); } @@ -146,8 +133,6 @@ void TNonreplicatedPartitionMigrationCommonActor:: } if (statActorIds.empty()) { - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - NCloud::Reply( ctx, *ev, @@ -157,11 +142,9 @@ void TNonreplicatedPartitionMigrationCommonActor:: E_INVALID_STATE, "Nonreplicated migration actor hasn't src and dst " "actors"), - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - DiskId)); + DiskId, + ExtractPartCounters())); return; } @@ -199,27 +182,18 @@ void TNonreplicatedPartitionMigrationCommonActor:: } for (auto& counters: msg->Counters) { - TPartNonreplCountersData partCountersData( - counters.NetworkBytes, - counters.CpuUsage, - std::move(counters.DiskCounters)); - - UpdateCounters(ctx, counters.ActorId, std::move(partCountersData)); + UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - NCloud::Reply( ctx, *StatisticRequestInfo, std::make_unique( msg->Error, - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - DiskId)); + DiskId, + ExtractPartCounters())); StatisticRequestInfo.Reset(); } diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h index c5190c66c5b..e922dc7110a 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp index 8dfeb28de15..c4d2d0eef92 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp @@ -28,32 +28,24 @@ TPartNonreplCountersData TNonreplicatedPartitionRdmaActor::ExtractPartCounters() PartCounters->Simple.BytesCount.Set( PartConfig->GetBlockCount() * PartConfig->GetBlockSize()); - TPartNonreplCountersData counters( - NetworkBytes, - CpuUsage, - std::move(PartCounters)); - - NetworkBytes = 0; - CpuUsage = {}; - - PartCounters = CreatePartitionDiskCounters( + auto partCounters = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); - return counters; + return { + .DiskCounters = std::exchange(PartCounters, std::move(partCounters)), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; } void TNonreplicatedPartitionRdmaActor::SendStats(const TActorContext& ctx) { - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - auto request = std::make_unique( MakeIntrusive(), - std::move(diskCounters), PartConfig->GetName(), - networkBytes, - cpuUsage); + ExtractPartCounters()); NCloud::Send(ctx, StatActorId, std::move(request)); } @@ -66,18 +58,14 @@ void TNonreplicatedPartitionRdmaActor:: TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, const TActorContext& ctx) { - auto&& [networkBytes, cpuUsage, diskCounters] = ExtractPartCounters(); - NCloud::Reply( ctx, *ev, std::make_unique( - std::move(diskCounters), - networkBytes, - cpuUsage, SelfId(), - PartConfig->GetName())); + PartConfig->GetName(), + ExtractPartCounters())); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h b/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h index a0ec5ee9c7f..cf7ae782797 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h @@ -188,12 +188,14 @@ class TDummyActor final const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, const NActors::TActorContext& ctx) { + auto* msg = ev->Get(); + auto event = std::make_unique( MakeStorageStatsServiceId(), ev->Sender, new TEvStatsService::TEvVolumePartCounters( "", // diskId - std::move(ev->Get()->DiskCounters), + std::move(msg->CountersData.DiskCounters), 0, 0, false, diff --git a/cloud/blockstore/libs/storage/volume/actors/ya.make b/cloud/blockstore/libs/storage/volume/actors/ya.make index 992dd61f3d6..4677bdf4699 100644 --- a/cloud/blockstore/libs/storage/volume/actors/ya.make +++ b/cloud/blockstore/libs/storage/volume/actors/ya.make @@ -14,7 +14,6 @@ SRCS( release_devices_actor.cpp shadow_disk_actor.cpp volume_as_partition_actor.cpp - partition_statistics_collector_actor.cpp disk_registry_based_partition_statistics_collector_actor.cpp ) diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.h b/cloud/blockstore/libs/storage/volume/volume_actor.h index eeb41a34891..e14cc8439d9 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.h +++ b/cloud/blockstore/libs/storage/volume/volume_actor.h @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp index 90d9668de50..ac96064f292 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp @@ -290,20 +290,14 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, const TActorContext& ctx) { - auto* msg = ev->Get(); - TPartNonreplCountersData partCountersData( - msg->NetworkBytes, - msg->CpuUsage, - std::move(msg->DiskCounters)); - TDataForUpdatingDiskRegistryBasedPartCounters data( ev->Sender, ev->Cookie, std::move(msg->DiskId), std::move(msg->CallContext), - std::move(partCountersData)); + std::move(msg->CountersData)); UpdateDiskRegistryBasedPartCounters(ctx, std::move(data)); } @@ -324,17 +318,12 @@ void TVolumeActor::HandleGetDiskRegistryBasedPartCountersResponse( FormatError(msg->Error).c_str()); } - TPartNonreplCountersData partCountersData( - msg->NetworkBytes, - msg->CpuUsage, - std::move(msg->DiskCounters)); - TDataForUpdatingDiskRegistryBasedPartCounters data( msg->ActorId, ev->Cookie, std::move(msg->DiskId), MakeIntrusive(), - std::move(partCountersData)); + std::move(msg->CountersData)); UpdateDiskRegistryBasedPartCounters(ctx, std::move(data)); } diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp index dffaa4cbe94..f8bec1e27a3 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp @@ -589,9 +589,10 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) { auto* msg = event->Get< TEvVolume::TEvDiskRegistryBasedPartitionCounters>(); - if (msg->NetworkBytes || msg->CpuUsage) { - network = std::max(network, msg->NetworkBytes); - cpu = std::max(cpu, msg->CpuUsage); + auto& [_, networkBytes, cpuUsage] = msg->CountersData; + if (networkBytes || cpuUsage) { + network = std::max(network, networkBytes); + cpu = std::max(cpu, cpuUsage); ++nonEmptyReports; } } @@ -708,9 +709,10 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) { auto* msg = event->Get< TEvVolume::TEvDiskRegistryBasedPartitionCounters>(); - if (msg->NetworkBytes || msg->CpuUsage) { - network = std::max(network, msg->NetworkBytes); - cpu = std::max(cpu, msg->CpuUsage); + auto& [_, networkBytes, cpuUsage] = msg->CountersData; + if (networkBytes || cpuUsage) { + network = std::max(network, networkBytes); + cpu = std::max(cpu, cpuUsage); } } @@ -843,18 +845,19 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) case TEvVolume::EvDiskRegistryBasedPartitionCounters: { auto* msg = event->Get< TEvVolume::TEvDiskRegistryBasedPartitionCounters>(); - totalNetworkBytes += msg->NetworkBytes; + auto& [_, networkBytes, cpuUsage] = msg->CountersData; + totalNetworkBytes += networkBytes; // Resync is done by 4MiB ranges. Checksum is an 8-byte // request. UNIT_ASSERT_VALUES_EQUAL( 0, - msg->NetworkBytes % 4_MB % 8); - if (msg->NetworkBytes) { + networkBytes % 4_MB % 8); + if (networkBytes) { nonEmptyStatsUpdates++; UNIT_ASSERT_VALUES_UNEQUAL( TDuration(), - msg->CpuUsage); + cpuUsage); } break; } From ad198ea3db2f536b6501abf14e8dfeda015d64d5 Mon Sep 17 00:00:00 2001 From: Mikhail Peshkov Date: Tue, 16 Dec 2025 13:25:23 +0700 Subject: [PATCH 3/6] refactor --- .../storage/partition_nonrepl/part_mirror_actor_stats.cpp | 4 ---- ...sk_registry_based_partition_statistics_collector_actor.cpp | 2 +- ...disk_registry_based_partition_statistics_collector_actor.h | 2 +- 3 files changed, 2 insertions(+), 6 deletions(-) diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp index 144cd182028..25265cd8f3d 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp @@ -183,10 +183,6 @@ void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( auto* msg = ev->Get(); - if (msg->SeqNo < StatisticSeqNo) { - return; - } - for (auto& counters: msg->Counters) { UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp index d7fb1dbdf46..71859c206f6 100644 --- a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp @@ -37,7 +37,7 @@ void TDiskRegistryBasedPartitionStatisticsCollectorActor::Bootstrap( ctx.Schedule(UpdateCountersInterval, new TEvents::TEvWakeup()); } -void TDiskRegistryBasedPartitionStatisticsCollectorActor::SendStatistics( +void TDiskRegistryBasedPartitionStatisticsCollectorActor::ReplyAndDie( const TActorContext& ctx) { NCloud::Send( diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h index 3908734ec95..b99b791e402 100644 --- a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h @@ -32,7 +32,7 @@ class TDiskRegistryBasedPartitionStatisticsCollectorActor final void Bootstrap(const NActors::TActorContext& ctx); private: - void SendStatistics(const NActors::TActorContext& ctx); + void ReplyAndDie(const NActors::TActorContext& ctx); private: STFUNC(StateWork); From 12d0eed407296329fdc2d2084c41da7c07348431 Mon Sep 17 00:00:00 2001 From: Mikhail Peshkov Date: Fri, 19 Dec 2025 17:55:37 +0700 Subject: [PATCH 4/6] compile fix --- ...disk_registry_based_partition_statistics_collector_actor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp index 71859c206f6..953a5381526 100644 --- a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp @@ -87,7 +87,7 @@ void TDiskRegistryBasedPartitionStatisticsCollectorActor:: Response.Counters.push_back(std::move(*msg)); if (Response.Counters.size() == StatActorIds.size()) { - SendStatistics(ctx); + ReplyAndDie(ctx); } } From 081608cb7c2c02740831deed0065f87fb0599409 Mon Sep 17 00:00:00 2001 From: Mikhail Peshkov Date: Fri, 19 Dec 2025 19:43:34 +0700 Subject: [PATCH 5/6] get rid of SeqNo --- .../libs/storage/partition_nonrepl/part_mirror_actor.h | 1 - .../storage/partition_nonrepl/part_mirror_actor_stats.cpp | 3 +-- .../storage/partition_nonrepl/part_mirror_resync_actor.h | 1 - .../partition_nonrepl/part_mirror_resync_actor_stats.cpp | 7 +------ .../partition_nonrepl/part_nonrepl_events_private.h | 5 +---- .../part_nonrepl_migration_common_actor.h | 1 - .../part_nonrepl_migration_common_actor_stats.cpp | 7 +------ ...registry_based_partition_statistics_collector_actor.cpp | 7 ++----- ...k_registry_based_partition_statistics_collector_actor.h | 3 +-- 9 files changed, 7 insertions(+), 28 deletions(-) diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h index 68870e5b784..119aa5e5cb2 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h @@ -114,7 +114,6 @@ class TMirrorPartitionActor final size_t MultiAgentWriteRoundRobinSeed = 0; TRequestInfoPtr StatisticRequestInfo; - ui64 StatisticSeqNo = 0; public: TMirrorPartitionActor( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp index 25265cd8f3d..2d02d0c4e86 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp @@ -162,8 +162,7 @@ void TMirrorPartitionActor::HandleGetDiskRegistryBasedPartCounters( NCloud::Register( ctx, SelfId(), - std::move(statActorIds), - ++StatisticSeqNo); + std::move(statActorIds)); } void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h index c595bb8ec28..058f56f8d12 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h @@ -94,7 +94,6 @@ class TMirrorPartitionResyncActor final TBackoffDelayProvider BackoffProvider; TRequestInfoPtr StatisticRequestInfo; - ui64 StatisticSeqNo = 0; public: TMirrorPartitionResyncActor( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp index 705e73564f4..7e396f222b5 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp @@ -112,8 +112,7 @@ void TMirrorPartitionResyncActor::HandleGetDiskRegistryBasedPartCounters( NCloud::Register( ctx, SelfId(), - std::move(statActorIds), - ++StatisticSeqNo); + std::move(statActorIds)); } void TMirrorPartitionResyncActor::HandleDiskRegistryBasedPartCountersCombined( @@ -133,10 +132,6 @@ void TMirrorPartitionResyncActor::HandleDiskRegistryBasedPartCountersCombined( auto* msg = ev->Get(); - if (msg->SeqNo < StatisticSeqNo) { - return; - } - for (auto& counters: msg->Counters) { UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h index 066e7dc62b7..de0c41f740f 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h @@ -422,12 +422,9 @@ struct TEvNonreplPartitionPrivate struct TDiskRegistryBasedPartCountersCombined { - const ui64 SeqNo; - TVector Counters; - explicit TDiskRegistryBasedPartCountersCombined(ui64 seqNo) - : SeqNo(seqNo) + explicit TDiskRegistryBasedPartCountersCombined() {} }; diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h index 21e978eaa67..ad7cabff1dd 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h @@ -188,7 +188,6 @@ class TNonreplicatedPartitionMigrationCommonActor TBackoffDelayProvider BackoffProvider; TRequestInfoPtr StatisticRequestInfo; - ui64 StatisticSeqNo = 0; protected: // Derived class that wishes to handle wakeup messages should make its own diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp index 17c502994f7..5d1ccf219a7 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp @@ -155,8 +155,7 @@ void TNonreplicatedPartitionMigrationCommonActor:: NCloud::Register( ctx, SelfId(), - std::move(statActorIds), - ++StatisticSeqNo); + std::move(statActorIds)); } void TNonreplicatedPartitionMigrationCommonActor:: @@ -177,10 +176,6 @@ void TNonreplicatedPartitionMigrationCommonActor:: auto* msg = ev->Get(); - if (msg->SeqNo < StatisticSeqNo) { - return; - } - for (auto& counters: msg->Counters) { UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp index 953a5381526..a717bccec46 100644 --- a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp @@ -14,11 +14,9 @@ namespace NCloud::NBlockStore::NStorage { TDiskRegistryBasedPartitionStatisticsCollectorActor:: TDiskRegistryBasedPartitionStatisticsCollectorActor( const TActorId& owner, - TVector statActorIds, - ui64 seqNo) + TVector statActorIds) : Owner(owner) , StatActorIds(std::move(statActorIds)) - , Response(seqNo) {} void TDiskRegistryBasedPartitionStatisticsCollectorActor::Bootstrap( @@ -45,8 +43,7 @@ void TDiskRegistryBasedPartitionStatisticsCollectorActor::ReplyAndDie( Owner, std::make_unique( - std::move(LastError), - std::move(Response))); + std::move(LastError))); Die(ctx); } diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h index b99b791e402..a7eb327c9ec 100644 --- a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h @@ -26,8 +26,7 @@ class TDiskRegistryBasedPartitionStatisticsCollectorActor final public: TDiskRegistryBasedPartitionStatisticsCollectorActor( const NActors::TActorId& owner, - TVector statActorIds, - ui64 seqNo); + TVector statActorIds); void Bootstrap(const NActors::TActorContext& ctx); From 3ff9d8074465fcd0018eaa37e5121b7e2c1886f7 Mon Sep 17 00:00:00 2001 From: Mikhail Peshkov Date: Mon, 29 Dec 2025 14:11:23 +0700 Subject: [PATCH 6/6] fix response and delete extra DiskCounters --- .../part_mirror_actor_stats.cpp | 18 +++++--- .../part_mirror_resync_actor_stats.cpp | 17 ++++++-- ...t_nonrepl_migration_common_actor_stats.cpp | 17 ++++++-- ...d_partition_statistics_collector_actor.cpp | 3 +- .../libs/storage/volume/volume_ut_stats.cpp | 41 ++++++++++++++++++- 5 files changed, 80 insertions(+), 16 deletions(-) diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp index 2d02d0c4e86..50776559488 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp @@ -133,11 +133,7 @@ void TMirrorPartitionActor::HandleGetDiskRegistryBasedPartCounters( MakeError(E_REJECTED, "Mirror actor got new request"), SelfId(), DiskId, - TPartNonreplCountersData{ - .DiskCounters = CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig->GetHistogramCounterOptions()), - })); + TPartNonreplCountersData{})); StatisticRequestInfo.Reset(); } @@ -182,7 +178,19 @@ void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( auto* msg = ev->Get(); + if(HasError(msg->Error)) { + LOG_WARN( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to error: %s", + DiskId.Quote().c_str(), + msg->Error.GetMessage().c_str()); + } + for (auto& counters: msg->Counters) { + if(!counters.CountersData.DiskCounters) { + continue; + } UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp index 7e396f222b5..55a4a79f701 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp @@ -73,10 +73,7 @@ void TMirrorPartitionResyncActor::HandleGetDiskRegistryBasedPartCounters( MakeError(E_REJECTED, "Mirror resync actor got new request"), SelfId(), PartConfig->GetName(), - TPartNonreplCountersData{ - .DiskCounters = CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig->GetHistogramCounterOptions())})); + TPartNonreplCountersData{})); StatisticRequestInfo.Reset(); } @@ -132,7 +129,19 @@ void TMirrorPartitionResyncActor::HandleDiskRegistryBasedPartCountersCombined( auto* msg = ev->Get(); + if(HasError(msg->Error)) { + LOG_WARN( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to error: %s", + PartConfig->GetName().Quote().c_str(), + msg->Error.GetMessage().c_str()); + } + for (auto& counters: msg->Counters) { + if(!counters.CountersData.DiskCounters) { + continue; + } UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp index 5d1ccf219a7..8c2c70746d8 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp @@ -115,10 +115,7 @@ void TNonreplicatedPartitionMigrationCommonActor:: MakeError(E_REJECTED, "Migration actor got new request"), SelfId(), DiskId, - TPartNonreplCountersData{ - .DiskCounters = CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig->GetHistogramCounterOptions())})); + TPartNonreplCountersData{})); StatisticRequestInfo.Reset(); } @@ -176,7 +173,19 @@ void TNonreplicatedPartitionMigrationCommonActor:: auto* msg = ev->Get(); + if(HasError(msg->Error)) { + LOG_WARN( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to error: %s", + DiskId.Quote().c_str(), + msg->Error.GetMessage().c_str()); + } + for (auto& counters: msg->Counters) { + if(!counters.CountersData.DiskCounters) { + continue; + } UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); } diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp index a717bccec46..5e8576bb26a 100644 --- a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp @@ -43,7 +43,8 @@ void TDiskRegistryBasedPartitionStatisticsCollectorActor::ReplyAndDie( Owner, std::make_unique( - std::move(LastError))); + std::move(LastError), + std::move(Response))); Die(ctx); } diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp index f8bec1e27a3..a79e1cd7914 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp @@ -1526,6 +1526,24 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) auto runtime = PrepareTestActorRuntime(config, state); bool statUpdated = false; + struct TReadAndWriteByteCount + { + ui64 ReadByteCount = 0; + ui64 WriteByteCount = 0; + ui64 DirectCopyByteCount = 0; + }; + TMap statsForDisks; + auto statEventInterceptor = runtime->AddObserver( + [&](TEvStatsService::TEvVolumePartCounters::TPtr& event) { + auto* msg = event->Get(); + statsForDisks[msg->DiskId].ReadByteCount += + msg->DiskCounters->RequestCounters.ReadBlocks.RequestBytes; + statsForDisks[msg->DiskId].WriteByteCount += + msg->DiskCounters->RequestCounters.WriteBlocks.RequestBytes; + statsForDisks[msg->DiskId].DirectCopyByteCount += + msg->DiskCounters->RequestCounters.CopyBlocks.RequestBytes; + }); + auto _ = runtime->AddObserver( [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) { @@ -1535,6 +1553,12 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) TVolumeClient volume(*runtime); + auto clientInfo = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + + volume.UpdateVolumeConfig( 0, 0, @@ -1551,14 +1575,27 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) ); volume.WaitReady(); + + volume.AddClient(clientInfo); + volume.SendToPipe( std::make_unique()); - runtime->AdvanceCurrentTime(TDuration::Seconds(1)); - runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + volume.WriteBlocks( + TBlockRange64::WithLength(0, 1024), + clientInfo.GetClientId(), + 1); + + volume.ReadBlocks( + TBlockRange64::WithLength(0, 1024), + clientInfo.GetClientId()); + runtime->AdvanceCurrentTime(UpdateCountersInterval); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); // Check that statistics was updated UNIT_ASSERT(statUpdated); + UNIT_ASSERT(statsForDisks["vol0"].WriteByteCount != 0); + UNIT_ASSERT(statsForDisks["vol0"].ReadByteCount != 0); } Y_UNIT_TEST(