diff --git a/cloud/blockstore/libs/storage/api/volume.h b/cloud/blockstore/libs/storage/api/volume.h index bcd8d422b9d..1898e32648c 100644 --- a/cloud/blockstore/libs/storage/api/volume.h +++ b/cloud/blockstore/libs/storage/api/volume.h @@ -143,20 +143,14 @@ struct TEvVolume struct TDiskRegistryBasedPartitionCounters { - TPartitionDiskCountersPtr DiskCounters; TString DiskId; - ui64 NetworkBytes = 0; - TDuration CpuUsage; + TPartNonreplCountersData CountersData; TDiskRegistryBasedPartitionCounters( - TPartitionDiskCountersPtr diskCounters, TString diskId, - ui64 networkBytes, - TDuration cpuUsage) - : DiskCounters(std::move(diskCounters)) - , DiskId(std::move(diskId)) - , NetworkBytes(networkBytes) - , CpuUsage(cpuUsage) + TPartNonreplCountersData countersData) + : DiskId(std::move(diskId)) + , CountersData(std::move(countersData)) {} }; diff --git a/cloud/blockstore/libs/storage/core/disk_counters.h b/cloud/blockstore/libs/storage/core/disk_counters.h index 54a148d67ec..ba9f590ea64 100644 --- a/cloud/blockstore/libs/storage/core/disk_counters.h +++ b/cloud/blockstore/libs/storage/core/disk_counters.h @@ -9,6 +9,14 @@ namespace NCloud::NBlockStore::NStorage { //////////////////////////////////////////////////////////////////////////////// +struct TPartitionDiskCounters; +struct TVolumeSelfCounters; + +using TPartitionDiskCountersPtr = std::unique_ptr; +using TVolumeSelfCountersPtr = std::unique_ptr; + +//////////////////////////////////////////////////////////////////////////////// + enum class EPublishingPolicy { All, @@ -835,6 +843,15 @@ struct TPartitionDiskCounters //////////////////////////////////////////////////////////////////////////////// +struct TPartNonreplCountersData +{ + TPartitionDiskCountersPtr DiskCounters; + ui64 NetworkBytes = 0; + TDuration CpuUsage; +}; + +//////////////////////////////////////////////////////////////////////////////// + struct TVolumeSelfCounters { TVolumeSelfSimpleCounters Simple; @@ -861,11 +878,6 @@ struct TVolumeSelfCounters //////////////////////////////////////////////////////////////////////////////// -using TPartitionDiskCountersPtr = std::unique_ptr; -using TVolumeSelfCountersPtr = std::unique_ptr; - -//////////////////////////////////////////////////////////////////////////////// - TPartitionDiskCountersPtr CreatePartitionDiskCounters( EPublishingPolicy policy, EHistogramCounterOptions histCounterOptions); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp index f457450d01f..610b9b68e8a 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp @@ -86,8 +86,9 @@ TMirrorPartitionActor::~TMirrorPartitionActor() = default; void TMirrorPartitionActor::Bootstrap(const TActorContext& ctx) { SetupPartitions(ctx); - ScheduleCountersUpdate(ctx); - + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } if (Config->GetDataScrubbingEnabled() && !ResyncActorId) { StartScrubbingRange(ctx, 0); } @@ -887,6 +888,15 @@ STFUNC(TMirrorPartitionActor::StateWork) HFunc(TEvPartition::TEvReleaseRange, HandleReleaseRange); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined, + HandleDiskRegistryBasedPartCountersCombined); + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); IgnoreFunc(TEvents::TEvPoisonTaken); @@ -945,6 +955,11 @@ STFUNC(TMirrorPartitionActor::StateZombie) IgnoreFunc(TEvPartition::TEvReleaseRange); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined); + IgnoreFunc(TEvents::TEvPoisonPill); HFunc(TEvents::TEvPoisonTaken, HandlePoisonTaken); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h index 9fd8d22500e..119aa5e5cb2 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h @@ -113,6 +113,8 @@ class TMirrorPartitionActor final const size_t MultiAgentWriteRequestSizeThreshold = 0; size_t MultiAgentWriteRoundRobinSeed = 0; + TRequestInfoPtr StatisticRequestInfo; + public: TMirrorPartitionActor( TStorageConfigPtr config, @@ -150,6 +152,12 @@ class TMirrorPartitionActor final EWriteRequestType SuggestWriteRequestType( const NActors::TActorContext& ctx, TBlockRange64 range); + TPartNonreplCountersData ExtractPartCounters( + const NActors::TActorContext& ctx); + void UpdateCounters( + const NActors::TActorContext& ctx, + const NActors::TActorId& sender, + TPartNonreplCountersData partCountersData); private: STFUNC(StateWork); @@ -228,6 +236,16 @@ class TMirrorPartitionActor final const NActors::TEvents::TEvPoisonTaken::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const NActors::TActorContext& ctx); + template void MirrorRequest( const typename TMethod::TRequest::TPtr& ev, diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp index 058abadfe6f..50776559488 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor_stats.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NCloud::NBlockStore::NStorage { @@ -9,21 +10,23 @@ using namespace NActors; //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionActor::HandlePartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TMirrorPartitionActor::UpdateCounters( + const TActorContext& ctx, + const TActorId& sender, + TPartNonreplCountersData partCountersData) { - auto* msg = ev->Get(); - - const ui32 replicaIndex = State.GetReplicaIndex(ev->Sender); + const ui32 replicaIndex = State.GetReplicaIndex(sender); if (replicaIndex < ReplicaCounters.size()) { - ReplicaCounters[replicaIndex] = std::move(msg->DiskCounters); - NetworkBytes += msg->NetworkBytes; - CpuUsage += CpuUsage; + ReplicaCounters[replicaIndex] = + std::move(partCountersData.DiskCounters); + NetworkBytes += partCountersData.NetworkBytes; + CpuUsage += partCountersData.CpuUsage; } else { - LOG_INFO(ctx, TBlockStoreComponents::PARTITION, + LOG_INFO( + ctx, + TBlockStoreComponents::PARTITION, "Partition %s for disk %s counters not found", - ToString(ev->Sender).c_str(), + ToString(sender).c_str(), State.GetReplicaInfos()[0].Config->GetName().Quote().c_str()); Y_DEBUG_ABORT_UNLESS(0); @@ -32,12 +35,19 @@ void TMirrorPartitionActor::HandlePartCounters( //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionActor::SendStats(const TActorContext& ctx) +void TMirrorPartitionActor::HandlePartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, + const TActorContext& ctx) { - if (!StatActorId) { - return; - } + auto* msg = ev->Get(); + UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); +} + +//////////////////////////////////////////////////////////////////////////////// +TPartNonreplCountersData TMirrorPartitionActor::ExtractPartCounters( + const TActorContext& ctx) +{ auto stats = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); @@ -54,35 +64,20 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) stats->Simple.IORequestsInFlight.Reset(); for (const auto& counters: ReplicaCounters) { if (counters) { - stats->Simple.BytesCount.Value = Max( - stats->Simple.BytesCount.Value, - counters->Simple.BytesCount.Value); - stats->Simple.IORequestsInFlight.Value = Max( - stats->Simple.IORequestsInFlight.Value, - counters->Simple.IORequestsInFlight.Value); + stats->Simple.BytesCount.Value = + Max(stats->Simple.BytesCount.Value, + counters->Simple.BytesCount.Value); + stats->Simple.IORequestsInFlight.Value = + Max(stats->Simple.IORequestsInFlight.Value, + counters->Simple.IORequestsInFlight.Value); } } stats->Simple.ChecksumMismatches.Value = ChecksumMismatches; stats->Simple.ScrubbingProgress.Value = 100 * GetScrubbingRange().Start / State.GetBlockCount(); - stats->Cumulative.ScrubbingThroughput.Value = ScrubbingThroughput; - auto request = - std::make_unique( - MakeIntrusive(), - std::move(stats), - DiskId, - NetworkBytes, - CpuUsage); - - NetworkBytes = 0; - CpuUsage = {}; - ScrubbingThroughput = 0; - - NCloud::Send( - ctx, - StatActorId, - std::move(request)); + stats->Cumulative.ScrubbingThroughput.Value = + std::exchange(ScrubbingThroughput, {}); const bool scrubbingEnabled = Config->GetDataScrubbingEnabled() && !ResyncActorId; @@ -95,6 +90,121 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) std::move(Fixed), std::move(FixedPartial)); NCloud::Send(ctx, StatActorId, std::move(scrubberCounters)); + + return { + .DiskCounters = std::move(stats), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; +} + +void TMirrorPartitionActor::SendStats(const TActorContext& ctx) +{ + if (!StatActorId) { + return; + } + + auto request = + std::make_unique( + MakeIntrusive(), + DiskId, + ExtractPartCounters(ctx)); + + NCloud::Send( + ctx, + StatActorId, + std::move(request)); + +} + +//////////////////////////////////////////////////////////////////////////////// + +void TMirrorPartitionActor::HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (StatisticRequestInfo) { + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + MakeError(E_REJECTED, "Mirror actor got new request"), + SelfId(), + DiskId, + TPartNonreplCountersData{})); + StatisticRequestInfo.Reset(); + } + + auto statActorIds = State.GetReplicaActorsBypassingProxies(); + + if (statActorIds.empty()) { + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError(E_INVALID_STATE, "Mirror actor hasn't replicas"), + SelfId(), + DiskId, + ExtractPartCounters(ctx))); + return; + } + + StatisticRequestInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + + NCloud::Register( + ctx, + SelfId(), + std::move(statActorIds)); +} + +void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate::TEvDiskRegistryBasedPartCountersCombined:: + TPtr& ev, + const TActorContext& ctx) +{ + if (!StatisticRequestInfo) { + LOG_ERROR( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to empty " + "StatisticRequestInfo.", + DiskId.Quote().c_str()); + return; + } + + auto* msg = ev->Get(); + + if(HasError(msg->Error)) { + LOG_WARN( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to error: %s", + DiskId.Quote().c_str(), + msg->Error.GetMessage().c_str()); + } + + for (auto& counters: msg->Counters) { + if(!counters.CountersData.DiskCounters) { + continue; + } + UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); + } + + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + msg->Error, + SelfId(), + DiskId, + ExtractPartCounters(ctx))); + + StatisticRequestInfo.Reset(); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp index e3e5c857ab2..d703da47b40 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.cpp @@ -62,7 +62,9 @@ TMirrorPartitionResyncActor::~TMirrorPartitionResyncActor() = default; void TMirrorPartitionResyncActor::Bootstrap(const TActorContext& ctx) { SetupPartitions(ctx); - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } ContinueResyncIfNeeded(ctx); Become(&TThis::StateWork); @@ -310,6 +312,14 @@ STFUNC(TMirrorPartitionResyncActor::StateWork) TEvVolume::TEvDiskRegistryBasedPartitionCounters, HandlePartCounters); HFunc(TEvVolume::TEvScrubberCounters, HandleScrubberCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined, + HandleDiskRegistryBasedPartCountersCombined); HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); @@ -357,6 +367,10 @@ STFUNC(TMirrorPartitionResyncActor::StateZombie) IgnoreFunc(TEvVolume::TEvRWClientIdChanged); IgnoreFunc(TEvVolume::TEvDiskRegistryBasedPartitionCounters); IgnoreFunc(TEvVolume::TEvScrubberCounters); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined); IgnoreFunc(TEvents::TEvPoisonPill); HFunc(TEvents::TEvPoisonTaken, HandlePoisonTaken); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h index 01ecf12d107..058f56f8d12 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor.h @@ -93,6 +93,8 @@ class TMirrorPartitionResyncActor final TBackoffDelayProvider BackoffProvider; + TRequestInfoPtr StatisticRequestInfo; + public: TMirrorPartitionResyncActor( TStorageConfigPtr config, @@ -135,6 +137,13 @@ class TMirrorPartitionResyncActor final bool IsAnybodyAlive() const; void ReplyAndDie(const NActors::TActorContext& ctx); + void UpdateCounters( + const NActors::TActorContext& ctx, + const NActors::TActorId& sender, + TPartNonreplCountersData partCountersData); + + TPartNonreplCountersData ExtractPartCounters(); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -227,6 +236,16 @@ class TMirrorPartitionResyncActor final const TEvNonreplPartitionPrivate::TEvGetDeviceForRangeRequest::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const NActors::TActorContext& ctx); + BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocks, TEvService); BLOCKSTORE_IMPLEMENT_REQUEST(WriteBlocks, TEvService); BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocksLocal, TEvService); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp index 85cc4346c11..55a4a79f701 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_actor_stats.cpp @@ -2,6 +2,7 @@ #include #include +#include namespace NCloud::NBlockStore::NStorage { @@ -9,15 +10,14 @@ using namespace NActors; //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionResyncActor::HandlePartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TMirrorPartitionResyncActor::UpdateCounters( + const TActorContext& ctx, + const TActorId& sender, + TPartNonreplCountersData partCountersData) { - auto* msg = ev->Get(); - - bool knownSender = ev->Sender == MirrorActorId; + bool knownSender = sender == MirrorActorId; for (const auto& replica: Replicas) { - knownSender |= replica.ActorId == ev->Sender; + knownSender |= replica.ActorId == sender; } if (!knownSender) { @@ -25,7 +25,7 @@ void TMirrorPartitionResyncActor::HandlePartCounters( ctx, TBlockStoreComponents::PARTITION, "Partition %s for disk %s counters not found", - ToString(ev->Sender).c_str(), + ToString(sender).c_str(), PartConfig->GetName().Quote().c_str()); Y_DEBUG_ABORT_UNLESS(0); @@ -33,13 +33,23 @@ void TMirrorPartitionResyncActor::HandlePartCounters( } if (!MirrorCounters) { - MirrorCounters = std::move(msg->DiskCounters); + MirrorCounters = std::move(partCountersData.DiskCounters); } else { - MirrorCounters->AggregateWith(*msg->DiskCounters); + MirrorCounters->AggregateWith(*partCountersData.DiskCounters); } - NetworkBytes += msg->NetworkBytes; - CpuUsage += msg->CpuUsage; + NetworkBytes += partCountersData.NetworkBytes; + CpuUsage += partCountersData.CpuUsage; +} + +//////////////////////////////////////////////////////////////////////////////// + +void TMirrorPartitionResyncActor::HandlePartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); } void TMirrorPartitionResyncActor::HandleScrubberCounters( @@ -49,9 +59,108 @@ void TMirrorPartitionResyncActor::HandleScrubberCounters( ForwardMessageToActor(ev, ctx, StatActorId); } +void TMirrorPartitionResyncActor::HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (StatisticRequestInfo) { + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + MakeError(E_REJECTED, "Mirror resync actor got new request"), + SelfId(), + PartConfig->GetName(), + TPartNonreplCountersData{})); + StatisticRequestInfo.Reset(); + } + + if (!MirrorActorId && Replicas.empty()) { + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError( + E_INVALID_STATE, + "Part mirror resync actor hasn't replicas and mirror " + "actor"), + SelfId(), + PartConfig->GetName(), + ExtractPartCounters())); + return; + } + + TVector statActorIds; + + if (MirrorActorId) { + statActorIds.push_back(MirrorActorId); + } + + for (const auto& replica: Replicas) { + statActorIds.push_back(replica.ActorId); + } + + StatisticRequestInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + + NCloud::Register( + ctx, + SelfId(), + std::move(statActorIds)); +} + +void TMirrorPartitionResyncActor::HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate::TEvDiskRegistryBasedPartCountersCombined:: + TPtr& ev, + const TActorContext& ctx) +{ + if (!StatisticRequestInfo) { + LOG_ERROR( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror resync actor statistics due to empty " + "StatisticRequestInfo.", + PartConfig->GetName().Quote().c_str()); + return; + } + + auto* msg = ev->Get(); + + if(HasError(msg->Error)) { + LOG_WARN( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to error: %s", + PartConfig->GetName().Quote().c_str(), + msg->Error.GetMessage().c_str()); + } + + for (auto& counters: msg->Counters) { + if(!counters.CountersData.DiskCounters) { + continue; + } + UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); + } + + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + msg->Error, + SelfId(), + PartConfig->GetName(), + ExtractPartCounters())); + + StatisticRequestInfo.Reset();; +} + //////////////////////////////////////////////////////////////////////////////// -void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) +TPartNonreplCountersData TMirrorPartitionResyncActor::ExtractPartCounters() { auto stats = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, @@ -62,18 +171,22 @@ void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) MirrorCounters.reset(); } + return { + .DiskCounters = std::exchange(stats, {}), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; +} + +void TMirrorPartitionResyncActor::SendStats(const TActorContext& ctx) +{ auto request = std::make_unique( MakeIntrusive(), - std::move(stats), PartConfig->GetName(), - NetworkBytes, - CpuUsage); + ExtractPartCounters()); NCloud::Send(ctx, StatActorId, std::move(request)); - - NetworkBytes = 0; - CpuUsage = TDuration(); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp index cd266f6754d..d264fafd2fc 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.cpp @@ -188,7 +188,9 @@ TRequestTimeoutPolicy TNonreplicatedPartitionActor::MakeTimeoutPolicyForRequest( void TNonreplicatedPartitionActor::Bootstrap(const TActorContext& ctx) { Become(&TThis::StateWork); - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } } bool TNonreplicatedPartitionActor::CheckReadWriteBlockRange(const TBlockRange64& range) const @@ -731,6 +733,11 @@ STFUNC(TNonreplicatedPartitionActor::StateWork) HFunc(TEvNonreplPartitionPrivate::TEvAgentIsUnavailable, HandleAgentIsUnavailable); HFunc(TEvNonreplPartitionPrivate::TEvAgentIsBackOnline, HandleAgentIsBackOnline); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); IgnoreFunc(TEvVolume::TEvRWClientIdChanged); @@ -790,6 +797,9 @@ STFUNC(TNonreplicatedPartitionActor::StateZombie) IgnoreFunc(TEvVolumePrivate::TEvDeviceTimedOutResponse); IgnoreFunc(TEvVolume::TEvRWClientIdChanged); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + default: if (!HandleRequests(ev)) { HandleUnexpectedEvent( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h index a4d8daa4569..ace899275b8 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor.h @@ -111,6 +111,9 @@ class TNonreplicatedPartitionActor final void ReplyAndDie(const NActors::TActorContext& ctx); + TPartNonreplCountersData ExtractPartCounters( + const NActors::TActorContext& ctx); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -198,6 +201,11 @@ class TNonreplicatedPartitionActor final const TEvNonreplPartitionPrivate::TEvAgentIsBackOnline::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + bool HandleRequests(STFUNC_SIG); BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocks, TEvService); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp index aecc53df0bc..0f40f0a8273 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_actor_stats.cpp @@ -21,40 +21,61 @@ void TNonreplicatedPartitionActor::UpdateStats( //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) +TPartNonreplCountersData TNonreplicatedPartitionActor::ExtractPartCounters( + const TActorContext& ctx) { - if (!StatActorId) { - return; - } - PartCounters->Simple.IORequestsInFlight.Set( - RequestsInProgress.GetRequestCount() - ); + RequestsInProgress.GetRequestCount()); PartCounters->Simple.BytesCount.Set( - PartConfig->GetBlockCount() * PartConfig->GetBlockSize() - ); + PartConfig->GetBlockCount() * PartConfig->GetBlockSize()); PartCounters->Simple.HasBrokenDevice.Set( CalculateHasBrokenDeviceCounterValue(ctx, false)); PartCounters->Simple.HasBrokenDeviceSilent.Set( CalculateHasBrokenDeviceCounterValue(ctx, true)); + auto partCounters = CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig->GetHistogramCounterOptions()); + + return { + .DiskCounters = std::exchange(PartCounters, std::move(partCounters)), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; +} + +void TNonreplicatedPartitionActor::SendStats(const TActorContext& ctx) +{ + if (!StatActorId) { + return; + } + auto request = std::make_unique( MakeIntrusive(), - std::move(PartCounters), PartConfig->GetName(), - NetworkBytes, - CpuUsage); + ExtractPartCounters(ctx)); - NetworkBytes = 0; - CpuUsage = {}; + NCloud::Send(ctx, StatActorId, std::move(request)); +} - PartCounters = CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig->GetHistogramCounterOptions()); +//////////////////////////////////////////////////////////////////////////////// - NCloud::Send(ctx, StatActorId, std::move(request)); +void TNonreplicatedPartitionActor:: + HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + NCloud::Reply( + ctx, + *ev, + std::make_unique( + SelfId(), + PartConfig->GetName(), + ExtractPartCounters(ctx))); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h index f1d0f41ab79..de0c41f740f 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -390,6 +391,43 @@ struct TEvNonreplPartitionPrivate {} }; + // + // GetDiskRegistryBasedPartCounters + // + + struct TGetDiskRegistryBasedPartCountersRequest + { + TGetDiskRegistryBasedPartCountersRequest() = default; + }; + + struct TGetDiskRegistryBasedPartCountersResponse + { + NActors::TActorId ActorId; + TString DiskId; + TPartNonreplCountersData CountersData; + + TGetDiskRegistryBasedPartCountersResponse( + const NActors::TActorId& actorId, + TString diskId, + TPartNonreplCountersData countersData) + : ActorId(actorId) + , DiskId(std::move(diskId)) + , CountersData(std::move(countersData)) + {} + }; + + // + // DiskRegistryBasedPartCountersCombined + // + + struct TDiskRegistryBasedPartCountersCombined + { + TVector Counters; + + explicit TDiskRegistryBasedPartCountersCombined() + {} + }; + // // Events declaration // @@ -423,6 +461,10 @@ struct TEvNonreplPartitionPrivate EvLaggingMigrationDisabled, EvLaggingMigrationEnabled, EvInconsistentDiskAgent, + EvGetDiskRegistryBasedPartCountersRequest, + EvGetDiskRegistryBasedPartCountersResponse, + EvDiskRegistryBasedPartCountersCombined, + BLOCKSTORE_PARTITION_NONREPL_REQUESTS_PRIVATE(BLOCKSTORE_DECLARE_EVENT_IDS) @@ -524,6 +566,18 @@ struct TEvNonreplPartitionPrivate using TEvInconsistentDiskAgent = TRequestEvent; + using TEvGetDiskRegistryBasedPartCountersRequest = TRequestEvent< + TGetDiskRegistryBasedPartCountersRequest, + EvGetDiskRegistryBasedPartCountersRequest>; + + using TEvGetDiskRegistryBasedPartCountersResponse = TResponseEvent< + TGetDiskRegistryBasedPartCountersResponse, + EvGetDiskRegistryBasedPartCountersResponse>; + + using TEvDiskRegistryBasedPartCountersCombined = TResponseEvent< + TDiskRegistryBasedPartCountersCombined, + EvDiskRegistryBasedPartCountersCombined>; + BLOCKSTORE_PARTITION_NONREPL_REQUESTS_PRIVATE(BLOCKSTORE_DECLARE_PROTO_EVENTS) }; diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp index 2933e1a899f..696d2884cda 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.cpp @@ -96,7 +96,9 @@ TNonreplicatedPartitionMigrationCommonActor:: void TNonreplicatedPartitionMigrationCommonActor::Bootstrap( const TActorContext& ctx) { - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } Become(&TThis::StateWork); @@ -306,6 +308,15 @@ STFUNC(TNonreplicatedPartitionMigrationCommonActor::StateWork) TEvVolume::TEvDeleteCheckpointDataRequest, HandleDeleteCheckpointData); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + HFunc( + TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined, + HandleDiskRegistryBasedPartCountersCombined); + HFunc(NActors::TEvents::TEvWakeup, HandleWakeup); HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); @@ -365,6 +376,10 @@ STFUNC(TNonreplicatedPartitionMigrationCommonActor::StateZombie) IgnoreFunc(TEvVolume::TEvMigrationStateUpdated); IgnoreFunc(TEvVolume::TEvRWClientIdChanged); IgnoreFunc(TEvVolume::TEvDiskRegistryBasedPartitionCounters); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined); IgnoreFunc(TEvStatsServicePrivate::TEvRegisterTrafficSourceResponse); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h index fe00000b8e9..ad7cabff1dd 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor.h @@ -187,6 +187,8 @@ class TNonreplicatedPartitionMigrationCommonActor TBackoffDelayProvider BackoffProvider; + TRequestInfoPtr StatisticRequestInfo; + protected: // Derived class that wishes to handle wakeup messages should make its own // enum which starts with `WR_REASON_COUNT` value. @@ -300,6 +302,13 @@ class TNonreplicatedPartitionMigrationCommonActor void OnMigrationNonRetriableError(const NActors::TActorContext& ctx); + void UpdateCounters( + const NActors::TActorContext& ctx, + const NActors::TActorId& sender, + TPartNonreplCountersData partCountersData); + + TPartNonreplCountersData ExtractPartCounters(); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -336,6 +345,16 @@ class TNonreplicatedPartitionMigrationCommonActor const NActors::TEvents::TEvPoisonPill::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const NActors::TActorContext& ctx); + template void MirrorRequest( const typename TMethod::TRequest::TPtr& ev, diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp index 6b4ec4e3edd..8c2c70746d8 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_stats.cpp @@ -1,6 +1,7 @@ #include "part_nonrepl_migration_common_actor.h" #include +#include namespace NCloud::NBlockStore::NStorage { @@ -8,37 +9,44 @@ using namespace NActors; //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionMigrationCommonActor::HandlePartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TNonreplicatedPartitionMigrationCommonActor::UpdateCounters( + const TActorContext& ctx, + const TActorId& sender, + TPartNonreplCountersData partCountersData) { - auto* msg = ev->Get(); - - if (ev->Sender == SrcActorId) { - SrcCounters = std::move(msg->DiskCounters); - } else if (ev->Sender == DstActorId) { - DstCounters = std::move(msg->DiskCounters); + if (sender == SrcActorId) { + SrcCounters = std::move(partCountersData.DiskCounters); + } else if (sender == DstActorId) { + DstCounters = std::move(partCountersData.DiskCounters); } else { - LOG_INFO(ctx, TBlockStoreComponents::PARTITION, + LOG_INFO( + ctx, + TBlockStoreComponents::PARTITION, "Partition %s for disk %s counters not found", - ToString(ev->Sender).c_str(), + ToString(sender).c_str(), DiskId.Quote().c_str()); Y_DEBUG_ABORT_UNLESS(0); } - NetworkBytes += msg->NetworkBytes; - CpuUsage += msg->CpuUsage; + NetworkBytes += partCountersData.NetworkBytes; + CpuUsage += partCountersData.CpuUsage; } //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionMigrationCommonActor::SendStats( +void TNonreplicatedPartitionMigrationCommonActor::HandlePartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, const TActorContext& ctx) { - if (!StatActorId) { - return; - } + auto* msg = ev->Get(); + UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); +} + +//////////////////////////////////////////////////////////////////////////////// +TPartNonreplCountersData +TNonreplicatedPartitionMigrationCommonActor::ExtractPartCounters() +{ auto stats = CreatePartitionDiskCounters( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); @@ -54,12 +62,12 @@ void TNonreplicatedPartitionMigrationCommonActor::SendStats( if (SrcCounters && DstActorId && DstCounters) { // for some counters default AggregateWith logic is suboptimal for // mirrored partitions - stats->Simple.BytesCount.Value = Max( - SrcCounters->Simple.BytesCount.Value, - DstCounters->Simple.BytesCount.Value); - stats->Simple.IORequestsInFlight.Value = Max( - SrcCounters->Simple.IORequestsInFlight.Value, - DstCounters->Simple.IORequestsInFlight.Value); + stats->Simple.BytesCount.Value = + Max(SrcCounters->Simple.BytesCount.Value, + DstCounters->Simple.BytesCount.Value); + stats->Simple.IORequestsInFlight.Value = + Max(SrcCounters->Simple.IORequestsInFlight.Value, + DstCounters->Simple.IORequestsInFlight.Value); } stats->AggregateWith(*MigrationCounters); @@ -67,18 +75,131 @@ void TNonreplicatedPartitionMigrationCommonActor::SendStats( EPublishingPolicy::DiskRegistryBased, DiagnosticsConfig->GetHistogramCounterOptions()); + return { + .DiskCounters = std::move(stats), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}) + }; +} + +void TNonreplicatedPartitionMigrationCommonActor::SendStats( + const TActorContext& ctx) +{ + if (!StatActorId) { + return; + } + auto request = std::make_unique( MakeIntrusive(), - std::move(stats), DiskId, - NetworkBytes, - CpuUsage); - - NetworkBytes = 0; - CpuUsage = {}; + ExtractPartCounters()); NCloud::Send(ctx, StatActorId, std::move(request)); } +//////////////////////////////////////////////////////////////////////////////// + +void TNonreplicatedPartitionMigrationCommonActor:: + HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (StatisticRequestInfo) { + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + MakeError(E_REJECTED, "Migration actor got new request"), + SelfId(), + DiskId, + TPartNonreplCountersData{})); + StatisticRequestInfo.Reset(); + } + + TVector statActorIds; + + if (SrcActorId) { + statActorIds.push_back(SrcActorId); + } + + if (DstActorId) { + statActorIds.push_back(DstActorId); + } + + if (statActorIds.empty()) { + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError( + E_INVALID_STATE, + "Nonreplicated migration actor hasn't src and dst " + "actors"), + SelfId(), + DiskId, + ExtractPartCounters())); + + return; + } + + StatisticRequestInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + + NCloud::Register( + ctx, + SelfId(), + std::move(statActorIds)); +} + +void TNonreplicatedPartitionMigrationCommonActor:: + HandleDiskRegistryBasedPartCountersCombined( + const TEvNonreplPartitionPrivate:: + TEvDiskRegistryBasedPartCountersCombined::TPtr& ev, + const TActorContext& ctx) +{ + if (!StatisticRequestInfo) { + LOG_ERROR( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send migration actor statistics due to empty " + "StatisticRequestInfo.", + DiskId.Quote().c_str()); + return; + } + + auto* msg = ev->Get(); + + if(HasError(msg->Error)) { + LOG_WARN( + ctx, + TBlockStoreComponents::PARTITION_NONREPL, + "[%s] Failed to send mirror actor statistics due to error: %s", + DiskId.Quote().c_str(), + msg->Error.GetMessage().c_str()); + } + + for (auto& counters: msg->Counters) { + if(!counters.CountersData.DiskCounters) { + continue; + } + UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); + } + + NCloud::Reply( + ctx, + *StatisticRequestInfo, + std::make_unique( + msg->Error, + SelfId(), + DiskId, + ExtractPartCounters())); + + StatisticRequestInfo.Reset(); +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp index 998f2a01eb6..652484d769c 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.cpp @@ -75,7 +75,9 @@ void TNonreplicatedPartitionRdmaActor::Bootstrap(const TActorContext& ctx) } Become(&TThis::StateWork); - ScheduleCountersUpdate(ctx); + if (!Config->GetUsePullSchemeForVolumeStatistics()) { + ScheduleCountersUpdate(ctx); + } ctx.Schedule( Config->GetNonReplicatedMinRequestTimeoutSSD(), new TEvents::TEvWakeup()); @@ -905,6 +907,11 @@ STFUNC(TNonreplicatedPartitionRdmaActor::StateWork) TEvVolumePrivate::TEvDeviceTimedOutResponse, HandleDeviceTimedOutResponse); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest, + HandleGetDiskRegistryBasedPartCounters); + default: if (!HandleRequests(ev)) { HandleUnexpectedEvent( @@ -963,6 +970,9 @@ STFUNC(TNonreplicatedPartitionRdmaActor::StateZombie) IgnoreFunc(TEvVolume::TEvRWClientIdChanged); IgnoreFunc(TEvVolumePrivate::TEvDeviceTimedOutResponse); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest); + default: if (!HandleRequests(ev)) { HandleUnexpectedEvent( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h index bbae963e208..e922dc7110a 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor.h @@ -133,6 +133,8 @@ class TNonreplicatedPartitionRdmaActor final void ReplyAndDie(const NActors::TActorContext& ctx); + TPartNonreplCountersData ExtractPartCounters(); + private: STFUNC(StateWork); STFUNC(StateZombie); @@ -207,6 +209,11 @@ class TNonreplicatedPartitionRdmaActor final const TEvVolumePrivate::TEvDeviceTimedOutResponse::TPtr& ev, const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const NActors::TActorContext& ctx); + bool HandleRequests(STFUNC_SIG); BLOCKSTORE_IMPLEMENT_REQUEST(ReadBlocks, TEvService); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp index 610348d0838..c4d2d0eef92 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_actor_stats.cpp @@ -21,30 +21,51 @@ void TNonreplicatedPartitionRdmaActor::UpdateStats( //////////////////////////////////////////////////////////////////////////////// -void TNonreplicatedPartitionRdmaActor::SendStats(const TActorContext& ctx) +TPartNonreplCountersData TNonreplicatedPartitionRdmaActor::ExtractPartCounters() { PartCounters->Simple.IORequestsInFlight.Set( RequestsInProgress.GetRequestCount()); PartCounters->Simple.BytesCount.Set( - PartConfig->GetBlockCount() * PartConfig->GetBlockSize() - ); + PartConfig->GetBlockCount() * PartConfig->GetBlockSize()); + + auto partCounters = CreatePartitionDiskCounters( + EPublishingPolicy::DiskRegistryBased, + DiagnosticsConfig->GetHistogramCounterOptions()); + return { + .DiskCounters = std::exchange(PartCounters, std::move(partCounters)), + .NetworkBytes = std::exchange(NetworkBytes, {}), + .CpuUsage = std::exchange(CpuUsage, {}), + }; +} + +void TNonreplicatedPartitionRdmaActor::SendStats(const TActorContext& ctx) +{ auto request = std::make_unique( MakeIntrusive(), - std::move(PartCounters), PartConfig->GetName(), - NetworkBytes, - CpuUsage); + ExtractPartCounters()); - NetworkBytes = 0; - CpuUsage = {}; + NCloud::Send(ctx, StatActorId, std::move(request)); +} - PartCounters = CreatePartitionDiskCounters( - EPublishingPolicy::DiskRegistryBased, - DiagnosticsConfig->GetHistogramCounterOptions()); +//////////////////////////////////////////////////////////////////////////////// - NCloud::Send(ctx, StatActorId, std::move(request)); +void TNonreplicatedPartitionRdmaActor:: + HandleGetDiskRegistryBasedPartCounters( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, + const TActorContext& ctx) +{ + NCloud::Reply( + ctx, + *ev, + std::make_unique( + SelfId(), + PartConfig->GetName(), + ExtractPartCounters())); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h b/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h index a0ec5ee9c7f..cf7ae782797 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h @@ -188,12 +188,14 @@ class TDummyActor final const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, const NActors::TActorContext& ctx) { + auto* msg = ev->Get(); + auto event = std::make_unique( MakeStorageStatsServiceId(), ev->Sender, new TEvStatsService::TEvVolumePartCounters( "", // diskId - std::move(ev->Get()->DiskCounters), + std::move(msg->CountersData.DiskCounters), 0, 0, false, diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp new file mode 100644 index 00000000000..5e8576bb26a --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.cpp @@ -0,0 +1,112 @@ +#include "disk_registry_based_partition_statistics_collector_actor.h" + +#include +#include + +#include + +using namespace NActors; + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +TDiskRegistryBasedPartitionStatisticsCollectorActor:: + TDiskRegistryBasedPartitionStatisticsCollectorActor( + const TActorId& owner, + TVector statActorIds) + : Owner(owner) + , StatActorIds(std::move(statActorIds)) +{} + +void TDiskRegistryBasedPartitionStatisticsCollectorActor::Bootstrap( + const TActorContext& ctx) +{ + Become(&TThis::StateWork); + + for (const auto& statActorId: StatActorIds) { + NCloud::Send( + ctx, + statActorId, + std::make_unique()); + } + + ctx.Schedule(UpdateCountersInterval, new TEvents::TEvWakeup()); +} + +void TDiskRegistryBasedPartitionStatisticsCollectorActor::ReplyAndDie( + const TActorContext& ctx) +{ + NCloud::Send( + ctx, + Owner, + std::make_unique( + std::move(LastError), + std::move(Response))); + + Die(ctx); +} + +//////////////////////////////////////////////////////////////////////////////// + +void TDiskRegistryBasedPartitionStatisticsCollectorActor::HandleTimeout( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + + NCloud::Send( + ctx, + Owner, + std::make_unique( + MakeError( + E_TIMEOUT, + "Failed to update disk registry based partition counters."), + std::move(Response))); + + Die(ctx); +} + +void TDiskRegistryBasedPartitionStatisticsCollectorActor:: + HandleGetDiskRegistryBasedPartCountersResponse( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + + if (HasError(msg->Error)) { + LastError = msg->Error; + } + + Response.Counters.push_back(std::move(*msg)); + + if (Response.Counters.size() == StatActorIds.size()) { + ReplyAndDie(ctx); + } +} + +//////////////////////////////////////////////////////////////////////////////// + +STFUNC(TDiskRegistryBasedPartitionStatisticsCollectorActor::StateWork) +{ + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvWakeup, HandleTimeout); + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse, + HandleGetDiskRegistryBasedPartCountersResponse) + + default: + HandleUnexpectedEvent( + ev, + TBlockStoreComponents::VOLUME, + __PRETTY_FUNCTION__); + break; + } +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h new file mode 100644 index 00000000000..a7eb327c9ec --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#include +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +/////////////////////////////////////////////////////////////////////////////// + +class TDiskRegistryBasedPartitionStatisticsCollectorActor final + : public NActors::TActorBootstrapped< + TDiskRegistryBasedPartitionStatisticsCollectorActor> +{ +private: + const NActors::TActorId Owner; + + const TVector StatActorIds; + + TEvNonreplPartitionPrivate::TDiskRegistryBasedPartCountersCombined Response; + + NProto::TError LastError; + +public: + TDiskRegistryBasedPartitionStatisticsCollectorActor( + const NActors::TActorId& owner, + TVector statActorIds); + + void Bootstrap(const NActors::TActorContext& ctx); + +private: + void ReplyAndDie(const NActors::TActorContext& ctx); + +private: + STFUNC(StateWork); + + void HandleTimeout( + const NActors::TEvents::TEvWakeup::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleGetDiskRegistryBasedPartCountersResponse( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const NActors::TActorContext& ctx); +}; + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/actors/ya.make b/cloud/blockstore/libs/storage/volume/actors/ya.make index 3110e8bc38a..4677bdf4699 100644 --- a/cloud/blockstore/libs/storage/volume/actors/ya.make +++ b/cloud/blockstore/libs/storage/volume/actors/ya.make @@ -14,6 +14,7 @@ SRCS( release_devices_actor.cpp shadow_disk_actor.cpp volume_as_partition_actor.cpp + disk_registry_based_partition_statistics_collector_actor.cpp ) PEERDIR( diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.cpp b/cloud/blockstore/libs/storage/volume/volume_actor.cpp index 900fa0e96b9..8099d0a82f8 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor.cpp @@ -680,12 +680,13 @@ void TVolumeActor::HandleUpdateCounters( // if we use pull scheme we must send request to the partitions // to collect statistics - if (Config->GetUsePullSchemeForVolumeStatistics() && - State && !State->IsDiskRegistryMediaKind() && + if (Config->GetUsePullSchemeForVolumeStatistics() && State && GetVolumeStatus() != EStatus::STATUS_INACTIVE) { ScheduleRegularUpdates(ctx); - SendStatisticRequests(ctx); + State->IsDiskRegistryMediaKind() + ? SendStatisticRequestForDiskRegistryBasedPartition(ctx) + : SendStatisticRequests(ctx); return; } @@ -1128,6 +1129,11 @@ STFUNC(TVolumeActor::StateWork) HFunc( TEvVolumePrivate::TEvDiskRegistryDeviceOperationFinished, HandleDiskRegistryDeviceOperationFinished); + + HFunc( + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse, + HandleGetDiskRegistryBasedPartCountersResponse); IgnoreFunc(TEvLocal::TEvTabletMetrics); @@ -1197,6 +1203,8 @@ STFUNC(TVolumeActor::StateZombie) IgnoreFunc(TEvPartitionCommonPrivate::TEvPartCountersCombined); IgnoreFunc(TEvService::TEvDestroyVolumeResponse); + IgnoreFunc(TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse); IgnoreFunc(TEvVolumePrivate::TEvDiskRegistryDeviceOperationStarted); IgnoreFunc(TEvVolumePrivate::TEvDiskRegistryDeviceOperationFinished); diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.h b/cloud/blockstore/libs/storage/volume/volume_actor.h index 56e4a1cc6d8..e14cc8439d9 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.h +++ b/cloud/blockstore/libs/storage/volume/volume_actor.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -434,6 +435,28 @@ class TVolumeActor final {} }; + struct TDataForUpdatingDiskRegistryBasedPartCounters + { + const NActors::TActorId Sender; + const ui64 Cookie; + const TString DiskId; + TCallContextPtr CallContext; + TPartNonreplCountersData PartCountersData; + + TDataForUpdatingDiskRegistryBasedPartCounters( + const NActors::TActorId& sender, + ui64 cookie, + TString diskId, + TCallContextPtr callContext, + TPartNonreplCountersData partCountersData) + : Sender(sender) + , Cookie(cookie) + , DiskId(std::move(diskId)) + , CallContext(std::move(callContext)) + , PartCountersData(std::move(partCountersData)) + {} + }; + public: TVolumeActor( const NActors::TActorId& owner, @@ -674,12 +697,19 @@ class TVolumeActor final void SendStatisticRequests(const NActors::TActorContext& ctx); + void SendStatisticRequestForDiskRegistryBasedPartition( + const NActors::TActorContext& ctx); + void CleanupHistory( const NActors::TActorContext& ctx, const NActors::TActorId& sender, ui64 cookie, TCallContextPtr callContext); + void UpdateDiskRegistryBasedPartCounters( + const NActors::TActorContext& ctx, + TDataForUpdatingDiskRegistryBasedPartCounters data); + const TString& GetDiskId() const; private: @@ -1303,6 +1333,11 @@ class TVolumeActor final TPoisonCallback onPartitionStopped); void StartPartitionsImpl(const NActors::TActorContext& ctx); + void HandleGetDiskRegistryBasedPartCountersResponse( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const NActors::TActorContext& ctx); + BLOCKSTORE_VOLUME_REQUESTS(BLOCKSTORE_IMPLEMENT_REQUEST, TEvVolume) BLOCKSTORE_VOLUME_REQUESTS_PRIVATE(BLOCKSTORE_IMPLEMENT_REQUEST, TEvVolumePrivate) BLOCKSTORE_VOLUME_REQUESTS_FWD_SERVICE(BLOCKSTORE_IMPLEMENT_REQUEST, TEvService) diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp index 23329bc8c73..ac96064f292 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_stats.cpp @@ -220,24 +220,24 @@ void TVolumeActor::HandleScrubberCounters( State->UpdateScrubberCounters(std::move(scrubbingInfo)); } -void TVolumeActor::HandleDiskRegistryBasedPartCounters( - const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, - const TActorContext& ctx) +void TVolumeActor::UpdateDiskRegistryBasedPartCounters( + const TActorContext& ctx, + TDataForUpdatingDiskRegistryBasedPartCounters data) { Y_DEBUG_ABORT_UNLESS(State->IsDiskRegistryMediaKind()); - auto* msg = ev->Get(); - if (auto* resourceMetrics = GetResourceMetrics(); resourceMetrics) { bool changed = false; - if (msg->CpuUsage) { + if (data.PartCountersData.CpuUsage) { resourceMetrics->CPU.Increment( - msg->CpuUsage.MicroSeconds(), + data.PartCountersData.CpuUsage.MicroSeconds(), ctx.Now()); changed = true; } - if (msg->NetworkBytes) { - resourceMetrics->Network.Increment(msg->NetworkBytes, ctx.Now()); + if (data.PartCountersData.NetworkBytes) { + resourceMetrics->Network.Increment( + data.PartCountersData.NetworkBytes, + ctx.Now()); changed = true; } @@ -246,13 +246,10 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( } } - auto requestInfo = CreateRequestInfo( - ev->Sender, - ev->Cookie, - msg->CallContext - ); + auto requestInfo = + CreateRequestInfo(data.Sender, data.Cookie, data.CallContext); - auto* statInfo = State->GetPartitionStatByDiskId(msg->DiskId); + auto* statInfo = State->GetPartitionStatByDiskId(data.DiskId); if (!statInfo) { LOG_INFO( @@ -260,8 +257,8 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( TBlockStoreComponents::VOLUME, "%s Counters from partition %s (%s) do not belong to disk", LogTitle.GetWithTime().c_str(), - ToString(ev->Sender).c_str(), - msg->DiskId.Quote().c_str()); + ToString(data.Sender).c_str(), + data.DiskId.Quote().c_str()); return; } @@ -271,11 +268,13 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( DiagnosticsConfig->GetHistogramCounterOptions()); } - statInfo->LastCounters->Add(*msg->DiskCounters); + statInfo->LastCounters->Add(*data.PartCountersData.DiskCounters); - UpdateCachedStats(*msg->DiskCounters, statInfo->CachedCounters); + UpdateCachedStats( + *data.PartCountersData.DiskCounters, + statInfo->CachedCounters); CopyPartCountersToCachedStats( - *msg->DiskCounters, + *data.PartCountersData.DiskCounters, statInfo->CachedCountersProto); TVolumeDatabase::TPartStats partStats; @@ -287,6 +286,48 @@ void TVolumeActor::HandleDiskRegistryBasedPartCounters( std::move(partStats)); } +void TVolumeActor::HandleDiskRegistryBasedPartCounters( + const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + + TDataForUpdatingDiskRegistryBasedPartCounters data( + ev->Sender, + ev->Cookie, + std::move(msg->DiskId), + std::move(msg->CallContext), + std::move(msg->CountersData)); + + UpdateDiskRegistryBasedPartCounters(ctx, std::move(data)); +} + +void TVolumeActor::HandleGetDiskRegistryBasedPartCountersResponse( + const TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse::TPtr& ev, + const TActorContext& ctx) +{ + auto* msg = ev->Get(); + + if (HasError(msg->Error)) { + LOG_ERROR( + ctx, + TBlockStoreComponents::VOLUME, + "%s Failed to update disk registry based part counters. Error: %s", + LogTitle.GetWithTime().c_str(), + FormatError(msg->Error).c_str()); + } + + TDataForUpdatingDiskRegistryBasedPartCounters data( + msg->ActorId, + ev->Cookie, + std::move(msg->DiskId), + MakeIntrusive(), + std::move(msg->CountersData)); + + UpdateDiskRegistryBasedPartCounters(ctx, std::move(data)); +} + std::optional TVolumeActor::UpdatePartCounters( const TActorContext& ctx, TPartCountersData& partCountersData) @@ -454,6 +495,18 @@ void TVolumeActor::CompleteSavePartStats( LogTitle.GetWithTime().c_str(), args.PartStats.TabletId); + if (Config->GetUsePullSchemeForVolumeStatistics() && + State->IsDiskRegistryMediaKind()) + { + UpdateCounters(ctx); + CleanupHistory( + ctx, + SelfId(), // sender + 0, // cookie + MakeIntrusive() // callContext + ); + } + NCloud::Send( ctx, SelfId(), @@ -829,4 +882,20 @@ void TVolumeActor::CleanupHistory( Config->GetVolumeHistoryCleanupItemCount()); } +void TVolumeActor::SendStatisticRequestForDiskRegistryBasedPartition( + const TActorContext& ctx) +{ + STORAGE_VERIFY_C( + State->GetDiskRegistryBasedPartitionActor(), + TWellKnownEntityTypes::TABLET, + TabletID(), + "Empty disk registry based partition actor"); + + NCloud::Send( + ctx, + State->GetDiskRegistryBasedPartitionActor(), + std::make_unique()); +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp index e36429484ed..a79e1cd7914 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp @@ -589,9 +589,10 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) { auto* msg = event->Get< TEvVolume::TEvDiskRegistryBasedPartitionCounters>(); - if (msg->NetworkBytes || msg->CpuUsage) { - network = std::max(network, msg->NetworkBytes); - cpu = std::max(cpu, msg->CpuUsage); + auto& [_, networkBytes, cpuUsage] = msg->CountersData; + if (networkBytes || cpuUsage) { + network = std::max(network, networkBytes); + cpu = std::max(cpu, cpuUsage); ++nonEmptyReports; } } @@ -708,9 +709,10 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) { auto* msg = event->Get< TEvVolume::TEvDiskRegistryBasedPartitionCounters>(); - if (msg->NetworkBytes || msg->CpuUsage) { - network = std::max(network, msg->NetworkBytes); - cpu = std::max(cpu, msg->CpuUsage); + auto& [_, networkBytes, cpuUsage] = msg->CountersData; + if (networkBytes || cpuUsage) { + network = std::max(network, networkBytes); + cpu = std::max(cpu, cpuUsage); } } @@ -843,18 +845,19 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) case TEvVolume::EvDiskRegistryBasedPartitionCounters: { auto* msg = event->Get< TEvVolume::TEvDiskRegistryBasedPartitionCounters>(); - totalNetworkBytes += msg->NetworkBytes; + auto& [_, networkBytes, cpuUsage] = msg->CountersData; + totalNetworkBytes += networkBytes; // Resync is done by 4MiB ranges. Checksum is an 8-byte // request. UNIT_ASSERT_VALUES_EQUAL( 0, - msg->NetworkBytes % 4_MB % 8); - if (msg->NetworkBytes) { + networkBytes % 4_MB % 8); + if (networkBytes) { nonEmptyStatsUpdates++; UNIT_ASSERT_VALUES_UNEQUAL( TDuration(), - msg->CpuUsage); + cpuUsage); } break; } @@ -1508,6 +1511,304 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) 1, 0); } + + Y_UNIT_TEST(ShouldPullStatisticsFromDiskRegistryBasedPartitionWithResync) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetAcquireNonReplicatedDevices(true); + config.SetUseMirrorResync(true); + config.SetForceMirrorResync(true); + + auto state = MakeIntrusive(); + state->ReplicaCount = 2; + + auto runtime = PrepareTestActorRuntime(config, state); + + bool statUpdated = false; + struct TReadAndWriteByteCount + { + ui64 ReadByteCount = 0; + ui64 WriteByteCount = 0; + ui64 DirectCopyByteCount = 0; + }; + TMap statsForDisks; + auto statEventInterceptor = runtime->AddObserver( + [&](TEvStatsService::TEvVolumePartCounters::TPtr& event) { + auto* msg = event->Get(); + statsForDisks[msg->DiskId].ReadByteCount += + msg->DiskCounters->RequestCounters.ReadBlocks.RequestBytes; + statsForDisks[msg->DiskId].WriteByteCount += + msg->DiskCounters->RequestCounters.WriteBlocks.RequestBytes; + statsForDisks[msg->DiskId].DirectCopyByteCount += + msg->DiskCounters->RequestCounters.CopyBlocks.RequestBytes; + }); + + auto _ = runtime->AddObserver( + [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) + { + Y_UNUSED(ev); + statUpdated = true; + }); + + TVolumeClient volume(*runtime); + + auto clientInfo = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + + volume.AddClient(clientInfo); + + volume.SendToPipe( + std::make_unique()); + + volume.WriteBlocks( + TBlockRange64::WithLength(0, 1024), + clientInfo.GetClientId(), + 1); + + volume.ReadBlocks( + TBlockRange64::WithLength(0, 1024), + clientInfo.GetClientId()); + + runtime->AdvanceCurrentTime(UpdateCountersInterval); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + UNIT_ASSERT(statsForDisks["vol0"].WriteByteCount != 0); + UNIT_ASSERT(statsForDisks["vol0"].ReadByteCount != 0); + } + + Y_UNIT_TEST( + ShouldPullStatisticsFromDiskRegistryBasedPartitionWithMigration) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetMaxMigrationBandwidth(999'999'999); + + auto state = MakeIntrusive(); + state->MigrationMode = EMigrationMode::InProgress; + state->ReplicaCount = 2; + auto runtime = PrepareTestActorRuntime(config, state); + + bool statUpdated = false; + auto _ = runtime->AddObserver( + [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) + { + Y_UNUSED(ev); + statUpdated = true; + }); + + TVolumeClient volume(*runtime); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(TDuration::Seconds(1)); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } + + Y_UNIT_TEST( + ShouldPullStatisticsFromDiskRegistryBasedPartitionWithFreshDevices) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetMaxMigrationBandwidth(999'999'999); + + auto state = MakeIntrusive(); + state->MigrationMode = EMigrationMode::InProgress; + state->DeviceReplacementUUIDs = {"uuid1"}; + state->ReplicaCount = 2; + + auto runtime = PrepareTestActorRuntime(config, state); + + bool statUpdated = false; + auto _ = runtime->AddObserver( + [&](TEvVolumePrivate::TEvPartStatsSaved::TPtr& ev) + { + Y_UNUSED(ev); + statUpdated = true; + }); + + TVolumeClient volume(*runtime); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(TDuration::Seconds(1)); + runtime->DispatchEvents({}, TDuration::MilliSeconds(10)); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } + + Y_UNIT_TEST(ShouldPullStatisticsFromDiskRegistryBasedPartitionWithTimeout) + { + NProto::TStorageServiceConfig config; + config.SetUsePullSchemeForVolumeStatistics(true); + config.SetAcquireNonReplicatedDevices(true); + + auto state = MakeIntrusive(); + state->ReplicaCount = 2; + + auto runtime = PrepareTestActorRuntime(config, state); + TVolumeClient volume(*runtime); + + TActorId volumeId; + bool answerSeen = false; + bool statUpdated = false; + bool firstRequest = false; + bool isGrabResponse = false; + + runtime->SetEventFilter( + [&](TTestActorRuntimeBase& runtime, TAutoPtr& ev) + { + Y_UNUSED(runtime); + + if (ev->GetTypeRewrite() == + TEvNonreplPartitionPrivate:: + EvGetDiskRegistryBasedPartCountersRequest && + !firstRequest) + { + firstRequest = true; + volumeId = ev->Sender; + return false; + } + + if (ev->GetTypeRewrite() == + TEvNonreplPartitionPrivate:: + EvGetDiskRegistryBasedPartCountersResponse && + !isGrabResponse) + { + isGrabResponse = true; + return true; + } + + if (ev->GetTypeRewrite() == + TEvNonreplPartitionPrivate:: + EvGetDiskRegistryBasedPartCountersResponse && + ev->Recipient == volumeId) + { + answerSeen = true; + auto* msg = ev->Get< + TEvNonreplPartitionPrivate:: + TEvGetDiskRegistryBasedPartCountersResponse>(); + UNIT_ASSERT(HasError(msg->Error)); + + return false; + } + + if (ev->GetTypeRewrite() == TEvVolumePrivate::EvPartStatsSaved) + { + statUpdated = true; + } + + return false; + }); + + runtime->SetScheduledEventFilter( + [&](TTestActorRuntimeBase& runtime, + TAutoPtr& ev, + TDuration delay, + TInstant& deadline) + { + Y_UNUSED(runtime); + Y_UNUSED(delay); + Y_UNUSED(deadline); + if (ev->GetTypeRewrite() == TEvVolumePrivate::EvUpdateCounters) + { + return true; + } + return false; + }); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitionCount + ); + + volume.WaitReady(); + volume.SendToPipe( + std::make_unique()); + + runtime->AdvanceCurrentTime(UpdateCountersInterval); + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvVolumePrivate::EvPartStatsSaved); + runtime->DispatchEvents(options); + + // Check that we grab response to do error timeout + UNIT_ASSERT(isGrabResponse); + + // Check that we see answer with error + UNIT_ASSERT(answerSeen); + + // Check that statistics was updated + UNIT_ASSERT(statUpdated); + } } } // namespace NCloud::NBlockStore::NStorage