-
Notifications
You must be signed in to change notification settings - Fork 37
[NBS] Pull scheme for non replicated disks #4792
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,28 +2,31 @@ | |
|
|
||
| #include <cloud/blockstore/libs/storage/api/volume.h> | ||
| #include <cloud/blockstore/libs/storage/core/config.h> | ||
| #include <cloud/blockstore/libs/storage/volume/actors/disk_registry_based_partition_statistics_collector_actor.h> | ||
|
|
||
| namespace NCloud::NBlockStore::NStorage { | ||
|
|
||
| using namespace NActors; | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| void TMirrorPartitionActor::HandlePartCounters( | ||
| const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, | ||
| const TActorContext& ctx) | ||
| void TMirrorPartitionActor::UpdateCounters( | ||
| const TActorContext& ctx, | ||
| const TActorId& sender, | ||
| TPartNonreplCountersData partCountersData) | ||
| { | ||
| auto* msg = ev->Get(); | ||
|
|
||
| const ui32 replicaIndex = State.GetReplicaIndex(ev->Sender); | ||
| const ui32 replicaIndex = State.GetReplicaIndex(sender); | ||
| if (replicaIndex < ReplicaCounters.size()) { | ||
| ReplicaCounters[replicaIndex] = std::move(msg->DiskCounters); | ||
| NetworkBytes += msg->NetworkBytes; | ||
| CpuUsage += CpuUsage; | ||
| ReplicaCounters[replicaIndex] = | ||
| std::move(partCountersData.DiskCounters); | ||
| NetworkBytes += partCountersData.NetworkBytes; | ||
| CpuUsage += partCountersData.CpuUsage; | ||
| } else { | ||
| LOG_INFO(ctx, TBlockStoreComponents::PARTITION, | ||
| LOG_INFO( | ||
| ctx, | ||
| TBlockStoreComponents::PARTITION, | ||
| "Partition %s for disk %s counters not found", | ||
| ToString(ev->Sender).c_str(), | ||
| ToString(sender).c_str(), | ||
| State.GetReplicaInfos()[0].Config->GetName().Quote().c_str()); | ||
|
|
||
| Y_DEBUG_ABORT_UNLESS(0); | ||
|
|
@@ -32,12 +35,19 @@ void TMirrorPartitionActor::HandlePartCounters( | |
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| void TMirrorPartitionActor::SendStats(const TActorContext& ctx) | ||
| void TMirrorPartitionActor::HandlePartCounters( | ||
| const TEvVolume::TEvDiskRegistryBasedPartitionCounters::TPtr& ev, | ||
| const TActorContext& ctx) | ||
| { | ||
| if (!StatActorId) { | ||
| return; | ||
| } | ||
| auto* msg = ev->Get(); | ||
| UpdateCounters(ctx, ev->Sender, std::move(msg->CountersData)); | ||
| } | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| TPartNonreplCountersData TMirrorPartitionActor::ExtractPartCounters( | ||
| const TActorContext& ctx) | ||
| { | ||
| auto stats = CreatePartitionDiskCounters( | ||
| EPublishingPolicy::DiskRegistryBased, | ||
| DiagnosticsConfig->GetHistogramCounterOptions()); | ||
|
|
@@ -54,35 +64,20 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) | |
| stats->Simple.IORequestsInFlight.Reset(); | ||
| for (const auto& counters: ReplicaCounters) { | ||
| if (counters) { | ||
| stats->Simple.BytesCount.Value = Max( | ||
| stats->Simple.BytesCount.Value, | ||
| counters->Simple.BytesCount.Value); | ||
| stats->Simple.IORequestsInFlight.Value = Max( | ||
| stats->Simple.IORequestsInFlight.Value, | ||
| counters->Simple.IORequestsInFlight.Value); | ||
| stats->Simple.BytesCount.Value = | ||
| Max(stats->Simple.BytesCount.Value, | ||
| counters->Simple.BytesCount.Value); | ||
| stats->Simple.IORequestsInFlight.Value = | ||
| Max(stats->Simple.IORequestsInFlight.Value, | ||
| counters->Simple.IORequestsInFlight.Value); | ||
| } | ||
| } | ||
|
|
||
| stats->Simple.ChecksumMismatches.Value = ChecksumMismatches; | ||
| stats->Simple.ScrubbingProgress.Value = | ||
| 100 * GetScrubbingRange().Start / State.GetBlockCount(); | ||
| stats->Cumulative.ScrubbingThroughput.Value = ScrubbingThroughput; | ||
| auto request = | ||
| std::make_unique<TEvVolume::TEvDiskRegistryBasedPartitionCounters>( | ||
| MakeIntrusive<TCallContext>(), | ||
| std::move(stats), | ||
| DiskId, | ||
| NetworkBytes, | ||
| CpuUsage); | ||
|
|
||
| NetworkBytes = 0; | ||
| CpuUsage = {}; | ||
| ScrubbingThroughput = 0; | ||
|
|
||
| NCloud::Send( | ||
| ctx, | ||
| StatActorId, | ||
| std::move(request)); | ||
| stats->Cumulative.ScrubbingThroughput.Value = | ||
| std::exchange(ScrubbingThroughput, {}); | ||
|
|
||
| const bool scrubbingEnabled = | ||
| Config->GetDataScrubbingEnabled() && !ResyncActorId; | ||
|
|
@@ -95,6 +90,121 @@ void TMirrorPartitionActor::SendStats(const TActorContext& ctx) | |
| std::move(Fixed), | ||
| std::move(FixedPartial)); | ||
| NCloud::Send(ctx, StatActorId, std::move(scrubberCounters)); | ||
|
|
||
| return { | ||
| .DiskCounters = std::move(stats), | ||
| .NetworkBytes = std::exchange(NetworkBytes, {}), | ||
| .CpuUsage = std::exchange(CpuUsage, {}), | ||
| }; | ||
| } | ||
|
|
||
| void TMirrorPartitionActor::SendStats(const TActorContext& ctx) | ||
| { | ||
| if (!StatActorId) { | ||
| return; | ||
| } | ||
|
|
||
| auto request = | ||
| std::make_unique<TEvVolume::TEvDiskRegistryBasedPartitionCounters>( | ||
| MakeIntrusive<TCallContext>(), | ||
| DiskId, | ||
| ExtractPartCounters(ctx)); | ||
|
|
||
| NCloud::Send( | ||
| ctx, | ||
| StatActorId, | ||
| std::move(request)); | ||
|
|
||
| } | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
|
|
||
| void TMirrorPartitionActor::HandleGetDiskRegistryBasedPartCounters( | ||
| const TEvNonreplPartitionPrivate:: | ||
| TEvGetDiskRegistryBasedPartCountersRequest::TPtr& ev, | ||
| const TActorContext& ctx) | ||
| { | ||
| if (StatisticRequestInfo) { | ||
| NCloud::Reply( | ||
| ctx, | ||
| *StatisticRequestInfo, | ||
| std::make_unique<TEvNonreplPartitionPrivate:: | ||
| TEvGetDiskRegistryBasedPartCountersResponse>( | ||
| MakeError(E_REJECTED, "Mirror actor got new request"), | ||
| SelfId(), | ||
| DiskId, | ||
| TPartNonreplCountersData{})); | ||
| StatisticRequestInfo.Reset(); | ||
| } | ||
|
|
||
| auto statActorIds = State.GetReplicaActorsBypassingProxies(); | ||
|
|
||
| if (statActorIds.empty()) { | ||
| NCloud::Reply( | ||
| ctx, | ||
| *ev, | ||
| std::make_unique<TEvNonreplPartitionPrivate:: | ||
| TEvGetDiskRegistryBasedPartCountersResponse>( | ||
| MakeError(E_INVALID_STATE, "Mirror actor hasn't replicas"), | ||
| SelfId(), | ||
| DiskId, | ||
| ExtractPartCounters(ctx))); | ||
| return; | ||
| } | ||
|
|
||
| StatisticRequestInfo = | ||
| CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); | ||
|
|
||
| NCloud::Register<TDiskRegistryBasedPartitionStatisticsCollectorActor>( | ||
| ctx, | ||
| SelfId(), | ||
| std::move(statActorIds)); | ||
| } | ||
|
|
||
| void TMirrorPartitionActor::HandleDiskRegistryBasedPartCountersCombined( | ||
| const TEvNonreplPartitionPrivate::TEvDiskRegistryBasedPartCountersCombined:: | ||
| TPtr& ev, | ||
| const TActorContext& ctx) | ||
| { | ||
| if (!StatisticRequestInfo) { | ||
| LOG_ERROR( | ||
| ctx, | ||
| TBlockStoreComponents::PARTITION_NONREPL, | ||
| "[%s] Failed to send mirror actor statistics due to empty " | ||
| "StatisticRequestInfo.", | ||
| DiskId.Quote().c_str()); | ||
| return; | ||
| } | ||
|
|
||
| auto* msg = ev->Get(); | ||
|
|
||
| if(HasError(msg->Error)) { | ||
| LOG_WARN( | ||
| ctx, | ||
| TBlockStoreComponents::PARTITION_NONREPL, | ||
| "[%s] Failed to send mirror actor statistics due to error: %s", | ||
| DiskId.Quote().c_str(), | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. LogTitle.GetWithTime().c_str() |
||
| msg->Error.GetMessage().c_str()); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. FormatError(msg->Error).c_str() |
||
| } | ||
|
|
||
| for (auto& counters: msg->Counters) { | ||
| if(!counters.CountersData.DiskCounters) { | ||
| continue; | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Такой код может привести к неверной статистике. Если не вызвать |
||
| UpdateCounters(ctx, counters.ActorId, std::move(counters.CountersData)); | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. тут (и не только тут) нужно сделать проверку на nullptr у счетчиков |
||
|
|
||
| NCloud::Reply( | ||
| ctx, | ||
| *StatisticRequestInfo, | ||
| std::make_unique<TEvNonreplPartitionPrivate:: | ||
| TEvGetDiskRegistryBasedPartCountersResponse>( | ||
| msg->Error, | ||
| SelfId(), | ||
| DiskId, | ||
| ExtractPartCounters(ctx))); | ||
|
|
||
| StatisticRequestInfo.Reset(); | ||
| } | ||
|
|
||
| } // namespace NCloud::NBlockStore::NStorage | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Здесь (и не только здесь) нужно условие про
HasError(msg->GetError()). Если ошибка есть, залогировать её WARN'ом