From 342568c286ee370deee0d09d5ad833906ea54e57 Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sat, 8 Nov 2025 14:49:25 -0500 Subject: [PATCH 1/2] Rework Surface Cache Signed-off-by: Isaac Marovitz --- MarathonRecomp/gpu/video.cpp | 187 +++++++++++++++-------- MarathonRecomp/gpu/video.h | 1 - MarathonRecomp/patches/video_patches.cpp | 4 - 3 files changed, 120 insertions(+), 72 deletions(-) diff --git a/MarathonRecomp/gpu/video.cpp b/MarathonRecomp/gpu/video.cpp index 08c8c74e..4f47198e 100644 --- a/MarathonRecomp/gpu/video.cpp +++ b/MarathonRecomp/gpu/video.cpp @@ -387,6 +387,38 @@ static constexpr uint32_t CONDITIONAL_SURVEY_MAX = 64; static std::unique_ptr g_conditionalSurveyBuffer; static std::unique_ptr g_conditionalSurveyDescriptorSet; +struct SurfaceGroupKey /* Identity of a surface group: surfaces whose EDRAM base, format, sample count, width and height all compare equal map to the same group. */ +{ + uint32_t edramBase; + RenderFormat format; + RenderSampleCounts sampleCount; + uint32_t width; + uint32_t height; + + bool operator==(const SurfaceGroupKey& other) const { + return edramBase == other.edramBase && + format == other.format && + sampleCount == other.sampleCount && + width == other.width && + height == other.height; + } +}; + +template<> +struct std::hash { + size_t operator()(const SurfaceGroupKey& k) const noexcept { /* Hashes the raw bytes of the key object. NOTE(review): any padding bytes would take part in the hash while operator== ignores them; confirm the struct has no padding. */ + return XXH3_64bits(&k, sizeof(k)); + } +}; + +struct SurfaceGroup +{ + std::vector surfaces; + std::unique_ptr sharedTexture; +}; + +static ankerl::unordered_dense::map> g_surfaceGroups; + enum { TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D, @@ -764,12 +796,43 @@ static void DestructTempResources() { const auto surface = reinterpret_cast(resource); + bool ownedByGroup = false; /* Set when the surface is found in, and removed from, a surface group by the loop below. */ + for (auto it = g_surfaceGroups.begin(); it != g_surfaceGroups.end();) + { + auto& group = it->second; + auto surfIt = std::find(group->surfaces.begin(), group->surfaces.end(), surface); + + if (surfIt != group->surfaces.end()) + { + group->surfaces.erase(surfIt); + ownedByGroup = true; + if (group->surfaces.empty()) + { + it = g_surfaceGroups.erase(it); + } else + { + ++it; + } + + break; /* CreateSurface registers a surface in at most one group, so the search stops at the first match. */ + } + + ++it; 
+ } + if (surface->descriptorIndex != NULL) { g_textureDescriptorSet->setTexture(surface->descriptorIndex, nullptr, {}); g_textureDescriptorAllocator.free(surface->descriptorIndex); } + // Only destroy textureHolder if the surface owns it (not in a group) + // NOTE(review): CreateSurface gives every surface its own textureHolder, separate from the group's sharedTexture, and ~GuestSurface() runs right after this; confirm skipping the reset for grouped surfaces has any effect + if (!ownedByGroup && surface->textureHolder) + { + surface->textureHolder.reset(); + } + surface->~GuestSurface(); break; } @@ -2370,14 +2433,6 @@ static uint32_t CreateDevice(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, static void DestructResource(GuestResource* resource) { - // Needed for hack in CreateSurface (remove if fix it) - if (resource->type == ResourceType::RenderTarget || resource->type == ResourceType::DepthStencil) - { - const auto surface = reinterpret_cast(resource); - if (surface->wasCached) { - return; - } - } RenderCommand cmd; cmd.type = RenderCommandType::DestructResource; cmd.destructResource.resource = resource; @@ -3511,73 +3566,71 @@ static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format return buffer; } -static std::vector> g_surfaceCache; - -// TODO: Singleplayer (possibly) uses the same memory location in EDRAM for HDR and FB0 surfaces, -// so we just remember who was created first and use that instead of creating a new one. -static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample, GuestSurfaceCreateParams* params) +static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample, GuestSurfaceCreateParams* params) /* Creates a new render target or depth stencil surface (chosen by RenderFormatIsDepth on the converted format). When params is non-null the surface is also registered in the surface group keyed by params->base, format, sample count and size. */ { - GuestSurface* surface = nullptr; - uint32_t baseValue = params ? 
params->base.get() : -1; - if (params) { - for (auto& entry : g_surfaceCache) { - GuestSurface* cachedSurface = entry.first; - uint32_t cachedBase = entry.second; - if (cachedSurface && - cachedSurface->width == width && - cachedSurface->height == height && - cachedSurface->guestFormat == format && - cachedBase == baseValue) { - surface = cachedSurface; - break; - } - } + // printf("CreateSurface: w: %d, h: %d, f: %d, ms: %d\n", width, height, format, multiSample); + RenderTextureDesc desc; + desc.dimension = RenderTextureDimension::TEXTURE_2D; + desc.width = width; + desc.height = height; + desc.depth = 1; + desc.mipLevels = 1; + desc.arraySize = 1; + // desc.multisampling.sampleCount = multiSample != 0 && Config::AntiAliasing != EAntiAliasing::None ? int32_t(Config::AntiAliasing.Value) : RenderSampleCount::COUNT_1; + if (multiSample == 0) { /* multiSample 0 selects COUNT_1, 1 selects COUNT_2, any other value selects COUNT_4. */ + desc.multisampling.sampleCount = RenderSampleCount::COUNT_1; + } else { + desc.multisampling.sampleCount = multiSample == 1 ? RenderSampleCount::COUNT_2 : RenderSampleCount::COUNT_4; } - if (!surface) { - // printf("CreateSurface: w: %d, h: %d, f: %d, ms: %d\n", width, height, format, multiSample); - RenderTextureDesc desc; - desc.dimension = RenderTextureDimension::TEXTURE_2D; - desc.width = width; - desc.height = height; - desc.depth = 1; - desc.mipLevels = 1; - desc.arraySize = 1; - // desc.multisampling.sampleCount = multiSample != 0 && Config::AntiAliasing != EAntiAliasing::None ? int32_t(Config::AntiAliasing.Value) : RenderSampleCount::COUNT_1; - if (multiSample == 0) { - desc.multisampling.sampleCount = RenderSampleCount::COUNT_1; - } else { - desc.multisampling.sampleCount = multiSample == 1 ? RenderSampleCount::COUNT_2 : RenderSampleCount::COUNT_4; - } - desc.format = ConvertFormat(format); - desc.flags = RenderFormatIsDepth(desc.format) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET; + desc.format = ConvertFormat(format); + desc.flags = RenderFormatIsDepth(desc.format) ? 
RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET; - surface = g_userHeap.AllocPhysical(RenderFormatIsDepth(desc.format) ? - ResourceType::DepthStencil : ResourceType::RenderTarget); + auto surface = g_userHeap.AllocPhysical(RenderFormatIsDepth(desc.format) ? + ResourceType::DepthStencil : ResourceType::RenderTarget); + surface->textureHolder = g_device->createTexture(desc); /* NOTE(review): a texture is created here for every surface; the params == nullptr branch below creates a second one that replaces it, and a new group also allocates sharedTexture. Confirm the extra allocations are intended. */ + surface->texture = surface->textureHolder.get(); + surface->width = width; + surface->height = height; + surface->format = desc.format; + surface->guestFormat = format; + surface->sampleCount = desc.multisampling.sampleCount; + + if (params) { + const SurfaceGroupKey key + { + params->base.get(), + desc.format, + desc.multisampling.sampleCount, + width, + height + }; + + auto& group = g_surfaceGroups[key]; + + if (!group) { + group = std::make_unique(); + group->sharedTexture = g_device->createTexture(desc); + } + + surface->texture = group->sharedTexture.get(); + group->surfaces.push_back(surface); + } else { surface->textureHolder = g_device->createTexture(desc); surface->texture = surface->textureHolder.get(); - surface->width = width; - surface->height = height; - surface->format = desc.format; - surface->guestFormat = format; - surface->sampleCount = desc.multisampling.sampleCount; + } - RenderTextureViewDesc viewDesc; - viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; - viewDesc.format = desc.format; - viewDesc.mipLevels = 1; - surface->textureView = surface->textureHolder->createTextureView(viewDesc); - surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); - g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); + RenderTextureViewDesc viewDesc; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; + viewDesc.format = desc.format; + viewDesc.mipLevels = 1; + surface->textureView = surface->textureHolder->createTextureView(viewDesc); /* NOTE(review): the view and the descriptor set below are built from textureHolder, while a grouped surface's texture pointer refers to the group's sharedTexture; confirm sampling the holder texture is intended. */ + 
surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); + g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); - #ifdef _DEBUG + #ifdef _DEBUG surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface))); #endif - if (params) { - surface->wasCached = true; - g_surfaceCache.emplace_back(surface, baseValue); - } - } return surface; } @@ -3755,7 +3808,7 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf { srcLayout = RenderTextureLayout::RESOLVE_SOURCE; dstLayout = RenderTextureLayout::RESOLVE_DEST; - shaderResolve = false; + // shaderResolve = false; /* NOTE(review): assignment left commented out by this patch; restore or delete it once the intended behavior is confirmed. */ } } @@ -3801,7 +3854,7 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS else commandList->resolveTexture(texture->texture, surface->texture); - shaderResolve = false; + // shaderResolve = false; /* NOTE(review): assignment left commented out by this patch; restore or delete it once the intended behavior is confirmed. */ } } diff --git a/MarathonRecomp/gpu/video.h b/MarathonRecomp/gpu/video.h index 09f78f26..21c56205 100644 --- a/MarathonRecomp/gpu/video.h +++ b/MarathonRecomp/gpu/video.h @@ -240,7 +240,6 @@ struct GuestSurface : GuestBaseTexture ankerl::unordered_dense::map> framebuffers; RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; ankerl::unordered_dense::map destinationTextures; - bool wasCached = false; }; enum GuestDeclType diff --git a/MarathonRecomp/patches/video_patches.cpp b/MarathonRecomp/patches/video_patches.cpp index ecb8b483..408844ac 100644 --- a/MarathonRecomp/patches/video_patches.cpp +++ b/MarathonRecomp/patches/video_patches.cpp @@ -117,10 +117,6 @@ PPC_FUNC(sub_82619B88) ReflectionScaleFactor(Config::ReflectionResolution)); ctx.r6.u32 = static_cast(static_cast(ctx.r6.u32) * ReflectionScaleFactor(Config::ReflectionResolution)); - - // Bad hack to stop EDRAM cache from messing up - if (Config::ReflectionResolution == 
EReflectionResolution::Full) - ctx.r5.u32++; } #if _DEBUG From 5fd81bfb5f953cc41c2079d0a4f1c69e3f2ff0fb Mon Sep 17 00:00:00 2001 From: Isaac Marovitz Date: Sat, 8 Nov 2025 15:18:42 -0500 Subject: [PATCH 2/2] =?UTF-8?q?=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Isaac Marovitz --- MarathonRecomp/gpu/video.cpp | 94 +++++++++++++++++++++++- MarathonRecomp/patches/video_patches.cpp | 4 + 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/MarathonRecomp/gpu/video.cpp b/MarathonRecomp/gpu/video.cpp index 4f47198e..679bfc55 100644 --- a/MarathonRecomp/gpu/video.cpp +++ b/MarathonRecomp/gpu/video.cpp @@ -414,7 +414,8 @@ struct std::hash { struct SurfaceGroup { std::vector surfaces; - std::unique_ptr sharedTexture; + GuestSurface* currentSurface = nullptr; /* Group member most recently passed to HandleSurfaceAliasing as a color target or cleared as one; nullptr when there is none. */ + GuestSurface* currentDepthSurface = nullptr; /* Depth-stencil counterpart of currentSurface. */ }; static ankerl::unordered_dense::map> g_surfaceGroups; @@ -805,6 +806,13 @@ static void DestructTempResources() if (surfIt != group->surfaces.end()) { group->surfaces.erase(surfIt); + + // Clear current surface pointers if they match, so the group never refers to a destroyed surface + if (group->currentSurface == surface) + group->currentSurface = nullptr; + if (group->currentDepthSurface == surface) + group->currentDepthSurface = nullptr; + ownedByGroup = true; if (group->surfaces.empty()) { @@ -3610,10 +3618,8 @@ static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t for if (!group) { group = std::make_unique(); - group->sharedTexture = g_device->createTexture(desc); } - surface->texture = group->sharedTexture.get(); group->surfaces.push_back(surface); } else { surface->textureHolder = g_device->createTexture(desc); @@ -3759,9 +3765,70 @@ static void SetRenderTarget(GuestDevice* device, uint32_t index, GuestSurface* r } } +static void HandleSurfaceAliasing(GuestSurface* newSurface, bool isDepthSurface) /* Called before newSurface is bound as a color target (isDepthSurface false) or depth stencil (true). If newSurface belongs to a surface group whose current surface of that kind is a different surface, records a copy (or resolve) of the previous surface's texture into newSurface's texture on g_commandLists[g_frame], then marks newSurface as the group's current surface. Does nothing for nullptr or for a surface that is in no group. */ +{ + if (newSurface == nullptr) + return; + + for (auto& [key, group] : g_surfaceGroups) { + auto it = 
std::find(group->surfaces.begin(), group->surfaces.end(), newSurface); + if (it != group->surfaces.end()) { + // Determine which current surface to check based on type + GuestSurface*& currentSurface = isDepthSurface ? + group->currentDepthSurface : + group->currentSurface; + + if (currentSurface != nullptr && currentSurface != newSurface) { + // Need to copy from previous surface to new surface. NOTE(review): these barriers go straight to the command list; confirm the batched barrier tracking flushed by FlushBarriers() sees the new layouts + auto& commandList = g_commandLists[g_frame]; + + // Copies need COPY_SOURCE/COPY_DEST layouts, resolves need RESOLVE_SOURCE/RESOLVE_DEST + const bool sameSampleCount = currentSurface->sampleCount == newSurface->sampleCount; + const RenderTextureLayout srcLayout = sameSampleCount ? + RenderTextureLayout::COPY_SOURCE : RenderTextureLayout::RESOLVE_SOURCE; + const RenderTextureLayout dstLayout = sameSampleCount ? + RenderTextureLayout::COPY_DEST : RenderTextureLayout::RESOLVE_DEST; + + commandList->barriers(RenderBarrierStage::COPY, { + RenderTextureBarrier(currentSurface->texture, srcLayout), + RenderTextureBarrier(newSurface->texture, dstLayout) + }); + + if (isDepthSurface) { + // For depth, copy when the sample counts match, otherwise resolve if the device supports it + if (sameSampleCount) { + commandList->copyTexture(newSurface->texture, currentSurface->texture); + } else { + // If sample counts differ, need to resolve; nothing is transferred when resolveModes is unset. NOTE(review): SurfaceGroupKey includes sampleCount, so group members are expected to match and this branch should not run + if (g_capabilities.resolveModes) { + commandList->resolveTextureRegion( + newSurface->texture, 0, 0, + currentSurface->texture, nullptr, + RenderResolveMode::MIN); + } + } + } else { + // For color, simple copy or resolve + if (sameSampleCount) { + commandList->copyTexture(newSurface->texture, currentSurface->texture); + } else { + commandList->resolveTexture(newSurface->texture, currentSurface->texture); + } + } + } + + currentSurface = newSurface; + break; + } + } +} + static void ProcSetRenderTarget(const RenderCommand& cmd) { const auto& args = cmd.setRenderTarget; + + HandleSurfaceAliasing(args.renderTarget, false); + SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_renderTarget, args.renderTarget); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.renderTargetFormat, 
args.renderTarget != nullptr ? args.renderTarget->format : RenderFormat::UNKNOWN); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.sampleCount, args.renderTarget != nullptr ? args.renderTarget->sampleCount : RenderSampleCount::COUNT_1); @@ -3784,6 +3851,8 @@ static void ProcSetDepthStencilSurface(const RenderCommand& cmd) { const auto& args = cmd.setDepthStencilSurface; + HandleSurfaceAliasing(args.depthStencil, true); /* true selects the group's currentDepthSurface slot; runs before g_depthStencil is updated below. */ + SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_depthStencil, args.depthStencil); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN); } @@ -4111,6 +4180,25 @@ static void ProcClear(const RenderCommand& cmd) { const auto& args = cmd.clear; + // Clear invalidates aliased surface data, so update group tracking: the bound target becomes its group's current surface. NOTE(review): HandleSurfaceAliasing already records this when the target is bound, and any copy it issues happens before this clear; confirm this pass is still needed + if (args.flags & D3DCLEAR_TARGET) { + for (auto& [key, group] : g_surfaceGroups) { + if (std::find(group->surfaces.begin(), group->surfaces.end(), g_renderTarget) != group->surfaces.end()) { + group->currentSurface = g_renderTarget; + break; + } + } + } + + if (args.flags & (D3DCLEAR_ZBUFFER | D3DCLEAR_STENCIL)) { /* Same tracking update for the bound depth stencil when depth or stencil is cleared. */ + for (auto& [key, group] : g_surfaceGroups) { + if (std::find(group->surfaces.begin(), group->surfaces.end(), g_depthStencil) != group->surfaces.end()) { + group->currentDepthSurface = g_depthStencil; + break; + } + } + } + if (PopulateBarriersForStretchRect(g_renderTarget, g_depthStencil)) { FlushBarriers(); diff --git a/MarathonRecomp/patches/video_patches.cpp b/MarathonRecomp/patches/video_patches.cpp index 408844ac..ecb8b483 100644 --- a/MarathonRecomp/patches/video_patches.cpp +++ b/MarathonRecomp/patches/video_patches.cpp @@ -117,6 +117,10 @@ PPC_FUNC(sub_82619B88) ReflectionScaleFactor(Config::ReflectionResolution)); ctx.r6.u32 = static_cast(static_cast(ctx.r6.u32) * ReflectionScaleFactor(Config::ReflectionResolution)); + + // Bad hack to stop EDRAM cache from messing up: bumps r5 by one when reflections use the Full resolution setting + if 
(Config::ReflectionResolution == EReflectionResolution::Full) + ctx.r5.u32++; } #if _DEBUG