diff --git a/MarathonRecomp/gpu/video.cpp b/MarathonRecomp/gpu/video.cpp index 08c8c74e..679bfc55 100644 --- a/MarathonRecomp/gpu/video.cpp +++ b/MarathonRecomp/gpu/video.cpp @@ -387,6 +387,39 @@ static constexpr uint32_t CONDITIONAL_SURVEY_MAX = 64; static std::unique_ptr g_conditionalSurveyBuffer; static std::unique_ptr g_conditionalSurveyDescriptorSet; +struct SurfaceGroupKey +{ + uint32_t edramBase; + RenderFormat format; + RenderSampleCounts sampleCount; + uint32_t width; + uint32_t height; + + bool operator==(const SurfaceGroupKey& other) const { + return edramBase == other.edramBase && + format == other.format && + sampleCount == other.sampleCount && + width == other.width && + height == other.height; + } +}; + +template<> +struct std::hash { + size_t operator()(const SurfaceGroupKey& k) const noexcept { + return XXH3_64bits(&k, sizeof(k)); + } +}; + +struct SurfaceGroup +{ + std::vector surfaces; + GuestSurface* currentSurface = nullptr; + GuestSurface* currentDepthSurface = nullptr; +}; + +static ankerl::unordered_dense::map> g_surfaceGroups; + enum { TEXTURE_DESCRIPTOR_NULL_TEXTURE_2D, @@ -764,12 +797,50 @@ static void DestructTempResources() { const auto surface = reinterpret_cast(resource); + bool ownedByGroup = false; + for (auto it = g_surfaceGroups.begin(); it != g_surfaceGroups.end();) + { + auto& group = it->second; + auto surfIt = std::find(group->surfaces.begin(), group->surfaces.end(), surface); + + if (surfIt != group->surfaces.end()) + { + group->surfaces.erase(surfIt); + + // Clear current surface pointers if they match + if (group->currentSurface == surface) + group->currentSurface = nullptr; + if (group->currentDepthSurface == surface) + group->currentDepthSurface = nullptr; + + ownedByGroup = true; + if (group->surfaces.empty()) + { + it = g_surfaceGroups.erase(it); + } else + { + ++it; + } + + break; + } + + ++it; + } + if (surface->descriptorIndex != NULL) { g_textureDescriptorSet->setTexture(surface->descriptorIndex, nullptr, {}); g_textureDescriptorAllocator.free(surface->descriptorIndex); } + // Only destroy textureHolder if the surface owns it (not in a group) + // If in a group, the group's sharedTexture owns it + if (!ownedByGroup && surface->textureHolder) + { + surface->textureHolder.reset(); + } + surface->~GuestSurface(); break; } @@ -2370,14 +2441,6 @@ static uint32_t CreateDevice(uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, static void DestructResource(GuestResource* resource) { - // Needed for hack in CreateSurface (remove if fix it) - if (resource->type == ResourceType::RenderTarget || resource->type == ResourceType::DepthStencil) - { - const auto surface = reinterpret_cast(resource); - if (surface->wasCached) { - return; - } - } RenderCommand cmd; cmd.type = RenderCommandType::DestructResource; cmd.destructResource.resource = resource; @@ -3511,73 +3574,69 @@ static GuestBuffer* CreateIndexBuffer(uint32_t length, uint32_t, uint32_t format return buffer; } -static std::vector> g_surfaceCache; - -// TODO: Singleplayer (possibly) uses the same memory location in EDRAM for HDR and FB0 surfaces, -// so we just remember who was created first and use that instead of creating a new one. -static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample, GuestSurfaceCreateParams* params) +static GuestSurface* CreateSurface(uint32_t width, uint32_t height, uint32_t format, uint32_t multiSample, GuestSurfaceCreateParams* params) { - GuestSurface* surface = nullptr; - uint32_t baseValue = params ? params->base.get() : -1; - if (params) { - for (auto& entry : g_surfaceCache) { - GuestSurface* cachedSurface = entry.first; - uint32_t cachedBase = entry.second; - if (cachedSurface && - cachedSurface->width == width && - cachedSurface->height == height && - cachedSurface->guestFormat == format && - cachedBase == baseValue) { - surface = cachedSurface; - break; - } - } + // printf("CreateSurface: w: %d, h: %d, f: %d, ms: %d\n", width, height, format, multiSample); + RenderTextureDesc desc; + desc.dimension = RenderTextureDimension::TEXTURE_2D; + desc.width = width; + desc.height = height; + desc.depth = 1; + desc.mipLevels = 1; + desc.arraySize = 1; + // desc.multisampling.sampleCount = multiSample != 0 && Config::AntiAliasing != EAntiAliasing::None ? int32_t(Config::AntiAliasing.Value) : RenderSampleCount::COUNT_1; + if (multiSample == 0) { + desc.multisampling.sampleCount = RenderSampleCount::COUNT_1; + } else { + desc.multisampling.sampleCount = multiSample == 1 ? RenderSampleCount::COUNT_2 : RenderSampleCount::COUNT_4; } - if (!surface) { - // printf("CreateSurface: w: %d, h: %d, f: %d, ms: %d\n", width, height, format, multiSample); - RenderTextureDesc desc; - desc.dimension = RenderTextureDimension::TEXTURE_2D; - desc.width = width; - desc.height = height; - desc.depth = 1; - desc.mipLevels = 1; - desc.arraySize = 1; - // desc.multisampling.sampleCount = multiSample != 0 && Config::AntiAliasing != EAntiAliasing::None ? int32_t(Config::AntiAliasing.Value) : RenderSampleCount::COUNT_1; - if (multiSample == 0) { - desc.multisampling.sampleCount = RenderSampleCount::COUNT_1; - } else { - desc.multisampling.sampleCount = multiSample == 1 ? RenderSampleCount::COUNT_2 : RenderSampleCount::COUNT_4; - } - desc.format = ConvertFormat(format); - desc.flags = RenderFormatIsDepth(desc.format) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET; + desc.format = ConvertFormat(format); + desc.flags = RenderFormatIsDepth(desc.format) ? RenderTextureFlag::DEPTH_TARGET : RenderTextureFlag::RENDER_TARGET; + + auto surface = g_userHeap.AllocPhysical(RenderFormatIsDepth(desc.format) ? + ResourceType::DepthStencil : ResourceType::RenderTarget); + + surface->textureHolder = g_device->createTexture(desc); + surface->texture = surface->textureHolder.get(); + surface->width = width; + surface->height = height; + surface->format = desc.format; + surface->guestFormat = format; + surface->sampleCount = desc.multisampling.sampleCount; + + if (params) { + const SurfaceGroupKey key + { + params->base.get(), + desc.format, + desc.multisampling.sampleCount, + width, + height + }; + + auto& group = g_surfaceGroups[key]; - surface = g_userHeap.AllocPhysical(RenderFormatIsDepth(desc.format) ? - ResourceType::DepthStencil : ResourceType::RenderTarget); + if (!group) { + group = std::make_unique(); + } + group->surfaces.push_back(surface); + } else { surface->textureHolder = g_device->createTexture(desc); surface->texture = surface->textureHolder.get(); - surface->width = width; - surface->height = height; - surface->format = desc.format; - surface->guestFormat = format; - surface->sampleCount = desc.multisampling.sampleCount; + } - RenderTextureViewDesc viewDesc; - viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; - viewDesc.format = desc.format; - viewDesc.mipLevels = 1; - surface->textureView = surface->textureHolder->createTextureView(viewDesc); - surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); - g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); + RenderTextureViewDesc viewDesc; + viewDesc.dimension = RenderTextureViewDimension::TEXTURE_2D; + viewDesc.format = desc.format; + viewDesc.mipLevels = 1; + surface->textureView = surface->textureHolder->createTextureView(viewDesc); + surface->descriptorIndex = g_textureDescriptorAllocator.allocate(); + g_textureDescriptorSet->setTexture(surface->descriptorIndex, surface->textureHolder.get(), RenderTextureLayout::SHADER_READ, surface->textureView.get()); - #ifdef _DEBUG + #ifdef _DEBUG surface->texture->setName(fmt::format("{} {:X}", desc.flags & RenderTextureFlag::RENDER_TARGET ? "Render Target" : "Depth Stencil", g_memory.MapVirtual(surface))); #endif - if (params) { - surface->wasCached = true; - g_surfaceCache.emplace_back(surface, baseValue); - } - } return surface; } @@ -3706,9 +3765,70 @@ static void SetRenderTarget(GuestDevice* device, uint32_t index, GuestSurface* r } } +static void HandleSurfaceAliasing(GuestSurface* newSurface, bool isDepthSurface) +{ + if (newSurface == nullptr) + return; + + for (auto& [key, group] : g_surfaceGroups) { + auto it = std::find(group->surfaces.begin(), group->surfaces.end(), newSurface); + if (it != group->surfaces.end()) { + // Determine which current surface to check based on type + GuestSurface*& currentSurface = isDepthSurface ? + group->currentDepthSurface : + group->currentSurface; + + if (currentSurface != nullptr && currentSurface != newSurface) { + // Need to copy from previous surface to new surface + auto& commandList = g_commandLists[g_frame]; + + RenderTextureLayout srcLayout = isDepthSurface ? + RenderTextureLayout::DEPTH_READ : + RenderTextureLayout::SHADER_READ; + RenderTextureLayout dstLayout = isDepthSurface ? + RenderTextureLayout::COPY_DEST : + RenderTextureLayout::COPY_DEST; + + commandList->barriers(RenderBarrierStage::COPY, { + RenderTextureBarrier(currentSurface->texture, srcLayout), + RenderTextureBarrier(newSurface->texture, dstLayout) + }); + + if (isDepthSurface) { + // For depth, use copyTextureRegion or resolve if MSAA differs + if (currentSurface->sampleCount == newSurface->sampleCount) { + commandList->copyTexture(newSurface->texture, currentSurface->texture); + } else { + // If sample counts differ, need to resolve + if (g_capabilities.resolveModes) { + commandList->resolveTextureRegion( + newSurface->texture, 0, 0, + currentSurface->texture, nullptr, + RenderResolveMode::MIN); + } + } + } else { + // For color, simple copy or resolve + if (currentSurface->sampleCount == newSurface->sampleCount) { + commandList->copyTexture(newSurface->texture, currentSurface->texture); + } else { + commandList->resolveTexture(newSurface->texture, currentSurface->texture); + } + } + } + + currentSurface = newSurface; + break; + } + } +} + static void ProcSetRenderTarget(const RenderCommand& cmd) { const auto& args = cmd.setRenderTarget; + + HandleSurfaceAliasing(args.renderTarget, false); + SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_renderTarget, args.renderTarget); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.renderTargetFormat, args.renderTarget != nullptr ? args.renderTarget->format : RenderFormat::UNKNOWN); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.sampleCount, args.renderTarget != nullptr ? args.renderTarget->sampleCount : RenderSampleCount::COUNT_1); @@ -3731,6 +3851,8 @@ static void ProcSetDepthStencilSurface(const RenderCommand& cmd) { const auto& args = cmd.setDepthStencilSurface; + HandleSurfaceAliasing(args.depthStencil, true); + SetDirtyValue(g_dirtyStates.renderTargetAndDepthStencil, g_depthStencil, args.depthStencil); SetDirtyValue(g_dirtyStates.pipelineState, g_pipelineState.depthStencilFormat, args.depthStencil != nullptr ? args.depthStencil->format : RenderFormat::UNKNOWN); } @@ -3755,7 +3877,7 @@ static bool PopulateBarriersForStretchRect(GuestSurface* renderTarget, GuestSurf { srcLayout = RenderTextureLayout::RESOLVE_SOURCE; dstLayout = RenderTextureLayout::RESOLVE_DEST; - shaderResolve = false; + // shaderResolve = false; } } @@ -3801,7 +3923,7 @@ static void ExecutePendingStretchRectCommands(GuestSurface* renderTarget, GuestS else commandList->resolveTexture(texture->texture, surface->texture); - shaderResolve = false; + // shaderResolve = false; } } @@ -4058,6 +4180,25 @@ static void ProcClear(const RenderCommand& cmd) { const auto& args = cmd.clear; + // Clear invalidates aliased surface data, so update group tracking + if (args.flags & D3DCLEAR_TARGET) { + for (auto& [key, group] : g_surfaceGroups) { + if (std::find(group->surfaces.begin(), group->surfaces.end(), g_renderTarget) != group->surfaces.end()) { + group->currentSurface = g_renderTarget; + break; + } + } + } + + if (args.flags & (D3DCLEAR_ZBUFFER | D3DCLEAR_STENCIL)) { + for (auto& [key, group] : g_surfaceGroups) { + if (std::find(group->surfaces.begin(), group->surfaces.end(), g_depthStencil) != group->surfaces.end()) { + group->currentDepthSurface = g_depthStencil; + break; + } + } + } + if (PopulateBarriersForStretchRect(g_renderTarget, g_depthStencil)) { FlushBarriers(); diff --git a/MarathonRecomp/gpu/video.h b/MarathonRecomp/gpu/video.h index 09f78f26..21c56205 100644 --- a/MarathonRecomp/gpu/video.h +++ b/MarathonRecomp/gpu/video.h @@ -240,7 +240,6 @@ struct GuestSurface : GuestBaseTexture ankerl::unordered_dense::map> framebuffers; RenderSampleCounts sampleCount = RenderSampleCount::COUNT_1; ankerl::unordered_dense::map destinationTextures; - bool wasCached = false; }; enum GuestDeclType