/*
 * Copyright (C) 2017 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "QueueImpl.h"

#include "BlitPassImpl.h"
#include "BufferImpl.h"
#include "ComputePassImpl.h"
#include "HostAccessPassImpl.h"
#include "RenderPassImpl.h"
#include "TextureImpl.h"

#include <array>
#include <boost/optional.hpp>
#include <cassert>
#include <limits>

namespace WebGPU {
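
// One slot of semaphores, fences, live passes, fixup command buffers, and host-access
// indices is kept per swapchain image, so the resources recorded for a frame are only
// recycled once that frame's fences have signaled.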
QueueImpl::QueueImpl(vk::Device device, vk::CommandPool commandPool, std::vector<vk::UniqueImageView>& deviceImageViews, vk::SwapchainKHR swapchain, vk::Extent2D size, vk::Format swapchainFormat, vk::Queue&& queue) : device(device), deviceImageViews(deviceImageViews), swapchain(swapchain), commandPool(commandPool), size(size), swapchainFormat(swapchainFormat), queue(std::move(queue)) {
    assert(deviceImageViews.size() > activeSemaphoreIndex);
    for (std::size_t i = 0; i < deviceImageViews.size(); ++i) {
        swapchainBufferSemaphores.push_back({});
        swapchainBufferFences.push_back({});
        activePasses.push_back({});
        fixupCommandBuffers.push_back({});
        hostAccessIndices.push_back({});
    }
    prepareCurrentFrame();
}
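
// Resets the bookkeeping slot we are about to record into, then acquires the next
// swapchain image. The first semaphore in the slot's vector is signaled by the acquire
// and waited on by the first pass committed this frame.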
void QueueImpl::prepareCurrentFrame() {
    auto& currentSemaphoreVector = swapchainBufferSemaphores[activeSemaphoreIndex];
    currentSemaphoreVector.clear();
    swapchainBufferFences[activeSemaphoreIndex].clear();
    activePasses[activeSemaphoreIndex].clear();
    fixupCommandBuffers[activeSemaphoreIndex].clear();
    hostAccessIndices[activeSemaphoreIndex].clear();
    currentSemaphoreVector.emplace_back(device.createSemaphoreUnique(vk::SemaphoreCreateInfo()));
    auto result = device.acquireNextImageKHR(swapchain, std::numeric_limits<uint64_t>::max(), *currentSemaphoreVector[0], vk::Fence());
    assert(result.result == vk::Result::eSuccess);
    activeSwapchainIndex = result.value;
}
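
// Builds a single-subpass render pass whose color attachments match the given image
// views one-to-one: cleared on load, stored on completion, and left in ePresentSrcKHR
// (which only the swapchain attachment strictly needs).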
vk::UniqueRenderPass QueueImpl::createSpecificRenderPass(const std::vector<vk::ImageView>& imageViews, const std::vector<vk::Format>& formats) {
    std::vector<vk::AttachmentDescription> attachmentDescriptions;
    std::vector<vk::AttachmentReference> attachmentReferences;
    for (std::size_t i = 0; i < imageViews.size(); ++i) {
        auto attachmentDescription = vk::AttachmentDescription()
            .setFormat(formats[i])
            .setSamples(vk::SampleCountFlagBits::e1)
            .setLoadOp(vk::AttachmentLoadOp::eClear)
            .setStoreOp(vk::AttachmentStoreOp::eStore)
            .setStencilLoadOp(vk::AttachmentLoadOp::eDontCare)
            .setStencilStoreOp(vk::AttachmentStoreOp::eDontCare)
            .setInitialLayout(vk::ImageLayout::eUndefined)
            .setFinalLayout(vk::ImageLayout::ePresentSrcKHR);
        attachmentDescriptions.emplace_back(std::move(attachmentDescription));
        auto attachmentReference = vk::AttachmentReference().setAttachment(static_cast<uint32_t>(i)).setLayout(vk::ImageLayout::eColorAttachmentOptimal);
        attachmentReferences.emplace_back(std::move(attachmentReference));
    }
    const auto subpassDescription = vk::SubpassDescription()
        .setPipelineBindPoint(vk::PipelineBindPoint::eGraphics)
        .setInputAttachmentCount(0)
        .setPInputAttachments(nullptr)
        .setColorAttachmentCount(static_cast<uint32_t>(attachmentReferences.size()))
        .setPColorAttachments(attachmentReferences.data())
        .setPResolveAttachments(nullptr)
        .setPDepthStencilAttachment(nullptr)
        .setPreserveAttachmentCount(0)
        .setPPreserveAttachments(nullptr);
    const auto renderPassCreateInfo = vk::RenderPassCreateInfo()
        .setAttachmentCount(static_cast<uint32_t>(attachmentDescriptions.size()))
        .setPAttachments(attachmentDescriptions.data())
        .setSubpassCount(1)
        .setPSubpasses(&subpassDescription)
        .setDependencyCount(0)
        .setPDependencies(nullptr);
    return device.createRenderPassUnique(renderPassCreateInfo);
}
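
// Wraps the attachments in a framebuffer sized to the swapchain extent.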
vk::UniqueFramebuffer QueueImpl::createFramebuffer(vk::RenderPass renderpass, const std::vector<vk::ImageView>& imageViews) {
    const auto framebufferCreateInfo = vk::FramebufferCreateInfo()
        .setRenderPass(renderpass)
        .setAttachmentCount(static_cast<uint32_t>(imageViews.size()))
        .setPAttachments(imageViews.data())
        .setWidth(size.width)
        .setHeight(size.height)
        .setLayers(1);
    return device.createFramebufferUnique(framebufferCreateInfo);
}
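
// Allocates a one-time-submit command buffer and records the start of a render pass
// into it. With no textures we target the acquired swapchain image; otherwise we target
// the given textures, remembering them so commit-time barriers can cover them.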
std::unique_ptr<RenderPass> QueueImpl::createRenderPass(const std::vector<std::reference_wrapper<Texture>>* textures) {
    std::vector<vk::ImageView> imageViews;
    std::vector<vk::ClearValue> clearValues;
    std::vector<vk::Format> formats;
    std::vector<std::reference_wrapper<TextureImpl>> destinationTextures;
    if (textures == nullptr) {
        imageViews.emplace_back(*deviceImageViews[activeSwapchainIndex]);
        clearValues.emplace_back(vk::ClearColorValue(std::array<float, 4>({ 0.0f, 0.0f, 0.0f, 1.0f })));
        formats.push_back(swapchainFormat);
    } else {
        for (auto& textureWrapper : *textures) {
            Texture& texture = textureWrapper;
            auto* downcast = dynamic_cast<TextureImpl*>(&texture);
            assert(downcast != nullptr);
            TextureImpl& downcastedTexture = *downcast;
            destinationTextures.emplace_back(downcastedTexture);
            imageViews.emplace_back(downcastedTexture.getImageView());
            clearValues.emplace_back(vk::ClearColorValue(std::array<float, 4>({ 0.0f, 0.0f, 0.0f, 1.0f })));
            formats.emplace_back(downcastedTexture.getFormat());
        }
    }
    const auto commandBufferAllocateInfo = vk::CommandBufferAllocateInfo().setCommandPool(commandPool).setLevel(vk::CommandBufferLevel::ePrimary).setCommandBufferCount(1);
    auto commandBuffers = device.allocateCommandBuffersUnique(commandBufferAllocateInfo);
    auto& commandBuffer = commandBuffers[0];
    const auto commandBufferBeginInfo = vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
    commandBuffer->begin(commandBufferBeginInfo);
    auto renderPass = createSpecificRenderPass(imageViews, formats);
    auto framebuffer = createFramebuffer(*renderPass, imageViews);
    const auto renderPassBeginInfo = vk::RenderPassBeginInfo()
        .setRenderPass(*renderPass)
        .setFramebuffer(*framebuffer)
        .setRenderArea(vk::Rect2D(vk::Offset2D(0, 0), size))
        .setClearValueCount(static_cast<uint32_t>(clearValues.size()))
        .setPClearValues(clearValues.data());
    commandBuffer->beginRenderPass(renderPassBeginInfo, vk::SubpassContents::eInline);
    return std::unique_ptr<RenderPass>(new RenderPassImpl(device, std::move(commandBuffer), std::move(framebuffer), std::move(renderPass), destinationTextures));
}
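
// The commit* functions below share one pattern: downcast the public pass type to
// PassImpl, take ownership out of the caller's unique_ptr, submit the recorded commands,
// and keep the pass alive until this frame slot's fences signal.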
void QueueImpl::commitRenderPass(std::unique_ptr<RenderPass>&& renderPass) {
    auto* downcast = dynamic_cast<PassImpl*>(renderPass.get());
    assert(downcast != nullptr);
    renderPass.release();
    std::unique_ptr<PassImpl> pass(downcast);
    commitPass(*pass);
    activePasses[activeSemaphoreIndex].emplace_back(std::move(pass));
}
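
// Like createRenderPass(), the remaining create* helpers allocate a fresh primary
// command buffer from the shared pool and begin it in one-time-submit mode.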
std::unique_ptr<ComputePass> QueueImpl::createComputePass() {
    const auto commandBufferAllocateInfo = vk::CommandBufferAllocateInfo().setCommandPool(commandPool).setLevel(vk::CommandBufferLevel::ePrimary).setCommandBufferCount(1);
    auto commandBuffers = device.allocateCommandBuffersUnique(commandBufferAllocateInfo);
    auto& commandBuffer = commandBuffers[0];
    const auto commandBufferBeginInfo = vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
    commandBuffer->begin(commandBufferBeginInfo);
    return std::unique_ptr<ComputePass>(new ComputePassImpl(device, std::move(commandBuffer)));
}

void QueueImpl::commitComputePass(std::unique_ptr<ComputePass>&& computePass) {
    auto* downcast = dynamic_cast<PassImpl*>(computePass.get());
    assert(downcast != nullptr);
    computePass.release();
    std::unique_ptr<PassImpl> pass(downcast);
    commitPass(*pass);
    activePasses[activeSemaphoreIndex].emplace_back(std::move(pass));
}

std::unique_ptr<BlitPass> QueueImpl::createBlitPass() {
    const auto commandBufferAllocateInfo = vk::CommandBufferAllocateInfo().setCommandPool(commandPool).setLevel(vk::CommandBufferLevel::ePrimary).setCommandBufferCount(1);
    auto commandBuffers = device.allocateCommandBuffersUnique(commandBufferAllocateInfo);
    auto& commandBuffer = commandBuffers[0];
    const auto commandBufferBeginInfo = vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
    commandBuffer->begin(commandBufferBeginInfo);
    return std::unique_ptr<BlitPass>(new BlitPassImpl(device, std::move(commandBuffer)));
}

void QueueImpl::commitBlitPass(std::unique_ptr<BlitPass>&& blitPass) {
    auto* downcast = dynamic_cast<PassImpl*>(blitPass.get());
    assert(downcast != nullptr);
    blitPass.release();
    std::unique_ptr<PassImpl> pass(downcast);
    commitPass(*pass);
    activePasses[activeSemaphoreIndex].emplace_back(std::move(pass));
}

std::unique_ptr<HostAccessPass> QueueImpl::createHostAccessPass() {
    const auto commandBufferAllocateInfo = vk::CommandBufferAllocateInfo().setCommandPool(commandPool).setLevel(vk::CommandBufferLevel::ePrimary).setCommandBufferCount(1);
    auto commandBuffers = device.allocateCommandBuffersUnique(commandBufferAllocateInfo);
    auto& commandBuffer = commandBuffers[0];
    const auto commandBufferBeginInfo = vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
    commandBuffer->begin(commandBufferBeginInfo);
    return std::unique_ptr<HostAccessPass>(new HostAccessPassImpl(device, std::move(commandBuffer)));
}
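
// Host access passes are submitted like any other pass, but their position in the
// frame's pass list is recorded so present() knows where to pause fence-waiting, run
// the CPU-side work, and signal the pass's finished event.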
void QueueImpl::commitHostAccessPass(std::unique_ptr<HostAccessPass>&& hostAccessPass) {
    {
        auto* downcast = dynamic_cast<HostAccessPassImpl*>(hostAccessPass.get());
        assert(downcast != nullptr);
        auto& pass = *downcast;
        // FIXME: Asking the GPU to wait on the CPU causes my (NVidia) driver to crash inside vkQueuePresentKHR().
        // However, I don't think this is a structural problem; we could just as easily have created a new resource
        // and used the transfer queue to copy the contents.
        // pass.getCommandBuffer().waitEvents({ pass.getFinishedEvent() }, vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, {}, {}, {});
    }
    auto* downcast = dynamic_cast<PassImpl*>(hostAccessPass.get());
    assert(downcast != nullptr);
    hostAccessPass.release();
    std::unique_ptr<PassImpl> pass(downcast);
    hostAccessIndices[activeSemaphoreIndex].push_back(activePasses[activeSemaphoreIndex].size());
    commitPass(*pass);
    activePasses[activeSemaphoreIndex].emplace_back(std::move(pass));
}
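
// Presents the current frame, then rotates to the oldest frame slot and drains it:
// fences are waited on in submission order, pausing at each recorded host access pass
// to execute its CPU work, before the slot is reused for the next frame.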
void QueueImpl::present() {
    vk::Semaphore waitSemaphore = *swapchainBufferSemaphores[activeSemaphoreIndex].back();
    vk::Result result;
    const auto presentInfo = vk::PresentInfoKHR()
        .setWaitSemaphoreCount(1)
        .setPWaitSemaphores(&waitSemaphore)
        .setSwapchainCount(1)
        .setPSwapchains(&swapchain)
        .setPImageIndices(&activeSwapchainIndex)
        .setPResults(&result);
    auto result2 = queue.presentKHR(presentInfo);
    assert(result == vk::Result::eSuccess);
    assert(result2 == vk::Result::eSuccess);
    activeSemaphoreIndex = (activeSemaphoreIndex + 1) % deviceImageViews.size();
    assert(swapchainBufferFences[activeSemaphoreIndex].size() == activePasses[activeSemaphoreIndex].size());
    std::vector<vk::Fence> fences;
    std::size_t index = 0;
    for (std::size_t hostAccessIndex : hostAccessIndices[activeSemaphoreIndex]) {
        for (; index < hostAccessIndex; ++index)
            fences.push_back(*swapchainBufferFences[activeSemaphoreIndex][index]);
        // vkWaitForFences requires a non-empty fence list, so skip the wait when there
        // are no submissions between consecutive host access passes.
        if (!fences.empty()) {
            result = device.waitForFences(fences, VK_TRUE, std::numeric_limits<uint64_t>::max());
            assert(result == vk::Result::eSuccess);
            device.resetFences(fences);
            fences.clear();
        }
        auto* downcast = dynamic_cast<HostAccessPassImpl*>(activePasses[activeSemaphoreIndex][index].get());
        assert(downcast);
        auto& hostAccessPass = *downcast;
        hostAccessPass.execute();
        device.setEvent(hostAccessPass.getFinishedEvent());
    }
    for (; index < swapchainBufferFences[activeSemaphoreIndex].size(); ++index)
        fences.push_back(*swapchainBufferFences[activeSemaphoreIndex][index]);
    if (!fences.empty()) {
        result = device.waitForFences(fences, VK_TRUE, std::numeric_limits<uint64_t>::max());
        assert(result == vk::Result::eSuccess);
        device.resetFences(fences);
    }
    prepareCurrentFrame();
}
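
// Each submission waits on the previous semaphore in this frame's chain and signals a
// new one, so passes execute in commit order; the per-submission fence tells present()
// when the work has finished. The pass's commands are bracketed by two fixup command
// buffers full of synchronization barriers.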
void QueueImpl::commitPass(PassImpl& pass) {
    pass.getCommandBuffer().end();
    const vk::PipelineStageFlags waitStageMask[] = { vk::PipelineStageFlagBits::eTopOfPipe };
    vk::Semaphore waitSemaphore = *swapchainBufferSemaphores[activeSemaphoreIndex].back();
    swapchainBufferSemaphores[activeSemaphoreIndex].emplace_back(device.createSemaphoreUnique(vk::SemaphoreCreateInfo()));
    vk::Semaphore signalSemaphore = *swapchainBufferSemaphores[activeSemaphoreIndex].back();
    swapchainBufferFences[activeSemaphoreIndex].push_back(device.createFenceUnique(vk::FenceCreateInfo()));
    vk::Fence signalFence = *swapchainBufferFences[activeSemaphoreIndex].back();
    // FIXME: We may want to defer this to Present time in order to coalesce the barriers better than just doing them lazily here.
    // FIXME: Compute-only workflows may never call present, which means we need some sort of flush() call. Or we can just say that
    // present() is the same thing as flush, and is smart enough to not actually present if it isn't hooked up to a display.
    auto fixup1 = synchronizeResources(pass);
    auto commandBuffer = pass.getCommandBuffer();
    // FIXME: This is done eagerly because the API may want to update the contents of a resource from the CPU.
    // Instead, we should schedule this update along with the queue so we can insert the correct barriers in front of it.
    auto fixup2 = synchronizeResources(pass);
    vk::CommandBuffer commandBuffers[] = { fixup1, commandBuffer, fixup2 };
    auto submitInfo = vk::SubmitInfo()
        .setWaitSemaphoreCount(1)
        .setPWaitSemaphores(&waitSemaphore)
        .setPWaitDstStageMask(waitStageMask)
        .setCommandBufferCount(3)
        .setPCommandBuffers(commandBuffers)
        .setSignalSemaphoreCount(1)
        .setPSignalSemaphores(&signalSemaphore);
    queue.submit({ submitInfo }, signalFence);
}
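
// Records a "fixup" command buffer containing deliberately coarse memory barriers, one
// per buffer and image the pass touches, transitioning images to eGeneral on first use.
// Queue family indices are hardcoded to 0, so this assumes a single queue family.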
vk::CommandBuffer QueueImpl::synchronizeResources(PassImpl& pass) {
    const auto commandBufferAllocateInfo = vk::CommandBufferAllocateInfo().setCommandPool(commandPool).setLevel(vk::CommandBufferLevel::ePrimary).setCommandBufferCount(1);
    auto commandBuffers = device.allocateCommandBuffersUnique(commandBufferAllocateInfo);
    auto& commandBuffer = commandBuffers[0];
    const auto commandBufferBeginInfo = vk::CommandBufferBeginInfo().setFlags(vk::CommandBufferUsageFlagBits::eOneTimeSubmit);
    commandBuffer->begin(commandBufferBeginInfo);
    // FIXME: Issue smaller, cheaper, more specific barriers than these.
    std::vector<vk::BufferMemoryBarrier> bufferMemoryBarriers;
    pass.iterateBuffers([&](BufferImpl& buffer) {
        bufferMemoryBarriers.emplace_back(vk::BufferMemoryBarrier()
            .setSrcAccessMask(vk::AccessFlagBits::eIndirectCommandRead | vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eHostRead | vk::AccessFlagBits::eHostWrite)
            .setDstAccessMask(vk::AccessFlagBits::eIndirectCommandRead | vk::AccessFlagBits::eIndexRead | vk::AccessFlagBits::eVertexAttributeRead | vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eHostRead | vk::AccessFlagBits::eHostWrite)
            .setSrcQueueFamilyIndex(0)
            .setDstQueueFamilyIndex(0)
            .setBuffer(buffer.getBuffer())
            .setOffset(0)
            .setSize(VK_WHOLE_SIZE));
    });
    std::vector<vk::ImageMemoryBarrier> imageMemoryBarriers;
    pass.iterateTextures([&](TextureImpl& texture) {
        imageMemoryBarriers.emplace_back(vk::ImageMemoryBarrier()
            .setSrcAccessMask(vk::AccessFlagBits::eInputAttachmentRead | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eHostRead | vk::AccessFlagBits::eHostWrite)
            .setDstAccessMask(vk::AccessFlagBits::eInputAttachmentRead | vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite | vk::AccessFlagBits::eHostRead | vk::AccessFlagBits::eHostWrite)
            .setOldLayout(texture.getTransferredToGPU() ? vk::ImageLayout::eGeneral : vk::ImageLayout::ePreinitialized)
            .setNewLayout(vk::ImageLayout::eGeneral)
            .setSrcQueueFamilyIndex(0)
            .setDstQueueFamilyIndex(0)
            .setImage(texture.getImage())
            .setSubresourceRange(vk::ImageSubresourceRange(vk::ImageAspectFlagBits::eColor, 0, 1, 0, 1)));
        texture.setTransferredToGPU(true);
    });
    commandBuffer->pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eAllCommands, vk::DependencyFlagBits(), {}, bufferMemoryBarriers, imageMemoryBarriers);
    commandBuffer->end();
    auto& frameCommandBuffers = fixupCommandBuffers[activeSemaphoreIndex];
    frameCommandBuffers.push_back(std::move(commandBuffer));
    return *frameCommandBuffers.back();
}
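
// Drain every frame slot before the Vulkan objects are destroyed, mirroring the
// per-slot wait in present().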
QueueImpl::~QueueImpl() {
    for (std::size_t i = 0; i < deviceImageViews.size(); ++i) {
        activeSemaphoreIndex = (activeSemaphoreIndex + 1) % deviceImageViews.size();
        assert(swapchainBufferFences[activeSemaphoreIndex].size() == activePasses[activeSemaphoreIndex].size());
        std::vector<vk::Fence> fences;
        for (std::size_t index = 0; index < swapchainBufferFences[activeSemaphoreIndex].size(); ++index)
            fences.push_back(*swapchainBufferFences[activeSemaphoreIndex][index]);
        // As in present(), only wait when this slot actually has outstanding submissions.
        if (!fences.empty()) {
            auto result = device.waitForFences(fences, VK_TRUE, std::numeric_limits<uint64_t>::max());
            assert(result == vk::Result::eSuccess);
            device.resetFences(fences);
        }
        swapchainBufferSemaphores[activeSemaphoreIndex].clear();
        swapchainBufferFences[activeSemaphoreIndex].clear();
        activePasses[activeSemaphoreIndex].clear();
        fixupCommandBuffers[activeSemaphoreIndex].clear();
        hostAccessIndices[activeSemaphoreIndex].clear();
    }
}

} // namespace WebGPU