Skip to content
Closed
7 changes: 7 additions & 0 deletions include/API/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,20 @@

namespace offloadtest {

// How a Buffer is going to be used. Supplied at creation time through
// BufferCreateDesc::Usage.
enum class BufferUsage {
// General storage buffer. This is the first enumerator, so it is also the
// value a value-initialized BufferCreateDesc ends up with. The DX backend
// uses it for the render-target readback buffer.
Storage,
// Buffer that holds vertex data. The DX backend uses it for the buffer it
// fills with the interleaved vertex data and binds as vertex input.
VertexBuffer,
};

// Creation parameters for a Buffer. Callers in the DX backend value-initialize
// this struct (`BufferCreateDesc Desc = {};`) and then set both fields.
struct BufferCreateDesc {
// Memory location of the buffer. The DX backend translates it to a D3D12 heap
// type; values seen in use are MemoryLocation::GpuToCpu (readback) and
// MemoryLocation::CpuToGpu (upload).
MemoryLocation Location;
// Intended usage of the buffer; see BufferUsage.
BufferUsage Usage;
};

class Buffer {
public:
virtual ~Buffer();
virtual size_t getSizeInBytes() const = 0;

Buffer(const Buffer &) = delete;
Buffer &operator=(const Buffer &) = delete;
Expand Down
29 changes: 29 additions & 0 deletions include/API/Resources.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,35 @@ inline uint32_t getFormatSizeInBytes(Format Format) {
llvm_unreachable("All Format cases handled");
}

// Number of components (channels) that make up a single element of the given
// format, e.g. 1 for R32Float and 4 for RGBA32Float. The combined
// depth/stencil format D32FloatS8Uint is reported as two components.
inline uint32_t getComponentCount(Format Format) {
  switch (Format) {
  // Four-component formats.
  case Format::RGBA32Float:
  case Format::RGBA32Uint:
  case Format::RGBA32Sint:
  case Format::RGBA16Uint:
  case Format::RGBA16Sint:
    return 4;

  // Three-component formats.
  case Format::RGB32Float:
    return 3;

  // Two-component formats (including depth + stencil).
  case Format::D32FloatS8Uint:
  case Format::RG32Float:
  case Format::RG32Uint:
  case Format::RG32Sint:
  case Format::RG16Uint:
  case Format::RG16Sint:
    return 2;

  // Single-component formats.
  case Format::D32Float:
  case Format::R32Float:
  case Format::R32Uint:
  case Format::R32Sint:
  case Format::R16Uint:
  case Format::R16Sint:
    return 1;
  }
  // Every enumerator returns above; reaching this point means the value was
  // not a valid Format.
  llvm_unreachable("All Format cases handled");
}

inline bool isDepthFormat(Format Format) {
switch (Format) {
case Format::R16Sint:
Expand Down
114 changes: 93 additions & 21 deletions include/Support/Pipeline.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@
#ifndef OFFLOADTEST_SUPPORT_PIPELINE_H
#define OFFLOADTEST_SUPPORT_PIPELINE_H

#include "API/Resources.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include <cassert>
#include <limits>
#include <memory>
#include <string>
Expand Down Expand Up @@ -336,35 +340,65 @@ struct RuntimeSettings {
dx::Settings DX;
};

struct VertexAttribute {
DataFormat Format;
int Channels;
int Offset;
std::string Name;

uint32_t size() const { return getFormatSize(Format) * Channels; }
// Parsed vertex stream from the YAML VertexBuffers section. Holds per-stream
// data before interleaving into the final vertex buffer.
//
// Values are stored as doubles (the YAML parser's native numeric type) rather
// than in the target format's storage type. This avoids needing format-specific
// parsing and lets us derive the vertex count directly from the number of
// values.
//
// Conversion to the target byte representation happens during interleaving
// into ParsedVertexBuffer::InterleavedData.
struct VertexStreamData {
std::string Name; // Semantic name (e.g. POSITION, COLOR).
// Element format of this stream. Its size in bytes (getFormatSizeInBytes) is
// what ParsedVertexBuffer sums to obtain the vertex stride and the per-stream
// byte offsets.
Format Fmt;
// Parsed numeric values for this stream, kept as doubles until they are
// interleaved.
// NOTE(review): presumably getComponentCount(Fmt) consecutive values per
// vertex -- confirm against the YAML mapping implementation.
llvm::SmallVector<double> Values;
};

struct IOBindings {
std::string VertexBuffer;
CPUBuffer *VertexBufferPtr;
llvm::SmallVector<VertexAttribute> VertexAttributes;

std::string RenderTarget;
CPUBuffer *RTargetBufferPtr;
// Parsed vertex buffer from the YAML VertexBuffers section. The parser
// interleaves per-stream data into InterleavedData.
//
// TODO: Add support for de-interleaved data?
struct ParsedVertexBuffer {
std::string Name;
llvm::SmallVector<VertexStreamData> Streams;
// Interleaved vertex data, computed by the parser from per-stream data.
std::unique_ptr<char[]> InterleavedData;
size_t InterleavedSize = 0;

uint32_t getVertexStride() const {
uint32_t getStride() const {
uint32_t Stride = 0;
for (auto VA : VertexAttributes)
Stride += VA.size();
for (const auto &S : Streams)
Stride += getFormatSizeInBytes(S.Fmt);
return Stride;
}

// Returns the byte offset of the stream at the given index.
uint32_t getOffset(uint32_t Index) const {
assert(Index < Streams.size() && "Stream index out of bounds");
uint32_t Offset = 0;
for (uint32_t I = 0; I < Index; ++I)
Offset += getFormatSizeInBytes(Streams[I].Fmt);
return Offset;
}

uint32_t getVertexCount() const {
return VertexBufferPtr->size() / getVertexStride();
uint32_t Stride = getStride();
if (Stride == 0)
return 0;
return InterleavedSize / Stride;
}
};

// Vertex-input and render-target bindings of a pipeline: a name plus a
// pointer for each. Both pointers start out null.
struct IOBindings {
// Name of the vertex buffer to bind.
// NOTE(review): presumably matched against ParsedVertexBuffer::Name (see
// Pipeline::getVertexBuffer) to fill in VertexBufferPtr -- confirm in the
// parser.
std::string VertexBuffer;
// Parsed vertex buffer used for drawing. The DX backend reads its interleaved
// data, stride, stream layout and vertex count through this pointer.
ParsedVertexBuffer *VertexBufferPtr = nullptr;

// Name of the render target to bind.
// NOTE(review): presumably resolved to RTargetBufferPtr the same way the
// vertex buffer name is -- confirm in the parser.
std::string RenderTarget;
// CPU-side buffer associated with the render target.
CPUBuffer *RTargetBufferPtr = nullptr;
};

// Describes a contiguous group of bytes in a push constant block.
struct PushConstantValue {
// Format used to describe those bytes in the YAML.
Expand Down Expand Up @@ -415,6 +449,7 @@ struct Pipeline {
IOBindings Bindings;
llvm::SmallVector<PushConstantBlock> PushConstants;
llvm::SmallVector<CPUBuffer> Buffers;
llvm::SmallVector<ParsedVertexBuffer> VertexBuffers;
llvm::SmallVector<Sampler> Samplers;
llvm::SmallVector<Result> Results;
llvm::SmallVector<DescriptorSet> Sets;
Expand All @@ -441,6 +476,13 @@ struct Pipeline {
return nullptr;
}

// Finds a parsed vertex buffer by name. Returns a pointer to the first entry
// of VertexBuffers whose Name equals the argument, or nullptr if there is no
// such entry. The pointer refers to an element stored in VertexBuffers.
ParsedVertexBuffer *getVertexBuffer(llvm::StringRef Name) {
  for (auto It = VertexBuffers.begin(), End = VertexBuffers.end(); It != End;
       ++It) {
    if (Name == It->Name)
      return &*It;
  }
  return nullptr;
}

Sampler *getSampler(llvm::StringRef Name) {
for (auto &S : Samplers)
if (Name == S.Name)
Expand All @@ -463,7 +505,8 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::Sampler)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::Shader)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::dx::RootParameter)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::Result)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::VertexAttribute)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::VertexStreamData)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::ParsedVertexBuffer)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::SpecializationConstant)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::PushConstantBlock)
LLVM_YAML_IS_SEQUENCE_VECTOR(offloadtest::PushConstantValue)
Expand Down Expand Up @@ -515,8 +558,12 @@ template <> struct MappingTraits<offloadtest::PushConstantBlock> {
static void mapping(IO &I, offloadtest::PushConstantBlock &B);
};

template <> struct MappingTraits<offloadtest::VertexAttribute> {
static void mapping(IO &I, offloadtest::VertexAttribute &A);
// YAML mapping for a single vertex stream (offloadtest::VertexStreamData).
// mapping() is only declared here and is defined out of line.
template <> struct MappingTraits<offloadtest::VertexStreamData> {
static void mapping(IO &I, offloadtest::VertexStreamData &S);
};

// YAML mapping for a whole vertex buffer (offloadtest::ParsedVertexBuffer).
// mapping() is only declared here and is defined out of line.
template <> struct MappingTraits<offloadtest::ParsedVertexBuffer> {
static void mapping(IO &I, offloadtest::ParsedVertexBuffer &VB);
};

template <> struct MappingTraits<offloadtest::OutputProperties> {
Expand Down Expand Up @@ -547,6 +594,31 @@ template <> struct MappingTraits<offloadtest::SpecializationConstant> {
static void mapping(IO &I, offloadtest::SpecializationConstant &C);
};

template <> struct ScalarEnumerationTraits<offloadtest::Format> {
static void enumeration(IO &I, offloadtest::Format &V) {
#define ENUM_CASE(Val) I.enumCase(V, #Val, offloadtest::Format::Val)
ENUM_CASE(R16Sint);
ENUM_CASE(R16Uint);
ENUM_CASE(RG16Sint);
ENUM_CASE(RG16Uint);
ENUM_CASE(RGBA16Sint);
ENUM_CASE(RGBA16Uint);
ENUM_CASE(R32Sint);
ENUM_CASE(R32Uint);
ENUM_CASE(R32Float);
ENUM_CASE(RG32Sint);
ENUM_CASE(RG32Uint);
ENUM_CASE(RG32Float);
ENUM_CASE(RGB32Float);
ENUM_CASE(RGBA32Sint);
ENUM_CASE(RGBA32Uint);
ENUM_CASE(RGBA32Float);
ENUM_CASE(D32Float);
ENUM_CASE(D32FloatS8Uint);
#undef ENUM_CASE
}
};

template <> struct ScalarEnumerationTraits<offloadtest::Rule> {
static void enumeration(IO &I, offloadtest::Rule &V) {
#define ENUM_CASE(Val) I.enumCase(V, #Val, offloadtest::Rule::Val)
Expand Down
69 changes: 41 additions & 28 deletions lib/API/DX/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

#include "API/Capabilities.h"
#include "API/Device.h"
#include "API/FormatConversion.h"
#include "DXFeatures.h"
#include "Support/Pipeline.h"
#include "Support/WinError.h"
Expand Down Expand Up @@ -293,6 +294,8 @@ class DXBuffer : public offloadtest::Buffer {
// Stores the given D3D12 resource together with its name, creation descriptor
// and size in bytes. The body is empty: the constructor only initializes
// members from its arguments.
DXBuffer(ComPtr<ID3D12Resource> Buffer, llvm::StringRef Name,
BufferCreateDesc Desc, size_t SizeInBytes)
: Buffer(Buffer), Name(Name), Desc(Desc), SizeInBytes(SizeInBytes) {}

// Implements Buffer::getSizeInBytes: returns the size that was passed to the
// constructor.
size_t getSizeInBytes() const override { return SizeInBytes; }
};

class DXTexture : public offloadtest::Texture {
Expand Down Expand Up @@ -482,7 +485,7 @@ class DXDevice : public offloadtest::Device {
std::shared_ptr<DXTexture> RT;
std::shared_ptr<DXBuffer> RTReadback;
std::shared_ptr<DXTexture> DS;
ComPtr<ID3D12Resource> VB;
std::shared_ptr<DXBuffer> VB;

llvm::SmallVector<DescriptorTable> DescTables;
llvm::SmallVector<ResourcePair> RootResources;
Expand Down Expand Up @@ -514,9 +517,9 @@ class DXDevice : public offloadtest::Device {
const D3D12_HEAP_TYPE HeapType = getDXHeapType(Desc.Location);

const D3D12_RESOURCE_FLAGS Flags =
HeapType == D3D12_HEAP_TYPE_READBACK
? D3D12_RESOURCE_FLAG_NONE
: D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
HeapType == D3D12_HEAP_TYPE_DEFAULT
? D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS
: D3D12_RESOURCE_FLAG_NONE;

const D3D12_HEAP_PROPERTIES HeapProps = CD3DX12_HEAP_PROPERTIES(HeapType);
const D3D12_RESOURCE_DESC BufferDesc =
Expand Down Expand Up @@ -1585,6 +1588,7 @@ class DXDevice : public offloadtest::Device {
// Create readback buffer sized for the pixel data (raw bytes).
BufferCreateDesc BufDesc = {};
BufDesc.Location = MemoryLocation::GpuToCpu;
BufDesc.Usage = BufferUsage::Storage;
auto BufOrErr = createBuffer("RTReadback", BufDesc, OutBuf.size());
if (!BufOrErr)
return BufOrErr.takeError();
Expand All @@ -1608,29 +1612,31 @@ class DXDevice : public offloadtest::Device {
return llvm::createStringError(
std::errc::invalid_argument,
"No vertex buffer bound for graphics pipeline.");
const CPUBuffer &VB = *P.Bindings.VertexBufferPtr;
const uint64_t VBSize = VB.size();
D3D12_RESOURCE_DESC const Desc = CD3DX12_RESOURCE_DESC::Buffer(VBSize);
CD3DX12_HEAP_PROPERTIES HeapProps =
CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_UPLOAD);
if (auto Err = HR::toError(Device->CreateCommittedResource(
&HeapProps, D3D12_HEAP_FLAG_NONE, &Desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
IID_PPV_ARGS(&IS.VB)),
"Failed to create vertex buffer"))
return Err;

const ParsedVertexBuffer &PVB = *P.Bindings.VertexBufferPtr;

BufferCreateDesc BufDesc = {};
BufDesc.Location = MemoryLocation::CpuToGpu;
BufDesc.Usage = BufferUsage::VertexBuffer;
auto BufOrErr = createBuffer("VertexBuffer", BufDesc, PVB.InterleavedSize);
if (!BufOrErr)
return BufOrErr.takeError();
IS.VB = std::static_pointer_cast<DXBuffer>(*BufOrErr);

// TODO: Currently uses a single CpuToGpu mapped buffer. For optimal GPU
// performance on discrete GPUs, use a staging buffer + copy to a GpuOnly
// vertex buffer instead.
void *Ptr = nullptr;
if (auto Err = HR::toError(IS.VB->Map(0, nullptr, &Ptr),
if (auto Err = HR::toError(IS.VB->Buffer->Map(0, nullptr, &Ptr),
"Failed to map vertex buffer"))
return Err;
memcpy(Ptr, VB.Data[0].get(), VBSize);
IS.VB->Unmap(0, nullptr);
memcpy(Ptr, PVB.InterleavedData.get(), IS.VB->getSizeInBytes());
IS.VB->Buffer->Unmap(0, nullptr);

D3D12_VERTEX_BUFFER_VIEW VBView = {};
VBView.BufferLocation = IS.VB->GetGPUVirtualAddress();
VBView.SizeInBytes = static_cast<UINT>(VBSize);
VBView.StrideInBytes = P.Bindings.getVertexStride();
VBView.BufferLocation = IS.VB->Buffer->GetGPUVirtualAddress();
VBView.SizeInBytes = static_cast<UINT>(IS.VB->getSizeInBytes());
VBView.StrideInBytes = PVB.getStride();

IS.CB->CmdList->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
IS.CB->CmdList->IASetVertexBuffers(0, 1, &VBView);
Expand All @@ -1639,13 +1645,16 @@ class DXDevice : public offloadtest::Device {
}

llvm::Error createGraphicsPSO(Pipeline &P, InvocationState &IS) {
// Create the input layout based on the vertex attributes.
if (!IS.VB)
return llvm::createStringError(std::errc::invalid_argument,
"Vertex buffer not initialized.");
// Create the input layout from the parsed vertex buffer streams.
const ParsedVertexBuffer &PVB = *P.Bindings.VertexBufferPtr;
std::vector<D3D12_INPUT_ELEMENT_DESC> InputLayout;
for (size_t I = 0; I < P.Bindings.VertexAttributes.size(); ++I) {
const VertexAttribute &Attr = P.Bindings.VertexAttributes[I];
InputLayout.push_back({Attr.Name.c_str(), 0,
getDXFormat(Attr.Format, Attr.Channels), 0,
static_cast<UINT>(Attr.Offset),
for (uint32_t I = 0; I < PVB.Streams.size(); ++I) {
const VertexStreamData &S = PVB.Streams[I];
InputLayout.push_back({S.Name.c_str(), 0, getDXGIFormat(S.Fmt), 0,
PVB.getOffset(I),
D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0});
}

Expand Down Expand Up @@ -1699,6 +1708,9 @@ class DXDevice : public offloadtest::Device {
}

llvm::Error createGraphicsCommands(Pipeline &P, InvocationState &IS) {
if (!IS.VB)
return llvm::createStringError(std::errc::invalid_argument,
"Vertex buffer not initialized.");
IS.CB->CmdList->SetGraphicsRootSignature(IS.RootSig.Get());
if (IS.DescHeap) {
ID3D12DescriptorHeap *const Heaps[] = {IS.DescHeap.Get()};
Expand Down Expand Up @@ -1735,7 +1747,8 @@ class DXDevice : public offloadtest::Device {
static_cast<LONG>(VP.Height)};
IS.CB->CmdList->RSSetScissorRects(1, &Scissor);

IS.CB->CmdList->DrawInstanced(P.Bindings.getVertexCount(), 1, 0, 0);
IS.CB->CmdList->DrawInstanced(P.Bindings.VertexBufferPtr->getVertexCount(),
1, 0, 0);

// Transition the render target to copy source and copy to the readback
// buffer.
Expand Down
Loading
Loading