Video: Clearly separate Texture and EFB Copy formats

Improve bookkeeping around formats. Hopefully make code less confusing.

- Rename TlutFormat -> TLUTFormat to follow conventions.
- Use enum classes to prevent using a Texture format where an EFB Copy format
  is expected or vice-versa.
- Use common EFBCopyFormat names regardless of depth and YUV configurations.
This commit is contained in:
N.E.C 2017-07-30 12:45:55 -07:00
parent 9649494f67
commit c3a57bbad5
27 changed files with 1275 additions and 1319 deletions

View File

@ -87,9 +87,9 @@ void PSTextureEncoder::Shutdown()
SAFE_RELEASE(m_out); SAFE_RELEASE(m_out);
} }
void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_width, void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) const EFBRectangle& src_rect, bool scale_by_half)
{ {
if (!m_ready) // Make sure we initialized OK if (!m_ready) // Make sure we initialized OK
return; return;
@ -100,7 +100,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
// FIXME: Instead of resolving EFB, it would be better to pick out a // FIXME: Instead of resolving EFB, it would be better to pick out a
// single sample from each pixel. The game may break if it isn't // single sample from each pixel. The game may break if it isn't
// expecting the blurred edges around multisampled shapes. // expecting the blurred edges around multisampled shapes.
ID3D11ShaderResourceView* pEFB = is_depth_copy ? ID3D11ShaderResourceView* pEFB = params.depth ?
FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() : FramebufferManager::GetResolvedEFBDepthTexture()->GetSRV() :
FramebufferManager::GetResolvedEFBColorTexture()->GetSRV(); FramebufferManager::GetResolvedEFBColorTexture()->GetSRV();
@ -119,12 +119,12 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr); D3D::context->OMSetRenderTargets(1, &m_outRTV, nullptr);
EFBEncodeParams params; EFBEncodeParams encode_params;
params.SrcLeft = src_rect.left; encode_params.SrcLeft = src_rect.left;
params.SrcTop = src_rect.top; encode_params.SrcTop = src_rect.top;
params.DestWidth = native_width; encode_params.DestWidth = native_width;
params.ScaleFactor = scale_by_half ? 2 : 1; encode_params.ScaleFactor = scale_by_half ? 2 : 1;
D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &params, 0, 0); D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &encode_params, 0, 0);
D3D::stateman->SetPixelConstants(m_encodeParams); D3D::stateman->SetPixelConstants(m_encodeParams);
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x // We also use linear filtering for both box filtering and downsampling higher resolutions to 1x
@ -137,7 +137,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
D3D::SetPointCopySampler(); D3D::SetPointCopySampler();
D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(), D3D::drawShadedTexQuad(pEFB, targetRect.AsRECT(), g_renderer->GetTargetWidth(),
g_renderer->GetTargetHeight(), GetEncodingPixelShader(format), g_renderer->GetTargetHeight(), GetEncodingPixelShader(params),
VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleVertexShader(),
VertexShaderCache::GetSimpleInputLayout()); VertexShaderCache::GetSimpleInputLayout());
@ -168,18 +168,18 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyFormat& format, u32 native_w
FramebufferManager::GetEFBDepthTexture()->GetDSV()); FramebufferManager::GetEFBDepthTexture()->GetDSV());
} }
ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyFormat& format) ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyParams& params)
{ {
auto iter = m_encoding_shaders.find(format); auto iter = m_encoding_shaders.find(params);
if (iter != m_encoding_shaders.end()) if (iter != m_encoding_shaders.end())
return iter->second; return iter->second;
D3DBlob* bytecode = nullptr; D3DBlob* bytecode = nullptr;
const char* shader = TextureConversionShader::GenerateEncodingShader(format, APIType::D3D); const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::D3D);
if (!D3D::CompilePixelShader(shader, &bytecode)) if (!D3D::CompilePixelShader(shader, &bytecode))
{ {
PanicAlert("Failed to compile texture encoding shader."); PanicAlert("Failed to compile texture encoding shader.");
m_encoding_shaders[format] = nullptr; m_encoding_shaders[params] = nullptr;
return nullptr; return nullptr;
} }
@ -188,7 +188,7 @@ ID3D11PixelShader* PSTextureEncoder::GetEncodingPixelShader(const EFBCopyFormat&
D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), nullptr, &newShader); D3D::device->CreatePixelShader(bytecode->Data(), bytecode->Size(), nullptr, &newShader);
CHECK(SUCCEEDED(hr), "create efb encoder pixel shader"); CHECK(SUCCEEDED(hr), "create efb encoder pixel shader");
m_encoding_shaders.emplace(format, newShader); m_encoding_shaders.emplace(params, newShader);
return newShader; return newShader;
} }
} }

View File

@ -32,12 +32,12 @@ public:
void Init(); void Init();
void Shutdown(); void Shutdown();
void Encode(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
bool scale_by_half); bool scale_by_half);
private: private:
ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyFormat& format); ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params);
bool m_ready; bool m_ready;
@ -45,6 +45,6 @@ private:
ID3D11RenderTargetView* m_outRTV; ID3D11RenderTargetView* m_outRTV;
ID3D11Texture2D* m_outStage; ID3D11Texture2D* m_outStage;
ID3D11Buffer* m_encodeParams; ID3D11Buffer* m_encodeParams;
std::map<EFBCopyFormat, ID3D11PixelShader*> m_encoding_shaders; std::map<EFBCopyParams, ID3D11PixelShader*> m_encoding_shaders;
}; };
} }

View File

@ -38,12 +38,12 @@ std::unique_ptr<AbstractTexture> TextureCache::CreateTexture(const TextureConfig
return std::make_unique<DXTexture>(config); return std::make_unique<DXTexture>(config);
} }
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) const EFBRectangle& src_rect, bool scale_by_half)
{ {
g_encoder->Encode(dst, format, native_width, bytes_per_row, num_blocks_y, memory_stride, g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect,
is_depth_copy, src_rect, scale_by_half); scale_by_half);
} }
const char palette_shader[] = const char palette_shader[] =
@ -126,8 +126,8 @@ void main(
} }
)HLSL"; )HLSL";
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette, void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
TlutFormat format) const void* palette, TLUTFormat format)
{ {
DXTexture* source_texture = static_cast<DXTexture*>(source->texture.get()); DXTexture* source_texture = static_cast<DXTexture*>(source->texture.get());
DXTexture* destination_texture = static_cast<DXTexture*>(destination->texture.get()); DXTexture* destination_texture = static_cast<DXTexture*>(destination->texture.get());
@ -144,7 +144,7 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
D3D::stateman->SetTexture(1, palette_buf_srv); D3D::stateman->SetTexture(1, palette_buf_srv);
// TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.)
float params[4] = {(source->format & 0xf) == GX_TF_I4 ? 15.f : 255.f}; float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f};
D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0); D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0);
D3D::stateman->SetPixelConstants(palette_uniform); D3D::stateman->SetPixelConstants(palette_uniform);
@ -163,8 +163,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
// Create texture copy // Create texture copy
D3D::drawShadedTexQuad( D3D::drawShadedTexQuad(
source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(), source_texture->GetRawTexIdentifier()->GetSRV(), &sourcerect, source->GetWidth(),
source->GetHeight(), palette_pixel_shader[format], VertexShaderCache::GetSimpleVertexShader(), source->GetHeight(), palette_pixel_shader[static_cast<int>(format)],
VertexShaderCache::GetSimpleInputLayout(), GeometryShaderCache::GetCopyGeometryShader()); VertexShaderCache::GetSimpleVertexShader(), VertexShaderCache::GetSimpleInputLayout(),
GeometryShaderCache::GetCopyGeometryShader());
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(),
FramebufferManager::GetEFBDepthTexture()->GetDSV()); FramebufferManager::GetEFBDepthTexture()->GetDSV());
@ -190,9 +191,9 @@ TextureCache::TextureCache()
palette_buf = nullptr; palette_buf = nullptr;
palette_buf_srv = nullptr; palette_buf_srv = nullptr;
palette_uniform = nullptr; palette_uniform = nullptr;
palette_pixel_shader[GX_TL_IA8] = GetConvertShader("IA8"); palette_pixel_shader[static_cast<int>(TLUTFormat::IA8)] = GetConvertShader("IA8");
palette_pixel_shader[GX_TL_RGB565] = GetConvertShader("RGB565"); palette_pixel_shader[static_cast<int>(TLUTFormat::RGB565)] = GetConvertShader("RGB565");
palette_pixel_shader[GX_TL_RGB5A3] = GetConvertShader("RGB5A3"); palette_pixel_shader[static_cast<int>(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3");
auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE); auto lutBd = CD3D11_BUFFER_DESC(sizeof(u16) * 256, D3D11_BIND_SHADER_RESOURCE);
HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf); HRESULT hr = D3D::device->CreateBuffer(&lutBd, nullptr, &palette_buf);
CHECK(SUCCEEDED(hr), "create palette decoder lut buffer"); CHECK(SUCCEEDED(hr), "create palette decoder lut buffer");

View File

@ -28,12 +28,12 @@ private:
return 0; return 0;
}; };
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette, void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TlutFormat format) override; TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
const EFBRectangle& src_rect, bool scale_by_half) override; bool scale_by_half) override;
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override; bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;

View File

@ -20,14 +20,14 @@ public:
~TextureCache() {} ~TextureCache() {}
bool CompileShaders() override { return true; } bool CompileShaders() override { return true; }
void DeleteShaders() override {} void DeleteShaders() override {}
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, void* palette, void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TlutFormat format) override TLUTFormat format) override
{ {
} }
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
const EFBRectangle& src_rect, bool scale_by_half) override bool scale_by_half) override
{ {
} }

View File

@ -9,6 +9,7 @@
#include <memory> #include <memory>
#include <vector> #include <vector>
#include "Common/Assert.h"
#include "Common/GL/GLInterfaceBase.h" #include "Common/GL/GLInterfaceBase.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "Common/StringUtil.h" #include "Common/StringUtil.h"
@ -32,12 +33,18 @@ namespace OGL
{ {
static u32 s_ColorCbufid; static u32 s_ColorCbufid;
static u32 s_DepthCbufid; static u32 s_DepthCbufid;
static SHADER s_palette_pixel_shader[3];
// Groups one palette-conversion program with the uniform locations that are
// looked up for it, so all per-TLUT-format state lives in a single record.
struct PaletteShader
{
// Program object handed to ProgramShaderCache::CompileShader for this format.
SHADER shader;
// Location of the "texture_buffer_offset" uniform in `shader`.
GLuint buffer_offset_uniform;
// Location of the "multiplier" uniform in `shader`.
GLuint multiplier_uniform;
// Location of the "copy_position" uniform in `shader`.
GLuint copy_position_uniform;
// NOTE(review): glGetUniformLocation yields a signed GLint (EncodingProgram
// stores its location as GLint); confirm that GLuint is intended here.
};
// One slot per TLUT format, indexed by static_cast<int>(TLUTFormat).
static PaletteShader s_palette_shader[3];
static std::unique_ptr<StreamBuffer> s_palette_stream_buffer; static std::unique_ptr<StreamBuffer> s_palette_stream_buffer;
static GLuint s_palette_resolv_texture; static GLuint s_palette_resolv_texture = 0;
static GLuint s_palette_buffer_offset_uniform[3];
static GLuint s_palette_multiplier_uniform[3];
static GLuint s_palette_copy_position_uniform[3];
struct TextureDecodingProgramInfo struct TextureDecodingProgramInfo
{ {
@ -64,12 +71,12 @@ std::unique_ptr<AbstractTexture> TextureCache::CreateTexture(const TextureConfig
return std::make_unique<OGLTexture>(config); return std::make_unique<OGLTexture>(config);
} }
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) const EFBRectangle& src_rect, bool scale_by_half)
{ {
TextureConverter::EncodeToRamFromTexture(dst, format, native_width, bytes_per_row, num_blocks_y, TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y,
memory_stride, is_depth_copy, src_rect, scale_by_half); memory_stride, src_rect, scale_by_half);
} }
TextureCache::TextureCache() TextureCache::TextureCache()
@ -126,6 +133,23 @@ GLuint TextureCache::GetColorCopyPositionUniform() const
return m_colorCopyPositionUniform; return m_colorCopyPositionUniform;
} }
// Compiles the palette-conversion program for a single TLUT format and records
// its uniform locations in the matching s_palette_shader slot.
//
// tlutfmt: selects the slot (via static_cast<int>); asserted to be a valid
//          TLUT format.
// vcode, pcode, gcode: shader sources forwarded unchanged to
//          ProgramShaderCache::CompileShader (presumably the vertex, pixel and
//          geometry stages, judging by what the callers pass - TODO confirm).
//
// Returns false if ProgramShaderCache::CompileShader fails, in which case the
// uniform locations are left untouched; returns true otherwise.
static bool CompilePaletteShader(TLUTFormat tlutfmt, const std::string& vcode,
const std::string& pcode, const std::string& gcode)
{
// The format value is used directly as an array index below.
_assert_(IsValidTLUTFormat(tlutfmt));
PaletteShader& shader = s_palette_shader[static_cast<int>(tlutfmt)];
if (!ProgramShaderCache::CompileShader(shader.shader, vcode, pcode, gcode))
return false;

// Look the uniform locations up once here so that users of the shader can
// read them from the struct instead of querying GL again.
shader.buffer_offset_uniform =
glGetUniformLocation(shader.shader.glprogid, "texture_buffer_offset");
shader.multiplier_uniform = glGetUniformLocation(shader.shader.glprogid, "multiplier");
shader.copy_position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position");
return true;
}
bool TextureCache::CompileShaders() bool TextureCache::CompileShaders()
{ {
constexpr const char* color_copy_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" constexpr const char* color_copy_program = "SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n"
@ -315,44 +339,17 @@ bool TextureCache::CompileShaders()
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
{ {
if (!ProgramShaderCache::CompileShader( if (!CompilePaletteShader(TLUTFormat::IA8, StringFromFormat(vertex_program, prefix, prefix),
s_palette_pixel_shader[GX_TL_IA8], StringFromFormat(vertex_program, prefix, prefix), "#define DECODE DecodePixel_IA8" + palette_shader, geo_program))
"#define DECODE DecodePixel_IA8" + palette_shader, geo_program))
{
return false; return false;
}
s_palette_buffer_offset_uniform[GX_TL_IA8] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "texture_buffer_offset");
s_palette_multiplier_uniform[GX_TL_IA8] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "multiplier");
s_palette_copy_position_uniform[GX_TL_IA8] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_IA8].glprogid, "copy_position");
if (!ProgramShaderCache::CompileShader( if (!CompilePaletteShader(TLUTFormat::RGB565, StringFromFormat(vertex_program, prefix, prefix),
s_palette_pixel_shader[GX_TL_RGB565], StringFromFormat(vertex_program, prefix, prefix), "#define DECODE DecodePixel_RGB565" + palette_shader, geo_program))
"#define DECODE DecodePixel_RGB565" + palette_shader, geo_program))
{
return false; return false;
}
s_palette_buffer_offset_uniform[GX_TL_RGB565] = glGetUniformLocation(
s_palette_pixel_shader[GX_TL_RGB565].glprogid, "texture_buffer_offset");
s_palette_multiplier_uniform[GX_TL_RGB565] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "multiplier");
s_palette_copy_position_uniform[GX_TL_RGB565] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB565].glprogid, "copy_position");
if (!ProgramShaderCache::CompileShader( if (!CompilePaletteShader(TLUTFormat::RGB5A3, StringFromFormat(vertex_program, prefix, prefix),
s_palette_pixel_shader[GX_TL_RGB5A3], StringFromFormat(vertex_program, prefix, prefix), "#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program))
"#define DECODE DecodePixel_RGB5A3" + palette_shader, geo_program))
{
return false; return false;
}
s_palette_buffer_offset_uniform[GX_TL_RGB5A3] = glGetUniformLocation(
s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "texture_buffer_offset");
s_palette_multiplier_uniform[GX_TL_RGB5A3] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "multiplier");
s_palette_copy_position_uniform[GX_TL_RGB5A3] =
glGetUniformLocation(s_palette_pixel_shader[GX_TL_RGB5A3].glprogid, "copy_position");
} }
return true; return true;
@ -364,16 +361,19 @@ void TextureCache::DeleteShaders()
m_depthMatrixProgram.Destroy(); m_depthMatrixProgram.Destroy();
if (g_ActiveConfig.backend_info.bSupportsPaletteConversion) if (g_ActiveConfig.backend_info.bSupportsPaletteConversion)
for (auto& shader : s_palette_pixel_shader) for (auto& shader : s_palette_shader)
shader.Destroy(); shader.shader.Destroy();
} }
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette, void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
TlutFormat format) const void* palette, TLUTFormat tlutfmt)
{ {
if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion) if (!g_ActiveConfig.backend_info.bSupportsPaletteConversion)
return; return;
_assert_(IsValidTLUTFormat(tlutfmt));
const PaletteShader& palette_shader = s_palette_shader[static_cast<int>(tlutfmt)];
g_renderer->ResetAPIState(); g_renderer->ResetAPIState();
OGLTexture* source_texture = static_cast<OGLTexture*>(source->texture.get()); OGLTexture* source_texture = static_cast<OGLTexture*>(source->texture.get());
@ -385,16 +385,17 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer()); FramebufferManager::SetFramebuffer(destination_texture->GetFramebuffer());
glViewport(0, 0, destination->GetWidth(), destination->GetHeight()); glViewport(0, 0, destination->GetWidth(), destination->GetHeight());
s_palette_pixel_shader[format].Bind(); palette_shader.shader.Bind();
// C14 textures are currently unsupported // C14 textures are currently unsupported
int size = (source->format & 0xf) == GX_TF_I4 ? 32 : 512; int size = source->format == TextureFormat::I4 ? 32 : 512;
auto buffer = s_palette_stream_buffer->Map(size); auto buffer = s_palette_stream_buffer->Map(size);
memcpy(buffer.first, palette, size); memcpy(buffer.first, palette, size);
s_palette_stream_buffer->Unmap(size); s_palette_stream_buffer->Unmap(size);
glUniform1i(s_palette_buffer_offset_uniform[format], buffer.second / 2); glUniform1i(palette_shader.buffer_offset_uniform, buffer.second / 2);
glUniform1f(s_palette_multiplier_uniform[format], (source->format & 0xf) == 0 ? 15.0f : 255.0f); glUniform1f(palette_shader.multiplier_uniform,
glUniform4f(s_palette_copy_position_uniform[format], 0.0f, 0.0f, source->format == TextureFormat::I4 ? 15.0f : 255.0f);
glUniform4f(palette_shader.copy_position_uniform, 0.0f, 0.0f,
static_cast<float>(source->GetWidth()), static_cast<float>(source->GetHeight())); static_cast<float>(source->GetWidth()), static_cast<float>(source->GetHeight()));
glActiveTexture(GL_TEXTURE10); glActiveTexture(GL_TEXTURE10);
@ -441,7 +442,7 @@ void DestroyTextureDecodingResources()
s_texture_decoding_program_info.clear(); s_texture_decoding_program_info.clear();
} }
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{ {
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format)); auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key); auto iter = s_texture_decoding_program_info.find(key);
@ -483,7 +484,7 @@ bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat pal
void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height, size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride, u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format) const u8* palette, TLUTFormat palette_format)
{ {
auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format)); auto key = std::make_pair(static_cast<u32>(format), static_cast<u32>(palette_format));
auto iter = s_texture_decoding_program_info.find(key); auto iter = s_texture_decoding_program_info.find(key);

View File

@ -26,23 +26,23 @@ public:
static TextureCache* GetInstance(); static TextureCache* GetInstance();
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override; bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width, TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette, u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override; TLUTFormat palette_format) override;
const SHADER& GetColorCopyProgram() const; const SHADER& GetColorCopyProgram() const;
GLuint GetColorCopyPositionUniform() const; GLuint GetColorCopyPositionUniform() const;
private: private:
std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override; std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette, void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TlutFormat format) override; TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
const EFBRectangle& src_rect, bool scale_by_half) override; bool scale_by_half) override;
void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect,
bool scale_by_half, unsigned int cbuf_id, const float* colmat) override; bool scale_by_half, unsigned int cbuf_id, const float* colmat) override;

View File

@ -51,7 +51,7 @@ struct EncodingProgram
SHADER program; SHADER program;
GLint copy_position_uniform; GLint copy_position_uniform;
}; };
static std::map<EFBCopyFormat, EncodingProgram> s_encoding_programs; static std::map<EFBCopyParams, EncodingProgram> s_encoding_programs;
static GLuint s_PBO = 0; // for readback with different strides static GLuint s_PBO = 0; // for readback with different strides
@ -136,13 +136,13 @@ static void CreatePrograms()
ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb); ProgramShaderCache::CompileShader(s_yuyvToRgbProgram, VProgramYuyvToRgb, FProgramYuyvToRgb);
} }
static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyFormat& format) static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params)
{ {
auto iter = s_encoding_programs.find(format); auto iter = s_encoding_programs.find(params);
if (iter != s_encoding_programs.end()) if (iter != s_encoding_programs.end())
return iter->second; return iter->second;
const char* shader = TextureConversionShader::GenerateEncodingShader(format, APIType::OpenGL); const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::OpenGL);
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader) if (g_ActiveConfig.iLog & CONF_SAVESHADERS && shader)
@ -166,7 +166,7 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyFormat& format)
PanicAlert("Failed to compile texture encoding shader."); PanicAlert("Failed to compile texture encoding shader.");
program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position"); program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position");
return s_encoding_programs.emplace(format, program).first->second; return s_encoding_programs.emplace(params, program).first->second;
} }
void Init() void Init()
@ -271,24 +271,24 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
} }
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyFormat& format, u32 native_width, void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) const EFBRectangle& src_rect, bool scale_by_half)
{ {
g_renderer->ResetAPIState(); g_renderer->ResetAPIState();
EncodingProgram& texconv_shader = GetOrCreateEncodingShader(format); EncodingProgram& texconv_shader = GetOrCreateEncodingShader(params);
texconv_shader.program.Bind(); texconv_shader.program.Bind();
glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width, glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width,
scale_by_half ? 2 : 1); scale_by_half ? 2 : 1);
const GLuint read_texture = is_depth_copy ? const GLuint read_texture = params.depth ?
FramebufferManager::ResolveAndGetDepthTarget(src_rect) : FramebufferManager::ResolveAndGetDepthTarget(src_rect) :
FramebufferManager::ResolveAndGetRenderTarget(src_rect); FramebufferManager::ResolveAndGetRenderTarget(src_rect);
EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride, EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride,
scale_by_half && !is_depth_copy); scale_by_half && !params.depth);
FramebufferManager::SetFramebuffer(0); FramebufferManager::SetFramebuffer(0);
g_renderer->RestoreAPIState(); g_renderer->RestoreAPIState();

View File

@ -7,9 +7,10 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/GL/GLUtil.h" #include "Common/GL/GLUtil.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
struct EFBCopyParams;
namespace OGL namespace OGL
{ {
// Converts textures between formats using shaders // Converts textures between formats using shaders
@ -25,9 +26,9 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des
void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture); void DecodeToTexture(u32 xfbAddr, int srcWidth, int srcHeight, GLuint destTexture);
// returns size of the encoded data (in bytes) // returns size of the encoded data (in bytes)
void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyFormat& format, u32 native_width, void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half); const EFBRectangle& src_rect, bool scale_by_half);
} }
} // namespace OGL } // namespace OGL

View File

@ -50,13 +50,13 @@ class TextureCache : public TextureCacheBase
public: public:
bool CompileShaders() override { return true; } bool CompileShaders() override { return true; }
void DeleteShaders() override {} void DeleteShaders() override {}
void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, void* palette, void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TlutFormat format) override TLUTFormat format) override
{ {
} }
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
const EFBRectangle& src_rect, bool scale_by_half) override bool scale_by_half) override
{ {
EfbCopy::CopyEfb(); EfbCopy::CopyEfb();
} }

File diff suppressed because it is too large Load Diff

View File

@ -111,13 +111,14 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
TexMode0& tm0 = texUnit.texMode0[subTexmap]; TexMode0& tm0 = texUnit.texMode0[subTexmap];
TexImage0& ti0 = texUnit.texImage0[subTexmap]; TexImage0& ti0 = texUnit.texImage0[subTexmap];
TexTLUT& texTlut = texUnit.texTlut[subTexmap]; TexTLUT& texTlut = texUnit.texTlut[subTexmap];
TlutFormat tlutfmt = (TlutFormat)texTlut.tlut_format; TextureFormat texfmt = static_cast<TextureFormat>(ti0.format);
TLUTFormat tlutfmt = static_cast<TLUTFormat>(texTlut.tlut_format);
u8 *imageSrc, *imageSrcOdd = nullptr; u8 *imageSrc, *imageSrcOdd = nullptr;
if (texUnit.texImage1[subTexmap].image_type) if (texUnit.texImage1[subTexmap].image_type)
{ {
imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE]; imageSrc = &texMem[texUnit.texImage1[subTexmap].tmem_even * TMEM_LINE_SIZE];
if (ti0.format == GX_TF_RGBA8) if (texfmt == TextureFormat::RGBA8)
imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE]; imageSrcOdd = &texMem[texUnit.texImage2[subTexmap].tmem_odd * TMEM_LINE_SIZE];
} }
else else
@ -139,9 +140,9 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
int mipWidth = imageWidth + 1; int mipWidth = imageWidth + 1;
int mipHeight = imageHeight + 1; int mipHeight = imageHeight + 1;
int fmtWidth = TexDecoder_GetBlockWidthInTexels(ti0.format); int fmtWidth = TexDecoder_GetBlockWidthInTexels(texfmt);
int fmtHeight = TexDecoder_GetBlockHeightInTexels(ti0.format); int fmtHeight = TexDecoder_GetBlockHeightInTexels(texfmt);
int fmtDepth = TexDecoder_GetTexelSizeInNibbles(ti0.format); int fmtDepth = TexDecoder_GetTexelSizeInNibbles(texfmt);
imageWidth >>= mip; imageWidth >>= mip;
imageHeight >>= mip; imageHeight >>= mip;
@ -186,21 +187,21 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageSPlus1, tm0.wrap_s, imageWidth); WrapCoord(&imageSPlus1, tm0.wrap_s, imageWidth);
WrapCoord(&imageTPlus1, tm0.wrap_t, imageHeight); WrapCoord(&imageTPlus1, tm0.wrap_t, imageHeight);
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type)) if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].image_type))
{ {
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageT, imageWidth, texfmt, tlut,
tlutfmt); tlutfmt);
SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT)); SetTexel(sampledTex, texel, (128 - fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, ti0.format, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageT, imageWidth, texfmt, tlut,
tlut, tlutfmt); tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (128 - fractT)); AddTexel(sampledTex, texel, (fractS) * (128 - fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, ti0.format, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageS, imageTPlus1, imageWidth, texfmt, tlut,
tlut, tlutfmt); tlutfmt);
AddTexel(sampledTex, texel, (128 - fractS) * (fractT)); AddTexel(sampledTex, texel, (128 - fractS) * (fractT));
TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, ti0.format, TexDecoder_DecodeTexel(sampledTex, imageSrc, imageSPlus1, imageTPlus1, imageWidth, texfmt,
tlut, tlutfmt); tlut, tlutfmt);
AddTexel(sampledTex, texel, (fractS) * (fractT)); AddTexel(sampledTex, texel, (fractS) * (fractT));
} }
@ -238,9 +239,8 @@ void SampleMip(s32 s, s32 t, s32 mip, bool linear, u8 texmap, u8* sample)
WrapCoord(&imageS, tm0.wrap_s, imageWidth); WrapCoord(&imageS, tm0.wrap_s, imageWidth);
WrapCoord(&imageT, tm0.wrap_t, imageHeight); WrapCoord(&imageT, tm0.wrap_t, imageHeight);
if (!(ti0.format == GX_TF_RGBA8 && texUnit.texImage1[subTexmap].image_type)) if (!(texfmt == TextureFormat::RGBA8 && texUnit.texImage1[subTexmap].image_type))
TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, ti0.format, tlut, TexDecoder_DecodeTexel(sample, imageSrc, imageS, imageT, imageWidth, texfmt, tlut, tlutfmt);
tlutfmt);
else else
TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT, TexDecoder_DecodeTexelRGBA8FromTmem(sample, imageSrc, imageSrcOdd, imageS, imageT,
imageWidth); imageWidth);

View File

@ -95,8 +95,8 @@ bool TextureCache::Initialize()
return true; return true;
} }
void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette, void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
TlutFormat format) const void* palette, TLUTFormat format)
{ {
m_texture_converter->ConvertTexture(destination, source, m_render_pass, palette, format); m_texture_converter->ConvertTexture(destination, source, m_render_pass, palette, format);
@ -111,9 +111,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source,
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
} }
void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half) const EFBRectangle& src_rect, bool scale_by_half)
{ {
// Flush EFB pokes first, as they're expected to be included. // Flush EFB pokes first, as they're expected to be included.
FramebufferManager::GetInstance()->FlushEFBPokes(); FramebufferManager::GetInstance()->FlushEFBPokes();
@ -128,7 +128,7 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_widt
region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(), region = Util::ClampRect2D(region, FramebufferManager::GetInstance()->GetEFBWidth(),
FramebufferManager::GetInstance()->GetEFBHeight()); FramebufferManager::GetInstance()->GetEFBHeight());
Texture2D* src_texture; Texture2D* src_texture;
if (is_depth_copy) if (params.depth)
src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region); src_texture = FramebufferManager::GetInstance()->ResolveEFBDepthTexture(region);
else else
src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region); src_texture = FramebufferManager::GetInstance()->ResolveEFBColorTexture(region);
@ -144,15 +144,15 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_widt
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, format, native_width, m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, params, native_width,
bytes_per_row, num_blocks_y, memory_stride, bytes_per_row, num_blocks_y, memory_stride, src_rect,
is_depth_copy, src_rect, scale_by_half); scale_by_half);
// Transition back to original state // Transition back to original state
src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout); src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout);
} }
bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{ {
return m_texture_converter->SupportsTextureDecoding(format, palette_format); return m_texture_converter->SupportsTextureDecoding(format, palette_format);
} }
@ -160,7 +160,7 @@ bool TextureCache::SupportsGPUTextureDecode(TextureFormat format, TlutFormat pal
void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height, size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride, u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format) const u8* palette, TLUTFormat palette_format)
{ {
// Group compute shader dispatches together in the init command buffer. That way we don't have to // Group compute shader dispatches together in the init command buffer. That way we don't have to
// pay a penalty for switching from graphics->compute, or end/restart our render pass. // pay a penalty for switching from graphics->compute, or end/restart our render pass.

View File

@ -33,19 +33,19 @@ public:
std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override; std::unique_ptr<AbstractTexture> CreateTexture(const TextureConfig& config) override;
void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void* palette, void ConvertTexture(TCacheEntry* destination, TCacheEntry* source, const void* palette,
TlutFormat format) override; TLUTFormat format) override;
void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
const EFBRectangle& src_rect, bool scale_by_half) override; bool scale_by_half) override;
bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) override; bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override;
void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size, void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, size_t data_size,
TextureFormat format, u32 width, u32 height, u32 aligned_width, TextureFormat format, u32 width, u32 height, u32 aligned_width,
u32 aligned_height, u32 row_stride, const u8* palette, u32 aligned_height, u32 row_stride, const u8* palette,
TlutFormat palette_format) override; TLUTFormat palette_format) override;
VkShaderModule GetCopyShader() const; VkShaderModule GetCopyShader() const;
VkRenderPass GetTextureCopyRenderPass() const; VkRenderPass GetTextureCopyRenderPass() const;

View File

@ -166,7 +166,7 @@ TextureConverter::GetCommandBufferForTextureConversion(const TextureCache::TCach
void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
TextureCacheBase::TCacheEntry* src_entry, TextureCacheBase::TCacheEntry* src_entry,
VkRenderPass render_pass, const void* palette, VkRenderPass render_pass, const void* palette,
TlutFormat palette_format) TLUTFormat palette_format)
{ {
struct PSUniformBlock struct PSUniformBlock
{ {
@ -182,7 +182,7 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
_assert_(destination_texture->GetConfig().rendertarget); _assert_(destination_texture->GetConfig().rendertarget);
// We want to align to 2 bytes (R16) or the device's texel buffer alignment, whichever is greater. // We want to align to 2 bytes (R16) or the device's texel buffer alignment, whichever is greater.
size_t palette_size = (src_entry->format & 0xF) == GX_TF_I4 ? 32 : 512; size_t palette_size = src_entry->format == TextureFormat::I4 ? 32 : 512;
if (!ReserveTexelBufferStorage(palette_size, sizeof(u16))) if (!ReserveTexelBufferStorage(palette_size, sizeof(u16)))
return; return;
@ -201,13 +201,13 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
UtilityShaderDraw draw(command_buffer, UtilityShaderDraw draw(command_buffer,
g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION), g_object_cache->GetPipelineLayout(PIPELINE_LAYOUT_TEXTURE_CONVERSION),
render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE, render_pass, g_shader_cache->GetScreenQuadVertexShader(), VK_NULL_HANDLE,
m_palette_conversion_shaders[palette_format]); m_palette_conversion_shaders[static_cast<int>(palette_format)]);
VkRect2D region = {{0, 0}, {dst_entry->GetWidth(), dst_entry->GetHeight()}}; VkRect2D region = {{0, 0}, {dst_entry->GetWidth(), dst_entry->GetHeight()}};
draw.BeginRenderPass(destination_texture->GetFramebuffer(), region); draw.BeginRenderPass(destination_texture->GetFramebuffer(), region);
PSUniformBlock uniforms = {}; PSUniformBlock uniforms = {};
uniforms.multiplier = (src_entry->format & 0xF) == GX_TF_I4 ? 15.0f : 255.0f; uniforms.multiplier = src_entry->format == TextureFormat::I4 ? 15.0f : 255.0f;
uniforms.texel_buffer_offset = static_cast<int>(palette_offset / sizeof(u16)); uniforms.texel_buffer_offset = static_cast<int>(palette_offset / sizeof(u16));
draw.SetPushConstants(&uniforms, sizeof(uniforms)); draw.SetPushConstants(&uniforms, sizeof(uniforms));
draw.SetPSSampler(0, source_texture->GetRawTexIdentifier()->GetView(), draw.SetPSSampler(0, source_texture->GetRawTexIdentifier()->GetView(),
@ -219,16 +219,15 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
} }
void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr,
const EFBCopyFormat& format, u32 native_width, const EFBCopyParams& params, u32 native_width,
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
bool is_depth_copy, const EFBRectangle& src_rect, const EFBRectangle& src_rect, bool scale_by_half)
bool scale_by_half)
{ {
VkShaderModule shader = GetEncodingShader(format); VkShaderModule shader = GetEncodingShader(params);
if (shader == VK_NULL_HANDLE) if (shader == VK_NULL_HANDLE)
{ {
ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u->%u", format.efb_format, ERROR_LOG(VIDEO, "Missing encoding fragment shader for format %u->%u",
static_cast<u32>(format.copy_format)); static_cast<unsigned>(params.efb_format), static_cast<unsigned>(params.copy_format));
return; return;
} }
@ -251,7 +250,7 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p
// We also use linear filtering for both box filtering and downsampling higher resolutions to 1x // We also use linear filtering for both box filtering and downsampling higher resolutions to 1x
// TODO: This only produces perfect downsampling for 1.5x and 2x IR, other resolutions will // TODO: This only produces perfect downsampling for 1.5x and 2x IR, other resolutions will
// need more complex down filtering to average all pixels and produce the correct result. // need more complex down filtering to average all pixels and produce the correct result.
bool linear_filter = (scale_by_half && !is_depth_copy) || g_ActiveConfig.iEFBScale != SCALE_1X; bool linear_filter = (scale_by_half && !params.depth) || g_ActiveConfig.iEFBScale != SCALE_1X;
draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() : draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() :
g_object_cache->GetPointSampler()); g_object_cache->GetPointSampler());
@ -387,7 +386,7 @@ void TextureConverter::DecodeYUYVTextureFromMemory(VKTexture* dst_texture, const
draw.EndRenderPass(); draw.EndRenderPass();
} }
bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format) bool TextureConverter::SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format)
{ {
auto key = std::make_pair(format, palette_format); auto key = std::make_pair(format, palette_format);
auto iter = m_decoding_pipelines.find(key); auto iter = m_decoding_pipelines.find(key);
@ -424,7 +423,7 @@ void TextureConverter::DecodeTexture(VkCommandBuffer command_buffer,
TextureCache::TCacheEntry* entry, u32 dst_level, TextureCache::TCacheEntry* entry, u32 dst_level,
const u8* data, size_t data_size, TextureFormat format, const u8* data, size_t data_size, TextureFormat format,
u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 width, u32 height, u32 aligned_width, u32 aligned_height,
u32 row_stride, const u8* palette, TlutFormat palette_format) u32 row_stride, const u8* palette, TLUTFormat palette_format)
{ {
VKTexture* destination_texture = static_cast<VKTexture*>(entry->texture.get()); VKTexture* destination_texture = static_cast<VKTexture*>(entry->texture.get());
auto key = std::make_pair(format, palette_format); auto key = std::make_pair(format, palette_format);
@ -667,21 +666,21 @@ bool TextureConverter::CompilePaletteConversionShaders()
std::string palette_rgb5a3_program = StringFromFormat( std::string palette_rgb5a3_program = StringFromFormat(
"%s\n%s", "#define DECODE DecodePixel_RGB5A3", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE); "%s\n%s", "#define DECODE DecodePixel_RGB5A3", PALETTE_CONVERSION_FRAGMENT_SHADER_SOURCE);
m_palette_conversion_shaders[GX_TL_IA8] = m_palette_conversion_shaders[static_cast<int>(TLUTFormat::IA8)] =
Util::CompileAndCreateFragmentShader(palette_ia8_program); Util::CompileAndCreateFragmentShader(palette_ia8_program);
m_palette_conversion_shaders[GX_TL_RGB565] = m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB565)] =
Util::CompileAndCreateFragmentShader(palette_rgb565_program); Util::CompileAndCreateFragmentShader(palette_rgb565_program);
m_palette_conversion_shaders[GX_TL_RGB5A3] = m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB5A3)] =
Util::CompileAndCreateFragmentShader(palette_rgb5a3_program); Util::CompileAndCreateFragmentShader(palette_rgb5a3_program);
return m_palette_conversion_shaders[GX_TL_IA8] != VK_NULL_HANDLE && return m_palette_conversion_shaders[static_cast<int>(TLUTFormat::IA8)] != VK_NULL_HANDLE &&
m_palette_conversion_shaders[GX_TL_RGB565] != VK_NULL_HANDLE && m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB565)] != VK_NULL_HANDLE &&
m_palette_conversion_shaders[GX_TL_RGB5A3] != VK_NULL_HANDLE; m_palette_conversion_shaders[static_cast<int>(TLUTFormat::RGB5A3)] != VK_NULL_HANDLE;
} }
VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyFormat& format) VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyParams& params)
{ {
const char* shader = TextureConversionShader::GenerateEncodingShader(format, APIType::Vulkan); const char* shader = TextureConversionShader::GenerateEncodingShader(params, APIType::Vulkan);
VkShaderModule module = Util::CompileAndCreateFragmentShader(shader); VkShaderModule module = Util::CompileAndCreateFragmentShader(shader);
if (module == VK_NULL_HANDLE) if (module == VK_NULL_HANDLE)
PanicAlert("Failed to compile texture encoding shader."); PanicAlert("Failed to compile texture encoding shader.");
@ -689,14 +688,14 @@ VkShaderModule TextureConverter::CompileEncodingShader(const EFBCopyFormat& form
return module; return module;
} }
VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyFormat& format) VkShaderModule TextureConverter::GetEncodingShader(const EFBCopyParams& params)
{ {
auto iter = m_encoding_shaders.find(format); auto iter = m_encoding_shaders.find(params);
if (iter != m_encoding_shaders.end()) if (iter != m_encoding_shaders.end())
return iter->second; return iter->second;
VkShaderModule shader = CompileEncodingShader(format); VkShaderModule shader = CompileEncodingShader(params);
m_encoding_shaders.emplace(format, shader); m_encoding_shaders.emplace(params, shader);
return shader; return shader;
} }

View File

@ -33,14 +33,13 @@ public:
// Applies palette to dst_entry, using indices from src_entry. // Applies palette to dst_entry, using indices from src_entry.
void ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, void ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry,
TextureCache::TCacheEntry* src_entry, VkRenderPass render_pass, TextureCache::TCacheEntry* src_entry, VkRenderPass render_pass,
const void* palette, TlutFormat palette_format); const void* palette, TLUTFormat palette_format);
// Uses an encoding shader to copy src_texture to dest_ptr. // Uses an encoding shader to copy src_texture to dest_ptr.
// NOTE: Executes the current command buffer. // NOTE: Executes the current command buffer.
void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyFormat& format, void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params,
u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 native_width, u32 bytes_per_row, u32 num_blocks_y,
u32 memory_stride, bool is_depth_copy, const EFBRectangle& src_rect, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half);
bool scale_by_half);
// Encodes texture to guest memory in XFB (YUYV) format. // Encodes texture to guest memory in XFB (YUYV) format.
void EncodeTextureToMemoryYUYV(void* dst_ptr, u32 dst_width, u32 dst_stride, u32 dst_height, void EncodeTextureToMemoryYUYV(void* dst_ptr, u32 dst_width, u32 dst_stride, u32 dst_height,
@ -50,11 +49,11 @@ public:
void DecodeYUYVTextureFromMemory(VKTexture* dst_texture, const void* src_ptr, u32 src_width, void DecodeYUYVTextureFromMemory(VKTexture* dst_texture, const void* src_ptr, u32 src_width,
u32 src_stride, u32 src_height); u32 src_stride, u32 src_height);
bool SupportsTextureDecoding(TextureFormat format, TlutFormat palette_format); bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format);
void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry, void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry,
u32 dst_level, const u8* data, size_t data_size, TextureFormat format, u32 dst_level, const u8* data, size_t data_size, TextureFormat format,
u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride, u32 width, u32 height, u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format); const u8* palette, TLUTFormat palette_format);
private: private:
static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4; static const u32 ENCODING_TEXTURE_WIDTH = EFB_WIDTH * 4;
@ -71,8 +70,8 @@ private:
bool CompilePaletteConversionShaders(); bool CompilePaletteConversionShaders();
VkShaderModule CompileEncodingShader(const EFBCopyFormat& format); VkShaderModule CompileEncodingShader(const EFBCopyParams& params);
VkShaderModule GetEncodingShader(const EFBCopyFormat& format); VkShaderModule GetEncodingShader(const EFBCopyParams& params);
bool CreateEncodingRenderPass(); bool CreateEncodingRenderPass();
bool CreateEncodingTexture(); bool CreateEncodingTexture();
@ -105,7 +104,7 @@ private:
std::array<VkShaderModule, NUM_PALETTE_CONVERSION_SHADERS> m_palette_conversion_shaders = {}; std::array<VkShaderModule, NUM_PALETTE_CONVERSION_SHADERS> m_palette_conversion_shaders = {};
// Texture encoding - RGBA8->GX format in memory // Texture encoding - RGBA8->GX format in memory
std::map<EFBCopyFormat, VkShaderModule> m_encoding_shaders; std::map<EFBCopyParams, VkShaderModule> m_encoding_shaders;
VkRenderPass m_encoding_render_pass = VK_NULL_HANDLE; VkRenderPass m_encoding_render_pass = VK_NULL_HANDLE;
std::unique_ptr<Texture2D> m_encoding_render_texture; std::unique_ptr<Texture2D> m_encoding_render_texture;
VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE; VkFramebuffer m_encoding_render_framebuffer = VK_NULL_HANDLE;
@ -118,7 +117,7 @@ private:
VkShaderModule compute_shader; VkShaderModule compute_shader;
bool valid; bool valid;
}; };
std::map<std::pair<TextureFormat, TlutFormat>, TextureDecodingPipeline> m_decoding_pipelines; std::map<std::pair<TextureFormat, TLUTFormat>, TextureDecodingPipeline> m_decoding_pipelines;
std::unique_ptr<Texture2D> m_decoding_texture; std::unique_ptr<Texture2D> m_decoding_texture;
// XFB encoding/decoding shaders // XFB encoding/decoding shaders

View File

@ -9,6 +9,8 @@
#include "Common/BitField.h" #include "Common/BitField.h"
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
enum class EFBCopyFormat;
#pragma pack(4) #pragma pack(4)
enum enum
@ -958,7 +960,10 @@ union UPE_Copy
BitField<16, 1, u32> BitField<16, 1, u32>
auto_conv; // if 0 automatic color conversion by texture format and pixel type auto_conv; // if 0 automatic color conversion by texture format and pixel type
u32 tp_realFormat() const { return target_pixel_format / 2 + (target_pixel_format & 1) * 8; } EFBCopyFormat tp_realFormat() const
{
return static_cast<EFBCopyFormat>(target_pixel_format / 2 + (target_pixel_format & 1) * 8);
}
}; };
union BPU_PreloadTileInfo union BPU_PreloadTileInfo

View File

@ -215,7 +215,7 @@ void HiresTexture::Prefetch()
} }
std::string HiresTexture::GenBaseName(const u8* texture, size_t texture_size, const u8* tlut, std::string HiresTexture::GenBaseName(const u8* texture, size_t texture_size, const u8* tlut,
size_t tlut_size, u32 width, u32 height, int format, size_t tlut_size, u32 width, u32 height, TextureFormat format,
bool has_mipmaps, bool dump) bool has_mipmaps, bool dump)
{ {
std::string name = ""; std::string name = "";
@ -385,7 +385,8 @@ u32 HiresTexture::CalculateMipCount(u32 width, u32 height)
std::shared_ptr<HiresTexture> HiresTexture::Search(const u8* texture, size_t texture_size, std::shared_ptr<HiresTexture> HiresTexture::Search(const u8* texture, size_t texture_size,
const u8* tlut, size_t tlut_size, u32 width, const u8* tlut, size_t tlut_size, u32 width,
u32 height, int format, bool has_mipmaps) u32 height, TextureFormat format,
bool has_mipmaps)
{ {
std::string base_filename = std::string base_filename =
GenBaseName(texture, texture_size, tlut, tlut_size, width, height, format, has_mipmaps); GenBaseName(texture, texture_size, tlut, tlut_size, width, height, format, has_mipmaps);

View File

@ -11,6 +11,8 @@
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/TextureConfig.h" #include "VideoCommon/TextureConfig.h"
enum class TextureFormat;
class HiresTexture class HiresTexture
{ {
public: public:
@ -22,10 +24,10 @@ public:
static std::shared_ptr<HiresTexture> Search(const u8* texture, size_t texture_size, static std::shared_ptr<HiresTexture> Search(const u8* texture, size_t texture_size,
const u8* tlut, size_t tlut_size, u32 width, const u8* tlut, size_t tlut_size, u32 width,
u32 height, int format, bool has_mipmaps); u32 height, TextureFormat format, bool has_mipmaps);
static std::string GenBaseName(const u8* texture, size_t texture_size, const u8* tlut, static std::string GenBaseName(const u8* texture, size_t texture_size, const u8* tlut,
size_t tlut_size, u32 width, u32 height, int format, size_t tlut_size, u32 width, u32 height, TextureFormat format,
bool has_mipmaps, bool dump = false); bool has_mipmaps, bool dump = false);
static u32 CalculateMipCount(u32 width, u32 height); static u32 CalculateMipCount(u32 width, u32 height);

View File

@ -226,8 +226,8 @@ void TextureCacheBase::SetBackupConfig(const VideoConfig& config)
backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding; backup_config.gpu_texture_decoding = config.bEnableGPUTextureDecoding;
} }
TextureCacheBase::TCacheEntry* TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, TextureCacheBase::TCacheEntry*
u8* palette, u32 tlutfmt) TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt)
{ {
TextureConfig new_config = entry->texture->GetConfig(); TextureConfig new_config = entry->texture->GetConfig();
new_config.levels = 1; new_config.levels = 1;
@ -243,7 +243,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ApplyPaletteToEntry(TCacheEntry
decoded_entry->frameCount = FRAMECOUNT_INVALID; decoded_entry->frameCount = FRAMECOUNT_INVALID;
decoded_entry->is_efb_copy = false; decoded_entry->is_efb_copy = false;
ConvertTexture(decoded_entry, entry, palette, static_cast<TlutFormat>(tlutfmt)); ConvertTexture(decoded_entry, entry, palette, tlutfmt);
textures_by_address.emplace(entry->addr, decoded_entry); textures_by_address.emplace(entry->addr, decoded_entry);
return decoded_entry; return decoded_entry;
@ -290,7 +290,8 @@ void TextureCacheBase::ScaleTextureCacheEntryTo(TextureCacheBase::TCacheEntry* e
} }
TextureCacheBase::TCacheEntry* TextureCacheBase::TCacheEntry*
TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, u32 tlutfmt) TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt)
{ {
// If the flag may_have_overlapping_textures is cleared, there are no overlapping EFB copies, // If the flag may_have_overlapping_textures is cleared, there are no overlapping EFB copies,
// which aren't applied already. It is set for new textures, and for the affected range // which aren't applied already. It is set for new textures, and for the affected range
@ -299,19 +300,17 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale
return entry_to_update; return entry_to_update;
entry_to_update->may_have_overlapping_textures = false; entry_to_update->may_have_overlapping_textures = false;
const bool isPaletteTexture = const bool isPaletteTexture = IsColorIndexed(entry_to_update->format.texfmt);
(entry_to_update->format == GX_TF_C4 || entry_to_update->format == GX_TF_C8 ||
entry_to_update->format == GX_TF_C14X2 || entry_to_update->format >= 0x10000);
// EFB copies are excluded from these updates, until there's an example where a game would // EFB copies are excluded from these updates, until there's an example where a game would
// benefit from updating. This would require more work to be done. // benefit from updating. This would require more work to be done.
if (entry_to_update->IsEfbCopy()) if (entry_to_update->IsEfbCopy())
return entry_to_update; return entry_to_update;
u32 block_width = TexDecoder_GetBlockWidthInTexels(entry_to_update->format & 0xf); u32 block_width = TexDecoder_GetBlockWidthInTexels(entry_to_update->format.texfmt);
u32 block_height = TexDecoder_GetBlockHeightInTexels(entry_to_update->format & 0xf); u32 block_height = TexDecoder_GetBlockHeightInTexels(entry_to_update->format.texfmt);
u32 block_size = block_width * block_height * u32 block_size = block_width * block_height *
TexDecoder_GetTexelSizeInNibbles(entry_to_update->format & 0xf) / 2; TexDecoder_GetTexelSizeInNibbles(entry_to_update->format.texfmt) / 2;
u32 numBlocksX = (entry_to_update->native_width + block_width - 1) / block_width; u32 numBlocksX = (entry_to_update->native_width + block_width - 1) / block_width;
@ -490,9 +489,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
const u32 address = (tex.texImage3[id].image_base /* & 0x1FFFFF*/) << 5; const u32 address = (tex.texImage3[id].image_base /* & 0x1FFFFF*/) << 5;
u32 width = tex.texImage0[id].width + 1; u32 width = tex.texImage0[id].width + 1;
u32 height = tex.texImage0[id].height + 1; u32 height = tex.texImage0[id].height + 1;
const int texformat = tex.texImage0[id].format; const TextureFormat texformat = static_cast<TextureFormat>(tex.texImage0[id].format);
const u32 tlutaddr = tex.texTlut[id].tmem_offset << 9; const u32 tlutaddr = tex.texTlut[id].tmem_offset << 9;
const u32 tlutfmt = tex.texTlut[id].tlut_format; const TLUTFormat tlutfmt = static_cast<TLUTFormat>(tex.texTlut[id].tlut_format);
const bool use_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]); const bool use_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0[id]);
u32 tex_levels = use_mipmaps ? ((tex.texMode1[id].max_lod + 0xf) / 0x10 + 1) : 1; u32 tex_levels = use_mipmaps ? ((tex.texMode1[id].max_lod + 0xf) / 0x10 + 1) : 1;
const bool from_tmem = tex.texImage1[id].image_type != 0; const bool from_tmem = tex.texImage1[id].image_type != 0;
@ -511,18 +510,14 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
u64 base_hash = TEXHASH_INVALID; u64 base_hash = TEXHASH_INVALID;
u64 full_hash = TEXHASH_INVALID; u64 full_hash = TEXHASH_INVALID;
u32 full_format = texformat; TextureAndTLUTFormat full_format(texformat, tlutfmt);
const bool isPaletteTexture = const bool isPaletteTexture = IsColorIndexed(texformat);
(texformat == GX_TF_C4 || texformat == GX_TF_C8 || texformat == GX_TF_C14X2);
// Reject invalid tlut format. // Reject invalid tlut format.
if (isPaletteTexture && tlutfmt > GX_TL_RGB5A3) if (isPaletteTexture && !IsValidTLUTFormat(tlutfmt))
return nullptr; return nullptr;
if (isPaletteTexture)
full_format = texformat | (tlutfmt << 16);
const u32 texture_size = const u32 texture_size =
TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat); TexDecoder_GetTextureSizeInBytes(expandedWidth, expandedHeight, texformat);
u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2; u32 bytes_per_block = (bsw * bsh * TexDecoder_GetTexelSizeInNibbles(texformat)) / 2;
@ -766,11 +761,9 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
// banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since // banks, and if we're doing a copy we may as well just do the whole thing on the CPU, since
// there's no conversion between formats. In the future this could be extended with a separate // there's no conversion between formats. In the future this could be extended with a separate
// shader, however. // shader, however.
bool decode_on_gpu = bool decode_on_gpu = !hires_tex && g_ActiveConfig.UseGPUTextureDecoding() &&
!hires_tex && g_ActiveConfig.UseGPUTextureDecoding() && g_texture_cache->SupportsGPUTextureDecode(texformat, tlutfmt) &&
g_texture_cache->SupportsGPUTextureDecode(static_cast<TextureFormat>(texformat), !(from_tmem && texformat == TextureFormat::RGBA8);
static_cast<TlutFormat>(tlutfmt)) &&
!(from_tmem && texformat == GX_TF_RGBA8);
// create the entry/texture // create the entry/texture
TextureConfig config; TextureConfig config;
@ -796,18 +789,16 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
if (!hires_tex && decode_on_gpu) if (!hires_tex && decode_on_gpu)
{ {
u32 row_stride = bytes_per_block * (expandedWidth / bsw); u32 row_stride = bytes_per_block * (expandedWidth / bsw);
g_texture_cache->DecodeTextureOnGPU( g_texture_cache->DecodeTextureOnGPU(entry, 0, src_data, texture_size, texformat, width, height,
entry, 0, src_data, texture_size, static_cast<TextureFormat>(texformat), width, height, expandedWidth, expandedHeight, row_stride, tlut, tlutfmt);
expandedWidth, expandedHeight, row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
} }
else if (!hires_tex) else if (!hires_tex)
{ {
size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight; size_t decoded_texture_size = expandedWidth * sizeof(u32) * expandedHeight;
CheckTempSize(decoded_texture_size); CheckTempSize(decoded_texture_size);
if (!(texformat == GX_TF_RGBA8 && from_tmem)) if (!(texformat == TextureFormat::RGBA8 && from_tmem))
{ {
TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, TexDecoder_Decode(temp, src_data, expandedWidth, expandedHeight, texformat, tlut, tlutfmt);
(TlutFormat)tlutfmt);
} }
else else
{ {
@ -878,17 +869,16 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
if (decode_on_gpu) if (decode_on_gpu)
{ {
u32 row_stride = bytes_per_block * (expanded_mip_width / bsw); u32 row_stride = bytes_per_block * (expanded_mip_width / bsw);
g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, g_texture_cache->DecodeTextureOnGPU(entry, level, mip_src_data, mip_size, texformat,
static_cast<TextureFormat>(texformat), mip_width, mip_width, mip_height, expanded_mip_width,
mip_height, expanded_mip_width, expanded_mip_height, expanded_mip_height, row_stride, tlut, tlutfmt);
row_stride, tlut, static_cast<TlutFormat>(tlutfmt));
} }
else else
{ {
// No need to call CheckTempSize here, as mips will always be smaller than the base level. // No need to call CheckTempSize here, as mips will always be smaller than the base level.
size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height; size_t decoded_mip_size = expanded_mip_width * sizeof(u32) * expanded_mip_height;
TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat, TexDecoder_Decode(temp, mip_src_data, expanded_mip_width, expanded_mip_height, texformat,
tlut, (TlutFormat)tlutfmt); tlut, tlutfmt);
entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, temp, entry->texture->Load(level, mip_width, mip_height, expanded_mip_width, temp,
decoded_mip_size); decoded_mip_size);
} }
@ -908,9 +898,10 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::Load(const u32 stage)
return ReturnEntry(stage, entry); return ReturnEntry(stage, entry);
} }
void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat,
bool is_depth_copy, const EFBRectangle& srcRect, u32 dstStride, bool is_depth_copy,
bool isIntensity, bool scaleByHalf) const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf)
{ {
// Emulation methods: // Emulation methods:
// //
@ -980,81 +971,73 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f; ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f; ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 255.0f;
unsigned int cbufid = UINT_MAX; unsigned int cbufid = UINT_MAX;
u32 srcFormat = bpmem.zcontrol.pixel_format; PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
bool efbHasAlpha = srcFormat == PEControl::RGBA6_Z24; bool efbHasAlpha = srcFormat == PEControl::RGBA6_Z24;
if (is_depth_copy) if (is_depth_copy)
{ {
switch (dstFormat) switch (dstFormat)
{ {
case 0: // Z4 case EFBCopyFormat::R4: // Z4
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f; colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 0; cbufid = 0;
dstFormat |= _GX_TF_CTF;
break; break;
case 8: // Z8H case EFBCopyFormat::R8_0x1: // Z8
dstFormat |= _GX_TF_CTF; case EFBCopyFormat::R8: // Z8H
case 1: // Z8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f; colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1.0f;
cbufid = 1; cbufid = 1;
break; break;
case 3: // Z16 case EFBCopyFormat::RA8: // Z16
colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f; colmat[1] = colmat[5] = colmat[9] = colmat[12] = 1.0f;
cbufid = 2; cbufid = 2;
break; break;
case 11: // Z16 (reverse order) case EFBCopyFormat::RG8: // Z16 (reverse order)
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 3; cbufid = 3;
dstFormat |= _GX_TF_CTF;
break; break;
case 6: // Z24X8 case EFBCopyFormat::RGBA8: // Z24X8
colmat[0] = colmat[5] = colmat[10] = 1.0f; colmat[0] = colmat[5] = colmat[10] = 1.0f;
cbufid = 4; cbufid = 4;
break; break;
case 9: // Z8M case EFBCopyFormat::G8: // Z8M
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 5; cbufid = 5;
dstFormat |= _GX_TF_CTF;
break; break;
case 10: // Z8L case EFBCopyFormat::B8: // Z8L
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 6; cbufid = 6;
dstFormat |= _GX_TF_CTF;
break; break;
case 12: // Z16L - copy lower 16 depth bits case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits // expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
// stored as alpha) // stored as alpha)
// Used e.g. in Zelda: Skyward Sword // Used e.g. in Zelda: Skyward Sword
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f; colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 7; cbufid = 7;
dstFormat |= _GX_TF_CTF;
break; break;
default: default:
ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%x", dstFormat); ERROR_LOG(VIDEO, "Unknown copy zbuf format: 0x%X", static_cast<int>(dstFormat));
colmat[2] = colmat[5] = colmat[8] = 1.0f; colmat[2] = colmat[5] = colmat[8] = 1.0f;
cbufid = 8; cbufid = 8;
break; break;
} }
dstFormat |= _GX_TF_ZTF;
} }
else if (isIntensity) else if (isIntensity)
{ {
fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f; fConstAdd[0] = fConstAdd[1] = fConstAdd[2] = 16.0f / 255.0f;
switch (dstFormat) switch (dstFormat)
{ {
case 0: // I4 case EFBCopyFormat::R4: // I4
case 1: // I8 case EFBCopyFormat::R8_0x1: // I8
case 2: // IA4 case EFBCopyFormat::R8: // IA4
case 3: // IA8 case EFBCopyFormat::RA4: // IA8
case 8: // I8 case EFBCopyFormat::RA8: // I8
// TODO - verify these coefficients // TODO - verify these coefficients
colmat[0] = 0.257f; colmat[0] = 0.257f;
colmat[1] = 0.504f; colmat[1] = 0.504f;
@ -1066,13 +1049,14 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
colmat[9] = 0.504f; colmat[9] = 0.504f;
colmat[10] = 0.098f; colmat[10] = 0.098f;
if (dstFormat < 2 || dstFormat == 8) if (dstFormat == EFBCopyFormat::R4 || dstFormat == EFBCopyFormat::R8_0x1 ||
dstFormat == EFBCopyFormat::R8)
{ {
colmat[12] = 0.257f; colmat[12] = 0.257f;
colmat[13] = 0.504f; colmat[13] = 0.504f;
colmat[14] = 0.098f; colmat[14] = 0.098f;
fConstAdd[3] = 16.0f / 255.0f; fConstAdd[3] = 16.0f / 255.0f;
if (dstFormat == 0) if (dstFormat == EFBCopyFormat::R4)
{ {
ColorMask[0] = ColorMask[1] = ColorMask[2] = 255.0f / 16.0f; ColorMask[0] = ColorMask[1] = ColorMask[2] = 255.0f / 16.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f; ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 15.0f;
@ -1086,7 +1070,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
else // alpha else // alpha
{ {
colmat[15] = 1; colmat[15] = 1;
if (dstFormat == 2) if (dstFormat == EFBCopyFormat::RA4)
{ {
ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f / 16.0f; ColorMask[0] = ColorMask[1] = ColorMask[2] = ColorMask[3] = 255.0f / 16.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f; ColorMask[4] = ColorMask[5] = ColorMask[6] = ColorMask[7] = 1.0f / 15.0f;
@ -1100,7 +1084,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
break; break;
default: default:
ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%x", dstFormat); ERROR_LOG(VIDEO, "Unknown copy intensity format: 0x%X", static_cast<int>(dstFormat));
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 13; cbufid = 13;
break; break;
@ -1110,21 +1094,19 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
{ {
switch (dstFormat) switch (dstFormat)
{ {
case 0: // R4 case EFBCopyFormat::R4: // R4
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
ColorMask[0] = 255.0f / 16.0f; ColorMask[0] = 255.0f / 16.0f;
ColorMask[4] = 1.0f / 15.0f; ColorMask[4] = 1.0f / 15.0f;
cbufid = 14; cbufid = 14;
dstFormat |= _GX_TF_CTF;
break; break;
case 1: // R8 case EFBCopyFormat::R8_0x1: // R8
case 8: // R8 case EFBCopyFormat::R8: // R8
colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1; colmat[0] = colmat[4] = colmat[8] = colmat[12] = 1;
cbufid = 15; cbufid = 15;
dstFormat = GX_CTF_R8;
break; break;
case 2: // RA4 case EFBCopyFormat::RA4: // RA4
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f; colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
ColorMask[0] = ColorMask[3] = 255.0f / 16.0f; ColorMask[0] = ColorMask[3] = 255.0f / 16.0f;
ColorMask[4] = ColorMask[7] = 1.0f / 15.0f; ColorMask[4] = ColorMask[7] = 1.0f / 15.0f;
@ -1136,9 +1118,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
fConstAdd[3] = 1.0f; fConstAdd[3] = 1.0f;
cbufid = 17; cbufid = 17;
} }
dstFormat |= _GX_TF_CTF;
break; break;
case 3: // RA8 case EFBCopyFormat::RA8: // RA8
colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f; colmat[0] = colmat[4] = colmat[8] = colmat[15] = 1.0f;
cbufid = 18; cbufid = 18;
@ -1148,10 +1129,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
fConstAdd[3] = 1.0f; fConstAdd[3] = 1.0f;
cbufid = 19; cbufid = 19;
} }
dstFormat |= _GX_TF_CTF;
break; break;
case 7: // A8 case EFBCopyFormat::A8: // A8
colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f; colmat[3] = colmat[7] = colmat[11] = colmat[15] = 1.0f;
cbufid = 20; cbufid = 20;
@ -1164,33 +1144,28 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
fConstAdd[3] = 1.0f; fConstAdd[3] = 1.0f;
cbufid = 21; cbufid = 21;
} }
dstFormat |= _GX_TF_CTF;
break; break;
case 9: // G8 case EFBCopyFormat::G8: // G8
colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f; colmat[1] = colmat[5] = colmat[9] = colmat[13] = 1.0f;
cbufid = 22; cbufid = 22;
dstFormat |= _GX_TF_CTF;
break; break;
case 10: // B8 case EFBCopyFormat::B8: // B8
colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f; colmat[2] = colmat[6] = colmat[10] = colmat[14] = 1.0f;
cbufid = 23; cbufid = 23;
dstFormat |= _GX_TF_CTF;
break; break;
case 11: // RG8 case EFBCopyFormat::RG8: // RG8
colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f; colmat[0] = colmat[4] = colmat[8] = colmat[13] = 1.0f;
cbufid = 24; cbufid = 24;
dstFormat |= _GX_TF_CTF;
break; break;
case 12: // GB8 case EFBCopyFormat::GB8: // GB8
colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f; colmat[1] = colmat[5] = colmat[9] = colmat[14] = 1.0f;
cbufid = 25; cbufid = 25;
dstFormat |= _GX_TF_CTF;
break; break;
case 4: // RGB565 case EFBCopyFormat::RGB565: // RGB565
colmat[0] = colmat[5] = colmat[10] = 1.0f; colmat[0] = colmat[5] = colmat[10] = 1.0f;
ColorMask[0] = ColorMask[2] = 255.0f / 8.0f; ColorMask[0] = ColorMask[2] = 255.0f / 8.0f;
ColorMask[4] = ColorMask[6] = 1.0f / 31.0f; ColorMask[4] = ColorMask[6] = 1.0f / 31.0f;
@ -1200,7 +1175,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
cbufid = 26; cbufid = 26;
break; break;
case 5: // RGB5A3 case EFBCopyFormat::RGB5A3: // RGB5A3
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
ColorMask[0] = ColorMask[1] = ColorMask[2] = 255.0f / 8.0f; ColorMask[0] = ColorMask[1] = ColorMask[2] = 255.0f / 8.0f;
ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f; ColorMask[4] = ColorMask[5] = ColorMask[6] = 1.0f / 31.0f;
@ -1215,7 +1190,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
cbufid = 28; cbufid = 28;
} }
break; break;
case 6: // RGBA8 case EFBCopyFormat::RGBA8: // RGBA8
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 29; cbufid = 29;
@ -1228,7 +1203,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
break; break;
default: default:
ERROR_LOG(VIDEO, "Unknown copy color format: 0x%x", dstFormat); ERROR_LOG(VIDEO, "Unknown copy color format: 0x%X", static_cast<int>(dstFormat));
colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f; colmat[0] = colmat[5] = colmat[10] = colmat[15] = 1.0f;
cbufid = 31; cbufid = 31;
break; break;
@ -1267,7 +1242,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
} }
// Get the base (in memory) format of this efb copy. // Get the base (in memory) format of this efb copy.
int baseFormat = TexDecoder_GetEfbCopyBaseFormat(dstFormat); TextureFormat baseFormat = TexDecoder_GetEFBCopyBaseFormat(dstFormat);
u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat); u32 blockH = TexDecoder_GetBlockHeightInTexels(baseFormat);
const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat); const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat);
@ -1280,7 +1255,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
const u32 num_blocks_x = actualWidth / blockW; const u32 num_blocks_x = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one // RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = baseFormat == GX_TF_RGBA8 ? 64 : 32; const u32 bytes_per_block = baseFormat == TextureFormat::RGBA8 ? 64 : 32;
const u32 bytes_per_row = num_blocks_x * bytes_per_block; const u32 bytes_per_row = num_blocks_x * bytes_per_block;
const u32 covered_range = num_blocks_y * dstStride; const u32 covered_range = num_blocks_y * dstStride;
@ -1290,9 +1265,8 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFo
if (copy_to_ram) if (copy_to_ram)
{ {
EFBCopyFormat format(srcFormat, static_cast<TextureFormat>(dstFormat)); EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, is_depth_copy, srcRect, CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf);
scaleByHalf);
} }
else else
{ {
@ -1510,7 +1484,7 @@ TextureCacheBase::InvalidateTexture(TexAddrCache::iterator iter)
u32 TextureCacheBase::TCacheEntry::BytesPerRow() const u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
{ {
const u32 blockW = TexDecoder_GetBlockWidthInTexels(format); const u32 blockW = TexDecoder_GetBlockWidthInTexels(format.texfmt);
// Round up source height to multiple of block size // Round up source height to multiple of block size
const u32 actualWidth = Common::AlignUp(native_width, blockW); const u32 actualWidth = Common::AlignUp(native_width, blockW);
@ -1518,14 +1492,14 @@ u32 TextureCacheBase::TCacheEntry::BytesPerRow() const
const u32 numBlocksX = actualWidth / blockW; const u32 numBlocksX = actualWidth / blockW;
// RGBA takes two cache lines per block; all others take one // RGBA takes two cache lines per block; all others take one
const u32 bytes_per_block = format == GX_TF_RGBA8 ? 64 : 32; const u32 bytes_per_block = format == TextureFormat::RGBA8 ? 64 : 32;
return numBlocksX * bytes_per_block; return numBlocksX * bytes_per_block;
} }
u32 TextureCacheBase::TCacheEntry::NumBlocksY() const u32 TextureCacheBase::TCacheEntry::NumBlocksY() const
{ {
u32 blockH = TexDecoder_GetBlockHeightInTexels(format); u32 blockH = TexDecoder_GetBlockHeightInTexels(format.texfmt);
// Round up source height to multiple of block size // Round up source height to multiple of block size
u32 actualHeight = Common::AlignUp(native_height, blockH); u32 actualHeight = Common::AlignUp(native_height, blockH);

View File

@ -21,6 +21,47 @@
struct VideoConfig; struct VideoConfig;
// Pairs a texture format with the TLUT (palette) format that accompanies it.
//
// Equality is palette-aware: the TLUT format only takes part in the comparison
// when this object's texture format is reported as color-indexed by
// IsColorIndexed(); otherwise two values are equal as soon as their texture
// formats match.
struct TextureAndTLUTFormat
{
  TextureFormat texfmt;
  TLUTFormat tlutfmt;

  // Both arguments are defaulted, so this also acts as the default constructor
  // (I4 / IA8) and as an implicit conversion from a lone TextureFormat.
  TextureAndTLUTFormat(TextureFormat texfmt_ = TextureFormat::I4,
                       TLUTFormat tlutfmt_ = TLUTFormat::IA8)
      : texfmt(texfmt_), tlutfmt(tlutfmt_)
  {
  }

  // Returns true when the texture formats match and, for color-indexed
  // texture formats only, the TLUT formats match as well.
  bool operator==(const TextureAndTLUTFormat& other) const
  {
    const bool same_texture_format = texfmt == other.texfmt;
    const bool palette_matters = IsColorIndexed(texfmt);
    return same_texture_format && (!palette_matters || tlutfmt == other.tlutfmt);
  }

  // Exact negation of operator==.
  bool operator!=(const TextureAndTLUTFormat& other) const { return !(*this == other); }
};
// Bundles the parameters that describe one EFB copy: the EFB pixel format, the
// requested copy format, and the depth / YUV flags.
//
// NOTE(review): operator< is presumably provided so this struct can key an
// ordered container (e.g. a shader cache) - confirm against the users.
struct EFBCopyParams
{
  PEControl::PixelFormat efb_format;  // pixel format of the EFB being copied from
  EFBCopyFormat copy_format;          // destination format of the copy
  bool depth;                         // true when copying the depth buffer
  bool yuv;                           // true for an intensity (YUV) copy

  EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
                bool yuv_)
      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
  {
  }

  // Lexicographic ordering over (efb_format, copy_format, depth, yuv).
  bool operator<(const EFBCopyParams& rhs) const
  {
    const auto lhs_key = std::tie(efb_format, copy_format, depth, yuv);
    const auto rhs_key = std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
    return lhs_key < rhs_key;
  }
};
class TextureCacheBase class TextureCacheBase
{ {
private: private:
@ -34,8 +75,8 @@ public:
u32 addr; u32 addr;
u32 size_in_bytes; u32 size_in_bytes;
u64 base_hash; u64 base_hash;
u64 hash; // for paletted textures, hash = base_hash ^ palette_hash u64 hash; // for paletted textures, hash = base_hash ^ palette_hash
u32 format; // bits 0-3 will contain the in-memory format. TextureAndTLUTFormat format;
u32 memory_stride; u32 memory_stride;
bool is_efb_copy; bool is_efb_copy;
bool is_custom_tex; bool is_custom_tex;
@ -62,7 +103,7 @@ public:
~TCacheEntry(); ~TCacheEntry();
void SetGeneralParameters(u32 _addr, u32 _size, u32 _format) void SetGeneralParameters(u32 _addr, u32 _size, TextureAndTLUTFormat _format)
{ {
addr = _addr; addr = _addr;
size_in_bytes = _size; size_in_bytes = _size;
@ -119,9 +160,9 @@ public:
void Invalidate(); void Invalidate();
virtual void CopyEFB(u8* dst, const EFBCopyFormat& format, u32 native_width, u32 bytes_per_row, virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row,
u32 num_blocks_y, u32 memory_stride, bool is_depth_copy, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect,
const EFBRectangle& src_rect, bool scale_by_half) = 0; bool scale_by_half) = 0;
virtual bool CompileShaders() = 0; virtual bool CompileShaders() = 0;
virtual void DeleteShaders() = 0; virtual void DeleteShaders() = 0;
@ -130,15 +171,15 @@ public:
static void InvalidateAllBindPoints() { valid_bind_points.reset(); } static void InvalidateAllBindPoints() { valid_bind_points.reset(); }
static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); } static bool IsValidBindPoint(u32 i) { return valid_bind_points.test(i); }
void BindTextures(); void BindTextures();
void CopyRenderTargetToTexture(u32 dstAddr, unsigned int dstFormat, u32 dstStride, void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 dstStride,
bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity, bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity,
bool scaleByHalf); bool scaleByHalf);
virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, void* palette, virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette,
TlutFormat format) = 0; TLUTFormat format) = 0;
// Returns true if the texture data and palette formats are supported by the GPU decoder. // Returns true if the texture data and palette formats are supported by the GPU decoder.
virtual bool SupportsGPUTextureDecode(TextureFormat format, TlutFormat palette_format) virtual bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format)
{ {
return false; return false;
} }
@ -150,7 +191,7 @@ public:
virtual void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data, virtual void DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u8* data,
size_t data_size, TextureFormat format, u32 width, u32 height, size_t data_size, TextureFormat format, u32 width, u32 height,
u32 aligned_width, u32 aligned_height, u32 row_stride, u32 aligned_width, u32 aligned_height, u32 row_stride,
const u8* palette, TlutFormat palette_format) const u8* palette, TLUTFormat palette_format)
{ {
} }
@ -177,10 +218,11 @@ private:
void SetBackupConfig(const VideoConfig& config); void SetBackupConfig(const VideoConfig& config);
TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, u32 tlutfmt); TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt);
void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height); void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height);
TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, u32 tlutfmt); TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette,
TLUTFormat tlutfmt);
void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level); void DumpTexture(TCacheEntry* entry, std::string basename, unsigned int level);
void CheckTempSize(size_t required_size); void CheckTempSize(size_t required_size);

View File

@ -13,6 +13,7 @@
#include "Common/MathUtil.h" #include "Common/MathUtil.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "VideoCommon/RenderBase.h" #include "VideoCommon/RenderBase.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/TextureConversionShader.h" #include "VideoCommon/TextureConversionShader.h"
#include "VideoCommon/VideoCommon.h" #include "VideoCommon/VideoCommon.h"
@ -23,64 +24,40 @@ static bool IntensityConstantAdded = false;
namespace TextureConversionShader namespace TextureConversionShader
{ {
u16 GetEncodedSampleCount(u32 format) u16 GetEncodedSampleCount(EFBCopyFormat format)
{ {
switch (format) switch (format)
{ {
case GX_TF_I4: case EFBCopyFormat::R4:
return 8; return 8;
case GX_TF_I8: case EFBCopyFormat::RA4:
return 4; return 4;
case GX_TF_IA4: case EFBCopyFormat::RA8:
return 4;
case GX_TF_IA8:
return 2; return 2;
case GX_TF_RGB565: case EFBCopyFormat::RGB565:
return 2; return 2;
case GX_TF_RGB5A3: case EFBCopyFormat::RGB5A3:
return 2; return 2;
case GX_TF_RGBA8: case EFBCopyFormat::RGBA8:
return 1; return 1;
case GX_CTF_R4: case EFBCopyFormat::A8:
return 8; case EFBCopyFormat::R8_0x1:
case GX_CTF_RA4: case EFBCopyFormat::R8:
case EFBCopyFormat::G8:
case EFBCopyFormat::B8:
return 4; return 4;
case GX_CTF_RA8: case EFBCopyFormat::RG8:
return 2; case EFBCopyFormat::GB8:
case GX_CTF_A8:
return 4;
case GX_CTF_R8:
return 4;
case GX_CTF_G8:
return 4;
case GX_CTF_B8:
return 4;
case GX_CTF_RG8:
return 2;
case GX_CTF_GB8:
return 2;
case GX_TF_Z8:
return 4;
case GX_TF_Z16:
return 2;
case GX_TF_Z24X8:
return 1;
case GX_CTF_Z4:
return 8;
case GX_CTF_Z8M:
return 4;
case GX_CTF_Z8L:
return 4;
case GX_CTF_Z16L:
return 2; return 2;
default: default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEncodedSampleCount)", static_cast<int>(format));
return 1; return 1;
} }
} }
// block dimensions : widthStride, heightStride // block dimensions : widthStride, heightStride
// texture dims : width, height, x offset, y offset // texture dims : width, height, x offset, y offset
static void WriteSwizzler(char*& p, u32 format, APIType ApiType) static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType)
{ {
// left, top, of source rectangle within source texture // left, top, of source rectangle within source texture
// width of the destination rectangle, scale_factor (1 or 2) // width of the destination rectangle, scale_factor (1 or 2)
@ -108,8 +85,8 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType)
WRITE(p, " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"); WRITE(p, " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n");
WRITE(p, "}\n"); WRITE(p, "}\n");
int blkW = TexDecoder_GetBlockWidthInTexels(format); int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
int blkH = TexDecoder_GetBlockHeightInTexels(format); int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
int samples = GetEncodedSampleCount(format); int samples = GetEncodedSampleCount(format);
if (ApiType == APIType::OpenGL) if (ApiType == APIType::OpenGL)
@ -180,13 +157,13 @@ static void WriteSwizzler(char*& p, u32 format, APIType ApiType)
} }
static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset,
APIType ApiType, const EFBCopyFormat& format, bool depth) APIType ApiType, const EFBCopyParams& params)
{ {
WRITE(p, " %s = ", dest); WRITE(p, " %s = ", dest);
if (!depth) if (!params.depth)
{ {
switch (format.efb_format) switch (params.efb_format)
{ {
case PEControl::RGB8_Z24: case PEControl::RGB8_Z24:
WRITE(p, "RGBA8ToRGB8("); WRITE(p, "RGBA8ToRGB8(");
@ -246,21 +223,21 @@ static void WriteEncoderEnd(char*& p)
IntensityConstantAdded = false; IntensityConstantAdded = false;
} }
static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_I8, ApiType); WriteSwizzler(p, EFBCopyFormat::R8, ApiType);
WRITE(p, " float3 texSample;\n"); WRITE(p, " float3 texSample;\n");
WriteSampleColor(p, "rgb", "texSample", 0, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.b"); WriteColorToIntensity(p, "texSample", "ocol0.b");
WriteSampleColor(p, "rgb", "texSample", 1, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 1, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.g"); WriteColorToIntensity(p, "texSample", "ocol0.g");
WriteSampleColor(p, "rgb", "texSample", 2, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 2, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.r"); WriteColorToIntensity(p, "texSample", "ocol0.r");
WriteSampleColor(p, "rgb", "texSample", 3, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 3, ApiType, params);
WriteColorToIntensity(p, "texSample", "ocol0.a"); WriteColorToIntensity(p, "texSample", "ocol0.a");
WRITE(p, " ocol0.rgba += IntensityConst.aaaa;\n"); // see WriteColorToIntensity WRITE(p, " ocol0.rgba += IntensityConst.aaaa;\n"); // see WriteColorToIntensity
@ -268,35 +245,35 @@ static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& forma
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_I4, ApiType); WriteSwizzler(p, EFBCopyFormat::R4, ApiType);
WRITE(p, " float3 texSample;\n"); WRITE(p, " float3 texSample;\n");
WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n"); WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgb", "texSample", 0, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.b"); WriteColorToIntensity(p, "texSample", "color0.b");
WriteSampleColor(p, "rgb", "texSample", 1, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 1, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.b"); WriteColorToIntensity(p, "texSample", "color1.b");
WriteSampleColor(p, "rgb", "texSample", 2, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 2, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.g"); WriteColorToIntensity(p, "texSample", "color0.g");
WriteSampleColor(p, "rgb", "texSample", 3, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 3, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.g"); WriteColorToIntensity(p, "texSample", "color1.g");
WriteSampleColor(p, "rgb", "texSample", 4, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 4, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.r"); WriteColorToIntensity(p, "texSample", "color0.r");
WriteSampleColor(p, "rgb", "texSample", 5, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 5, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.r"); WriteColorToIntensity(p, "texSample", "color1.r");
WriteSampleColor(p, "rgb", "texSample", 6, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 6, ApiType, params);
WriteColorToIntensity(p, "texSample", "color0.a"); WriteColorToIntensity(p, "texSample", "color0.a");
WriteSampleColor(p, "rgb", "texSample", 7, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample", 7, ApiType, params);
WriteColorToIntensity(p, "texSample", "color1.a"); WriteColorToIntensity(p, "texSample", "color1.a");
WRITE(p, " color0.rgba += IntensityConst.aaaa;\n"); WRITE(p, " color0.rgba += IntensityConst.aaaa;\n");
@ -309,16 +286,16 @@ static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& forma
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_IA8, ApiType); WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 texSample;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
WRITE(p, " ocol0.b = texSample.a;\n"); WRITE(p, " ocol0.b = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.g"); WriteColorToIntensity(p, "texSample", "ocol0.g");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, " ocol0.r = texSample.a;\n"); WRITE(p, " ocol0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "ocol0.a"); WriteColorToIntensity(p, "texSample", "ocol0.a");
@ -327,26 +304,26 @@ static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_IA4, ApiType); WriteSwizzler(p, EFBCopyFormat::RA4, ApiType);
WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 texSample;\n");
WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n"); WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
WRITE(p, " color0.b = texSample.a;\n"); WRITE(p, " color0.b = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.b"); WriteColorToIntensity(p, "texSample", "color1.b");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, " color0.g = texSample.a;\n"); WRITE(p, " color0.g = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.g"); WriteColorToIntensity(p, "texSample", "color1.g");
WriteSampleColor(p, "rgba", "texSample", 2, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 2, ApiType, params);
WRITE(p, " color0.r = texSample.a;\n"); WRITE(p, " color0.r = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.r"); WriteColorToIntensity(p, "texSample", "color1.r");
WriteSampleColor(p, "rgba", "texSample", 3, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 3, ApiType, params);
WRITE(p, " color0.a = texSample.a;\n"); WRITE(p, " color0.a = texSample.a;\n");
WriteColorToIntensity(p, "texSample", "color1.a"); WriteColorToIntensity(p, "texSample", "color1.a");
@ -359,14 +336,14 @@ static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_RGB565, ApiType); WriteSwizzler(p, EFBCopyFormat::RGB565, ApiType);
WRITE(p, " float3 texSample0;\n"); WRITE(p, " float3 texSample0;\n");
WRITE(p, " float3 texSample1;\n"); WRITE(p, " float3 texSample1;\n");
WriteSampleColor(p, "rgb", "texSample0", 0, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample0", 0, ApiType, params);
WriteSampleColor(p, "rgb", "texSample1", 1, ApiType, format, false); WriteSampleColor(p, "rgb", "texSample1", 1, ApiType, params);
WRITE(p, " float2 texRs = float2(texSample0.r, texSample1.r);\n"); WRITE(p, " float2 texRs = float2(texSample0.r, texSample1.r);\n");
WRITE(p, " float2 texGs = float2(texSample0.g, texSample1.g);\n"); WRITE(p, " float2 texGs = float2(texSample0.g, texSample1.g);\n");
WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n"); WRITE(p, " float2 texBs = float2(texSample0.b, texSample1.b);\n");
@ -384,16 +361,16 @@ static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyFormat& f
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_RGB5A3, ApiType); WriteSwizzler(p, EFBCopyFormat::RGB5A3, ApiType);
WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 texSample;\n");
WRITE(p, " float color0;\n"); WRITE(p, " float color0;\n");
WRITE(p, " float gUpper;\n"); WRITE(p, " float gUpper;\n");
WRITE(p, " float gLower;\n"); WRITE(p, " float gLower;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
// 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
WRITE(p, "if(texSample.a > 0.878f) {\n"); WRITE(p, "if(texSample.a > 0.878f) {\n");
@ -419,7 +396,7 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyFormat& f
WRITE(p, "}\n"); WRITE(p, "}\n");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, "if(texSample.a > 0.878f) {\n"); WRITE(p, "if(texSample.a > 0.878f) {\n");
@ -448,21 +425,21 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyFormat& f
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_RGBA8, ApiType); WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType);
WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 texSample;\n");
WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n"); WRITE(p, " float4 color1;\n");
WriteSampleColor(p, "rgba", "texSample", 0, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params);
WRITE(p, " color0.b = texSample.a;\n"); WRITE(p, " color0.b = texSample.a;\n");
WRITE(p, " color0.g = texSample.r;\n"); WRITE(p, " color0.g = texSample.r;\n");
WRITE(p, " color1.b = texSample.g;\n"); WRITE(p, " color1.b = texSample.g;\n");
WRITE(p, " color1.g = texSample.b;\n"); WRITE(p, " color1.g = texSample.b;\n");
WriteSampleColor(p, "rgba", "texSample", 1, ApiType, format, false); WriteSampleColor(p, "rgba", "texSample", 1, ApiType, params);
WRITE(p, " color0.r = texSample.a;\n"); WRITE(p, " color0.r = texSample.a;\n");
WRITE(p, " color0.a = texSample.r;\n"); WRITE(p, " color0.a = texSample.r;\n");
WRITE(p, " color1.r = texSample.g;\n"); WRITE(p, " color1.r = texSample.g;\n");
@ -473,21 +450,20 @@ static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyFormat& fo
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyFormat& format, static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params)
bool depth)
{ {
WriteSwizzler(p, GX_CTF_R4, ApiType); WriteSwizzler(p, EFBCopyFormat::R4, ApiType);
WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n"); WRITE(p, " float4 color1;\n");
WriteSampleColor(p, comp, "color0.b", 0, ApiType, format, depth); WriteSampleColor(p, comp, "color0.b", 0, ApiType, params);
WriteSampleColor(p, comp, "color1.b", 1, ApiType, format, depth); WriteSampleColor(p, comp, "color1.b", 1, ApiType, params);
WriteSampleColor(p, comp, "color0.g", 2, ApiType, format, depth); WriteSampleColor(p, comp, "color0.g", 2, ApiType, params);
WriteSampleColor(p, comp, "color1.g", 3, ApiType, format, depth); WriteSampleColor(p, comp, "color1.g", 3, ApiType, params);
WriteSampleColor(p, comp, "color0.r", 4, ApiType, format, depth); WriteSampleColor(p, comp, "color0.r", 4, ApiType, params);
WriteSampleColor(p, comp, "color1.r", 5, ApiType, format, depth); WriteSampleColor(p, comp, "color1.r", 5, ApiType, params);
WriteSampleColor(p, comp, "color0.a", 6, ApiType, format, depth); WriteSampleColor(p, comp, "color0.a", 6, ApiType, params);
WriteSampleColor(p, comp, "color1.a", 7, ApiType, format, depth); WriteSampleColor(p, comp, "color1.a", 7, ApiType, params);
WriteToBitDepth(p, 4, "color0", "color0"); WriteToBitDepth(p, 4, "color0", "color0");
WriteToBitDepth(p, 4, "color1", "color1"); WriteToBitDepth(p, 4, "color1", "color1");
@ -496,40 +472,39 @@ static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EF
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyFormat& format, static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params)
bool depth)
{ {
WriteSwizzler(p, GX_CTF_R8, ApiType); WriteSwizzler(p, EFBCopyFormat::R8, ApiType);
WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, format, depth); WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, params);
WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, format, depth); WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, params);
WriteSampleColor(p, comp, "ocol0.r", 2, ApiType, format, depth); WriteSampleColor(p, comp, "ocol0.r", 2, ApiType, params);
WriteSampleColor(p, comp, "ocol0.a", 3, ApiType, format, depth); WriteSampleColor(p, comp, "ocol0.a", 3, ApiType, params);
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType, static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType,
const EFBCopyFormat& format) const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_CTF_RA4, ApiType); WriteSwizzler(p, EFBCopyFormat::RA4, ApiType);
WRITE(p, " float2 texSample;\n"); WRITE(p, " float2 texSample;\n");
WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color0;\n");
WRITE(p, " float4 color1;\n"); WRITE(p, " float4 color1;\n");
WriteSampleColor(p, comp, "texSample", 0, ApiType, format, false); WriteSampleColor(p, comp, "texSample", 0, ApiType, params);
WRITE(p, " color0.b = texSample.x;\n"); WRITE(p, " color0.b = texSample.x;\n");
WRITE(p, " color1.b = texSample.y;\n"); WRITE(p, " color1.b = texSample.y;\n");
WriteSampleColor(p, comp, "texSample", 1, ApiType, format, false); WriteSampleColor(p, comp, "texSample", 1, ApiType, params);
WRITE(p, " color0.g = texSample.x;\n"); WRITE(p, " color0.g = texSample.x;\n");
WRITE(p, " color1.g = texSample.y;\n"); WRITE(p, " color1.g = texSample.y;\n");
WriteSampleColor(p, comp, "texSample", 2, ApiType, format, false); WriteSampleColor(p, comp, "texSample", 2, ApiType, params);
WRITE(p, " color0.r = texSample.x;\n"); WRITE(p, " color0.r = texSample.x;\n");
WRITE(p, " color1.r = texSample.y;\n"); WRITE(p, " color1.r = texSample.y;\n");
WriteSampleColor(p, comp, "texSample", 3, ApiType, format, false); WriteSampleColor(p, comp, "texSample", 3, ApiType, params);
WRITE(p, " color0.a = texSample.x;\n"); WRITE(p, " color0.a = texSample.x;\n");
WRITE(p, " color1.a = texSample.y;\n"); WRITE(p, " color1.a = texSample.y;\n");
@ -541,48 +516,48 @@ static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType,
} }
static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType, static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType,
const EFBCopyFormat& format) const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_CTF_RA8, ApiType); WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, format, false); WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, params);
WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, format, false); WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, params);
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType, static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType,
const EFBCopyFormat& format) const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_CTF_Z8M, ApiType); WriteSwizzler(p, EFBCopyFormat::G8, ApiType);
WRITE(p, " float depth;\n"); WRITE(p, " float depth;\n");
WriteSampleColor(p, "r", "depth", 0, ApiType, format, true); WriteSampleColor(p, "r", "depth", 0, ApiType, params);
WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier); WRITE(p, "ocol0.b = frac(depth * %s);\n", multiplier);
WriteSampleColor(p, "r", "depth", 1, ApiType, format, true); WriteSampleColor(p, "r", "depth", 1, ApiType, params);
WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier); WRITE(p, "ocol0.g = frac(depth * %s);\n", multiplier);
WriteSampleColor(p, "r", "depth", 2, ApiType, format, true); WriteSampleColor(p, "r", "depth", 2, ApiType, params);
WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier); WRITE(p, "ocol0.r = frac(depth * %s);\n", multiplier);
WriteSampleColor(p, "r", "depth", 3, ApiType, format, true); WriteSampleColor(p, "r", "depth", 3, ApiType, params);
WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier); WRITE(p, "ocol0.a = frac(depth * %s);\n", multiplier);
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_Z16, ApiType); WriteSwizzler(p, EFBCopyFormat::RA8, ApiType);
WRITE(p, " float depth;\n"); WRITE(p, " float depth;\n");
WRITE(p, " float3 expanded;\n"); WRITE(p, " float3 expanded;\n");
// byte order is reversed // byte order is reversed
WriteSampleColor(p, "r", "depth", 0, ApiType, format, true); WriteSampleColor(p, "r", "depth", 0, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -592,7 +567,7 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WRITE(p, " ocol0.b = expanded.g / 255.0;\n"); WRITE(p, " ocol0.b = expanded.g / 255.0;\n");
WRITE(p, " ocol0.g = expanded.r / 255.0;\n"); WRITE(p, " ocol0.g = expanded.r / 255.0;\n");
WriteSampleColor(p, "r", "depth", 1, ApiType, format, true); WriteSampleColor(p, "r", "depth", 1, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -605,16 +580,16 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_CTF_Z16L, ApiType); WriteSwizzler(p, EFBCopyFormat::GB8, ApiType);
WRITE(p, " float depth;\n"); WRITE(p, " float depth;\n");
WRITE(p, " float3 expanded;\n"); WRITE(p, " float3 expanded;\n");
// byte order is reversed // byte order is reversed
WriteSampleColor(p, "r", "depth", 0, ApiType, format, true); WriteSampleColor(p, "r", "depth", 0, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -626,7 +601,7 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyFormat& for
WRITE(p, " ocol0.b = expanded.b / 255.0;\n"); WRITE(p, " ocol0.b = expanded.b / 255.0;\n");
WRITE(p, " ocol0.g = expanded.g / 255.0;\n"); WRITE(p, " ocol0.g = expanded.g / 255.0;\n");
WriteSampleColor(p, "r", "depth", 1, ApiType, format, true); WriteSampleColor(p, "r", "depth", 1, ApiType, params);
WRITE(p, " depth *= 16777216.0;\n"); WRITE(p, " depth *= 16777216.0;\n");
WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n"); WRITE(p, " expanded.r = floor(depth / (256.0 * 256.0));\n");
@ -641,17 +616,17 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyFormat& for
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyFormat& format) static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& params)
{ {
WriteSwizzler(p, GX_TF_Z24X8, ApiType); WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType);
WRITE(p, " float depth0;\n"); WRITE(p, " float depth0;\n");
WRITE(p, " float depth1;\n"); WRITE(p, " float depth1;\n");
WRITE(p, " float3 expanded0;\n"); WRITE(p, " float3 expanded0;\n");
WRITE(p, " float3 expanded1;\n"); WRITE(p, " float3 expanded1;\n");
WriteSampleColor(p, "r", "depth0", 0, ApiType, format, true); WriteSampleColor(p, "r", "depth0", 0, ApiType, params);
WriteSampleColor(p, "r", "depth1", 1, ApiType, format, true); WriteSampleColor(p, "r", "depth1", 1, ApiType, params);
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
{ {
@ -681,87 +656,81 @@ static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyFormat& form
WriteEncoderEnd(p); WriteEncoderEnd(p);
} }
const char* GenerateEncodingShader(const EFBCopyFormat& format, APIType api_type) const char* GenerateEncodingShader(const EFBCopyParams& params, APIType api_type)
{ {
text[sizeof(text) - 1] = 0x7C; // canary text[sizeof(text) - 1] = 0x7C; // canary
char* p = text; char* p = text;
switch (format.copy_format) switch (params.copy_format)
{ {
case GX_TF_I4: case EFBCopyFormat::R4:
WriteI4Encoder(p, api_type, format); if (params.yuv)
WriteI4Encoder(p, api_type, params);
else
WriteC4Encoder(p, "r", api_type, params);
break; break;
case GX_TF_I8: case EFBCopyFormat::RA4:
WriteI8Encoder(p, api_type, format); if (params.yuv)
WriteIA4Encoder(p, api_type, params);
else
WriteCC4Encoder(p, "ar", api_type, params);
break; break;
case GX_TF_IA4: case EFBCopyFormat::RA8:
WriteIA4Encoder(p, api_type, format); if (params.yuv)
WriteIA8Encoder(p, api_type, params);
else
WriteCC8Encoder(p, "ar", api_type, params);
break; break;
case GX_TF_IA8: case EFBCopyFormat::RGB565:
WriteIA8Encoder(p, api_type, format); WriteRGB565Encoder(p, api_type, params);
break; break;
case GX_TF_RGB565: case EFBCopyFormat::RGB5A3:
WriteRGB565Encoder(p, api_type, format); WriteRGB5A3Encoder(p, api_type, params);
break; break;
case GX_TF_RGB5A3: case EFBCopyFormat::RGBA8:
WriteRGB5A3Encoder(p, api_type, format); if (params.depth)
WriteZ24Encoder(p, api_type, params);
else
WriteRGBA8Encoder(p, api_type, params);
break; break;
case GX_TF_RGBA8: case EFBCopyFormat::A8:
WriteRGBA8Encoder(p, api_type, format); WriteC8Encoder(p, "a", api_type, params);
break; break;
case GX_CTF_R4: case EFBCopyFormat::R8_0x1:
WriteC4Encoder(p, "r", api_type, format, false); case EFBCopyFormat::R8:
if (params.yuv)
WriteI8Encoder(p, api_type, params);
else
WriteC8Encoder(p, "r", api_type, params);
break; break;
case GX_CTF_RA4: case EFBCopyFormat::G8:
WriteCC4Encoder(p, "ar", api_type, format); if (params.depth)
WriteZ8Encoder(p, "256.0", api_type, params); // Z8M
else
WriteC8Encoder(p, "g", api_type, params);
break; break;
case GX_CTF_RA8: case EFBCopyFormat::B8:
WriteCC8Encoder(p, "ar", api_type, format); if (params.depth)
WriteZ8Encoder(p, "65536.0", api_type, params); // Z8L
else
WriteC8Encoder(p, "b", api_type, params);
break; break;
case GX_CTF_A8: case EFBCopyFormat::RG8:
WriteC8Encoder(p, "a", api_type, format, false); if (params.depth)
WriteZ16Encoder(p, api_type, params); // Z16H
else
WriteCC8Encoder(p, "rg", api_type, params);
break; break;
case GX_CTF_R8: case EFBCopyFormat::GB8:
WriteC8Encoder(p, "r", api_type, format, false); if (params.depth)
break; WriteZ16LEncoder(p, api_type, params); // Z16L
case GX_CTF_G8: else
WriteC8Encoder(p, "g", api_type, format, false); WriteCC8Encoder(p, "gb", api_type, params);
break;
case GX_CTF_B8:
WriteC8Encoder(p, "b", api_type, format, false);
break;
case GX_CTF_RG8:
WriteCC8Encoder(p, "rg", api_type, format);
break;
case GX_CTF_GB8:
WriteCC8Encoder(p, "gb", api_type, format);
break;
case GX_CTF_Z8H:
case GX_TF_Z8:
WriteC8Encoder(p, "r", api_type, format, true);
break;
case GX_CTF_Z16R:
case GX_TF_Z16:
WriteZ16Encoder(p, api_type, format);
break;
case GX_TF_Z24X8:
WriteZ24Encoder(p, api_type, format);
break;
case GX_CTF_Z4:
WriteC4Encoder(p, "r", api_type, format, true);
break;
case GX_CTF_Z8M:
WriteZ8Encoder(p, "256.0", api_type, format);
break;
case GX_CTF_Z8L:
WriteZ8Encoder(p, "65536.0", api_type, format);
break;
case GX_CTF_Z16L:
WriteZ16LEncoder(p, api_type, format);
break; break;
default: default:
PanicAlert("Unknown texture copy format: 0x%x\n", static_cast<u32>(format.copy_format)); PanicAlert("Invalid EFB Copy Format (0x%X)! (GenerateEncodingShader)",
static_cast<int>(params.copy_format));
break; break;
} }
@ -895,7 +864,7 @@ vec4 GetPaletteColorNormalized(uint index)
)"; )";
static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
{GX_TF_I4, {TextureFormat::I4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -930,7 +899,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
} }
)"}}, )"}},
{GX_TF_IA4, {TextureFormat::IA4,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -950,7 +919,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
} }
)"}}, )"}},
{GX_TF_I8, {TextureFormat::I8,
{BUFFER_FORMAT_R8_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R8_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -968,7 +937,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
} }
)"}}, )"}},
{GX_TF_IA8, {TextureFormat::IA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -987,7 +956,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
} }
)"}}, )"}},
{GX_TF_RGB565, {TextureFormat::RGB565,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -1011,7 +980,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
} }
)"}}, )"}},
{GX_TF_RGB5A3, {TextureFormat::RGB5A3,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -1045,7 +1014,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
} }
)"}}, )"}},
{GX_TF_RGBA8, {TextureFormat::RGBA8,
{BUFFER_FORMAT_R16_UINT, 0, 8, 8, false, {BUFFER_FORMAT_R16_UINT, 0, 8, 8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -1081,7 +1050,7 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
} }
)"}}, )"}},
{GX_TF_CMPR, {TextureFormat::CMPR,
{BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true, {BUFFER_FORMAT_R32G32_UINT, 0, 64, 1, true,
R"( R"(
// In the compute version of this decoder, we flatten the blocks to a one-dimension array. // In the compute version of this decoder, we flatten the blocks to a one-dimension array.
@ -1199,8 +1168,9 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color); imageStore(output_image, ivec3(ivec2(uvec2(global_x, global_y)), 0), norm_color);
} }
)"}}, )"}},
{GX_TF_C4, {TextureFormat::C4,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C4)), 8, 8, false, {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C4)), 8, 8,
false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -1227,8 +1197,9 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
} }
)"}}, )"}},
{GX_TF_C8, {TextureFormat::C8,
{BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C8)), 8, 8, false, {BUFFER_FORMAT_R8_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C8)), 8, 8,
false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -1243,8 +1214,9 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
imageStore(output_image, ivec3(ivec2(coords), 0), norm_color); imageStore(output_image, ivec3(ivec2(coords), 0), norm_color);
} }
)"}}, )"}},
{GX_TF_C14X2, {TextureFormat::C14X2,
{BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(GX_TF_C14X2)), 8, 8, false, {BUFFER_FORMAT_R16_UINT, static_cast<u32>(TexDecoder_GetPaletteSize(TextureFormat::C14X2)), 8,
8, false,
R"( R"(
layout(local_size_x = 8, local_size_y = 8) in; layout(local_size_x = 8, local_size_y = 8) in;
@ -1287,7 +1259,7 @@ std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width,
(height + (info->group_size_y - 1)) / info->group_size_y}; (height + (info->group_size_y - 1)) / info->group_size_y};
} }
std::string GenerateDecodingShader(TextureFormat format, TlutFormat palette_format, std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format,
APIType api_type) APIType api_type)
{ {
const DecodingShaderInfo* info = GetDecodingShaderInfo(format); const DecodingShaderInfo* info = GetDecodingShaderInfo(format);
@ -1297,13 +1269,13 @@ std::string GenerateDecodingShader(TextureFormat format, TlutFormat palette_form
std::stringstream ss; std::stringstream ss;
switch (palette_format) switch (palette_format)
{ {
case GX_TL_IA8: case TLUTFormat::IA8:
ss << "#define PALETTE_FORMAT_IA8 1\n"; ss << "#define PALETTE_FORMAT_IA8 1\n";
break; break;
case GX_TL_RGB565: case TLUTFormat::RGB565:
ss << "#define PALETTE_FORMAT_RGB565 1\n"; ss << "#define PALETTE_FORMAT_RGB565 1\n";
break; break;
case GX_TL_RGB5A3: case TLUTFormat::RGB5A3:
ss << "#define PALETTE_FORMAT_RGB5A3 1\n"; ss << "#define PALETTE_FORMAT_RGB5A3 1\n";
break; break;
} }

View File

@ -8,15 +8,18 @@
#include <utility> #include <utility>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "VideoCommon/TextureDecoder.h"
enum class APIType; enum class APIType;
enum class TextureFormat;
enum class EFBCopyFormat;
enum class TLUTFormat;
struct EFBCopyParams;
namespace TextureConversionShader namespace TextureConversionShader
{ {
u16 GetEncodedSampleCount(u32 format); u16 GetEncodedSampleCount(EFBCopyFormat format);
const char* GenerateEncodingShader(const EFBCopyFormat& format, APIType ApiType); const char* GenerateEncodingShader(const EFBCopyParams& params, APIType ApiType);
// View format of the input data to the texture decoding shader. // View format of the input data to the texture decoding shader.
enum BufferFormat enum BufferFormat
@ -51,7 +54,7 @@ u32 GetBytesPerBufferElement(BufferFormat buffer_format);
std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height); std::pair<u32, u32> GetDispatchCount(const DecodingShaderInfo* info, u32 width, u32 height);
// Returns the GLSL string containing the texture decoding shader for the specified format. // Returns the GLSL string containing the texture decoding shader for the specified format.
std::string GenerateDecodingShader(TextureFormat format, TlutFormat palette_format, std::string GenerateDecodingShader(TextureFormat format, TLUTFormat palette_format,
APIType api_type); APIType api_type);
} // namespace TextureConversionShader } // namespace TextureConversionShader

View File

@ -14,94 +14,101 @@ enum
}; };
alignas(16) extern u8 texMem[TMEM_SIZE]; alignas(16) extern u8 texMem[TMEM_SIZE];
enum TextureFormat enum class TextureFormat
{ {
// These are the texture formats that can be read by the texture mapper. // These values represent texture format in GX registers.
GX_TF_I4 = 0x0, I4 = 0x0,
GX_TF_I8 = 0x1, I8 = 0x1,
GX_TF_IA4 = 0x2, IA4 = 0x2,
GX_TF_IA8 = 0x3, IA8 = 0x3,
GX_TF_RGB565 = 0x4, RGB565 = 0x4,
GX_TF_RGB5A3 = 0x5, RGB5A3 = 0x5,
GX_TF_RGBA8 = 0x6, RGBA8 = 0x6,
GX_TF_C4 = 0x8, C4 = 0x8,
GX_TF_C8 = 0x9, C8 = 0x9,
GX_TF_C14X2 = 0xA, C14X2 = 0xA,
GX_TF_CMPR = 0xE, CMPR = 0xE,
_GX_TF_ZTF = 0x10, // flag for Z texture formats (used internally by dolphin)
// Depth texture formats (which directly map to the equivalent colour format above.)
GX_TF_Z8 = 0x1 | _GX_TF_ZTF,
GX_TF_Z16 = 0x3 | _GX_TF_ZTF,
GX_TF_Z24X8 = 0x6 | _GX_TF_ZTF,
_GX_TF_CTF = 0x20, // flag for copy-texture-format only (used internally by dolphin)
// These are extra formats that can be used when copying from efb,
// they use one of texel formats from above, but pack diffrent data into them.
GX_CTF_R4 = 0x0 | _GX_TF_CTF,
GX_CTF_RA4 = 0x2 | _GX_TF_CTF,
GX_CTF_RA8 = 0x3 | _GX_TF_CTF,
GX_CTF_YUVA8 = 0x6 | _GX_TF_CTF, // YUV 4:4:4 - Dolphin doesn't implement this format as no
// commercial games use it
GX_CTF_A8 = 0x7 | _GX_TF_CTF,
GX_CTF_R8 = 0x8 | _GX_TF_CTF,
GX_CTF_G8 = 0x9 | _GX_TF_CTF,
GX_CTF_B8 = 0xA | _GX_TF_CTF,
GX_CTF_RG8 = 0xB | _GX_TF_CTF,
GX_CTF_GB8 = 0xC | _GX_TF_CTF,
// extra depth texture formats that can be used for efb copies.
GX_CTF_Z4 = 0x0 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8H = 0x8 | _GX_TF_ZTF | _GX_TF_CTF, // This produces an identical result to to GX_TF_Z8
GX_CTF_Z8M = 0x9 | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z8L = 0xA | _GX_TF_ZTF | _GX_TF_CTF,
GX_CTF_Z16R = 0xB | _GX_TF_ZTF | _GX_TF_CTF, // Reversed version of GX_TF_Z16
GX_CTF_Z16L = 0xC | _GX_TF_ZTF | _GX_TF_CTF,
}; };
enum TlutFormat static inline bool IsColorIndexed(TextureFormat format)
{ {
GX_TL_IA8 = 0x0, return format == TextureFormat::C4 || format == TextureFormat::C8 ||
GX_TL_RGB565 = 0x1, format == TextureFormat::C14X2;
GX_TL_RGB5A3 = 0x2, }
// The EFB Copy pipeline looks like:
//
// 1. Read EFB -> 2. Select color/depth -> 3. Downscale (optional)
// -> 4. YUV conversion (optional) -> 5. Encode Tiles -> 6. Write RAM
//
// The "Encode Tiles" stage receives RGBA8 texels from previous stages and encodes them to various
// formats. EFBCopyFormat is the tile encoder mode. Note that the tile encoder does not care about
// color vs. depth or intensity formats - it only sees RGBA8 texels.
enum class EFBCopyFormat
{
// These values represent EFB copy format in GX registers.
// Most (but not all) of these values correspond to values of TextureFormat.
R4 = 0x0, // R4, I4, Z4
// FIXME: Does 0x1 (Z8) have identical results to 0x8 (Z8H)?
// Is either or both of 0x1 and 0x8 used in games?
R8_0x1 = 0x1, // R8, I8, Z8H (?)
RA4 = 0x2, // RA4, IA4
// FIXME: Earlier versions of this file named the value 0x3 "GX_TF_Z16", which does not reflect
// the results one would expect when copying from the depth buffer with this format.
// For reference: When copying from the depth buffer, R should receive the top 8 bits of
// the Z value, and A should be either 0xFF or 0 (please investigate).
// Please test original hardware and make sure dolphin-emu implements this format
// correctly.
RA8 = 0x3, // RA8, IA8, (FIXME: Z16 too?)
RGB565 = 0x4,
RGB5A3 = 0x5,
RGBA8 = 0x6, // RGBA8, Z24
A8 = 0x7,
R8 = 0x8, // R8, I8, Z8H
G8 = 0x9, // G8, Z8M
B8 = 0xA, // B8, Z8L
RG8 = 0xB, // RG8, Z16R (Note: G and R are reversed)
GB8 = 0xC, // GB8, Z16L
}; };
struct EFBCopyFormat enum class TLUTFormat
{ {
EFBCopyFormat(u32 efb_format_, TextureFormat copy_format_) // These values represent TLUT format in GX registers.
: efb_format(efb_format_), copy_format(copy_format_) IA8 = 0x0,
{ RGB565 = 0x1,
} RGB5A3 = 0x2,
bool operator<(const EFBCopyFormat& rhs) const
{
return std::tie(efb_format, copy_format) < std::tie(rhs.efb_format, rhs.copy_format);
}
u32 efb_format;
TextureFormat copy_format;
}; };
int TexDecoder_GetTexelSizeInNibbles(int format); static inline bool IsValidTLUTFormat(TLUTFormat tlutfmt)
int TexDecoder_GetTextureSizeInBytes(int width, int height, int format); {
int TexDecoder_GetBlockWidthInTexels(u32 format); return tlutfmt == TLUTFormat::IA8 || tlutfmt == TLUTFormat::RGB565 ||
int TexDecoder_GetBlockHeightInTexels(u32 format); tlutfmt == TLUTFormat::RGB5A3;
int TexDecoder_GetPaletteSize(int fmt); }
int TexDecoder_GetEfbCopyBaseFormat(int format);
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, int texformat, const u8* tlut, int TexDecoder_GetTexelSizeInNibbles(TextureFormat format);
TlutFormat tlutfmt); int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format);
int TexDecoder_GetBlockWidthInTexels(TextureFormat format);
int TexDecoder_GetBlockHeightInTexels(TextureFormat format);
int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format);
int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format);
int TexDecoder_GetPaletteSize(TextureFormat fmt);
TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format);
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TLUTFormat tlutfmt);
void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int width, void TexDecoder_DecodeRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int width,
int height); int height);
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth, int texformat, void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth,
const u8* tlut, TlutFormat tlutfmt); TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt);
void TexDecoder_DecodeTexelRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int s, int t, void TexDecoder_DecodeTexelRGBA8FromTmem(u8* dst, const u8* src_ar, const u8* src_gb, int s, int t,
int imageWidth); int imageWidth);
void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center); void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center);
/* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */ /* Internal method, implemented by TextureDecoder_Generic and TextureDecoder_x64. */
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat, void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TlutFormat tlutfmt); const u8* tlut, TLUTFormat tlutfmt);

View File

@ -21,227 +21,176 @@ static bool TexFmt_Overlay_Center = false;
// STATE_TO_SAVE // STATE_TO_SAVE
alignas(16) u8 texMem[TMEM_SIZE]; alignas(16) u8 texMem[TMEM_SIZE];
int TexDecoder_GetTexelSizeInNibbles(int format) int TexDecoder_GetTexelSizeInNibbles(TextureFormat format)
{ {
switch (format & 0x3f) switch (format)
{ {
case GX_TF_I4: // 4-bit formats
case TextureFormat::I4:
case TextureFormat::C4:
return 1; return 1;
case GX_TF_I8: // 8-bit formats
case TextureFormat::I8:
case TextureFormat::IA4:
case TextureFormat::C8:
return 2; return 2;
case GX_TF_IA4: // 16-bit formats
return 2; case TextureFormat::IA8:
case GX_TF_IA8: case TextureFormat::RGB565:
case TextureFormat::RGB5A3:
case TextureFormat::C14X2:
return 4; return 4;
case GX_TF_RGB565: // 32-bit formats
return 4; case TextureFormat::RGBA8:
case GX_TF_RGB5A3:
return 4;
case GX_TF_RGBA8:
return 8; return 8;
case GX_TF_C4: // Compressed format
case TextureFormat::CMPR:
return 1; return 1;
case GX_TF_C8:
return 2;
case GX_TF_C14X2:
return 4;
case GX_TF_CMPR:
return 1;
case GX_CTF_R4:
return 1;
case GX_CTF_RA4:
return 2;
case GX_CTF_RA8:
return 4;
case GX_CTF_A8:
return 2;
case GX_CTF_R8:
return 2;
case GX_CTF_G8:
return 2;
case GX_CTF_B8:
return 2;
case GX_CTF_RG8:
return 4;
case GX_CTF_GB8:
return 4;
case GX_TF_Z8:
return 2;
case GX_TF_Z16:
return 4;
case GX_TF_Z24X8:
return 8;
case GX_CTF_Z4:
return 1;
case GX_CTF_Z8H:
return 2;
case GX_CTF_Z8M:
return 2;
case GX_CTF_Z8L:
return 2;
case GX_CTF_Z16R:
return 4;
case GX_CTF_Z16L:
return 4;
default: default:
PanicAlert("Unsupported Texture Format (%08x)! (GetTexelSizeInNibbles)", format); PanicAlert("Invalid Texture Format (0x%X)! (GetTexelSizeInNibbles)", static_cast<int>(format));
return 1; return 1;
} }
} }
int TexDecoder_GetTextureSizeInBytes(int width, int height, int format) int TexDecoder_GetTextureSizeInBytes(int width, int height, TextureFormat format)
{ {
return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2; return (width * height * TexDecoder_GetTexelSizeInNibbles(format)) / 2;
} }
int TexDecoder_GetBlockWidthInTexels(u32 format) int TexDecoder_GetBlockWidthInTexels(TextureFormat format)
{ {
switch (format) switch (format)
{ {
case GX_TF_I4: // 4-bit formats
case TextureFormat::I4:
case TextureFormat::C4:
return 8; return 8;
case GX_TF_I8: // 8-bit formats
case TextureFormat::I8:
case TextureFormat::IA4:
case TextureFormat::C8:
return 8; return 8;
case GX_TF_IA4: // 16-bit formats
return 8; case TextureFormat::IA8:
case GX_TF_IA8: case TextureFormat::RGB565:
case TextureFormat::RGB5A3:
case TextureFormat::C14X2:
return 4; return 4;
case GX_TF_RGB565: // 32-bit formats
case TextureFormat::RGBA8:
return 4; return 4;
case GX_TF_RGB5A3: // Compressed format
return 4; case TextureFormat::CMPR:
case GX_TF_RGBA8:
return 4;
case GX_TF_C4:
return 8; return 8;
case GX_TF_C8:
return 8;
case GX_TF_C14X2:
return 4;
case GX_TF_CMPR:
return 8;
case GX_CTF_R4:
return 8;
case GX_CTF_RA4:
return 8;
case GX_CTF_RA8:
return 4;
case GX_CTF_A8:
return 8;
case GX_CTF_R8:
return 8;
case GX_CTF_G8:
return 8;
case GX_CTF_B8:
return 8;
case GX_CTF_RG8:
return 4;
case GX_CTF_GB8:
return 4;
case GX_TF_Z8:
return 8;
case GX_TF_Z16:
return 4;
case GX_TF_Z24X8:
return 4;
case GX_CTF_Z4:
return 8;
case GX_CTF_Z8H:
return 8;
case GX_CTF_Z8M:
return 8;
case GX_CTF_Z8L:
return 8;
case GX_CTF_Z16R:
return 4;
case GX_CTF_Z16L:
return 4;
default: default:
PanicAlert("Unsupported Texture Format (%08x)! (GetBlockWidthInTexels)", format); PanicAlert("Invalid Texture Format (0x%X)! (GetBlockWidthInTexels)", static_cast<int>(format));
return 8; return 8;
} }
} }
int TexDecoder_GetBlockHeightInTexels(u32 format) int TexDecoder_GetBlockHeightInTexels(TextureFormat format)
{ {
switch (format) switch (format)
{ {
case GX_TF_I4: // 4-bit formats
case TextureFormat::I4:
case TextureFormat::C4:
return 8; return 8;
case GX_TF_I8: // 8-bit formats
case TextureFormat::I8:
case TextureFormat::IA4:
case TextureFormat::C8:
return 4; return 4;
case GX_TF_IA4: // 16-bit formats
case TextureFormat::IA8:
case TextureFormat::RGB565:
case TextureFormat::RGB5A3:
case TextureFormat::C14X2:
return 4; return 4;
case GX_TF_IA8: // 32-bit formats
case TextureFormat::RGBA8:
return 4; return 4;
case GX_TF_RGB565: // Compressed format
return 4; case TextureFormat::CMPR:
case GX_TF_RGB5A3:
return 4;
case GX_TF_RGBA8:
return 4;
case GX_TF_C4:
return 8; return 8;
case GX_TF_C8: default:
PanicAlert("Invalid Texture Format (0x%X)! (GetBlockHeightInTexels)", static_cast<int>(format));
return 4; return 4;
case GX_TF_C14X2: }
return 4; }
case GX_TF_CMPR:
int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format)
{
switch (format)
{
// 4-bit formats
case EFBCopyFormat::R4:
return 8; return 8;
case GX_CTF_R4: // 8-bit formats
case EFBCopyFormat::A8:
case EFBCopyFormat::R8_0x1:
case EFBCopyFormat::R8:
case EFBCopyFormat::G8:
case EFBCopyFormat::B8:
return 8; return 8;
case GX_CTF_RA4: // 16-bit formats
case EFBCopyFormat::RA8:
case EFBCopyFormat::RGB565:
case EFBCopyFormat::RGB5A3:
case EFBCopyFormat::RG8:
case EFBCopyFormat::GB8:
return 4; return 4;
case GX_CTF_RA8: // 32-bit formats
return 4; case EFBCopyFormat::RGBA8:
case GX_CTF_A8:
return 4;
case GX_CTF_R8:
return 4;
case GX_CTF_G8:
return 4;
case GX_CTF_B8:
return 4;
case GX_CTF_RG8:
return 4;
case GX_CTF_GB8:
return 4;
case GX_TF_Z8:
return 4;
case GX_TF_Z16:
return 4;
case GX_TF_Z24X8:
return 4;
case GX_CTF_Z4:
return 8;
case GX_CTF_Z8H:
return 4;
case GX_CTF_Z8M:
return 4;
case GX_CTF_Z8L:
return 4;
case GX_CTF_Z16R:
return 4;
case GX_CTF_Z16L:
return 4; return 4;
default: default:
PanicAlert("Unsupported Texture Format (%08x)! (GetBlockHeightInTexels)", format); PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBlockWidthInTexels)",
static_cast<int>(format));
return 8;
}
}
int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format)
{
switch (format)
{
// 4-bit formats
case EFBCopyFormat::R4:
return 8;
// 8-bit formats
case EFBCopyFormat::A8:
case EFBCopyFormat::R8_0x1:
case EFBCopyFormat::R8:
case EFBCopyFormat::G8:
case EFBCopyFormat::B8:
return 4;
// 16-bit formats
case EFBCopyFormat::RA8:
case EFBCopyFormat::RGB565:
case EFBCopyFormat::RGB5A3:
case EFBCopyFormat::RG8:
case EFBCopyFormat::GB8:
return 4;
// 32-bit formats
case EFBCopyFormat::RGBA8:
return 4;
default:
PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBlockHeightInTexels)",
static_cast<int>(format));
return 4; return 4;
} }
} }
// returns bytes // returns bytes
int TexDecoder_GetPaletteSize(int format) int TexDecoder_GetPaletteSize(TextureFormat format)
{ {
switch (format) switch (format)
{ {
case GX_TF_C4: case TextureFormat::C4:
return 16 * 2; return 16 * 2;
case GX_TF_C8: case TextureFormat::C8:
return 256 * 2; return 256 * 2;
case GX_TF_C14X2: case TextureFormat::C14X2:
return 16384 * 2; return 16384 * 2;
default: default:
return 0; return 0;
@ -251,51 +200,33 @@ int TexDecoder_GetPaletteSize(int format)
// Get the "in memory" texture format of an EFB copy's format. // Get the "in memory" texture format of an EFB copy's format.
// With the exception of c4/c8/c14 paletted texture formats (which are handled elsewhere) // With the exception of c4/c8/c14 paletted texture formats (which are handled elsewhere)
// this is the format the game should be using when it is drawing an EFB copy back. // this is the format the game should be using when it is drawing an EFB copy back.
int TexDecoder_GetEfbCopyBaseFormat(int format) TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format)
{ {
switch (format) switch (format)
{ {
case GX_TF_I4: case EFBCopyFormat::R4:
case GX_CTF_Z4: return TextureFormat::I4;
case GX_CTF_R4: case EFBCopyFormat::A8:
return GX_TF_I4; case EFBCopyFormat::R8_0x1:
case GX_TF_I8: case EFBCopyFormat::R8:
case GX_CTF_A8: case EFBCopyFormat::G8:
case GX_CTF_R8: case EFBCopyFormat::B8:
case GX_CTF_G8: return TextureFormat::I8;
case GX_CTF_B8: case EFBCopyFormat::RA4:
case GX_TF_Z8: return TextureFormat::IA4;
case GX_CTF_Z8H: case EFBCopyFormat::RA8:
case GX_CTF_Z8M: case EFBCopyFormat::RG8:
case GX_CTF_Z8L: case EFBCopyFormat::GB8:
return GX_TF_I8; return TextureFormat::IA8;
case GX_TF_IA4: case EFBCopyFormat::RGB565:
case GX_CTF_RA4: return TextureFormat::RGB565;
return GX_TF_IA4; case EFBCopyFormat::RGB5A3:
case GX_TF_IA8: return TextureFormat::RGB5A3;
case GX_TF_Z16: case EFBCopyFormat::RGBA8:
case GX_CTF_RA8: return TextureFormat::RGBA8;
case GX_CTF_RG8:
case GX_CTF_GB8:
case GX_CTF_Z16R:
case GX_CTF_Z16L:
return GX_TF_IA8;
case GX_TF_RGB565:
return GX_TF_RGB565;
case GX_TF_RGB5A3:
return GX_TF_RGB5A3;
case GX_TF_RGBA8:
case GX_TF_Z24X8:
case GX_CTF_YUVA8:
return GX_TF_RGBA8;
// These formats can't be (directly) generated by EFB copies
case GX_TF_C4:
case GX_TF_C8:
case GX_TF_C14X2:
case GX_TF_CMPR:
default: default:
PanicAlert("Unsupported Texture Format (%08x)! (GetEfbCopyBaseFormat)", format); PanicAlert("Invalid EFB Copy Format (0x%X)! (GetEFBCopyBaseFormat)", static_cast<int>(format));
return format & 0xf; return static_cast<TextureFormat>(format);
} }
} }
@ -320,7 +251,7 @@ static const char* texfmt[] = {
"CZ16L", "0x3D", "0x3E", "0x3F", "CZ16L", "0x3D", "0x3E", "0x3F",
}; };
static void TexDecoder_DrawOverlay(u8* dst, int width, int height, int texformat) static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat texformat)
{ {
int w = std::min(width, 40); int w = std::min(width, 40);
int h = std::min(height, 10); int h = std::min(height, 10);
@ -334,7 +265,7 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, int texformat
yoff = 0; yoff = 0;
} }
const char* fmt = texfmt[texformat & 15]; const char* fmt = texfmt[static_cast<int>(texformat) & 15];
while (*fmt) while (*fmt)
{ {
int xcnt = 0; int xcnt = 0;
@ -363,8 +294,8 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, int texformat
} }
} }
void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, int texformat, const u8* tlut, void TexDecoder_Decode(u8* dst, const u8* src, int width, int height, TextureFormat texformat,
TlutFormat tlutfmt) const u8* tlut, TLUTFormat tlutfmt)
{ {
_TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlut, tlutfmt); _TexDecoder_DecodeImpl((u32*)dst, src, width, height, texformat, tlut, tlutfmt);
@ -409,23 +340,23 @@ static inline u32 DecodePixel_RGB5A3(u16 val)
return r | (g << 8) | (b << 16) | (a << 24); return r | (g << 8) | (b << 16) | (a << 24);
} }
static inline u32 DecodePixel_Paletted(u16 pixel, TlutFormat tlutfmt) static inline u32 DecodePixel_Paletted(u16 pixel, TLUTFormat tlutfmt)
{ {
switch (tlutfmt) switch (tlutfmt)
{ {
case GX_TL_IA8: case TLUTFormat::IA8:
return DecodePixel_IA8(pixel); return DecodePixel_IA8(pixel);
case GX_TL_RGB565: case TLUTFormat::RGB565:
return DecodePixel_RGB565(Common::swap16(pixel)); return DecodePixel_RGB565(Common::swap16(pixel));
case GX_TL_RGB5A3: case TLUTFormat::RGB5A3:
return DecodePixel_RGB5A3(Common::swap16(pixel)); return DecodePixel_RGB5A3(Common::swap16(pixel));
default: default:
return 0; return 0;
} }
} }
void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth, int texformat, void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth,
const u8* tlut_, TlutFormat tlutfmt) TextureFormat texformat, const u8* tlut_, TLUTFormat tlutfmt)
{ {
/* General formula for computing texture offset /* General formula for computing texture offset
// //
@ -440,7 +371,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
switch (texformat) switch (texformat)
{ {
case GX_TF_C4: case TextureFormat::C4:
{ {
u16 sBlk = s >> 3; u16 sBlk = s >> 3;
u16 tBlk = t >> 3; u16 tBlk = t >> 3;
@ -459,7 +390,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt); *((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
} }
break; break;
case GX_TF_I4: case TextureFormat::I4:
{ {
u16 sBlk = s >> 3; u16 sBlk = s >> 3;
u16 tBlk = t >> 3; u16 tBlk = t >> 3;
@ -480,7 +411,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[3] = val; dst[3] = val;
} }
break; break;
case GX_TF_I8: case TextureFormat::I8:
{ {
u16 sBlk = s >> 3; u16 sBlk = s >> 3;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -497,7 +428,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[3] = val; dst[3] = val;
} }
break; break;
case GX_TF_C8: case TextureFormat::C8:
{ {
u16 sBlk = s >> 3; u16 sBlk = s >> 3;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -513,7 +444,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt); *((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
} }
break; break;
case GX_TF_IA4: case TextureFormat::IA4:
{ {
u16 sBlk = s >> 3; u16 sBlk = s >> 3;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -532,7 +463,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[3] = a; dst[3] = a;
} }
break; break;
case GX_TF_IA8: case TextureFormat::IA8:
{ {
u16 sBlk = s >> 2; u16 sBlk = s >> 2;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -548,7 +479,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_IA8(*valAddr); *((u32*)dst) = DecodePixel_IA8(*valAddr);
} }
break; break;
case GX_TF_C14X2: case TextureFormat::C14X2:
{ {
u16 sBlk = s >> 2; u16 sBlk = s >> 2;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -567,7 +498,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt); *((u32*)dst) = DecodePixel_Paletted(tlut[val], tlutfmt);
} }
break; break;
case GX_TF_RGB565: case TextureFormat::RGB565:
{ {
u16 sBlk = s >> 2; u16 sBlk = s >> 2;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -583,7 +514,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_RGB565(Common::swap16(*valAddr)); *((u32*)dst) = DecodePixel_RGB565(Common::swap16(*valAddr));
} }
break; break;
case GX_TF_RGB5A3: case TextureFormat::RGB5A3:
{ {
u16 sBlk = s >> 2; u16 sBlk = s >> 2;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -599,7 +530,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
*((u32*)dst) = DecodePixel_RGB5A3(Common::swap16(*valAddr)); *((u32*)dst) = DecodePixel_RGB5A3(Common::swap16(*valAddr));
} }
break; break;
case GX_TF_RGBA8: case TextureFormat::RGBA8:
{ {
u16 sBlk = s >> 2; u16 sBlk = s >> 2;
u16 tBlk = t >> 2; u16 tBlk = t >> 2;
@ -618,7 +549,7 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
dst[2] = valAddr[33]; dst[2] = valAddr[33];
} }
break; break;
case GX_TF_CMPR: case TextureFormat::CMPR:
{ {
u16 sDxt = s >> 2; u16 sDxt = s >> 2;
u16 tDxt = t >> 2; u16 tDxt = t >> 2;

View File

@ -57,22 +57,22 @@ static inline u32 DecodePixel_RGB5A3(u16 val)
return r | (g << 8) | (b << 16) | (a << 24); return r | (g << 8) | (b << 16) | (a << 24);
} }
static inline u32 DecodePixel_Paletted(u16 pixel, TlutFormat tlutfmt) static inline u32 DecodePixel_Paletted(u16 pixel, TLUTFormat tlutfmt)
{ {
switch (tlutfmt) switch (tlutfmt)
{ {
case GX_TL_IA8: case TLUTFormat::IA8:
return DecodePixel_IA8(pixel); return DecodePixel_IA8(pixel);
case GX_TL_RGB565: case TLUTFormat::RGB565:
return DecodePixel_RGB565(Common::swap16(pixel)); return DecodePixel_RGB565(Common::swap16(pixel));
case GX_TL_RGB5A3: case TLUTFormat::RGB5A3:
return DecodePixel_RGB5A3(Common::swap16(pixel)); return DecodePixel_RGB5A3(Common::swap16(pixel));
default: default:
return 0; return 0;
} }
} }
static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, TlutFormat tlutfmt) static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, TLUTFormat tlutfmt)
{ {
const u16* tlut = (u16*)tlut_; const u16* tlut = (u16*)tlut_;
for (int x = 0; x < 4; x++) for (int x = 0; x < 4; x++)
@ -83,7 +83,7 @@ static inline void DecodeBytes_C4(u32* dst, const u8* src, const u8* tlut_, Tlut
} }
} }
static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, TlutFormat tlutfmt) static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, TLUTFormat tlutfmt)
{ {
const u16* tlut = (u16*)tlut_; const u16* tlut = (u16*)tlut_;
for (int x = 0; x < 8; x++) for (int x = 0; x < 8; x++)
@ -93,7 +93,7 @@ static inline void DecodeBytes_C8(u32* dst, const u8* src, const u8* tlut_, Tlut
} }
} }
static inline void DecodeBytes_C14X2(u32* dst, const u16* src, const u8* tlut_, TlutFormat tlutfmt) static inline void DecodeBytes_C14X2(u32* dst, const u16* src, const u8* tlut_, TLUTFormat tlutfmt)
{ {
const u16* tlut = (u16*)tlut_; const u16* tlut = (u16*)tlut_;
for (int x = 0; x < 4; x++) for (int x = 0; x < 4; x++)
@ -195,21 +195,21 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
// TODO: complete SSE2 optimization of less often used texture formats. // TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat, void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TlutFormat tlutfmt) const u8* tlut, TLUTFormat tlutfmt)
{ {
const int Wsteps4 = (width + 3) / 4; const int Wsteps4 = (width + 3) / 4;
const int Wsteps8 = (width + 7) / 8; const int Wsteps8 = (width + 7) / 8;
switch (texformat) switch (texformat)
{ {
case GX_TF_C4: case TextureFormat::C4:
for (int y = 0; y < height; y += 8) for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++, xStep++) for (int iy = 0, xStep = 8 * yStep; iy < 8; iy++, xStep++)
DecodeBytes_C4(dst + (y + iy) * width + x, src + 4 * xStep, tlut, tlutfmt); DecodeBytes_C4(dst + (y + iy) * width + x, src + 4 * xStep, tlut, tlutfmt);
break; break;
case GX_TF_I4: case TextureFormat::I4:
{ {
// Reference C implementation: // Reference C implementation:
for (int y = 0; y < height; y += 8) for (int y = 0; y < height; y += 8)
@ -225,7 +225,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
} }
} }
break; break;
case GX_TF_I8: // speed critical case TextureFormat::I8: // speed critical
{ {
// Reference C implementation // Reference C implementation
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
@ -255,13 +255,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
} }
} }
break; break;
case GX_TF_C8: case TextureFormat::C8:
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
DecodeBytes_C8((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut, tlutfmt); DecodeBytes_C8((u32*)dst + (y + iy) * width + x, src + 8 * xStep, tlut, tlutfmt);
break; break;
case GX_TF_IA4: case TextureFormat::IA4:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -269,7 +269,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
DecodeBytes_IA4(dst + (y + iy) * width + x, src + 8 * xStep); DecodeBytes_IA4(dst + (y + iy) * width + x, src + 8 * xStep);
} }
break; break;
case GX_TF_IA8: case TextureFormat::IA8:
{ {
// Reference C implementation: // Reference C implementation:
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
@ -285,13 +285,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
} }
} }
break; break;
case GX_TF_C14X2: case TextureFormat::C14X2:
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++) for (int iy = 0, xStep = 4 * yStep; iy < 4; iy++, xStep++)
DecodeBytes_C14X2(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut, tlutfmt); DecodeBytes_C14X2(dst + (y + iy) * width + x, (u16*)(src + 8 * xStep), tlut, tlutfmt);
break; break;
case GX_TF_RGB565: case TextureFormat::RGB565:
{ {
// Reference C implementation. // Reference C implementation.
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
@ -305,7 +305,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
} }
} }
break; break;
case GX_TF_RGB5A3: case TextureFormat::RGB5A3:
{ {
// Reference C implementation: // Reference C implementation:
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
@ -314,7 +314,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
DecodeBytes_RGB5A3(dst + (y + iy) * width + x, (u16*)src); DecodeBytes_RGB5A3(dst + (y + iy) * width + x, (u16*)src);
} }
break; break;
case GX_TF_RGBA8: // speed critical case TextureFormat::RGBA8: // speed critical
{ {
// Reference C implementation. // Reference C implementation.
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
@ -327,7 +327,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
} }
} }
break; break;
case GX_TF_CMPR: // speed critical case TextureFormat::CMPR: // speed critical
// The metroid games use this format almost exclusively. // The metroid games use this format almost exclusively.
{ {
for (int y = 0; y < height; y += 8) for (int y = 0; y < height; y += 8)

View File

@ -212,12 +212,13 @@ static void DecodeDXTBlock(u32* dst, const DXTBlock* src, int pitch)
// free to make the assumption that addresses are multiples of 16 in the aligned case. // free to make the assumption that addresses are multiples of 16 in the aligned case.
// TODO: complete SSE2 optimization of less often used texture formats. // TODO: complete SSE2 optimization of less often used texture formats.
// TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads. // TODO: refactor algorithms using _mm_loadl_epi64 unaligned loads to prefer 128-bit aligned loads.
static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height, int texformat, static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int height,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8) TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{ {
switch (tlutfmt) switch (tlutfmt)
{ {
case GX_TL_RGB5A3: case TLUTFormat::RGB5A3:
{ {
for (int y = 0; y < height; y += 8) for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -226,7 +227,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
} }
break; break;
case GX_TL_IA8: case TLUTFormat::IA8:
{ {
for (int y = 0; y < height; y += 8) for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -235,7 +236,7 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
} }
break; break;
case GX_TL_RGB565: case TLUTFormat::RGB565:
{ {
for (int y = 0; y < height; y += 8) for (int y = 0; y < height; y += 8)
for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 8) * Wsteps8; x < width; x += 8, yStep++)
@ -251,8 +252,8 @@ static void TexDecoder_DecodeImpl_C4(u32* dst, const u8* src, int width, int hei
FUNCTION_TARGET_SSSE3 FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL); const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L); const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
@ -298,8 +299,9 @@ static void TexDecoder_DecodeImpl_I4_SSSE3(u32* dst, const u8* src, int width, i
} }
} }
static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height, int texformat, static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int height,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8) TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{ {
const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL); const __m128i kMask_x0f = _mm_set1_epi32(0x0f0f0f0fL);
const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L); const __m128i kMask_xf0 = _mm_set1_epi32(0xf0f0f0f0L);
@ -390,8 +392,8 @@ static void TexDecoder_DecodeImpl_I4(u32* dst, const u8* src, int width, int hei
FUNCTION_TARGET_SSSE3 FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
// xsacha optimized with SSSE3 intrinsics // xsacha optimized with SSSE3 intrinsics
// Produces a ~10% speed improvement over SSE2 implementation // Produces a ~10% speed improvement over SSE2 implementation
@ -419,8 +421,9 @@ static void TexDecoder_DecodeImpl_I8_SSSE3(u32* dst, const u8* src, int width, i
} }
} }
static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height, int texformat, static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int height,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8) TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{ {
// JSD optimized with SSE2 intrinsics. // JSD optimized with SSE2 intrinsics.
// Produces an ~86% speed improvement over reference C implementation. // Produces an ~86% speed improvement over reference C implementation.
@ -518,12 +521,13 @@ static void TexDecoder_DecodeImpl_I8(u32* dst, const u8* src, int width, int hei
} }
} }
static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height, int texformat, static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int height,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8) TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{ {
switch (tlutfmt) switch (tlutfmt)
{ {
case GX_TL_RGB5A3: case TLUTFormat::RGB5A3:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -532,7 +536,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
} }
break; break;
case GX_TL_IA8: case TLUTFormat::IA8:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -541,7 +545,7 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
} }
break; break;
case GX_TL_RGB565: case TLUTFormat::RGB565:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps8; x < width; x += 8, yStep++)
@ -555,8 +559,9 @@ static void TexDecoder_DecodeImpl_C8(u32* dst, const u8* src, int width, int hei
} }
} }
static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height, int texformat, static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int height,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8) TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
{ {
@ -572,8 +577,8 @@ static void TexDecoder_DecodeImpl_IA4(u32* dst, const u8* src, int width, int he
FUNCTION_TARGET_SSSE3 FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
// xsacha optimized with SSSE3 intrinsics. // xsacha optimized with SSSE3 intrinsics.
// Produces an ~50% speed improvement over SSE2 implementation. // Produces an ~50% speed improvement over SSE2 implementation.
@ -595,8 +600,9 @@ static void TexDecoder_DecodeImpl_IA8_SSSE3(u32* dst, const u8* src, int width,
} }
} }
static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height, int texformat, static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int height,
const u8* tlut, TlutFormat tlutfmt, int Wsteps4, int Wsteps8) TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8)
{ {
// JSD optimized with SSE2 intrinsics. // JSD optimized with SSE2 intrinsics.
// Produces an ~80% speed improvement over reference C implementation. // Produces an ~80% speed improvement over reference C implementation.
@ -656,12 +662,12 @@ static void TexDecoder_DecodeImpl_IA8(u32* dst, const u8* src, int width, int he
} }
static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8) int Wsteps4, int Wsteps8)
{ {
switch (tlutfmt) switch (tlutfmt)
{ {
case GX_TL_RGB5A3: case TLUTFormat::RGB5A3:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -670,7 +676,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
} }
break; break;
case GX_TL_IA8: case TLUTFormat::IA8:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -679,7 +685,7 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
} }
break; break;
case GX_TL_RGB565: case TLUTFormat::RGB565:
{ {
for (int y = 0; y < height; y += 4) for (int y = 0; y < height; y += 4)
for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++) for (int x = 0, yStep = (y / 4) * Wsteps4; x < width; x += 4, yStep++)
@ -694,8 +700,8 @@ static void TexDecoder_DecodeImpl_C14X2(u32* dst, const u8* src, int width, int
} }
static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
// JSD optimized with SSE2 intrinsics. // JSD optimized with SSE2 intrinsics.
// Produces an ~78% speed improvement over reference C implementation. // Produces an ~78% speed improvement over reference C implementation.
@ -766,8 +772,8 @@ static void TexDecoder_DecodeImpl_RGB565(u32* dst, const u8* src, int width, int
FUNCTION_TARGET_SSSE3 FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL); const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL); const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
@ -872,8 +878,8 @@ static void TexDecoder_DecodeImpl_RGB5A3_SSSE3(u32* dst, const u8* src, int widt
} }
static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL); const __m128i kMask_x1f = _mm_set1_epi32(0x0000001fL);
const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL); const __m128i kMask_x0f = _mm_set1_epi32(0x0000000fL);
@ -993,8 +999,8 @@ static void TexDecoder_DecodeImpl_RGB5A3(u32* dst, const u8* src, int width, int
FUNCTION_TARGET_SSSE3 FUNCTION_TARGET_SSSE3
static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut,
int Wsteps4, int Wsteps8) TLUTFormat tlutfmt, int Wsteps4, int Wsteps8)
{ {
// xsacha optimized with SSSE3 instrinsics // xsacha optimized with SSSE3 instrinsics
// Produces a ~30% speed improvement over SSE2 implementation // Produces a ~30% speed improvement over SSE2 implementation
@ -1027,7 +1033,7 @@ static void TexDecoder_DecodeImpl_RGBA8_SSSE3(u32* dst, const u8* src, int width
} }
static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8) int Wsteps4, int Wsteps8)
{ {
// JSD optimized with SSE2 intrinsics // JSD optimized with SSE2 intrinsics
@ -1148,7 +1154,7 @@ static void TexDecoder_DecodeImpl_RGBA8(u32* dst, const u8* src, int width, int
} }
static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int height, static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int height,
int texformat, const u8* tlut, TlutFormat tlutfmt, TextureFormat texformat, const u8* tlut, TLUTFormat tlutfmt,
int Wsteps4, int Wsteps8) int Wsteps4, int Wsteps8)
{ {
// The metroid games use this format almost exclusively. // The metroid games use this format almost exclusively.
@ -1403,19 +1409,19 @@ static void TexDecoder_DecodeImpl_CMPR(u32* dst, const u8* src, int width, int h
} }
} }
void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int texformat, void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, TextureFormat texformat,
const u8* tlut, TlutFormat tlutfmt) const u8* tlut, TLUTFormat tlutfmt)
{ {
int Wsteps4 = (width + 3) / 4; int Wsteps4 = (width + 3) / 4;
int Wsteps8 = (width + 7) / 8; int Wsteps8 = (width + 7) / 8;
switch (texformat) switch (texformat)
{ {
case GX_TF_C4: case TextureFormat::C4:
TexDecoder_DecodeImpl_C4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8); TexDecoder_DecodeImpl_C4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break; break;
case GX_TF_I4: case TextureFormat::I4:
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_I4_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_I4_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
@ -1423,7 +1429,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
TexDecoder_DecodeImpl_I4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8); TexDecoder_DecodeImpl_I4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break; break;
case GX_TF_I8: case TextureFormat::I8:
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_I8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_I8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
@ -1431,15 +1437,15 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
TexDecoder_DecodeImpl_I8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8); TexDecoder_DecodeImpl_I8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break; break;
case GX_TF_C8: case TextureFormat::C8:
TexDecoder_DecodeImpl_C8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8); TexDecoder_DecodeImpl_C8(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break; break;
case GX_TF_IA4: case TextureFormat::IA4:
TexDecoder_DecodeImpl_IA4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8); TexDecoder_DecodeImpl_IA4(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break; break;
case GX_TF_IA8: case TextureFormat::IA8:
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_IA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_IA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
@ -1448,17 +1454,17 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
Wsteps8); Wsteps8);
break; break;
case GX_TF_C14X2: case TextureFormat::C14X2:
TexDecoder_DecodeImpl_C14X2(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_C14X2(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
break; break;
case GX_TF_RGB565: case TextureFormat::RGB565:
TexDecoder_DecodeImpl_RGB565(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_RGB565(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
break; break;
case GX_TF_RGB5A3: case TextureFormat::RGB5A3:
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_RGB5A3_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_RGB5A3_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
@ -1467,7 +1473,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
Wsteps8); Wsteps8);
break; break;
case GX_TF_RGBA8: case TextureFormat::RGBA8:
if (cpu_info.bSSSE3) if (cpu_info.bSSSE3)
TexDecoder_DecodeImpl_RGBA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, TexDecoder_DecodeImpl_RGBA8_SSSE3(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4,
Wsteps8); Wsteps8);
@ -1476,12 +1482,13 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, int
Wsteps8); Wsteps8);
break; break;
case GX_TF_CMPR: case TextureFormat::CMPR:
TexDecoder_DecodeImpl_CMPR(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8); TexDecoder_DecodeImpl_CMPR(dst, src, width, height, texformat, tlut, tlutfmt, Wsteps4, Wsteps8);
break; break;
default: default:
PanicAlert("Unhandled texture format %d", texformat); PanicAlert("Invalid Texture Format (0x%X)! (_TexDecoder_DecodeImpl)",
static_cast<int>(texformat));
break; break;
} }
} }