dolphin/Source/Core/VideoCommon/VertexManagerBase.cpp
NanoByte011 613781c765 Cleanup and refactor of zfreeze port
Based on the feedback from pull request #1767 I have put in most of
degasus's suggestions in here now.

I think we have a real winner here as moving the code to
VertexManagerBase for a function has allowed OGL to utilize zfreeze now
:)

Correct use of the vertex pointer has also corrected most of the issue
found in pull request #1767 that JMC47 stated.  Which also for me now
has Mario Tennis working with no polygon spikes on the characters
anymore!  Shadows are still an issue and probably in the other games
with shadow problems.  Rebel Strike also seems better but random skybox
glitches can show up.
2015-01-23 03:32:31 +13:00

284 lines
8.9 KiB
C++

#include "Common/CommonTypes.h"
#include "VideoCommon/BPStructs.h"
#include "VideoCommon/Debugger.h"
#include "VideoCommon/GeometryShaderManager.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/MainBase.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
VertexManager *g_vertex_manager;
u8 *VertexManager::s_pCurBufferPointer;
u8 *VertexManager::s_pBaseBufferPointer;
u8 *VertexManager::s_pEndBufferPointer;
PrimitiveType VertexManager::current_primitive_type;
bool VertexManager::IsFlushed;
static const PrimitiveType primitive_from_gx[8] = {
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS_2
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLES
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_STRIP
PRIMITIVE_TRIANGLES, // GX_DRAW_TRIANGLE_FAN
PRIMITIVE_LINES, // GX_DRAW_LINES
PRIMITIVE_LINES, // GX_DRAW_LINE_STRIP
PRIMITIVE_POINTS, // GX_DRAW_POINTS
};
VertexManager::VertexManager()
{
IsFlushed = true;
}
VertexManager::~VertexManager()
{
}
u32 VertexManager::GetRemainingSize()
{
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
// The SSE vertex loader can write up to 4 bytes past the end
u32 const needed_vertex_bytes = count * stride + 4;
// We can't merge different kinds of primitives, so we have to flush here
if (current_primitive_type != primitive_from_gx[primitive])
Flush();
current_primitive_type = primitive_from_gx[primitive];
// Check for size in buffer, if the buffer gets full, call Flush()
if ( !IsFlushed && ( count > IndexGenerator::GetRemainingIndices() ||
count > GetRemainingIndices(primitive) || needed_vertex_bytes > GetRemainingSize() ) )
{
Flush();
if (count > IndexGenerator::GetRemainingIndices())
ERROR_LOG(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
"Increase MAXIBUFFERSIZE or we need primitive breaking after all.");
if (needed_vertex_bytes > GetRemainingSize())
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
}
// need to alloc new buffer
if (IsFlushed)
{
g_vertex_manager->ResetBuffer(stride);
IsFlushed = false;
}
return DataReader(s_pCurBufferPointer, s_pEndBufferPointer);
}
void VertexManager::FlushData(u32 count, u32 stride)
{
s_pCurBufferPointer += count * stride;
}
u32 VertexManager::GetRemainingIndices(int primitive)
{
u32 index_len = MAXIBUFFERSIZE - IndexGenerator::GetIndexLen();
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_QUADS_2:
return index_len / 5 * 4;
case GX_DRAW_TRIANGLES:
return index_len / 4 * 3;
case GX_DRAW_TRIANGLE_STRIP:
return index_len / 1 - 1;
case GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1;
case GX_DRAW_LINES:
return index_len;
case GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
}
else
{
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_QUADS_2:
return index_len / 6 * 4;
case GX_DRAW_TRIANGLES:
return index_len;
case GX_DRAW_TRIANGLE_STRIP:
return index_len / 3 + 2;
case GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2;
case GX_DRAW_LINES:
return index_len;
case GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
}
}
void VertexManager::Flush()
{
if (IsFlushed)
return;
// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfmem.numTexGen.numTexGens,
xfmem.numChan.numColorChans, xfmem.dualTexTrans.enabled, bpmem.ztex2.op,
(int)bpmem.blendmode.colorupdate, (int)bpmem.blendmode.alphaupdate, (int)bpmem.zmode.updateenable);
for (unsigned int i = 0; i < xfmem.numChan.numColorChans; ++i)
{
LitChannel* ch = &xfmem.color[i];
PRIM_LOG("colchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
ch = &xfmem.alpha[i];
PRIM_LOG("alpchan%d: matsrc=%d, light=0x%x, ambsrc=%d, diffunc=%d, attfunc=%d", i, ch->matsource, ch->GetFullLightMask(), ch->ambsource, ch->diffusefunc, ch->attnfunc);
}
for (unsigned int i = 0; i < xfmem.numTexGen.numTexGens; ++i)
{
TexMtxInfo tinfo = xfmem.texMtxInfo[i];
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP) tinfo.hex &= 0x7ff;
if (tinfo.texgentype != XF_TEXGEN_REGULAR) tinfo.projection = 0;
PRIM_LOG("txgen%d: proj=%d, input=%d, gentype=%d, srcrow=%d, embsrc=%d, emblght=%d, postmtx=%d, postnorm=%d",
i, tinfo.projection, tinfo.inputform, tinfo.texgentype, tinfo.sourcerow, tinfo.embosssourceshift, tinfo.embosslightshift,
xfmem.postMtxInfo[i].index, xfmem.postMtxInfo[i].normalize);
}
PRIM_LOG("pixel: tev=%d, ind=%d, texgen=%d, dstalpha=%d, alphatest=0x%x", (int)bpmem.genMode.numtevstages+1, (int)bpmem.genMode.numindstages,
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
#endif
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
for (unsigned int i : usedtextures)
{
g_renderer->SetSamplerState(i & 3, i >> 2);
const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);
if (tentry)
{
// 0s are probably for no manual wrapping needed.
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
}
else
ERROR_LOG(VIDEO, "error loading texture");
}
// set global constants
VertexShaderManager::SetConstants();
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
bpmem.dstalpha.enable &&
bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
if (PerfQueryBase::ShouldEmulate())
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
g_vertex_manager->vFlush(useDstAlpha);
if (PerfQueryBase::ShouldEmulate())
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
if (xfmem.numTexGen.numTexGens != bpmem.genMode.numtexgens)
ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
IsFlushed = true;
}
void VertexManager::DoState(PointerWrap& p)
{
g_vertex_manager->vDoState(p);
}
void VertexManager::CalculateZSlope(u32 stride)
{
float vtx[9];
float out[12];
// Lookup vertices of the last rendered triangle and software-transform them
// This allows us to determine the depth slope, which will be used if zfreeze
// is enabled in the following flush.
for (unsigned int i = 0; i < 3; ++i)
{
u8* vtx_ptr = s_pCurBufferPointer - stride * (3 - i);
vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
// viewport offset ignored because we only look at coordinate differences.
out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
}
float dx31 = out[8] - out[0];
float dx12 = out[0] - out[4];
float dy12 = out[1] - out[5];
float dy31 = out[9] - out[1];
float DF31 = out[10] - out[2];
float DF21 = out[6] - out[2];
float a = DF31 * -dy12 - DF21 * dy31;
float b = dx31 * DF21 + dx12 * DF31;
float c = -dx12 * dy31 - dx31 * -dy12;
float slope_dfdx = -a / c;
float slope_dfdy = -b / c;
float slope_f0 = out[2];
PixelShaderManager::SetZSlope(slope_dfdx, slope_dfdy, slope_f0);
}