dolphin/Source/Core/VideoBackends/OGL/VertexManager.cpp
Michael Maltese ba6e917b49 OGL: implement Bounding Box on systems w/o SSBO
This commit should have zero performance effect if SSBOs are supported.

If they aren't (e.g. on all Macs), this commit alters FramebufferManager
to attach a new stencil buffer and VertexManager to draw to it when
bounding box is active. `BBoxRead` gets the pixel data from the buffer
and dumbly loops through it to find the bounding box.

This patch can run Paper Mario: The Thousand-Year Door at almost full
speed (50–60 FPS) without Dual-Core enabled for all common bounding
box-using actions I tested (going through pipes, Plane Mode, Paper
Mode, Prof. Frankly's gate, combat, walking around the overworld, etc.)
on my computer (macOS 10.12.3, 2.8 GHz Intel Core i7, 16 GB 1600 MHz
DDR3, and Intel Iris 1536 MB).

A few more demanding scenes (e.g. the self-building bridge on the way
to Petalburg) slow to ~15% of their speed without this patch (though
they don't run quite at full speed even on master). The slowdown is
caused almost solely by `glReadPixels` in `OGL::BoundingBox::Get`.

Other implementation ideas:

- Use a stencil buffer that's separate from the depth buffer. This would
  require ARB_texture_stencil8 / OpenGL 4.4, which isn't available on
  macOS.

- Use `glGetTexImage` instead of `glReadPixels`. This is ~5 FPS slower
  on my computer, presumably because it has to transfer the entire
  combined depth-stencil buffer instead of only the stencil data.
  Getting only stencil data from `glGetTexImage` requires
  ARB_texture_stencil8 / OpenGL 4.4, which (again) is not available on
  macOS.

- Don't use a PBO, and use `glReadPixels` synchronously. This has no
  visible performance effect on my computer, and is theoretically
  slower.
2017-03-15 17:41:32 -07:00

197 lines
5.5 KiB
C++

// Copyright 2008 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/OGL/VertexManager.h"
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "Common/CommonTypes.h"
#include "Common/FileUtil.h"
#include "Common/GL/GLExtensions/GLExtensions.h"
#include "Common/StringUtil.h"
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/ProgramShaderCache.h"
#include "VideoBackends/OGL/Render.h"
#include "VideoBackends/OGL/StreamBuffer.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/IndexGenerator.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VideoConfig.h"
namespace OGL
{
// This are the initially requested size for the buffers expressed in bytes
const u32 MAX_IBUFFER_SIZE = 2 * 1024 * 1024;
const u32 MAX_VBUFFER_SIZE = 32 * 1024 * 1024;
static std::unique_ptr<StreamBuffer> s_vertexBuffer;
static std::unique_ptr<StreamBuffer> s_indexBuffer;
static size_t s_baseVertex;
static size_t s_index_offset;
VertexManager::VertexManager() : m_cpu_v_buffer(MAX_VBUFFER_SIZE), m_cpu_i_buffer(MAX_IBUFFER_SIZE)
{
CreateDeviceObjects();
}
VertexManager::~VertexManager()
{
DestroyDeviceObjects();
}
void VertexManager::CreateDeviceObjects()
{
s_vertexBuffer = StreamBuffer::Create(GL_ARRAY_BUFFER, MAX_VBUFFER_SIZE);
m_vertex_buffers = s_vertexBuffer->m_buffer;
s_indexBuffer = StreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, MAX_IBUFFER_SIZE);
m_index_buffers = s_indexBuffer->m_buffer;
m_last_vao = 0;
}
void VertexManager::DestroyDeviceObjects()
{
s_vertexBuffer.reset();
s_indexBuffer.reset();
}
void VertexManager::PrepareDrawBuffers(u32 stride)
{
u32 vertex_data_size = IndexGenerator::GetNumVerts() * stride;
u32 index_data_size = IndexGenerator::GetIndexLen() * sizeof(u16);
s_vertexBuffer->Unmap(vertex_data_size);
s_indexBuffer->Unmap(index_data_size);
ADDSTAT(stats.thisFrame.bytesVertexStreamed, vertex_data_size);
ADDSTAT(stats.thisFrame.bytesIndexStreamed, index_data_size);
}
void VertexManager::ResetBuffer(u32 stride)
{
if (m_cull_all)
{
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
m_cur_buffer_pointer = m_base_buffer_pointer = m_cpu_v_buffer.data();
m_end_buffer_pointer = m_base_buffer_pointer + m_cpu_v_buffer.size();
IndexGenerator::Start((u16*)m_cpu_i_buffer.data());
}
else
{
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
m_cur_buffer_pointer = m_base_buffer_pointer = buffer.first;
m_end_buffer_pointer = buffer.first + MAXVBUFFERSIZE;
s_baseVertex = buffer.second / stride;
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
IndexGenerator::Start((u16*)buffer.first);
s_index_offset = buffer.second;
}
}
void VertexManager::Draw(u32 stride)
{
u32 index_size = IndexGenerator::GetIndexLen();
u32 max_index = IndexGenerator::GetNumVerts();
GLenum primitive_mode = 0;
switch (m_current_primitive_type)
{
case PRIMITIVE_POINTS:
primitive_mode = GL_POINTS;
glDisable(GL_CULL_FACE);
break;
case PRIMITIVE_LINES:
primitive_mode = GL_LINES;
glDisable(GL_CULL_FACE);
break;
case PRIMITIVE_TRIANGLES:
primitive_mode =
g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ? GL_TRIANGLE_STRIP : GL_TRIANGLES;
break;
}
if (g_ogl_config.bSupportsGLBaseVertex)
{
glDrawRangeElementsBaseVertex(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT,
(u8*)nullptr + s_index_offset, (GLint)s_baseVertex);
}
else
{
glDrawRangeElements(primitive_mode, 0, max_index, index_size, GL_UNSIGNED_SHORT,
(u8*)nullptr + s_index_offset);
}
INCSTAT(stats.thisFrame.numDrawCalls);
if (m_current_primitive_type != PRIMITIVE_TRIANGLES)
static_cast<Renderer*>(g_renderer.get())->SetGenerationMode();
}
void VertexManager::vFlush()
{
GLVertexFormat* nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
u32 stride = nativeVertexFmt->GetVertexStride();
if (m_last_vao != nativeVertexFmt->VAO)
{
glBindVertexArray(nativeVertexFmt->VAO);
m_last_vao = nativeVertexFmt->VAO;
}
PrepareDrawBuffers(stride);
ProgramShaderCache::SetShader(m_current_primitive_type);
// upload global constants
ProgramShaderCache::UploadConstants();
// setup the pointers
nativeVertexFmt->SetupVertexPointers();
if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active)
{
glEnable(GL_STENCIL_TEST);
}
Draw(stride);
if (!g_Config.backend_info.bSupportsFragmentStoresAndAtomics && ::BoundingBox::active)
{
OGL::BoundingBox::StencilWasUpdated();
glDisable(GL_STENCIL_TEST);
}
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS)
{
// save the shaders
ProgramShaderCache::PCacheEntry prog = ProgramShaderCache::GetShaderProgram();
std::string filename = StringFromFormat(
"%sps%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(), g_ActiveConfig.iSaveTargetId);
std::ofstream fps;
OpenFStream(fps, filename, std::ios_base::out);
fps << prog.shader.strpprog;
filename = StringFromFormat("%svs%.3d.txt", File::GetUserPath(D_DUMPFRAMES_IDX).c_str(),
g_ActiveConfig.iSaveTargetId);
std::ofstream fvs;
OpenFStream(fvs, filename, std::ios_base::out);
fvs << prog.shader.strvprog;
}
#endif
g_Config.iSaveTargetId++;
ClearEFBCache();
}
} // namespace