I have a very basic OpenGL instanced rendering setup that compiles and runs; however, it is extremely slow, and even though I have spent days asking and reading about how to fix it, I still have no clue what causes the problem.
What does slow mean? At the moment as you can see, it draws
16 000 instances (48 000 vertices) @ 512*512px resolution / 38-43 FPS
But if I start scaling the window, up to the actual size of my monitor (2560 * 1440) the FPS drops down to 1. I expect at least half a million vertices rendered at 60FPS, that would be the goal.
The setup is very simple: I use GLFW to create the window and GLEW to set up OpenGL properly. So it looks something like this:
/*
 * Program entry point (pseudo-code outline; the elided steps are marked "...").
 *
 * Creates the renderer state with setup(), calls draw() once per iteration of
 * the event loop, and releases the state with cleanup() before exiting.
 *
 * Returns 0 on success, 1 if setup() could not create the renderer state.
 */
int main(void)
{
    // ... init window and context
    void *resources = setup();
    if (!resources)
    {
        // setup() returns NULL on failure; cleanup() dereferences its
        // argument, so it must never be handed that NULL.
        // ... clean up everything
        return 1;
    }
    // ... start of event loop
    {
        // ... clear, get buff data, viewport
        draw(resources);
        // ... swap buffs, poll events
    }
    cleanup(resources);
    // ... clean up everything
    return 0;
}
The functions called in the pseudo snippet above are shown below; they live in the instrender.c
file. This is where the actual drawing happens:
#include <stdlib.h> // srand(), rand()
#include <jemalloc/jemalloc.h> // malloc(), free()
#include <time.h> // time()
#include <stdio.h> // fprintf()
#include <GL/glew.h> // GL*
/*----------------------------------------------------------------------------*/
/*
 * Renderer state created by setup() and handed back to draw() and cleanup()
 * through an opaque void pointer.
 */
struct resources
{
    /* Shader objects and the program they are attached to */
    GLuint vs_id;            /* vertex shader object */
    GLuint fs_id;            /* fragment shader object */
    GLuint program_id;       /* shader program object */

    /* Vertex attribute indices bound to the shader inputs */
    GLuint coords_pos;       /* index bound to "coords" */
    GLuint offset_pos;       /* index bound to "offset" */
    GLuint colour_pos;       /* index bound to "colors" */

    /* Buffer objects holding the per-instance data */
    GLuint colour_buffer_id; /* buffer with the instance colours */
    GLuint offset_buffer_id; /* buffer with the instance offsets */
};

typedef struct resources Resources;
/*----------------------------------------------------------------------------*/
/*
 * GLSL 1.50 vertex shader source, compiled in setup() through
 * load_and_compile_shader().
 *
 * Inputs : coords (vertex position), offset and colors (both fed with an
 *          attribute divisor of 1 in setup(), i.e. one value per instance).
 * Outputs: gl_Position = coords + offset (z = 0, w = 1) and the colour,
 *          passed on unchanged as `color`.
 */
const char *vert_shader = " \
#version 150 core \n\
\n\
in vec2 coords; \n\
in vec2 offset; \n\
in vec3 colors; \n\
out vec3 color; \n\
\n\
void main() \n\
{ \n\
gl_Position = vec4(coords.x + offset.x, \n\
coords.y + offset.y, 0.0, 1.0); \n\
color = colors; \n\
} \n";
/*----------------------------------------------------------------------------*/
/*
 * GLSL 1.50 fragment shader source, compiled in setup() through
 * load_and_compile_shader().
 *
 * Writes the incoming `color` to the `fragment` output with alpha fixed at 1.0.
 */
const char *frag_shader = " \
#version 150 core \n\
\n\
in vec3 color; \n\
out vec4 fragment; \n\
\n\
void main() \n\
{ \n\
fragment = vec4(color, 1.0); \n\
} \n";
/*----------------------------------------------------------------------------*/
/*
 * Create a shader object of the given type, upload `buffer` as its source and
 * compile it.
 *
 * shader_id  out: receives the name of the new shader object, or 0 if the
 *            object could not be created or the source failed to compile.
 * buffer     NUL-terminated GLSL source text.
 * type       shader stage, e.g. GL_VERTEX_SHADER or GL_FRAGMENT_SHADER.
 *
 * On a compile error the shader's info log (when one is available) is printed
 * to stderr and the shader object is deleted.
 */
void
load_and_compile_shader(GLuint *shader_id, const char *buffer, GLenum type)
{
    *shader_id = glCreateShader(type);
    if (*shader_id == 0)
    {
        fprintf(stderr, "Failed to create shader object\n");
        return;
    }

    // A NULL length array tells GL that the source string is NUL-terminated
    glShaderSource(*shader_id, 1, (const GLchar **)&buffer, NULL);
    glCompileShader(*shader_id);

    GLint is_compiled = GL_FALSE;
    glGetShaderiv(*shader_id, GL_COMPILE_STATUS, &is_compiled);
    if (is_compiled)
    {
        return;
    }

    fprintf(stderr, "Shader failed to compile\n");

    // The reported length includes the terminating NUL; 0 means "no log"
    GLint info_log_length = 0;
    glGetShaderiv(*shader_id, GL_INFO_LOG_LENGTH, &info_log_length);
    if (info_log_length > 0)
    {
        GLchar *info_log = malloc((size_t)info_log_length * sizeof *info_log);
        if (info_log)
        {
            glGetShaderInfoLog(*shader_id, info_log_length, NULL, info_log);
            fprintf(stderr, "%s\n", info_log);
            free(info_log);
        }
    }

    glDeleteShader(*shader_id);
    *shader_id = 0;
}
/*----------------------------------------------------------------------------*/
/* Number of instances drawn per frame (the instance count given to draw). */
#define IR_OFFSET_COUNT 16000
/*
 * Number of colour floats: three (RGB) per instance. The expansion is
 * parenthesized so it stays a single operand inside larger expressions
 * (e.g. `x / IR_COLOUR_COUNT`).
 */
#define IR_COLOUR_COUNT (IR_OFFSET_COUNT * 3)
void *
setup(void)
{
Resources *rsc = malloc(sizeof(Resources));
if (!rsc)
{
fprintf(stderr, "Failed to allocate space for resources\n");
return (void *)NULL;
}
load_and_compile_shader(&rsc->vs_id, vert_shader, GL_VERTEX_SHADER);
load_and_compile_shader(&rsc->fs_id, frag_shader, GL_FRAGMENT_SHADER);
// Create new program and get program ID
rsc->program_id = glCreateProgram();
// Attach shaders
glAttachShader(rsc->program_id, rsc->vs_id);
glAttachShader(rsc->program_id, rsc->fs_id);
// Vertex coordinates
GLfloat vertices[] = { -.95f, -.95f,
-.95f, +.00f,
-.70f, -.95f };
// Vertex indices
GLushort indices[] = {0, 1, 2};
// Instance offsets
GLfloat offset[IR_OFFSET_COUNT];
srand(time(NULL));
for (int i=0; i<IR_OFFSET_COUNT; i++)
offset[i] = (GLfloat)(rand() % 200) / 100.f;
// Color values
GLfloat colors[IR_COLOUR_COUNT];
for (int i=0; i<IR_COLOUR_COUNT; i++)
colors[i] = (GLfloat)rand() / (GLfloat)RAND_MAX;
// Shader layout position
int pos_index = 0;
// Setup VAO
GLuint vertex_array_id;
glGenVertexArrays(1, &vertex_array_id);
glBindVertexArray(vertex_array_id);
// Setup coordinates VBO
GLuint vertex_buffer_id;
glGenBuffers(1, &vertex_buffer_id);
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer_id);
glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STATIC_DRAW);
rsc->coords_pos = pos_index;
glBindAttribLocation(rsc->program_id, pos_index++, "coords");
glVertexAttribPointer(rsc->coords_pos,
2,
GL_FLOAT,
GL_FALSE,
2*sizeof(GLfloat),
(GLvoid *)NULL);
glEnableVertexAttribArray(rsc->coords_pos);
// Setup offsets VBO
glGenBuffers(1, &rsc->offset_buffer_id);
glBindBuffer(GL_ARRAY_BUFFER, rsc->offset_buffer_id);
glBufferData(GL_ARRAY_BUFFER, sizeof(offset), offset, GL_STATIC_DRAW);
rsc->offset_pos = pos_index;
glBindAttribLocation(rsc->program_id, pos_index++, "offset");
glVertexAttribPointer(rsc->offset_pos,
2,
GL_FLOAT,
GL_FALSE,
2*sizeof(GLfloat),
(GLvoid *)NULL);
glEnableVertexAttribArray(rsc->offset_pos);
glVertexAttribDivisor(rsc->offset_pos, 1);
// Setup colors VBO
glGenBuffers(1, &rsc->colour_buffer_id);
glBindBuffer(GL_ARRAY_BUFFER, rsc->colour_buffer_id);
glBufferData(GL_ARRAY_BUFFER, sizeof(colors), colors, GL_STATIC_DRAW);
rsc->colour_pos = pos_index;
glBindAttribLocation(rsc->program_id, pos_index++, "colors");
glVertexAttribPointer(rsc->colour_pos,
3,
GL_FLOAT,
GL_FALSE,
3*sizeof(GLfloat),
(GLvoid *)NULL);
glEnableVertexAttribArray(rsc->colour_pos);
glVertexAttribDivisor(rsc->colour_pos, 1);
// Link shader program
glLinkProgram(rsc->program_id);
// If there was a problem during the linking
GLint is_linked;
glGetProgramiv(rsc->program_id, GL_LINK_STATUS, &is_linked);
if (!is_linked)
{
fprintf(stderr, "Shader program linking failed.\n");
// Get debugging informations
GLint info_log_length;
glGetProgramiv(rsc->program_id, GL_INFO_LOG_LENGTH, &info_log_length);
GLchar *info_log = malloc(info_log_length*sizeof(GLchar));
glGetProgramInfoLog(rsc->program_id, info_log_length, NULL, info_log);
fprintf(stderr, "%s\n", info_log);
// Clean up
glDetachShader(rsc->program_id, rsc->vs);
glDetachShader(rsc->program_id, rsc->fs);
glDeleteProgram(rsc->program_id);
free(info_log);
rsc->program_id = 0;
return;
}
// Setup indices VBO
GLuint index_array_buffer_id;
glGenBuffers(1, &index_array_buffer_id);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_array_buffer_id);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
// Set fragment output
glBindFragDataLocation(rsc->program_id, 0, "fragment");
// Set basic GL options
glClearColor(.46f, .71f, .67f, 1.f);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
// Start using program
glUseProgram(rsc->program_id);
return rsc;
}
/*----------------------------------------------------------------------------*/
/*
 * Issue the single instanced draw call: one indexed triangle (3 indices of
 * type GLushort) repeated IR_OFFSET_COUNT times.
 *
 * Relies on the program, vertex array and index buffer that setup() leaves
 * bound. `resources` is currently not needed for the call and is accepted only
 * to keep the setup()/draw()/cleanup() signatures uniform.
 */
void
draw(void *resources)
{
    (void)resources;

    glDrawElementsInstanced(GL_TRIANGLES,
                            /* num of elems to draw */ 3,
                            /* index value types    */ GL_UNSIGNED_SHORT,
                            /* offset into indices  */ NULL,
                            /* num of items to draw */ IR_OFFSET_COUNT);
}
/*----------------------------------------------------------------------------*/
void
cleanup(void *resources)
{
glBlendFunc(GL_ONE, GL_ZERO);
glDisable(GL_BLEND);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glUseProgram(0);
Resources *rsc = (Resources *)resources;
glDeleteShader(rsc->vs_id);
glDeleteShader(rsc->fs_id);
glDisableVertexAttribArray(rsc->coords_pos);
glDisableVertexAttribArray(rsc->offset_pos);
glDeleteProgram(rsc->program_id);
free(rsc);
}
Environment:
Video Card:
NVidia GeForce 9600M GT 512MB
OS/Compiler:
Mac OS X 10.9.3 / Apple LLVM version 5.1 (clang-503.0.40) (based on LLVM 3.4svn)
UPDATE 1:
Based on some friendly advice about "over-drawing", I created a version where the triangles overlap only slightly, in one direction:
Now this produces a constant 48-50FPS
, however when I scale the window up to 2560*1440 this number drops down to 22-26FPS
(which is of course way better than the previous 1FPS
but still not the one I'm looking for):
So I guess, the main problem is not over-drawing/overlapping.
UPDATE 2:
Here is a time profile I created:
As you can see, 75.7% of the time is spent calling/executing the glDrawElementsInstanced
function and its subfunction calls.
UPDATE 3:
While testing the code with @syb0rg, another interesting bug appeared: on roughly every 10th to 20th run (basically at random), the program produces this and then crashes:
void*
unless you have to (e.g. because it can be any kind of pointer). In your case you know what it is so you can use the proper type. – ThiefMaster May 24 '14 at 21:05void *
s in the first place. Of course the above example is a somewhat simplified version of the actual code, but in real life it has to be a void *
since the caller does not know what pointer that is. – Peter Varo May 24 '14 at 21:06make && ./build/main
-- and don't press space, ifdefOVERLAPPING_OFF
. thank you very much for the effort! – Peter Varo May 27 '14 at 14:19