I wrote a simple 2D renderer in OpenGL that works pretty well. I can render about 16,000k sprites in 1 draw call if it uses the same texture. I wanted to refactor this code out and put it in its own module to make it easier to work with.
With this, I can only draw about 100 sprites before things get really slow:
mat4* v_mat, *p_mat, *mvp_mat;
GLint model_mat_loc;
GLint view_mat_loc;
GLint proj_mat_loc;
GLint mvp_matrix_loc;
GLint pos_loc;
GLint col_loc;
GLint tex_loc;
GLuint vao;
GLuint vert_buff;
GLuint ind_buff;
int sprite_count;
int idx;
GLuint texture_id;
float* v_buff;
short* i_buff;
size_t vbo_size_in_bytes;
tran3 rot, scal, trns, tmp;
cg_cam* camera;
Shader* shader;
//creating a spritebatch object
cg_spritebatch* out = calloc(1, sizeof(cg_spritebatch));
out->shader = calloc(1, sizeof(Shader));
out->shader = s;
out->sprite_count = 16000;
out->pos_loc = get_attrib_location(out->shader, "a_pos");
out->col_loc = get_attrib_location(out->shader, "a_col");
out->mvp_matrix_loc = get_uniform_location(out->shader, "u_mvp_mat");
out->model_mat_loc = get_uniform_location(out->shader, "u_mod_mat");
out->view_mat_loc = get_uniform_location(out->shader, "u_view_mat");
out->proj_mat_loc = get_uniform_location(out->shader, "u_proj_mat");
out->tex_loc = get_uniform_location(out->shader, "u_sprite_tex");
out->vbo_size_in_bytes =
((cg_sprite_get_sizeof_vert() * out->sprite_count) +
(cg_sprite_get_sizeof_col() * out->sprite_count) +
(cg_sprite_get_sizeof_tex_coord() * out->sprite_count));
out->v_buff = calloc(1, (out->vbo_size_in_bytes));
out->i_buff = calloc(1, (cg_sprite_get_sizeof_ind() * out->sprite_count));
glGenVertexArrays(1, &out->vao);
glBindVertexArray(out->vao);
glGenBuffers(1, &out->vert_buff);
glBindBuffer(GL_ARRAY_BUFFER, out->vert_buff);
glBufferData(GL_ARRAY_BUFFER, (out->vbo_size_in_bytes), out->v_buff,
GL_STREAM_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 9 * sizeof(float),
(GLvoid*)0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 9 * sizeof(float),
(GLvoid*)(3 * sizeof(float)));
glEnableVertexAttribArray(2);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 9 * sizeof(float),
(GLvoid*)(7 * sizeof(float)));
int len = out->sprite_count * 6;
short j = 0;
for (int i = 0; i < len; i += 6, j += 4) {
out->i_buff[i] = j;
out->i_buff[i + 1] = (short)(j + 1);
out->i_buff[i + 2] = (short)(j + 2);
out->i_buff[i + 3] = (short)(j + 2);
out->i_buff[i + 4] = (short)(j + 3);
out->i_buff[i + 5] = j;
}
glGenBuffers(1, &out->ind_buff);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, out->ind_buff);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
out->sprite_count * cg_sprite_get_sizeof_ind(), out->i_buff,
GL_STREAM_DRAW);
glBindVertexArray(0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
out->idx = 0;
out->texture_id = -1;
return out;
The main functions of this module:
void cg_spritebatch_begin(cg_spritebatch* b, cg_cam* cam) {
b->camera = cam;
b->idx = 0;
}
void flush(cg_spritebatch* b) {
glUseProgram(b->shader->shader_program);
glBindVertexArray(b->vao);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glClearDepth(1.0f);
glActiveTexture(GL_TEXTURE0);
glUniform1i(b->tex_loc, 0);
glBindTexture(GL_TEXTURE_2D, b->texture_id);
cg_cam_get_matrices(&b->v_mat, &b->p_mat, &b->mvp_mat, b->camera);
glUniformMatrix4fv(b->view_mat_loc, 1, GL_FALSE, vmathM4GetData(b->v_mat));
glUniformMatrix4fv(b->proj_mat_loc, 1, GL_FALSE, vmathM4GetData(b->p_mat));
glUniformMatrix4fv(b->mvp_matrix_loc, 1, GL_FALSE,
vmathM4GetData(b->mvp_mat));
glBindBuffer(GL_ARRAY_BUFFER, b->vert_buff);
glBufferData(GL_ARRAY_BUFFER, (b->vbo_size_in_bytes), b->v_buff,
GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, b->ind_buff);
glBufferData(GL_ELEMENT_ARRAY_BUFFER,
b->sprite_count * cg_sprite_get_sizeof_ind(), b->i_buff,
GL_STREAM_DRAW);
glDrawElements(GL_TRIANGLES, b->sprite_count * cg_sprite_get_vert_count(),
GL_UNSIGNED_SHORT, 0);
glBindVertexArray(0);
b->idx = 0;
}
static void update_texture(cg_spritebatch* b, GLuint t) {
b->texture_id = t;
flush(b);
}
void cg_spritebatch_draw(cg_spritebatch* b, cg_sprite* sp) {
int idx = b->idx;
int i = idx;
idx = 0;
if (sp->texture_id != b->texture_id) {
update_texture(b, sp->texture_id);
} else if (b->idx == (36 * b->sprite_count)) {
flush(b);
}
vmathT3MakeIdentity(&b->rot);
vmathT3MakeIdentity(&b->scal);
vmathT3MakeIdentity(&b->trns);
vmathT3MakeIdentity(&b->tmp);
vmathT3MakeScale(&b->scal, &sp->scale);
vmathT3MakeRotationZYX(&b->rot, &sp->angl);
vmathT3MakeTranslation(&b->trns, &sp->pos);
vmathT3Mul(&b->tmp, &b->trns, &b->scal);
vmathT3Mul(&b->tmp, &b->tmp, &b->rot);
vmathM4MakeFromT3(&sp->m_mat, &b->tmp);
cg_quad_getquadverts(&sp->iv0, &sp->iv1, &sp->iv2, &sp->iv3, sp->quad);
vmathM4MulV4(&sp->ov0, &sp->m_mat, &sp->iv0);
vmathM4MulV4(&sp->ov1, &sp->m_mat, &sp->iv1);
vmathM4MulV4(&sp->ov2, &sp->m_mat, &sp->iv2);
vmathM4MulV4(&sp->ov3, &sp->m_mat, &sp->iv3);
/* vmathV4Prints(&sp->ov0, "v0"); */
// v0
b->v_buff[idx++] = sp->ov0.x;
b->v_buff[idx++] = sp->ov0.y;
b->v_buff[idx++] = sp->ov0.z;
b->v_buff[idx++] = sp->quad->colors[0];
b->v_buff[idx++] = sp->quad->colors[1];
b->v_buff[idx++] = sp->quad->colors[2];
b->v_buff[idx++] = sp->quad->colors[3];
b->v_buff[idx++] = sp->quad->tex_coords[0];
b->v_buff[idx++] = sp->quad->tex_coords[1];
// v1
b->v_buff[idx++] = sp->ov1.x;
b->v_buff[idx++] = sp->ov1.y;
b->v_buff[idx++] = sp->ov1.z;
b->v_buff[idx++] = sp->quad->colors[4];
b->v_buff[idx++] = sp->quad->colors[5];
b->v_buff[idx++] = sp->quad->colors[6];
b->v_buff[idx++] = sp->quad->colors[7];
b->v_buff[idx++] = sp->quad->tex_coords[2];
b->v_buff[idx++] = sp->quad->tex_coords[3];
// v2
b->v_buff[idx++] = sp->ov2.x;
b->v_buff[idx++] = sp->ov2.y;
b->v_buff[idx++] = sp->ov2.z;
b->v_buff[idx++] = sp->quad->colors[8];
b->v_buff[idx++] = sp->quad->colors[9];
b->v_buff[idx++] = sp->quad->colors[10];
b->v_buff[idx++] = sp->quad->colors[11];
b->v_buff[idx++] = sp->quad->tex_coords[4];
b->v_buff[idx++] = sp->quad->tex_coords[5];
// v3
b->v_buff[idx++] = sp->ov3.x;
b->v_buff[idx++] = sp->ov3.y;
b->v_buff[idx++] = sp->ov3.z;
b->v_buff[idx++] = sp->quad->colors[12];
b->v_buff[idx++] = sp->quad->colors[13];
b->v_buff[idx++] = sp->quad->colors[14];
b->v_buff[idx++] = sp->quad->colors[15];
b->v_buff[idx++] = sp->quad->tex_coords[6];
b->v_buff[idx++] = sp->quad->tex_coords[7];
b->idx++;
}
void cg_spritebatch_end(cg_spritebatch* b) { flush(b); }
I use these functions like this:
cg_spritebatch_begin(sb, ce_get_default_camera());
for (int i = 0; i < sc; i++) {
cg_spritebatch_draw(sb, sprites[i]);
}
cg_spritebatch_end(sb);
With this code, I can draw about 100 sprites at most before things get really terrible. With the same code outside of those loops, I can draw 16,000 sprites with no problem.
For example, here is the same code outside of the spritebatch setup that works really well:
edit fixed the unrolled loop for drawing 16,000 sprites in one draw call.
void variable_render(double alpha) {
glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
cg_sprite_back_2_front_tex_id(sprites, sc);
cg_spritebatch_begin(sb, ce_get_default_camera());
for (int i = 0; i < sc; i++) {
cg_spritebatch_draw(sb, sprites[i]);
}
cg_spritebatch_end(sb);
idx = 0;
for (int i = 0; i < sc; i++) {
//--------------- start update vertex data ---------------------
sp = sprites[i];
/* printf("%d %d\n",sp->texture_id, sp->z_index); */
vmathT3MakeIdentity(&rot);
vmathT3MakeIdentity(&scal);
vmathT3MakeIdentity(&trns);
vmathT3MakeIdentity(&tmp);
vmathT3MakeScale(&scal, &sp->scale);
vmathT3MakeRotationZYX(&rot, &sp->angl);
vmathT3MakeTranslation(&trns, &sp->pos);
vmathT3Mul(&tmp, &trns, &scal); // scale then trnslate
vmathT3Mul(&tmp, &tmp, &rot); // scale then translate then rotate
vmathM4MakeFromT3(&sprites[i]->m_mat, &tmp);
cg_quad_getquadverts(&sp->iv0, &sp->iv1, &sp->iv2, &sp->iv3, sp->quad);
vmathM4MulV4(&sp->ov0, &sp->m_mat, &sp->iv0);
vmathM4MulV4(&sp->ov1, &sp->m_mat, &sp->iv1);
vmathM4MulV4(&sp->ov2, &sp->m_mat, &sp->iv2);
vmathM4MulV4(&sp->ov3, &sp->m_mat, &sp->iv3);
/* --------------- finish update vertex data --------------------- */
/* --------------- start packing data into buffers----------------- */
/* sp = sprites[i]; */
// v0
v_buff[idx++] = sp->ov0.x;
v_buff[idx++] = sp->ov0.y;
v_buff[idx++] = sp->ov0.z;
v_buff[idx++] = sp->quad->colors[0];
v_buff[idx++] = sp->quad->colors[1];
v_buff[idx++] = sp->quad->colors[2];
v_buff[idx++] = sp->quad->colors[3];
v_buff[idx++] = sp->quad->tex_coords[0];
v_buff[idx++] = sp->quad->tex_coords[1];
// v1
v_buff[idx++] = sp->ov1.x;
v_buff[idx++] = sp->ov1.y;
v_buff[idx++] = sp->ov1.z;
v_buff[idx++] = sp->quad->colors[4];
v_buff[idx++] = sp->quad->colors[5];
v_buff[idx++] = sp->quad->colors[6];
v_buff[idx++] = sp->quad->colors[7];
v_buff[idx++] = sp->quad->tex_coords[2];
v_buff[idx++] = sp->quad->tex_coords[3];
// v2
v_buff[idx++] = sp->ov2.x;
v_buff[idx++] = sp->ov2.y;
v_buff[idx++] = sp->ov2.z;
v_buff[idx++] = sp->quad->colors[8];
v_buff[idx++] = sp->quad->colors[9];
v_buff[idx++] = sp->quad->colors[10];
v_buff[idx++] = sp->quad->colors[11];
v_buff[idx++] = sp->quad->tex_coords[4];
v_buff[idx++] = sp->quad->tex_coords[5];
// v3
v_buff[idx++] = sp->ov3.x;
v_buff[idx++] = sp->ov3.y;
v_buff[idx++] = sp->ov3.z;
v_buff[idx++] = sp->quad->colors[12];
v_buff[idx++] = sp->quad->colors[13];
v_buff[idx++] = sp->quad->colors[14];
v_buff[idx++] = sp->quad->colors[15];
v_buff[idx++] = sp->quad->tex_coords[6];
v_buff[idx++] = sp->quad->tex_coords[7];
/* printf("my idx:%d\n",idx*sc); */
}
//--------------- finish packing data into buffers --------------------
glUseProgram(ce_get_default_shader()->shader_program);
glBindVertexArray(vao);
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
glEnable(GL_DEPTH_TEST);
glDepthFunc(GL_LEQUAL);
glClearDepth(1.0f);
glActiveTexture(GL_TEXTURE0);
glUniform1i(tex_loc, 0);
glBindTexture(GL_TEXTURE_2D, sp->texture_id);
cg_cam_get_matrices(&v_mat, &p_mat, &mvp_mat, ce_get_default_camera());
// projection * view * model * vertex_pos;
glUniformMatrix4fv(model_mat_loc, 1, GL_FALSE, vmathM4GetData(&sp->m_mat));
glUniformMatrix4fv(view_mat_loc, 1, GL_FALSE, vmathM4GetData(v_mat));
glUniformMatrix4fv(proj_mat_loc, 1, GL_FALSE, vmathM4GetData(p_mat));
glUniformMatrix4fv(mvp_matrix_loc, 1, GL_FALSE, vmathM4GetData(mvp_mat));
glBindBuffer(GL_ARRAY_BUFFER, vert_buff);
glBufferData(GL_ARRAY_BUFFER, (vbo_size_in_bytes), v_buff, GL_STREAM_DRAW);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ind_buff);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sc * cg_sprite_get_sizeof_ind(),
i_buff, GL_STREAM_DRAW);
glDrawElements(GL_TRIANGLES, sc * cg_sprite_get_vert_count(),
GL_UNSIGNED_SHORT, 0);
glBindVertexArray(0);
debug_opengl("render loop");
}
ok the above code on my machine can render 16000 sprites in 1 draw call but of course they all need to use the same texture.
I have 4 total textures which should be able to draw the 16000 sprites with their own unique texture instead of being forced to use only one like I am doing right now.