I have a program which reads and analyses MPEG-TS files. I've been using gperftools to identify hotspots and optimize them, and 45% of all samples at this point are in my main bitstream reader function.
I've already:
- Reduced the number of overflow checks to one per call (not shown here, since the overflow check occurs before this function is called).
- Optimized for byte-aligned reads.
Update: Based on answers here and some profiling, I also did..
- Make one call to get us aligned, read as many aligned bytes as we can, and then do one more unaligned read to finish.
- Split up read_bits function and skip complex logic for whole-byte reads.
See previous edits for original code.
static uint64_t bitreader_read_bytes_aligned_unchecked(bitreader_t* b, uint8_t bytes)
{
uint64_t result = b->data[b->bytes_read];
for (size_t i = 1; i < bytes; ++i) {
result <<= 8;
result += b->data[b->bytes_read + i];
}
b->bytes_read += bytes;
return result;
}
static uint64_t bitreader_read_bits_unaligned_unchecked(bitreader_t* b, uint8_t bits)
{
uint8_t result = b->data[b->bytes_read];
result >>= 8 - bits - b->bits_read;
result &= (1 << bits) - 1;
bitreader_skip_bits_unchecked(b, bits);
return result;
}
static uint64_t bitreader_read_bits_unchecked(bitreader_t* b, uint8_t bits)
{
uint64_t result = 0;
/* Try to turn this into an aligned read */
if (b->bits_read && bits >= 8) {
size_t to_read = 8 - b->bits_read;
result = bitreader_read_bits_unaligned_unchecked(b, to_read);
bits -= to_read;
}
/* Do aligned reads if we can */
while (!b->bits_read && bits >= 8) {
result <<= 8;
result += b->data[b->bytes_read];
++b->bytes_read;
bits -= 8;
}
/* Handle leftovers */
while (bits > 0) {
size_t to_read = MIN(bits, 8 - b->bits_read);
result <<= to_read;
result += bitreader_read_bits_unaligned_unchecked(b, to_read);
bits -= to_read;
}
return result;
}
And here's a full program that demonstrates typical behavior in my application. gperftools confirms that it spends 99% of its time in bitreader_read_bits_unchecked()
.
#include <assert.h>
#include <inttypes.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
typedef struct {
const uint8_t* data;
size_t len;
size_t bytes_read;
uint8_t bits_read;
} bitreader_t;
void bitreader_init(bitreader_t* b, const uint8_t* data, size_t len)
{
b->data = data;
b->len = len;
b->bytes_read = 0;
b->bits_read = 0;
}
void bitreader_skip_bits_unchecked(bitreader_t* b, size_t bits)
{
size_t bits_read = b->bits_read + bits;
b->bytes_read += bits_read / 8;
b->bits_read = bits_read % 8;
}
uint8_t bitreader_read_bit_unchecked(bitreader_t* b)
{
uint8_t result = b->data[b->bytes_read];
result &= 128 >> b->bits_read;
bitreader_skip_bits_unchecked(b, 1);
return result;
}
static uint64_t bitreader_read_bytes_aligned_unchecked(bitreader_t* b, uint8_t bytes)
{
uint64_t result = b->data[b->bytes_read];
for (size_t i = 1; i < bytes; ++i) {
result <<= 8;
result += b->data[b->bytes_read + i];
}
b->bytes_read += bytes;
return result;
}
static uint64_t bitreader_read_bits_unaligned_unchecked(bitreader_t* b, uint8_t bits)
{
uint8_t result = b->data[b->bytes_read];
result >>= 8 - bits - b->bits_read;
result &= (1 << bits) - 1;
bitreader_skip_bits_unchecked(b, bits);
return result;
}
static uint64_t bitreader_read_bits_unchecked(bitreader_t* b, uint8_t bits)
{
uint64_t result = 0;
/* Try to turn this into an aligned read */
if (b->bits_read && bits >= 8) {
size_t to_read = 8 - b->bits_read;
result = bitreader_read_bits_unaligned_unchecked(b, to_read);
bits -= to_read;
}
/* Do aligned reads if we can */
while (!b->bits_read && bits >= 8) {
result <<= 8;
result += b->data[b->bytes_read];
++b->bytes_read;
bits -= 8;
}
/* Handle leftovers */
while (bits > 0) {
size_t to_read = MIN(bits, 8 - b->bits_read);
result <<= to_read;
result += bitreader_read_bits_unaligned_unchecked(b, to_read);
bits -= to_read;
}
return result;
}
void usage(const char* name)
{
printf("usage: %s [-v] [some file]\n", name);
}
int main(int argc, char** argv)
{
int verbose = 0;
const char* filename = NULL;
size_t i;
for (i = 1; i < (unsigned)argc; ++i) {
if (!strcmp(argv[i], "-v")) {
verbose = 1;
} else if (filename) {
usage(argv[0]);
return 1;
} else {
filename = argv[i];
}
}
if (filename == NULL) {
usage(argv[0]);
return 1;
}
FILE* f = fopen(argv[1], "rb");
if (!f) {
printf("Failed to open file %s\n", argv[1]);
return 1;
}
fseek(f, 0, SEEK_END);
long fsize = ftell(f);
fseek(f, 0, SEEK_SET);
uint8_t* fbytes = malloc(fsize);
if (!fbytes) {
printf("Failed to allocate %ld bytes when reading file %s\n",
fsize, argv[1]);
return 1;
}
fread(fbytes, fsize, 1, f);
fclose(f);
bitreader_t b;
bitreader_init(&b, fbytes, fsize);
printf("%zu\n", fsize);
uint8_t NUM_BITS[] = {1, 1, 22, 19, 61, 24, 16, 16, 16, 32, 64};
size_t NUM_BYTES_SUM = 0;
for (i = 0; i < sizeof(NUM_BITS); ++i) {
NUM_BYTES_SUM += NUM_BITS[i];
}
assert(NUM_BYTES_SUM % 8 == 0);
NUM_BYTES_SUM = NUM_BYTES_SUM / 8;
size_t loops = b.len / NUM_BYTES_SUM;
size_t l;
for (l = 0; l < loops; ++l) {
for (i = 0; i < sizeof(NUM_BITS); ++i) {
size_t num_bits = NUM_BITS[i];
uint64_t bits;
if (num_bits == 1) {
bits = bitreader_read_bit_unchecked(&b);
} else if (num_bits % 8 == 0) {
bits = bitreader_read_bytes_aligned_unchecked(&b, num_bits / 8);
} else {
bits = bitreader_read_bits_unchecked(&b, num_bits);
}
if (verbose) {
printf("Read %zu bits and got %"PRIu64"\n", num_bits, bits);
}
}
}
free(fbytes);
return 0;
}
Compile with:
gcc -o bitreader bitreader.c -O3
Run with:
./bitreader /path/to/a/big/file
min
functions: coranac.com/documents/bittrick – glampert Apr 16 '15 at 18:43NUM_BITS
array? Because if your program only does exactly what the sample program does, it might be possible to do it completely differently. – JS1 Apr 16 '15 at 20:23NUM_BITS
array is just an example of typical calls made to the bitreader. The actual usage is more likeuint16_t table_id = bitreader_read_bits(b, 16); bool indicator = bitreader_read_bit(b);
, etc. I never seek in the file though. There is one case where I rewind the bitreader, but I could do it differently (I read forward to find the end of NULL-terminated strings and then rewind). – Brendan Long Apr 16 '15 at 21:20