DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world
SSE Line Count
/*
* An implementation of 'wc -l' (count newlines in a file) that uses SSE.
*/
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>
#define TABLE_SIZE (1 << 16)
/* Simple function to count newlines by iterating over the bytes one by
* one. This is used to count the residual bytes after the fast function
* (which can only operate on blocks of sixteen bytes) finishes. */
int count_newlines_slow(char *start, char *end)
{
int num_lines = 0;
while(start < end)
{
if(*start == '\n')
num_lines++;
start++;
}
return num_lines;
}
/* Tables for count_newlines_fast. */
unsigned int bits_table[TABLE_SIZE];
void initialize_bits_table()
{
for(int i = 0; i < TABLE_SIZE; i++) {
int num = i, count = 0;
while(num) {
if(num & 0x1)
count++;
num >>= 1;
}
bits_table[i] = count;
}
}
char newlines[] = {0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A};
/* Count newlines in blocks of sixteen bytes using SSE. */
static int count_newlines_fast(char *start, char *end)
{
int count = (end-start) / 16;
char *fast_end = start + (count*16);
int num_lines = 0, tmp = 0, tmp2 = 0;
__asm__ __volatile__
("movddup newlines, %%xmm0\n" /* fill xmm0 with all newlines */
"loop:\n\t"
"movdqa %%xmm0, %%xmm1\n\t" /* fill xmm1 with all newlines */
"pcmpeqb (%2), %%xmm1\n\t" /* do 16 byte-wise compares with newlines */
"pmovmskb %%xmm1, %4\n\t" /* collapse the results of those comparisons
into the low 16 bits of eax */
"addl bits_table(,%4,4), %0\n\t" /* add the number of bits set to the count */
"addl $16, %2\n\t" /* advance the pointer by 16 bytes */
"decl %3\n\t"
"jnz loop"
: "=c" (num_lines)
: "c" (num_lines), "d" (start), "S" (count), "a" (tmp), "b" (tmp2)
: "cc"
);
return num_lines + count_newlines_slow(fast_end, end);
}
int main(int argc, char *argv[])
{
int num_lines = 0;
char buf[16 * 1024] __attribute__((aligned(128)));
initialize_bits_table();
int bytes_read;
while((bytes_read = read(STDIN_FILENO, buf, sizeof(buf))) > 0)
num_lines += count_newlines_fast(buf, buf+bytes_read);
printf("%d\n", num_lines);
}




