DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

Snippets has posted 5883 posts at DZone. View Full User Profile

SSE Line Count

10.23.2008
| 1094 views |
  • submit to reddit
        
/*
 * An implementation of 'wc -l' (count newlines in a file) that uses SSE.
 */
#include <unistd.h>
#include <stdio.h>
#include <fcntl.h>

#define TABLE_SIZE (1 << 16)

/* Simple function to count newlines by iterating over the bytes one by
 * one.  This is used to count the residual bytes after the fast function
 * (which can only operate on blocks of sixteen bytes) finishes. */
int count_newlines_slow(char *start, char *end)
{
  int num_lines = 0;
  while(start < end)
  {
    if(*start == '\n')
      num_lines++;
    start++;
  }
  return num_lines;
}

/* Tables for count_newlines_fast. */
unsigned int bits_table[TABLE_SIZE];
void initialize_bits_table()
{
  for(int i = 0; i < TABLE_SIZE; i++) {
    int num = i, count = 0;
    while(num) {
      if(num & 0x1)
        count++;
      num >>= 1;
    }
    bits_table[i] = count;
  }
}

char newlines[] = {0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A};

/* Count newlines in blocks of sixteen bytes using SSE. */
static int count_newlines_fast(char *start, char *end)
{
  int count = (end-start) / 16;
  char *fast_end = start + (count*16);
  int num_lines = 0, tmp = 0, tmp2 = 0;
  __asm__ __volatile__
     ("movddup newlines, %%xmm0\n"     /* fill xmm0 with all newlines */
      "loop:\n\t"
      "movdqa    %%xmm0, %%xmm1\n\t"   /* fill xmm1 with all newlines */
      "pcmpeqb   (%2), %%xmm1\n\t"     /* do 16 byte-wise compares with newlines */
      "pmovmskb  %%xmm1, %4\n\t"       /* collapse the results of those comparisons
                                          into the low 16 bits of eax */
      "addl      bits_table(,%4,4), %0\n\t"  /* add the number of bits set to the count */
      "addl      $16, %2\n\t"          /* advance the pointer by 16 bytes */
      "decl      %3\n\t"
      "jnz       loop"
      : "=c" (num_lines)
      : "c" (num_lines), "d" (start), "S" (count), "a" (tmp), "b" (tmp2)
      : "cc"
  );
  return num_lines + count_newlines_slow(fast_end, end);
}

int main(int argc, char *argv[])
{
  int num_lines = 0;
  char buf[16 * 1024] __attribute__((aligned(128)));
  initialize_bits_table();
  int bytes_read;

  while((bytes_read = read(STDIN_FILENO, buf, sizeof(buf))) > 0)
    num_lines += count_newlines_fast(buf, buf+bytes_read);

  printf("%d\n", num_lines);
}