/*
 * Micro-benchmark comparing several ways of testing whether a memory
 * region consists entirely of zero bytes.
 *
 * Each candidate has the signature bool f(const void *data, size_t length)
 * and returns true iff the first `length` bytes at `data` are all zero
 * (an empty region counts as all zero).
 *
 * Requires GCC/Clang extensions and an x86-64 target (uses RDTSC).
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Number of calls in the calibration pass of bench(). */
#define N 100000

/*
 * Cycle budget for the measured pass of bench().
 * NOTE(review): presumably "about one second at ~3 GHz" -- confirm.
 */
#define TARGET_CYCLES 3000000000.0

/* Zero-initialised buffers (static storage) used as benchmark inputs. */
char buf1[1024], buf2[1024], buf3[1024], buf4[65536];

/* Reference implementation: check one byte at a time. */
static __attribute__((noinline)) bool memeqzero1(const void *data, size_t length)
{
	const unsigned char *p = data;

	while (length) {
		if (*p)
			return false;
		p++;
		length--;
	}
	return true;
}

/* Compare against a static block of zeroes, one block at a time. */
static __attribute__((noinline)) bool memeqzero2(const void *data, size_t length)
{
	const unsigned char *p = data;
	static const unsigned long zeroes[16] = {0};

	while (length > sizeof(zeroes)) {
		if (memcmp(zeroes, p, sizeof(zeroes)))
			return false;
		p += sizeof(zeroes);
		length -= sizeof(zeroes);
	}
	/* At most sizeof(zeroes) bytes remain. */
	return memcmp(zeroes, p, length) == 0;
}

/*
 * Byte-compare up to the next unsigned-long boundary, then test one
 * word at a time, then byte-compare whatever is left.
 */
static __attribute__((noinline)) bool memeqzero3_rusty(const void *data, size_t length)
{
	const unsigned char *p = data;
	const unsigned long zero = 0;
	size_t pre;

	/* Bytes by which p is past the previous word boundary. */
	pre = (size_t)p % sizeof(unsigned long);
	if (pre) {
		size_t n = sizeof(unsigned long) - pre;

		if (n > length)
			n = length;
		if (memcmp(p, &zero, n) != 0)
			return false;
		p += n;
		length -= n;
	}
	while (length > sizeof(zero)) {
		unsigned long word;

		/*
		 * Load through memcpy rather than *(unsigned long *)p: the
		 * buffer is not an unsigned long object, so a direct
		 * dereference would violate the effective-type rules.
		 */
		memcpy(&word, p, sizeof(word));
		if (word != zero)
			return false;
		p += sizeof(zero);
		length -= sizeof(zero);
	}
	/* At most sizeof(zero) bytes remain. */
	return memcmp(&zero, p, length) == 0;
}

/*
 * Check single bytes until the remaining length is a multiple of the
 * word size, then check whole words (loaded with memcpy, so no
 * alignment requirement on data).
 */
static __attribute__((noinline)) bool memeqzero3_paolo(const void *data, size_t length)
{
	const unsigned char *p = data;
	unsigned long word;

	while (length & (sizeof(word) - 1)) {
		if (*p)
			return false;
		p++;
		length--;
	}
	while (length) {
		memcpy(&word, p, sizeof(word));
		if (word)
			return false;
		p += sizeof(word);
		length -= sizeof(word);
	}
	/* Every byte was examined and found to be zero. */
	return true;
}

/*
 * Verify the first 16 bytes by hand, then compare the region with
 * itself shifted by 16 bytes: if data[0..16) is zero and
 * data[i] == data[i + 16] for every remaining i, everything is zero.
 */
static __attribute__((noinline)) bool memeqzero4_rusty(const void *data, size_t length)
{
	const unsigned char *p = data;
	size_t len;

	/* Check first 16 bytes manually */
	for (len = 0; len < 16; len++) {
		if (!length)
			return true;
		if (*p)
			return false;
		p++;
		length--;
	}

	/* Now we know that's zero, memcmp with self. */
	return memcmp(data, p, length) == 0;
}

/*
 * Like memeqzero4_rusty, but the hand-checked prefix is chosen so that
 * the remaining length is a multiple of 16: bytes first, then words.
 */
static __attribute__((noinline)) bool memeqzero4_paolo(const void *data, size_t length)
{
	const unsigned char *start = data;
	const unsigned char *p = start;
	unsigned long word;

	/* Bytes, until the remaining length is a multiple of sizeof(word). */
	while (__builtin_expect(length & (sizeof(word) - 1), 0)) {
		if (*p)
			return false;
		p++;
		length--;
	}
	/* Words, until the remaining length is a multiple of 16. */
	while (__builtin_expect(length & (16 - sizeof(word)), 0)) {
		memcpy(&word, p, sizeof(word));
		if (word)
			return false;
		p += sizeof(word);
		length -= sizeof(word);
	}
	if (length == 0)
		return true;

	/*
	 * If the original length was already a multiple of 16, neither loop
	 * ran and p still equals data; comparing the region with itself at
	 * offset 0 would succeed for any contents.  Verify one 16-byte
	 * chunk explicitly so that the self-compare has a known-zero prefix.
	 */
	if (p == start) {
		size_t off;

		for (off = 0; off < 16; off += sizeof(word)) {
			memcpy(&word, p + off, sizeof(word));
			if (word)
				return false;
		}
		p += 16;
		length -= 16;
	}

	/* Now we know the prefix [data, p) is zero, memcmp with self. */
	return memcmp(start, p, length) == 0;
}

/*
 * Read the x86 time-stamp counter.
 *
 * RDTSC leaves the low half in EAX and the high half in EDX; both are
 * declared as outputs so the compiler knows each register is written.
 * Assumes unsigned long is 64 bits wide (x86-64 LP64).
 */
static inline unsigned long rdtsc(void)
{
	unsigned int lo, hi;

	__asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
	return ((unsigned long)hi << 32) | lo;
}

/*
 * Time memeqzero(buf, size) and return the average TSC ticks per call.
 *
 * A first pass of N calls calibrates how many calls fit in
 * TARGET_CYCLES; a second pass of that many calls is then measured.
 */
static int bench(char *buf, int size, bool (*memeqzero)(const void *, size_t))
{
	int i = N;
	int count;
	unsigned long start, end, elapsed;
	double target;

	start = rdtsc();
	while (i--) {
		/* Empty asm consumes the result so the call is not discarded. */
		__asm__ __volatile__("" : : "r"(memeqzero(buf, size)));
	}
	end = rdtsc();

	/* Avoid dividing by zero if the counter did not advance. */
	elapsed = end - start;
	if (elapsed == 0)
		elapsed = 1;

	/* Keep the iteration count within [1, INT_MAX]. */
	target = TARGET_CYCLES * N / elapsed;
	if (target > INT_MAX)
		count = INT_MAX;
	else if (target < 1.0)
		count = 1;
	else
		count = (int)target;

	i = count;
	start = rdtsc();
	while (i--)
		__asm__ __volatile__("" : : "r"(memeqzero(buf, size)));
	end = rdtsc();

	return (end - start) / count;
}

/*
 * Benchmark one implementation at sizes 1, 8, 512 and 65536 bytes and
 * print the four per-call tick counts as one tab-separated line.
 */
static __attribute__((__flatten__)) void run(bool (*memeqzero)(const void *, size_t))
{
	/* Sequenced explicitly: argument evaluation order is unspecified. */
	int t1 = bench(buf1, 1, memeqzero);
	int t8 = bench(buf2, 8, memeqzero);
	int t512 = bench(buf3, 512, memeqzero);
	int t64k = bench(buf4, 65536, memeqzero);

	printf ("%d\t%d\t%d\t%d\n", t1, t8, t512, t64k);
}

/* Run every candidate implementation in turn. */
int main(void)
{
	run(memeqzero1);
	run(memeqzero2);
	run(memeqzero3_rusty);
	run(memeqzero3_paolo);
	run(memeqzero4_rusty);
	run(memeqzero4_paolo);
	return 0;
}