A simple example: CRC32 calculation

Introduction

In this document we will use an example of CRC32 calculation to illustrate various optimization techniques that can be used in the LXP32 assembly language.

CRC32 is a popular checksum algorithm used to detect data corruption. Multiple variants of the algorithm exist which have similar mathematical properties. The most common variant of the CRC32 checksum, sometimes called CRC-32b, is based on the following generator polynomial:

g(x) = x32 + x26 + x23 + x22 + x16 + x12 + x11 + x10 + x8 + x7 + x5 + x4 + x2 + x + 1.

This version of CRC32 can be calculated as follows:

#include <stddef.h>
#include <stdint.h>

uint32_t crc32(const char *s,size_t n) {
	uint32_t crc=0xFFFFFFFF;
	
	for(size_t i=0;i<n;i++) {
		char ch=s[i];
		for(size_t j=0;j<8;j++) {
			uint32_t b=(ch^crc)&1;
			crc>>=1;
			if(b) crc=crc^0xEDB88320;
			ch>>=1;
		}
	}
	
	return ~crc;
}

This code processes one bit at a time. 0xEDB88320 is the so-called reversed representation of the CRC32 generator polynomial intended to work with lsb-first implementations like this one. The same polynomial can be also represented as 0x04C11DB7 for msb-first implementations (this is called a normal representation).

When invoked with the "123456789" string as an argument (without the terminating null character), this function will return 0xCBF43926.

Bitwise implementation

An implementation of this algorithm in the LXP32 assembly language that is more or less equivalent to the above code is presented below:

#export Crc32_proc

/* 
 * Crc32_proc
 *
 * Calculates the most common variant of the CRC32 checksum
 *
 * Example: CRC32("123456789") = 0xCBF43926.
 *
 * Inputs
 *     r1: crc (initial CRC value, usually 0, see below)
 *     r2: p (string pointer)
 *     r3: n (string length in bytes)
 * Outputs
 *     r1: CRC value
 *
 * Note: the "crc" argument should be initially set to 0. To continue
 * the calculation, set it to the value returned by the previous call.
 * This can be useful to calculate the checksum for a long data stream.
 */

#define crc r1
#define p r2
#define n r3
#define ch r4
#define j r5
#define b r6
#define polynomial r7
#define string_end_ptr r8
#define dont_xor_ptr r9
#define bit_loop_ptr r10
#define byte_loop_ptr r11

Crc32_proc:
	cjmpe rp, n, 0 // return immediately if the string is empty
	lc polynomial, 0xEDB88320
	add string_end_ptr, p, n
	lcs dont_xor_ptr, Crc32_dont_xor
	lcs bit_loop_ptr, Crc32_bit_loop
	lcs byte_loop_ptr, Crc32_byte_loop
	not crc, crc
	
Crc32_byte_loop:
	lub ch, p
	mov j, 0

Crc32_bit_loop:
	xor b, ch, crc
	and b, b, 1
	sru crc, crc, 1
	cjmpe dont_xor_ptr, b, 0
	xor crc, crc, polynomial
	
Crc32_dont_xor:
	sru ch, ch, 1
	add j, j, 1
	cjmpul bit_loop_ptr, j, 8 // end of the bit loop
	add p, p, 1
	cjmpul byte_loop_ptr, p, string_end_ptr // end of the byte loop
	
	not crc, crc
	ret

Notes:

Bytewise implementation

It is also possible to calculate CRC32 in a bytewise manner, eliminating the bit loop altogether. This approach is about six times faster than the previous one but needs additional memory for a pre-calculated 256-word lookup table. The idea is described in Todd K. Moon, Error Correction Coding. Mathematical Methods and Algorithms, Wiley, 2005, ISBN 0-471-64800-0.

Bytewise implementation of the CRC32 algorithm can be illustrated by the following code:

#include <stddef.h>
#include <stdint.h>

uint32_t crc32_table[256];

void build_crc32_table(void) {
	for(uint32_t i=0;i<256;i++) {
		uint32_t ch=i;
		uint32_t crc=0;
		for(size_t j=0;j<8;j++) {
			uint32_t b=(ch^crc)&1;
			crc>>=1;
			if(b) crc=crc^0xEDB88320;
			ch>>=1;
		}
		crc32_table[i]=crc;
	}
}

uint32_t crc32_fast(const char *s,size_t n) {
	uint32_t crc=0xFFFFFFFF;
	
	for(size_t i=0;i<n;i++) {
		char ch=s[i];
		uint32_t t=(ch^crc)&0xFF;
		crc=(crc>>8)^crc32_table[t];
	}
	
	return ~crc;
}

build_crc32_table() must be invoked before the first call of the crc32_fast() function.

An implementation of this algorithm in the LXP32 assembly language is presented below. It incorporates the entire lookup table to avoid computing it at runtime.

#export Crc32_fast_proc

/* 
 * Crc32_fast_proc
 *
 * Calculates the most common variant of the CRC32 checksum
 * using a fast bytewise algorithm.
 *
 * Example: CRC32("123456789") = 0xCBF43926.
 *
 * Inputs
 *     r1: crc (initial CRC value, usually 0, see below)
 *     r2: p (string pointer)
 *     r3: n (string length in bytes)
 * Outputs
 *     r1: CRC value
 *
 * Note: the "crc" argument should be initially set to 0. To continue
 * the calculation, set it to the value returned by the previous call.
 * This can be useful to calculate the checksum for a long data stream.
 */

#define crc r1
#define p r2
#define n r3
#define x r4
#define string_end_ptr r5
#define constant_ff r6
#define table_ptr r7
#define loop_ptr r8

Crc32_fast_proc:
	cjmpe rp, n, 0 // return immediately if the string is empty
	add string_end_ptr, p, n
	lcs constant_ff, 0xFF
	lcs table_ptr, Crc32_fast_table
	lcs loop_ptr, Crc32_fast_loop
	not crc, crc
	
Crc32_fast_loop:
	lub x, p // x := ch
	xor x, x, crc
	and x, x, constant_ff // x := t
	sl x, x, 2
	add x, table_ptr, x // x := &crc32_table[t]
	lw x, x // x := crc32_table[t]
	sru crc, crc, 8
	xor crc, crc, x
	add p, p, 1
	cjmpul loop_ptr, p, string_end_ptr
	
	not crc, crc
	ret

Crc32_fast_table:
	.word 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA
	.word 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3
	.word 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988
	.word 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91
	.word 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE
	.word 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7
	.word 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC
	.word 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5
	.word 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172
	.word 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B
	.word 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940
	.word 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59
	.word 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116
	.word 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F
	.word 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924
	.word 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D
	.word 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A
	.word 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433
	.word 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818
	.word 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01
	.word 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E
	.word 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457
	.word 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C
	.word 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65
	.word 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2
	.word 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB
	.word 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0
	.word 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9
	.word 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086
	.word 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F
	.word 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4
	.word 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD
	.word 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A
	.word 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683
	.word 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8
	.word 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1
	.word 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE
	.word 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7
	.word 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC
	.word 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5
	.word 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252
	.word 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B
	.word 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60
	.word 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79
	.word 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236
	.word 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F
	.word 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04
	.word 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D
	.word 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A
	.word 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713
	.word 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38
	.word 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21
	.word 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E
	.word 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777
	.word 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C
	.word 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45
	.word 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2
	.word 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB
	.word 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0
	.word 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9
	.word 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6
	.word 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF
	.word 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94
	.word 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D