////////////////////////////////////////////////////////////////// // 3DES encryption example. // // Copyright(c) 2003 Impulse Accelerated Technologies, Inc. // // This implementation is based on public domain C source code by // P. Karn and is similar to the algorithm described in Part V of // Applied Cryptography by Bruce Schneier. // #ifdef WIN32 #include <windows.h> #else #include "xparameters.h" #define TIMED_TEST 1 #ifdef TIMED_TEST #include "xtmrctr.h" #endif #endif #include <stdio.h> #include "co.h" #include "des.h" /* 3DES constants, don't change these */ #define BLOCKSIZE 8 /* unsigned chars per block */ #define KS_DEPTH 48 /* key pairs */ #ifdef IMPULSE_C_TARGET #define printf xil_printf #ifdef TIMED_TEST XTmrCtr TimerCounter; #endif #endif extern co_architecture co_initialize(void *); /* Block data for C process */ static unsigned char Blocks[]={ 0x6f,0x98,0x26,0x35,0x02,0xc9,0x83,0xd7}; static unsigned long Iterations=1000; static int Asmversion = 0; #include "sp.c" // Combined SP lookup table, linked in // For best results, ensure that this is aligned on a 32-bit boundary; // Borland C++ 3.1 doesn't guarantee this! // #define SPBOX_X 8 #define SPBOX_Y 64 extern unsigned long Spbox[SPBOX_X][SPBOX_Y]; /* Combined S and P boxes */ /* Keyschedule */ DES3_KS Ks; /* Portable C code to create DES key schedules from user-provided keys * This doesn't have to be fast unless you're cracking keys or UNIX * passwords */ /* Key schedule-related tables from FIPS-46 */ /* permuted choice table (key) */ static unsigned char pc1[] = { 57, 49, 41, 33, 25, 17, 9, 1, 58, 50, 42, 34, 26, 18, 10, 2, 59, 51, 43, 35, 27, 19, 11, 3, 60, 52, 44, 36, 63, 55, 47, 39, 31, 23, 15, 7, 62, 54, 46, 38, 30, 22, 14, 6, 61, 53, 45, 37, 29, 21, 13, 5, 28, 20, 12, 4 }; /* number left rotations of pc1 */ static unsigned char totrot[] = { 1,2,4,6,8,10,12,14,15,17,19,21,23,25,27,28 }; /* permuted choice key (table) */ static unsigned char pc2[] = { 14, 17, 11, 24, 1, 5, 3, 28, 15, 6, 21, 10, 23, 19, 12, 4, 26, 8, 16, 7, 27, 20, 13, 2, 41, 52, 31, 37, 47, 55, 30, 40, 51, 45, 33, 48, 44, 49, 39, 56, 34, 53, 46, 42, 50, 36, 29, 32 }; /* End of DES-defined tables */ /* bit 0 is left-most in byte */ static int bytebit[] = { 0200,0100,040,020,010,04,02,01 }; /// Generate key schedule for encryption or decryption // depending on the value of "decrypt" // void deskey(k,key,decrypt) unsigned long k[16][2]; /* Key schedule array */ unsigned char *key; /* 64 bits (will use only 56) */ int decrypt; /* 0 = encrypt, 1 = decrypt */ { unsigned char pc1m[56]; /* place to modify pc1 into */ unsigned char pcr[56]; /* place to rotate pc1 into */ register int i,j,l; int m; unsigned char ks[8]; for (j=0; j<56; j++) { /* convert pc1 to bits of key */ l=pc1[j]-1; /* integer bit location */ m = l & 07; /* find bit */ pc1m[j]=(key[l>>3] & /* find which key byte l is in */ bytebit[m]) /* and which bit of that byte */ ? 1 : 0; /* and store 1-bit result */ } for (i=0; i<16; i++) { /* key chunk for each iteration */ memset(ks,0,sizeof(ks)); /* Clear key schedule */ for (j=0; j<56; j++) /* rotate pc1 the right amount */ pcr[j] = pc1m[(l=j+totrot[decrypt? 15-i : i])<(j<28? 28 : 56) ? l: l-28]; /* rotate left and right halves independently */ for (j=0; j<48; j++){ /* select bits individually */ /* check bit that goes to ks[j] */ if (pcr[pc2[j]-1]){ /* mask it in if it's there */ l= j % 6; ks[j/6] |= bytebit[l] >> 2; } } /* Now convert to packed odd/even interleaved form */ k[i][0] = ((long)ks[0] << 24) | ((long)ks[2] << 16) | ((long)ks[4] << 8) | ((long)ks[6]); k[i][1] = ((long)ks[1] << 24) | ((long)ks[3] << 16) | ((long)ks[5] << 8) | ((long)ks[7]); if(Asmversion){ /* The assembler versions pre-shift each subkey 2 bits * so the Spbox indexes are already computed */ k[i][0] <<= 2; k[i][1] <<= 2; } } } // Generate key schedule for triple DES in E-D-E (or D-E-D) mode. // // The key argument is taken to be 24 bytes. The first 8 bytes are K1 // for the first stage, the second 8 bytes are K2 for the middle stage // and the third 8 bytes are K3 for the last stage // void des3key(k,key,decrypt) unsigned long k[48][2]; unsigned char *key; /* 192 bits (will use only 168) */ int decrypt; /* 0 = encrypt, 1 = decrypt */ { if(!decrypt){ deskey(&k[0],&key[0],0); deskey(&k[16],&key[8],1); deskey(&k[32],&key[16],0); } else { deskey(&k[32],&key[0],1); deskey(&k[16],&key[8],0); deskey(&k[0],&key[16],1); } } void des_test(co_stream config_out, co_stream blocks_out, co_stream input_stream, co_parameter iter_param) { int iterations = (int)iter_param; int i, k; unsigned char block[8]; uint8 blockElement; unsigned long data,err; #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST Xuint32 counter; #endif #endif /* Send the keyschedule data */ HW_STREAM_OPEN(des_test,config_out, O_WRONLY, UINT_TYPE(32)); for ( k = 0; k < 2; k++ ) { for ( i = 0; i < KS_DEPTH; i++ ) { data=Ks[i][k]; HW_STREAM_WRITE(des_test,config_out,data); } } for ( i = 0; i < SPBOX_X; i++ ) { for ( k = 0; k < SPBOX_Y; k++ ) { data=Spbox[i][k]; HW_STREAM_WRITE(des_test,config_out,data); } } HW_STREAM_CLOSE(des_test,config_out); /* Send the same random block data to both processes */ HW_STREAM_OPEN(des_test,blocks_out, O_WRONLY, UINT_TYPE(8)); HW_STREAM_OPEN(des_test,input_stream,O_RDONLY,UINT_TYPE(8)); #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST XTmrCtr_Reset(&TimerCounter,0); #endif #endif for ( i = 0; i < iterations; i++ ) { for ( k = 0; k < BLOCKSIZE; k++ ) { blockElement = Blocks[k]; HW_STREAM_WRITE(des_test,blocks_out,blockElement); } for ( k = 0; k < BLOCKSIZE; k++ ) { HW_STREAM_READ(des_test,input_stream,blockElement,err); block[k]=blockElement; } } #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST counter=XTmrCtr_GetValue(&TimerCounter,0); #endif #endif HW_STREAM_CLOSE(des_test,blocks_out); HW_STREAM_CLOSE(des_test,input_stream); #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST xil_printf("FPGA processing done (%d ticks).\n\r",counter); #else xil_printf("FPGA processing done.\n\r"); #endif #endif printf("FPGA block out:"); for (i=0; i<BLOCKSIZE; i++) { printf(" %02x",block[i]); } printf("\n\r"); } // Primitive function F. // Input is r, subkey array in keys, output is XORed into l. // Each round consumes eight 6-bit subkeys, one for // each of the 8 S-boxes, 2 longs for each round. // Each long contains four 6-bit subkeys, each taking up a byte. // The first long contains, from high to low end, the subkeys for // S-boxes 1, 3, 5 & 7; the second contains the subkeys for S-boxes // 2, 4, 6 & 8 (using the origin-1 S-box numbering in the standard, // not the origin-0 numbering used elsewhere in this code) // See comments elsewhere about the pre-rotated values of r and Spbox. // #define F(l,r,key){ \ work = ((r >> 4) | (r << 28)) ^ key[0];\ l ^= Spbox[6][work & 0x3f];\ l ^= Spbox[4][(work >> 8) & 0x3f];\ l ^= Spbox[2][(work >> 16) & 0x3f];\ l ^= Spbox[0][(work >> 24) & 0x3f];\ work = r ^ key[1];\ l ^= Spbox[7][work & 0x3f];\ l ^= Spbox[5][(work >> 8) & 0x3f];\ l ^= Spbox[3][(work >> 16) & 0x3f];\ l ^= Spbox[1][(work >> 24) & 0x3f];\ } // This is the plain C 3DES process. It reads the keyschedule and block // inputs from global variables initialized in the des_producer process // and does all processing using standard C code. // // void des_c() { int i; unsigned int blockCount = 0; unsigned char block[8]; unsigned long left,right,work; #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST Xuint32 counter; #endif #endif #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST XTmrCtr_Reset(&TimerCounter,0); #endif #endif for (blockCount=0; blockCount<Iterations; blockCount++) { for ( i = 0; i < BLOCKSIZE; i++ ) { block[i] = Blocks[i]; } // Process the block... // Read input block and place in left/right in big-endian order // left = ((unsigned long)block[0] << 24) | ((unsigned long)block[1] << 16) | ((unsigned long)block[2] << 8) | (unsigned long)block[3]; right = ((unsigned long)block[4] << 24) | ((unsigned long)block[5] << 16) | ((unsigned long)block[6] << 8) | (unsigned long)block[7]; // Hoey's clever initial permutation algorithm, from Outerbridge // (see Schneier p 478) // // The convention here is the same as Outerbridge: rotate each // register left by 1 bit, i.e., so that "left" contains permuted // input bits 2, 3, 4, ... 1 and "right" contains 33, 34, 35, ... 32 // (using origin-1 numbering as in the FIPS). This allows us to avoid // one of the two rotates that would otherwise be required in each of // the 16 rounds. // work = ((left >> 4) ^ right) & 0x0f0f0f0f; right ^= work; left ^= work << 4; work = ((left >> 16) ^ right) & 0xffff; right ^= work; left ^= work << 16; work = ((right >> 2) ^ left) & 0x33333333; left ^= work; right ^= (work << 2); work = ((right >> 8) ^ left) & 0xff00ff; left ^= work; right ^= (work << 8); right = (right << 1) | (right >> 31); work = (left ^ right) & 0xaaaaaaaa; left ^= work; right ^= work; left = (left << 1) | (left >> 31); /* First key */ F(left,right,Ks[0]); F(right,left,Ks[1]); F(left,right,Ks[2]); F(right,left,Ks[3]); F(left,right,Ks[4]); F(right,left,Ks[5]); F(left,right,Ks[6]); F(right,left,Ks[7]); F(left,right,Ks[8]); F(right,left,Ks[9]); F(left,right,Ks[10]); F(right,left,Ks[11]); F(left,right,Ks[12]); F(right,left,Ks[13]); F(left,right,Ks[14]); F(right,left,Ks[15]); /* Second key (must be created in opposite mode to first key) */ F(right,left,Ks[16]); F(left,right,Ks[17]); F(right,left,Ks[18]); F(left,right,Ks[19]); F(right,left,Ks[20]); F(left,right,Ks[21]); F(right,left,Ks[22]); F(left,right,Ks[23]); F(right,left,Ks[24]); F(left,right,Ks[25]); F(right,left,Ks[26]); F(left,right,Ks[27]); F(right,left,Ks[28]); F(left,right,Ks[29]); F(right,left,Ks[30]); F(left,right,Ks[31]); /* Third key */ F(left,right,Ks[32]); F(right,left,Ks[33]); F(left,right,Ks[34]); F(right,left,Ks[35]); F(left,right,Ks[36]); F(right,left,Ks[37]); F(left,right,Ks[38]); F(right,left,Ks[39]); F(left,right,Ks[40]); F(right,left,Ks[41]); F(left,right,Ks[42]); F(right,left,Ks[43]); F(left,right,Ks[44]); F(right,left,Ks[45]); F(left,right,Ks[46]); F(right,left,Ks[47]); /* Inverse permutation, also from Hoey via Outerbridge and Schneier */ right = (right << 31) | (right >> 1); work = (left ^ right) & 0xaaaaaaaa; left ^= work; right ^= work; left = (left >> 1) | (left << 31); work = ((left >> 8) ^ right) & 0xff00ff; right ^= work; left ^= work << 8; work = ((left >> 2) ^ right) & 0x33333333; right ^= work; left ^= work << 2; work = ((right >> 16) ^ left) & 0xffff; left ^= work; right ^= work << 16; work = ((right >> 4) ^ left) & 0x0f0f0f0f; left ^= work; right ^= work << 4; /* Put the block into the output stream with final swap */ block[0] = (int8) (right >> 24); block[1] = (int8) (right >> 16); block[2] = (int8) (right >> 8); block[3] = (int8) right; block[4] = (int8) (left >> 24); block[5] = (int8) (left >> 16); block[6] = (int8) (left >> 8); block[7] = (int8) left; } #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST counter=XTmrCtr_GetValue(&TimerCounter,0); xil_printf("CPU processing done (%d ticks).\n\r",counter); #else xil_printf("CPU processing done.\n\r"); #endif #endif printf("CPU block out:"); for (i=0; i<BLOCKSIZE; i++) { printf(" %02x",block[i]); } printf("\n\r"); } int main(int argc, char *argv[]) { unsigned char * key = (unsigned char *) "Gflk jqo40978J0dmm$%@878"; /* 24 bytes */ co_architecture my_arch; IF_SIM(int c;) #ifdef IMPULSE_C_TARGET #ifdef TIMED_TEST XTmrCtr_Initialize(&TimerCounter, XPAR_OPB_TIMER_0_DEVICE_ID); XTmrCtr_SetResetValue(&TimerCounter,0,0); XTmrCtr_Start(&TimerCounter,0); #endif #endif printf("Impulse C 3DES DEMO\n\r"); des3key(Ks, key, 0); /* Create a keyschedule for encryption */ printf("Running encryption test on FPGA ...\n\r"); my_arch = co_initialize((void *)Iterations); co_execute(my_arch); printf("Running encryption test on CPU ...\n\r"); des_c(); IF_SIM(printf("Press Enter key to continue...\n");) IF_SIM(c=getc(stdin);) return(0); } |