mirror of https://github.com/apache/cloudstack.git
4446 lines
104 KiB
C
4446 lines
104 KiB
C
|
|
/*
|
|
** nbench1.c
|
|
*/
|
|
|
|
/********************************
|
|
** BYTEmark (tm) **
|
|
** BYTE NATIVE MODE BENCHMARKS **
|
|
** VERSION 2 **
|
|
** **
|
|
** Included in this source **
|
|
** file: **
|
|
** Numeric Heapsort **
|
|
** String Heapsort **
|
|
** Bitfield test **
|
|
** Floating point emulation **
|
|
** Fourier coefficients **
|
|
** Assignment algorithm **
|
|
** IDEA Encyption **
|
|
** Huffman compression **
|
|
** Back prop. neural net **
|
|
** LU Decomposition **
|
|
** (linear equations) **
|
|
** ---------- **
|
|
** Rick Grehan, BYTE Magazine **
|
|
*********************************
|
|
**
|
|
** BYTEmark (tm)
|
|
** BYTE's Native Mode Benchmarks
|
|
** Rick Grehan, BYTE Magazine
|
|
**
|
|
** Creation:
|
|
** Revision: 3/95;10/95
|
|
** 10/95 - Removed allocation that was taking place inside
|
|
** the LU Decomposition benchmark. Though it didn't seem to
|
|
** make a difference on systems we ran it on, it nonetheless
|
|
** removes an operating system dependency that probably should
|
|
** not have been there.
|
|
**
|
|
** DISCLAIMER
|
|
** The source, executable, and documentation files that comprise
|
|
** the BYTEmark benchmarks are made available on an "as is" basis.
|
|
** This means that we at BYTE Magazine have made every reasonable
|
|
** effort to verify that the there are no errors in the source and
|
|
** executable code. We cannot, however, guarantee that the programs
|
|
** are error-free. Consequently, McGraw-HIll and BYTE Magazine make
|
|
** no claims in regard to the fitness of the source code, executable
|
|
** code, and documentation of the BYTEmark.
|
|
** Furthermore, BYTE Magazine, McGraw-Hill, and all employees
|
|
** of McGraw-Hill cannot be held responsible for any damages resulting
|
|
** from the use of this code or the results obtained from using
|
|
** this code.
|
|
*/
|
|
|
|
/*
|
|
** INCLUDES
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
#include <math.h>
|
|
#include "nmglobal.h"
|
|
#include "nbench1.h"
|
|
#include "wordcat.h"
|
|
|
|
#ifdef DEBUG
|
|
static int numsort_status=0;
|
|
static int stringsort_status=0;
|
|
#endif
|
|
|
|
/*********************
|
|
** NUMERIC HEAPSORT **
|
|
**********************
|
|
** This test implements a heapsort algorithm, performed on an
|
|
** array of longs.
|
|
*/
|
|
|
|
/**************
|
|
** DoNumSort **
|
|
***************
|
|
** This routine performs the CPU numeric sort test.
|
|
** NOTE: Last version incorrectly stated that the routine
|
|
** returned result in # of longword sorted per second.
|
|
** Not so; the routine returns # of iterations per sec.
|
|
*/
|
|
|
|
void DoNumSort(void)
|
|
{
|
|
SortStruct *numsortstruct; /* Local pointer to global struct */
|
|
farlong *arraybase; /* Base pointers of array */
|
|
long accumtime; /* Accumulated time */
|
|
double iterations; /* Iteration counter */
|
|
char *errorcontext; /* Error context string pointer */
|
|
int systemerror; /* For holding error codes */
|
|
|
|
/*
|
|
** Link to global structure
|
|
*/
|
|
numsortstruct=&global_numsortstruct;
|
|
|
|
/*
|
|
** Set the error context string.
|
|
*/
|
|
errorcontext="CPU:Numeric Sort";
|
|
|
|
/*
|
|
** See if we need to do self adjustment code.
|
|
*/
|
|
if(numsortstruct->adjust==0)
|
|
{
|
|
/*
|
|
** Self-adjustment code. The system begins by sorting 1
|
|
** array. If it does that in no time, then two arrays
|
|
** are built and sorted. This process continues until
|
|
** enough arrays are built to handle the tolerance.
|
|
*/
|
|
numsortstruct->numarrays=1;
|
|
while(1)
|
|
{
|
|
/*
|
|
** Allocate space for arrays
|
|
*/
|
|
arraybase=(farlong *)AllocateMemory(sizeof(long) *
|
|
numsortstruct->numarrays * numsortstruct->arraysize,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)arraybase,
|
|
&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Do an iteration of the numeric sort. If the
|
|
** elapsed time is less than or equal to the permitted
|
|
** minimum, then allocate for more arrays and
|
|
** try again.
|
|
*/
|
|
if(DoNumSortIteration(arraybase,
|
|
numsortstruct->arraysize,
|
|
numsortstruct->numarrays)>global_min_ticks)
|
|
break; /* We're ok...exit */
|
|
|
|
FreeMemory((farvoid *)arraybase,&systemerror);
|
|
if(numsortstruct->numarrays++>NUMNUMARRAYS)
|
|
{ printf("CPU:NSORT -- NUMNUMARRAYS hit.\n");
|
|
ErrorExit();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{ /*
|
|
** Allocate space for arrays
|
|
*/
|
|
arraybase=(farlong *)AllocateMemory(sizeof(long) *
|
|
numsortstruct->numarrays * numsortstruct->arraysize,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)arraybase,
|
|
&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
}
|
|
/*
|
|
** All's well if we get here. Repeatedly perform sorts until the
|
|
** accumulated elapsed time is greater than # of seconds requested.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
accumtime+=DoNumSortIteration(arraybase,
|
|
numsortstruct->arraysize,
|
|
numsortstruct->numarrays);
|
|
iterations+=(double)1.0;
|
|
} while(TicksToSecs(accumtime)<numsortstruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home. Be sure to
|
|
** show that we don't have to rerun adjustment code.
|
|
*/
|
|
FreeMemory((farvoid *)arraybase,&systemerror);
|
|
|
|
numsortstruct->sortspersec=iterations *
|
|
(double)numsortstruct->numarrays / TicksToFracSecs(accumtime);
|
|
|
|
if(numsortstruct->adjust==0)
|
|
numsortstruct->adjust=1;
|
|
|
|
#ifdef DEBUG
|
|
if (numsort_status==0) printf("Numeric sort: OK\n");
|
|
numsort_status=0;
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
/***********************
|
|
** DoNumSortIteration **
|
|
************************
|
|
** This routine executes one iteration of the numeric
|
|
** sort benchmark. It returns the number of ticks
|
|
** elapsed for the iteration.
|
|
*/
|
|
static ulong DoNumSortIteration(farlong *arraybase,
|
|
ulong arraysize,
|
|
uint numarrays)
|
|
{
|
|
ulong elapsed; /* Elapsed ticks */
|
|
ulong i;
|
|
/*
|
|
** Load up the array with random numbers
|
|
*/
|
|
LoadNumArrayWithRand(arraybase,arraysize,numarrays);
|
|
|
|
/*
|
|
** Start the stopwatch
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Execute a heap of heapsorts
|
|
*/
|
|
for(i=0;i<numarrays;i++)
|
|
NumHeapSort(arraybase+i*arraysize,0L,arraysize-1L);
|
|
|
|
/*
|
|
** Get elapsed time
|
|
*/
|
|
elapsed=StopStopwatch(elapsed);
|
|
#ifdef DEBUG
|
|
{
|
|
for(i=0;i<arraysize-1;i++)
|
|
{ /*
|
|
** Compare to check for proper
|
|
** sort.
|
|
*/
|
|
if(arraybase[i+1]<arraybase[i])
|
|
{ printf("Sort Error\n");
|
|
numsort_status=1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return(elapsed);
|
|
}
|
|
|
|
/*************************
|
|
** LoadNumArrayWithRand **
|
|
**************************
|
|
** Load up an array with random longs.
|
|
*/
|
|
static void LoadNumArrayWithRand(farlong *array, /* Pointer to arrays */
|
|
ulong arraysize,
|
|
uint numarrays) /* # of elements in array */
|
|
{
|
|
long i; /* Used for index */
|
|
farlong *darray; /* Destination array pointer */
|
|
/*
|
|
** Initialize the random number generator
|
|
*/
|
|
/* randnum(13L); */
|
|
randnum((int32)13);
|
|
|
|
/*
|
|
** Load up first array with randoms
|
|
*/
|
|
for(i=0L;i<arraysize;i++)
|
|
/* array[i]=randnum(0L); */
|
|
array[i]=randnum((int32)0);
|
|
|
|
/*
|
|
** Now, if there's more than one array to load, copy the
|
|
** first into each of the others.
|
|
*/
|
|
darray=array;
|
|
while(--numarrays)
|
|
{ darray+=arraysize;
|
|
for(i=0L;i<arraysize;i++)
|
|
darray[i]=array[i];
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/****************
|
|
** NumHeapSort **
|
|
*****************
|
|
** Pass this routine a pointer to an array of long
|
|
** integers. Also pass in minimum and maximum offsets.
|
|
** This routine performs a heap sort on that array.
|
|
*/
|
|
static void NumHeapSort(farlong *array,
|
|
ulong bottom, /* Lower bound */
|
|
ulong top) /* Upper bound */
|
|
{
|
|
ulong temp; /* Used to exchange elements */
|
|
ulong i; /* Loop index */
|
|
|
|
/*
|
|
** First, build a heap in the array
|
|
*/
|
|
for(i=(top/2L); i>0; --i)
|
|
NumSift(array,i,top);
|
|
|
|
/*
|
|
** Repeatedly extract maximum from heap and place it at the
|
|
** end of the array. When we get done, we'll have a sorted
|
|
** array.
|
|
*/
|
|
for(i=top; i>0; --i)
|
|
{ NumSift(array,bottom,i);
|
|
temp=*array; /* Perform exchange */
|
|
*array=*(array+i);
|
|
*(array+i)=temp;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/************
|
|
** NumSift **
|
|
*************
|
|
** Peforms the sift operation on a numeric array,
|
|
** constructing a heap in the array.
|
|
*/
|
|
static void NumSift(farlong *array, /* Array of numbers */
|
|
ulong i, /* Minimum of array */
|
|
ulong j) /* Maximum of array */
|
|
{
|
|
unsigned long k;
|
|
long temp; /* Used for exchange */
|
|
|
|
while((i+i)<=j)
|
|
{
|
|
k=i+i;
|
|
if(k<j)
|
|
if(array[k]<array[k+1L])
|
|
++k;
|
|
if(array[i]<array[k])
|
|
{
|
|
temp=array[k];
|
|
array[k]=array[i];
|
|
array[i]=temp;
|
|
i=k;
|
|
}
|
|
else
|
|
i=j+1;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/********************
|
|
** STRING HEAPSORT **
|
|
********************/
|
|
|
|
/*****************
|
|
** DoStringSort **
|
|
******************
|
|
** This routine performs the CPU string sort test.
|
|
** Arguments:
|
|
** requested_secs = # of seconds to execute test
|
|
** stringspersec = # of strings per second sorted (RETURNED)
|
|
*/
|
|
void DoStringSort(void)
|
|
{
|
|
|
|
SortStruct *strsortstruct; /* Local for sort structure */
|
|
faruchar *arraybase; /* Base pointer of char array */
|
|
long accumtime; /* Accumulated time */
|
|
double iterations; /* # of iterations */
|
|
char *errorcontext; /* Error context string pointer */
|
|
int systemerror; /* For holding error code */
|
|
|
|
/*
|
|
** Link to global structure
|
|
*/
|
|
strsortstruct=&global_strsortstruct;
|
|
|
|
/*
|
|
** Set the error context
|
|
*/
|
|
errorcontext="CPU:String Sort";
|
|
|
|
/*
|
|
** See if we have to perform self-adjustment code
|
|
*/
|
|
if(strsortstruct->adjust==0)
|
|
{
|
|
/*
|
|
** Initialize the number of arrays.
|
|
*/
|
|
strsortstruct->numarrays=1;
|
|
while(1)
|
|
{
|
|
/*
|
|
** Allocate space for array. We'll add an extra 100
|
|
** bytes to protect memory as strings move around
|
|
** (this can happen during string adjustment)
|
|
*/
|
|
arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
|
|
(long)strsortstruct->numarrays,&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Do an iteration of the string sort. If the
|
|
** elapsed time is less than or equal to the permitted
|
|
** minimum, then de-allocate the array, reallocate a
|
|
** an additional array, and try again.
|
|
*/
|
|
if(DoStringSortIteration(arraybase,
|
|
strsortstruct->numarrays,
|
|
strsortstruct->arraysize)>global_min_ticks)
|
|
break; /* We're ok...exit */
|
|
|
|
FreeMemory((farvoid *)arraybase,&systemerror);
|
|
strsortstruct->numarrays+=1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
** We don't have to perform self adjustment code.
|
|
** Simply allocate the space for the array.
|
|
*/
|
|
arraybase=(faruchar *)AllocateMemory((strsortstruct->arraysize+100L) *
|
|
(long)strsortstruct->numarrays,&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
}
|
|
/*
|
|
** All's well if we get here. Repeatedly perform sorts until the
|
|
** accumulated elapsed time is greater than # of seconds requested.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
accumtime+=DoStringSortIteration(arraybase,
|
|
strsortstruct->numarrays,
|
|
strsortstruct->arraysize);
|
|
iterations+=(double)strsortstruct->numarrays;
|
|
} while(TicksToSecs(accumtime)<strsortstruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home.
|
|
** Set flag to show we don't need to rerun adjustment code.
|
|
*/
|
|
FreeMemory((farvoid *)arraybase,&systemerror);
|
|
strsortstruct->sortspersec=iterations / (double)TicksToFracSecs(accumtime);
|
|
if(strsortstruct->adjust==0)
|
|
strsortstruct->adjust=1;
|
|
#ifdef DEBUG
|
|
if (stringsort_status==0) printf("String sort: OK\n");
|
|
stringsort_status=0;
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
/**************************
|
|
** DoStringSortIteration **
|
|
***************************
|
|
** This routine executes one iteration of the string
|
|
** sort benchmark. It returns the number of ticks
|
|
** Note that this routine also builds the offset pointer
|
|
** array.
|
|
*/
|
|
static ulong DoStringSortIteration(faruchar *arraybase,
|
|
uint numarrays,ulong arraysize)
|
|
{
|
|
farulong *optrarray; /* Offset pointer array */
|
|
unsigned long elapsed; /* Elapsed ticks */
|
|
unsigned long nstrings; /* # of strings in array */
|
|
int syserror; /* System error code */
|
|
unsigned int i; /* Index */
|
|
farulong *tempobase; /* Temporary offset pointer base */
|
|
faruchar *tempsbase; /* Temporary string base pointer */
|
|
|
|
/*
|
|
** Load up the array(s) with random numbers
|
|
*/
|
|
optrarray=LoadStringArray(arraybase,numarrays,&nstrings,arraysize);
|
|
|
|
/*
|
|
** Set temp base pointers...they will be modified as the
|
|
** benchmark proceeds.
|
|
*/
|
|
tempobase=optrarray;
|
|
tempsbase=arraybase;
|
|
|
|
/*
|
|
** Start the stopwatch
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Execute heapsorts
|
|
*/
|
|
for(i=0;i<numarrays;i++)
|
|
{ StrHeapSort(tempobase,tempsbase,nstrings,0L,nstrings-1);
|
|
tempobase+=nstrings; /* Advance base pointers */
|
|
tempsbase+=arraysize+100;
|
|
}
|
|
|
|
/*
|
|
** Record elapsed time
|
|
*/
|
|
elapsed=StopStopwatch(elapsed);
|
|
|
|
#ifdef DEBUG
|
|
{
|
|
unsigned long i;
|
|
for(i=0;i<nstrings-1;i++)
|
|
{ /*
|
|
** Compare strings to check for proper
|
|
** sort.
|
|
*/
|
|
if(str_is_less(optrarray,arraybase,nstrings,i+1,i))
|
|
{ printf("Sort Error\n");
|
|
stringsort_status=1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
** Release the offset pointer array built by
|
|
** LoadStringArray()
|
|
*/
|
|
FreeMemory((farvoid *)optrarray,&syserror);
|
|
|
|
/*
|
|
** Return elapsed ticks.
|
|
*/
|
|
return(elapsed);
|
|
}
|
|
|
|
/********************
|
|
** LoadStringArray **
|
|
*********************
|
|
** Initialize the string array with random strings of
|
|
** varying sizes.
|
|
** Returns the pointer to the offset pointer array.
|
|
** Note that since we're creating a number of arrays, this
|
|
** routine builds one array, then copies it into the others.
|
|
*/
|
|
static farulong *LoadStringArray(faruchar *strarray, /* String array */
|
|
uint numarrays, /* # of arrays */
|
|
ulong *nstrings, /* # of strings */
|
|
ulong arraysize) /* Size of array */
|
|
{
|
|
faruchar *tempsbase; /* Temporary string base pointer */
|
|
farulong *optrarray; /* Local for pointer */
|
|
farulong *tempobase; /* Temporary offset pointer base pointer */
|
|
unsigned long curroffset; /* Current offset */
|
|
int fullflag; /* Indicates full array */
|
|
unsigned char stringlength; /* Length of string */
|
|
unsigned char i; /* Index */
|
|
unsigned long j; /* Another index */
|
|
unsigned int k; /* Yet another index */
|
|
unsigned int l; /* Ans still one more index */
|
|
int systemerror; /* For holding error code */
|
|
|
|
/*
|
|
** Initialize random number generator.
|
|
*/
|
|
/* randnum(13L); */
|
|
randnum((int32)13);
|
|
|
|
/*
|
|
** Start with no strings. Initialize our current offset pointer
|
|
** to 0.
|
|
*/
|
|
*nstrings=0L;
|
|
curroffset=0L;
|
|
fullflag=0;
|
|
|
|
do
|
|
{
|
|
/*
|
|
** Allocate a string with a random length no
|
|
** shorter than 4 bytes and no longer than
|
|
** 80 bytes. Note we have to also make sure
|
|
** there's room in the array.
|
|
*/
|
|
/* stringlength=(unsigned char)((1+abs_randwc(76L)) & 0xFFL);*/
|
|
stringlength=(unsigned char)((1+abs_randwc((int32)76)) & 0xFFL);
|
|
if((unsigned long)stringlength+curroffset+1L>=arraysize)
|
|
{ stringlength=(unsigned char)((arraysize-curroffset-1L) &
|
|
0xFF);
|
|
fullflag=1; /* Indicates a full */
|
|
}
|
|
|
|
/*
|
|
** Store length at curroffset and advance current offset.
|
|
*/
|
|
*(strarray+curroffset)=stringlength;
|
|
curroffset++;
|
|
|
|
/*
|
|
** Fill up the rest of the string with random bytes.
|
|
*/
|
|
for(i=0;i<stringlength;i++)
|
|
{ *(strarray+curroffset)=
|
|
/* (unsigned char)(abs_randwc((long)0xFE)); */
|
|
(unsigned char)(abs_randwc((int32)0xFE));
|
|
curroffset++;
|
|
}
|
|
|
|
/*
|
|
** Increment the # of strings counter.
|
|
*/
|
|
*nstrings+=1L;
|
|
|
|
} while(fullflag==0);
|
|
|
|
/*
|
|
** We now have initialized a single full array. If there
|
|
** is more than one array, copy the original into the
|
|
** others.
|
|
*/
|
|
k=1;
|
|
tempsbase=strarray;
|
|
while(k<numarrays)
|
|
{ tempsbase+=arraysize+100; /* Set base */
|
|
for(l=0;l<arraysize;l++)
|
|
tempsbase[l]=strarray[l];
|
|
k++;
|
|
}
|
|
|
|
/*
|
|
** Now the array is full, allocate enough space for an
|
|
** offset pointer array.
|
|
*/
|
|
optrarray=(farulong *)AllocateMemory(*nstrings * sizeof(unsigned long) *
|
|
numarrays,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError("CPU:Stringsort",systemerror);
|
|
FreeMemory((void *)strarray,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Go through the newly-built string array, building
|
|
** offsets and putting them into the offset pointer
|
|
** array.
|
|
*/
|
|
curroffset=0;
|
|
for(j=0;j<*nstrings;j++)
|
|
{ *(optrarray+j)=curroffset;
|
|
curroffset+=(unsigned long)(*(strarray+curroffset))+1L;
|
|
}
|
|
|
|
/*
|
|
** As above, we've made one copy of the offset pointers,
|
|
** so duplicate this array in the remaining ones.
|
|
*/
|
|
k=1;
|
|
tempobase=optrarray;
|
|
while(k<numarrays)
|
|
{ tempobase+=*nstrings;
|
|
for(l=0;l<*nstrings;l++)
|
|
tempobase[l]=optrarray[l];
|
|
k++;
|
|
}
|
|
|
|
/*
|
|
** All done...go home. Pass local pointer back.
|
|
*/
|
|
return(optrarray);
|
|
}
|
|
|
|
/**************
|
|
** stradjust **
|
|
***************
|
|
** Used by the string heap sort. Call this routine to adjust the
|
|
** string at offset i to length l. The members of the string array
|
|
** are moved accordingly and the length of the string at offset i
|
|
** is set to l.
|
|
*/
|
|
static void stradjust(farulong *optrarray, /* Offset pointer array */
|
|
faruchar *strarray, /* String array */
|
|
ulong nstrings, /* # of strings */
|
|
ulong i, /* Offset to adjust */
|
|
uchar l) /* New length */
|
|
{
|
|
unsigned long nbytes; /* # of bytes to move */
|
|
unsigned long j; /* Index */
|
|
int direction; /* Direction indicator */
|
|
unsigned char adjamount; /* Adjustment amount */
|
|
|
|
/*
|
|
** If new length is less than old length, the direction is
|
|
** down. If new length is greater than old length, the
|
|
** direction is up.
|
|
*/
|
|
direction=(int)l - (int)*(strarray+*(optrarray+i));
|
|
adjamount=(unsigned char)abs(direction);
|
|
|
|
/*
|
|
** See if the adjustment is being made to the last
|
|
** string in the string array. If so, we don't have to
|
|
** do anything more than adjust the length field.
|
|
*/
|
|
if(i==(nstrings-1L))
|
|
{ *(strarray+*(optrarray+i))=l;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
** Calculate the total # of bytes in string array from
|
|
** location i+1 to end of array. Whether we're moving "up" or
|
|
** down, this is how many bytes we'll have to move.
|
|
*/
|
|
nbytes=*(optrarray+nstrings-1L) +
|
|
(unsigned long)*(strarray+*(optrarray+nstrings-1L)) + 1L -
|
|
*(optrarray+i+1L);
|
|
|
|
/*
|
|
** Calculate the source and the destination. Source is
|
|
** string position i+1. Destination is string position i+l
|
|
** (i+"ell"...don't confuse 1 and l).
|
|
** Hand this straight to memmove and let it handle the
|
|
** "overlap" problem.
|
|
*/
|
|
MoveMemory((farvoid *)(strarray+*(optrarray+i)+l+1),
|
|
(farvoid *)(strarray+*(optrarray+i+1)),
|
|
(unsigned long)nbytes);
|
|
|
|
/*
|
|
** We have to adjust the offset pointer array.
|
|
** This covers string i+1 to numstrings-1.
|
|
*/
|
|
for(j=i+1;j<nstrings;j++)
|
|
if(direction<0)
|
|
*(optrarray+j)=*(optrarray+j)-adjamount;
|
|
else
|
|
*(optrarray+j)=*(optrarray+j)+adjamount;
|
|
|
|
/*
|
|
** Store the new length and go home.
|
|
*/
|
|
*(strarray+*(optrarray+i))=l;
|
|
return;
|
|
}
|
|
|
|
/****************
|
|
** strheapsort **
|
|
*****************
|
|
** Pass this routine a pointer to an array of unsigned char.
|
|
** The array is presumed to hold strings occupying at most
|
|
** 80 bytes (counts a byte count).
|
|
** This routine also needs a pointer to an array of offsets
|
|
** which represent string locations in the array, and
|
|
** an unsigned long indicating the number of strings
|
|
** in the array.
|
|
*/
|
|
static void StrHeapSort(farulong *optrarray, /* Offset pointers */
|
|
faruchar *strarray, /* Strings array */
|
|
ulong numstrings, /* # of strings in array */
|
|
ulong bottom, /* Region to sort...bottom */
|
|
ulong top) /* Region to sort...top */
|
|
{
|
|
unsigned char temp[80]; /* Used to exchange elements */
|
|
unsigned char tlen; /* Temp to hold length */
|
|
unsigned long i; /* Loop index */
|
|
|
|
|
|
/*
|
|
** Build a heap in the array
|
|
*/
|
|
for(i=(top/2L); i>0; --i)
|
|
strsift(optrarray,strarray,numstrings,i,top);
|
|
|
|
/*
|
|
** Repeatedly extract maximum from heap and place it at the
|
|
** end of the array. When we get done, we'll have a sorted
|
|
** array.
|
|
*/
|
|
for(i=top; i>0; --i)
|
|
{
|
|
strsift(optrarray,strarray,numstrings,0,i);
|
|
|
|
/* temp = string[0] */
|
|
tlen=*strarray;
|
|
MoveMemory((farvoid *)&temp[0], /* Perform exchange */
|
|
(farvoid *)strarray,
|
|
(unsigned long)(tlen+1));
|
|
|
|
|
|
/* string[0]=string[i] */
|
|
tlen=*(strarray+*(optrarray+i));
|
|
stradjust(optrarray,strarray,numstrings,0,tlen);
|
|
MoveMemory((farvoid *)strarray,
|
|
(farvoid *)(strarray+*(optrarray+i)),
|
|
(unsigned long)(tlen+1));
|
|
|
|
/* string[i]=temp */
|
|
tlen=temp[0];
|
|
stradjust(optrarray,strarray,numstrings,i,tlen);
|
|
MoveMemory((farvoid *)(strarray+*(optrarray+i)),
|
|
(farvoid *)&temp[0],
|
|
(unsigned long)(tlen+1));
|
|
|
|
}
|
|
return;
|
|
}
|
|
|
|
/****************
|
|
** str_is_less **
|
|
*****************
|
|
** Pass this function:
|
|
** 1) A pointer to an array of offset pointers
|
|
** 2) A pointer to a string array
|
|
** 3) The number of elements in the string array
|
|
** 4) Offsets to two strings (a & b)
|
|
** This function returns TRUE if string a is < string b.
|
|
*/
|
|
static int str_is_less(farulong *optrarray, /* Offset pointers */
|
|
faruchar *strarray, /* String array */
|
|
ulong numstrings, /* # of strings */
|
|
ulong a, ulong b) /* Offsets */
|
|
{
|
|
int slen; /* String length */
|
|
|
|
/*
|
|
** Determine which string has the minimum length. Use that
|
|
** to call strncmp(). If they match up to that point, the
|
|
** string with the longer length wins.
|
|
*/
|
|
slen=(int)*(strarray+*(optrarray+a));
|
|
if(slen > (int)*(strarray+*(optrarray+b)))
|
|
slen=(int)*(strarray+*(optrarray+b));
|
|
|
|
slen=strncmp((char *)(strarray+*(optrarray+a)),
|
|
(char *)(strarray+*(optrarray+b)),slen);
|
|
|
|
if(slen==0)
|
|
{
|
|
/*
|
|
** They match. Return true if the length of a
|
|
** is greater than the length of b.
|
|
*/
|
|
if(*(strarray+*(optrarray+a)) >
|
|
*(strarray+*(optrarray+b)))
|
|
return(TRUE);
|
|
return(FALSE);
|
|
}
|
|
|
|
if(slen<0) return(TRUE); /* a is strictly less than b */
|
|
|
|
return(FALSE); /* Only other possibility */
|
|
}
|
|
|
|
/************
|
|
** strsift **
|
|
*************
|
|
** Pass this function:
|
|
** 1) A pointer to an array of offset pointers
|
|
** 2) A pointer to a string array
|
|
** 3) The number of elements in the string array
|
|
** 4) Offset within which to sort.
|
|
** Sift the array within the bounds of those offsets (thus
|
|
** building a heap).
|
|
*/
|
|
static void strsift(farulong *optrarray, /* Offset pointers */
|
|
faruchar *strarray, /* String array */
|
|
ulong numstrings, /* # of strings */
|
|
ulong i, ulong j) /* Offsets */
|
|
{
|
|
unsigned long k; /* Temporaries */
|
|
unsigned char temp[80];
|
|
unsigned char tlen; /* For string lengths */
|
|
|
|
|
|
while((i+i)<=j)
|
|
{
|
|
k=i+i;
|
|
if(k<j)
|
|
if(str_is_less(optrarray,strarray,numstrings,k,k+1L))
|
|
++k;
|
|
if(str_is_less(optrarray,strarray,numstrings,i,k))
|
|
{
|
|
/* temp=string[k] */
|
|
tlen=*(strarray+*(optrarray+k));
|
|
MoveMemory((farvoid *)&temp[0],
|
|
(farvoid *)(strarray+*(optrarray+k)),
|
|
(unsigned long)(tlen+1));
|
|
|
|
/* string[k]=string[i] */
|
|
tlen=*(strarray+*(optrarray+i));
|
|
stradjust(optrarray,strarray,numstrings,k,tlen);
|
|
MoveMemory((farvoid *)(strarray+*(optrarray+k)),
|
|
(farvoid *)(strarray+*(optrarray+i)),
|
|
(unsigned long)(tlen+1));
|
|
|
|
/* string[i]=temp */
|
|
tlen=temp[0];
|
|
stradjust(optrarray,strarray,numstrings,i,tlen);
|
|
MoveMemory((farvoid *)(strarray+*(optrarray+i)),
|
|
(farvoid *)&temp[0],
|
|
(unsigned long)(tlen+1));
|
|
i=k;
|
|
}
|
|
else
|
|
i=j+1;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/************************
|
|
** BITFIELD OPERATIONS **
|
|
*************************/
|
|
|
|
/*************
|
|
** DoBitops **
|
|
**************
|
|
** Perform the bit operations test portion of the CPU
|
|
** benchmark. Returns the iterations per second.
|
|
*/
|
|
void DoBitops(void)
|
|
{
|
|
BitOpStruct *locbitopstruct; /* Local bitop structure */
|
|
farulong *bitarraybase; /* Base of bitmap array */
|
|
farulong *bitoparraybase; /* Base of bitmap operations array */
|
|
ulong nbitops; /* # of bitfield operations */
|
|
ulong accumtime; /* Accumulated time in ticks */
|
|
double iterations; /* # of iterations */
|
|
char *errorcontext; /* Error context string */
|
|
int systemerror; /* For holding error codes */
|
|
int ticks;
|
|
|
|
/*
|
|
** Link to global structure.
|
|
*/
|
|
locbitopstruct=&global_bitopstruct;
|
|
|
|
/*
|
|
** Set the error context.
|
|
*/
|
|
errorcontext="CPU:Bitfields";
|
|
|
|
/*
|
|
** See if we need to run adjustment code.
|
|
*/
|
|
if(locbitopstruct->adjust==0)
|
|
{
|
|
bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
|
|
sizeof(ulong),&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Initialize bitfield operations array to [2,30] elements
|
|
*/
|
|
locbitopstruct->bitoparraysize=30L;
|
|
|
|
while(1)
|
|
{
|
|
/*
|
|
** Allocate space for operations array
|
|
*/
|
|
bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
|
|
sizeof(ulong),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)bitarraybase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
/*
|
|
** Do an iteration of the bitmap test. If the
|
|
** elapsed time is less than or equal to the permitted
|
|
** minimum, then de-allocate the array, reallocate a
|
|
** larger version, and try again.
|
|
*/
|
|
ticks=DoBitfieldIteration(bitarraybase,
|
|
bitoparraybase,
|
|
locbitopstruct->bitoparraysize,
|
|
&nbitops);
|
|
#ifdef DEBUG
|
|
#ifdef LINUX
|
|
if (locbitopstruct->bitoparraysize==30L){
|
|
/* this is the first loop, write a debug file */
|
|
FILE *file;
|
|
unsigned long *running_base; /* same as farulong */
|
|
long counter;
|
|
file=fopen("debugbit.dat","w");
|
|
running_base=bitarraybase;
|
|
for (counter=0;counter<(long)(locbitopstruct->bitfieldarraysize);counter++){
|
|
#ifdef LONG64
|
|
fprintf(file,"%08X",(unsigned int)(*running_base&0xFFFFFFFFL));
|
|
fprintf(file,"%08X",(unsigned int)((*running_base>>32)&0xFFFFFFFFL));
|
|
if ((counter+1)%4==0) fprintf(file,"\n");
|
|
#else
|
|
fprintf(file,"%08lX",*running_base);
|
|
if ((counter+1)%8==0) fprintf(file,"\n");
|
|
#endif
|
|
running_base=running_base+1;
|
|
}
|
|
fclose(file);
|
|
printf("\nWrote the file debugbit.dat, you may want to compare it to debugbit.good\n");
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
if (ticks>global_min_ticks) break; /* We're ok...exit */
|
|
|
|
FreeMemory((farvoid *)bitoparraybase,&systemerror);
|
|
locbitopstruct->bitoparraysize+=100L;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
** Don't need to do self adjustment, just allocate
|
|
** the array space.
|
|
*/
|
|
bitarraybase=(farulong *)AllocateMemory(locbitopstruct->bitfieldarraysize *
|
|
sizeof(ulong),&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
bitoparraybase=(farulong *)AllocateMemory(locbitopstruct->bitoparraysize*2L*
|
|
sizeof(ulong),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)bitarraybase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
}
|
|
|
|
/*
|
|
** All's well if we get here. Repeatedly perform bitops until the
|
|
** accumulated elapsed time is greater than # of seconds requested.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
do {
|
|
accumtime+=DoBitfieldIteration(bitarraybase,
|
|
bitoparraybase,
|
|
locbitopstruct->bitoparraysize,&nbitops);
|
|
iterations+=(double)nbitops;
|
|
} while(TicksToSecs(accumtime)<locbitopstruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home.
|
|
** Also, set adjustment flag to show that we don't have
|
|
** to do self adjusting in the future.
|
|
*/
|
|
FreeMemory((farvoid *)bitarraybase,&systemerror);
|
|
FreeMemory((farvoid *)bitoparraybase,&systemerror);
|
|
locbitopstruct->bitopspersec=iterations /TicksToFracSecs(accumtime);
|
|
if(locbitopstruct->adjust==0)
|
|
locbitopstruct->adjust=1;
|
|
|
|
return;
|
|
}
|
|
|
|
/************************
|
|
** DoBitfieldIteration **
|
|
*************************
|
|
** Perform a single iteration of the bitfield benchmark.
|
|
** Return the # of ticks accumulated by the operation.
|
|
*/
|
|
static ulong DoBitfieldIteration(farulong *bitarraybase,
|
|
farulong *bitoparraybase,
|
|
long bitoparraysize,
|
|
ulong *nbitops)
|
|
{
|
|
long i; /* Index */
|
|
ulong bitoffset; /* Offset into bitmap */
|
|
ulong elapsed; /* Time to execute */
|
|
/*
|
|
** Clear # bitops counter
|
|
*/
|
|
*nbitops=0L;
|
|
|
|
/*
|
|
** Construct a set of bitmap offsets and run lengths.
|
|
** The offset can be any random number from 0 to the
|
|
** size of the bitmap (in bits). The run length can
|
|
** be any random number from 1 to the number of bits
|
|
** between the offset and the end of the bitmap.
|
|
** Note that the bitmap has 8192 * 32 bits in it.
|
|
** (262,144 bits)
|
|
*/
|
|
/*
|
|
** Reset random number generator so things repeat.
|
|
** Also reset the bit array we work on.
|
|
** added by Uwe F. Mayer
|
|
*/
|
|
randnum((int32)13);
|
|
for (i=0;i<global_bitopstruct.bitfieldarraysize;i++)
|
|
{
|
|
#ifdef LONG64
|
|
*(bitarraybase+i)=(ulong)0x5555555555555555;
|
|
#else
|
|
*(bitarraybase+i)=(ulong)0x55555555;
|
|
#endif
|
|
}
|
|
randnum((int32)13);
|
|
/* end of addition of code */
|
|
|
|
for (i=0;i<bitoparraysize;i++)
|
|
{
|
|
/* First item is offset */
|
|
/* *(bitoparraybase+i+i)=bitoffset=abs_randwc(262140L); */
|
|
*(bitoparraybase+i+i)=bitoffset=abs_randwc((int32)262140);
|
|
|
|
/* Next item is run length */
|
|
/* *nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc(262140L-bitoffset);*/
|
|
*nbitops+=*(bitoparraybase+i+i+1L)=abs_randwc((int32)262140-bitoffset);
|
|
}
|
|
|
|
/*
|
|
** Array of offset and lengths built...do an iteration of
|
|
** the test.
|
|
** Start the stopwatch.
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Loop through array off offset/run length pairs.
|
|
** Execute operation based on modulus of index.
|
|
*/
|
|
for(i=0;i<bitoparraysize;i++)
|
|
{
|
|
switch(i % 3)
|
|
{
|
|
|
|
case 0: /* Set run of bits */
|
|
ToggleBitRun(bitarraybase,
|
|
*(bitoparraybase+i+i),
|
|
*(bitoparraybase+i+i+1),
|
|
1);
|
|
break;
|
|
|
|
case 1: /* Clear run of bits */
|
|
ToggleBitRun(bitarraybase,
|
|
*(bitoparraybase+i+i),
|
|
*(bitoparraybase+i+i+1),
|
|
0);
|
|
break;
|
|
|
|
case 2: /* Complement run of bits */
|
|
FlipBitRun(bitarraybase,
|
|
*(bitoparraybase+i+i),
|
|
*(bitoparraybase+i+i+1));
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Return elapsed time
|
|
*/
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
|
|
/*****************************
|
|
** ToggleBitRun *
|
|
******************************
|
|
** Set or clear a run of nbits starting at
|
|
** bit_addr in bitmap.
|
|
*/
|
|
static void ToggleBitRun(farulong *bitmap, /* Bitmap */
|
|
ulong bit_addr, /* Address of bits to set */
|
|
ulong nbits, /* # of bits to set/clr */
|
|
uint val) /* 1 or 0 */
|
|
{
|
|
unsigned long bindex; /* Index into array */
|
|
unsigned long bitnumb; /* Bit number */
|
|
|
|
while(nbits--)
|
|
{
|
|
#ifdef LONG64
|
|
bindex=bit_addr>>6; /* Index is number /64 */
|
|
bitnumb=bit_addr % 64; /* Bit number in word */
|
|
#else
|
|
bindex=bit_addr>>5; /* Index is number /32 */
|
|
bitnumb=bit_addr % 32; /* bit number in word */
|
|
#endif
|
|
if(val)
|
|
bitmap[bindex]|=(1L<<bitnumb);
|
|
else
|
|
bitmap[bindex]&=~(1L<<bitnumb);
|
|
bit_addr++;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/***************
|
|
** FlipBitRun **
|
|
****************
|
|
** Complements a run of bits.
|
|
*/
|
|
static void FlipBitRun(farulong *bitmap, /* Bit map */
|
|
ulong bit_addr, /* Bit address */
|
|
ulong nbits) /* # of bits to flip */
|
|
{
|
|
unsigned long bindex; /* Index into array */
|
|
unsigned long bitnumb; /* Bit number */
|
|
|
|
while(nbits--)
|
|
{
|
|
#ifdef LONG64
|
|
bindex=bit_addr>>6; /* Index is number /64 */
|
|
bitnumb=bit_addr % 64; /* Bit number in longword */
|
|
#else
|
|
bindex=bit_addr>>5; /* Index is number /32 */
|
|
bitnumb=bit_addr % 32; /* Bit number in longword */
|
|
#endif
|
|
bitmap[bindex]^=(1L<<bitnumb);
|
|
bit_addr++;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*****************************
|
|
** FLOATING-POINT EMULATION **
|
|
*****************************/
|
|
|
|
/**************
|
|
** DoEmFloat **
|
|
***************
|
|
** Perform the floating-point emulation routines portion of the
|
|
** CPU benchmark. Returns the operations per second.
|
|
*/
|
|
void DoEmFloat(void)
|
|
{
|
|
EmFloatStruct *locemfloatstruct; /* Local structure */
|
|
InternalFPF *abase; /* Base of A array */
|
|
InternalFPF *bbase; /* Base of B array */
|
|
InternalFPF *cbase; /* Base of C array */
|
|
ulong accumtime; /* Accumulated time in ticks */
|
|
double iterations; /* # of iterations */
|
|
ulong tickcount; /* # of ticks */
|
|
char *errorcontext; /* Error context string pointer */
|
|
int systemerror; /* For holding error code */
|
|
ulong loops; /* # of loops */
|
|
|
|
/*
|
|
** Link to global structure
|
|
*/
|
|
locemfloatstruct=&global_emfloatstruct;
|
|
|
|
/*
|
|
** Set the error context
|
|
*/
|
|
errorcontext="CPU:Floating Emulation";
|
|
|
|
|
|
/*
|
|
** Test the emulation routines.
|
|
*/
|
|
#ifdef DEBUG
|
|
#endif
|
|
|
|
abase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
bbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
cbase=(InternalFPF *)AllocateMemory(locemfloatstruct->arraysize*sizeof(InternalFPF),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
FreeMemory((farvoid *)bbase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Set up the arrays
|
|
*/
|
|
SetupCPUEmFloatArrays(abase,bbase,cbase,locemfloatstruct->arraysize);
|
|
|
|
/*
|
|
** See if we need to do self-adjusting code.
|
|
*/
|
|
if(locemfloatstruct->adjust==0)
|
|
{
|
|
locemfloatstruct->loops=0;
|
|
|
|
/*
|
|
** Do an iteration of the tests. If the elapsed time is
|
|
** less than minimum, increase the loop count and try
|
|
** again.
|
|
*/
|
|
for(loops=1;loops<CPUEMFLOATLOOPMAX;loops+=loops)
|
|
{ tickcount=DoEmFloatIteration(abase,bbase,cbase,
|
|
locemfloatstruct->arraysize,
|
|
loops);
|
|
if(tickcount>global_min_ticks)
|
|
{ locemfloatstruct->loops=loops;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Verify that selft adjustment code worked.
|
|
*/
|
|
if(locemfloatstruct->loops==0)
|
|
{ printf("CPU:EMFPU -- CMPUEMFLOATLOOPMAX limit hit\n");
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
FreeMemory((farvoid *)bbase,&systemerror);
|
|
FreeMemory((farvoid *)cbase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** All's well if we get here. Repeatedly perform floating
|
|
** tests until the accumulated time is greater than the
|
|
** # of seconds requested.
|
|
** Each iteration performs arraysize * 3 operations.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
do {
|
|
accumtime+=DoEmFloatIteration(abase,bbase,cbase,
|
|
locemfloatstruct->arraysize,
|
|
locemfloatstruct->loops);
|
|
iterations+=(double)1.0;
|
|
} while(TicksToSecs(accumtime)<locemfloatstruct->request_secs);
|
|
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home.
|
|
** Also, indicate that adjustment is done.
|
|
*/
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
FreeMemory((farvoid *)bbase,&systemerror);
|
|
FreeMemory((farvoid *)cbase,&systemerror);
|
|
|
|
locemfloatstruct->emflops=(iterations*(double)locemfloatstruct->loops)/
|
|
(double)TicksToFracSecs(accumtime);
|
|
if(locemfloatstruct->adjust==0)
|
|
locemfloatstruct->adjust=1;
|
|
|
|
#ifdef DEBUG
|
|
printf("----------------------------------------------------------------------------\n");
|
|
#endif
|
|
return;
|
|
}
|
|
|
|
/*************************
|
|
** FOURIER COEFFICIENTS **
|
|
*************************/
|
|
|
|
/**************
|
|
** DoFourier **
|
|
***************
|
|
** Perform the transcendental/trigonometric portion of the
|
|
** benchmark. This benchmark calculates the first n
|
|
** fourier coefficients of the function (x+1)^x defined
|
|
** on the interval 0,2.
|
|
*/
|
|
void DoFourier(void)
|
|
{
|
|
FourierStruct *locfourierstruct; /* Local fourier struct */
|
|
fardouble *abase; /* Base of A[] coefficients array */
|
|
fardouble *bbase; /* Base of B[] coefficients array */
|
|
unsigned long accumtime; /* Accumulated time in ticks */
|
|
double iterations; /* # of iterations */
|
|
char *errorcontext; /* Error context string pointer */
|
|
int systemerror; /* For error code */
|
|
|
|
/*
|
|
** Link to global structure
|
|
*/
|
|
locfourierstruct=&global_fourierstruct;
|
|
|
|
/*
|
|
** Set error context string
|
|
*/
|
|
errorcontext="FPU:Transcendental";
|
|
|
|
/*
|
|
** See if we need to do self-adjustment code.
|
|
*/
|
|
if(locfourierstruct->adjust==0)
|
|
{
|
|
locfourierstruct->arraysize=100L; /* Start at 100 elements */
|
|
while(1)
|
|
{
|
|
|
|
abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((void *)abase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
/*
|
|
** Do an iteration of the tests. If the elapsed time is
|
|
** less than or equal to the permitted minimum, re-allocate
|
|
** larger arrays and try again.
|
|
*/
|
|
if(DoFPUTransIteration(abase,bbase,
|
|
locfourierstruct->arraysize)>global_min_ticks)
|
|
break; /* We're ok...exit */
|
|
|
|
/*
|
|
** Make bigger arrays and try again.
|
|
*/
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
FreeMemory((farvoid *)bbase,&systemerror);
|
|
locfourierstruct->arraysize+=50L;
|
|
}
|
|
}
|
|
else
|
|
{ /*
|
|
** Don't need self-adjustment. Just allocate the
|
|
** arrays, and go.
|
|
*/
|
|
abase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
bbase=(fardouble *)AllocateMemory(locfourierstruct->arraysize*sizeof(double),
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((void *)abase,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
}
|
|
/*
|
|
** All's well if we get here. Repeatedly perform integration
|
|
** tests until the accumulated time is greater than the
|
|
** # of seconds requested.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
do {
|
|
accumtime+=DoFPUTransIteration(abase,bbase,locfourierstruct->arraysize);
|
|
iterations+=(double)locfourierstruct->arraysize*(double)2.0-(double)1.0;
|
|
} while(TicksToSecs(accumtime)<locfourierstruct->request_secs);
|
|
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home.
|
|
** Also set adjustment flag to indicate no adjust code needed.
|
|
*/
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
FreeMemory((farvoid *)bbase,&systemerror);
|
|
|
|
locfourierstruct->fflops=iterations/(double)TicksToFracSecs(accumtime);
|
|
|
|
if(locfourierstruct->adjust==0)
|
|
locfourierstruct->adjust=1;
|
|
|
|
return;
|
|
}
|
|
|
|
/************************
|
|
** DoFPUTransIteration **
|
|
*************************
|
|
** Perform an iteration of the FPU Transcendental/trigonometric
|
|
** benchmark. Here, an iteration consists of calculating the
|
|
** first n fourier coefficients of the function (x+1)^x on
|
|
** the interval 0,2. n is given by arraysize.
|
|
** NOTE: The # of integration steps is fixed at
|
|
** 200.
|
|
*/
|
|
static ulong DoFPUTransIteration(fardouble *abase, /* A coeffs. */
|
|
fardouble *bbase, /* B coeffs. */
|
|
ulong arraysize) /* # of coeffs */
|
|
{
|
|
double omega; /* Fundamental frequency */
|
|
unsigned long i; /* Index */
|
|
unsigned long elapsed; /* Elapsed time */
|
|
|
|
/*
|
|
** Start the stopwatch
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Calculate the fourier series. Begin by
|
|
** calculating A[0].
|
|
*/
|
|
|
|
*abase=TrapezoidIntegrate((double)0.0,
|
|
(double)2.0,
|
|
200,
|
|
(double)0.0, /* No omega * n needed */
|
|
0 )/(double)2.0;
|
|
|
|
/*
|
|
** Calculate the fundamental frequency.
|
|
** ( 2 * pi ) / period...and since the period
|
|
** is 2, omega is simply pi.
|
|
*/
|
|
omega=(double)3.1415926535897932;
|
|
|
|
for(i=1;i<arraysize;i++)
|
|
{
|
|
|
|
/*
|
|
** Calculate A[i] terms. Note, once again, that we
|
|
** can ignore the 2/period term outside the integral
|
|
** since the period is 2 and the term cancels itself
|
|
** out.
|
|
*/
|
|
*(abase+i)=TrapezoidIntegrate((double)0.0,
|
|
(double)2.0,
|
|
200,
|
|
omega * (double)i,
|
|
1);
|
|
|
|
/*
|
|
** Calculate the B[i] terms.
|
|
*/
|
|
*(bbase+i)=TrapezoidIntegrate((double)0.0,
|
|
(double)2.0,
|
|
200,
|
|
omega * (double)i,
|
|
2);
|
|
|
|
}
|
|
#ifdef DEBUG
|
|
{
|
|
int i;
|
|
printf("\nA[i]=\n");
|
|
for (i=0;i<arraysize;i++) printf("%7.3g ",abase[i]);
|
|
printf("\nB[i]=\n(undefined) ");
|
|
for (i=1;i<arraysize;i++) printf("%7.3g ",bbase[i]);
|
|
}
|
|
#endif
|
|
/*
|
|
** All done, stop the stopwatch
|
|
*/
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
/***********************
|
|
** TrapezoidIntegrate **
|
|
************************
|
|
** Perform a simple trapezoid integration on the
|
|
** function (x+1)**x.
|
|
** x0,x1 set the lower and upper bounds of the
|
|
** integration.
|
|
** nsteps indicates # of trapezoidal sections
|
|
** omegan is the fundamental frequency times
|
|
** the series member #
|
|
** select = 0 for the A[0] term, 1 for cosine terms, and
|
|
** 2 for sine terms.
|
|
** Returns the value.
|
|
*/
|
|
static double TrapezoidIntegrate( double x0, /* Lower bound */
|
|
double x1, /* Upper bound */
|
|
int nsteps, /* # of steps */
|
|
double omegan, /* omega * n */
|
|
int select)
|
|
{
|
|
double x; /* Independent variable */
|
|
double dx; /* Stepsize */
|
|
double rvalue; /* Return value */
|
|
|
|
|
|
/*
|
|
** Initialize independent variable
|
|
*/
|
|
x=x0;
|
|
|
|
/*
|
|
** Calculate stepsize
|
|
*/
|
|
dx=(x1 - x0) / (double)nsteps;
|
|
|
|
/*
|
|
** Initialize the return value.
|
|
*/
|
|
rvalue=thefunction(x0,omegan,select)/(double)2.0;
|
|
|
|
/*
|
|
** Compute the other terms of the integral.
|
|
*/
|
|
if(nsteps!=1)
|
|
{ --nsteps; /* Already done 1 step */
|
|
while(--nsteps )
|
|
{
|
|
x+=dx;
|
|
rvalue+=thefunction(x,omegan,select);
|
|
}
|
|
}
|
|
/*
|
|
** Finish computation
|
|
*/
|
|
rvalue=(rvalue+thefunction(x1,omegan,select)/(double)2.0)*dx;
|
|
|
|
return(rvalue);
|
|
}
|
|
|
|
/****************
|
|
** thefunction **
|
|
*****************
|
|
** This routine selects the function to be used
|
|
** in the Trapezoid integration.
|
|
** x is the independent variable
|
|
** omegan is omega * n
|
|
** select chooses which of the sine/cosine functions
|
|
** are used. note the special case for select=0.
|
|
*/
|
|
static double thefunction(double x, /* Independent variable */
|
|
double omegan, /* Omega * term */
|
|
int select) /* Choose term */
|
|
{
|
|
|
|
/*
|
|
** Use select to pick which function we call.
|
|
*/
|
|
switch(select)
|
|
{
|
|
case 0: return(pow(x+(double)1.0,x));
|
|
|
|
case 1: return(pow(x+(double)1.0,x) * cos(omegan * x));
|
|
|
|
case 2: return(pow(x+(double)1.0,x) * sin(omegan * x));
|
|
}
|
|
|
|
/*
|
|
** We should never reach this point, but the following
|
|
** keeps compilers from issuing a warning message.
|
|
*/
|
|
return(0.0);
|
|
}
|
|
|
|
/*************************
|
|
** ASSIGNMENT ALGORITHM **
|
|
*************************/
|
|
|
|
/*************
|
|
** DoAssign **
|
|
**************
|
|
** Perform an assignment algorithm.
|
|
** The algorithm was adapted from the step by step guide found
|
|
** in "Quantitative Decision Making for Business" (Gordon,
|
|
** Pressman, and Cohn; Prentice-Hall)
|
|
**
|
|
**
|
|
** NOTES:
|
|
** 1. Even though the algorithm distinguishes between
|
|
** ASSIGNROWS and ASSIGNCOLS, as though the two might
|
|
** be different, it does presume a square matrix.
|
|
** I.E., ASSIGNROWS and ASSIGNCOLS must be the same.
|
|
** This makes for some algorithmically-correct but
|
|
** probably non-optimal constructs.
|
|
**
|
|
*/
|
|
void DoAssign(void)
|
|
{
|
|
AssignStruct *locassignstruct; /* Local structure ptr */
|
|
farlong *arraybase;
|
|
char *errorcontext;
|
|
int systemerror;
|
|
ulong accumtime;
|
|
double iterations;
|
|
|
|
/*
|
|
** Link to global structure
|
|
*/
|
|
locassignstruct=&global_assignstruct;
|
|
|
|
/*
|
|
** Set the error context string.
|
|
*/
|
|
errorcontext="CPU:Assignment";
|
|
|
|
/*
|
|
** See if we need to do self adjustment code.
|
|
*/
|
|
if(locassignstruct->adjust==0)
|
|
{
|
|
/*
|
|
** Self-adjustment code. The system begins by working on 1
|
|
** array. If it does that in no time, then two arrays
|
|
** are built. This process continues until
|
|
** enough arrays are built to handle the tolerance.
|
|
*/
|
|
locassignstruct->numarrays=1;
|
|
while(1)
|
|
{
|
|
/*
|
|
** Allocate space for arrays
|
|
*/
|
|
arraybase=(farlong *) AllocateMemory(sizeof(long)*
|
|
ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)arraybase,
|
|
&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Do an iteration of the assignment alg. If the
|
|
** elapsed time is less than or equal to the permitted
|
|
** minimum, then allocate for more arrays and
|
|
** try again.
|
|
*/
|
|
if(DoAssignIteration(arraybase,
|
|
locassignstruct->numarrays)>global_min_ticks)
|
|
break; /* We're ok...exit */
|
|
|
|
FreeMemory((farvoid *)arraybase, &systemerror);
|
|
locassignstruct->numarrays++;
|
|
}
|
|
}
|
|
else
|
|
{ /*
|
|
** Allocate space for arrays
|
|
*/
|
|
arraybase=(farlong *)AllocateMemory(sizeof(long)*
|
|
ASSIGNROWS*ASSIGNCOLS*locassignstruct->numarrays,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)arraybase,
|
|
&systemerror);
|
|
ErrorExit();
|
|
}
|
|
}
|
|
|
|
/*
|
|
** All's well if we get here. Do the tests.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
accumtime+=DoAssignIteration(arraybase,
|
|
locassignstruct->numarrays);
|
|
iterations+=(double)1.0;
|
|
} while(TicksToSecs(accumtime)<locassignstruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home. Be sure to
|
|
** show that we don't have to rerun adjustment code.
|
|
*/
|
|
FreeMemory((farvoid *)arraybase,&systemerror);
|
|
|
|
locassignstruct->iterspersec=iterations *
|
|
(double)locassignstruct->numarrays / TicksToFracSecs(accumtime);
|
|
|
|
if(locassignstruct->adjust==0)
|
|
locassignstruct->adjust=1;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/**********************
|
|
** DoAssignIteration **
|
|
***********************
|
|
** This routine executes one iteration of the assignment test.
|
|
** It returns the number of ticks elapsed in the iteration.
|
|
*/
|
|
static ulong DoAssignIteration(farlong *arraybase,
|
|
ulong numarrays)
|
|
{
|
|
longptr abase; /* local pointer */
|
|
ulong elapsed; /* Elapsed ticks */
|
|
ulong i;
|
|
|
|
/*
|
|
** Set up local pointer
|
|
*/
|
|
abase.ptrs.p=arraybase;
|
|
|
|
/*
|
|
** Load up the arrays with a random table.
|
|
*/
|
|
LoadAssignArrayWithRand(arraybase,numarrays);
|
|
|
|
/*
|
|
** Start the stopwatch
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Execute assignment algorithms
|
|
*/
|
|
for(i=0;i<numarrays;i++)
|
|
{ /* abase.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
|
|
/* Fixed by Eike Dierks */
|
|
Assignment(*abase.ptrs.ap);
|
|
abase.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
|
|
}
|
|
|
|
/*
|
|
** Get elapsed time
|
|
*/
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
/****************************
|
|
** LoadAssignArrayWithRand **
|
|
*****************************
|
|
** Load the assignment arrays with random numbers. All positive.
|
|
** These numbers represent costs.
|
|
*/
|
|
static void LoadAssignArrayWithRand(farlong *arraybase,
|
|
ulong numarrays)
|
|
{
|
|
longptr abase,abase1; /* Local for array pointer */
|
|
ulong i;
|
|
|
|
/*
|
|
** Set local array pointer
|
|
*/
|
|
abase.ptrs.p=arraybase;
|
|
abase1.ptrs.p=arraybase;
|
|
|
|
/*
|
|
** Set up the first array. Then just copy it into the
|
|
** others.
|
|
*/
|
|
LoadAssign(*(abase.ptrs.ap));
|
|
if(numarrays>1)
|
|
for(i=1;i<numarrays;i++)
|
|
{ /* abase1.ptrs.p+=i*ASSIGNROWS*ASSIGNCOLS; */
|
|
/* Fixed by Eike Dierks */
|
|
abase1.ptrs.p+=ASSIGNROWS*ASSIGNCOLS;
|
|
CopyToAssign(*(abase.ptrs.ap),*(abase1.ptrs.ap));
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/***************
|
|
** LoadAssign **
|
|
****************
|
|
** The array given by arraybase is loaded with positive random
|
|
** numbers. Elements in the array are capped at 5,000,000.
|
|
*/
|
|
static void LoadAssign(farlong arraybase[][ASSIGNCOLS])
|
|
{
|
|
ushort i,j;
|
|
|
|
/*
|
|
** Reset random number generator so things repeat.
|
|
*/
|
|
/* randnum(13L); */
|
|
randnum((int32)13);
|
|
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
for(j=0;j<ASSIGNROWS;j++){
|
|
/* arraybase[i][j]=abs_randwc(5000000L);*/
|
|
arraybase[i][j]=abs_randwc((int32)5000000);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*****************
|
|
** CopyToAssign **
|
|
******************
|
|
** Copy the contents of one array to another. This is called by
|
|
** the routine that builds the initial array, and is used to copy
|
|
** the contents of the intial array into all following arrays.
|
|
*/
|
|
static void CopyToAssign(farlong arrayfrom[ASSIGNROWS][ASSIGNCOLS],
|
|
farlong arrayto[ASSIGNROWS][ASSIGNCOLS])
|
|
{
|
|
ushort i,j;
|
|
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
arrayto[i][j]=arrayfrom[i][j];
|
|
|
|
return;
|
|
}
|
|
|
|
/***************
|
|
** Assignment **
|
|
***************/
|
|
static void Assignment(farlong arraybase[][ASSIGNCOLS])
|
|
{
|
|
short assignedtableau[ASSIGNROWS][ASSIGNCOLS];
|
|
|
|
/*
|
|
** First, calculate minimum costs
|
|
*/
|
|
calc_minimum_costs(arraybase);
|
|
|
|
/*
|
|
** Repeat following until the number of rows selected
|
|
** equals the number of rows in the tableau.
|
|
*/
|
|
while(first_assignments(arraybase,assignedtableau)!=ASSIGNROWS)
|
|
{ second_assignments(arraybase,assignedtableau);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
{
|
|
int i,j;
|
|
printf("\nColumn choices for each row\n");
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
{
|
|
printf("R%03d: ",i);
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(assignedtableau[i][j]==1)
|
|
printf("%03d ",j);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return;
|
|
}
|
|
|
|
/***********************
|
|
** calc_minimum_costs **
|
|
************************
|
|
** Revise the tableau by calculating the minimum costs on a
|
|
** row and column basis. These minima are subtracted from
|
|
** their rows and columns, creating a new tableau.
|
|
*/
|
|
static void calc_minimum_costs(long tableau[][ASSIGNCOLS])
|
|
{
|
|
ushort i,j; /* Index variables */
|
|
long currentmin; /* Current minimum */
|
|
/*
|
|
** Determine minimum costs on row basis. This is done by
|
|
** subtracting -- on a row-per-row basis -- the minum value
|
|
** for that row.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
{
|
|
currentmin=MAXPOSLONG; /* Initialize minimum */
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(tableau[i][j]<currentmin)
|
|
currentmin=tableau[i][j];
|
|
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
tableau[i][j]-=currentmin;
|
|
}
|
|
|
|
/*
|
|
** Determine minimum cost on a column basis. This works
|
|
** just as above, only now we step through the array
|
|
** column-wise
|
|
*/
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
{
|
|
currentmin=MAXPOSLONG; /* Initialize minimum */
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
if(tableau[i][j]<currentmin)
|
|
currentmin=tableau[i][j];
|
|
|
|
/*
|
|
** Here, we'll take the trouble to see if the current
|
|
** minimum is zero. This is likely worth it, since the
|
|
** preceding loop will have created at least one zero in
|
|
** each row. We can save ourselves a few iterations.
|
|
*/
|
|
if(currentmin!=0)
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
tableau[i][j]-=currentmin;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/**********************
|
|
** first_assignments **
|
|
***********************
|
|
** Do first assignments.
|
|
** The assignedtableau[] array holds a set of values that
|
|
** indicate the assignment of a value, or its elimination.
|
|
** The values are:
|
|
** 0 = Item is neither assigned nor eliminated.
|
|
** 1 = Item is assigned
|
|
** 2 = Item is eliminated
|
|
** Returns the number of selections made. If this equals
|
|
** the number of rows, then an optimum has been determined.
|
|
*/
|
|
static int first_assignments(long tableau[][ASSIGNCOLS],
|
|
short assignedtableau[][ASSIGNCOLS])
|
|
{
|
|
ushort i,j,k; /* Index variables */
|
|
ushort numassigns; /* # of assignments */
|
|
ushort totnumassigns; /* Total # of assignments */
|
|
ushort numzeros; /* # of zeros in row */
|
|
int selected=0; /* Flag used to indicate selection */
|
|
|
|
/*
|
|
** Clear the assignedtableau, setting all members to show that
|
|
** no one is yet assigned, eliminated, or anything.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
assignedtableau[i][j]=0;
|
|
|
|
totnumassigns=0;
|
|
do {
|
|
numassigns=0;
|
|
/*
|
|
** Step through rows. For each one that is not currently
|
|
** assigned, see if the row has only one zero in it. If so,
|
|
** mark that as an assigned row/col. Eliminate other zeros
|
|
** in the same column.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
{ numzeros=0;
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(tableau[i][j]==0L)
|
|
if(assignedtableau[i][j]==0)
|
|
{ numzeros++;
|
|
selected=j;
|
|
}
|
|
if(numzeros==1)
|
|
{ numassigns++;
|
|
totnumassigns++;
|
|
assignedtableau[i][selected]=1;
|
|
for(k=0;k<ASSIGNROWS;k++)
|
|
if((k!=i) &&
|
|
(tableau[k][selected]==0))
|
|
assignedtableau[k][selected]=2;
|
|
}
|
|
}
|
|
/*
|
|
** Step through columns, doing same as above. Now, be careful
|
|
** of items in the other rows of a selected column.
|
|
*/
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
{ numzeros=0;
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
if(tableau[i][j]==0L)
|
|
if(assignedtableau[i][j]==0)
|
|
{ numzeros++;
|
|
selected=i;
|
|
}
|
|
if(numzeros==1)
|
|
{ numassigns++;
|
|
totnumassigns++;
|
|
assignedtableau[selected][j]=1;
|
|
for(k=0;k<ASSIGNCOLS;k++)
|
|
if((k!=j) &&
|
|
(tableau[selected][k]==0))
|
|
assignedtableau[selected][k]=2;
|
|
}
|
|
}
|
|
/*
|
|
** Repeat until no more assignments to be made.
|
|
*/
|
|
} while(numassigns!=0);
|
|
|
|
/*
|
|
** See if we can leave at this point.
|
|
*/
|
|
if(totnumassigns==ASSIGNROWS) return(totnumassigns);
|
|
|
|
/*
|
|
** Now step through the array by row. If you find any unassigned
|
|
** zeros, pick the first in the row. Eliminate all zeros from
|
|
** that same row & column. This occurs if there are multiple optima...
|
|
** possibly.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
{ selected=-1;
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if((tableau[i][j]==0L) &&
|
|
(assignedtableau[i][j]==0))
|
|
{ selected=j;
|
|
break;
|
|
}
|
|
if(selected!=-1)
|
|
{ assignedtableau[i][selected]=1;
|
|
totnumassigns++;
|
|
for(k=0;k<ASSIGNCOLS;k++)
|
|
if((k!=selected) &&
|
|
(tableau[i][k]==0L))
|
|
assignedtableau[i][k]=2;
|
|
for(k=0;k<ASSIGNROWS;k++)
|
|
if((k!=i) &&
|
|
(tableau[k][selected]==0L))
|
|
assignedtableau[k][selected]=2;
|
|
}
|
|
}
|
|
|
|
return(totnumassigns);
|
|
}
|
|
|
|
/***********************
|
|
** second_assignments **
|
|
************************
|
|
** This section of the algorithm creates the revised
|
|
** tableau, and is difficult to explain. I suggest you
|
|
** refer to the algorithm's source, mentioned in comments
|
|
** toward the beginning of the program.
|
|
*/
|
|
static void second_assignments(long tableau[][ASSIGNCOLS],
|
|
short assignedtableau[][ASSIGNCOLS])
|
|
{
|
|
int i,j; /* Indexes */
|
|
short linesrow[ASSIGNROWS];
|
|
short linescol[ASSIGNCOLS];
|
|
long smallest; /* Holds smallest value */
|
|
ushort numassigns; /* Number of assignments */
|
|
ushort newrows; /* New rows to be considered */
|
|
/*
|
|
** Clear the linesrow and linescol arrays.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
linesrow[i]=0;
|
|
for(i=0;i<ASSIGNCOLS;i++)
|
|
linescol[i]=0;
|
|
|
|
/*
|
|
** Scan rows, flag each row that has no assignment in it.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
{ numassigns=0;
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(assignedtableau[i][j]==1)
|
|
{ numassigns++;
|
|
break;
|
|
}
|
|
if(numassigns==0) linesrow[i]=1;
|
|
}
|
|
|
|
do {
|
|
|
|
newrows=0;
|
|
/*
|
|
** For each row checked above, scan for any zeros. If found,
|
|
** check the associated column.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
{ if(linesrow[i]==1)
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(tableau[i][j]==0)
|
|
linescol[j]=1;
|
|
}
|
|
|
|
/*
|
|
** Now scan checked columns. If any contain assigned zeros, check
|
|
** the associated row.
|
|
*/
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(linescol[j]==1)
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
if((assignedtableau[i][j]==1) &&
|
|
(linesrow[i]!=1))
|
|
{
|
|
linesrow[i]=1;
|
|
newrows++;
|
|
}
|
|
} while(newrows!=0);
|
|
|
|
/*
|
|
** linesrow[n]==0 indicate rows covered by imaginary line
|
|
** linescol[n]==1 indicate cols covered by imaginary line
|
|
** For all cells not covered by imaginary lines, determine smallest
|
|
** value.
|
|
*/
|
|
smallest=MAXPOSLONG;
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
if(linesrow[i]!=0)
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(linescol[j]!=1)
|
|
if(tableau[i][j]<smallest)
|
|
smallest=tableau[i][j];
|
|
|
|
/*
|
|
** Subtract smallest from all cells in the above set.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
if(linesrow[i]!=0)
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(linescol[j]!=1)
|
|
tableau[i][j]-=smallest;
|
|
|
|
/*
|
|
** Add smallest to all cells covered by two lines.
|
|
*/
|
|
for(i=0;i<ASSIGNROWS;i++)
|
|
if(linesrow[i]==0)
|
|
for(j=0;j<ASSIGNCOLS;j++)
|
|
if(linescol[j]==1)
|
|
tableau[i][j]+=smallest;
|
|
|
|
return;
|
|
}
|
|
|
|
/********************
|
|
** IDEA Encryption **
|
|
*********************
|
|
** IDEA - International Data Encryption Algorithm.
|
|
** Based on code presented in Applied Cryptography by Bruce Schneier.
|
|
** Which was based on code developed by Xuejia Lai and James L. Massey.
|
|
** Other modifications made by Colin Plumb.
|
|
**
|
|
*/
|
|
|
|
/***********
|
|
** DoIDEA **
|
|
************
|
|
** Perform IDEA encryption. Note that we time encryption & decryption
|
|
** time as being a single loop.
|
|
*/
|
|
void DoIDEA(void)
|
|
{
|
|
IDEAStruct *locideastruct; /* Loc pointer to global structure */
|
|
int i;
|
|
IDEAkey Z,DK;
|
|
u16 userkey[8];
|
|
ulong accumtime;
|
|
double iterations;
|
|
char *errorcontext;
|
|
int systemerror;
|
|
faruchar *plain1; /* First plaintext buffer */
|
|
faruchar *crypt1; /* Encryption buffer */
|
|
faruchar *plain2; /* Second plaintext buffer */
|
|
|
|
/*
|
|
** Link to global data
|
|
*/
|
|
locideastruct=&global_ideastruct;
|
|
|
|
/*
|
|
** Set error context
|
|
*/
|
|
errorcontext="CPU:IDEA";
|
|
|
|
/*
|
|
** Re-init random-number generator.
|
|
*/
|
|
/* randnum(3L); */
|
|
randnum((int32)3);
|
|
|
|
/*
|
|
** Build an encryption/decryption key
|
|
*/
|
|
for (i=0;i<8;i++)
|
|
/* userkey[i]=(u16)(abs_randwc(60000L) & 0xFFFF); */
|
|
userkey[i]=(u16)(abs_randwc((int32)60000) & 0xFFFF);
|
|
for(i=0;i<KEYLEN;i++)
|
|
Z[i]=0;
|
|
|
|
/*
|
|
** Compute encryption/decryption subkeys
|
|
*/
|
|
en_key_idea(userkey,Z);
|
|
de_key_idea(Z,DK);
|
|
|
|
/*
|
|
** Allocate memory for buffers. We'll make 3, called plain1,
|
|
** crypt1, and plain2. It works like this:
|
|
** plain1 >>encrypt>> crypt1 >>decrypt>> plain2.
|
|
** So, plain1 and plain2 should match.
|
|
** Also, fill up plain1 with sample text.
|
|
*/
|
|
plain1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
|
|
if(systemerror)
|
|
{
|
|
ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
crypt1=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
|
|
if(systemerror)
|
|
{
|
|
ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)plain1,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
plain2=(faruchar *)AllocateMemory(locideastruct->arraysize,&systemerror);
|
|
if(systemerror)
|
|
{
|
|
ReportError(errorcontext,systemerror);
|
|
FreeMemory((farvoid *)plain1,&systemerror);
|
|
FreeMemory((farvoid *)crypt1,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
/*
|
|
** Note that we build the "plaintext" by simply loading
|
|
** the array up with random numbers.
|
|
*/
|
|
for(i=0;i<locideastruct->arraysize;i++)
|
|
plain1[i]=(uchar)(abs_randwc(255) & 0xFF);
|
|
|
|
/*
|
|
** See if we need to perform self adjustment loop.
|
|
*/
|
|
if(locideastruct->adjust==0)
|
|
{
|
|
/*
|
|
** Do self-adjustment. This involves initializing the
|
|
** # of loops and increasing the loop count until we
|
|
** get a number of loops that we can use.
|
|
*/
|
|
for(locideastruct->loops=100L;
|
|
locideastruct->loops<MAXIDEALOOPS;
|
|
locideastruct->loops+=10L)
|
|
if(DoIDEAIteration(plain1,crypt1,plain2,
|
|
locideastruct->arraysize,
|
|
locideastruct->loops,
|
|
Z,DK)>global_min_ticks) break;
|
|
}
|
|
|
|
/*
|
|
** All's well if we get here. Do the test.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
accumtime+=DoIDEAIteration(plain1,crypt1,plain2,
|
|
locideastruct->arraysize,
|
|
locideastruct->loops,Z,DK);
|
|
iterations+=(double)locideastruct->loops;
|
|
} while(TicksToSecs(accumtime)<locideastruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home. Be sure to
|
|
** show that we don't have to rerun adjustment code.
|
|
*/
|
|
FreeMemory((farvoid *)plain1,&systemerror);
|
|
FreeMemory((farvoid *)crypt1,&systemerror);
|
|
FreeMemory((farvoid *)plain2,&systemerror);
|
|
locideastruct->iterspersec=iterations / TicksToFracSecs(accumtime);
|
|
|
|
if(locideastruct->adjust==0)
|
|
locideastruct->adjust=1;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
/********************
|
|
** DoIDEAIteration **
|
|
*********************
|
|
** Execute a single iteration of the IDEA encryption algorithm.
|
|
** Actually, a single iteration is one encryption and one
|
|
** decryption.
|
|
*/
|
|
static ulong DoIDEAIteration(faruchar *plain1,
|
|
faruchar *crypt1,
|
|
faruchar *plain2,
|
|
ulong arraysize,
|
|
ulong nloops,
|
|
IDEAkey Z,
|
|
IDEAkey DK)
|
|
{
|
|
register ulong i;
|
|
register ulong j;
|
|
ulong elapsed;
|
|
#ifdef DEBUG
|
|
int status=0;
|
|
#endif
|
|
|
|
/*
|
|
** Start the stopwatch.
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Do everything for nloops.
|
|
*/
|
|
for(i=0;i<nloops;i++)
|
|
{
|
|
for(j=0;j<arraysize;j+=(sizeof(u16)*4))
|
|
cipher_idea((u16 *)(plain1+j),(u16 *)(crypt1+j),Z); /* Encrypt */
|
|
|
|
for(j=0;j<arraysize;j+=(sizeof(u16)*4))
|
|
cipher_idea((u16 *)(crypt1+j),(u16 *)(plain2+j),DK); /* Decrypt */
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
for(j=0;j<arraysize;j++)
|
|
if(*(plain1+j)!=*(plain2+j)){
|
|
printf("IDEA Error! \n");
|
|
status=1;
|
|
}
|
|
if (status==0) printf("IDEA: OK\n");
|
|
#endif
|
|
|
|
/*
|
|
** Get elapsed time.
|
|
*/
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
/********
|
|
** mul **
|
|
*********
|
|
** Performs multiplication, modulo (2**16)+1. This code is structured
|
|
** on the assumption that untaken branches are cheaper than taken
|
|
** branches, and that the compiler doesn't schedule branches.
|
|
*/
|
|
static u16 mul(register u16 a, register u16 b)
|
|
{
|
|
register u32 p;
|
|
if(a)
|
|
{ if(b)
|
|
{ p=(u32)(a*b);
|
|
b=low16(p);
|
|
a=(u16)(p>>16);
|
|
return(b-a+(b<a));
|
|
}
|
|
else
|
|
return(1-a);
|
|
}
|
|
else
|
|
return(1-b);
|
|
}
|
|
|
|
/********
|
|
** inv **
|
|
*********
|
|
** Compute multiplicative inverse of x, modulo (2**16)+1
|
|
** using Euclid's GCD algorithm. It is unrolled twice
|
|
** to avoid swapping the meaning of the registers. And
|
|
** some subtracts are changed to adds.
|
|
*/
|
|
static u16 inv(u16 x)
|
|
{
|
|
u16 t0, t1;
|
|
u16 q, y;
|
|
|
|
if(x<=1)
|
|
return(x); /* 0 and 1 are self-inverse */
|
|
t1=0x10001 / x;
|
|
y=0x10001 % x;
|
|
if(y==1)
|
|
return(low16(1-t1));
|
|
t0=1;
|
|
do {
|
|
q=x/y;
|
|
x=x%y;
|
|
t0+=q*t1;
|
|
if(x==1) return(t0);
|
|
q=y/x;
|
|
y=y%x;
|
|
t1+=q*t0;
|
|
} while(y!=1);
|
|
return(low16(1-t1));
|
|
}
|
|
|
|
/****************
|
|
** en_key_idea **
|
|
*****************
|
|
** Compute IDEA encryption subkeys Z
|
|
*/
|
|
static void en_key_idea(u16 *userkey, u16 *Z)
|
|
{
|
|
int i,j;
|
|
|
|
/*
|
|
** shifts
|
|
*/
|
|
for(j=0;j<8;j++)
|
|
Z[j]=*userkey++;
|
|
for(i=0;j<KEYLEN;j++)
|
|
{ i++;
|
|
Z[i+7]=(Z[i&7]<<9)| (Z[(i+1) & 7] >> 7);
|
|
Z+=i&8;
|
|
i&=7;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/****************
|
|
** de_key_idea **
|
|
*****************
|
|
** Compute IDEA decryption subkeys DK from encryption
|
|
** subkeys Z.
|
|
*/
|
|
static void de_key_idea(IDEAkey Z, IDEAkey DK)
|
|
{
|
|
IDEAkey TT;
|
|
int j;
|
|
u16 t1, t2, t3;
|
|
u16 *p;
|
|
p=(u16 *)(TT+KEYLEN);
|
|
|
|
t1=inv(*Z++);
|
|
t2=-*Z++;
|
|
t3=-*Z++;
|
|
*--p=inv(*Z++);
|
|
*--p=t3;
|
|
*--p=t2;
|
|
*--p=t1;
|
|
|
|
for(j=1;j<ROUNDS;j++)
|
|
{ t1=*Z++;
|
|
*--p=*Z++;
|
|
*--p=t1;
|
|
t1=inv(*Z++);
|
|
t2=-*Z++;
|
|
t3=-*Z++;
|
|
*--p=inv(*Z++);
|
|
*--p=t2;
|
|
*--p=t3;
|
|
*--p=t1;
|
|
}
|
|
t1=*Z++;
|
|
*--p=*Z++;
|
|
*--p=t1;
|
|
t1=inv(*Z++);
|
|
t2=-*Z++;
|
|
t3=-*Z++;
|
|
*--p=inv(*Z++);
|
|
*--p=t3;
|
|
*--p=t2;
|
|
*--p=t1;
|
|
/*
|
|
** Copy and destroy temp copy
|
|
*/
|
|
for(j=0,p=TT;j<KEYLEN;j++)
|
|
{ *DK++=*p;
|
|
*p++=0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
** MUL(x,y)
|
|
** This #define creates a macro that computes x=x*y modulo 0x10001.
|
|
** Requires temps t16 and t32. Also requires y to be strictly 16
|
|
** bits. Here, I am using the simplest form. May not be the
|
|
** fastest. -- RG
|
|
*/
|
|
/* #define MUL(x,y) (x=mul(low16(x),y)) */
|
|
|
|
/****************
|
|
** cipher_idea **
|
|
*****************
|
|
** IDEA encryption/decryption algorithm.
|
|
*/
|
|
static void cipher_idea(u16 in[4],
|
|
u16 out[4],
|
|
register IDEAkey Z)
|
|
{
|
|
register u16 x1, x2, x3, x4, t1, t2;
|
|
/* register u16 t16;
|
|
register u16 t32; */
|
|
int r=ROUNDS;
|
|
|
|
x1=*in++;
|
|
x2=*in++;
|
|
x3=*in++;
|
|
x4=*in;
|
|
|
|
do {
|
|
MUL(x1,*Z++);
|
|
x2+=*Z++;
|
|
x3+=*Z++;
|
|
MUL(x4,*Z++);
|
|
|
|
t2=x1^x3;
|
|
MUL(t2,*Z++);
|
|
t1=t2+(x2^x4);
|
|
MUL(t1,*Z++);
|
|
t2=t1+t2;
|
|
|
|
x1^=t1;
|
|
x4^=t2;
|
|
|
|
t2^=x2;
|
|
x2=x3^t1;
|
|
x3=t2;
|
|
} while(--r);
|
|
MUL(x1,*Z++);
|
|
*out++=x1;
|
|
*out++=x3+*Z++;
|
|
*out++=x2+*Z++;
|
|
MUL(x4,*Z);
|
|
*out=x4;
|
|
return;
|
|
}
|
|
|
|
/************************
|
|
** HUFFMAN COMPRESSION **
|
|
************************/
|
|
|
|
/**************
|
|
** DoHuffman **
|
|
***************
|
|
** Execute a huffman compression on a block of plaintext.
|
|
** Note that (as with IDEA encryption) an iteration of the
|
|
** Huffman test includes a compression AND a decompression.
|
|
** Also, the compression cycle includes building the
|
|
** Huffman tree.
|
|
*/
|
|
void DoHuffman(void)
|
|
{
|
|
HuffStruct *lochuffstruct; /* Loc pointer to global data */
|
|
char *errorcontext;
|
|
int systemerror;
|
|
ulong accumtime;
|
|
double iterations;
|
|
farchar *comparray;
|
|
farchar *decomparray;
|
|
farchar *plaintext;
|
|
|
|
/*
|
|
** Link to global data
|
|
*/
|
|
lochuffstruct=&global_huffstruct;
|
|
|
|
/*
|
|
** Set error context.
|
|
*/
|
|
errorcontext="CPU:Huffman";
|
|
|
|
/*
|
|
** Allocate memory for the plaintext and the compressed text.
|
|
** We'll be really pessimistic here, and allocate equal amounts
|
|
** for both (though we know...well, we PRESUME) the compressed
|
|
** stuff will take less than the plain stuff.
|
|
** Also note that we'll build a 3rd buffer to decompress
|
|
** into, and we preallocate space for the huffman tree.
|
|
** (We presume that the Huffman tree will grow no larger
|
|
** than 512 bytes. This is actually a super-conservative
|
|
** estimate...but, who cares?)
|
|
*/
|
|
plaintext=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
ErrorExit();
|
|
}
|
|
comparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory(plaintext,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
decomparray=(farchar *)AllocateMemory(lochuffstruct->arraysize,&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory(plaintext,&systemerror);
|
|
FreeMemory(comparray,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
hufftree=(huff_node *)AllocateMemory(sizeof(huff_node) * 512,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
FreeMemory(plaintext,&systemerror);
|
|
FreeMemory(comparray,&systemerror);
|
|
FreeMemory(decomparray,&systemerror);
|
|
ErrorExit();
|
|
}
|
|
|
|
/*
|
|
** Build the plaintext buffer. Since we want this to
|
|
** actually be able to compress, we'll use the
|
|
** wordcatalog to build the plaintext stuff.
|
|
*/
|
|
/*
|
|
** Reset random number generator so things repeat.
|
|
** added by Uwe F. Mayer
|
|
*/
|
|
randnum((int32)13);
|
|
create_text_block(plaintext,lochuffstruct->arraysize-1,(ushort)500);
|
|
plaintext[lochuffstruct->arraysize-1L]='\0';
|
|
plaintextlen=lochuffstruct->arraysize;
|
|
|
|
/*
|
|
** See if we need to perform self adjustment loop.
|
|
*/
|
|
if(lochuffstruct->adjust==0)
|
|
{
|
|
/*
|
|
** Do self-adjustment. This involves initializing the
|
|
** # of loops and increasing the loop count until we
|
|
** get a number of loops that we can use.
|
|
*/
|
|
for(lochuffstruct->loops=100L;
|
|
lochuffstruct->loops<MAXHUFFLOOPS;
|
|
lochuffstruct->loops+=10L)
|
|
if(DoHuffIteration(plaintext,
|
|
comparray,
|
|
decomparray,
|
|
lochuffstruct->arraysize,
|
|
lochuffstruct->loops,
|
|
hufftree)>global_min_ticks) break;
|
|
}
|
|
|
|
/*
|
|
** All's well if we get here. Do the test.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
accumtime+=DoHuffIteration(plaintext,
|
|
comparray,
|
|
decomparray,
|
|
lochuffstruct->arraysize,
|
|
lochuffstruct->loops,
|
|
hufftree);
|
|
iterations+=(double)lochuffstruct->loops;
|
|
} while(TicksToSecs(accumtime)<lochuffstruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home. Be sure to
|
|
** show that we don't have to rerun adjustment code.
|
|
*/
|
|
FreeMemory((farvoid *)plaintext,&systemerror);
|
|
FreeMemory((farvoid *)comparray,&systemerror);
|
|
FreeMemory((farvoid *)decomparray,&systemerror);
|
|
FreeMemory((farvoid *)hufftree,&systemerror);
|
|
lochuffstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
|
|
|
|
if(lochuffstruct->adjust==0)
|
|
lochuffstruct->adjust=1;
|
|
|
|
}
|
|
|
|
/*********************
|
|
** create_text_line **
|
|
**********************
|
|
** Create a random line of text, stored at *dt. The line may be
|
|
** no more than nchars long.
|
|
*/
|
|
static void create_text_line(farchar *dt,
|
|
long nchars)
|
|
{
|
|
long charssofar; /* # of characters so far */
|
|
long tomove; /* # of characters to move */
|
|
char myword[40]; /* Local buffer for words */
|
|
farchar *wordptr; /* Pointer to word from catalog */
|
|
|
|
charssofar=0;
|
|
|
|
do {
|
|
/*
|
|
** Grab a random word from the wordcatalog
|
|
*/
|
|
/* wordptr=wordcatarray[abs_randwc((long)WORDCATSIZE)];*/
|
|
wordptr=wordcatarray[abs_randwc((int32)WORDCATSIZE)];
|
|
MoveMemory((farvoid *)myword,
|
|
(farvoid *)wordptr,
|
|
(unsigned long)strlen(wordptr)+1);
|
|
|
|
/*
|
|
** Append a blank.
|
|
*/
|
|
tomove=strlen(myword)+1;
|
|
myword[tomove-1]=' ';
|
|
|
|
/*
|
|
** See how long it is. If its length+charssofar > nchars, we have
|
|
** to trim it.
|
|
*/
|
|
if((tomove+charssofar)>nchars)
|
|
tomove=nchars-charssofar;
|
|
/*
|
|
** Attach the word to the current line. Increment counter.
|
|
*/
|
|
MoveMemory((farvoid *)dt,(farvoid *)myword,(unsigned long)tomove);
|
|
charssofar+=tomove;
|
|
dt+=tomove;
|
|
|
|
/*
|
|
** If we're done, bail out. Otherwise, go get another word.
|
|
*/
|
|
} while(charssofar<nchars);
|
|
|
|
return;
|
|
}
|
|
|
|
/**********************
|
|
** create_text_block **
|
|
***********************
|
|
** Build a block of text randomly loaded with words. The words
|
|
** come from the wordcatalog (which must be loaded before you
|
|
** call this).
|
|
** *tb points to the memory where the text is to be built.
|
|
** tblen is the # of bytes to put into the text block
|
|
** maxlinlen is the maximum length of any line (line end indicated
|
|
** by a carriage return).
|
|
*/
|
|
static void create_text_block(farchar *tb,
|
|
ulong tblen,
|
|
ushort maxlinlen)
|
|
{
|
|
ulong bytessofar; /* # of bytes so far */
|
|
ulong linelen; /* Line length */
|
|
|
|
bytessofar=0L;
|
|
do {
|
|
|
|
/*
|
|
** Pick a random length for a line and fill the line.
|
|
** Make sure the line can fit (haven't exceeded tablen) and also
|
|
** make sure you leave room to append a carriage return.
|
|
*/
|
|
linelen=abs_randwc(maxlinlen-6)+6;
|
|
if((linelen+bytessofar)>tblen)
|
|
linelen=tblen-bytessofar;
|
|
|
|
if(linelen>1)
|
|
{
|
|
create_text_line(tb,linelen);
|
|
}
|
|
tb+=linelen-1; /* Add the carriage return */
|
|
*tb++='\n';
|
|
|
|
bytessofar+=linelen;
|
|
|
|
} while(bytessofar<tblen);
|
|
|
|
}
|
|
|
|
/********************
|
|
** DoHuffIteration **
|
|
*********************
|
|
** Perform the huffman benchmark. This routine
|
|
** (a) Builds the huffman tree
|
|
** (b) Compresses the text
|
|
** (c) Decompresses the text and verifies correct decompression
|
|
*/
|
|
static ulong DoHuffIteration(farchar *plaintext,
|
|
farchar *comparray,
|
|
farchar *decomparray,
|
|
ulong arraysize,
|
|
ulong nloops,
|
|
huff_node *hufftree)
|
|
{
|
|
int i; /* Index */
|
|
long j; /* Bigger index */
|
|
int root; /* Pointer to huffman tree root */
|
|
float lowfreq1, lowfreq2; /* Low frequency counters */
|
|
int lowidx1, lowidx2; /* Indexes of low freq. elements */
|
|
long bitoffset; /* Bit offset into text */
|
|
long textoffset; /* Char offset into text */
|
|
long maxbitoffset; /* Holds limit of bit offset */
|
|
long bitstringlen; /* Length of bitstring */
|
|
int c; /* Character from plaintext */
|
|
char bitstring[30]; /* Holds bitstring */
|
|
ulong elapsed; /* For stopwatch */
|
|
#ifdef DEBUG
|
|
int status=0;
|
|
#endif
|
|
|
|
/*
|
|
** Start the stopwatch
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
|
|
/*
|
|
** Do everything for nloops
|
|
*/
|
|
while(nloops--)
|
|
{
|
|
|
|
/*
|
|
** Calculate the frequency of each byte value. Store the
|
|
** results in what will become the "leaves" of the
|
|
** Huffman tree. Interior nodes will be built in those
|
|
** nodes greater than node #255.
|
|
*/
|
|
for(i=0;i<256;i++)
|
|
{
|
|
hufftree[i].freq=(float)0.0;
|
|
hufftree[i].c=(unsigned char)i;
|
|
}
|
|
|
|
for(j=0;j<arraysize;j++)
|
|
hufftree[(int)plaintext[j]].freq+=(float)1.0;
|
|
|
|
for(i=0;i<256;i++)
|
|
if(hufftree[i].freq != (float)0.0)
|
|
hufftree[i].freq/=(float)arraysize;
|
|
|
|
/* Reset the second half of the tree. Otherwise the loop below that
|
|
** compares the frequencies up to index 512 makes no sense. Some
|
|
** systems automatically zero out memory upon allocation, others (like
|
|
** for example DEC Unix) do not. Depending on this the loop below gets
|
|
** different data and different run times. On our alpha the data that
|
|
** was arbitrarily assigned led to an underflow error at runtime. We
|
|
** use that zeroed-out bits are in fact 0 as a float.
|
|
** Uwe F. Mayer */
|
|
bzero((char *)&(hufftree[256]),sizeof(huff_node)*256);
|
|
/*
|
|
** Build the huffman tree. First clear all the parent
|
|
** pointers and left/right pointers. Also, discard all
|
|
** nodes that have a frequency of true 0. */
|
|
for(i=0;i<512;i++)
|
|
{ if(hufftree[i].freq==(float)0.0)
|
|
hufftree[i].parent=EXCLUDED;
|
|
else
|
|
hufftree[i].parent=hufftree[i].left=hufftree[i].right=-1;
|
|
}
|
|
|
|
/*
|
|
** Go through the tree. Finding nodes of really low
|
|
** frequency.
|
|
*/
|
|
root=255; /* Starting root node-1 */
|
|
while(1)
|
|
{
|
|
lowfreq1=(float)2.0; lowfreq2=(float)2.0;
|
|
lowidx1=-1; lowidx2=-1;
|
|
/*
|
|
** Find first lowest frequency.
|
|
*/
|
|
for(i=0;i<=root;i++)
|
|
if(hufftree[i].parent<0)
|
|
if(hufftree[i].freq<lowfreq1)
|
|
{ lowfreq1=hufftree[i].freq;
|
|
lowidx1=i;
|
|
}
|
|
|
|
/*
|
|
** Did we find a lowest value? If not, the
|
|
** tree is done.
|
|
*/
|
|
if(lowidx1==-1) break;
|
|
|
|
/*
|
|
** Find next lowest frequency
|
|
*/
|
|
for(i=0;i<=root;i++)
|
|
if((hufftree[i].parent<0) && (i!=lowidx1))
|
|
if(hufftree[i].freq<lowfreq2)
|
|
{ lowfreq2=hufftree[i].freq;
|
|
lowidx2=i;
|
|
}
|
|
|
|
/*
|
|
** If we could only find one item, then that
|
|
** item is surely the root, and (as above) the
|
|
** tree is done.
|
|
*/
|
|
if(lowidx2==-1) break;
|
|
|
|
/*
|
|
** Attach the two new nodes to the current root, and
|
|
** advance the current root.
|
|
*/
|
|
root++; /* New root */
|
|
hufftree[lowidx1].parent=root;
|
|
hufftree[lowidx2].parent=root;
|
|
hufftree[root].freq=lowfreq1+lowfreq2;
|
|
hufftree[root].left=lowidx1;
|
|
hufftree[root].right=lowidx2;
|
|
hufftree[root].parent=-2; /* Show root */
|
|
}
|
|
|
|
/*
|
|
** Huffman tree built...compress the plaintext
|
|
*/
|
|
bitoffset=0L; /* Initialize bit offset */
|
|
for(i=0;i<arraysize;i++)
|
|
{
|
|
c=(int)plaintext[i]; /* Fetch character */
|
|
/*
|
|
** Build a bit string for byte c
|
|
*/
|
|
bitstringlen=0;
|
|
while(hufftree[c].parent!=-2)
|
|
{ if(hufftree[hufftree[c].parent].left==c)
|
|
bitstring[bitstringlen]='0';
|
|
else
|
|
bitstring[bitstringlen]='1';
|
|
c=hufftree[c].parent;
|
|
bitstringlen++;
|
|
}
|
|
|
|
/*
|
|
** Step backwards through the bit string, setting
|
|
** bits in the compressed array as you go.
|
|
*/
|
|
while(bitstringlen--)
|
|
{ SetCompBit((u8 *)comparray,(u32)bitoffset,bitstring[bitstringlen]);
|
|
bitoffset++;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** Compression done. Perform de-compression.
|
|
*/
|
|
maxbitoffset=bitoffset;
|
|
bitoffset=0;
|
|
textoffset=0;
|
|
do {
|
|
i=root;
|
|
while(hufftree[i].left!=-1)
|
|
{ if(GetCompBit((u8 *)comparray,(u32)bitoffset)==0)
|
|
i=hufftree[i].left;
|
|
else
|
|
i=hufftree[i].right;
|
|
bitoffset++;
|
|
}
|
|
decomparray[textoffset]=hufftree[i].c;
|
|
|
|
#ifdef DEBUG
|
|
if(hufftree[i].c != plaintext[textoffset])
|
|
{
|
|
/* Show error */
|
|
printf("Error at textoffset %ld\n",textoffset);
|
|
status=1;
|
|
}
|
|
#endif
|
|
textoffset++;
|
|
} while(bitoffset<maxbitoffset);
|
|
|
|
} /* End the big while(nloops--) from above */
|
|
|
|
/*
|
|
** All done
|
|
*/
|
|
#ifdef DEBUG
|
|
if (status==0) printf("Huffman: OK\n");
|
|
#endif
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
/***************
|
|
** SetCompBit **
|
|
****************
|
|
** Set a bit in the compression array. The value of the
|
|
** bit is set according to char bitchar.
|
|
*/
|
|
static void SetCompBit(u8 *comparray,
|
|
u32 bitoffset,
|
|
char bitchar)
|
|
{
|
|
u32 byteoffset;
|
|
int bitnumb;
|
|
|
|
/*
|
|
** First calculate which element in the comparray to
|
|
** alter. and the bitnumber.
|
|
*/
|
|
byteoffset=bitoffset>>3;
|
|
bitnumb=bitoffset % 8;
|
|
|
|
/*
|
|
** Set or clear
|
|
*/
|
|
if(bitchar=='1')
|
|
comparray[byteoffset]|=(1<<bitnumb);
|
|
else
|
|
comparray[byteoffset]&=~(1<<bitnumb);
|
|
|
|
return;
|
|
}
|
|
|
|
/***************
|
|
** GetCompBit **
|
|
****************
|
|
** Return the bit value of a bit in the comparession array.
|
|
** Returns 0 if the bit is clear, nonzero otherwise.
|
|
*/
|
|
static int GetCompBit(u8 *comparray,
|
|
u32 bitoffset)
|
|
{
|
|
u32 byteoffset;
|
|
int bitnumb;
|
|
|
|
/*
|
|
** Calculate byte offset and bit number.
|
|
*/
|
|
byteoffset=bitoffset>>3;
|
|
bitnumb=bitoffset % 8;
|
|
|
|
/*
|
|
** Fetch
|
|
*/
|
|
return((1<<bitnumb) & comparray[byteoffset] );
|
|
}
|
|
|
|
/********************************
|
|
** BACK PROPAGATION NEURAL NET **
|
|
*********************************
|
|
** This code is a modified version of the code
|
|
** that was submitted to BYTE Magazine by
|
|
** Maureen Caudill. It accomanied an article
|
|
** that I CANNOT NOW RECALL.
|
|
** The author's original heading/comment was
|
|
** as follows:
|
|
**
|
|
** Backpropagation Network
|
|
** Written by Maureen Caudill
|
|
** in Think C 4.0 on a Macintosh
|
|
**
|
|
** (c) Maureen Caudill 1988-1991
|
|
** This network will accept 5x7 input patterns
|
|
** and produce 8 bit output patterns.
|
|
** The source code may be copied or modified without restriction,
|
|
** but no fee may be charged for its use.
|
|
**
|
|
** ++++++++++++++
|
|
** I have modified the code so that it will work
|
|
** on systems other than a Macintosh -- RG
|
|
*/
|
|
|
|
/***********
|
|
** DoNNet **
|
|
************
|
|
** Perform the neural net benchmark.
|
|
** Note that this benchmark is one of the few that
|
|
** requires an input file. That file is "NNET.DAT" and
|
|
** should be on the local directory (from which the
|
|
** benchmark program in launched).
|
|
*/
|
|
void DoNNET(void)
|
|
{
|
|
NNetStruct *locnnetstruct; /* Local ptr to global data */
|
|
char *errorcontext;
|
|
ulong accumtime;
|
|
double iterations;
|
|
|
|
/*
|
|
** Link to global data
|
|
*/
|
|
locnnetstruct=&global_nnetstruct;
|
|
|
|
/*
|
|
** Set error context
|
|
*/
|
|
errorcontext="CPU:NNET";
|
|
|
|
/*
|
|
** Init random number generator.
|
|
** NOTE: It is important that the random number generator
|
|
** be re-initialized for every pass through this test.
|
|
** The NNET algorithm uses the random number generator
|
|
** to initialize the net. Results are sensitive to
|
|
** the initial neural net state.
|
|
*/
|
|
/* randnum(3L); */
|
|
randnum((int32)3);
|
|
|
|
/*
|
|
** Read in the input and output patterns. We'll do this
|
|
** only once here at the beginning. These values don't
|
|
** change once loaded.
|
|
*/
|
|
if(read_data_file()!=0)
|
|
ErrorExit();
|
|
|
|
|
|
/*
|
|
** See if we need to perform self adjustment loop.
|
|
*/
|
|
if(locnnetstruct->adjust==0)
|
|
{
|
|
/*
|
|
** Do self-adjustment. This involves initializing the
|
|
** # of loops and increasing the loop count until we
|
|
** get a number of loops that we can use.
|
|
*/
|
|
for(locnnetstruct->loops=1L;
|
|
locnnetstruct->loops<MAXNNETLOOPS;
|
|
locnnetstruct->loops++)
|
|
{ /*randnum(3L); */
|
|
randnum((int32)3);
|
|
if(DoNNetIteration(locnnetstruct->loops)
|
|
>global_min_ticks) break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
** All's well if we get here. Do the test.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
/* randnum(3L); */ /* Gotta do this for Neural Net */
|
|
randnum((int32)3); /* Gotta do this for Neural Net */
|
|
accumtime+=DoNNetIteration(locnnetstruct->loops);
|
|
iterations+=(double)locnnetstruct->loops;
|
|
} while(TicksToSecs(accumtime)<locnnetstruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home. Be sure to
|
|
** show that we don't have to rerun adjustment code.
|
|
*/
|
|
locnnetstruct->iterspersec=iterations / TicksToFracSecs(accumtime);
|
|
|
|
if(locnnetstruct->adjust==0)
|
|
locnnetstruct->adjust=1;
|
|
|
|
|
|
return;
|
|
}
|
|
|
|
/********************
|
|
** DoNNetIteration **
|
|
*********************
|
|
** Do a single iteration of the neural net benchmark.
|
|
** By iteration, we mean a "learning" pass.
|
|
*/
|
|
static ulong DoNNetIteration(ulong nloops)
|
|
{
|
|
ulong elapsed; /* Elapsed time */
|
|
int patt;
|
|
|
|
/*
|
|
** Run nloops learning cycles. Notice that, counted with
|
|
** the learning cycle is the weight randomization and
|
|
** zeroing of changes. This should reduce clock jitter,
|
|
** since we don't have to stop and start the clock for
|
|
** each iteration.
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
while(nloops--)
|
|
{
|
|
randomize_wts();
|
|
zero_changes();
|
|
iteration_count=1;
|
|
learned = F;
|
|
numpasses = 0;
|
|
while (learned == F)
|
|
{
|
|
for (patt=0; patt<numpats; patt++)
|
|
{
|
|
worst_error = 0.0; /* reset this every pass through data */
|
|
move_wt_changes(); /* move last pass's wt changes to momentum array */
|
|
do_forward_pass(patt);
|
|
do_back_pass(patt);
|
|
iteration_count++;
|
|
}
|
|
numpasses ++;
|
|
learned = check_out_error();
|
|
}
|
|
#ifdef DEBUG
|
|
printf("Learned in %d passes\n",numpasses);
|
|
#endif
|
|
}
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
/*************************
|
|
** do_mid_forward(patt) **
|
|
**************************
|
|
** Process the middle layer's forward pass
|
|
** The activation of middle layer's neurode is the weighted
|
|
** sum of the inputs from the input pattern, with sigmoid
|
|
** function applied to the inputs.
|
|
**/
|
|
static void do_mid_forward(int patt)
|
|
{
|
|
double sum;
|
|
int neurode, i;
|
|
|
|
for (neurode=0;neurode<MID_SIZE; neurode++)
|
|
{
|
|
sum = 0.0;
|
|
for (i=0; i<IN_SIZE; i++)
|
|
{ /* compute weighted sum of input signals */
|
|
sum += mid_wts[neurode][i]*in_pats[patt][i];
|
|
}
|
|
/*
|
|
** apply sigmoid function f(x) = 1/(1+exp(-x)) to weighted sum
|
|
*/
|
|
sum = 1.0/(1.0+exp(-sum));
|
|
mid_out[neurode] = sum;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*********************
|
|
** do_out_forward() **
|
|
**********************
|
|
** process the forward pass through the output layer
|
|
** The activation of the output layer is the weighted sum of
|
|
** the inputs (outputs from middle layer), modified by the
|
|
** sigmoid function.
|
|
**/
|
|
static void do_out_forward()
|
|
{
|
|
double sum;
|
|
int neurode, i;
|
|
|
|
for (neurode=0; neurode<OUT_SIZE; neurode++)
|
|
{
|
|
sum = 0.0;
|
|
for (i=0; i<MID_SIZE; i++)
|
|
{ /*
|
|
** compute weighted sum of input signals
|
|
** from middle layer
|
|
*/
|
|
sum += out_wts[neurode][i]*mid_out[i];
|
|
}
|
|
/*
|
|
** Apply f(x) = 1/(1+exp(-x)) to weighted input
|
|
*/
|
|
sum = 1.0/(1.0+exp(-sum));
|
|
out_out[neurode] = sum;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*************************
|
|
** display_output(patt) **
|
|
**************************
|
|
** Display the actual output vs. the desired output of the
|
|
** network.
|
|
** Once the training is complete, and the "learned" flag set
|
|
** to TRUE, then display_output sends its output to both
|
|
** the screen and to a text output file.
|
|
**
|
|
** NOTE: This routine has been disabled in the benchmark
|
|
** version. -- RG
|
|
**/
|
|
/*
|
|
void display_output(int patt)
|
|
{
|
|
int i;
|
|
|
|
fprintf(outfile,"\n Iteration # %d",iteration_count);
|
|
fprintf(outfile,"\n Desired Output: ");
|
|
|
|
for (i=0; i<OUT_SIZE; i++)
|
|
{
|
|
fprintf(outfile,"%6.3f ",out_pats[patt][i]);
|
|
}
|
|
fprintf(outfile,"\n Actual Output: ");
|
|
|
|
for (i=0; i<OUT_SIZE; i++)
|
|
{
|
|
fprintf(outfile,"%6.3f ",out_out[i]);
|
|
}
|
|
fprintf(outfile,"\n");
|
|
return;
|
|
}
|
|
*/
|
|
|
|
/**********************
|
|
** do_forward_pass() **
|
|
***********************
|
|
** control function for the forward pass through the network
|
|
** NOTE: I have disabled the call to display_output() in
|
|
** the benchmark version -- RG.
|
|
**/
|
|
static void do_forward_pass(int patt)
|
|
{
|
|
do_mid_forward(patt); /* process forward pass, middle layer */
|
|
do_out_forward(); /* process forward pass, output layer */
|
|
/* display_output(patt); ** display results of forward pass */
|
|
return;
|
|
}
|
|
|
|
/***********************
|
|
** do_out_error(patt) **
|
|
************************
|
|
** Compute the error for the output layer neurodes.
|
|
** This is simply Desired - Actual.
|
|
**/
|
|
static void do_out_error(int patt)
|
|
{
|
|
int neurode;
|
|
double error,tot_error, sum;
|
|
|
|
tot_error = 0.0;
|
|
sum = 0.0;
|
|
for (neurode=0; neurode<OUT_SIZE; neurode++)
|
|
{
|
|
out_error[neurode] = out_pats[patt][neurode] - out_out[neurode];
|
|
/*
|
|
** while we're here, also compute magnitude
|
|
** of total error and worst error in this pass.
|
|
** We use these to decide if we are done yet.
|
|
*/
|
|
error = out_error[neurode];
|
|
if (error <0.0)
|
|
{
|
|
sum += -error;
|
|
if (-error > tot_error)
|
|
tot_error = -error; /* worst error this pattern */
|
|
}
|
|
else
|
|
{
|
|
sum += error;
|
|
if (error > tot_error)
|
|
tot_error = error; /* worst error this pattern */
|
|
}
|
|
}
|
|
avg_out_error[patt] = sum/OUT_SIZE;
|
|
tot_out_error[patt] = tot_error;
|
|
return;
|
|
}
|
|
|
|
/***********************
|
|
** worst_pass_error() **
|
|
************************
|
|
** Find the worst and average error in the pass and save it
|
|
**/
|
|
static void worst_pass_error()
|
|
{
|
|
double error,sum;
|
|
|
|
int i;
|
|
|
|
error = 0.0;
|
|
sum = 0.0;
|
|
for (i=0; i<numpats; i++)
|
|
{
|
|
if (tot_out_error[i] > error) error = tot_out_error[i];
|
|
sum += avg_out_error[i];
|
|
}
|
|
worst_error = error;
|
|
average_error = sum/numpats;
|
|
return;
|
|
}
|
|
|
|
/*******************
|
|
** do_mid_error() **
|
|
********************
|
|
** Compute the error for the middle layer neurodes
|
|
** This is based on the output errors computed above.
|
|
** Note that the derivative of the sigmoid f(x) is
|
|
** f'(x) = f(x)(1 - f(x))
|
|
** Recall that f(x) is merely the output of the middle
|
|
** layer neurode on the forward pass.
|
|
**/
|
|
static void do_mid_error()
|
|
{
|
|
double sum;
|
|
int neurode, i;
|
|
|
|
for (neurode=0; neurode<MID_SIZE; neurode++)
|
|
{
|
|
sum = 0.0;
|
|
for (i=0; i<OUT_SIZE; i++)
|
|
sum += out_wts[i][neurode]*out_error[i];
|
|
|
|
/*
|
|
** apply the derivative of the sigmoid here
|
|
** Because of the choice of sigmoid f(I), the derivative
|
|
** of the sigmoid is f'(I) = f(I)(1 - f(I))
|
|
*/
|
|
mid_error[neurode] = mid_out[neurode]*(1-mid_out[neurode])*sum;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*********************
|
|
** adjust_out_wts() **
|
|
**********************
|
|
** Adjust the weights of the output layer. The error for
|
|
** the output layer has been previously propagated back to
|
|
** the middle layer.
|
|
** Use the Delta Rule with momentum term to adjust the weights.
|
|
**/
|
|
static void adjust_out_wts()
|
|
{
|
|
int weight, neurode;
|
|
double learn,delta,alph;
|
|
|
|
learn = BETA;
|
|
alph = ALPHA;
|
|
for (neurode=0; neurode<OUT_SIZE; neurode++)
|
|
{
|
|
for (weight=0; weight<MID_SIZE; weight++)
|
|
{
|
|
/* standard delta rule */
|
|
delta = learn * out_error[neurode] * mid_out[weight];
|
|
|
|
/* now the momentum term */
|
|
delta += alph * out_wt_change[neurode][weight];
|
|
out_wts[neurode][weight] += delta;
|
|
|
|
/* keep track of this pass's cum wt changes for next pass's momentum */
|
|
out_wt_cum_change[neurode][weight] += delta;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*************************
|
|
** adjust_mid_wts(patt) **
|
|
**************************
|
|
** Adjust the middle layer weights using the previously computed
|
|
** errors.
|
|
** We use the Generalized Delta Rule with momentum term
|
|
**/
|
|
static void adjust_mid_wts(int patt)
|
|
{
|
|
int weight, neurode;
|
|
double learn,alph,delta;
|
|
|
|
learn = BETA;
|
|
alph = ALPHA;
|
|
for (neurode=0; neurode<MID_SIZE; neurode++)
|
|
{
|
|
for (weight=0; weight<IN_SIZE; weight++)
|
|
{
|
|
/* first the basic delta rule */
|
|
delta = learn * mid_error[neurode] * in_pats[patt][weight];
|
|
|
|
/* with the momentum term */
|
|
delta += alph * mid_wt_change[neurode][weight];
|
|
mid_wts[neurode][weight] += delta;
|
|
|
|
/* keep track of this pass's cum wt changes for next pass's momentum */
|
|
mid_wt_cum_change[neurode][weight] += delta;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*******************
|
|
** do_back_pass() **
|
|
********************
|
|
** Process the backward propagation of error through network.
|
|
**/
|
|
void do_back_pass(int patt)
|
|
{
|
|
|
|
do_out_error(patt);
|
|
do_mid_error();
|
|
adjust_out_wts();
|
|
adjust_mid_wts(patt);
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/**********************
|
|
** move_wt_changes() **
|
|
***********************
|
|
** Move the weight changes accumulated last pass into the wt-change
|
|
** array for use by the momentum term in this pass. Also zero out
|
|
** the accumulating arrays after the move.
|
|
**/
|
|
static void move_wt_changes()
|
|
{
|
|
int i,j;
|
|
|
|
for (i = 0; i<MID_SIZE; i++)
|
|
for (j = 0; j<IN_SIZE; j++)
|
|
{
|
|
mid_wt_change[i][j] = mid_wt_cum_change[i][j];
|
|
/*
|
|
** Zero it out for next pass accumulation.
|
|
*/
|
|
mid_wt_cum_change[i][j] = 0.0;
|
|
}
|
|
|
|
for (i = 0; i<OUT_SIZE; i++)
|
|
for (j=0; j<MID_SIZE; j++)
|
|
{
|
|
out_wt_change[i][j] = out_wt_cum_change[i][j];
|
|
out_wt_cum_change[i][j] = 0.0;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/**********************
|
|
** check_out_error() **
|
|
***********************
|
|
** Check to see if the error in the output layer is below
|
|
** MARGIN*OUT_SIZE for all output patterns. If so, then
|
|
** assume the network has learned acceptably well. This
|
|
** is simply an arbitrary measure of how well the network
|
|
** has learned -- many other standards are possible.
|
|
**/
|
|
static int check_out_error()
|
|
{
|
|
int result,i,error;
|
|
|
|
result = T;
|
|
error = F;
|
|
worst_pass_error(); /* identify the worst error in this pass */
|
|
|
|
/*
|
|
#ifdef DEBUG
|
|
printf("\n Iteration # %d",iteration_count);
|
|
#endif
|
|
*/
|
|
for (i=0; i<numpats; i++)
|
|
{
|
|
/* printf("\n Error pattern %d: Worst: %8.3f; Average: %8.3f",
|
|
i+1,tot_out_error[i], avg_out_error[i]);
|
|
fprintf(outfile,
|
|
"\n Error pattern %d: Worst: %8.3f; Average: %8.3f",
|
|
i+1,tot_out_error[i]);
|
|
*/
|
|
|
|
if (worst_error >= STOP) result = F;
|
|
if (tot_out_error[i] >= 16.0) error = T;
|
|
}
|
|
|
|
if (error == T) result = ERR;
|
|
|
|
|
|
#ifdef DEBUG
|
|
/* printf("\n Error this pass thru data: Worst: %8.3f; Average: %8.3f",
|
|
worst_error,average_error);
|
|
*/
|
|
/* fprintf(outfile,
|
|
"\n Error this pass thru data: Worst: %8.3f; Average: %8.3f",
|
|
worst_error, average_error); */
|
|
#endif
|
|
|
|
return(result);
|
|
}
|
|
|
|
|
|
/*******************
|
|
** zero_changes() **
|
|
********************
|
|
** Zero out all the wt change arrays
|
|
**/
|
|
static void zero_changes()
|
|
{
|
|
int i,j;
|
|
|
|
for (i = 0; i<MID_SIZE; i++)
|
|
{
|
|
for (j=0; j<IN_SIZE; j++)
|
|
{
|
|
mid_wt_change[i][j] = 0.0;
|
|
mid_wt_cum_change[i][j] = 0.0;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i< OUT_SIZE; i++)
|
|
{
|
|
for (j=0; j<MID_SIZE; j++)
|
|
{
|
|
out_wt_change[i][j] = 0.0;
|
|
out_wt_cum_change[i][j] = 0.0;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
/********************
|
|
** randomize_wts() **
|
|
*********************
|
|
** Intialize the weights in the middle and output layers to
|
|
** random values between -0.25..+0.25
|
|
** Function rand() returns a value between 0 and 32767.
|
|
**
|
|
** NOTE: Had to make alterations to how the random numbers were
|
|
** created. -- RG.
|
|
**/
|
|
static void randomize_wts()
|
|
{
|
|
int neurode,i;
|
|
double value;
|
|
|
|
/*
|
|
** Following not used int benchmark version -- RG
|
|
**
|
|
** printf("\n Please enter a random number seed (1..32767): ");
|
|
** scanf("%d", &i);
|
|
** srand(i);
|
|
*/
|
|
|
|
for (neurode = 0; neurode<MID_SIZE; neurode++)
|
|
{
|
|
for(i=0; i<IN_SIZE; i++)
|
|
{
|
|
/* value=(double)abs_randwc(100000L); */
|
|
value=(double)abs_randwc((int32)100000);
|
|
value=value/(double)100000.0 - (double) 0.5;
|
|
mid_wts[neurode][i] = value/2;
|
|
}
|
|
}
|
|
for (neurode=0; neurode<OUT_SIZE; neurode++)
|
|
{
|
|
for(i=0; i<MID_SIZE; i++)
|
|
{
|
|
/* value=(double)abs_randwc(100000L); */
|
|
value=(double)abs_randwc((int32)100000);
|
|
value=value/(double)10000.0 - (double) 0.5;
|
|
out_wts[neurode][i] = value/2;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/*********************
|
|
** read_data_file() **
|
|
**********************
|
|
** Read in the input data file and store the patterns in
|
|
** in_pats and out_pats.
|
|
** The format for the data file is as follows:
|
|
**
|
|
** line# data expected
|
|
** ----- ------------------------------
|
|
** 1 In-X-size,in-y-size,out-size
|
|
** 2 number of patterns in file
|
|
** 3 1st X row of 1st input pattern
|
|
** 4.. following rows of 1st input pattern pattern
|
|
** in-x+2 y-out pattern
|
|
** 1st X row of 2nd pattern
|
|
** etc.
|
|
**
|
|
** Each row of data is separated by commas or spaces.
|
|
** The data is expected to be ascii text corresponding to
|
|
** either a +1 or a 0.
|
|
**
|
|
** Sample input for a 1-pattern file (The comments to the
|
|
** right may NOT be in the file unless more sophisticated
|
|
** parsing of the input is done.):
|
|
**
|
|
** 5,7,8 input is 5x7 grid, output is 8 bits
|
|
** 1 one pattern in file
|
|
** 0,1,1,1,0 beginning of pattern for "O"
|
|
** 1,0,0,0,1
|
|
** 1,0,0,0,1
|
|
** 1,0,0,0,1
|
|
** 1,0,0,0,1
|
|
** 1,0,0,0,0
|
|
** 0,1,1,1,0
|
|
** 0,1,0,0,1,1,1,1 ASCII code for "O" -- 0100 1111
|
|
**
|
|
** Clearly, this simple scheme can be expanded or enhanced
|
|
** any way you like.
|
|
**
|
|
** Returns -1 if any file error occurred, otherwise 0.
|
|
**/
|
|
static int read_data_file()
|
|
{
|
|
FILE *infile;
|
|
|
|
int xinsize,yinsize,youtsize;
|
|
int patt, element, i, row;
|
|
int vals_read;
|
|
int val1,val2,val3,val4,val5,val6,val7,val8;
|
|
|
|
/* printf("\n Opening and retrieving data from file."); */
|
|
|
|
infile = fopen(inpath, "r");
|
|
if (infile == NULL)
|
|
{
|
|
printf("\n CPU:NNET--error in opening file!");
|
|
return -1 ;
|
|
}
|
|
vals_read =fscanf(infile,"%d %d %d",&xinsize,&yinsize,&youtsize);
|
|
if (vals_read != 3)
|
|
{
|
|
printf("\n CPU:NNET -- Should read 3 items in line one; did read %d",vals_read);
|
|
return -1;
|
|
}
|
|
vals_read=fscanf(infile,"%d",&numpats);
|
|
if (vals_read !=1)
|
|
{
|
|
printf("\n CPU:NNET -- Should read 1 item in line 2; did read %d",vals_read);
|
|
return -1;
|
|
}
|
|
if (numpats > MAXPATS)
|
|
numpats = MAXPATS;
|
|
|
|
for (patt=0; patt<numpats; patt++)
|
|
{
|
|
element = 0;
|
|
for (row = 0; row<yinsize; row++)
|
|
{
|
|
vals_read = fscanf(infile,"%d %d %d %d %d",
|
|
&val1, &val2, &val3, &val4, &val5);
|
|
if (vals_read != 5)
|
|
{
|
|
printf ("\n CPU:NNET -- failure in reading input!");
|
|
return -1;
|
|
}
|
|
element=row*xinsize;
|
|
|
|
in_pats[patt][element] = (double) val1; element++;
|
|
in_pats[patt][element] = (double) val2; element++;
|
|
in_pats[patt][element] = (double) val3; element++;
|
|
in_pats[patt][element] = (double) val4; element++;
|
|
in_pats[patt][element] = (double) val5; element++;
|
|
}
|
|
for (i=0;i<IN_SIZE; i++)
|
|
{
|
|
if (in_pats[patt][i] >= 0.9)
|
|
in_pats[patt][i] = 0.9;
|
|
if (in_pats[patt][i] <= 0.1)
|
|
in_pats[patt][i] = 0.1;
|
|
}
|
|
element = 0;
|
|
vals_read = fscanf(infile,"%d %d %d %d %d %d %d %d",
|
|
&val1, &val2, &val3, &val4, &val5, &val6, &val7, &val8);
|
|
|
|
out_pats[patt][element] = (double) val1; element++;
|
|
out_pats[patt][element] = (double) val2; element++;
|
|
out_pats[patt][element] = (double) val3; element++;
|
|
out_pats[patt][element] = (double) val4; element++;
|
|
out_pats[patt][element] = (double) val5; element++;
|
|
out_pats[patt][element] = (double) val6; element++;
|
|
out_pats[patt][element] = (double) val7; element++;
|
|
out_pats[patt][element] = (double) val8; element++;
|
|
}
|
|
|
|
/* printf("\n Closing the input file now. "); */
|
|
|
|
fclose(infile);
|
|
return(0);
|
|
}
|
|
|
|
/*********************
|
|
** initialize_net() **
|
|
**********************
|
|
** Do all the initialization stuff before beginning
|
|
*/
|
|
/*
|
|
static int initialize_net()
|
|
{
|
|
int err_code;
|
|
|
|
randomize_wts();
|
|
zero_changes();
|
|
err_code = read_data_file();
|
|
iteration_count = 1;
|
|
return(err_code);
|
|
}
|
|
*/
|
|
|
|
/**********************
|
|
** display_mid_wts() **
|
|
***********************
|
|
** Display the weights on the middle layer neurodes
|
|
** NOTE: This routine is not used in the benchmark
|
|
** test -- RG
|
|
**/
|
|
/* static void display_mid_wts()
|
|
{
|
|
int neurode, weight, row, col;
|
|
|
|
fprintf(outfile,"\n Weights of Middle Layer neurodes:");
|
|
|
|
for (neurode=0; neurode<MID_SIZE; neurode++)
|
|
{
|
|
fprintf(outfile,"\n Mid Neurode # %d",neurode);
|
|
for (row=0; row<IN_Y_SIZE; row++)
|
|
{
|
|
fprintf(outfile,"\n ");
|
|
for (col=0; col<IN_X_SIZE; col++)
|
|
{
|
|
weight = IN_X_SIZE * row + col;
|
|
fprintf(outfile," %8.3f ", mid_wts[neurode][weight]);
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
*/
|
|
/**********************
|
|
** display_out_wts() **
|
|
***********************
|
|
** Display the weights on the output layer neurodes
|
|
** NOTE: This code is not used in the benchmark
|
|
** test -- RG
|
|
*/
|
|
/* void display_out_wts()
|
|
{
|
|
int neurode, weight;
|
|
|
|
fprintf(outfile,"\n Weights of Output Layer neurodes:");
|
|
|
|
for (neurode=0; neurode<OUT_SIZE; neurode++)
|
|
{
|
|
fprintf(outfile,"\n Out Neurode # %d \n",neurode);
|
|
for (weight=0; weight<MID_SIZE; weight++)
|
|
{
|
|
fprintf(outfile," %8.3f ", out_wts[neurode][weight]);
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
*/
|
|
|
|
/***********************
|
|
** LU DECOMPOSITION **
|
|
** (Linear Equations) **
|
|
************************
|
|
** These routines come from "Numerical Recipes in Pascal".
|
|
** Note that, as in the assignment algorithm, though we
|
|
** separately define LUARRAYROWS and LUARRAYCOLS, the two
|
|
** must be the same value (this routine depends on a square
|
|
** matrix).
|
|
*/
|
|
|
|
/*********
|
|
** DoLU **
|
|
**********
|
|
** Perform the LU decomposition benchmark.
|
|
*/
|
|
void DoLU(void)
|
|
{
|
|
LUStruct *loclustruct; /* Local pointer to global data */
|
|
char *errorcontext;
|
|
int systemerror;
|
|
fardouble *a;
|
|
fardouble *b;
|
|
fardouble *abase;
|
|
fardouble *bbase;
|
|
LUdblptr ptra;
|
|
int n;
|
|
int i;
|
|
ulong accumtime;
|
|
double iterations;
|
|
|
|
/*
|
|
** Link to global data
|
|
*/
|
|
loclustruct=&global_lustruct;
|
|
|
|
/*
|
|
** Set error context.
|
|
*/
|
|
errorcontext="FPU:LU";
|
|
|
|
/*
|
|
** Our first step is to build a "solvable" problem. This
|
|
** will become the "seed" set that all others will be
|
|
** derived from. (I.E., we'll simply copy these arrays
|
|
** into the others.
|
|
*/
|
|
a=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYCOLS * LUARRAYROWS,
|
|
&systemerror);
|
|
b=(fardouble *)AllocateMemory(sizeof(double) * LUARRAYROWS,
|
|
&systemerror);
|
|
n=LUARRAYROWS;
|
|
|
|
/*
|
|
** We need to allocate a temp vector that is used by the LU
|
|
** algorithm. This removes the allocation routine from the
|
|
** timing.
|
|
*/
|
|
LUtempvv=(fardouble *)AllocateMemory(sizeof(double)*LUARRAYROWS,
|
|
&systemerror);
|
|
|
|
/*
|
|
** Build a problem to be solved.
|
|
*/
|
|
ptra.ptrs.p=a; /* Gotta coerce linear array to 2D array */
|
|
build_problem(*ptra.ptrs.ap,n,b);
|
|
|
|
/*
|
|
** Now that we have a problem built, see if we need to do
|
|
** auto-adjust. If so, repeatedly call the DoLUIteration routine,
|
|
** increasing the number of solutions per iteration as you go.
|
|
*/
|
|
if(loclustruct->adjust==0)
|
|
{
|
|
loclustruct->numarrays=0;
|
|
for(i=1;i<=MAXLUARRAYS;i++)
|
|
{
|
|
abase=(fardouble *)AllocateMemory(sizeof(double) *
|
|
LUARRAYCOLS*LUARRAYROWS*(i+1),&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
|
|
ErrorExit();
|
|
}
|
|
bbase=(fardouble *)AllocateMemory(sizeof(double) *
|
|
LUARRAYROWS*(i+1),&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
LUFreeMem(a,b,abase,(fardouble *)NULL);
|
|
ErrorExit();
|
|
}
|
|
if(DoLUIteration(a,b,abase,bbase,i)>global_min_ticks)
|
|
{ loclustruct->numarrays=i;
|
|
break;
|
|
}
|
|
/*
|
|
** Not enough arrays...free them all and try again
|
|
*/
|
|
FreeMemory((farvoid *)abase,&systemerror);
|
|
FreeMemory((farvoid *)bbase,&systemerror);
|
|
}
|
|
/*
|
|
** Were we able to do it?
|
|
*/
|
|
if(loclustruct->numarrays==0)
|
|
{ printf("FPU:LU -- Array limit reached\n");
|
|
LUFreeMem(a,b,abase,bbase);
|
|
ErrorExit();
|
|
}
|
|
}
|
|
else
|
|
{ /*
|
|
** Don't need to adjust -- just allocate the proper
|
|
** number of arrays and proceed.
|
|
*/
|
|
abase=(fardouble *)AllocateMemory(sizeof(double) *
|
|
LUARRAYCOLS*LUARRAYROWS*loclustruct->numarrays,
|
|
&systemerror);
|
|
if(systemerror)
|
|
{ ReportError(errorcontext,systemerror);
|
|
LUFreeMem(a,b,(fardouble *)NULL,(fardouble *)NULL);
|
|
ErrorExit();
|
|
}
|
|
bbase=(fardouble *)AllocateMemory(sizeof(double) *
|
|
LUARRAYROWS*loclustruct->numarrays,&systemerror);
|
|
if(systemerror)
|
|
{
|
|
ReportError(errorcontext,systemerror);
|
|
LUFreeMem(a,b,abase,(fardouble *)NULL);
|
|
ErrorExit();
|
|
}
|
|
}
|
|
/*
|
|
** All's well if we get here. Do the test.
|
|
*/
|
|
accumtime=0L;
|
|
iterations=(double)0.0;
|
|
|
|
do {
|
|
accumtime+=DoLUIteration(a,b,abase,bbase,
|
|
loclustruct->numarrays);
|
|
iterations+=(double)loclustruct->numarrays;
|
|
} while(TicksToSecs(accumtime)<loclustruct->request_secs);
|
|
|
|
/*
|
|
** Clean up, calculate results, and go home. Be sure to
|
|
** show that we don't have to rerun adjustment code.
|
|
*/
|
|
loclustruct->iterspersec=iterations / TicksToFracSecs(accumtime);
|
|
|
|
if(loclustruct->adjust==0)
|
|
loclustruct->adjust=1;
|
|
|
|
LUFreeMem(a,b,abase,bbase);
|
|
return;
|
|
}
|
|
|
|
/**************
|
|
** LUFreeMem **
|
|
***************
|
|
** Release memory associated with LU benchmark.
|
|
*/
|
|
static void LUFreeMem(fardouble *a, fardouble *b,
|
|
fardouble *abase,fardouble *bbase)
|
|
{
|
|
int systemerror;
|
|
|
|
FreeMemory((farvoid *)a,&systemerror);
|
|
FreeMemory((farvoid *)b,&systemerror);
|
|
FreeMemory((farvoid *)LUtempvv,&systemerror);
|
|
|
|
if(abase!=(fardouble *)NULL) FreeMemory((farvoid *)abase,&systemerror);
|
|
if(bbase!=(fardouble *)NULL) FreeMemory((farvoid *)bbase,&systemerror);
|
|
return;
|
|
}
|
|
|
|
/******************
|
|
** DoLUIteration **
|
|
*******************
|
|
** Perform an iteration of the LU decomposition benchmark.
|
|
** An iteration refers to the repeated solution of several
|
|
** identical matrices.
|
|
*/
|
|
static ulong DoLUIteration(fardouble *a,fardouble *b,
|
|
fardouble *abase, fardouble *bbase,
|
|
ulong numarrays)
|
|
{
|
|
fardouble *locabase;
|
|
fardouble *locbbase;
|
|
LUdblptr ptra; /* For converting ptr to 2D array */
|
|
ulong elapsed;
|
|
ulong j,i; /* Indexes */
|
|
|
|
|
|
/*
|
|
** Move the seed arrays (a & b) into the destination
|
|
** arrays;
|
|
*/
|
|
for(j=0;j<numarrays;j++)
|
|
{ locabase=abase+j*LUARRAYROWS*LUARRAYCOLS;
|
|
locbbase=bbase+j*LUARRAYROWS;
|
|
for(i=0;i<LUARRAYROWS*LUARRAYCOLS;i++)
|
|
*(locabase+i)=*(a+i);
|
|
for(i=0;i<LUARRAYROWS;i++)
|
|
*(locbbase+i)=*(b+i);
|
|
}
|
|
|
|
/*
|
|
** Do test...begin timing.
|
|
*/
|
|
elapsed=StartStopwatch();
|
|
for(i=0;i<numarrays;i++)
|
|
{ locabase=abase+i*LUARRAYROWS*LUARRAYCOLS;
|
|
locbbase=bbase+i*LUARRAYROWS;
|
|
ptra.ptrs.p=locabase;
|
|
lusolve(*ptra.ptrs.ap,LUARRAYROWS,locbbase);
|
|
}
|
|
|
|
return(StopStopwatch(elapsed));
|
|
}
|
|
|
|
/******************
|
|
** build_problem **
|
|
*******************
|
|
** Constructs a solvable set of linear equations. It does this by
|
|
** creating an identity matrix, then loading the solution vector
|
|
** with random numbers. After that, the identity matrix and
|
|
** solution vector are randomly "scrambled". Scrambling is
|
|
** done by (a) randomly selecting a row and multiplying that
|
|
** row by a random number and (b) adding one randomly-selected
|
|
** row to another.
|
|
*/
|
|
static void build_problem(double a[][LUARRAYCOLS],
|
|
int n,
|
|
double b[LUARRAYROWS])
|
|
{
|
|
long i,j,k,k1; /* Indexes */
|
|
double rcon; /* Random constant */
|
|
|
|
/*
|
|
** Reset random number generator
|
|
*/
|
|
/* randnum(13L); */
|
|
randnum((int32)13);
|
|
|
|
/*
|
|
** Build an identity matrix.
|
|
** We'll also use this as a chance to load the solution
|
|
** vector.
|
|
*/
|
|
for(i=0;i<n;i++)
|
|
{ /* b[i]=(double)(abs_randwc(100L)+1L); */
|
|
b[i]=(double)(abs_randwc((int32)100)+(int32)1);
|
|
for(j=0;j<n;j++)
|
|
if(i==j)
|
|
/* a[i][j]=(double)(abs_randwc(1000L)+1L); */
|
|
a[i][j]=(double)(abs_randwc((int32)1000)+(int32)1);
|
|
else
|
|
a[i][j]=(double)0.0;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
printf("Problem:\n");
|
|
for(i=0;i<n;i++)
|
|
{
|
|
/*
|
|
for(j=0;j<n;j++)
|
|
printf("%6.2f ",a[i][j]);
|
|
*/
|
|
printf("%.0f/%.0f=%.2f\t",b[i],a[i][i],b[i]/a[i][i]);
|
|
/*
|
|
printf("\n");
|
|
*/
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
** Scramble. Do this 8n times. See comment above for
|
|
** a description of the scrambling process.
|
|
*/
|
|
|
|
for(i=0;i<8*n;i++)
|
|
{
|
|
/*
|
|
** Pick a row and a random constant. Multiply
|
|
** all elements in the row by the constant.
|
|
*/
|
|
/* k=abs_randwc((long)n);
|
|
rcon=(double)(abs_randwc(20L)+1L);
|
|
for(j=0;j<n;j++)
|
|
a[k][j]=a[k][j]*rcon;
|
|
b[k]=b[k]*rcon;
|
|
*/
|
|
/*
|
|
** Pick two random rows and add second to
|
|
** first. Note that we also occasionally multiply
|
|
** by minus 1 so that we get a subtraction operation.
|
|
*/
|
|
/* k=abs_randwc((long)n); */
|
|
/* k1=abs_randwc((long)n); */
|
|
k=abs_randwc((int32)n);
|
|
k1=abs_randwc((int32)n);
|
|
if(k!=k1)
|
|
{
|
|
if(k<k1) rcon=(double)1.0;
|
|
else rcon=(double)-1.0;
|
|
for(j=0;j<n;j++)
|
|
a[k][j]+=a[k1][j]*rcon;;
|
|
b[k]+=b[k1]*rcon;
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
/***********
|
|
** ludcmp **
|
|
************
|
|
** From the procedure of the same name in "Numerical Recipes in Pascal",
|
|
** by Press, Flannery, Tukolsky, and Vetterling.
|
|
** Given an nxn matrix a[], this routine replaces it by the LU
|
|
** decomposition of a rowwise permutation of itself. a[] and n
|
|
** are input. a[] is output, modified as follows:
|
|
** -- --
|
|
** | b(1,1) b(1,2) b(1,3)... |
|
|
** | a(2,1) b(2,2) b(2,3)... |
|
|
** | a(3,1) a(3,2) b(3,3)... |
|
|
** | a(4,1) a(4,2) a(4,3)... |
|
|
** | ... |
|
|
** -- --
|
|
**
|
|
** Where the b(i,j) elements form the upper triangular matrix of the
|
|
** LU decomposition, and the a(i,j) elements form the lower triangular
|
|
** elements. The LU decomposition is calculated so that we don't
|
|
** need to store the a(i,i) elements (which would have laid along the
|
|
** diagonal and would have all been 1).
|
|
**
|
|
** indx[] is an output vector that records the row permutation
|
|
** effected by the partial pivoting; d is output as +/-1 depending
|
|
** on whether the number of row interchanges was even or odd,
|
|
** respectively.
|
|
** Returns 0 if matrix singular, else returns 1.
|
|
*/
|
|
static int ludcmp(double a[][LUARRAYCOLS],
|
|
int n,
|
|
int indx[],
|
|
int *d)
|
|
{
|
|
|
|
double big; /* Holds largest element value */
|
|
double sum;
|
|
double dum; /* Holds dummy value */
|
|
int i,j,k; /* Indexes */
|
|
int imax=0; /* Holds max index value */
|
|
double tiny; /* A really small number */
|
|
|
|
tiny=(double)1.0e-20;
|
|
|
|
*d=1; /* No interchanges yet */
|
|
|
|
for(i=0;i<n;i++)
|
|
{ big=(double)0.0;
|
|
for(j=0;j<n;j++)
|
|
if((double)fabs(a[i][j]) > big)
|
|
big=fabs(a[i][j]);
|
|
/* Bail out on singular matrix */
|
|
if(big==(double)0.0) return(0);
|
|
LUtempvv[i]=1.0/big;
|
|
}
|
|
|
|
/*
|
|
** Crout's algorithm...loop over columns.
|
|
*/
|
|
for(j=0;j<n;j++)
|
|
{ if(j!=0)
|
|
for(i=0;i<j;i++)
|
|
{ sum=a[i][j];
|
|
if(i!=0)
|
|
for(k=0;k<i;k++)
|
|
sum-=(a[i][k]*a[k][j]);
|
|
a[i][j]=sum;
|
|
}
|
|
big=(double)0.0;
|
|
for(i=j;i<n;i++)
|
|
{ sum=a[i][j];
|
|
if(j!=0)
|
|
for(k=0;k<j;k++)
|
|
sum-=a[i][k]*a[k][j];
|
|
a[i][j]=sum;
|
|
dum=LUtempvv[i]*fabs(sum);
|
|
if(dum>=big)
|
|
{ big=dum;
|
|
imax=i;
|
|
}
|
|
}
|
|
if(j!=imax) /* Interchange rows if necessary */
|
|
{ for(k=0;k<n;k++)
|
|
{ dum=a[imax][k];
|
|
a[imax][k]=a[j][k];
|
|
a[j][k]=dum;
|
|
}
|
|
*d=-*d; /* Change parity of d */
|
|
dum=LUtempvv[imax];
|
|
LUtempvv[imax]=LUtempvv[j]; /* Don't forget scale factor */
|
|
LUtempvv[j]=dum;
|
|
}
|
|
indx[j]=imax;
|
|
/*
|
|
** If the pivot element is zero, the matrix is singular
|
|
** (at least as far as the precision of the machine
|
|
** is concerned.) We'll take the original author's
|
|
** recommendation and replace 0.0 with "tiny".
|
|
*/
|
|
if(a[j][j]==(double)0.0)
|
|
a[j][j]=tiny;
|
|
|
|
if(j!=(n-1))
|
|
{ dum=1.0/a[j][j];
|
|
for(i=j+1;i<n;i++)
|
|
a[i][j]=a[i][j]*dum;
|
|
}
|
|
}
|
|
|
|
return(1);
|
|
}
|
|
|
|
/***********
|
|
** lubksb **
|
|
************
|
|
** Also from "Numerical Recipes in Pascal".
|
|
** This routine solves the set of n linear equations A X = B.
|
|
** Here, a[][] is input, not as the matrix A, but as its
|
|
** LU decomposition, created by the routine ludcmp().
|
|
** Indx[] is input as the permutation vector returned by ludcmp().
|
|
** b[] is input as the right-hand side an returns the
|
|
** solution vector X.
|
|
** a[], n, and indx are not modified by this routine and
|
|
** can be left in place for different values of b[].
|
|
** This routine takes into account the possibility that b will
|
|
** begin with many zero elements, so it is efficient for use in
|
|
** matrix inversion.
|
|
*/
|
|
static void lubksb( double a[][LUARRAYCOLS],
|
|
int n,
|
|
int indx[LUARRAYROWS],
|
|
double b[LUARRAYROWS])
|
|
{
|
|
|
|
int i,j; /* Indexes */
|
|
int ip; /* "pointer" into indx */
|
|
int ii;
|
|
double sum;
|
|
|
|
/*
|
|
** When ii is set to a positive value, it will become
|
|
** the index of the first nonvanishing element of b[].
|
|
** We now do the forward substitution. The only wrinkle
|
|
** is to unscramble the permutation as we go.
|
|
*/
|
|
ii=-1;
|
|
for(i=0;i<n;i++)
|
|
{ ip=indx[i];
|
|
sum=b[ip];
|
|
b[ip]=b[i];
|
|
if(ii!=-1)
|
|
for(j=ii;j<i;j++)
|
|
sum=sum-a[i][j]*b[j];
|
|
else
|
|
/*
|
|
** If a nonzero element is encountered, we have
|
|
** to do the sums in the loop above.
|
|
*/
|
|
if(sum!=(double)0.0)
|
|
ii=i;
|
|
b[i]=sum;
|
|
}
|
|
/*
|
|
** Do backsubstitution
|
|
*/
|
|
for(i=(n-1);i>=0;i--)
|
|
{
|
|
sum=b[i];
|
|
if(i!=(n-1))
|
|
for(j=(i+1);j<n;j++)
|
|
sum=sum-a[i][j]*b[j];
|
|
b[i]=sum/a[i][i];
|
|
}
|
|
return;
|
|
}
|
|
|
|
/************
|
|
** lusolve **
|
|
*************
|
|
** Solve a linear set of equations: A x = b
|
|
** Original matrix A will be destroyed by this operation.
|
|
** Returns 0 if matrix is singular, 1 otherwise.
|
|
*/
|
|
static int lusolve(double a[][LUARRAYCOLS],
|
|
int n,
|
|
double b[LUARRAYROWS])
|
|
{
|
|
int indx[LUARRAYROWS];
|
|
int d;
|
|
#ifdef DEBUG
|
|
int i,j;
|
|
#endif
|
|
|
|
if(ludcmp(a,n,indx,&d)==0) return(0);
|
|
|
|
/* Matrix not singular -- proceed */
|
|
lubksb(a,n,indx,b);
|
|
|
|
#ifdef DEBUG
|
|
printf("Solution:\n");
|
|
for(i=0;i<n;i++)
|
|
{
|
|
for(j=0;j<n;j++){
|
|
/*
|
|
printf("%6.2f ",a[i][j]);
|
|
*/
|
|
}
|
|
printf("%6.2f\t",b[i]);
|
|
/*
|
|
printf("\n");
|
|
*/
|
|
}
|
|
printf("\n");
|
|
#endif
|
|
|
|
return(1);
|
|
}
|