/***********************************************************************/
/*                                                                     */
/*                                                                     */
/* Program:                                                            */
/* --------                                                            */
/*                                                                     */
/* hexediv.c                                                           */
/*                                                                     */
/* This program is included in a package, consisting of 6 fast         */
/* programs, to calculate e=exp(1) hexadecimal. Programs to            */
/* convert from hexadecimal to decimal are also included.              */
/*                                                                     */
/*                                                                     */
/* Version:                                                            */
/* --------                                                            */
/*                                                                     */
/* First version: November 1998                                        */
/* Latest update: 2003-06-15                                           */
/*                                                                     */
/*                                                                     */
/* Author of the program:                                              */
/* ----------------------                                              */
/*                                                                     */
/* Stefan Spaennare, Lund, Sweden                                      */
/*                                                                     */
/* E-mail: stefan@spaennare.se                                         */
/*                                                                     */
/* Home page:    http://www.spaennare.se/index.html                    */
/* Program page: http://www.spaennare.se/ssprog.html                   */
/*                                                                     */
/* If you have problems with the program please let me know!           */
/*                                                                     */
/*                                                                     */
/* References:                                                         */
/* -----------                                                         */
/*                                                                     */
/* 1. The program now use a very fast FFT (fftsg_h.c) by Takuya Ooura: */  
/*                                                                     */
/* http://momonga.t.u-tokyo.ac.jp/~ooura/                              */
/*                                                                     */
/*                                                                     */
/* Copy-right:                                                         */
/* -----------                                                         */
/*                                                                     */
/* This program is free to copy if this information follows the        */
/* program.                                                            */
/*                                                                     */
/*                                                                     */
/* IMPORTANT!                                                          */
/* ----------                                                          */
/*                                                                     */
/* Please don't use the result from this program for important         */
/* purposes without doing an independent check of the result!          */
/* See also the notice below!                                          */
/*                                                                     */
/*                                                                     */
/* Function:                                                           */
/* ---------                                                           */
/*                                                                     */
/* How to calculate e=exp(1) hexadecimal and decimal:                  */
/*                                                                     */
/* First calculate the hexadecimal numbers a and b using one of the    */
/* ssehex.c, ssehex2.c or ssehex3.c. Here n=100000 is the number       */
/* of desired DECIMAL digits. Type the following:                      */
/*                                                                     */
/*>ssehex 100000                                                       */
/*                                                                     */
/* Then calculate e=exp(1) hexadecimal by e=a/b:                       */
/*                                                                     */
/*>hexediv ehex_a.dat ehex_b.dat e5.hex                                */
/*                                                                     */
/* Then convert e=exp(1) hexadecimal to decimal by:                    */
/*                                                                     */
/*>hex2dec e5.hex e5.dec                                               */
/*                                                                     */
/* or                                                                  */
/*                                                                     */
/*>fhex2dec e5.hex e5.dec                                              */
/*                                                                     */
/*                                                                     */
/* Compilation:                                                        */
/* ------------                                                        */
/*                                                                     */
/* On Linux computers just run "compall" to compile all six programs:  */
/*                                                                     */
/*>compall                                                             */
/*                                                                     */
/* The program works in UNIX, Linux and Windows (DOS using DJGPP)      */
/* It is recommended to optimize the program (i.e. use cc -O3 option   */
/* or similar option) when you compile.                                */
/*                                                                     */
/*>gcc -O3 -o program program.c -lm                                    */
/*                                                                     */
/* or (for those programs using FFT)                                   */
/*                                                                     */
/*>gcc -O3 -o program program.c fftsg_h.c -lm                                    */
/*                                                                     */
/* The time wraps around after 4295 seconds on some computers.         */
/*                                                                     */
/*                                                                     */
/* Hexadecimal numbers:                                                */
/* --------------------                                                */
/*                                                                     */
/* Hexadecimal numbers that can be converted to decimal with the       */
/* programs hex2dec.c and fhex2dec.c must be on the following          */
/* form (0 <= X <= 9):                                                 */
/*                                                                     */
/* X.A1B678F3EC95D6C4...                                               */
/*                                                                     */
/*                                                                     */
/* Algorithm and accuracy:                                             */
/* -----------------------                                             */
/*                                                                     */
/* The programs ssehex.c, ssehex2.c and ssehex3.c calculates           */
/* SUM 1/k! using the algorithm (C-like notation):                     */
/*                                                                     */
/* b=1;                                                                */
/* a=0;                                                                */
/*                                                                     */
/* for (i=n; i>=1; i--) {                                              */
/*    b=b*i;                                                           */
/*    a=a+b;                                                           */
/* }                                                                   */
/*                                                                     */
/* e=a/b;                                                              */
/*                                                                     */
/* Multiplications and divisions with the base (2^32) are made using   */
/* the shift operators (<< and >>) and the AND operator (&&) to be     */
/* fast. This is also the reason why the calculations are performed    */
/* with a hexadecimal (binary) base.                                   */
/*                                                                     */
/* The final computation of e=a/b is made with the separate program    */
/* hexediv.c using Newtons Iteration and FFT-multiplication            */
/* for division.                                                       */
/*                                                                     */
/* The programs use long long integer (64 bit) calculations and gives  */
/* probably correct result up to 2*10^9 digits of e=exp(1) (slightly   */
/* less than maxint = 2^31 digits). But it will take a VERY long       */
/* time! The result has however only been verified up to 10^7 digits.  */
/*                                                                     */
/* Note! If the final decimal result is expected to have n digits      */
/* The actual calculation is performed using slightly more digits.     */
/* All calculated digits are output from these programs, but only      */
/* the n desired decimal digits can be expected to be correct.         */
/*                                                                     */
/* WARNING!                                                            */
/* --------                                                            */
/*                                                                     */
/* This is valid for hexediv.c and fhex2dec.c:                         */
/*                                                                     */
/* If you want to make calculations with much more than 4 million      */
/* (2^22) digits the convolutions in the FFT multiplications can be    */
/* saturated which will give erroneous results or no convergence       */
/* at all. If this occur then set mulversion=2 which                   */
/* is 2 times slower than mulversion=1 but use the base 16             */
/* instead of 256. This means that the convolution will be             */
/* saturated for a much larger number of digits (> 100 millions).      */
/*                                                                     */
/*                                                                     */
/* Timing, benchmark and memory:                                       */
/* -----------------------------                                       */
/*                                                                     */
/* The CPU-times are given for a 1400 MHz Intel Celeron computer       */
/* (100 MHz memory buss) and 512 Mbyte memory. The cache memory is     */
/* 256 kbyte at full CPU speed. The operating system was Red Hat       */
/* Linux 9 with the gcc 3.22 C-compiler. The CPU load was on           */
/* average 1.00 (no other programs running).                           */
/*                                                                     */
/* The programs were compiled using the following lines:               */
/*                                                                     */
/* For ssehex.c, ssehex2.c, ssehex3.c and hex2dec.c:                   */
/*                                                                     */
/*>gcc -O3 -o program program.c -lm                                    */
/*                                                                     */
/* For hexediv.c and fhex2dec.c:                                       */
/*                                                                     */
/*>gcc -O3 -o program program.c fftsg_h.c -lm                          */
/*                                                                     */
/* How to compile see also the file "compall". TC below means          */
/* Time Complexity. The CPU-times are given for n=10^5=100000 and      */
/* and n=10^6=1000000 decimal digits.                                  */
/*                                                                     */
/*                                                                     */
/* Program       10^5       10^6         TC          Memory/(10^6)     */
/*---------------------------------------------------------------------*/
/* ssehex        3.46 s    791.00 s     O(n^2)         1.2 Mbyte       */ 
/* ssehex2       2.86 s    607.09 s     O(n^2)         1.2 Mbyte       */ 
/* ssehex3       3.78 s    707.71 s     O(n^2)         1.2 Mbyte       */ 
/* hexediv       5.32 s     62.24 s  O(n*log(n)^2)    21.0 Mbyte       */
/* hex2dec       1.35 s    266.02 s     O(n^2)         0.9 Mbyte       */
/* fhex2dec     12.48 s    157.51 s  O(n*log(n)^3)    36.0 Mbyte       */
/*---------------------------------------------------------------------*/
/*                                                                     */
/***********************************************************************/

/********************************************************************************/
/*                                                                              */
/* Notice                                                                       */
/* ======                                                                       */
/*                                                                              */
/* I make no warranties that this program is (1) free of errors, (2) consistent */
/* with any standard merchantability, or (3) meeting the requirements of a      */
/* particular application. This software shall not, partly or as a whole,       */
/* participate in a process, whose outcome can result in injury to a person or  */
/* loss of property. It is solely designed for analytical work. Permission to   */
/* use, copy and distribute is hereby granted without fee, providing that the   */
/* header above including this notice appears in all copies.                    */
/*                                                                              */
/*                                                            Stefan Spaennare  */
/*                                                                              */
/********************************************************************************/


#include <math.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>


double timetic=(double)(CLOCKS_PER_SEC);

char fname[80]="sspie.dat";  /* Output filename */

int mulversion=1;           /* Multiplication version (1 or 2) */

double bas=256.0;
double basi=0.00390625;

double pi=3.14159265358979323;


void mul(d,n)
double *d;
int n;
{
   int nn,nn3,nn2,j;
   double x1,x2,y1,y2;
   
   nn=n+1;
   nn2=2*n+2;
   nn3=nn2+1;

   x1=d[0]; y1=d[1];
   x2=y1; y1=0.0; y2=0.0;
   d[0]=x1*x2-y1*y2;
   d[1]=x1*y2+y1*x2;

   for (j=3; j<=nn; j=j+2) {
      x1=0.5*(d[j-1]+d[nn2-j-1]);
      x2=0.5*(d[j-1]-d[nn2-j-1]);
      y1=0.5*(d[j+1-1]+d[nn3-j-1]);
      y2=0.5*(d[j+1-1]-d[nn3-j-1]);
      
      d[j-1]=x1*y1-y2*(-x2);
      d[j+1-1]=x1*(-x2)+y2*y1;
      d[nn2-j-1]=x1*y1-(-y2)*x2;
      d[nn3-j-1]=x1*x2+(-y2)*y1;
   } /* for j */

} /* mul */


void lzero(a,nn)

unsigned char *a;
int *nn;
 
{
   int i,l,il,dl,ib;

   l=nn[0]; il=nn[1]; dl=nn[2];

   ib=l-il-dl+1;

   for (i=l;i>=ib; i--) {
      a[i]=0;
   } /* for i */
   
   a[l+1]=1;

} /* lzero */

      
void linttol(a,man0,exp0,nn)

unsigned char *a;
int man0,exp0;
int *nn;
 
{
   int i,l,il,dl,ib;

   l=nn[0]; il=nn[1]; dl=nn[2];

   ib=l-il-dl+1;

   for (i=l;i>=ib; i--) {
      a[i]=0;
   } /* for i */

   a[l-il+exp0+1]=abs(man0);
  
   if (man0 >= 0) { 
      a[l+1]=1;
   }
   else {
      a[l+1]=-1;
  } /* if */

} /* linttol */


void add(a,b,c,nn)

unsigned char *a;
unsigned char *b;
unsigned char *c;
int *nn;

{
  int i,ib,il,dl,l;

  int q,r,basint;

  l=nn[0]; il=nn[1]; dl=nn[2];

  ib=l-il-dl+1;

  basint=256;
  r=0;

  for (i=ib; i<=l; i++) {
     q=(int)(b[i])+(int)(c[i])+r;
     if (q >= basint) {
        a[i]=(unsigned char)(q-basint);
	r=1;
     }
     else {
        a[i]=(unsigned char)(q);
	r=0;
     } /* if */
  } /* for i */

} /* add */
      

void sub(a,b,c,nn)

unsigned char *a;
unsigned char *b;
unsigned char *c;
int *nn;

{
  int i,ib,il,dl,l;

  int q,r,basint;

  l=nn[0]; il=nn[1]; dl=nn[2];

  ib=l-il-dl+1;

  basint=256;
  r=0;

  for (i=ib; i<=l; i++) {
     q=(int)(b[i])-(int)(c[i])-r;
     if (q < 0) {
        a[i]=(unsigned char)(basint+q);
	r=1;
     }
     else {
        a[i]=(unsigned char)(q);
	r=0;
     } /* if */
  } /* for i */

} /* sub */


int lcompeps(a,nn)

unsigned char *a;
int *nn;

{
   int i,l,il,dl,k,ib,n,ll;

   l=nn[0]; il=nn[1]; dl=nn[2]; n=nn[3];

   ib=l-il-dl+1;

   k=0;

   ll=l-il-n;

   for (i=l; i>=ll; i--) {
      if (a[i]==0) {
         k++;
      } /* if */
   } /* for i */

   if (k==(il+n+1)) {
      return(-1);
   }
   else {
      return(1);
   } /* if */

}  /* lcompeps */


void lass(a,b,nn)

unsigned char *a;
unsigned char *b;
int *nn;

{
  int i,l,il,dl,ib;

  l=nn[0]; il=nn[1]; dl=nn[2];
  
  ib=l-il-dl+1;

  for (i=l+1; i>=ib; i--) {
     a[i]=b[i];
  } /* for i */

} /* lass */

void lcmul(a,b,c,nn,exp0)

unsigned char *a;
int b;
unsigned char *c;
int *nn;
int exp0;

{
   int i,j,k,l,il,dl,ib,b2;
   double q,r,b2r;

   l=nn[0]; il=nn[1]; dl=nn[2];

   if (b>=0) {
      b2=b;
   }
   else {
      b2=-b;
   } /* if */

   ib=l-il-dl+1;
   k=ib+exp0;

   for (i=ib; i<k; i++) {
      a[i]=0;
   } /* for i */

   r=0.0;
   b2r=(double)(b2);

   for (j=ib; j<=l; j++) {
      q=(double)(c[j])*b2r+r+0.5;
      r=(double)((int)(basi*q));
      if (k>=ib) {
         a[k]=(int)(q-bas*r);
      } /* if */
      k++;
   } /* for j */

   a[k]=(int)(r+0.5);

   k++;
   for (i=k; i<=l; i++) {
      a[i]=0;
   } /* for i */

   if (b>=0) {
      a[l+1]=c[l+1];
   }
   else {
      a[l+1]=-c[l+1];
   } /* if */

} /* lcmul */


void lmul(a,b,c,nn,q)
unsigned char *a;
unsigned char *b;
unsigned char *c;
int *nn;
double *q;

{


   int i,k,l,il,dl,ib,n,n2;
   int bi,b1,c1;

   double isize,rr,xx;
   double br,bri;


if (mulversion == 1) {

   l=nn[0]; il=nn[1]; dl=nn[2];

   n=2*l;
   n2=2*n;

   isize=1.0/sqrt((double)(n));

   ib=l-il-dl+1;
   
   for (i=0; i<n2; i++) {
      q[i]=0.0;
   } /* for i */
 
   k=0;

   for (i=ib; i<=l; i++) {
      q[k]=(double)(b[i])*isize;
      q[k+1]=(double)(c[i])*isize;
      k=k+2;
   } /* for i */


   cdft(n2,1,q);

   mul(q,n);

   cdft(n2,-1,q);

   rr=0.0;

   for (i=0; i<n2; i=i+2) {
      xx=q[i]+rr;
      rr=floor((xx+0.5)*basi);
      q[i]=xx-bas*rr; 
   } /* for i */

   k=n-2*il;

   for (i=ib; i<=l; i++) {
      a[i]=(int)(q[k]+0.5);
      k=k+2;
   } /* for i */

   a[l+1]=b[l+1]*c[l+1]; 

}
else {
  
   l=nn[0]; il=nn[1]; dl=nn[2];

   n=4*nn[0];
   n2=2*n;

   br=16.0;
   bri=0.0625;
   bi=16;

   isize=1.0/sqrt((double)(n));

   ib=l-il-dl+1;
   
   for (i=0; i<n2; i++) {
      q[i]=0.0;
   } /* for i */
 
   k=0;

   for (i=ib; i<=l; i++) {
      b1=b[i] % bi;
      q[k]=(double)(b1)*isize;
      b1=b[i] / bi;
      q[k+2]=(double)(b1)*isize;
      c1=c[i] % bi;
      q[k+1]=(double)(c1)*isize;
      c1=c[i] / bi;
      q[k+3]=(double)(c1)*isize;
      k=k+4;
   } /* for i */

   cdft(n2,1,q);

   mul(q,n);

   cdft(n2,-1,q);

   rr=0.0;

   for (i=0; i<n2; i=i+2) {
      xx=q[i]+rr;
      rr=floor((xx+0.5)*bri);
      q[i]=xx-br*rr; 
   } /* for i */

   k=n-4*il;

   for (i=ib; i<=l; i++) {
      a[i]=bi*(int)(q[k+2]+0.5)+(int)(q[k]+0.5);
      k=k+4;
   } /* for i */

   a[l+1]=b[l+1]*c[l+1]; 

} /* if */


} /* lmul */


 
void linv(a,b,x1,x2,t1,t2,nn,q)

unsigned char *a;
unsigned char *b;
unsigned char *x1;
unsigned char *x2;
unsigned char *t1;
unsigned char *t2;
int *nn;
double *q;

{

   double fv,fu,potd;

   int s,l,il,dl,ll,i,j,pot;

   l=nn[0]; il=nn[1]; dl=nn[2];
   ll=l-il;
 
   s=b[l+1];
   b[l+1]=1;

   lzero(x1,nn);

   fv=(double)(b[1]);

   for (j=1; j<=ll+1; j++) {
      fv=fv/256.0;
      fv=fv+(double)(b[j]);
   } /* for j */

   fu=1.0/fv;

   potd=log(fu)/(8.0*log(2.0));
   pot=(int)(potd)+1;

   printf("%18.10f %18.10f %18.10f\n",fv,fu,potd);

   for (j=ll+pot; j>=1; j--) {
      i=(int)(fu);
      x1[j]=(unsigned char)(i);
      fu=256.0*(fu-(double)(i));
   } /* for j */


   do {
      lass(x2,x1,nn);
      lcmul(a,2,x1,nn,0);
      lmul(t1,b,x1,nn,q);
      lmul(t2,x1,t1,nn,q);
      sub(x1,a,t2,nn);
      sub(t1,x1,x2,nn);
   } while (lcompeps(t1,nn)==1); 

   lass(a,x1,nn);

   a[l+1]=s;

} /* linv */ 
 
 
void ldivv(a,b,c,x1,x2,t1,t2,nn,q)

unsigned char *a;
unsigned char *b;
unsigned char *c;
unsigned char *x1;
unsigned char *x2;
unsigned char *t1;
unsigned char *t2;
int *nn;
double *q;

{

   linv(a,c,x1,x2,t1,t2,nn,q);

   lmul(a,a,b,nn,q);

} /* ldivv */    

 
unsigned char ppp1(s)
unsigned char s;
{
    unsigned char ss;

    if ((s >= '0') && (s <='9')) {
       ss=s-48;
    }
    else {
        if (s == 'A') {
	   ss=0x0a;
	} 
	else if (s == 'B') {
	   ss=0x0b;
	} 
	else if (s == 'C') {
	   ss=0x0c;
	} 
	else if (s == 'D') {
	   ss=0x0d;
	} 
	else if (s == 'E') {
	   ss=0x0e;
	} 
	else if (s == 'F') {
	   ss=0x0f;
	}  /* if */
    } /* if */

    return(ss);

} /* ppp1 */

char qqq1(s)
unsigned long s;
{
   char c;

   if ((s>=0) && (s<=9)) {
      c=s+48; 
   } 
   else {
     if (s==10) {
        c='A';
     } 
     else if (s==11) {
        c='B';
     }
     else if (s==12) {
        c='C';
     } 
     else if (s==13) {
        c='D';
     }
     else if (s==14) {
        c='E';
     }
     else if (s==15) {
        c='F';
     } /* if */
  } /* if */

  return(c);

} /* qqq1 */

void qqq2(sss,s)
char *sss;
unsigned char s;
{
   unsigned long temp;

   temp=s & 0xf0;
   temp=temp >> 4;
   sss[0]=qqq1(temp);

   temp=s & 0x0f;
   temp=temp >> 0;
   sss[1]=qqq1(temp);

   sss[2]='\0';

} /* qqq2 */


int main(argc,argv) 
int argc;
char *argv[];

{

   FILE *outfile,*infile1,*infile2;

   int l,ql;
   int i,j,length,ilength,pot,n;
   int fl1,fl2,fl11,fl22;

   unsigned char h1,h0,cc;

   char sss[10];

   int nn[4];

   clock_t time1,timediff1;

   unsigned char *a,*b,*c,*x1,*x2,*t1,*t2; 

   double *q;

   ql=4*mulversion;
 
   if (argc != 4) {
      printf("Usage: %s hexfile_a hexfile_b hexoutfile\n",argv[0]);
      exit(0);
   } /* if */

   if ((infile1 = fopen(argv[1],"r")) == NULL) {
      printf("File not found.\n");
      exit(0);
   } /* if */

   if ((infile2 = fopen(argv[2],"r")) == NULL) {
      printf("File not found.\n");
      exit(0);
   } /* if */

   rewind(infile1);
   fseek(infile1,0,SEEK_END);
   fl1=ftell(infile1);
   rewind(infile1);

   rewind(infile2);
   fseek(infile2,0,SEEK_END);
   fl2=ftell(infile2);
   rewind(infile2);


   n=fl1;
    
   printf("%6d\n",n-3);

   pot=(int)(log((double)(n))/log(2.0));

   length=1;

   for (i=1; i<=pot; i++) {
      length=2*length;
   } /* for i */

   printf("%6d\n",length);

   ilength=8;

   nn[0]=length;
   nn[1]=ilength;
   nn[2]=length-ilength;
   nn[3]=length-ilength-10;

   l=length;

   a = (unsigned char *)calloc(l+2,sizeof(unsigned char));
   b = (unsigned char *)calloc(l+2,sizeof(unsigned char));
   c = (unsigned char *)calloc(l+2,sizeof(unsigned char));
   x1 = (unsigned char *)calloc(l+2,sizeof(unsigned char));
   x2 = (unsigned char *)calloc(l+2,sizeof(unsigned char));
   t1 = (unsigned char *)calloc(l+2,sizeof(unsigned char));
   t2 = (unsigned char *)calloc(l+2,sizeof(unsigned char));

   q = (double *)calloc(ql*l+1,sizeof(double));

   lzero(a,nn);
   lzero(b,nn);
   lzero(c,nn);

   l=length-ilength;

   cc=getc(infile1);
   cc=getc(infile1);

   fl11=(fl1-3)/2;;

   i=l;
   for (j=1; j<=fl11; j++) {
      cc=getc(infile1);
      h1=ppp1(cc);
      cc=getc(infile1);
      h0=ppp1(cc);
      a[i]=(h1 << 4)+h0;
      i--;
   } /* for i */

   cc=getc(infile2);
   cc=getc(infile2);

   fl22=(fl2-3)/2;

   i=l;
   for (j=1; j<=fl22; j++) {
      cc=getc(infile2);
      h1=ppp1(cc);
      cc=getc(infile2);
      h0=ppp1(cc);
      b[i]=(h1 << 4)+h0;
      i--;
   } /* for i */

  
   printf("\n");

   time1=clock();

   ldivv(c,a,b,x1,x2,t1,t2,nn,q);

   timediff1=clock()-time1;

   printf("\n");

   printf("%10.2f\n",(double)(timediff1)/timetic);

   outfile=fopen(argv[3],"w");

   fprintf(outfile,"2.");

   for (i=l; i>=(l-fl11); i--) {
      qqq2(sss,c[i]);
      fprintf(outfile,"%s",sss);
   } /* for i */ 


/*
   printf("\n\n");

   for (i=l; i>=1; i--) {
       printf("%6d %6d\n",a[i],b[i]);
   }
*/


   fclose(infile1);
   fclose(infile2);
   fclose(outfile);


} /* End */
