#ifndef FAST_EARLYRED_H
#define FAST_EARLYRED_H





/*
 * fast_early_red: variant of the `fast<ZT,FT>` reduction driver whose
 * BabaiCall hook periodically size-reduces every row from the last one down
 * to the current index `kappa` ("early reduction") instead of only row
 * `kappa`.
 *
 * The class overrides three pieces:
 *   - BabaiCall : decides between a single-row Babai call and a sweep over
 *                 rows d-1 .. kappa;
 *   - GSO       : fills r[red][j] and mu[red][j] for one row `red`;
 *   - Babai     : the size-reduction loop itself, parameterised by the row
 *                 `red` being reduced.
 *
 * All state (B, appB, appSP, mu, r, s, expo, kappa, halfplus,
 * onedothalfplus) is inherited from the base class; its exact layout is not
 * visible in this file.
 */
template<class ZT,class FT> class fast_early_red:public fast<ZT,FT>
{
protected:
  /*
   * Dispatch one size-reduction step.
   *
   * If `newvec` has exceeded `newvecmax`, the threshold is doubled, the
   * counter is reset to 0 and Babai is run on every row `target` from the
   * last row of B down to `kappa`; after each successful call the row's
   * floating-point data is refreshed through set_line.  Otherwise Babai is
   * run on row `kappa` only.
   *
   * alpha      per-row first parameter forwarded to Babai (indexed by row)
   * zeros      forwarded to Babai unchanged
   * kappamax   forwarded to Babai unchanged
   * var_k, n   the column count handed to Babai/set_line is min(var_k, n)
   * ztmp       scratch integer forwarded to Babai
   * newvec     in/out counter, reset to 0 when a sweep is triggered
   * newvecmax  in/out threshold, doubled when a sweep is triggered
   *
   * Returns 0 on success, or this->kappa as soon as a Babai call reports a
   * non-zero status.
   */
  virtual inline int BabaiCall(int*alpha, int zeros, int kappamax, int var_k,
                               Z_NR<ZT>&ztmp, int& newvec, int& newvecmax,
                               int n)
  {
    // The row count must be read on every call: a function-local `static`
    // would be initialised once and then shared by every object of this
    // instantiation, going stale for a matrix with a different row count.
    const int d=this->B->GetNumRows();

    // Number of columns actually processed; loop-invariant.
    const int ncols = var_k<=n?var_k:n;

    if (newvec>newvecmax)
      {
        newvecmax*=2;newvec=0;
#ifdef VERBOSE
        cerr<<"Early-reduction. \n";
#endif
        for (int target=d-1; target>=this->kappa; target--)
          {
            //  int var_k2=var_k<=target+1+shift?target+1+shift:var_k;
            if (Babai (alpha[target], zeros, kappamax, ncols,
                       ztmp, target))
              return this->kappa;

            // Row `target` of B may have changed: refresh appB[target] and
            // store the value returned by set_line as its exponent.
            this->expo[target] = set_line (this->appB[target],
                                           (this->B)->GetVec(target),
                                           ncols);
          }
      }
    else
      if (Babai (alpha[this->kappa], zeros, kappamax, ncols,
                 ztmp, this->kappa))
        return this->kappa;

    return 0;
  }

  /*
   * Fill r[red][j] and mu[red][j] for every j in [aa, kappa).
   *
   * For each such j:
   *   - appSP[red][j] is recomputed with fpScalarProduct when it holds NaN
   *     (detected with the x != x test);
   *   - r[red][j]  = appSP[red][j] - sum_{k=zeros+1}^{j-1} mu[j][k]*r[red][k];
   *   - mu[red][j] = r[red][j] / r[j][j].
   *
   * `a` and `kappamax` are not used here; `ztmp` is only used by the DEBUG
   * trace.
   *
   * Returns the largest value of expo[red]-expo[j] seen over the loop, or
   * 0.0 if none is positive (or the loop is empty).
   */
  virtual inline double GSO(int a, int zeros, int kappamax, int n,Z_NR<ZT>& ztmp,int aa,int red)
  {
    int j,k;
    double max=0.0;
    FT rtmp,tmp;
    for (j=aa; j<this->kappa; j++)
      {
        // NaN marks a cached scalar product as invalid (see Babai).
        if (this->appSP[red][j]!=this->appSP[red][j])
          {
            this->appSP[red][j] = fpScalarProduct (this->appB[red], this->appB[j], n);
          }

#ifdef DEBUG
        {
          printf("\n          j is %d\n", j);
          // Trace the entry that was actually computed above, i.e. row `red`
          // (which differs from kappa during an early-reduction sweep).
#ifdef FORCE_LONGDOUBLE
          printf("          appSP[%d][%d] is: %1.5Le",
                 red, j, this->appSP[red][j]);
#else
          printf("          appSP[%d][%d] is: %E",
                 red, j, this->appSP[red][j]);
#endif
          printf(", which approximates ");
          ScalarProduct (ztmp, this->B->GetVec(red), this->B->GetVec(j), n);
          printf("\n");
          this->B->print(this->kappa+1, n);
          ztmp.print();
          printf("\n          Norm of B[%d]^2: ", j);
          ScalarProduct (ztmp, this->B->GetVec(j), this->B->GetVec(j), n);
          ztmp.print();
          printf("\n          Norm of B[%d]^2: ",red);
          ScalarProduct (ztmp, this->B->GetVec(red), this->B->GetVec(red), n);
          ztmp.print();
        }
#endif

        // The three cases below all evaluate the same sum over
        // k = zeros+1 .. j-1; they only differ in how many terms exist.
        if (j > zeros+2)
          {
            tmp = this->mu[j][zeros+1] * this->r[red][zeros+1];
            rtmp = this->appSP[red][j] - tmp;
            for (k=zeros+2; k<j-1; k++)
              {
                tmp = this->mu[j][k] * this->r[red][k];
                rtmp = rtmp - tmp;
              }
            tmp = this->mu[j][j-1] * this->r[red][j-1];
            this->r[red][j] = rtmp - tmp;
          }
        else if (j==zeros+2)
          {
            tmp = this->mu[j][zeros+1] * this->r[red][zeros+1];
            this->r[red][j] = this->appSP[red][j] - tmp;
          }
        else this->r[red][j] = this->appSP[red][j];

        this->mu[red][j] = this->r[red][j] / this->r[j][j];

        // Track the largest exponent gap between row `red` and row j.
        int x=this->expo[red]-this->expo[j];
        if (max<x) max=x;
      }
    return max;
  }


public:
  /*
   * Size-reduce row `red` of B against rows kappa-1 .. zeros+1.
   *
   * Each pass of the main loop
   *   1. calls GSO to refresh r[red][*] and mu[red][*];
   *   2. from the third pass on, gives up (returns this->kappa) when the
   *      value returned by GSO two passes ago is NaN or is less than the
   *      previous pass's value plus 10;
   *   3. for j = kappa-1 down to zeros+1, if |mu[red][j]| scaled by
   *      2^(expo[red]-expo[j]) exceeds `halfplus`, subtracts an integer
   *      multiple X of row j from row `red` and updates mu[red][k], k < j;
   *   4. if any row operation happened, rebuilds appB[red]/expo[red] with
   *      set_line and marks the affected appSP entries as NaN so they get
   *      recomputed.
   * The loop repeats until a pass performs no row operation.
   *
   * When `red == kappa`, the s[] values for indices zeros+1 .. kappa-1 are
   * then recomputed from appSP[red][red], mu[red][*] and r[red][*].
   *
   * a         first index from which GSO must recompute on the first pass
   *           (clamped to at least zeros+1)
   * zeros     indices <= zeros are skipped everywhere
   * kappamax  upper bound used when invalidating appSP columns
   * n         number of coordinates of each row that are processed
   * ztmp      scratch integer
   * red       index of the row being reduced
   *
   * Returns 0 on normal termination, this->kappa when the guard in step 2
   * fires.
   */
  virtual int Babai (int a, int zeros, int kappamax, int n,
                     Z_NR<ZT>& ztmp, int red)
  {
    int i, j, k, test, aa, exponent;
    signed long xx;
    FT tmp, rtmp;
#ifdef DEBUG
    int loops=0;
#endif
    double max=0.0, max2=0.0, max3=0.0;

    aa = (a > zeros) ? a : zeros+1;

#ifdef DEBUG
    printf("\nappSP: \n");
    Print_matf (this->appSP, this->expo, kappamax+1, kappamax+1);
    printf("\nr: \n");
    Print_matf (this->r, this->expo, kappamax+1, kappamax+1);
    printf("\n          STARTING BABAI WITH k=%d\n", this->kappa);
    printf("\nappB: \n");
    Print_matf (this->appB, this->expo, kappamax+1, n);
    loops = 0;
    printf("\nmu: \n");
    Print_matf (this->mu, this->expo, kappamax+1, kappamax+1);
    printf ("\n\na is %d, zeros is %d, aa is %d\n", a, zeros, aa);
    this->B->print(kappamax+1, n);
#endif

    int ll=0;   /* number of passes of the main loop so far */
    do
      {
        ll++;
        test=0;

#ifdef DEBUG
        if (loops++ > LOOPS_BABAI)
          {
            printf("INFINITE LOOP?\n");
            abort();
          }
#endif

        /* ************************************** */
        /* Step2: compute the GSO for stage kappa */
        /* ************************************** */
        /* keep the GSO return values of the two previous passes */
        max3=max2;
        max2=max;
        max=GSO(a,zeros,kappamax,n,ztmp,aa,red);


        if(ll>=3)
          {
#ifdef DEBUG
            cout << "\nrtmp="<<rtmp<<"\nmax2="<<max2<<"\nmax3="<<max3;
            cout<<"\n";
#endif
            /* bail out unless the value from two passes ago exceeds the
               previous one by at least 10 (x != x is the NaN test) */
            if( max3!=max3 || max3<max2+10)
              {
#ifdef VERBOSE
                cerr << "unexpected behaviour -> exit";
#endif
                return this->kappa;
              }
          }

#ifdef DEBUG
        if (loops <=LOOPS_BABAI)
          {
            printf("\nmu :\n");
            Print_matf (this->mu, this->expo, this->kappa+1, this->kappa+1);
            printf("\nr :\n");
            Print_matf (this->r, this->expo, this->kappa+1, this->kappa+1);
            cout.flush();
          }
#endif

        /* **************************** */
        /* Step3--5: compute the X_j's  */
        /* **************************** */

        for (j=this->kappa-1; j>zeros; j--)
          {
            /* test of the relaxed size-reduction condition */

            tmp = fabs (this->mu[red][j]);
            tmp = ldexp (tmp, this->expo[red]-this->expo[j]);

#ifdef DEBUG
#ifdef FORCE_LONGDOUBLE
            if (loops<=LOOPS_BABAI) printf( "tmp is: %1.5Le\n", tmp);
#else
            if (loops<=LOOPS_BABAI) printf( "tmp is: %E\n", tmp);
#endif
            cout.flush();
#endif

            if (tmp > this->halfplus)
              {
                test = 1;
                exponent = this->expo[j] - this->expo[red];

                /* we consider separately the cases X = +-1 */
                if (tmp <= this->onedothalfplus)
                  {
#ifdef DEBUG
                    printf(" X is pm1\n");
#endif

                    if ( this->mu[red][j] >=0 )   /* in this case, X is 1 */
                      {
                        for (k=zeros+1; k<j; k++)
                          {
                            tmp = ldexp (this->mu[j][k], exponent);
                            this->mu[red][k] =  this->mu[red][k] - tmp;
                          }

                        for (i=0; i<n; i++)
                          this->B->Get(red,i).sub(this->B->Get(red,i),this->B->Get(j,i));
                      }

                    else          /* otherwise X is -1 */
                      {
                        for (k=zeros+1; k<j; k++)
                          {
                            tmp = ldexp (this->mu[j][k], exponent);
                            this->mu[red][k] = this->mu[red][k] + tmp;
                          }

                        for (i=0; i<n; i++)
                          this->B->Get(red,i).add(
                                            this->B->Get(red,i),
                                            this->B->Get(j,i));
                      }
                  }

                else   /* we must have |X| >= 2 */
                  {
                    tmp = ldexp (this->mu[red][j] , -exponent);

                    /* X fits in a signed long: round and apply directly */
                    if ((tmp < static_cast<FT>( MAX_LONG_FAST))
                        &&(tmp > static_cast<FT>( -MAX_LONG_FAST)))
                      {
                        tmp = rint (tmp);

                        for (k=zeros+1; k<j; k++)
                          {
                            rtmp = tmp * this->mu[j][k];
                            rtmp = ldexp (rtmp, exponent);
                            this->mu[red][k] = this->mu[red][k] - rtmp;
                          }

                        xx = static_cast<signed long int>( tmp);

#ifdef DEBUG
                        if (loops<=LOOPS_BABAI)
                          {
                            printf("          xx[%d] is %ld\n", j, xx);
#ifdef FORCE_LONGDOUBLE
                            printf("          and tmp was %1.5Le\n", tmp);
#else
                            printf("          and tmp was %E\n", tmp);
#endif
                          }
#endif

                        for (i=0; i<n; i++)
                          {
                            if (xx > 0)
                              {
                                this->B->Get(red,i).submul_ui(
                                                        this->B->Get(j,i),
                                                        (unsigned long int) xx);
                              }
                            else
                              {
                                this->B->Get(red,i).addmul_ui(
                                                        this->B->Get(j,i),
                                                        (unsigned long int) -xx);
                              }
                          }
                      }

                    /* X does not fit in a signed long: split it into an
                       integer mantissa xx and a power of two `exponent` */
                    else
                      {
                        tmp = frexp(this->mu[red][j], &exponent);
                        tmp = tmp * MAX_LONG_FAST;
                        xx = (signed long int) tmp;
                        exponent += this->expo[red]-this->expo[j] - CPU_SIZE_1 ;

#ifdef DEBUG
#ifdef FORCE_LONGDOUBLE
                        printf("tmp is %1.5Le", tmp);
#else
                        printf("tmp is %E", tmp);
#endif
                        printf("\nand exponent is %d, and X is %ld\n",
                               exponent, xx);
#endif

                        /* This case is extremely rare: never occured to me */
                        if (exponent <= 0)
                          {
#ifdef VERBOSE
                            fprintf(stderr,
                                    "This is the rare Babai case\n");
#endif
                            /* NOTE(review): left-shifting a negative xx is
                               not well-defined before C++20 -- confirm this
                               path is only reached with xx >= 0 or accept the
                               implementation's behaviour. */
                            xx = xx << -exponent;
                            exponent = 0;

                            for (i=0; i<n; i++)
                              {
                                ztmp.mul_si(this->B->Get(j,i), xx);
                                this->B->Get(red,i).sub(
                                                  this->B->Get(red,i), ztmp);
                              }
                            for (k=zeros+1; k<j; k++)
                              {
                                rtmp = ((FT) xx) * this->mu[j][k];
                                rtmp = ldexp (rtmp, this->expo[j]-this->expo[red]);
                                this->mu[red][k] = this->mu[red][k] - rtmp;
                              }

                          }
                        else
                          {
                            for (i=0; i<n; i++)
                              {
                                ztmp.mul_2exp(this->B->Get(j,i), exponent);
                                if (xx>0)
                                  {
                                    this->B->Get(red,i).submul_ui(
                                                            ztmp,
                                                            (unsigned long int) xx);
                                  }
                                else
                                  {
                                    this->B->Get(red,i).addmul_ui(
                                                            ztmp,
                                                            (unsigned long int) -xx);
                                  }
                              }

                            for (k=zeros+1; k<j; k++)
                              {
                                rtmp = ((FT) xx) * this->mu[j][k];
                                rtmp = ldexp (rtmp,
                                              exponent+this->expo[j]-this->expo[red]);
                                this->mu[red][k] = this->mu[red][k] - rtmp;
                              }
                          }
                      }
                  }
              }
          }


        if (test)   /* Anything happened? */
          {
            /* Row `red` changed: rebuild its floating-point copy and make
               the next GSO pass start from zeros+1 again. */
            this->expo[red] = set_line (this->appB[red], this->B->GetVec(red), n);
            aa = zeros+1;
            /* Invalidate every cached scalar product involving row `red`. */
            for (i=zeros+1; i<=this->kappa; i++)
              this->appSP[red][i] = NAN;//0.0/0.0;
            for (i=this->kappa+1; i<=kappamax; i++)
              this->appSP[i][red] = NAN;//0.0/0.0;
          }
        else if(red!=this->kappa){
          /* No change on this pass, but for a row other than kappa the
             appSP[red][*] entries are still marked invalid on exit. */
          for (i=zeros+1; i<=this->kappa; i++)
            this->appSP[red][i] = NAN;//0.0/0.0;
        }

#ifdef DEBUG
        if (loops<=LOOPS_BABAI)
          {
            printf("          test is %d\n", test);
            printf("\nmu: \n");
            Print_matf (this->mu, this->expo, this->kappa+1, this->kappa+1);
            printf("\nr: \n");
            Print_matf (this->r, this->expo, this->kappa+1, this->kappa+1);
            cout.flush();
          }
#endif

      }
    while (test);

    if (red==this->kappa){
      /* Recompute the squared norm if it was invalidated above. */
      if (this->appSP[red][red]!=this->appSP[red][red])
        {
          this->appSP[red][red] = fpNorm (this->appB[red], n);
        }
      this->s[zeros+1] = this->appSP[red][red];

      // the last s[kappa-1]=r[kappa][kappa] is computed only if kappa increases

      for (k=zeros+1; k<this->kappa-1; k++)
        {
          tmp = this->mu[red][k] * this->r[red][k];
          this->s[k+1] = this->s[k] - tmp;
        }
#ifdef DEBUG
      printf ("s=");
      for (k=0; k<this->kappa; k++)
        printf ("%E ", this->s[k]);
      printf ("\n");
      cout.flush();
#endif
    }

#ifdef DEBUG
    printf("          Number of loops is %d\n", loops);
    cout.flush();
#endif
    return 0;
  }



  /*
   * Forwards all arguments unchanged to the fast<ZT,FT> constructor; no
   * additional state is set up here.
   */
  fast_early_red(ZZ_mat<ZT>*B,int precision=0,double eta=0.51,double delta=0.99):fast<ZT,FT>::fast(B,precision,eta,delta)
  {
  }
};





#endif
