///
/// This file is part of Rheolef.
///
/// Copyright (C) 2000-2009 Pierre Saramito <Pierre.Saramito@imag.fr>
///
/// Rheolef is free software; you can redistribute it and/or modify
/// it under the terms of the GNU General Public License as published by
/// the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Rheolef is distributed in the hope that it will be useful,
/// but WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
/// GNU General Public License for more details.
///
/// You should have received a copy of the GNU General Public License
/// along with Rheolef; if not, write to the Free Software
/// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
/// 
/// =========================================================================
// exp(tensor) : 2D is explicit while 3D uses pade' approx
// 
#include "rheolef/tensor.h"
#include "rheolef/ublas-invert.h"
// =========================================================================
// 2D case: explicit
// =========================================================================
//
// see tensor_exp_tst.mac
// see also maxima: matrixexp.usage allows explicit expm() computation in 2D
//    http://www.ma.utexas.edu/pipermail/maxima/2006/003031.html
//    /usr/share/maxima/5.27.0/share/linearalgebra/matrixexp.usage
// refs. KanGueFor-2009, p. 50 (is buggy: missing 2 factor in A00 & A11...)
namespace rheolef { 

template <class T>
static
tensor_basic<T>
exp2d (const tensor_basic<T>& chi) {
  static T eps = 1e3*std::numeric_limits<T>::epsilon();
  T a = chi(0,0), b = chi(1,0), c = chi(1,1);
  T b2=sqr(b);
  T d2 = sqr(a-c)+4*b2;
  T d = sqrt(d2);
  tensor_basic<T> A;
  if (fabs(d) < eps) { // chi = a*I
    A(0,0) = A(1,1) = exp(a);
    A(0,1) = A(1,0) = 0.;
    return A;
  }
  T a2=sqr(a), c2=sqr(c);
  T ed = exp(d);
  T k = exp((a+c-d)/2)/(2*d);
  T x1 = (ed+1)*d;
  T x2 = (ed-1)*(a-c);
  T x3 = 2*(ed-1)*b;
  A(0,0)          = k*(x1 + x2);
  A(1,1)          = k*(x1 - x2);
  A(1,0) = A(0,1) = k*x3;
  return A;
}

}// namespace rheolef
// =========================================================================
// 3D case: boost::ublas::expm (licence is GPL)
// =========================================================================
// http://www.guwi17.de/ublas/examples/expm.hpp
// http://www.guwi17.de/ublas/examples/expm_sample.cpp
// https://www.dbtsai.com/blog/2008-11-25-matrix-exponential/
//
//  Copyright (c) 2007
//  Tsai, Dung-Bang	
//  National Taiwan University, Department of Physics
// 
//  E-Mail : dbtsai (at) gmail.com
//  Begine : 2007/11/20
//  Last modify : 2007/11/22
//  Version : v0.1
//
//  EXPGM_PAD computes the matrix exponential exp(H) for general matrixs,
//  including complex and real matrixs using the irreducible (p,p) degree
//  rational Pade approximation to the exponential 
//  exp(z) = r(z)=(+/-)( I+2*(Q(z)/P(z))).
//
//  Usage : 
//
//    U = expm_pad(H)
//    U = expm_pad(H, p)
//    
//    where p is internally set to 6 (recommended and gererally satisfactory).
//
//  See also MATLAB supplied functions, EXPM and EXPM1.
//
//  Reference :
//  EXPOKIT, Software Package for Computing Matrix Exponentials.
//  ACM - Transactions On Mathematical Software, 24(1):130-156, 1998
//
//  Permission to use, copy, modify, distribute and sell this software
//  and its documentation for any purpose is hereby granted without fee,
//  provided that the above copyright notice appear in all copies and
//  that both that copyright notice and this permission notice appear
//  in supporting documentation.  The authors make no representations
//  about the suitability of this software for any purpose.
//  It is provided "as is" without express or implied warranty.
//
#ifndef _BOOST_UBLAS_EXPM_
#define _BOOST_UBLAS_EXPM_
#include <complex>
#include <boost/numeric/ublas/vector.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/lu.hpp>

namespace boost { namespace numeric { namespace ublas {

namespace details {

template <class T>
struct get_float_traits { typedef T type; };
template <class T>
struct get_float_traits<std::complex<T> > { typedef T type; };

} // namespace details

template<typename MATRIX>
MATRIX
expm_pad (const MATRIX &H, const size_t p = 6) {
  using std::max;
  // Note: the default value p=6 is fine for double precision
  // could it be adjusted automatically with the float type ? 
  typedef typename MATRIX::value_type value_type;
  typedef typename MATRIX::size_type  size_type;
  typedef typename details::get_float_traits<value_type>::type real_value_type;

  check_macro (H.size1() == H.size2(), "exp(tensor): invalid sizes");	
  const size_type n = H.size1();
  const identity_matrix<value_type> I(n);
  matrix<value_type> U(n,n), H2(n,n), P(n,n), Q(n,n);
  real_value_type norm = 0.0;

  // calcuate Pade coefficients  (1-based instead of 0-based as in the c vector)
  vector<real_value_type> c(p+2);
  c(0) = 0.; // not used
  c(1) = 1.;  
  for (size_type i = 1; i <= p; ++i) {
    c(i+1) = c(i) * ((p + 1.0 - i)/(i * (2.0 * p + 1 - i)));
  }
  // calcuate the infinty norm of H, which is defined as the largest row sum of a matrix
  for (size_type i = 0; i < n; ++i) {
    real_value_type temp = 0.0;
    for (size_type j = 0; j < n; j++) {
      temp += fabs(H(i,j)); // Correct me, if H is complex, can I use that abs?
    }
    norm = std::max<real_value_type>(norm, temp);
  }
  if (norm == 0) {
    // H == 0 => U=exp(H)=I
    for (size_type i = 0; i < n; ++i) {
      for (size_type j = 0; j < n; ++j) {
        U(i,j) = 0;
      } 
      U(i,i) = 1;
    }
    return U;
  }
  // scaling, seek s such that || H*2^(-s) || < 1/2, and set scale = 2^(-s)
  size_type s = 0;
  real_value_type scale = 1.0;
  if (norm > 0.5) {
    s = static_cast<int> (max(real_value_type(0), floor((log(norm)/log(2.0) + 2.0))));
    scale /= static_cast<real_value_type>(pow(2.0, s));
  }
  U.assign (scale*H); // Here U is used as temp value due to that H is const

  // Horner evaluation of the irreducible fraction, see the following ref above.
  // Initialise P (numerator) and Q (denominator)
  H2.assign (prod(U, U));
  Q.assign  (c(p+1)*I);
  P.assign  (c(p)*I);
  size_type odd = 1;
  for (size_type k = p-1; k > 0; --k) {
    if (odd == 1) {
      Q = prod(Q,H2) + c(k)*I; 
    } else {
      P = prod(P,H2) + c(k)*I;
    }
    odd = 1 - odd;
  }
  if (odd == 1) {
    Q = prod(Q,U);	
    Q -= P;
    // U.assign( -(I + 2*(Q\P)));
  } else {
    P = prod(P,U);
    Q -= P;
    // U.assign( I + 2*(Q\P));
  }
  // In origine expokit package, they use lapack ZGESV to obtain inverse matrix,
  // and in that ZGESV routine, it uses LU decomposition for obtaing inverse matrix.
  // Since in ublas, there is no matrix inversion template, I simply use the build-in
  // LU decompostion package in ublas, and back substitute by myself.
  //
  // implement matrix inversion
  permutation_matrix<size_type> pm(n); 
  int status = lu_factorize (Q, pm);
  check_macro (status == 0, "exp(tensor): inversion failed");
  H2 = I;  // H2 is not needed anymore, so it is temporary used as identity matrix for substituting.
  lu_substitute(Q, pm, H2); 
  if (odd == 1) {
    U.assign (-(I + real_value_type(2.0)*prod(H2,P)));
  } else {
    U.assign (  I + real_value_type(2.0)*prod(H2,P) );
  }
  // squaring 
  for (size_t i = 0; i < s; ++i) {
    U = prod(U,U);
  }
  return U;
}

}}}
#endif // _BOOST_UBLAS_EXPM_

// =========================================================================
// tensor interface
// =========================================================================
namespace rheolef { 

template <class T>
tensor_basic<T>
exp (const tensor_basic<T>& a, size_t d)
{
  if (d == 2) return exp2d (a);
  // 3D case:
  using namespace boost::numeric::ublas;
  using namespace std;
  typedef typename tensor_basic<T>::size_type size_type;

  matrix<complex<T> > a1 (d,d);
  for (size_type i = 0; i < d; ++i)
  for (size_type j = 0; j < d; ++j) a1(i,j) = a(i,j);
  matrix<complex<T> > b1 = expm_pad(a1);
  // TODO: check that a is symmetric ; otherwise, the result could be complex ?
  tensor b;
  for (size_type i = 0; i < d; ++i)
  for (size_type j = 0; j < d; ++j) b(i,j) = b1(i,j).real();
  return b;
}
#if defined(_RHEOLEF_HAVE_FLOAT128)
// still compilation pbs with ublas
template<>
tensor_basic<float128>
exp (const tensor_basic<float128>& a, size_t d)
{
  fatal_macro ("exp(tensor): not yet supported with float128");
  return tensor_basic<float128>();
}
#endif // _RHEOLEF_HAVE_FLOAT128
// ----------------------------------------------------------------------------
// instanciation in library
// ----------------------------------------------------------------------------
#define _RHEOLEF_instanciation(T)                                             	\
template tensor_basic<T> exp (const tensor_basic<T>& a, size_t d);		\

_RHEOLEF_instanciation(Float)

}// namespace rheolef
