#ifndef AVE_H

#define AVE_H

#include <errut/errorbase.h>
#include <vector>
#include <cmath>

// Abstract helper that drives an iterative update rule (supplied by a subclass
// through getNextPosition()) and, after each iteration, extrapolates the new
// iterate additively along the difference with the previous iterate.
//
// T is the scalar type of the vector entries. The implementation converts
// between T and double and applies std::sqrt to a T, so T is presumably a
// floating point type - TODO confirm against the instantiations in use.
template<class T>
class AdditiveVectorExtrapolation : public errut::ErrorBase
{
public:
	// Sizes all internal work vectors to 'dimension' entries. If 'secondOrder'
	// is set, step() adds an extra term based on the change between the last
	// two iterate differences, starting with the third step after a (re)start.
	AdditiveVectorExtrapolation(size_t dimension, bool secondOrder);
	~AdditiveVectorExtrapolation();

	// Copies values from pPos into the stored starting position, beginning at
	// destination index startDestPos, and resets the step counter so that the
	// next step() call starts a fresh sequence. A non-negative numEntries is
	// the number of entries to copy; a negative value selects the default
	// amount chosen by the implementation.
	void setStartingPoint(const T *pPos, int startDestPos = 0, int numEntries = -1);

	// Performs one iteration: obtains a new iterate through getNextPosition()
	// and, from the second step after a (re)start onwards, also computes an
	// extrapolated position. The implementation in this file always returns true.
	bool step();

	// Number of entries in each position vector, as passed to the constructor.
	size_t getDimension() const;

	// Pointer to the extrapolated position stored for the current step.
	const T *getAcceleratedPosition() const;
	// Pointer to the plain (non-extrapolated) iterate stored for the current step.
	const T *getIteratedPosition() const;
	// Extrapolation factor stored by the most recent extrapolating step().
	T getAlpha() const;
protected:
	// Must write the successor of pCurPos into pNextPos. Both arrays are handed
	// over by step() and point into vectors of getDimension() entries.
	virtual void getNextPosition(T *pNextPos, const T *pCurPos) = 0;
	// Hook invoked by step() with the freshly extrapolated position; a subclass
	// may modify it in place. The default implementation does nothing.
	virtual void onExtrapolatedPosition(T *pPos)							{ }
private:
	// Four-slot buffers indexed with (step counter % 4):
	//   m_x: iterates returned by getNextPosition()
	//   m_y: extrapolated positions
	//   m_p: iterate minus the extrapolated position it was computed from
	//   m_q: iterate minus the previous iterate
	std::vector<T> m_x[4], m_y[4], m_p[4], m_q[4];
	// Number of step() calls since construction or the last setStartingPoint().
	int m_k;
	// Extrapolation factor written by step().
	T m_alpha;
	// Whether step() uses the additional second-order term.
	bool m_secondOrder;
};

// Creates an extrapolator for vectors of 'dimension' entries.
//
// dimension   - number of entries every internal work vector is resized to.
// secondOrder - stored and later consulted by step() to decide whether the
//               second-order extrapolation term is used.
//
// The step counter starts at zero. The extrapolation factor is explicitly
// initialised to zero so that getAlpha() returns a well-defined value even
// before step() has computed one (it used to be left uninitialised).
template<class T>
AdditiveVectorExtrapolation<T>::AdditiveVectorExtrapolation(size_t dimension, bool secondOrder)
	: m_k(0),
	  m_alpha(0),
	  m_secondOrder(secondOrder)
{
	for (int i = 0 ; i < 4 ; i++)
	{
		m_x[i].resize(dimension);
		m_y[i].resize(dimension);
		m_p[i].resize(dimension);
		m_q[i].resize(dimension);
	}
}

// Destructor: there is nothing to release by hand here.
template<typename T>
AdditiveVectorExtrapolation<T>::~AdditiveVectorExtrapolation() {}

// Stores (part of) a new starting position and restarts the iteration.
//
// pPos         - source values; entry i is written to destination index
//                startDestPos + i.
// startDestPos - index in the stored starting position at which writing begins.
// numEntries   - number of entries to copy; a negative value means "up to the
//                end of the stored vector".
//
// The copy is clamped to the stored vector: entries that would land past its
// end are not written, and a negative or too large startDestPos copies
// nothing. Previously such calls (including the default numEntries combined
// with a non-zero startDestPos) wrote outside the vector.
//
// The step counter is reset in every case, so the next step() call begins a
// fresh sequence.
template<class T>
void AdditiveVectorExtrapolation<T>::setStartingPoint(const T *pPos, int startDestPos, int numEntries)
{
	const size_t dimension = m_x[0].size();

	m_k = 0;

	if (startDestPos < 0)
		return;

	const size_t start = static_cast<size_t>(startDestPos);
	if (start >= dimension)
		return;

	// Never copy more than what fits between 'start' and the end of the vector.
	const size_t room = dimension - start;
	size_t n = room;

	if (numEntries >= 0 && static_cast<size_t>(numEntries) < room)
		n = static_cast<size_t>(numEntries);

	//#pragma omp parallel for
	for (size_t i = 0 ; i < n ; i++)
		m_x[0][start+i] = pPos[i];
}

// Performs one iteration of the extrapolation scheme. Always returns true.
//
// First call after construction or setStartingPoint():
//   the starting position is advanced once through getNextPosition(); the
//   result is stored both as iterate and as "extrapolated" position (no
//   extrapolation is possible yet) and the first difference is recorded.
//
// Every later call:
//   1. advances the previous extrapolated position through getNextPosition();
//   2. records p = new iterate - previous extrapolated position and
//      q = new iterate - previous iterate;
//   3. computes alpha as the projection coefficient of the newest p on the
//      one before it, clamped to [0, 1];
//   4. stores the extrapolated position x + alpha*q, or - in second-order mode
//      from the third step on - x + sqrt(alpha)*q + 0.5*alpha*(q - previous q),
//      in which case sqrt(alpha) is what getAlpha() reports afterwards;
//   5. hands the extrapolated position to onExtrapolatedPosition().
//
// Loop indices are size_t to match the vector size; the earlier int indices
// mixed signed and unsigned values in the loop conditions.
template<class T>
bool AdditiveVectorExtrapolation<T>::step()
{
	const size_t n = m_x[0].size();

	if (m_k == 0)
	{
		getNextPosition(&(m_x[1][0]), &(m_x[0][0]));

//		#pragma omp parallel for
		for (size_t i = 0 ; i < n ; i++)
		{
			m_p[0][i] = m_x[1][i]-m_x[0][i];
			m_y[1][i] = m_x[1][i];
			// Slot 0 holds no extrapolated position yet; presumably this is a
			// recognisable marker value for debugging - TODO confirm.
			m_y[0][i] = -123456;
		}

		m_k++;
		return true;
	}

	m_k++;

	// Slots of the four-entry buffers for this step and the two before it.
	const int curK = m_k%4;
	const int prevK = (m_k-1+4)%4;
	const int prevPrevK = (m_k-2+4)%4;

	getNextPosition(&(m_x[curK][0]), &(m_y[prevK][0]));

//	#pragma omp parallel for
	for (size_t i = 0 ; i < n ; i++)
	{
		m_p[prevK][i] = m_x[curK][i] - m_y[prevK][i];
		m_q[curK][i] = m_x[curK][i] - m_x[prevK][i];
	}

	// Accumulate in double regardless of T.
	double alphaNum = 0;
	double alphaDenom = 0;

//	#pragma omp parallel for reduction(+:alphaNum, alphaDenom)
	for (size_t i = 0 ; i < n ; i++)
	{
		double v = m_p[prevPrevK][i];

		alphaNum += m_p[prevK][i]*v;
		alphaDenom += v*v;
	}

	double alpha = alphaNum/(alphaDenom+1e-20); // avoid division by zero

	if (alpha < 0.0)
		alpha = 0.0;
	else if (alpha > 1.0)
		alpha = 1.0;

	m_alpha = static_cast<T>(alpha);

	if (m_secondOrder && m_k > 2)
	{
		// m_q[prevK] is only available once two extrapolating steps have run,
		// hence the m_k > 2 condition.
		T alphaSqrt = std::sqrt(m_alpha);

//		#pragma omp parallel for
		for (size_t i = 0 ; i < n ; i++)
			m_y[curK][i] = m_x[curK][i] 
				     + alphaSqrt * m_q[curK][i] 
				     + 0.5 * m_alpha * (m_q[curK][i] - m_q[prevK][i]);

		m_alpha = alphaSqrt;
	}
	else
	{
//		#pragma omp parallel for
		for (size_t i = 0 ; i < n ; i++)
			m_y[curK][i] = m_x[curK][i] + m_alpha * m_q[curK][i];
	}

	onExtrapolatedPosition(&(m_y[curK][0]));

	return true;
}

// Reports how many entries each position vector holds, read from the size of
// the first iterate buffer.
template<typename T>
size_t AdditiveVectorExtrapolation<T>::getDimension() const
{
	const std::vector<T> &firstIterate = m_x[0];

	return firstIterate.size();
}

// Gives read-only access to the extrapolated position stored in the buffer
// slot that belongs to the current step counter.
template<typename T>
const T *AdditiveVectorExtrapolation<T>::getAcceleratedPosition() const
{
	const std::vector<T> &extrapolated = m_y[m_k%4];

	return &extrapolated[0];
}

// Gives read-only access to the plain iterate stored in the buffer slot that
// belongs to the current step counter.
template<typename T>
const T *AdditiveVectorExtrapolation<T>::getIteratedPosition() const
{
	const std::vector<T> &iterate = m_x[m_k%4];

	return &iterate[0];
}

// Returns a copy of the stored extrapolation factor.
template<typename T>
T AdditiveVectorExtrapolation<T>::getAlpha() const
{
	return this->m_alpha;
}

#endif // AVE_H
