# convergence types
define(`DELTA_X', 1)
define(`SMALL_GRAD', 2)
define(`MAX_ITER', 3)

# step types
define(`NEWTON', `1')
define(`TRUNC_NEWTON', `2')
define(`TRUNC_CAUCHY', `3')
define(`DOGLEG', `4')
define(`STEP_TAKEN', `10')
define(`INITIAL', `20')

# trace offsets
define(`X_OFFSET', `0')
define(`F_OFFSET', `n')
define(`G_OFFSET', `n+1')
define(`RADIUS_OFFSET', `2*n+1')
define(`STEP_OFFSET', `2*n+2')
define(`RECORD_SIZE', `(2*n+3)')

# aliases
define(`Gdx', `ifelse($1,,wc,wc($*))')
define(`dg', `ifelse($1,,newt,newt($*))')
define(`cauchy', `ifelse($1,,wb,wb($*))')
define(`scr', `ifelse($1,,wb,wb($*))')

subroutine minop(x, n, f, g, var, radius, xtol, gtol, maxit, calcf, userg, calcg, trace, maxtr, halt)

# Minimize the function f(x) of n variables with initial
# guess supplied in x. The miniumum value and the
# gradient there are returned in f and g and an estimate
# of the inverse Hessian is returned in var. calcf is an
# external routine to compute the function values (in the
# form 'call calcf(x,n,f)') and if userg is TRUE then
# calcg similarly computes the gradient.  halt returns
# the reason for return:
#         0: the relative change in x was smaller than xtol
#         1: the absolute value of g was smaller than gtol
#         2: more than maxit iterations are required
# and radius is supplied as the radius of a ball in n-
# space centered at the initial guess which represents a
# region in which f is reasonably quadratic. On return,
# the array trace will contain a trace of the iterations
# that minop took.  At the i'th iteration the i'th column
# of trace contains the current x, the current f, the
# current g, the current radius and the current iteration
# type (with ten added to the latter if a step was
# taken).  If the current iteration does not result in a
# step being taken, the current g in trace will be NA
# (since the gradient is only updated when a step is
# taken).  Not more than maxtr iterations will be traced.
# On return maxit and maxtr contain the actual numbers of
# iterations taken and traced.

# Algorithm: J. E. Dennis, Jr. and H. H. W. Mei, "Two New
# Unconstrained Optimization Algorithms Which Use
# Function and Gradient Values", Journal of Optimization
# Theory and Applications, 1979, vol 28, no. 4, pages
# 453-482

# BFGS update: J. E. Dennis, Jr. and Jorge J. More

integer n, maxit, maxtr, halt
real x(n), f, g(n), var(n,n), radius, xtol, gtol, trace(1)
logical userg
external calcf, calcg
POINTER jstkgt, xnew, gnew, dx, wb, wc, newt, diag, hess
INCLUDE(stack)

xnew = jstkgt(n, REAL)
gnew = jstkgt(n, REAL)
dx = jstkgt(n, REAL)
wb = jstkgt(n, REAL)
wc = jstkgt(n, REAL)
newt = jstkgt(n, REAL)
diag = jstkgt(n, REAL)
hess = jstkgt(max0((n*(n-1))/2,1), REAL)
call minop2(x, n, f, g, radius, xtol, gtol, maxit,
	  rs(xnew), rs(gnew), rs(dx), rs(wb), rs(wc), rs(newt), rs(diag),
	  rs(hess), calcf, userg, calcg, trace, maxtr, halt)
call varmak(rs(hess), n, rs(diag), var, rs(scr))
call jstkrl(8)
return
end



subroutine minop2(x, n, f, g, radius, xtol, gtol, maxit, xnew, gnew,
		dx, wb, wc, newt, diag, hess, calcf, userg, calcg, trace, maxtr, halt)
integer n, maxit, niter, maxtr, halt, i
real x(n), f, g(n), radius, xtol, gtol, xnew(n), gnew(n), dx(n), wb(n), wc(n),
     newt(n), diag(n), hess(1), trace(1)
real radsq, gsq, newtsq, gGg, d, e, c, theta, fnew, gdx, dxGdx, prod
logical userg
external calcf, calcg

xtolsq = xtol * xtol
gtolsq = gtol * gtol
radsq = radius * radius
niter = 1
itrace = 1
call calcf(x, n, f)
if(userg)
	call calcg(x, n, g)
else
	call grad(x, n, f, g, calcf)
if(maxtr >= 1) {
	call rcopy(x, trace(itrace + X_OFFSET), n)
	call rcopy(g, trace(itrace + G_OFFSET), n)
	trace(itrace + F_OFFSET) = f
	trace(itrace + RADIUS_OFFSET) = sqrt(radsq)
	trace(itrace + STEP_OFFSET) = INITIAL
	itrace = itrace + RECORD_SIZE
}
gsq = dotv(g, n, g)

# the Hessian is kept in factored form as LDL', where L is unit lower
# triangular and D is diagonal; the strictly lower triangular part of L
# is stored in hess and the diagonal of D is stored in diag
call rfill(0.01 * sqrt(gsq) / radius, diag, n)
call rfill(0.0, hess, (n*(n-1))/2)

for(;;) {
	if(gsq < gtolsq) {
		halt = SMALL_GRAD
		break
	}
	do i = 1, n
		wc(i) = -g(i)
	call solve(hess, n, diag, newt, wc)
	newtsq = dotv(newt, n, newt)
	gGg = Gdot(g, hess, n, diag, wb)
	d = 0.2 - 0.8 * (gsq**2 / (dotv(newt, n, g) * gGg))
	repeat {   # until function value decreases
		if(niter >= maxit) {
			halt = MAX_ITER
			break 2
		}
		if(radsq < xtolsq * dotv(x,n,x)) {
			halt = DELTA_X
			break 2
		}
		if(newtsq < radsq) {  # Newton step is small - take it
			call rcopy(newt, dx, n)
			step = NEWTON
		} else if(d*d*newtsq <= radsq) {  # take truncated Newton step
			e = sqrt(radsq/newtsq)
			do i = 1, n
				dx(i) = e * newt(i)
			step = TRUNC_NEWTON
		} else if(abs(gGg)**(2./3.)*radsq**(1./3.) <= gsq) {  # Cauchy step is large - use truncated version
			e = -sqrt(radsq/gsq)
			do i = 1, n
				dx(i) = e * g(i)
			step = TRUNC_CAUCHY
		} else {  # take double dogleg step
			e = -gsq / gGg
			do i = 1, n {
				cauchy(i) = e * g(i)  # Cauchy step
				wc(i) = d * newt(i) - cauchy(i)
			}
			prod = dotv(cauchy, n, wc)
			c = radsq - e**2 * gsq  # positive in this 'else'
			theta = c / (prod + sqrt(prod**2 + c * dotv(wc,n,wc)))
			do i = 1, n
				dx(i) = cauchy(i) + theta * wc(i)
			step = DOGLEG
		}
		do i = 1, n
			xnew(i) = x(i) + dx(i)
		call calcf(xnew, n, fnew)
		niter = niter + 1
		if(niter <= maxtr) {
			call rcopy(xnew, trace(itrace + X_OFFSET), n)
			call ifill(NA, trace(itrace + G_OFFSET), n)
			trace(itrace + F_OFFSET) = fnew
			trace(itrace + RADIUS_OFFSET) = sqrt(radsq)
			trace(itrace + STEP_OFFSET) = step
			itrace = itrace + RECORD_SIZE
		}
		radsq = 0.25 * radsq
	} until(fnew < f)
	if(userg)
		call calcg(xnew, n, gnew)
	else
		call grad(xnew, n, fnew, gnew, calcf)
	if(niter <= maxtr) {
		i = itrace - RECORD_SIZE
		trace(i + STEP_OFFSET) = trace(i + STEP_OFFSET) + STEP_TAKEN
		call rcopy(gnew, trace(i + G_OFFSET), n)
	}
	dxGdx = Gdot(dx, hess, n, diag, Gdx)
	do i = 1, n {
		dg(i) = gnew(i) - g(i)
		wb(i) = dg(i) - Gdx(i)
	}
	gdx = dotv(g, n, dx)
	if(fnew - f > 0.1 * (gdx + 0.5*dxGdx))
		radsq = 0.25 * dotv(dx, n, dx)
	else if(dotv(wb, n, wb) < 0.25*gsq | gdx > 2.0*dotv(gnew, n, dx))
		radsq = 4.0 * dotv(dx, n, dx)
	else
		radsq = dotv(dx, n, dx)
# compute BFGS update
	call update(hess, n, diag, 1.0/dotv(dg,n,dx), dg, scr)
	call update(hess, n, diag, -1.0/dxGdx, Gdx, scr)
	call rcopy(xnew, x, n)
	f = fnew
	call rcopy(gnew, g, n)
	gsq = dotv(g, n, g)
}
maxit = niter
maxtr = (itrace - 1) / RECORD_SIZE
return
end



subroutine update(L, n, D, s, z, p)

# compute the Cholesky factorization of LDL' + szz'; the
# new factors are stored in L and D, and the new LDL' is
# ensured to be positive definite (note: z gets overwritten)

# Reference: P. E. Gill et. al., "Methods for Modifying Matrix
# Factorizations", Mathematics of Computation, 1974, vol. 28,
# pages 505-535 (especially Algorithm C2, page 520)

integer n, i, j, k
real L(1), D(n), s, z(n), p(n)
real sum, sigma, alpha, pi, q, theta, rho, rhosq, beta

alpha = s
call rcopy(z, p, n)
sum = 0.0
k = 1
do i = 1, n {
	pi = p(i)
	do j = i+1, n {
		p(j) = p(j) - pi * L(k)
		k = k + 1
	}
	sum = sum + pi**2 / D(i)
}
sigma = alpha * sum
if(sigma <= -1.0)
	sigma = -sigma  # ensure positive definiteness
sigma = alpha / (1.0 + sqrt(1.0 + sigma))
k = 1
do i = 1, n {
	pi = p(i)
	q = pi**2 / D(i)
	theta = 1.0 + sigma * q
	sum = amax1(sum - q, 0.0)
	rhosq = theta**2 + sigma**2 * q * sum
	rho = sign(sqrt(rhosq), theta)
	D(i) = rhosq * D(i)
	beta = alpha * pi / D(i)
	alpha = alpha / rhosq
	sigma = sigma * (1.0 + rho) / (rho * (theta + rho))
	do j = i+1, n {
		z(j) = z(j) - pi * L(k)
		L(k) = L(k) + beta * z(j)
		k = k + 1
	}
}
return
end



subroutine solve(L, n, D, x, b)

# solve LDL'x = b for x

integer n, i, j, k
real L(1), D(n), x(n), b(n), sum

# forward substitution
call rcopy(b, x, n)
k = 1
do i = 1, n {
	sum = x(i)
	x(i) = sum / D(i)
	do j = i+1, n {
		x(j) = x(j) - sum * L(k)
		k = k + 1
	}
}
# back substitution
k = n*(n-1)/2
do i = n-1, 1, -1 {
	sum = x(i)
	do j = i+1, n {
		sum = sum - L(k) * x(j)
		k = k + 1
	}
	x(i) = sum
	k = k - 2*(n-i) - 1
}
return
end



real function Gdot(a, L, n, D, b)

# b <- LDL'a and Gdot <- a'LDL'a

integer n, i, j, k1, k2
real a(n), L(1), D(n), b(n), sum

Gdot = 0.0
call rfill(0.0, b, n)
k1 = 1; k2 = 1;
do i = 1, n {
	sum = a(i)
	do j = i+1, n {
		sum = sum + L(k1) * a(j)
		k1 = k1 + 1
	}
	Gdot = Gdot + sum * sum * D(i)
	sum = sum * D(i)
	b(i) = b(i) + sum
	do j = i+1, n {
		b(j) = b(j) + l(k2) * sum
		k2 = k2 + 1
	}
}
return
end


subroutine varmak(L, n, D, var, scrach)

# var <- inverse of LDL'

integer n, i
real L(1), D(n), var(n,n), scrach(n)

call rfill(0.0, scrach, n)
do i = 1, n {
	if(i > 1)
		scrach(i-1) = 0.0
	scrach(i) = 1.0
	call solve(L, n, D, var(1,i), scrach)
}
return
end


subroutine grad(x, n, fcur, gcur, calcf)
integer n, i
real x(n), fcur, gcur(n)
real d, dprec, old, fplus
external calcf

dprec = sqrt(PRECISION)
do i = 1, n {
	old = x(i)
	d = dprec * amax1(1.0, abs(old))
	x(i) = old + d
	call calcf(x, n, fplus)
	gcur(i) = (fplus - fcur) / d
	x(i) = old
}
return
end
