subroutine slap_sslugm(n, b, x, nelt, ia, ja, a, isym, nsave, $ itol, tol, itmax, iter, err, ierr, iunit, rwork, lenw, $ iwork, leniw ) #if defined O_embm && defined O_embm_slap implicit none integer locrb, locib parameter (locrb=1, locib=11) integer n, nelt, ia(nelt), ja(nelt), isym, nsave, itol integer itmax, iter, ierr, iunit, lenw, iwork(leniw), leniw integer lociw, locl, locnc, locju, locnr, myitol integer locw, locrgw, locdin, locu, locjl, lociu, locil integer j, locigw, jbgn, jend, icol, nl, nu real b(n), x(n), a(nelt), tol, err, rwork(lenw) ! change the slap input matrix ia, ja, a to slap-column format. ierr = 0 err = 0.0 if ( nsave.le.1 ) then ierr = 3 return endif ! write(*,*) 'sslugm: changing data structure' call ss2y( n, nelt, ia, ja, a, isym ) ! count number of non-zero elements preconditioner ilu matrix. ! then set up the work arrays. we assume maxl=kmp=nsave. ! write(*,*) 'sslugm: counting non-zeros' nl = 0 nu = 0 do 20 icol = 1, n ! don't count diagonal. jbgn = ja(icol)+1 jend = ja(icol+1)-1 if ( jbgn.le.jend ) then do 10 j = jbgn, jend if ( ia(j).gt.icol ) then nl = nl + 1 if ( isym.ne.0 ) nu = nu + 1 else nu = nu + 1 endif 10 continue endif 20 continue ! write(*,*) 'nl,nu =', nl,nu locigw = locib locil = locigw + 20 locjl = locil + n+1 lociu = locjl + nl locju = lociu + nu locnr = locju + n+1 locnc = locnr + n lociw = locnc + n locl = locrb locdin = locl + nl locu = locdin + n locrgw = locu + nu locw = locrgw + 1+n*(nsave+6)+nsave*(nsave+3) iwork(1) = locil iwork(2) = locjl iwork(3) = lociu iwork(4) = locju iwork(5) = locl iwork(6) = locdin iwork(7) = locu iwork(9) = lociw iwork(10) = locw ! compute the incomplete lu decomposition. ! write(*,*) 'sslugm: computing ilu factors' call ssilus( n, nelt, ia, ja, a, isym, nl, iwork(locil), $ iwork(locjl), rwork(locl), rwork(locdin), nu, $ iwork(lociu), $ iwork(locju), rwork(locu), iwork(locnr), iwork(locnc) ) ! perform the incomplet lu preconditioned generalized minimum ! residual iteration algorithm. the following sgmres ! defaults are used maxl = kmp = nsave, jscal = 0, ! jpre = -1, nrmax = itmax/nsave iwork(locigw ) = nsave iwork(locigw+1) = nsave iwork(locigw+2) = 0 iwork(locigw+3) = -1 iwork(locigw+4) = itmax/nsave myitol = 0 call sgmres( n, b, x, nelt, ia, ja, a, isym, myitol, $ tol, itmax, iter, err, ierr, iunit, rwork, rwork, $ rwork(locrgw), lenw-locrgw, iwork(locigw), 20, $ rwork, iwork ) if ( iter .gt.itmax ) ierr = 2 return end subroutine ss2y(n, nelt, ia, ja, a, isym ) !***purpose slap triad to slap column format converter. ! routine to convert from the slap triad to slap column ! format. ! *arguments: ! n :in integer ! order of the matrix. ! nelt :in integer. ! number of non-zeros stored in a. ! ia :inout integer ia(nelt). ! ja :inout integer ja(nelt). ! a :inout real a(nelt). ! these arrays should hold the matrix a in either the slap ! triad format or the slap column format. see "long ! description", below. if the slap triad format is used ! this format is translated to the slap column format by ! this routine. ! isym :in integer. ! flag to indicate symmetric storage format. ! if isym=0, all nonzero entries of the matrix are stored. ! if isym=1, the matrix is symmetric, and only the lower ! triangle of the matrix is stored. ! the sparse linear algebra package (slap) utilizes two matrix ! data structures: 1) the slap triad format or 2) the slap ! column format. the user can hand this routine either of the ! of these data structures. if the slap triad format is give ! as input then this routine transforms it into slap column ! format. the way this routine tells which format is given as ! input is to look at ja(n+1). if ja(n+1) = nelt+1 then we ! have the slap column format. if that equality does not hold ! then it is assumed that the ia, ja, a arrays contain the ! slap triad format. implicit none integer n, nelt, ia(nelt), ja(nelt), isym integer i, itemp, ibgn, iend, icol, j real a(nelt), temp ! check to see if the (ia,ja,a) arrays are in slap column ! format. if it's not then transform from slap triad. if ( ja(n+1).eq.nelt+1 ) return ! sort into ascending order by column (on the ja array). ! this will line up the columns. call qs2i1r( ja, ia, a, nelt, 1 ) ! loop over each column to see where the column indicies change ! in the column index array ja. this marks the beginning of the ! next column. ja(1) = 1 do 20 icol = 1, n-1 do 10 j = ja(icol)+1, nelt if ( ja(j).ne.icol ) then ja(icol+1) = j goto 20 endif 10 continue 20 continue ja(n+1) = nelt+1 ! mark the n+2 element so that future calls to a slap routine ! utilizing the ysmp-column storage format will be able to tell. ja(n+2) = 0 ! now loop thru the ia(i) array making sure that the diagonal ! matrix element appears first in the column. then sort the ! rest of the column in ascending order. do 70 icol = 1, n ibgn = ja(icol) iend = ja(icol+1)-1 do 30 i = ibgn, iend if ( ia(i).eq.icol ) then ! swap the diag element with the first element in the column. itemp = ia(i) ia(i) = ia(ibgn) ia(ibgn) = itemp temp = a(i) a(i) = a(ibgn) a(ibgn) = temp goto 40 endif 30 continue 40 ibgn = ibgn + 1 if ( ibgn.lt.iend ) then do 60 i = ibgn, iend do 50 j = i+1, iend if ( ia(i).gt.ia(j) ) then itemp = ia(i) ia(i) = ia(j) ia(j) = itemp temp = a(i) a(i) = a(j) a(j) = temp endif 50 continue 60 continue endif 70 continue return end subroutine sslui(n, b, x, nelt, ia, ja, a, isym, rwork, iwork) implicit none integer n, nelt, ia(nelt), ja(nelt), isym, iwork(*) integer locl, locdin, locu, locju, locil, locjl, lociu real b(n), x(n), a(nelt), rwork(*) ! pull out the locations of the arrays holding the ilu ! factorization. locil = iwork(1) locjl = iwork(2) lociu = iwork(3) locju = iwork(4) locl = iwork(5) locdin = iwork(6) locu = iwork(7) ! solve the system lux = b call sslui2(n, b, x, iwork(locil), iwork(locjl), rwork(locl), $ rwork(locdin), iwork(lociu), iwork(locju), rwork(locu) ) return end subroutine sslui2(n, b, x, il, jl, l, dinv, iu, ju, u ) !***purpose slap back solve for ldu factorization. ! routine to solve a system of the form l*d*u x = b, ! where l is a unit lower triangular matrix, d is a ! diagonal matrix, and u is a unit upper triangular matrix. ! *arguments: ! n :in integer ! order of the matrix. ! b :in real b(n). ! right hand side. ! x :out real x(n). ! solution of l*d*u x = b. ! nel :in integer. ! number of non-zeros in the el array. ! il :in integer il(n+1). ! jl :in integer jl(nl). ! l :in real l(nl). ! il, jl, l contain the unit lower triangular factor of the ! incomplete decomposition of some matrix stored in slap row ! format. the diagonal of ones *is* stored. this structure ! can be set up by the ssilus routine. see ! "description", below for more details about the slap ! format. ! dinv :in real dinv(n). ! inverse of the diagonal matrix d. ! nu :in integer. ! number of non-zeros in the u array. ! iu :in integer iu(n+1). ! ju :in integer ju(nu). ! u :in real u(nu). ! iu, ju, u contain the unit upper triangular factor of the ! incomplete decomposition of some matrix stored in slap ! column format. the diagonal of ones *is* stored. this ! structure can be set up by the ssilus routine. see ! "description", below for more details about the slap ! format. ! *description: ! this routine is supplied with the slap package as a routine ! to perform the sslui operation in the sir and sbcg ! iteration routines for the drivers ssilur and sslubc. it ! must be called via the slap sslui calling sequence ! convention interface routine sslui. ! **** this routine itself does not conform to the **** ! **** slap sslui calling convention **** ! il, jl, l should contain the unit lower triangular factor of ! the incomplete decomposition of the a matrix stored in slap ! row format. iu, ju, u should contain the unit upper factor ! of the incomplete decomposition of the a matrix stored in ! slap column format this ilu factorization can be computed by ! the ssilus routine. the diagonals (which is all one's) are ! stored. ! =================== s l a p column format ================== ! this routine requires that the matrix a be stored in the ! slap column format. in this format the non-zeros are stored ! counting down columns (except for the diagonal entry, which ! must appear first in each "column") and are stored in the ! real array a. in other words, for each column in the matrix ! put the diagonal entry in a. then put in the other non-zero ! elements going down the column (except the diagonal) in ! order. the ia array holds the row index for each non-zero. ! the ja array holds the offsets into the ia, a arrays for the ! beginning of each column. that is, ia(ja(icol)), ! a(ja(icol)) points to the beginning of the icol-th column in ! ia and a. ia(ja(icol+1)-1), a(ja(icol+1)-1) points to the ! end of the icol-th column. note that we always have ! ja(n+1) = nelt+1, where n is the number of columns in the ! matrix and nelt is the number of non-zeros in the matrix. ! here is an example of the slap column storage format for a ! 5x5 matrix (in the a and ia arrays '|' denotes the end of a ! column): ! 5x5 matrix slap column format for 5x5 matrix on left. ! 1 2 3 4 5 6 7 8 9 10 11 ! |11 12 0 0 15| a: 11 21 51 | 22 12 | 33 53 | 44 | 55 15 35 ! |21 22 0 0 0| ia: 1 2 5 | 2 1 | 3 5 | 4 | 5 1 3 ! | 0 0 33 0 35| ja: 1 4 6 8 9 12 ! | 0 0 0 44 0| ! |51 0 53 0 55| implicit none integer n, il(*), jl(*), iu(*), ju(*) integer i, icol, irow, j, jbgn, jend real b(n), x(n), l(*), dinv(n), u(*) ! solve l*y = b, storing result in x, l stored by rows. do 10 i = 1, n x(i) = b(i) 10 continue do 30 irow = 2, n jbgn = il(irow) jend = il(irow+1)-1 if ( jbgn.le.jend ) then do 20 j = jbgn, jend x(irow) = x(irow) - l(j)*x(jl(j)) 20 continue endif 30 continue ! solve d*z = y, storing result in x. do 40 i=1,n x(i) = x(i)*dinv(i) 40 continue ! solve u*x = z, u stored by columns. do 60 icol = n, 2, -1 jbgn = ju(icol) jend = ju(icol+1)-1 if ( jbgn.le.jend ) then do 50 j = jbgn, jend x(iu(j)) = x(iu(j)) - u(j)*x(icol) 50 continue endif 60 continue return end subroutine ssilus(n, nelt, ia, ja, a, isym, nl, il, jl, $ l, dinv, nu, iu, ju, u, nrow, ncol) !***purpose incomplete lu decomposition preconditioner slap set up. ! routine to generate the incomplete ldu decomposition of a ! matrix. the unit lower triangular factor l is stored by ! rows and the unit upper triangular factor u is stored by ! columns. the inverse of the diagonal matrix d is stored. ! no fill in is allowed. ! *arguments: ! n :in integer ! order of the matrix. ! nelt :in integer. ! number of elements in arrays ia, ja, and a. ! ia :in integer ia(nelt). ! ja :in integer ja(nelt). ! a :in real a(nelt). ! these arrays should hold the matrix a in the slap column ! format. see "description", below. ! isym :in integer. ! flag to indicate symmetric storage format. ! if isym=0, all nonzero entries of the matrix are stored. ! if isym=1, the matrix is symmetric, and only the lower ! triangle of the matrix is stored. ! nl :out integer. ! number of non-zeros in the el array. ! il :out integer il(n+1). ! jl :out integer jl(nl). ! l :out real l(nl). ! il, jl, l contain the unit ower triangular factor of the ! incomplete decomposition of some matrix stored in slap ! row format. the diagonal of ones *is* stored. see ! "description", below for more details about the slap format. ! nu :out integer. ! number of non-zeros in the u array. ! iu :out integer iu(n+1). ! ju :out integer ju(nu). ! u :out real u(nu). ! iu, ju, u contain the unit upper triangular factor of the ! incomplete decomposition of some matrix stored in slap ! column format. the diagonal of ones *is* stored. see ! "description", below for more details about the slap ! format. ! nrow :work integer nrow(n). ! nrow(i) is the number of non-zero elements in the i-th row ! of l. ! ncol :work integer ncol(n). ! ncol(i) is the number of non-zero elements in the i-th ! column of u. ! *description ! il, jl, l should contain the unit lower triangular factor of ! the incomplete decomposition of the a matrix stored in slap ! row format. iu, ju, u should contain the unit upper factor ! of the incomplete decomposition of the a matrix stored in ! slap column format this ilu factorization can be computed by ! the ssilus routine. the diagonals (which is all one's) are ! stored. implicit none integer n, nelt, ia(nelt), ja(nelt), isym, nl, il(nl), jl(nl) integer nu, iu(nu), ju(nu), nrow(n), ncol(n) integer i, ibgn, iend, j, jbgn, jend, icol, irow integer itemp , jtemp, indx, indx1, indx2, indxr1, indxr2 integer indxc2, indxc1, k, kc, kr real a(nelt), l(nl), dinv(n), u(nu), temp ! count number of elements in each row of the lower triangle. do 10 i=1,n nrow(i) = 0 ncol(i) = 0 10 continue do 30 icol = 1, n jbgn = ja(icol)+1 jend = ja(icol+1)-1 if ( jbgn.le.jend ) then do 20 j = jbgn, jend if ( ia(j).lt.icol ) then ncol(icol) = ncol(icol) + 1 else nrow(ia(j)) = nrow(ia(j)) + 1 if ( isym.ne.0 ) ncol(ia(j)) = ncol(ia(j)) + 1 endif 20 continue endif 30 continue ju(1) = 1 il(1) = 1 do 40 icol = 1, n il(icol+1) = il(icol) + nrow(icol) ju(icol+1) = ju(icol) + ncol(icol) nrow(icol) = il(icol) ncol(icol) = ju(icol) 40 continue ! copy the matrix a into the l and u structures. do 60 icol = 1, n dinv(icol) = a(ja(icol)) jbgn = ja(icol)+1 jend = ja(icol+1)-1 if ( jbgn.le.jend ) then do 50 j = jbgn, jend irow = ia(j) if ( irow.lt.icol ) then ! part of the upper triangle. iu(ncol(icol)) = irow u(ncol(icol)) = a(j) ncol(icol) = ncol(icol) + 1 else ! part of the lower triangle (stored by row). jl(nrow(irow)) = icol l(nrow(irow)) = a(j) nrow(irow) = nrow(irow) + 1 if ( isym.ne.0 ) then ! symmetric...copy lower triangle into upper triangle as well. iu(ncol(irow)) = icol u(ncol(irow)) = a(j) ncol(irow) = ncol(irow) + 1 endif endif 50 continue endif 60 continue ! sort the rows of l and the columns of u. do 110 k = 2, n jbgn = ju(k) jend = ju(k+1)-1 if ( jbgn.lt.jend ) then do 80 j = jbgn, jend-1 do 70 i = j+1, jend if ( iu(j).gt.iu(i) ) then itemp = iu(j) iu(j) = iu(i) iu(i) = itemp temp = u(j) u(j) = u(i) u(i) = temp endif 70 continue 80 continue endif ibgn = il(k) iend = il(k+1)-1 if ( ibgn.lt.iend ) then do 100 i = ibgn, iend-1 do 90 j = i+1, iend if ( jl(i).gt.jl(j) ) then jtemp = ju(i) ju(i) = ju(j) ju(j) = jtemp temp = l(i) l(i) = l(j) l(j) = temp endif 90 continue 100 continue endif 110 continue ! perform the incomplete ldu decomposition. do 300 i=2,n ! i-th row of l indx1 = il(i) indx2 = il(i+1) - 1 if (indx1 .gt. indx2) go to 200 do 190 indx=indx1,indx2 if (indx .eq. indx1) go to 180 indxr1 = indx1 indxr2 = indx - 1 indxc1 = ju(jl(indx)) indxc2 = ju(jl(indx)+1) - 1 if (indxc1 .gt. indxc2) go to 180 160 kr = jl(indxr1) 170 kc = iu(indxc1) if (kr .gt. kc) then indxc1 = indxc1 + 1 if (indxc1 .le. indxc2) go to 170 elseif(kr .lt. kc) then indxr1 = indxr1 + 1 if (indxr1 .le. indxr2) go to 160 elseif(kr .eq. kc) then l(indx) = l(indx) - l(indxr1)*dinv(kc)*u(indxc1) indxr1 = indxr1 + 1 indxc1 = indxc1 + 1 if (indxr1.le.indxr2 .and. indxc1.le.indxc2) go to 160 endif 180 l(indx) = l(indx)/dinv(jl(indx)) 190 continue ! ith column of u 200 indx1 = ju(i) indx2 = ju(i+1) - 1 if (indx1 .gt. indx2) go to 260 do 250 indx=indx1,indx2 if (indx .eq. indx1) go to 240 indxc1 = indx1 indxc2 = indx - 1 indxr1 = il(iu(indx)) indxr2 = il(iu(indx)+1) - 1 if (indxr1 .gt. indxr2) go to 240 210 kr = jl(indxr1) 220 kc = iu(indxc1) if (kr .gt. kc) then indxc1 = indxc1 + 1 if (indxc1 .le. indxc2) go to 220 elseif(kr .lt. kc) then indxr1 = indxr1 + 1 if (indxr1 .le. indxr2) go to 210 elseif(kr .eq. kc) then u(indx) = u(indx) - l(indxr1)*dinv(kc)*u(indxc1) indxr1 = indxr1 + 1 indxc1 = indxc1 + 1 if (indxr1.le.indxr2 .and. indxc1.le.indxc2) go to 210 endif 240 u(indx) = u(indx)/dinv(iu(indx)) 250 continue ! ith diagonal element 260 indxr1 = il(i) indxr2 = il(i+1) - 1 if (indxr1 .gt. indxr2) go to 300 indxc1 = ju(i) indxc2 = ju(i+1) - 1 if (indxc1 .gt. indxc2) go to 300 270 kr = jl(indxr1) 280 kc = iu(indxc1) if (kr .gt. kc) then indxc1 = indxc1 + 1 if (indxc1 .le. indxc2) go to 280 elseif(kr .lt. kc) then indxr1 = indxr1 + 1 if (indxr1 .le. indxr2) go to 270 elseif(kr .eq. kc) then dinv(i) = dinv(i) - l(indxr1)*dinv(kc)*u(indxc1) indxr1 = indxr1 + 1 indxc1 = indxc1 + 1 if (indxr1 .le. indxr2 .and. indxc1 .le. indxc2) go to 270 endif 300 continue ! replace diagonal lts by their inverses. do 430 i=1,n dinv(i) = 1./dinv(i) 430 continue return end subroutine sgmres(n, b, x, nelt, ia, ja, a, isym, $ itol, tol, itmax, iter, err, ierr, iunit, sb, sx, $ rgwk, lrgw, igwk, ligw, rwork, iwork ) implicit none integer n, nelt, ia(nelt), ja(nelt), isym, itol, itmax, iter integer iunit, lrgw, ligw, igwk(ligw), iwork(*) integer jpre, kmp, maxl, nms, maxlp1, nmsl, nrsts, nrmax integer i, iflag, lr, ldl, lhes, lgmr, lq, lv, lw integer lz, ierr, lzm1, lxl, jscal real b(n), x(n), tol, err, sb(n), sx(n), a(nelt) real rgwk(lrgw), rwork(*) real bnrm, rhol, sum, slap_snrm2 ierr = 0 rhol = 0.0 ! --------------------------------------------------------------- ! load method parameters with user values or defaults. ! -------------------------------------------------------------- maxl = igwk(1) if (maxl .eq. 0) maxl = 10 if (maxl .gt. n) maxl = n kmp = igwk(2) if (kmp .eq. 0) kmp = maxl if (kmp .gt. maxl) kmp = maxl jscal = igwk(3) jpre = igwk(4) ! check for consistent values of itol and jpre. if ( itol.eq.1 .and. jpre.lt.0 ) goto 650 if ( itol.eq.2 .and. jpre.ge.0 ) goto 650 nrmax = igwk(5) if ( nrmax.eq.0 ) nrmax = 10 ! if nrmax .eq. -1, then set nrmax = 0 to ! turn off restarting. if ( nrmax.eq.-1 ) nrmax = 0 ! initialize counters. iter = 0 nms = 0 nrsts = 0 ! --------------------------------------------------------------- ! form work array segment pointers. ! -------------------------------------------------------------- maxlp1 = maxl + 1 lv = 1 lr = lv + n*maxlp1 lhes = lr + n + 1 lq = lhes + maxl*maxlp1 ldl = lq + 2*maxl lw = ldl + n lxl = lw + n lz = lxl + n ! load igwk(6) with required minimum length of the rgwk array. igwk(6) = lz + n - 1 if ( lz+n-1.gt.lrgw ) goto 640 ! -------------------------------------------------------------- ! calculate scaled-preconditioned norm of rhs vector b. ! -------------------------------------------------------------- if (jpre .lt. 0) then call sslui(n, b, rgwk(lr), nelt, ia, ja, a, isym, $ rwork, iwork) nms = nms + 1 else call slap_scopy(n, b, 1, rgwk(lr), 1) endif if ( jscal.eq.2 .or. jscal.eq.3 ) then sum = 0.e0 do 10 i = 1,n sum = sum + (rgwk(lr-1+i)*sb(i))**2 10 continue bnrm = sqrt(sum) else bnrm = slap_snrm2(n,rgwk(lr),1) endif ! ------------------------------------------------------------- ! calculate initial residual. ! ------------------------------------------------------------- call ssmv(n, x, rgwk(lr), nelt, ia, ja, a, isym) do 50 i = 1,n rgwk(lr-1+i) = b(i) - rgwk(lr-1+i) 50 continue ! ------------------------------------------------------------- ! if performing restarting, then load the residual into the ! correct location in the rgwk array. ! -------------------------------------------------------------- 100 continue if ( nrsts.gt.nrmax ) goto 610 if ( nrsts.gt.0 ) then ! copy the curr residual to different loc in the rgwk array. call slap_scopy(n, rgwk(ldl), 1, rgwk(lr), 1) endif ! ------------------------------------------------------------ ! use the spigmr algorithm to solve the linear system a*z = r. ! ------------------------------------------------------------- call spigmr(n, rgwk(lr), sb, sx, jscal, maxl, maxlp1, kmp, $ nrsts, jpre, nmsl, rgwk(lz), rgwk(lv), $ rgwk(lhes), rgwk(lq), lgmr, rwork, iwork, rgwk(lw), $ rgwk(ldl), rhol, nrmax, b, bnrm, x, rgwk(lxl), itol, $ tol, nelt, ia, ja, a, isym, iunit, iflag, err) iter = iter + lgmr nms = nms + nmsl ! increment x by the current approximate solution z of a*z = r. lzm1 = lz - 1 do 110 i = 1,n x(i) = x(i) + rgwk(lzm1+i) 110 continue if ( iflag.eq.0 ) goto 600 if ( iflag.eq.1 ) then nrsts = nrsts + 1 goto 100 endif if ( iflag.eq.2 ) goto 620 ! ------------------------------------------------------------------ ! all returns are made through this section. ! ------------------------------------------------------------------ ! the iteration has converged. 600 continue igwk(7) = nms rgwk(1) = rhol ierr = 0 return ! max number((nrmax+1)*maxl) of linear iterations performed. 610 continue igwk(7) = nms rgwk(1) = rhol ierr = 1 return ! gmres failed to reduce last residual in maxl iterations. ! the iteration has stalled. 620 continue igwk(7) = nms rgwk(1) = rhol ierr = 2 return ! error return. insufficient length for rgwk array. 640 continue err = tol ierr = -1 return ! error return. inconsistent itol and jpre values. 650 continue err = tol ierr = -2 return end subroutine slap_scopy(n,sx,incx,sy,incy) ! copies a vector, x, to a vector, y. ! uses unrolled loops for increments equal to 1. ! jack dongarra, linpack, 3/11/78. implicit none integer i, incx, incy, ix, iy, m, mp1, n real sx(*), sy(*) if (n.le.0)return if (incx.eq.1.and.incy.eq.1)go to 20 ! code for unequal increments or equal increments ! not equal to 1 ix = 1 iy = 1 if (incx.lt.0)ix = (-n+1)*incx + 1 if (incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n sy(iy) = sx(ix) ix = ix + incx iy = iy + incy 10 continue return ! code for both increments equal to 1 ! clean-up loop 20 m = mod(n,7) if ( m .eq. 0 ) go to 40 do 30 i = 1,m sy(i) = sx(i) 30 continue if ( n .lt. 7 ) return 40 mp1 = m + 1 do 50 i = mp1,n,7 sy(i) = sx(i) sy(i + 1) = sx(i + 1) sy(i + 2) = sx(i + 2) sy(i + 3) = sx(i + 3) sy(i + 4) = sx(i + 4) sy(i + 5) = sx(i + 5) sy(i + 6) = sx(i + 6) 50 continue return end subroutine qs2i1r( ia, ja, a, n, kflag ) !***purpose sort an integer array also moving an integer and real array ! this routine sorts the integer array ia and makes the same ! interchanges in the integer array ja and the real array a. ! the array ia may be sorted in increasing order or decreas- ! ing order. a slightly modified quicksort algorithm is used. ! description of parameters ! ia - integer array of values to be sorted. ! ja - integer array to be carried along. ! a - real array to be carried along. ! n - number of values in integer array ia to be sorted. ! kflag - control parameter ! = 1 means sort ia in increasing order. ! =-1 means sort ia in decreasing order. implicit none integer il(21), iu(21) integer ia(n), ja(n), it, iit, jt, jjt, k integer kflag, ij, l, j, nn, n, m, i, kk real a(n), ta, tta, r nn = n if ( n.eq.1 ) return kk = iabs(kflag) ! alter array ia to get decreasing order if needed. if ( kflag.lt.1 ) then do 20 i=1,nn ia(i) = -ia(i) 20 continue endif ! sort ia and carry ja and a along. ! and now...just a little black magic... m = 1 i = 1 j = nn r = .375 210 if ( r.le.0.5898437 ) then r = r + 3.90625e-2 else r = r-.21875 endif 225 k = i ! select a central element of the array and save it in location ! it, jt, at. ij = i + ifix (float (j-i) *r) it = ia(ij) jt = ja(ij) ta = a(ij) ! if first element of array is greater than it, interchange with it. if ( ia(i).gt.it ) then ia(ij) = ia(i) ia(i) = it it = ia(ij) ja(ij) = ja(i) ja(i) = jt jt = ja(ij) a(ij) = a(i) a(i) = ta ta = a(ij) endif l=j ! if last element of array is less than it, swap with it. if ( ia(j).lt.it ) then ia(ij) = ia(j) ia(j) = it it = ia(ij) ja(ij) = ja(j) ja(j) = jt jt = ja(ij) a(ij) = a(j) a(j) = ta ta = a(ij) ! if first element of array is greater than it, swap with it. if ( ia(i).gt.it ) then ia(ij) = ia(i) ia(i) = it it = ia(ij) ja(ij) = ja(i) ja(i) = jt jt = ja(ij) a(ij) = a(i) a(i) = ta ta = a(ij) endif endif ! find an element in the second half of the array which is ! smaller than it. 240 l=l-1 if ( ia(l).gt.it ) go to 240 ! find an element in the first half of the array which is ! greater than it. 245 k=k+1 if ( ia(k).lt.it ) go to 245 ! interchange these elements. if ( k.le.l ) then iit = ia(l) ia(l) = ia(k) ia(k) = iit jjt = ja(l) ja(l) = ja(k) ja(k) = jjt tta = a(l) a(l) = a(k) a(k) = tta goto 240 endif ! save upper and lower subscripts of the array yet to be sorted. if ( l-i.gt.j-k ) then il(m) = i iu(m) = l i = k m = m+1 else il(m) = k iu(m) = j j = l m = m+1 endif go to 260 ! begin again on another portion of the unsorted array. 255 m = m-1 if ( m.eq.0 ) go to 300 i = il(m) j = iu(m) 260 if ( j-i.ge.1 ) go to 225 if ( i.eq.j ) go to 255 if ( i.eq.1 ) go to 210 i = i-1 265 i = i+1 if ( i.eq.j ) go to 255 it = ia(i+1) jt = ja(i+1) ta = a(i+1) if ( ia(i).le.it ) go to 265 k=i 270 ia(k+1) = ia(k) ja(k+1) = ja(k) a(k+1) = a(k) k = k-1 if ( it.lt.ia(k) ) go to 270 ia(k+1) = it ja(k+1) = jt a(k+1) = ta go to 265 ! clean up, if necessary. 300 if ( kflag.lt.1 ) then do 310 i=1,nn ia(i) = -ia(i) 310 continue endif return end subroutine spigmr(n, r0, sr, sz, jscal, maxl, maxlp1, kmp, $ nrsts, jpre, nmsl, z, v, hes, q, lgmr, $ rpar, ipar, wk, dl, rhol, nrmax, b, bnrm, x, xl, $ itol, tol, nelt, ia, ja, a, isym, iunit, iflag, err) implicit none integer n, maxl, maxlp1, kmp, jpre, nmsl, lgmr, iflag, jscal integer nrsts, nrmax, itol, nelt, isym integer ipar(*), ia(*), ja(*) real rhol,bnrm,tol real r0(*), sr(*), sz(*), z(*), v(n,*), hes(maxlp1,*) real q(*), rpar(*), wk(*), dl(*) real a(*), b(*), x(*), xl(*) real slap_snrm2 ! local variables. integer i, info, ip1, i2, j, k, ll, llp1 real r0nrm,c,dlnrm,prod,rho,s,snormw,tem, err integer iter, iunit, itmax, issgmr ! zero out the z array. do 5 i = 1,n z(i) = 0.0e0 5 continue iflag = 0 lgmr = 0 nmsl = 0 ! load itmax, the maximum number of iterations. itmax =(nrmax+1)*maxl ! ------------------------------------------------------------------- ! the initial residual is the vector r0. ! apply left precon. if jpre < 0 and this is not a restart. ! apply scaling to r0 if jscal = 2 or 3. ! ------------------------------------------------------------------- if ((jpre .lt. 0) .and.(nrsts .eq. 0)) then call slap_scopy(n, r0, 1, wk, 1) call sslui(n, wk, r0, nelt, ia, ja, a, isym, rpar, ipar) nmsl = nmsl + 1 endif if (((jscal.eq.2) .or.(jscal.eq.3)) .and.(nrsts.eq.0)) then do 10 i = 1,n v(i,1) = r0(i)*sr(i) 10 continue else do 20 i = 1,n v(i,1) = r0(i) 20 continue endif r0nrm = slap_snrm2(n, v, 1) iter = nrsts*maxl ! call stopping routine issgmr. if (issgmr(n, b, x, xl, nelt, ia, ja, a, isym, $ nmsl, itol, tol, itmax, iter, err, iunit, v(1,1), z, wk, $ rpar, ipar, r0nrm, bnrm, sr, sz, jscal, $ kmp, lgmr, maxl, maxlp1, v, q, snormw, prod, r0nrm, $ hes, jpre) .ne. 0) return tem = 1.0e0/(r0nrm + 1.e-20) call slap_sscal(n, tem, v(1,1), 1) ! zero out the hes array. do 50 j = 1,maxl do 40 i = 1,maxlp1 hes(i,j) = 0.0e0 40 continue 50 continue ! ------------------------------------------------------------------- ! main loop to compute the vectors v(*,2) to v(*,maxl). ! the running product prod is needed for the convergence test. ! ------------------------------------------------------------------- prod = 1.0e0 do 90 ll = 1,maxl lgmr = ll ! ------------------------------------------------------------------- ! unscale the current v(ll) and store in wk. call routine ! sslui to compute(m-inverse)*wk, where m is the ! preconditioner matrix. save the answer in z. call routine ! ssmv to compute vnew = a*z, where a is the the system ! matrix. save the answer in v(ll+1). scale v(ll+1). call ! routine sorth to orthogonalize the new vector vnew = ! v(*,ll+1). call routine sheqr to update the factors of hes. ! ------------------------------------------------------------------- if ((jscal .eq. 1) .or.(jscal .eq. 3)) then do 60 i = 1,n wk(i) = v(i,ll)/sz(i) 60 continue else call slap_scopy(n, v(1,ll), 1, wk, 1) endif if (jpre .gt. 0) then call sslui(n, wk, z, nelt, ia, ja, a, isym, rpar, ipar) nmsl = nmsl + 1 call ssmv(n, z, v(1,ll+1), nelt, ia, ja, a, isym) else call ssmv(n, wk, v(1,ll+1), nelt, ia, ja, a, isym) endif if (jpre .lt. 0) then call slap_scopy(n, v(1,ll+1), 1, wk, 1) call sslui(n,wk,v(1,ll+1),nelt,ia,ja,a,isym,rpar,ipar) nmsl = nmsl + 1 endif if ((jscal .eq. 2) .or.(jscal .eq. 3)) then do 65 i = 1,n v(i,ll+1) = v(i,ll+1)*sr(i) 65 continue endif call sorth(v(1,ll+1), v, hes, n, ll, maxlp1, kmp, snormw) hes(ll+1,ll) = snormw call sheqr(hes, maxlp1, ll, q, info, ll) if (info .eq. ll) go to 120 ! ------------------------------------------------------------------- ! update rho, the estimate of the norm of the residual r0-a*zl. ! if kmp < maxl, then the vectors v(*,1),...,v(*,ll+1) are not ! necessarily orthogonal for ll > kmp. the vector dl must then ! be computed, and its norm used in the calculation of rho. ! ------------------------------------------------------------------- prod = prod*q(2*ll) rho = abs(prod*r0nrm) if ((ll.gt.kmp) .and.(kmp.lt.maxl)) then if (ll .eq. kmp+1) then call slap_scopy(n, v(1,1), 1, dl, 1) do 75 i = 1,kmp ip1 = i + 1 i2 = i*2 s = q(i2) c = q(i2-1) do 70 k = 1,n dl(k) = s*dl(k) + c*v(k,ip1) 70 continue 75 continue endif s = q(2*ll) c = q(2*ll-1)/snormw llp1 = ll + 1 do 80 k = 1,n dl(k) = s*dl(k) + c*v(k,llp1) 80 continue dlnrm = slap_snrm2(n, dl, 1) rho = rho*dlnrm endif rhol = rho ! ------------------------------------------------------------------- ! test for convergence. if passed, compute approximation zl. ! if failed and ll < maxl, then continue iterating. ! ------------------------------------------------------------------- iter = nrsts*maxl + lgmr if (issgmr(n, b, x, xl, nelt, ia, ja, a, isym, $ nmsl, itol, tol, itmax, iter, err, iunit, dl, z, wk, $ rpar, ipar, rhol, bnrm, sr, sz, jscal, $ kmp, lgmr, maxl, maxlp1, v, q, snormw, prod, r0nrm, $ hes, jpre) .ne. 0) go to 200 if (ll .eq. maxl) go to 100 ! ------------------------------------------------------------------- ! rescale so that the norm of v(1,ll+1) is one. ! ------------------------------------------------------------------- tem = 1.0e0/snormw call slap_sscal(n, tem, v(1,ll+1), 1) 90 continue 100 continue if (rho .lt. r0nrm) go to 150 120 continue iflag = 2 ! load approximate solution with zero. do 130 i = 1,n z(i) = 0.e0 130 continue return 150 iflag = 1 ! tolerance not met, but residual norm reduced. if (nrmax .gt. 0) then ! if performing restarting (nrmax > 0) calculate the residual ! vector rl and store it in the dl array. if the incomplete ! version is being used (kmp < maxl) then dl has already been ! calculated up to a scaling factor. use srlcal to calculate ! the scaled residual vector. call srlcal(n, kmp, maxl, maxl, v, q, dl, snormw, prod, $ r0nrm) endif ! ------------------------------------------------------------------- ! compute the approximation zl to the solution. since the ! vector z was used as work space, and the initial guess ! of the linear iteration is zero, z must be reset to zero. ! ------------------------------------------------------------------- 200 continue ll = lgmr llp1 = ll + 1 do 210 k = 1,llp1 r0(k) = 0.0e0 210 continue r0(1) = r0nrm call shels(hes, maxlp1, ll, q, r0) do 220 k = 1,n z(k) = 0.0e0 220 continue do 230 i = 1,ll call slap_saxpy(n, r0(i), v(1,i), 1, z, 1) 230 continue if ((jscal .eq. 1) .or.(jscal .eq. 3)) then do 240 i = 1,n z(i) = z(i)/sz(i) 240 continue endif if (jpre .gt. 0) then call slap_scopy(n, z, 1, wk, 1) call sslui(n, wk, z, nelt, ia, ja, a, isym, rpar, ipar) nmsl = nmsl + 1 endif return end subroutine slap_sscal(n,sa,sx,incx) ! scales a vector by a constant. ! uses unrolled loops for increment equal to 1. ! jack dongarra, linpack, 3/11/78. ! modified to correct problem with negative increments, 9/29/88. implicit none real sa, sx(*) integer i, ix, incx, m, mp1, n if (n.le.0)return if (incx.eq.1)go to 20 ! code for increment not equal to 1 ix = 1 if (incx.lt.0)ix = (-n+1)*incx + 1 do 10 i = 1,n sx(ix) = sa*sx(ix) ix = ix + incx 10 continue return ! code for increment equal to 1 ! clean-up loop 20 m = mod(n,5) if ( m .eq. 0 ) go to 40 do 30 i = 1,m sx(i) = sa*sx(i) 30 continue if ( n .lt. 5 ) return 40 mp1 = m + 1 do 50 i = mp1,n,5 sx(i) = sa*sx(i) sx(i + 1) = sa*sx(i + 1) sx(i + 2) = sa*sx(i + 2) sx(i + 3) = sa*sx(i + 3) sx(i + 4) = sa*sx(i + 4) 50 continue return end subroutine sorth(vnew, v, hes, n, ll, ldhes, kmp, snormw) implicit none integer n, ll, ldhes, kmp real vnew(*), v(n,*), hes(ldhes,*), snormw real slap_snrm2, slap_sdot ! internal variables. integer i, i0 real arg, sumdsq, tem, vnrm ! get norm of unaltered vnew for later use. vnrm = slap_snrm2(n, vnew, 1) ! ------------------------------------------------------------------- ! perform the modified gram-schmidt procedure on vnew =a*v(ll). ! scaled inner products give new column of hes. ! projections of earlier vectors are subtracted from vnew. ! ------------------------------------------------------------------- i0 = max0(1,ll-kmp+1) do 10 i = i0,ll hes(i,ll) = slap_sdot(n, v(1,i), 1, vnew, 1) tem = -hes(i,ll) call slap_saxpy(n, tem, v(1,i), 1, vnew, 1) 10 continue ! ------------------------------------------------------------------- ! compute snormw = norm of vnew. if vnew is small compared ! to its input value (in norm), then reorthogonalize vnew to ! v(*,1) through v(*,ll). correct if relative correction ! exceeds 1000*(unit roundoff). finally, correct snormw using ! the dot products involved. ! ------------------------------------------------------------------- snormw = slap_snrm2(n, vnew, 1) if (vnrm + 0.001e0*snormw .ne. vnrm) return sumdsq = 0.0e0 do 30 i = i0,ll tem = -slap_sdot(n, v(1,i), 1, vnew, 1) if (hes(i,ll) + 0.001e0*tem .eq. hes(i,ll)) go to 30 hes(i,ll) = hes(i,ll) - tem call slap_saxpy(n, tem, v(1,i), 1, vnew, 1) sumdsq = sumdsq + tem**2 30 continue if (sumdsq .eq. 0.0e0) return arg = amax1(0.0e0,snormw**2 - sumdsq) snormw = sqrt(arg) return end subroutine sheqr(a, lda, n, q, info, ijob) implicit none integer lda, n, info, ijob real a(lda,*), q(*) ! local variables. integer i, iq, j, k, km1, kp1, nm1 real c, s, t, t1, t2 if (ijob .gt. 1) go to 70 ! ------------------------------------------------------------------- ! a new factorization is desired. ! ------------------------------------------------------------------- ! qr decomposition without pivoting. info = 0 do 60 k = 1, n km1 = k - 1 kp1 = k + 1 ! compute k-th column of r. ! first, multiply the k-th column of a by the previous ! k-1 givens rotations. if (km1 .lt. 1) go to 20 do 10 j = 1, km1 i = 2*(j-1) + 1 t1 = a(j,k) t2 = a(j+1,k) c = q(i) s = q(i+1) a(j,k) = c*t1 - s*t2 a(j+1,k) = s*t1 + c*t2 10 continue ! compute givens components c and s. 20 continue iq = 2*km1 + 1 t1 = a(k,k) t2 = a(kp1,k) if ( t2.eq.0.0e0 ) then c = 1.0e0 s = 0.0e0 elseif( abs(t2).ge.abs(t1) ) then t = t1/t2 s = -1.0e0/sqrt(1.0e0+t*t) c = -s*t else t = t2/t1 c = 1.0e0/sqrt(1.0e0+t*t) s = -c*t endif q(iq) = c q(iq+1) = s a(k,k) = c*t1 - s*t2 if ( a(k,k).eq.0.0e0 ) info = k 60 continue return ! ------------------------------------------------------------------- ! the old factorization of a will be updated. a row and a ! column has been added to the matrix a. n by n-1 is now ! the old size of the matrix. ! ------------------------------------------------------------------- 70 continue nm1 = n - 1 ! ------------------------------------------------------------------- ! multiply the new column by the n previous givens rotations. ! ------------------------------------------------------------------- do 100 k = 1,nm1 i = 2*(k-1) + 1 t1 = a(k,n) t2 = a(k+1,n) c = q(i) s = q(i+1) a(k,n) = c*t1 - s*t2 a(k+1,n) = s*t1 + c*t2 100 continue ! ------------------------------------------------------------------- ! complete update of decomposition by forming last givens ! rotation, and multiplying it times the column ! vector(a(n,n),a(np1,n)). ! ------------------------------------------------------------------- info = 0 t1 = a(n,n) t2 = a(n+1,n) if ( t2.eq.0.0e0 ) then c = 1.0e0 s = 0.0e0 elseif( abs(t2).ge.abs(t1) ) then t = t1/t2 s = -1.0e0/sqrt(1.0e0+t*t) c = -s*t else t = t2/t1 c = 1.0e0/sqrt(1.0e0+t*t) s = -c*t endif iq = 2*n - 1 q(iq) = c q(iq+1) = s a(n,n) = c*t1 - s*t2 if (a(n,n) .eq. 0.0e0) info = n return end subroutine srlcal(n, kmp, ll, maxl, v, q, rl, snormw, prod, $ r0nrm) implicit none integer n, kmp, ll, maxl real v(n,*), q(*), rl(n), snormw ! internal variables. integer i, ip1, i2, k, llp1, llm1 real c, s, tem, prod, r0nrm if (kmp .eq. maxl) then ! calculate rl. start by copying v(*,1) into rl. call slap_scopy(n, v(1,1), 1, rl, 1) llm1 = ll - 1 do 20 i = 1,llm1 ip1 = i + 1 i2 = i*2 s = q(i2) c = q(i2-1) do 10 k = 1,n rl(k) = s*rl(k) + c*v(k,ip1) 10 continue 20 continue s = q(2*ll) c = q(2*ll-1)/snormw llp1 = ll + 1 do 30 k = 1,n rl(k) = s*rl(k) + c*v(k,llp1) 30 continue endif ! when kmp < maxl, rl vector already partially calculated. ! scale rl by r0nrm*prod to obtain the residual rl. tem = r0nrm*prod call slap_sscal(n, tem, rl, 1) return end subroutine shels(a, lda, n, q, b) implicit none integer lda, n real a(lda,*), b(*), q(*) ! local variables. integer iq, k, kb, kp1 real c, s, t, t1, t2 ! minimize(b-a*x,b-a*x). first form q*b. do 20 k = 1, n kp1 = k + 1 iq = 2*(k-1) + 1 c = q(iq) s = q(iq+1) t1 = b(k) t2 = b(kp1) b(k) = c*t1 - s*t2 b(kp1) = s*t1 + c*t2 20 continue ! now solve r*x = q*b. do 40 kb = 1, n k = n + 1 - kb b(k) = b(k)/a(k,k) t = -b(k) call slap_saxpy(k-1, t, a(1,k), 1, b(1), 1) 40 continue return end subroutine slap_saxpy(n,sa,sx,incx,sy,incy) ! constant times a vector plus a vector. ! uses unrolled loop for increments equal to one. ! jack dongarra, linpack, 3/11/78. implicit none integer i, incx, incy, ix, iy, m, mp1, n real sx(*), sy(*), sa if (n.le.0)return if (sa .eq. 0.0) return if (incx.eq.1.and.incy.eq.1)go to 20 ! code for unequal increments or equal increments ! not equal to 1 ix = 1 iy = 1 if (incx.lt.0)ix = (-n+1)*incx + 1 if (incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n sy(iy) = sy(iy) + sa*sx(ix) ix = ix + incx iy = iy + incy 10 continue return ! code for both increments equal to 1 ! clean-up loop 20 m = mod(n,4) if ( m .eq. 0 ) go to 40 do 30 i = 1,m sy(i) = sy(i) + sa*sx(i) 30 continue if ( n .lt. 4 ) return 40 mp1 = m + 1 do 50 i = mp1,n,4 sy(i) = sy(i) + sa*sx(i) sy(i + 1) = sy(i + 1) + sa*sx(i + 1) sy(i + 2) = sy(i + 2) + sa*sx(i + 2) sy(i + 3) = sy(i + 3) + sa*sx(i + 3) 50 continue return end subroutine ssmv( n, x, y, nelt, ia, ja, a, isym ) !***purpose slap column format sparse matrix vector product. ! routine to calculate the sparse matrix vector product: ! y = a*x. ! *arguments: ! n :in integer. ! order of the matrix. ! x :in real x(n). ! the vector that should be multiplied by the matrix. ! y :out real y(n). ! the product of the matrix and the vector. ! nelt :in integer. ! number of non-zeros stored in a. ! ia :in integer ia(nelt). ! ja :in integer ja(n+1). ! a :in integer a(nelt). ! these arrays should hold the matrix a in the slap column ! format. see "description", below. ! isym :in integer. ! flag to indicate symmetric storage format. ! if isym=0, all nonzero entries of the matrix are stored. ! if isym=1, the matrix is symmetric, and only the upper ! or lower triangle of the matrix is stored. ! *cautions: ! this routine assumes that the matrix a is stored in slap ! column format. it does not check for this (for speed) and ! evil, ugly, ornery and nasty things will happen if the matrix ! data structure is, in fact, not slap column. beware of the ! wrong data structure implicit none integer n, nelt, ia(nelt), ja(nelt), isym integer i, ibgn, iend, j, jbgn, jend, irow, icol real a(nelt), x(n), y(n) ! zero out the result vector. do 10 i = 1, n y(i) = 0.0 10 continue ! multiply by a. do 30 icol = 1, n ibgn = ja(icol) iend = ja(icol+1)-1 do 20 i = ibgn, iend y(ia(i)) = y(ia(i)) + a(i)*x(icol) 20 continue 30 continue if ( isym.eq.1 ) then ! the matrix is non-symmetric. need to get the other half in... ! this loops assumes that the diagonal is the first entry in ! each column. do 50 irow = 1, n jbgn = ja(irow)+1 jend = ja(irow+1)-1 if ( jbgn.gt.jend ) goto 50 do 40 j = jbgn, jend y(irow) = y(irow) + a(j)*x(ia(j)) 40 continue 50 continue endif return end real function slap_sdot(n,sx,incx,sy,incy) ! forms the dot product of two vectors. ! uses unrolled loops for increments equal to one. ! jack dongarra, linpack, 3/11/78. implicit none real sx(*),sy(*),stemp integer i,incx,incy,ix,iy,m,mp1,n stemp = 0.0e0 slap_sdot = 0.0e0 if (n.le.0)return if (incx.eq.1.and.incy.eq.1)go to 20 ! code for unequal increments or equal increments ! not equal to 1 ix = 1 iy = 1 if (incx.lt.0)ix = (-n+1)*incx + 1 if (incy.lt.0)iy = (-n+1)*incy + 1 do 10 i = 1,n stemp = stemp + sx(ix)*sy(iy) ix = ix + incx iy = iy + incy 10 continue slap_sdot = stemp return ! code for both increments equal to 1 ! clean-up loop 20 m = mod(n,5) if ( m .eq. 0 ) go to 40 do 30 i = 1,m stemp = stemp + sx(i)*sy(i) 30 continue if ( n .lt. 5 ) go to 60 40 mp1 = m + 1 do 50 i = mp1,n,5 stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) + * sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4) 50 continue 60 slap_sdot = stemp return end real function slap_snrm2 ( n, sx, incx) implicit none integer next, i, j, n, nn, incx real sx(*), cutlo, cuthi, hitest, sum, xmax, zero, one data zero, one /0.0e0, 1.0e0/ ! euclidean norm of the n-vector stored in sx() with storage ! increment incx . ! if n .le. 0 return with result = 0. ! if n .ge. 1 then incx must be .ge. 1 ! four phase method using two built-in constants that are ! hopefully applicable to all machines. ! cutlo = maximum of sqrt(u/eps) over all known machines. ! cuthi = minimum of sqrt(v) over all known machines. ! where ! eps = smallest no. such that eps + 1. .gt. 1. ! u = smallest positive no. (underflow limit) ! v = largest no. (overflow limit) ! brief outline of algorithm.. ! phase 1 scans zero components. ! move to phase 2 when a component is nonzero and .le. cutlo ! move to phase 3 when a component is .gt. cutlo ! move to phase 4 when a component is .ge. cuthi/m ! where m = n for x() real and m = 2*n for complex. data cutlo, cuthi / 4.441e-16, 1.304e19 / if (n .gt. 0) go to 10 slap_snrm2 = zero go to 300 10 next = 30 sum = zero nn = n * incx ! begin main loop i = 1 20 if (next.eq.30) goto 30 if (next.eq.50) goto 50 if (next.eq.70) goto 70 if (next.eq.110) goto 110 30 if ( abs(sx(i)) .gt. cutlo) go to 85 next = 50 xmax = zero ! phase 1. sum is zero 50 if ( sx(i) .eq. zero) go to 200 if ( abs(sx(i)) .gt. cutlo) go to 85 ! prepare for phase 2. next = 70 go to 105 ! prepare for phase 4. 100 i = j next = 110 sum = (sum / sx(i)) / sx(i) 105 xmax = abs(sx(i)) go to 115 ! phase 2. sum is small. ! scale to avoid destructive underflow. 70 if ( abs(sx(i)) .gt. cutlo ) go to 75 ! common code for phases 2 and 4. ! in phase 4 sum is large. scale to avoid overflow. 110 if ( abs(sx(i)) .le. xmax ) go to 115 sum = one + sum * (xmax / sx(i))**2 xmax = abs(sx(i)) go to 200 115 sum = sum + (sx(i)/xmax)**2 go to 200 ! prepare for phase 3. 75 sum = (sum * xmax) * xmax ! for real or d.p. set hitest = cuthi/n ! for complex set hitest = cuthi/(2*n) 85 hitest = cuthi/float( n ) ! phase 3. sum is mid-range. no scaling. do 95 j =i,nn,incx if (abs(sx(j)) .ge. hitest) go to 100 sum = sum + sx(j)**2 95 continue slap_snrm2 = sqrt( sum ) go to 300 200 continue i = i + incx if ( i .le. nn ) go to 20 ! end of main loop. ! compute square root and adjust for scaling. slap_snrm2 = xmax * sqrt(sum) 300 continue return end function issgmr(n, b, x, xl, nelt, ia, ja, a, isym, $ nmsl, itol, tol, itmax, iter, err, iunit, r, z, dz, $ rwork, iwork, rnrm, bnrm, sb, sx, jscal, $ kmp, lgmr, maxl, maxlp1, v, q, snormw, prod, r0nrm, $ hes, jpre) implicit none integer kmp, lgmr, maxl, maxlp1, jpre, nmsl, n, nelt integer ia(nelt), ja(nelt), iwork(*) real dxnrm, rnrm, r0nrm, snormw, solnrm, prod real slap_snrm2, sb(*), sx(*), q(*), v(n,*) real b(*), x(*), r(*), z(*), dz(*), rwork(*) real hes(maxlp1,*), xl(*), a(nelt) ! local variables. integer i, itol, iter, itmax, issgmr, iunit, ielmax integer jscal, isym real tol, err, bnrm, fuzz, ratmax, rat real c,dlnrm,rho,s,tem ! common /solblk/ soln(1) ! save solnrm issgmr = 0 if ( itol.eq.0 ) then ! use input from spigmr to determine if stop conditions are met. err = rnrm/(bnrm + 1.e-20) endif if ( (itol.gt.0) .and. (itol.le.3) ) then ! use srlcal to calculate the scaled residual vector. ! store answer in r. if ( lgmr.ne.0 ) call srlcal(n, kmp, lgmr, maxl, v, q, r, $ snormw, prod, r0nrm) if ( itol.le.2 ) then ! err = ||residual||/||righthandside||(2-norms). err = slap_snrm2(n, r, 1)/(bnrm + 1.e-20) ! unscale r by r0nrm*prod when kmp < maxl. if ( (kmp.lt.maxl) .and. (lgmr.ne.0) ) then tem = 1.0e0/(r0nrm*prod) call slap_sscal(n, tem, r, 1) endif elseif ( itol.eq.3 ) then ! err = max |(minv*residual)(i)/x(i)| ! when jpre .lt. 0, r already contains minv*residual. if ( jpre.gt.0 ) then call sslui(n, r, dz, nelt, ia, ja, a, isym, rwork, $ iwork) nmsl = nmsl + 1 endif ! unscale r by r0nrm*prod when kmp < maxl. if ( (kmp.lt.maxl) .and. (lgmr.ne.0) ) then tem = 1.0e0/(r0nrm*prod) call slap_sscal(n, tem, r, 1) endif fuzz = 1.0e-20 ielmax = 1 ratmax = abs(dz(1))/amax1(abs(x(1)),fuzz) do 25 i = 2, n rat = abs(dz(i))/amax1(abs(x(i)),fuzz) if ( rat.gt.ratmax ) then ielmax = i ratmax = rat endif 25 continue err = ratmax if ( ratmax.le.tol ) issgmr = 1 if ( iunit.gt.0 ) write(iunit,1020) iter, ielmax, ratmax return endif endif if ( iunit.ne.0 ) then if ( iter.eq.0 ) then write(iunit,1000) maxl, kmp, n, itol endif ! write(6,*) 'itol, iter =', itol, iter ! write(6,*) 'rnrm, bnrm =', rnrm, bnrm write(iunit,1010) iter, rnrm/(bnrm + 1.e-20), err endif if ( err.le.tol ) issgmr = 1 1000 format(' generalized minimum residual(',i3,i3,') for ', $ 'n, itol = ',i6, i6, $ /' iter',' natral err est',' error estimate') 1010 format(1x,i4,1x,e16.7,1x,e16.7) 1020 format(1x,' iter = ',i5, ' ielmax = ',i5, $ ' |r(ielmax)/x(ielmax)| = ',e12.5) #endif return end