      SUBROUTINE PBCTRSV( ICONTXT, UPLO, TRANS, DIAG, XDIST, N, NB, NZ,
     $                    A, LDA, X, INCX, IAROW, IACOL, IXPOS, XWORK,
     $                    WORK )
*
*  -- PB-BLAS routine (version 2.1) --
*     University of Tennessee, Knoxville, Oak Ridge National Laboratory.
*     April 28, 1996
*
*     Jaeyoung Choi, Oak Ridge National Laboratory
*     Jack Dongarra, University of Tennessee and Oak Ridge National Lab.
*     David Walker,  Oak Ridge National Laboratory
*
*     .. Scalar Arguments ..
      CHARACTER*1        DIAG, TRANS, UPLO, XDIST, XWORK
      INTEGER            IACOL, IAROW, ICONTXT, INCX, IXPOS, LDA, N, NB,
     $                   NZ
*     ..
*     .. Array Arguments ..
      COMPLEX            A( LDA, * ), WORK( * ), X( * )
*     ..
*
*  Purpose
*  =======
*
*  PBCTRSV is a parallel blocked version of the level-2 BLAS routine
*  CTRSV.
*  PBCTRSV solves one of the matrix equations based on block cyclic
*  distribution.
*
*     op( A ) * Y = X
*
*  where X and Y are n vectors, and A is a unit, or non-unit, upper or
*  lower triangular matrix.  op( A ) is one of
*
*     op( A ) = A,  A**T,  or  A**H
*
*  where the size of the matrix op( A ) is N-by-N. The length N vector
*  X is distributed on columnwise or rowwise.  It is overwritten on Y.
*
*  The first elements of the matrices A, and X can be located inside of
*  the first blocks.
*
*  Parameters
*  ==========
*
*  ICONTXT (input) INTEGER
*          ICONTXT is the BLACS mechanism for partitioning communication
*          space.  A defining property of a context is that a message in
*          a context cannot be sent or received in another context.  The
*          BLACS context includes the definition of a grid, and each
*          process' coordinates in it.
*
*  UPLO    (input) CHARACTER*1
*          UPLO specifies whether the matrix A is an upper or lower
*          triangular matrix as follows:
*
*             UPLO = 'U',  A is an upper triangular matrix.
*             UPLO = 'L',  A is a  lower triangular matrix.
*
*  TRANS   (input) CHARACTER*1
*          TRANS specifies the form of op( A ) to be used in
*          the matrix multiplication as follows:
*
*             TRANS = 'N',  op( A ) = A.
*             TRANS = 'T',  op( A ) = A**T.
*             TRANS = 'C',  op( A ) = A**H.
*
*  DIAG    (input) CHARACTER*1
*          DIAG specifies whether or not A is unit triangular as
*          follows:
*
*             DIAG = 'U'   A is assumed to be unit triangular.
*             DIAG = 'N'   A is not assumed to be unit
*                                 triangular.
*
*  XDIST   (input) CHARACTER*1
*          XDIST specifies the distribution of vectors x and y
*          as follows:
*
*             XDIST = 'C',  x is distributed columnwise
*                           or in a column of processes
*             XDIST = 'R',  x is distributed rowwise
*                           or in a row of processes
*
*  N       (input) INTEGER
*          N specifies the (global) number of rows and columns of the
*          matrix A.  N >= 0.
*
*  NB      (input) INTEGER
*          NB specifies the row and column block size of matrix A.
*          It also specifies the block size of the vector X.  NB >= 1.
*
*  NZ      (input) INTEGER
*          NZ is the row and column offset to specify the row and column
*          distance from the beginning of the block to the first
*          element of A.  And it also specifies the offset to the first
*          element of the vector X.  0 <= NZ < NB.
*
*  A       (input) COMPLEX array of DIMENSION ( LDA, Nq ).
*          The N-by-N part of the array A must contain the (global)
*          triangular matrix, such that when UPLO = 'U', the leading
*          N-by-N upper triangular part of the array  A  must contain
*          the upper triangular part of the (global) matrix and the
*          strictly lower triangular part of A is not referenced,
*          and when  UPLO = 'L', the leading N-by-N lower triangular
*          part of the array A must contain the lower triangular part
*          of the (global) matrix and the strictly upper triangular
*          part of A is not referenced.
*          Note that when DIAG = `U', the diagonal elements of A are
*          not referenced either, but are assumed to be unity.
*
*  LDA     (input) INTEGER
*          LDA specifies the first dimension of A as declared in the
*          calling (sub) program.  LDA >= MAX(1,Np).
*
*  X       (input) COMPLEX array of DIMENSION at least
*          ( 1  + ( Np - 1 ) * abs( INCX ) ) if XDIST = 'C', or
*          ( 1  + ( Nq - 1 ) * abs( INCX ) ) if XDIST = 'R',
*          The incremented array X must contain the vector X.
*          On exit, X is overwritten by the updated vector X.
*
*  INCX    (input) INTEGER
*          INCX specifies the increment for the elements of X.
*          INCX <> 0.
*
*  IAROW   (input) INTEGER
*          IAROW specifies a row of the process template, which holds
*          the  first  block of  the  matrix A.  0 <= IAROW < NPROW.
*
*  IACOL   (input) INTEGER
*          IACOL specifies  a column of the process template, which
*          holds the first block of the matrix A.  0 <= IACOL < NPCOL.
*
*  IXPOS   (input) INTEGER
*          If XDIST = 'C', IXPOS specifies a column of the process
*          template which holds the vector X.  If XDIST = 'R', IXPOS
*          specifies a row of the process template which holds the
*          vector X.
*
*  XWORK   (input) CHARACTER*1
*          XWORK determines whether X is a workspace or not.
*
*             XWORK = 'Y':  X is workspace in other processes.
*                           X is overwitten with temporal X in other
*                           processes. It is assumed that processes
*                           have sufficient space to store temporal
*                           (local) X.
*             XWORK = 'N':  Data of X in other processes will be
*                           untouched (unchanged).
*
*          If transposition of X is involved with the computation,
*          the argument is ignored.
*
*  WORK    (workspace) COMPLEX array of dimension Size(WORK).
*          It will store copy of X or X' if necessary.
*
*  Communication Scheme
*  ====================
*
*  The communication scheme of the routine is determined by the
*  conditions  and it is independent of  machine characteristics,
*  so that it is not an option of the routine.  Increasing  ring  or
*  Decreasing ring is used depeding on the following input conditions.
*
*  COMM='Increasing ring' when UPLO = 'U', XDIST = 'L', TRANS = 'T'/'C'
*                           or UPLO = 'U', XDIST = 'R', TRANS = 'N'
*                           or UPLO = 'L', XDIST = 'L', TRANS = 'N'
*                           or UPLO = 'L', XDIST = 'R', TRANS = 'T'/'C'
*
*  COMM='Decreasing ring' when UPLO = 'U', XDIST = 'L', TRANS = 'N'
*                           or UPLO = 'U', XDIST = 'R', TRANS = 'T'/'C'
*                           or UPLO = 'L', XDIST = 'L', TRANS = 'T'/'C'
*                           or UPLO = 'L', XDIST = 'R', TRANS = 'N'
*
*  Parameters Details
*  ==================
*
*  Lx      It is  a local portion  of L  owned  by  a process,  (L is
*          replaced by M, or N,  and x  is replaced  by  either  p
*          (=NPROW) or q (=NPCOL)).  The value is determined by  L, LB,
*          x, and MI,  where  LB is  a block size  and MI is a  row  or
*          column position in a process template.  Lx is equal to  or
*          less than  Lx0 = CEIL( L, LB*x ) * LB.
*
*  Memory Requirement of WORK
*  ==========================
*
*  NN   = N + NZ
*  Npb  = CEIL( NN, NB*NPROW )
*  Nqb  = CEIL( NN, NB*NPCOL )
*  Np0  = NUMROC( NN, NB, 0, 0, NPROW ) ~= Npb * NB
*  Nq0  = NUMROC( NN, NB, 0, 0, NPCOL ) ~= Nqb * NB
*  LCMQ = LCM / NPCOL
*  LCMP = LCM / NPROW
*
*  (1)  TRANS = 'N'
*    (i)  XDIST = 'Col'
*         Size(WORK) = Np0    (if XWORK <> 'Y')
*                    + NB * MAX[ 1, CEIL(Q-1,P) ]
*    (ii) XDIST = 'Row'
*         Size(WORK) = Np0
*                    + MAX[ NB*MAX[ CEIL(Npb,LCMP), CEIL(Nqb,LCMQ) ],
*                           NB*CEIL(Q-1,P) ]
*
*  (2) TRANS = 'T'/'C'
*    (i)  XDIST = 'Row'
*         Size(WORK) = Nq0    (if XWORK <> 'Y')
*                    + NB * MAX[ 1, CEIL(P-1,Q) ]
*    (ii) XDIST = 'Col'
*         Size(WORK) = Nq0
*                    + MAX[ NB*MAX[ CEIL(Npb,LCMP), CEIL(Nqb,LCMQ) ],
*                           NB*CEIL(P-1,Q) ]
*
*  Notes
*  -----
*  More precise space can be computed as
*
*  CEIL(Npb,LCMP)*NB => NUMROC( NUMROC(NN,NB,0,0,NPROW), NB, 0, 0, LCMP)
*                    = NUMROC( Np0, NB, 0, 0, LCMP )
*  CEIL(Nqb,LCMQ)*NB => NUMROC( NUMROC(NN,NB,0,0,NPCOL), NB, 0, 0, LCMQ)
*                    = NUMROC( Nq0, NB, 0, 0, LCMQ )
*
*  =====================================================================
*
*     .. Parameters ..
      COMPLEX            ONE, ZERO
      PARAMETER          ( ONE  = ( 1.0E+0, 0.0E+0 ),
     $                   ZERO = ( 0.0E+0, 0.0E+0 ) )
*     ..
*     .. Local Scalars ..
      LOGICAL            COLUMN, NOTRAN, UPPER, XDATA
      INTEGER            ICURCOL, ICURROW, IDEST, II, IIN, IN, INFO,
     $                   IPART, IPT, IRDB, IRPB, J, JB, JJ, JJN, JN,
     $                   JNZ, KB, KDIST, KZ, LCM, MBTROW, MLFCOL,
     $                   MRTCOL, MTPROW, MYCOL, MYROW, NCOMM, NLENG, NN,
     $                   NP, NPART, NPCOL, NPROW, NQ, NREST, NXTCOL,
     $                   NXTROW
      COMPLEX            DUMMY
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ICEIL, ILCM, NUMROC
      EXTERNAL           ICEIL, ILCM, LSAME, NUMROC
*     ..
*     .. External Subroutines ..
      EXTERNAL           BLACS_GRIDINFO, CCOPY, CGEBR2D, CGEBS2D, CGEMM,
     $                   CGEMV, CGERV2D, CGESD2D, CTRSV, PBCTRNV,
     $                   PBCVECADD, PXERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          MAX, MIN, MOD
*     ..
*     .. Executable Statements ..
*
*     Quick return if possible.
*
      IF( N.EQ.0 ) RETURN
*
      CALL BLACS_GRIDINFO( ICONTXT, NPROW, NPCOL, MYROW, MYCOL )
*
*     Test the input parameters.
*
      UPPER  = LSAME( UPLO,  'U' )
      NOTRAN = LSAME( TRANS, 'N' )
      COLUMN = LSAME( XDIST, 'C' )
*
      INFO = 0
      IF(      ( .NOT.UPPER               ).AND.
     $         ( .NOT.LSAME( UPLO , 'L' ) )            ) THEN
        INFO = 2
      ELSE IF( ( .NOT.NOTRAN              ).AND.
     $         ( .NOT.LSAME( TRANS, 'T' ) ).AND.
     $         ( .NOT.LSAME( TRANS, 'C' ) )            ) THEN
        INFO = 3
      ELSE IF( ( .NOT.LSAME( DIAG , 'U' ) ).AND.
     $         ( .NOT.LSAME( DIAG , 'N' ) )            ) THEN
        INFO = 4
      ELSE IF( ( .NOT.COLUMN ).AND.
     $         ( .NOT.LSAME( XDIST, 'R' ) )            ) THEN
        INFO = 5
      ELSE IF( N  .LT. 0                               ) THEN
        INFO = 6
      ELSE IF( NB .LT. 1                               ) THEN
        INFO = 7
      ELSE IF( NZ .LT. 0 .OR. NZ.GE.NB                 ) THEN
        INFO = 8
      ELSE IF( LDA.LT. 1                               ) THEN
        INFO = 10
      ELSE IF( INCX.EQ.0                               ) THEN
        INFO = 12
      ELSE IF( IAROW .GE. NPROW                        ) THEN
        INFO = 13
      ELSE IF( IACOL .GE. NPCOL                        ) THEN
        INFO = 14
      ELSE IF( ( IXPOS.LT.0 ) .OR.
     $         ( IXPOS.GE.NPCOL .AND. COLUMN ) .OR.
     $         ( IXPOS.GE.NPROW .AND. (.NOT.COLUMN)  ) ) THEN
        INFO = 15
      END IF
*
   10 CONTINUE
      IF( INFO.NE.0 ) THEN
        CALL PXERBLA( ICONTXT, 'PBCTRSV ', INFO )
        RETURN
      END IF
*
*     Start the operations.
*
*     Initialize parameters
*
      NN = N + NZ
      NP = NUMROC( NN, NB, MYROW, IAROW, NPROW )
      IF( MYROW.EQ.IAROW ) NP = NP - NZ
      NQ = NUMROC( NN, NB, MYCOL, IACOL, NPCOL )
      IF( MYCOL.EQ.IACOL ) NQ = NQ - NZ
*
      IF( LDA.LT.MAX(1,NP) ) THEN
        INFO = 10
        GO TO 10
      END IF
*
*                                  MTPROW (Top)
*                                      |
*     MLFCOL <- MYCOL -> MRTCOL,     MYROW
*     (Left)             (Right)       |
*                                 MBTROW (Bottom)
*
      MLFCOL = MOD( NPCOL+MYCOL-1, NPCOL )
      MRTCOL = MOD( MYCOL+1, NPCOL )
      MTPROW = MOD( NPROW+MYROW-1, NPROW )
      MBTROW = MOD( MYROW+1, NPROW )
      LCM    = ILCM( NPROW, NPCOL )
      XDATA  = .FALSE.
*
*     Start the operations.
*
      IF( UPPER ) THEN
        IF( NOTRAN ) THEN
*
*         Form  X := Up( A ) \ X
*                  __________
*          ||      \_        |     ||
*          ||        \_      |     ||
*          |X  :=      \_ A  |  \  |X
*          ||            \_  |     ||
*          ||              \_|     ||
*
          IPT     = NP + 1
          ICURROW = MOD( ICEIL(NN,NB)+IAROW-1, NPROW )
          ICURCOL = MOD( ICEIL(NN,NB)+IACOL-1, NPCOL )
*
          IF( COLUMN ) THEN
            IF( LSAME( XWORK, 'Y' ) ) THEN
              IF( MYCOL.EQ.IXPOS ) THEN
                IF( MYCOL.NE.ICURCOL ) THEN
                  CALL CGESD2D( ICONTXT, 1, NP, X, INCX,
     $                          MYROW, ICURCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NP, ZERO, DUMMY, 1,
     $                            ZERO, X, INCX )
                END IF
              ELSE IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGERV2D( ICONTXT, 1, NP, X, INCX, MYROW, IXPOS )
              ELSE
                CALL PBCVECADD( ICONTXT, 'G', NP, ZERO, DUMMY, 1, ZERO,
     $                          X, INCX )
              END IF
              XDATA = .TRUE.
              IPT = 1
*
            ELSE
              IF( MYCOL.EQ.IXPOS ) THEN
                IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL CCOPY( NP, X, INCX, WORK, 1 )
                ELSE
                  CALL CGESD2D( ICONTXT, 1, NP, X, INCX,
     $                          MYROW, ICURCOL )
                END IF
              ELSE IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, IXPOS )
              END IF
            END IF
            IDEST = IXPOS
*
          ELSE
            CALL PBCTRNV( ICONTXT, 'Row', 'Trans', N, NB, NZ, X, INCX,
     $                    ZERO, WORK, 1, IXPOS, IACOL, IAROW, ICURCOL,
     $                    WORK(IPT) )
            IDEST = ICURCOL
          END IF
*
          IF( .NOT.XDATA .AND. MYCOL.NE.ICURCOL )
     $      CALL PBCVECADD( ICONTXT, 'G', NP, ZERO, DUMMY, 1, ZERO,
     $                      WORK, 1 )
*
          IRPB  = MOD( NPCOL+IDEST-MYCOL-1, NPCOL )
          IRDB  = NB * MOD( IRPB+1, NPCOL )
          IRPB  = NB * IRPB
          NCOMM = NB * (NPCOL-1)
          KB    = MOD( NN, NB )
          IF( KB.EQ.0 ) KB = NB
*
          II = NP - NB + 1
          IF( MYROW.EQ.ICURROW ) II = NP - KB + 1
          IN = II
          JJ = NQ - NB + 1
          IF( MYCOL.EQ.ICURCOL ) JJ = NQ - KB + 1
          JB = KB
          KZ = 0
*
          IF( XDATA ) THEN
            DO 20 J = 1, NN, NB
              NLENG = N - J - KB + 1
              IF( NLENG.LT.0 ) THEN
                NLENG = 0
                KZ = NZ
                IF( MYROW.EQ.ICURROW ) THEN
                  JB = JB - KZ
                  II = 1
                END IF
                IF( MYCOL.EQ.ICURCOL ) JJ = 1
              END IF
              NXTROW = MOD( NPROW+ICURROW-1, NPROW )
              NXTCOL = MOD( NPCOL+ICURCOL-1, NPCOL )
              IF( MYROW.EQ.ICURROW ) IN = II - NB
*
              IF( MYCOL.EQ.ICURCOL ) THEN
*
*               Receive updated vector from previous column of processes
*
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG+JB, NCOMM), NB, ICURROW,
     $                            MYROW, NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MYROW, MRTCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, X((II+JB-NPART-1)*INCX+1),
     $                            INCX )
                END IF
*
*               X(II) <== A(II,JJ) \ X(II),  ( X(II) = WORK(II) )
*               where A(II,JJ) is a upper triangular matrix
*
                IF( MYROW.EQ.ICURROW ) THEN
                  CALL CTRSV( 'Upper', 'No', DIAG, JB, A(II,JJ), LDA,
     $                        X((II-1)*INCX+1), INCX )
                  CALL CCOPY( JB, X((II-1)*INCX+1), INCX, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Col', 'D-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Col', 'D-ring', 1, JB-KZ,
     $                          WORK(IPT), 1, ICURROW, MYCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG.GT.0 ) THEN
*
*                 Update the (NPCOL-1) vector first
*
                  NREST = MIN( NLENG, NCOMM )
                  NPART = NUMROC( NREST, NB, ICURROW, MYROW+1, NPROW )
                  IIN   = IN + NB - NPART
*
                  CALL CGEMM( 'No', 'Trans', 1, NPART, JB, -ONE,
     $                        WORK(IPT), 1, A(IIN,JJ), LDA, ONE,
     $                        X((IIN-1)*INCX+1), INCX )
*                 CALL CGEMV( 'No', NPART, JB, -ONE, A(IIN,JJ), LDA,
*     $                       WORK(IPT),1, ONE, X((IIN-1)*INCX+1),INCX )
*
*                 Send updated vector to next column of processes
*
                  IF( NPCOL.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, X((IIN-1)*INCX+1),
     $                            INCX, MYROW, MLFCOL )
*
*                 Update the rest of the matrix
*
                  CALL CGEMV( 'No', IIN-1, JB, -ONE, A(1,JJ), LDA,
     $                        WORK(IPT), 1, ONE, X, INCX )
                END IF
*
*               Send the solution vector to destination (IDEST column)
*
                JJN = J + KB - 1
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JJN-NB), NB, MYROW,
     $                            ICURROW+1, NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, X((II+JB-1)*INCX+1),
     $                          INCX, MYROW, MRTCOL )
                END IF
*
                IF( NLENG.GT.0 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JJN), NB, MYROW, ICURROW,
     $                            NPROW )
                  CALL CGESD2D( ICONTXT, 1, NPART, X((IN+NB-1)*INCX+1),
     $                          INCX, MYROW, MLFCOL )
                END IF
*
                JJ = JJ - NB
              END IF
*
              II = IN
              JB = NB
              ICURROW = NXTROW
              ICURCOL = NXTCOL
   20       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURCOL = MOD( ICURCOL+1, NPCOL )
            IF( ICURCOL.NE.IDEST ) THEN
              KDIST = MOD( NPCOL+IDEST-ICURCOL, NPCOL )
              NPART = NUMROC( MIN(NN, KDIST*NB), NB, MYROW, IAROW,
     $                        NPROW )
              IF( MYROW.EQ.IAROW ) NPART = NPART - NZ
*
              IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART, X, INCX, MYROW, IDEST )
              ELSE IF( MYCOL.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART, X, INCX,
     $                        MYROW, ICURCOL )
              END IF
            END IF
*
          ELSE
            DO 30 J = 1, NN, NB
              NLENG = N - J - KB + 1
              IF( NLENG.LT.0 ) THEN
                NLENG = 0
                KZ = NZ
                IF( MYROW.EQ.ICURROW ) THEN
                  JB = JB - KZ
                  II = 1
                END IF
                IF( MYCOL.EQ.ICURCOL ) JJ = 1
              END IF
              NXTROW = MOD( NPROW+ICURROW-1, NPROW )
              NXTCOL = MOD( NPCOL+ICURCOL-1, NPCOL )
              IF( MYROW.EQ.ICURROW ) IN = II - NB
*
              IF( MYCOL.EQ.ICURCOL ) THEN
*
*               Receive updated vector from previous column of processes
*
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG+JB, NCOMM), NB, ICURROW,
     $                            MYROW, NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MYROW, MRTCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, WORK(II+JB-NPART), 1 )
                END IF
*
*               X(II) <== A(II,JJ) \ X(II),  ( X(II) = WORK(II) )
*               where A(II,JJ) is a upper triangular matrix
*
                IF( MYROW.EQ.ICURROW ) THEN
                  CALL CTRSV( 'Upper', 'No', DIAG, JB, A(II,JJ), LDA,
     $                        WORK(II), 1 )
                  CALL CCOPY( JB, WORK(II), 1, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Col', 'D-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Col', 'D-ring', 1, JB-KZ,
     $                          WORK(IPT), 1, ICURROW, MYCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG.GT.0 ) THEN
*
*                 Update the (NPCOL-1) vector first
*
                  NREST = MIN( NLENG, NCOMM )
                  NPART = NUMROC( NREST, NB, ICURROW, MYROW+1, NPROW )
                  IIN   = IN + NB - NPART
*
                  CALL CGEMM( 'No', 'No', NPART, 1, JB, -ONE, A(IIN,JJ),
     $                        LDA, WORK(IPT), JB, ONE, WORK(IIN),
     $                        MAX(1, NPART) )
*                 CALL CGEMV( 'No', NPART, JB, -ONE, A(IIN,JJ), LDA,
*    $                        WORK(IPT), 1, ONE, WORK(IIN), 1 )
*
*                 Send updated vector to next column of processes
*
                  IF( NPCOL.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, WORK(IIN), 1,
     $                            MYROW, MLFCOL )
*
*                 Update the rest of the matrix
*
                  CALL CGEMV( 'No', IIN-1, JB, -ONE, A(1,JJ), LDA,
     $                        WORK(IPT), 1, ONE, WORK, 1 )
                END IF
*
*               Send the solution vector to destination (IDEST column)
*
                JJN = J + KB - 1
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JJN-NB), NB, MYROW,
     $                            ICURROW+1, NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(II+JB), 1,
     $                          MYROW, MRTCOL )
                END IF
*
                IF( NLENG.GT.0 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JJN), NB, MYROW, ICURROW,
     $                            NPROW )
                  CALL CGESD2D( ICONTXT, 1, NPART, WORK(IN+NB), 1,
     $                          MYROW, MLFCOL )
                END IF
*
                JJ = JJ - NB
              END IF
*
              II = IN
              JB = NB
              ICURROW = NXTROW
              ICURCOL = NXTCOL
   30       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURCOL = MOD( ICURCOL+1, NPCOL )
            IF( ICURCOL.NE.IDEST ) THEN
              KDIST = MOD( NPCOL+IDEST-ICURCOL, NPCOL )
              NPART = NUMROC( MIN(NN, KDIST*NB), NB, MYROW, IAROW,
     $                        NPROW )
              IF( MYROW.EQ.IAROW ) NPART = NPART - NZ
*
              IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART, WORK, 1, MYROW, IDEST )
              ELSE IF( MYCOL.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART, WORK, 1,
     $                        MYROW, ICURCOL )
              END IF
            END IF
          END IF
*
          IF( COLUMN ) THEN
            IF( .NOT.XDATA .AND. MYCOL.EQ.IDEST )
     $        CALL CCOPY( NP, WORK, 1, X, INCX )
          ELSE
            CALL PBCTRNV( ICONTXT, 'Col', 'Trans', N, NB, NZ, WORK, 1,
     $                    ZERO, X, INCX, IAROW, IDEST, IXPOS, IACOL,
     $                    WORK(IPT) )
          END IF
*
        ELSE
*
*         Form  X := X / Up( A ).
*                                        __________
*                                        \_        |
*          __________       __________     \_      |
*          -----X----  :=   -----X----  \    \_ A  |
*                                              \_  |
*                                                \_|
*
          IPT     = NQ + 1
          ICURROW = IAROW
          ICURCOL = IACOL
*
          IF( COLUMN ) THEN
            CALL PBCTRNV( ICONTXT, 'Col', 'Trans', N, NB, NZ, X, INCX,
     $                    ZERO, WORK, 1, IAROW, IXPOS, ICURROW, IACOL,
     $                    WORK(IPT) )
            IDEST = ICURROW
*
          ELSE
            IF( LSAME( XWORK, 'Y' ) ) THEN
              IF( MYROW.EQ.IXPOS ) THEN
                IF( MYROW.NE.ICURROW ) THEN
                  CALL CGESD2D( ICONTXT, 1, NQ, X, INCX,
     $                          ICURROW, MYCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NQ, ZERO, DUMMY, 1,
     $                            ZERO, X, INCX )
                END IF
              ELSE IF( MYROW.EQ.ICURROW ) THEN
                CALL CGERV2D( ICONTXT, 1, NQ, X, INCX, IXPOS, MYCOL )
              ELSE
                CALL PBCVECADD( ICONTXT, 'G', NQ, ZERO, DUMMY, 1, ZERO,
     $                          X, INCX )
              END IF
              XDATA = .TRUE.
              IPT = 1
*
            ELSE
              IF( MYROW.EQ.IXPOS ) THEN
                IF( MYROW.EQ.ICURROW ) THEN
                  CALL CCOPY( NQ, X, INCX, WORK, 1 )
                ELSE
                  CALL CGESD2D( ICONTXT, 1, NQ, X, INCX,
     $                          ICURROW, MYCOL )
                END IF
              ELSE IF( MYROW.EQ.ICURROW ) THEN
                CALL CGERV2D( ICONTXT, 1, NQ, WORK, 1, IXPOS, MYCOL )
              END IF
            END IF
            IDEST = IXPOS
          END IF
*
          IF( .NOT.XDATA .AND. MYROW.NE.ICURROW )
     $      CALL PBCVECADD( ICONTXT, 'G', NQ, ZERO, DUMMY, 1, ZERO,
     $                      WORK, 1 )
*
          IRPB  = MOD( NPROW+MYROW-IDEST-1, NPROW )
          IRDB  = NB * MOD( IRPB+1, NPROW )
          IRPB  = NB * IRPB
          NCOMM = NB * (NPROW-1)
*
          KZ = NZ
          II = 1
          JJ = 1
          JN = 1
*
          IF( XDATA ) THEN
            DO 40 J = 1, NN, NB
              NLENG  = NN - J + 1
              JB     = MIN( NLENG, NB ) - KZ
              NXTROW = MOD( ICURROW+1, NPROW )
              NXTCOL = MOD( ICURCOL+1, NPCOL )
              IF( MYCOL.EQ.ICURCOL ) JN = JJ + JB
*
              IF( MYROW.EQ.ICURROW ) THEN
*
*               Receive updated vector from previous row of processes
*
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG, NCOMM), NB, MYCOL, ICURCOL,
     $                            NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MTPROW, MYCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, X((JJ-1)*INCX+1), INCX )
                END IF
*
*               X(JJ) <== A(II,JJ) \ X(JJ),  (X(JJ) = WORK(JJ))
*               where A(II,JJ) is a upper triangular matrix
*
                IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL CTRSV( 'Upper', TRANS, DIAG, JB, A(II,JJ), LDA,
     $                        X((JJ-1)*INCX+1), INCX )
                  CALL CCOPY( JB, X((JJ-1)*INCX+1), INCX, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Row', 'I-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Row', 'I-ring', 1, JB,
     $                          WORK(IPT), 1, MYROW, ICURCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG-KZ.GT.JB ) THEN
*
*                 Update the (NPROW-1) vector first
*
                  NREST = MIN( NLENG-NB, NCOMM )
                  NPART = NUMROC( NREST, NB, MYCOL, ICURCOL+1, NPCOL )
*
                  CALL CGEMV( TRANS, JB, NPART, -ONE, A(II,JN), LDA,
     $                        WORK(IPT),1, ONE, X((JN-1)*INCX+1), INCX )
*
*                 Send updated vector to next column of processes
*
                  IF( NPROW.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, X((JN-1)*INCX+1),
     $                            INCX, MBTROW, MYCOL )
*
*                 Update the rest of the matrix
*
                  IPART = NUMROC( NLENG-NB-NREST, NB, MYCOL+LCM,
     $                            ICURCOL+NPROW, NPCOL )
                  CALL CGEMV( TRANS, JB, IPART, -ONE,
     $                        A(II,JN+NPART), LDA, WORK(IPT), 1, ONE,
     $                        X((JN+NPART-1)*INCX+1), INCX )
                END IF
*
*               Send the solution vector to the destination (IDEST row)
*
                JNZ = MAX( 0, J-NZ-1 )
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JNZ), NB, ICURCOL, MYCOL+1,
     $                            NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART,
     $                         X((JJ-NPART-1)*INCX+1), INCX,
     $                         MTPROW, MYCOL )
                END IF
*
                IF( NLENG-KZ.GT.JB .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JNZ+JB), NB, ICURCOL, MYCOL,
     $                            NPCOL )
                  CALL CGESD2D( ICONTXT, 1, NPART,
     $                          X((JN-NPART-1)*INCX+1), INCX,
     $                          MBTROW, MYCOL )
                END IF
*
                II = II + JB
              END IF
*
              JJ = JN
              KZ = 0
              ICURROW = NXTROW
              ICURCOL = NXTCOL
   40       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURROW = MOD( NPROW+ICURROW-1, NPROW )
            IF( ICURROW.NE.IDEST ) THEN
              KDIST = MOD( NPROW+ICURROW-IDEST-1, NPROW )
              IF( ICEIL(NN,NB).GT.MOD(IDEST-IAROW+NPROW,NPROW) ) THEN
                NPART = NUMROC( KDIST*NB+JB, NB, MYCOL+KDIST, ICURCOL-1,
     $                          NPCOL )
              ELSE
                NPART = NUMROC( NN, NB, MYCOL, IACOL, NPCOL )
                IF( MYCOL.EQ.IACOL ) NPART = NPART - NZ
              END IF
*
              IF( MYROW.EQ.ICURROW ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART,
     $                        X((JJ-NPART-1)*INCX+1), INCX,
     $                        IDEST, MYCOL )
              ELSE IF( MYROW.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART,
     $                        X((JJ-NPART-1)*INCX+1), INCX,
     $                        ICURROW, MYCOL )
              END IF
            END IF
*
          ELSE
            DO 50 J = 1, NN, NB
              NLENG  = NN - J + 1
              JB     = MIN( NLENG, NB ) - KZ
              NXTROW = MOD( ICURROW+1, NPROW )
              NXTCOL = MOD( ICURCOL+1, NPCOL )
              IF( MYCOL.EQ.ICURCOL ) JN = JJ + JB
*
              IF( MYROW.EQ.ICURROW ) THEN
*
*               Receive updated vector from previous row of processes
*
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG, NCOMM), NB, MYCOL, ICURCOL,
     $                            NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MTPROW, MYCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE,  WORK(JJ), 1 )
                END IF
*
*               X(JJ) <== A(II,JJ) \ X(JJ),  (X(JJ) = WORK(JJ))
*               where A(II,JJ) is a upper triangular matrix
*
                IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL CTRSV( 'Upper', TRANS, DIAG, JB, A(II,JJ), LDA,
     $                        WORK(JJ), 1 )
                  CALL CCOPY( JB, WORK(JJ), 1, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Row', 'I-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Row', 'I-ring', 1, JB,
     $                          WORK(IPT), 1, MYROW, ICURCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG-KZ.GT.JB ) THEN
*
*                 Update the (NPROW-1) vector first
*
                  NREST = MIN( NLENG-NB, NCOMM )
                  NPART = NUMROC( NREST, NB, MYCOL, ICURCOL+1, NPCOL )
*
                  CALL CGEMV( TRANS, JB, NPART, -ONE, A(II,JN), LDA,
     $                        WORK(IPT), 1, ONE, WORK(JN), 1 )
*
*                 Send updated vector to next column of processes
*
                  IF( NPROW.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, WORK(JN), 1,
     $                            MBTROW, MYCOL )
*
*                 Update the rest of the matrix
*
                  IPART = NUMROC( NLENG-NB-NREST, NB, MYCOL+LCM,
     $                            ICURCOL+NPROW, NPCOL )
                  CALL CGEMV( TRANS, JB, IPART, -ONE, A(II,JN+NPART),
     $                        LDA, WORK(IPT), 1, ONE, WORK(JN+NPART),
     $                        1 )
                END IF
*
*               Send the solution vector to the destination (IDEST row)
*
                JNZ = MAX( 0, J-NZ-1 )
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JNZ), NB, ICURCOL, MYCOL+1,
     $                            NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(JJ-NPART), 1,
     $                         MTPROW, MYCOL )
                END IF
*
                IF( NLENG-KZ.GT.JB .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JNZ+JB), NB, ICURCOL, MYCOL,
     $                            NPCOL )
                  CALL CGESD2D( ICONTXT, 1, NPART, WORK(JN-NPART), 1,
     $                          MBTROW, MYCOL )
                END IF
*
                II = II + JB
              END IF
*
              JJ = JN
              KZ = 0
              ICURROW = NXTROW
              ICURCOL = NXTCOL
   50       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURROW = MOD( NPROW+ICURROW-1, NPROW )
            IF( ICURROW.NE.IDEST ) THEN
              KDIST = MOD( NPROW+ICURROW-IDEST-1, NPROW )
              IF( ICEIL(NN,NB).GT.MOD(IDEST-IAROW+NPROW,NPROW) ) THEN
                NPART = NUMROC( KDIST*NB+JB, NB, MYCOL+KDIST, ICURCOL-1,
     $                          NPCOL )
              ELSE
                NPART = NUMROC( NN, NB, MYCOL, IACOL, NPCOL )
                IF( MYCOL.EQ.IACOL ) NPART = NPART - NZ
              END IF
*
              IF( MYROW.EQ.ICURROW ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART, WORK(JJ-NPART), 1,
     $                        IDEST, MYCOL )
              ELSE IF( MYROW.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART, WORK(JJ-NPART), 1,
     $                        ICURROW, MYCOL )
              END IF
            END IF
          END IF
*
          IF( COLUMN ) THEN
            CALL PBCTRNV( ICONTXT, 'Row', 'Trans', N, NB, NZ, WORK, 1,
     $                    ZERO, X, INCX, IDEST, IACOL, IAROW, IXPOS,
     $                    WORK(IPT) )
          ELSE
            IF( .NOT.XDATA .AND. MYROW.EQ.IDEST )
     $        CALL CCOPY( NQ, WORK, 1, X, INCX )
          END IF
        END IF
*
*     if ( LSAME( UPLO, 'L' ) then
*
      ELSE
*
        IF( NOTRAN ) THEN
*
*         Form  X := Lo( A ) \ X
*                    _
*           ||      | \_            ||
*           ||      |   \_          ||
*           |X  :=  |  A  \_     \  |X
*           ||      |       \_      ||
*           ||      |_________|     ||
*
          IPT     = NP + 1
          ICURROW = IAROW
          ICURCOL = IACOL
*
          IF( COLUMN ) THEN
            IF( LSAME( XWORK, 'Y' ) ) THEN
              IF( MYCOL.EQ.IXPOS ) THEN
                IF( MYCOL.NE.ICURCOL ) THEN
                  CALL CGESD2D( ICONTXT, 1, NP, X, INCX,
     $                          MYROW, ICURCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NP, ZERO, DUMMY, 1,
     $                            ZERO, X, INCX )
                END IF
              ELSE IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGERV2D( ICONTXT, 1, NP, X, INCX, MYROW, IXPOS )
              ELSE
                CALL PBCVECADD( ICONTXT, 'G', NP, ZERO, DUMMY, 1, ZERO,
     $                          X, INCX )
              END IF
              XDATA = .TRUE.
              IPT = 1
*
            ELSE
              IF( MYCOL.EQ.IXPOS ) THEN
                IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL CCOPY( NP, X, INCX, WORK, 1 )
                ELSE
                  CALL CGESD2D( ICONTXT, 1, NP, X, INCX,
     $                          MYROW, ICURCOL )
                END IF
              ELSE IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGERV2D( ICONTXT, 1, NP, WORK, 1, MYROW, IXPOS )
              END IF
            END IF
            IDEST = IXPOS
*
          ELSE
            CALL PBCTRNV( ICONTXT, 'Row', 'Trans', N, NB, NZ, X, INCX,
     $                    ZERO, WORK, 1, IXPOS, IACOL, IAROW, ICURCOL,
     $                    WORK(IPT) )
            IDEST = IACOL
          END IF
*
          IF( .NOT.XDATA .AND. MYCOL.NE.ICURCOL )
     $      CALL PBCVECADD( ICONTXT, 'G', NP, ZERO, DUMMY, 1, ZERO,
     $                      WORK, 1 )
*
          IRPB  = MOD( NPCOL+MYCOL-IDEST-1, NPCOL )
          IRDB  = NB * MOD( IRPB+1, NPCOL )
          IRPB  = NB * IRPB
          NCOMM = NB * (NPCOL-1)
*
          KZ = NZ
          II = 1
          IN = 1
          JJ = 1
*
          IF( XDATA ) THEN
            DO 60 J = 1, NN, NB
              NLENG  = NN - J + 1
              JB     = MIN( NLENG, NB ) - KZ
              NXTROW = MOD( ICURROW+1, NPROW )
              NXTCOL = MOD( ICURCOL+1, NPCOL )
              IF( MYROW.EQ.ICURROW ) IN = II + JB
*
              IF( MYCOL.EQ.ICURCOL ) THEN
*
*               Receive updated vector from previous column of processes
*
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG, NCOMM), NB, MYROW, ICURROW,
     $                            NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MYROW, MLFCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, X((II-1)*INCX+1), INCX )
                END IF
*
*               X(II) <== A(II,JJ) \ X(II),  ( X(II) = WORK(II) )
*               where A(II,JJ) is a lower triangular matrix
*
                IF( MYROW.EQ.ICURROW ) THEN
                  CALL CTRSV( 'Lower', 'No', DIAG, JB, A(II,JJ), LDA,
     $                        X((II-1)*INCX+1), INCX )
                  CALL CCOPY( JB, X((II-1)*INCX+1), INCX, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Col', 'I-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Col', 'I-ring', 1, JB,
     $                          WORK(IPT), 1, ICURROW, MYCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG-KZ.GT.JB ) THEN
*
*                 Update the (NPCOL-1) vector first
*
                  NREST = MIN( NLENG-NB, NCOMM )
                  NPART = NUMROC( NREST, NB, MYROW, ICURROW+1, NPROW )
*
                  CALL CGEMM( 'No', 'Trans', 1, NPART, JB, -ONE,
     $                        WORK(IPT), 1, A(IN,JJ), LDA, ONE,
     $                        X((IN-1)*INCX+1), INCX )
*                 CALL CGEMV( 'No', NPART, JB, -ONE, A(IN,JJ), LDA,
*    $                        WORK(IPT), 1, ONE, X((IN-1)*INCX+1),INCX )
*
*                 Send updated vector to next column of processes
*
                  IF( NPCOL.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, X((IN-1)*INCX+1),
     $                            INCX, MYROW, MRTCOL )
*
*                 Update the rest of the matrix
*
                  IPART = NUMROC( NLENG-NB-NREST, NB, MYROW+LCM,
     $                            ICURROW+NPCOL, NPROW )
                  CALL CGEMV( 'No', IPART, JB, -ONE,
     $                        A(IN+NPART,JJ), LDA, WORK(IPT), 1, ONE,
     $                        X((IN+NPART-1)*INCX+1), INCX )
                END IF
*
*               Send the solution vector to destination (IDEST column)
*
                JNZ = MAX( 0, J-NZ-1 )
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JNZ), NB, ICURROW, MYROW+1,
     $                            NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART,
     $                          X((II-NPART-1)*INCX+1), INCX,
     $                          MYROW, MLFCOL )
                END IF
*
                IF( NLENG-KZ.GT.JB .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JNZ+JB), NB, ICURROW, MYROW,
     $                            NPROW )
                  CALL CGESD2D( ICONTXT, 1, NPART,
     $                          X((IN-NPART-1)*INCX+1), INCX,
     $                          MYROW, MRTCOL )
                END IF
*
                JJ = JJ + JB
              END IF
*
              II = IN
              KZ = 0
              ICURROW = NXTROW
              ICURCOL = NXTCOL
*
   60       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURCOL = MOD( NPCOL+ICURCOL-1, NPCOL )
            IF( ICURCOL.NE.IDEST ) THEN
              KDIST = MOD( NPCOL+ICURCOL-IDEST-1, NPCOL )
              IF( ICEIL(NN,NB).GT.MOD(IDEST-IACOL+NPCOL,NPCOL) ) THEN
                NPART = NUMROC( KDIST*NB+JB, NB, MYROW+KDIST, ICURROW-1,
     $                          NPROW )
              ELSE
                NPART = NUMROC( NN, NB, MYROW, IAROW, NPROW )
                IF( MYROW.EQ.IAROW ) NPART = NPART - NZ
              END IF
*
              IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART,
     $                        X((II-NPART-1)*INCX+1), INCX,
     $                        MYROW, IDEST )
              ELSE IF( MYCOL.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART,
     $                        X((II-NPART-1)*INCX+1), INCX,
     $                        MYROW, ICURCOL )
              END IF
            END IF
*
          ELSE
            DO 70 J = 1, NN, NB
              NLENG  = NN - J + 1
              JB     = MIN( NLENG, NB ) - KZ
              NXTROW = MOD( ICURROW+1, NPROW )
              NXTCOL = MOD( ICURCOL+1, NPCOL )
              IF( MYROW.EQ.ICURROW ) IN = II + JB
*
              IF( MYCOL.EQ.ICURCOL ) THEN
*
*               Receive updated vector from previous column of processes
*
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG, NCOMM), NB, MYROW, ICURROW,
     $                            NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MYROW, MLFCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, WORK(II), 1 )
                END IF
*
*               X(II) <== A(II,JJ) \ X(II),  ( X(II) = WORK(II) )
*               where A(II,JJ) is a lower triangular matrix
*
                IF( MYROW.EQ.ICURROW ) THEN
                  CALL CTRSV( 'Lower', 'No', DIAG, JB, A(II,JJ), LDA,
     $                        WORK(II), 1 )
                  CALL CCOPY( JB, WORK(II), 1, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Col', 'I-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Col', 'I-ring', 1, JB,
     $                          WORK(IPT), 1, ICURROW, MYCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG-KZ.GT.JB ) THEN
*
*                 Update the (NPCOL-1) vector first
*
                  NREST = MIN( NLENG-NB, NCOMM )
                  NPART = NUMROC( NREST, NB, MYROW, ICURROW+1, NPROW )
*
                  CALL CGEMM( 'No', 'No', NPART, 1, JB, -ONE, A(IN,JJ),
     $                        LDA, WORK(IPT), JB, ONE, WORK(IN),
     $                        MAX(1, NPART) )
*                 CALL CGEMV( 'No', NPART, JB, -ONE, A(IN,JJ), LDA,
*    $                        WORK(IPT), 1, ONE, WORK(IN), 1 )
*
*                 Send updated vector to next column of processes
*
                  IF( NPCOL.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, WORK(IN), 1,
     $                            MYROW, MRTCOL )
*
*                 Update the rest of the matrix
*
                  IPART = NUMROC( NLENG-NB-NREST, NB, MYROW+LCM,
     $                            ICURROW+NPCOL, NPROW )
                  CALL CGEMV( 'No', IPART, JB, -ONE, A(IN+NPART,JJ),
     $                        LDA, WORK(IPT), 1, ONE, WORK(IN+NPART),
     $                        1 )
                END IF
*
*               Send the solution vector to destination (IDEST column)
*
                JNZ = MAX( 0, J-NZ-1 )
                IF( J.GT.1 .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JNZ), NB, ICURROW, MYROW+1,
     $                            NPROW )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(II-NPART), 1,
     $                          MYROW, MLFCOL )
                END IF
*
                IF( NLENG-KZ.GT.JB .AND. NPCOL.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JNZ+JB), NB, ICURROW, MYROW,
     $                            NPROW )
                  CALL CGESD2D( ICONTXT, 1, NPART, WORK(IN-NPART), 1,
     $                          MYROW, MRTCOL )
                END IF
*
                JJ = JJ + JB
              END IF
*
              II = IN
              KZ = 0
              ICURROW = NXTROW
              ICURCOL = NXTCOL
*
   70       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURCOL = MOD( NPCOL+ICURCOL-1, NPCOL )
            IF( ICURCOL.NE.IDEST ) THEN
              KDIST = MOD( NPCOL+ICURCOL-IDEST-1, NPCOL )
              IF( ICEIL(NN,NB).GT.MOD(IDEST-IACOL+NPCOL,NPCOL) ) THEN
                NPART = NUMROC( KDIST*NB+JB, NB, MYROW+KDIST, ICURROW-1,
     $                          NPROW )
              ELSE
                NPART = NUMROC( NN, NB, MYROW, IAROW, NPROW )
                IF( MYROW.EQ.IAROW ) NPART = NPART - NZ
              END IF
*
              IF( MYCOL.EQ.ICURCOL ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART, WORK(II-NPART), 1,
     $                        MYROW, IDEST )
              ELSE IF( MYCOL.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART, WORK(II-NPART), 1,
     $                        MYROW, ICURCOL )
              END IF
            END IF
          END IF
*
          IF( COLUMN ) THEN
            IF( .NOT.XDATA .AND. MYCOL.EQ.IDEST )
     $        CALL CCOPY( NP, WORK, 1, X, INCX )
          ELSE
            CALL PBCTRNV( ICONTXT, 'Col', 'Trans', N, NB, NZ, WORK, 1,
     $                    ZERO, X, INCX, IAROW, IDEST, IXPOS, IACOL,
     $                    WORK(IPT) )
          END IF
*
        ELSE
*
*         Form X := X / Lo( A ).
*                                          _
*                                         | \_
*           __________     __________     |   \_
*           ----X----- :=  ----X-----  \  |  A  \_
*                                         |       \_
*                                         |_________|
*
          IPT     = NQ + 1
          ICURROW = MOD( ICEIL(NN,NB)+IAROW-1, NPROW )
          ICURCOL = MOD( ICEIL(NN,NB)+IACOL-1, NPCOL )
*
          IF( COLUMN ) THEN
            CALL PBCTRNV( ICONTXT, 'Col', 'Trans', N, NB, NZ, X, INCX,
     $                    ZERO, WORK, 1, IAROW, IXPOS, ICURROW, IACOL,
     $                    WORK(IPT) )
            IDEST = ICURROW
*
          ELSE
            IF( LSAME( XWORK, 'Y' ) ) THEN
              IF( MYROW.EQ.IXPOS ) THEN
                IF( MYROW.NE.ICURROW ) THEN
                  CALL CGESD2D( ICONTXT, 1, NQ, X, INCX,
     $                          ICURROW, MYCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NQ, ZERO, DUMMY, 1,
     $                            ZERO, X, INCX )
                END IF
              ELSE IF( MYROW.EQ.ICURROW ) THEN
                CALL CGERV2D( ICONTXT, 1, NQ, X, INCX, IXPOS, MYCOL )
              ELSE
                CALL PBCVECADD( ICONTXT, 'G', NQ, ZERO, DUMMY, 1, ZERO,
     $                          X, INCX )
              END IF
              XDATA = .TRUE.
              IPT = 1
*
            ELSE
              IF( MYROW.EQ.IXPOS ) THEN
                IF( MYROW.EQ.ICURROW ) THEN
                  CALL CCOPY( NQ, X, INCX, WORK, 1 )
                ELSE
                  CALL CGESD2D( ICONTXT, 1, NQ, X, INCX,
     $                          ICURROW, MYCOL )
                END IF
              ELSE IF( MYROW.EQ.ICURROW ) THEN
                CALL CGERV2D( ICONTXT, 1, NQ, WORK, 1, IXPOS, MYCOL )
              END IF
            END IF
            IDEST = IXPOS
          END IF
*
          IF( .NOT.XDATA .AND. MYROW.NE.ICURROW )
     $      CALL PBCVECADD( ICONTXT, 'G', NQ, ZERO, DUMMY, 1, ZERO,
     $                      WORK, 1 )
*
          IRPB  = MOD( NPROW+IDEST-MYROW-1, NPROW )
          IRDB  = NB * MOD( IRPB+1, NPROW )
          IRPB  = NB * IRPB
          NCOMM = NB * (NPROW-1)
          KB    = MOD( NN, NB )
          IF( KB.EQ.0 ) KB = NB
*
          II = NP - NB + 1
          IF( MYROW.EQ.ICURROW ) II = NP - KB + 1
          JJ = NQ - NB + 1
          IF( MYCOL.EQ.ICURCOL ) JJ = NQ - KB + 1
          JN = JJ
          JB = KB
          KZ = 0
*
          IF( XDATA ) THEN
            DO 80 J = 1, NN, NB
              NLENG = N - J - KB + 1
              IF( NLENG.LT.0 ) THEN
                NLENG = 0
                KZ = NZ
                IF( MYCOL.EQ.ICURCOL ) THEN
                  JB = JB - KZ
                  JJ = 1
                END IF
                IF( MYROW.EQ.ICURROW ) II = 1
              END IF
              NXTROW = MOD( NPROW+ICURROW-1, NPROW )
              NXTCOL = MOD( NPCOL+ICURCOL-1, NPCOL )
              IF( MYCOL.EQ.ICURCOL ) JN = JJ - NB
*
              IF( MYROW.EQ.ICURROW ) THEN
*
*               Receive updated vector from previous row of processes
*
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG+JB, NCOMM), NB, ICURCOL,
     $                            MYCOL, NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MBTROW, MYCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, X((JJ+JB-NPART-1)*INCX+1),
     $                            INCX )
                END IF
*
*               X(JJ) <== A(II,JJ) / X(JJ) ( X(JJ) = WORK(JJ) )
*               where A(II,JJ) is a lower triangular matrix
*
                IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL CTRSV( 'Lower', TRANS, DIAG, JB, A(II,JJ), LDA,
     $                        X((JJ-1)*INCX+1), INCX )
                  CALL CCOPY( JB, X((JJ-1)*INCX+1), INCX, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Row', 'D-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Row', 'D-ring', 1, JB-KZ,
     $                          WORK(IPT), 1, MYROW, ICURCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG.GT.0 ) THEN
*
*                 Update the (NPROW-1) vector first
*
                  NREST = MIN( NLENG, NCOMM )
                  NPART = NUMROC( NREST, NB, ICURCOL, MYCOL+1, NPCOL )
                  JJN   = JN + NB - NPART
                  CALL CGEMV( TRANS, JB, NPART, -ONE, A(II,JJN), LDA,
     $                        WORK(IPT),1, ONE, X((JJN-1)*INCX+1),INCX )
*
*                 Send updated vector to next column of processes
*
                  IF( NPROW.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, X((JJN-1)*INCX+1),
     $                            INCX, MTPROW, MYCOL )
*
*                 Update the rest of the matrix
*
                  CALL CGEMV( TRANS, JB, JJN-1, -ONE, A(II,1), LDA,
     $                        WORK(IPT), 1, ONE, X, INCX )
                END IF
*
*               Send the solution vector to the destination (IDEST row)
*
                JJN = J + KB - 1
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JJN-NB), NB, MYCOL,
     $                            ICURCOL+1, NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, X((JJ+JB-1)*INCX+1),
     $                          INCX, MBTROW, MYCOL )
                END IF
*
                IF( NLENG.GT.0 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JJN), NB, MYCOL, ICURCOL,
     $                            NPCOL )
                  CALL CGESD2D( ICONTXT, 1, NPART, X((JN+NB-1)*INCX+1),
     $                          INCX, MTPROW, MYCOL )
                END IF
*
                II = II - NB
              END IF
*
              JJ = JN
              JB = NB
              ICURROW = NXTROW
              ICURCOL = NXTCOL
   80       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURROW = MOD( ICURROW+1, NPROW )
            IF( ICURROW.NE.IDEST ) THEN
              KDIST = MOD( NPROW+IDEST-ICURROW, NPROW )
              NPART = NUMROC( MIN(NN, KDIST*NB), NB, MYCOL, IACOL,
     $                        NPCOL )
              IF( MYCOL. EQ. IACOL ) NPART = NPART - NZ
*
              IF( MYROW.EQ.ICURROW ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART, X, INCX, IDEST, MYCOL )
              ELSE IF( MYROW.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART, X, INCX,
     $                        ICURROW, MYCOL )
              END IF
            END IF
*
          ELSE
            DO 90 J = 1, NN, NB
              NLENG = N - J - KB + 1
              IF( NLENG.LT.0 ) THEN
                NLENG = 0
                KZ = NZ
                IF( MYCOL.EQ.ICURCOL ) THEN
                  JB = JB - KZ
                  JJ = 1
                END IF
                IF( MYROW.EQ.ICURROW ) II = 1
              END IF
              NXTROW = MOD( NPROW+ICURROW-1, NPROW )
              NXTCOL = MOD( NPCOL+ICURCOL-1, NPCOL )
              IF( MYCOL.EQ.ICURCOL ) JN = JJ - NB
*
              IF( MYROW.EQ.ICURROW ) THEN
*
*               Receive updated vector from previous row of processes
*
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(NLENG+JB, NCOMM), NB, ICURCOL,
     $                            MYCOL, NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(IPT), 1,
     $                          MBTROW, MYCOL )
                  CALL PBCVECADD( ICONTXT, 'G', NPART, ONE, WORK(IPT),
     $                            1, ONE, WORK(JJ+JB-NPART), 1 )
                END IF
*
*               X(JJ) <== A(II,JJ) / X(JJ) ( X(JJ) = WORK(JJ) )
*               where A(II,JJ) is a lower triangular matrix
*
                IF( MYCOL.EQ.ICURCOL ) THEN
                  CALL CTRSV( 'Lower', TRANS, DIAG, JB, A(II,JJ), LDA,
     $                        WORK(JJ), 1 )
                  CALL CCOPY( JB, WORK(JJ), 1, WORK(IPT), 1 )
                  CALL CGEBS2D( ICONTXT, 'Row', 'D-ring', 1, JB,
     $                          WORK(IPT), 1 )
                ELSE
                  CALL CGEBR2D( ICONTXT, 'Row', 'D-ring', 1, JB-KZ,
     $                          WORK(IPT), 1, MYROW, ICURCOL )
                END IF
*
*               Update the rest of data and prepare for the next step
*
                IF( NLENG.GT.0 ) THEN
*
*                 Update the (NPROW-1) vector first
*
                  NREST = MIN( NLENG, NCOMM )
                  NPART = NUMROC( NREST, NB, ICURCOL, MYCOL+1, NPCOL )
                  JJN   = JN + NB - NPART
                  CALL CGEMV( TRANS, JB, NPART, -ONE, A(II,JJN), LDA,
     $                        WORK(IPT), 1, ONE, WORK(JJN), 1 )
*
*                 Send updated vector to next column of processes
*
                  IF( NPROW.GT.1 )
     $              CALL CGESD2D( ICONTXT, 1, NPART, WORK(JJN), 1,
     $                            MTPROW, MYCOL )
*
*                 Update the rest of the matrix
*
                  CALL CGEMV( TRANS, JB, JJN-1, -ONE, A(II,1), LDA,
     $                        WORK(IPT), 1, ONE, WORK, 1 )
                END IF
*
*               Send the solution vector to the destination (IDEST row)
*
                JJN = J + KB - 1
                IF( J.GT.1 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRPB, JJN-NB), NB, MYCOL,
     $                            ICURCOL+1, NPCOL )
                  CALL CGERV2D( ICONTXT, 1, NPART, WORK(JJ+JB), 1,
     $                          MBTROW, MYCOL )
                END IF
*
                IF( NLENG.GT.0 .AND. NPROW.GT.1 ) THEN
                  NPART = NUMROC( MIN(IRDB, JJN), NB, MYCOL, ICURCOL,
     $                            NPCOL )
                  CALL CGESD2D( ICONTXT, 1, NPART, WORK(JN+NB), 1,
     $                          MTPROW, MYCOL )
                END IF
*
                II = II - NB
              END IF
*
              JJ = JN
              JB = NB
              ICURROW = NXTROW
              ICURCOL = NXTCOL
   90       CONTINUE
*
*           Uncopied solutions are moved to the first column of procs.
*
            ICURROW = MOD( ICURROW+1, NPROW )
            IF( ICURROW.NE.IDEST ) THEN
              KDIST = MOD( NPROW+IDEST-ICURROW, NPROW )
              NPART = NUMROC( MIN(NN, KDIST*NB), NB, MYCOL, IACOL,
     $                        NPCOL )
              IF( MYCOL. EQ. IACOL ) NPART = NPART - NZ
*
              IF( MYROW.EQ.ICURROW ) THEN
                CALL CGESD2D( ICONTXT, 1, NPART, WORK, 1, IDEST, MYCOL )
              ELSE IF( MYROW.EQ.IDEST ) THEN
                CALL CGERV2D( ICONTXT, 1, NPART, WORK, 1,
     $                        ICURROW, MYCOL )
              END IF
            END IF
          END IF
*
          IF( COLUMN ) THEN
            CALL PBCTRNV( ICONTXT, 'Row', 'Trans', N, NB, NZ, WORK, 1,
     $                    ZERO, X, INCX, IDEST, IACOL, IAROW, IXPOS,
     $                    WORK(IPT) )
          ELSE
            IF( .NOT.XDATA .AND. MYROW.EQ.IDEST )
     $        CALL CCOPY( NQ, WORK, 1, X, INCX )
          END IF
        END IF
      END IF
*
      RETURN
*
*     End of PBCTRSV
*
      END
