#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: mlNew.c,v 1.18 2001/01/27 21:32:36 knepley Exp $";
#endif
/*
   Defines a multilevel preconditioner due to Vivek Sarin for AIJ matrices
*/
#include "src/sles/pc/pcimpl.h"    /*I "pc.h" I*/
#include "src/mesh/impls/triangular/triimpl.h"
#include "src/grid/gridimpl.h"
#include <fcntl.h>
#if defined(PETSC_HAVE_UNISTD_H)
#include <unistd.h>
#endif
#if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
  #include <sys/procfs.h>
  #include <sys/hwperftypes.h>
  #include <sys/hwperfmacros.h>
#endif
#include "ml.h"

int PC_MLEvents[PC_ML_MAX_EVENTS];

#undef  __FUNCT__
#define __FUNCT__ "PCMultiLevelInitializePackage"
/*@C
  PCMultiLevelInitializePackage - This function initializes everything in the ML package. It is called
  from PetscDLLibraryRegister() when using dynamic libraries, and on the first call to PCCreate()
  when using static libraries.

  Input Parameter:
  path - The dynamic library path, or PETSC_NULL

  Level: developer

.keywords: PC, multilevel, initialize, package
.seealso: PetscInitialize()
@*/
int PCMultiLevelInitializePackage(char *path) {
  /* Event slots paired with their profiling names, listed in registration order */
  static const struct {
    int   event;  /* index into PC_MLEvents[] */
    char *name;   /* name shown in the PETSc log summary */
  } eventNames[] = {
    {PC_ML_SetUpInit,                        "MLSetUpInit"},
    {PC_ML_SetUpConstrained,                 "MLSetUpConstr"},
    {PC_ML_SetUpConstrainedBd,               "MLSetUpConstrBd"},
    {PC_ML_SetUpParallel,                    "MLSetUpParallel"},
    {PC_ML_ReducePartitionMesh,              "MLRdPartMesh"},
    {PC_ML_ReducePartitionRowCol,            "MLRdPartRowCol"},
    {PC_ML_ReduceFactor,                     "MLRdFactor"},
    {PC_ML_ReduceBdGrad,                     "MLRdBdGrad"},
    {PC_ML_ReduceBdGradExtract,              "MLRdBdGradExtrct"},
    {PC_ML_ReduceBdGradRowPartLocalToGlobal, "MLRdBdGrdRwPtL2G"},
    {PC_ML_ReduceShrinkMesh,                 "MLRdShrinkMesh"},
    {PC_ML_ApplySymmetricLeftParallel,       "MLApplySymmLeftP"},
    {PC_ML_ApplySymmetricRightParallel,      "MLApplySymmRghtP"},
    {PC_ML_QRFactorization,                  "MLQRFact"},
    {PC_ML_ApplyQR,                          "MLQRApply"},
    {PC_ML_CreateBdGrad,                     "MLCreateBdGrad"}
  };
  static PetscTruth registered = PETSC_FALSE;
  int               e, ierr;

  PetscFunctionBegin;
  /* Idempotent: only the first call performs any registration */
  if (registered == PETSC_TRUE) PetscFunctionReturn(0);
  registered = PETSC_TRUE;
  /* Register Classes */
  /* Register Constructors and Serializers */
  /* Register Events */
  for(e = 0; e < (int) (sizeof(eventNames)/sizeof(eventNames[0])); e++) {
    ierr = PetscLogEventRegister(&PC_MLEvents[eventNames[e].event], eventNames[e].name, PC_COOKIE);CHKERRQ(ierr);
  }
  /* Process info exclusions */
  /* Process summary exclusions */
  /* Add options checkers */
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PCConvert_Multilevel"
/*
  PCConvert_Multilevel - Converts a Mathematica MLData object, read through the
  viewer stored in ml->mathViewer, into this PC_Multilevel object. A no-op when
  no Mathematica viewer was attached.

  Input Parameter:
. pc - The multilevel preconditioner context

  Note: raises PETSC_ERR_SUP if a Mathematica viewer is present but PETSc was
  built without Mathematica support.
*/
int PCConvert_Multilevel(PC pc)
{
  PC_Multilevel *ml = (PC_Multilevel *) pc->data;
#ifdef PETSC_HAVE_MATHEMATICA
  int            ierr;
#endif

  PetscFunctionBegin;
  /* Convert a Mathematica MLData object into a PC_Multilevel object */
  if (ml->mathViewer != PETSC_NULL) {
#ifdef PETSC_HAVE_MATHEMATICA
    ierr = PetscViewerMathematicaMultiLevelConvert(ml->mathViewer, pc);                                   CHKERRQ(ierr);
#else
    /* Fix: previously raised SETERRQ with a blank " " message, which gave the
       user no clue why the conversion failed */
    SETERRQ(PETSC_ERR_SUP, "Mathematica support not compiled in; cannot convert MLData object");
#endif
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCDestroyStructures_Multilevel"
/*
  PCDestroyStructures_Multilevel - Frees every structure built during setup of
  the multilevel preconditioner: the variable orderings and gradient operator,
  the mesh hierarchy, the per-partition factorizations, the row/column
  numbering arrays, and (when running on more than one processor) the parallel
  interface objects.

  Input Parameter:
. pc - The multilevel preconditioner context

  Note: when ml->useMath is PETSC_TRUE the mesh, factorization, and numbering
  arrays were produced by the Mathematica converter rather than allocated here,
  so those frees are skipped.
*/
static int PCDestroyStructures_Multilevel(PC pc)
{
  PC_Multilevel *ml = (PC_Multilevel *) pc->data;
  int            numProcs, numRows, numCols;
  int            level, part;
  int            ierr;

  PetscFunctionBegin;
  /* Destroy gradient structures */
  ierr = MPI_Comm_size(pc->comm, &numProcs);                                                              CHKERRQ(ierr);
  ierr = VarOrderingDestroy(ml->sOrder);                                                                  CHKERRQ(ierr);
  ierr = VarOrderingDestroy(ml->tOrder);                                                                  CHKERRQ(ierr);
  ierr = LocalVarOrderingDestroy(ml->sLocOrder);                                                          CHKERRQ(ierr);
  ierr = LocalVarOrderingDestroy(ml->tLocOrder);                                                          CHKERRQ(ierr);
  ierr = GMatDestroy(ml->B);                                                                              CHKERRQ(ierr);
  /* reduceSol and diag are only created under certain options, so guard them */
  if (ml->reduceSol != PETSC_NULL)
    {ierr = GVecDestroy(ml->reduceSol);                                                                   CHKERRQ(ierr);}
  if (ml->diag      != PETSC_NULL)
    {ierr = GVecDestroy(ml->diag);                                                                        CHKERRQ(ierr);}

  /* Destroy mesh structures */
  if (ml->useMath == PETSC_FALSE) {
    ierr = PetscFree(ml->numElements);                                                                    CHKERRQ(ierr);
    ierr = PetscFree(ml->numVertices);                                                                    CHKERRQ(ierr);
    /* NOTE the inclusive <= bound: the mesh hierarchy holds numLevels+1 level
       arrays, unlike the factor/numbering loops below which use < numLevels */
    for(level = 0; level <= ml->numLevels; level++) {
      ierr = PetscFree(ml->meshes[level][MESH_OFFSETS]);                                                  CHKERRQ(ierr);
      ierr = PetscFree(ml->meshes[level][MESH_ADJ]);                                                      CHKERRQ(ierr);
      ierr = PetscFree(ml->meshes[level][MESH_ELEM]);                                                     CHKERRQ(ierr);
      ierr = PetscFree(ml->meshes[level]);                                                                CHKERRQ(ierr);
    }
    ierr = PetscFree(ml->vertices);                                                                       CHKERRQ(ierr);
    ierr = PetscFree(ml->meshes);                                                                         CHKERRQ(ierr);
  }

  /* Destroy factorization structures */
  if (ml->useMath == PETSC_FALSE)
  {
    for(level = 0; level < ml->numLevels; level++)
    {
      /* A level with no partitions has no factors or reduction vectors */
      if (ml->numPartitions[level] == 0)
        continue;
      for(part = 0; part < ml->numPartitions[level]; part++)
      {
        numRows = ml->numPartitionRows[level][part];
        numCols = ml->numPartitionCols[level][part];
        if (numRows > 0)
        {
          ierr = PetscFree(ml->factors[level][part][FACT_U]);                                             CHKERRQ(ierr);
          /* QR factors only exist when a QR was chosen for this block size;
             mirror that decision here to free exactly what was allocated */
          if (PCMultiLevelDoQR_Private(pc, numRows, numCols) == PETSC_TRUE)
          {
            ierr = PetscFree(ml->factors[level][part][FACT_QR]);                                          CHKERRQ(ierr);
            ierr = PetscFree(ml->factors[level][part][FACT_TAU]);                                         CHKERRQ(ierr);
          }
        }
        if (numCols > 0)
        {
          ierr = PetscFree(ml->factors[level][part][FACT_DINV]);                                          CHKERRQ(ierr);
          ierr = PetscFree(ml->factors[level][part][FACT_V]);                                             CHKERRQ(ierr);
        }
        ierr = PetscFree(ml->factors[level][part]);                                                       CHKERRQ(ierr);
      } 
      ierr = PetscFree(ml->factors[level]);                                                               CHKERRQ(ierr);
      ierr = MatDestroy(ml->grads[level]);                                                                CHKERRQ(ierr);
      ierr = VecDestroy(ml->bdReduceVecs[level]);                                                         CHKERRQ(ierr);
      ierr = VecDestroy(ml->colReduceVecs[level]);                                                        CHKERRQ(ierr);
      ierr = VecDestroy(ml->colReduceVecs2[level]);                                                       CHKERRQ(ierr);
    }
    ierr = PetscFree(ml->factors);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(ml->grads);                                                                          CHKERRQ(ierr);
    ierr = PetscFree(ml->bdReduceVecs);                                                                   CHKERRQ(ierr);
    ierr = PetscFree(ml->colReduceVecs);                                                                  CHKERRQ(ierr);
    ierr = PetscFree(ml->colReduceVecs2);                                                                 CHKERRQ(ierr);
    ierr = PetscFree(ml->interiorWork);                                                                   CHKERRQ(ierr);
    ierr = PetscFree(ml->interiorWork2);                                                                  CHKERRQ(ierr);
    if (ml->svdWork != PETSC_NULL)
      {ierr = PetscFree(ml->svdWork);                                                                     CHKERRQ(ierr);}
  }

  /* Destroy numbering structures */
  if (ml->useMath == PETSC_FALSE)
  {
    for(level = 0; level < ml->numLevels; level++)
    {
      if (ml->numPartitions[level] == 0)
        continue;
      /* Per-partition column and interior-row index arrays exist only when
         the corresponding partition is nonempty */
      for(part = 0; part < ml->numPartitions[level]; part++)
      {
        if (ml->numPartitionCols[level][part] > 0)
          {ierr = PetscFree(ml->colPartition[level][part]);                                               CHKERRQ(ierr);}
        if (ml->numPartitionRows[level][part] > 0)
          {ierr = PetscFree(ml->rowPartition[level][PART_ROW_INT][part]);                                 CHKERRQ(ierr);}
      } 
      /* Slots numPartitions[level] and numPartitions[level]+1 of the row counts
         hold the boundary and residual row sets respectively, each stored as a
         single array at index 0 */
      if (ml->numPartitionRows[level][ml->numPartitions[level]] > 0)
        {ierr = PetscFree(ml->rowPartition[level][PART_ROW_BD][0]);                                       CHKERRQ(ierr);}
      if (ml->numPartitionRows[level][ml->numPartitions[level]+1] > 0)
        {ierr = PetscFree(ml->rowPartition[level][PART_ROW_RES][0]);                                      CHKERRQ(ierr);}
      ierr = PetscFree(ml->rowPartition[level][PART_ROW_INT]);                                            CHKERRQ(ierr);
      ierr = PetscFree(ml->rowPartition[level][PART_ROW_BD]);                                             CHKERRQ(ierr);
      ierr = PetscFree(ml->rowPartition[level][PART_ROW_RES]);                                            CHKERRQ(ierr);
      ierr = PetscFree(ml->numPartitionCols[level]);                                                      CHKERRQ(ierr);
      ierr = PetscFree(ml->colPartition[level]);                                                          CHKERRQ(ierr);
      ierr = PetscFree(ml->numPartitionRows[level]);                                                      CHKERRQ(ierr);
      ierr = PetscFree(ml->rowPartition[level]);                                                          CHKERRQ(ierr);
    }
    ierr = PetscFree(ml->numPartitions);                                                                  CHKERRQ(ierr);
    ierr = PetscFree(ml->numPartitionCols);                                                               CHKERRQ(ierr);
    ierr = PetscFree(ml->numPartitionRows);                                                               CHKERRQ(ierr);
    ierr = PetscFree(ml->colPartition);                                                                   CHKERRQ(ierr);
    ierr = PetscFree(ml->rowPartition);                                                                   CHKERRQ(ierr);
  }
  if (ml->range    != PETSC_NULL) {
    ierr = PetscFree(ml->range);                                                                          CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(ml->rangeScatter);                                                             CHKERRQ(ierr);
  if (ml->nullCols != PETSC_NULL) {
    ierr = PetscFree(ml->nullCols);                                                                       CHKERRQ(ierr);
  }

  /* Destroy parallel structures (only allocated when running on >1 processor) */
  if (numProcs > 1)
  {
    ierr = MatDestroy(ml->locB);                                                                          CHKERRQ(ierr);
    ierr = MatDestroy(ml->interfaceB);                                                                    CHKERRQ(ierr);
    ierr = VecDestroy(ml->interiorRhs);                                                                   CHKERRQ(ierr);
    ierr = VecScatterDestroy(ml->interiorScatter);                                                        CHKERRQ(ierr);
    ierr = VecDestroy(ml->interfaceRhs);                                                                  CHKERRQ(ierr);
    ierr = VecScatterDestroy(ml->interfaceScatter);                                                       CHKERRQ(ierr);
#ifndef PETSC_HAVE_PLAPACK
    /* Without PLAPACK a serial local copy of the interface RHS is kept */
    ierr = VecDestroy(ml->locInterfaceRhs);                                                               CHKERRQ(ierr);
    ierr = VecScatterDestroy(ml->locInterfaceScatter);                                                    CHKERRQ(ierr);
#endif
    ierr = VecDestroy(ml->interfaceColRhs);                                                               CHKERRQ(ierr);
    ierr = VecScatterDestroy(ml->interfaceColScatter);                                                    CHKERRQ(ierr);
    ierr = VecDestroy(ml->colWorkVec);                                                                    CHKERRQ(ierr);
    ierr = PetscFree(ml->interfaceRows);                                                                  CHKERRQ(ierr);
    ierr = PetscFree(ml->firstInterfaceRow);                                                              CHKERRQ(ierr);
    ierr = PetscFree(ml->firstNullCol);                                                                   CHKERRQ(ierr);
#ifdef PETSC_HAVE_PLAPACK
    /* The interface QR lives in PLAPACK objects; release them through PLAPACK */
    ierr = PLA_Obj_free(&ml->interfaceQR);                                                                CHKERRQ(ierr);
    ierr = PLA_Obj_free(&ml->interfaceTAU);                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_free(&ml->PLArhsD);                                                                    CHKERRQ(ierr);
    ierr = PLA_Obj_free(&ml->PLArhsP);                                                                    CHKERRQ(ierr);
    ierr = PLA_Temp_free(&ml->PLAlayout);                                                                 CHKERRQ(ierr);
#else
    ierr = PetscFree(ml->interfaceQR);                                                                    CHKERRQ(ierr);
    ierr = PetscFree(ml->interfaceTAU);                                                                   CHKERRQ(ierr);
    ierr = PetscFree(ml->work);                                                                           CHKERRQ(ierr);
#endif
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCSetUp_Multilevel"
static int PCSetUp_Multilevel(PC pc)
{
  PC_Multilevel   *ml = (PC_Multilevel *) pc->data;
  GMat             A  = pc->mat;
  Grid             grid;
  Mesh             mesh;
  Mesh_Triangular *tri;
  Partition        p;
  /* Mesh support */
  char            *typeName;
  int              numVertices, numNodes, numEdges, numElements;
  int             *bcNodes;
  /* Parallel support */
  PetscTruth       isInterfaceRow;
  int             *diagInterfaceRows;
  int             *offdiagInterfaceRows;
  int             *interfaceRows;
  int             *interfaceCols;
  int             *interiorRows;
  int              numNewRange;
  int             *newRange;
  int             *rangeRows;
  int             *nullCols;
  int              rowLen;
  int             *rowLens;
  int             *cols;
  PetscScalar     *vals;
  Vec              tempV, tempB;
  PetscScalar     *arrayB;
  IS               localIS, globalIS;
  int             *recvCounts;
  int              numProcs, rank, proc;
  PetscScalar      one  = 1.0;
  int              elem, newElem, corner, row, row2, col, nullCol, rangeCol, bc, count, lossCount;
#ifdef PETSC_HAVE_PLAPACK
  double          *locB_I;
#else
  int              minSize;
#endif
#if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
  int              pid, fd;
  char             procfile[64];
  int              event, event0, event1;
  int              gen_start, gen_read;
  long long        count0, globalCount0;
  long long        count1, globalCount1;
  hwperf_profevctrarg_t evctr_args;
  hwperf_cntr_t         counts;
#endif
  PetscTruth       opt, match;
  int              ierr;

  PetscFunctionBegin;
  /* We need this so that setup is not called recursively by PCMultiLevelApplyXXX */
  if (pc->setupcalled > 0) {
    PetscFunctionReturn(0);
  }
  pc->setupcalled = 2;

  /* Destroy old structures -- This should not happen anymore */
  if (ml->numLevels >= 0) {
    SETERRQ(PETSC_ERR_ARG_WRONG, "Should not reform an existing decomposition");
    /* ierr = PCDestroyStructures_Multilevel(pc);                                                         CHKERRQ(ierr); */
  }

  ierr = PC_MLLogEventBegin(PC_ML_SetUpInit, pc, 0, 0, 0);                                                CHKERRQ(ierr);
  /* Get the grid */
  ierr = GMatGetGrid(A, &grid);                                                                           CHKERRQ(ierr);
  ml->grid = grid;

  /* Get mesh information */
  ierr = GridGetMesh(grid, &mesh);                                                                        CHKERRQ(ierr);
  ierr = MeshGetType(mesh, &typeName);                                                                    CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject) mesh, MESH_TRIANGULAR_2D, &match);                                CHKERRQ(ierr);
  if (match == PETSC_FALSE) SETERRQ(PETSC_ERR_SUP, "Multilevel preconditioner only supports 2D triangular meshes");
  tri  = (Mesh_Triangular *) mesh->data;
  /* Set target partition size */
  ml->partSize  = 10;
  ierr = PetscOptionsGetInt(pc->prefix, "-pc_ml_partition_size", &ml->partSize, &opt);                    CHKERRQ(ierr);

  /* Setup parallel information */
  p        = mesh->part;
  numProcs = p->numProcs;
  rank     = p->rank;

  /* Enable Mathematica support */
  ierr = PetscOptionsHasName(pc->prefix, "-pc_ml_math", &opt);                                            CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ml->useMath = PETSC_TRUE;
    ierr = PetscViewerMathematicaOpen(pc->comm, PETSC_DECIDE, PETSC_NULL, PETSC_NULL, &ml->mathViewer);   CHKERRQ(ierr);
  }

  ierr = PetscOptionsHasName(pc->prefix, "-pc_ml_mesh_view_draw", &opt);                                  CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ierr = PetscViewerDrawOpen(pc->comm, 0, 0, 0, 0, 300, 300, &ml->factorizationViewer);                 CHKERRQ(ierr);
  }

  /* Setup allocation */
  ierr = PetscOptionsGetInt(pc->prefix, "-pc_ml_max_levels", &ml->maxLevels, &opt);                       CHKERRQ(ierr);
  if (ml->maxLevels < 0)
    ml->maxLevels = 25;

  /* Set the threshold for a final QR factorization */
  ierr = PetscOptionsGetInt(pc->prefix, "-pc_ml_qr_thresh", &ml->QRthresh, &opt);                         CHKERRQ(ierr);
  ierr = PetscOptionsGetReal(pc->prefix, "-pc_ml_qr_factor", &ml->DoQRFactor, &opt);                      CHKERRQ(ierr);

  /* Allocate mesh storage */
  ml->numMeshes = 1;
  ierr = MeshGetInfo(mesh, &numVertices, &numNodes, &numEdges, &numElements);                             CHKERRQ(ierr);
  ierr = PetscMalloc((ml->maxLevels+1) * sizeof(int),    &ml->numElements);                               CHKERRQ(ierr);
  ierr = PetscMalloc((ml->maxLevels+1) * sizeof(int),    &ml->numVertices);                               CHKERRQ(ierr);
  ierr = PetscMalloc((ml->maxLevels+1) * sizeof(int **), &ml->meshes);                                    CHKERRQ(ierr);
  ierr = PetscMalloc(NUM_MESH_DIV      * sizeof(int *),  &ml->meshes[0]);                                 CHKERRQ(ierr);
  ierr = PetscMalloc((numElements*3)   * sizeof(int),    &ml->meshes[0][MESH_ELEM]);                      CHKERRQ(ierr);
  ierr = PetscMalloc(grid->numPointBC  * sizeof(int),    &bcNodes);                                       CHKERRQ(ierr);
  PetscLogObjectMemory(pc, (ml->maxLevels+1)*2 * sizeof(int) + (ml->maxLevels+1) * sizeof(int **) + NUM_MESH_DIV * sizeof(int *) +
                       (numElements*3) * sizeof(int) + grid->numPointBC * sizeof(int));

  /* Create the local graph */
  for(bc = 0; bc < grid->numPointBC; bc++) {
    bcNodes[bc] = grid->pointBC[bc].node;
  }
  ierr = MeshCreateLocalCSR(mesh, &ml->numVertices[0], &ml->numEdges, &ml->meshes[0][MESH_OFFSETS],
                            &ml->meshes[0][MESH_ADJ], &ml->vertices, grid->numPointBC, bcNodes, PETSC_FALSE);
                                                                                                          CHKERRQ(ierr);

  /* Create the element descriptions */
  ml->numElements[0] = 0;
  for(elem = 0; elem < numElements; elem++) {
    /* Remove all elements containing a constrained node */
    for(bc = 0; bc < grid->numPointBC; bc++)
      if ((tri->faces[elem*mesh->numCorners+0] == bcNodes[bc]) ||
          (tri->faces[elem*mesh->numCorners+1] == bcNodes[bc]) ||
          (tri->faces[elem*mesh->numCorners+2] == bcNodes[bc]))
        break;
    if (bc < grid->numPointBC)
      continue;

    /* Remove elements with off-processor nodes */
    if ((tri->faces[elem*mesh->numCorners+0] >= numNodes) ||
        (tri->faces[elem*mesh->numCorners+1] >= numNodes) ||
        (tri->faces[elem*mesh->numCorners+2] >= numNodes))
      continue;

    /* Add the element */
    for(corner = 0; corner < 3; corner++) {
      /* Decrement node numbers to account for constrained nodes */
      newElem = tri->faces[elem*mesh->numCorners+corner];
      for(bc = 0; bc < grid->numPointBC; bc++)
        if ((bcNodes[bc] >= 0) && (tri->faces[elem*mesh->numCorners+corner] > bcNodes[bc]))
          newElem--;
      ml->meshes[0][MESH_ELEM][ml->numElements[0]*3+corner] = newElem+1;
    }
    ml->numElements[0]++;
  }

  if (ml->factorizationViewer != PETSC_NULL) {
    PetscTruth isPeriodic;
    PetscDraw  draw;

    ierr = MeshView(mesh, ml->factorizationViewer);                                                       CHKERRQ(ierr);
    ierr = PetscViewerFlush(ml->factorizationViewer);                                                     CHKERRQ(ierr);
    ierr = PetscViewerDrawGetDraw(ml->factorizationViewer, 0, &draw);                                     CHKERRQ(ierr);
    ierr = MeshIsPeriodic(mesh, &isPeriodic);                                                             CHKERRQ(ierr);
    if (isPeriodic == PETSC_FALSE) {
      /* Does not work in periodic case */
      ierr = PCMultiLevelView_Private(pc, ml->factorizationViewer, 0, ml->numVertices[0], ml->numElements[0], ml->meshes[0]);
                                                                                                          CHKERRQ(ierr);
    }
  }

  /* Cleanup */
  ierr = PetscFree(bcNodes);                                                                              CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
  ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                            CHKERRQ(ierr);
#endif

  if ((ml->sField < 0) || (ml->tField < 0)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE, "Shape and test function fields must be setup.");
  }
  /* Create shape function variable orderings */
  ierr = VarOrderingCreateGeneral(grid, 1, &ml->sField, &ml->sOrder);                                     CHKERRQ(ierr);
  ierr = LocalVarOrderingCreate(grid, 1, &ml->sField, &ml->sLocOrder);                                    CHKERRQ(ierr);
  /* Create test function variable orderings */
  ierr = VarOrderingCreateGeneral(grid, 1, &ml->tField, &ml->tOrder);                                     CHKERRQ(ierr);
  ierr = LocalVarOrderingCreate(grid, 1, &ml->tField, &ml->tLocOrder);                                    CHKERRQ(ierr);
  /* Check orderings */
  if (ml->numVertices[0] != ml->sOrder->numLocVars) {
    SETERRQ2(PETSC_ERR_ARG_CORRUPT, "Invalid local graph: %d vertices != %d vars", ml->numVertices[0], ml->sOrder->numLocVars);
  }
  /* Allocate the operator */
  ierr = GMatCreateRectangular(grid, ml->sOrder, ml->tOrder, &ml->B);                                     CHKERRQ(ierr);
  /* Create the multiplier solution vector */
  ierr = GVecCreateRectangular(ml->grid, ml->tOrder, &ml->reduceSol);                                     CHKERRQ(ierr);
  /* Construct the operator */
  ierr = MatSetOption(ml->B, MAT_NEW_NONZERO_ALLOCATION_ERR);                                             CHKERRQ(ierr);
  ierr = GMatEvaluateOperatorGalerkin(ml->B, PETSC_NULL, 1, &ml->sField, &ml->tField, ml->gradOp,
                                      ml->gradAlpha, MAT_FINAL_ASSEMBLY, ml);
                                                                                                          CHKERRQ(ierr);

  /* Precondition the initial system with its diagonal */
  ierr = PetscOptionsHasName(pc->prefix, "-pc_ml_jacobi", &opt);                                          CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ierr = GVecCreateConstrained(ml->grid, &ml->diag);                                                    CHKERRQ(ierr);
    ierr = GMatGetDiagonalConstrained(pc->mat, ml->diag);                                                 CHKERRQ(ierr);
    ierr = VecReciprocal(ml->diag);                                                                       CHKERRQ(ierr);
    ierr = VecSqrt(ml->diag);                                                                             CHKERRQ(ierr);
    ierr = MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);                                                 CHKERRQ(ierr);
  }

  /* Parallel Support */
  if (numProcs > 1) {
    ierr = PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);                                          CHKERRQ(ierr);
    /* Separate interior and interface rows */
    ierr = PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &interiorRows);                              CHKERRQ(ierr);
    ierr = PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &ml->interfaceRows);                         CHKERRQ(ierr);
    ierr = PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &diagInterfaceRows);                         CHKERRQ(ierr);
    ierr = PetscMalloc(ml->tOrder->numLocVars * sizeof(int), &offdiagInterfaceRows);                      CHKERRQ(ierr);
    ierr = PetscMemzero(diagInterfaceRows,    ml->tOrder->numLocVars * sizeof(int));                      CHKERRQ(ierr);
    ierr = PetscMemzero(offdiagInterfaceRows, ml->tOrder->numLocVars * sizeof(int));                      CHKERRQ(ierr);
    PetscLogObjectMemory(pc, ml->tOrder->numLocVars * sizeof(int));
    ml->numInteriorRows     = 0;
    ml->numLocInterfaceRows = 0;
    for(row = ml->tOrder->firstVar[rank]; row < ml->tOrder->firstVar[rank+1]; row++) {
      ierr = MatGetRow(ml->B, row, &rowLen, &cols, PETSC_NULL);                                           CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
      if (rowLen <= 0) {
        PetscPrintf(PETSC_COMM_SELF, "row %d is null\n", row);
        SETERRQ(PETSC_ERR_ARG_CORRUPT, "Null row in gradient");
      }
#endif
      /* Find rows on domain boundaries */
      for(col = 0, isInterfaceRow = PETSC_FALSE; col < rowLen; col++) {
        if ((cols[col] < ml->sOrder->firstVar[rank]) || (cols[col] >= ml->sOrder->firstVar[rank+1])) {
          isInterfaceRow = PETSC_TRUE;
          offdiagInterfaceRows[ml->numLocInterfaceRows]++;
        }
      }
      if (isInterfaceRow == PETSC_FALSE) {
        interiorRows[ml->numInteriorRows++] = row;
      } else {
        ml->interfaceRows[ml->numLocInterfaceRows]   = row;
        diagInterfaceRows[ml->numLocInterfaceRows++] = rowLen - offdiagInterfaceRows[ml->numLocInterfaceRows];
      }

      ierr = MatRestoreRow(ml->B, row, &rowLen, &cols, PETSC_NULL);                                       CHKERRQ(ierr);
    }
    if (ml->numInteriorRows + ml->numLocInterfaceRows != ml->tOrder->numLocVars) {
      SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
    }

    /* Place interior rows after interface rows */
    ml->interiorRows = ml->interfaceRows + ml->numLocInterfaceRows;
    ierr = PetscMemcpy(ml->interiorRows, interiorRows, ml->numInteriorRows * sizeof(int));                CHKERRQ(ierr);
    ierr = PetscFree(interiorRows);                                                                       CHKERRQ(ierr);

    /* Calculate global size of the interface matrix */
    ierr = PetscMalloc((numProcs+1) * sizeof(int), &ml->firstInterfaceRow);                               CHKERRQ(ierr);
    PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
    ierr = MPI_Allgather(&ml->numLocInterfaceRows, 1, MPI_INT, &ml->firstInterfaceRow[1], 1, MPI_INT, pc->comm);
                                                                                                          CHKERRQ(ierr);
    ml->firstInterfaceRow[0] = 0;
    for(proc = 1; proc < numProcs+1; proc++)
      ml->firstInterfaceRow[proc] += ml->firstInterfaceRow[proc-1];
    ml->numInterfaceRows = ml->firstInterfaceRow[numProcs];

    /* Setup allocation */
    ierr = PetscMalloc((ml->numInteriorRows+1) * sizeof(int), &rowLens);                                  CHKERRQ(ierr);
    for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++) {
      if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row)) {
        lossCount++;
        continue;
      } else {
        ierr = MatGetRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);                                    CHKERRQ(ierr);
        rowLens[count] = rowLen;
        ierr = MatRestoreRow(ml->B, row, &rowLen, PETSC_NULL, PETSC_NULL);                                CHKERRQ(ierr);
        count++;
      }
    }
    if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
    if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");

    /* Create the gradient local to this processor */
    ierr = MatCreateSeqAIJ(PETSC_COMM_SELF, ml->numInteriorRows, ml->sOrder->numLocVars, 0, rowLens, &ml->locB);
                                                                                                          CHKERRQ(ierr);
    ierr = PetscFree(rowLens);                                                                            CHKERRQ(ierr);
    for(row = ml->tOrder->firstVar[rank], count = 0, lossCount = 0; row < ml->tOrder->firstVar[rank+1]; row++)
    {
      if ((lossCount < ml->numLocInterfaceRows) && (ml->interfaceRows[lossCount] == row))
      {
        lossCount++;
        continue;
      }
      else
      {
        ierr = MatGetRow(ml->B, row, &rowLen, &cols, &vals);                                              CHKERRQ(ierr);
        /* This is definitely cheating */
        for(col = 0; col < rowLen; col++)
          cols[col] -= ml->sOrder->firstVar[rank];
        ierr = MatSetValues(ml->locB, 1, &count, rowLen, cols, vals, INSERT_VALUES);                      CHKERRQ(ierr);
        ierr = MatRestoreRow(ml->B, row, &rowLen, &cols, &vals);                                          CHKERRQ(ierr);
        count++;
      }
    }
    ierr = MatAssemblyBegin(ml->locB, MAT_FINAL_ASSEMBLY);                                                CHKERRQ(ierr);
    ierr = MatAssemblyEnd(ml->locB, MAT_FINAL_ASSEMBLY);                                                  CHKERRQ(ierr);
    if (count != ml->numInteriorRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
    if (lossCount != ml->numLocInterfaceRows) SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid decomposition in this domain");
    ierr = PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);                                            CHKERRQ(ierr);
  } else {
    ml->locB = ml->B;
  }
  ierr = MatGetLocalSize(ml->locB, &ml->numRows, &ml->numCols);                                           CHKERRQ(ierr);
#ifdef PETSC_USE_BOPT_g
  ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                           CHKERRQ(ierr);
#endif

#if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
  /* Events for the SGI R10000 hardware counters, Add 16 to each event for Counter 1.

     Event Counter 0                                            Counter 1
     0     Cycles                                               Cycles
     1     Instructions Issued                                  Instructions Graduated
     2     Load/prefetch/sync Issued                            Load/prefetch/sync Graduated
     3     Stores Issued                                        Stores Graduated
     4     Store conditional Issued                             Store conditional Graduated
     5     Failed store conditional                             Floating point instructions Graduated
     6     Branches decoded                                     Write back from data cache to secondary cache
     7     Write back from secondary cache to system interface  TLB refill exceptions
     8     Single-bit ECC errors on secondary cache data        Branches mispredicted
     9     Instruction cache misses                             Data cache misses
     10    Secondary instruction cache misses                   Secondary data cache misses
     11    Secondary instruction cache way mispredicted         Secondary data cache way mispredicted
     12    External intervention requests                       External intervention hits
     13    External invalidation requests                       External invalidation hits
     14    Virtual coherency                                    Upgrade requests on clean secondary cache lines
     15    Instructions graduated                               Upgrade requests on shared secondary cache lines
   */
  /* Setup events */
  event0 = 0;
  event1 = 21;
  ierr = PetscOptionsHasName(pc->prefix, "-pc_ml_flops_monitor", &opt);                                   CHKERRQ(ierr);
  if (opt == PETSC_TRUE)
    event1 = 21;
  ierr = PetscOptionsHasName(pc->prefix, "-pc_ml_cache_monitor", &opt);                                   CHKERRQ(ierr);
  if (opt == PETSC_TRUE)
    event1 = 25;
  ierr = PetscOptionsHasName(pc->prefix, "-pc_ml_sec_cache_monitor", &opt);                               CHKERRQ(ierr);
  if (opt == PETSC_TRUE)
    event1 = 26;

  /* Open the proc file for iotctl() */
  pid = getpid();
  sprintf(procfile, "/proc/%010d", pid);
  if ((fd  = open(procfile, O_RDWR)) < 0) SETERRQ(PETSC_ERR_FILE_OPEN, "Could not open proc file");
  /* Set the event for counter 0 */
  for (event = 0; event < HWPERF_EVENTMAX; event++) {
    if (event == event0) {
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie   = 1;
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev   = event;
      evctr_args.hwp_ovflw_freq[event]                                = 0;
    } else {
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
      evctr_args.hwp_ovflw_freq[event]                       = 0;
    }
  }
  /* Set the event for counter 1 */
  for (event = HWPERF_CNT1BASE; event < HWPERF_EVENTMAX; event++) {
    if (event == event1) {
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_mode = HWPERF_CNTEN_U;
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ie   = 1;
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_creg.hwp_ev   = event - HWPERF_CNT1BASE;
      evctr_args.hwp_ovflw_freq[event]                                = 0;
    } else {
      evctr_args.hwp_evctrargs.hwp_evctrl[event].hwperf_spec = 0;
      evctr_args.hwp_ovflw_freq[event]                       = 0;
    }
  }
  evctr_args.hwp_ovflw_sig = 0;
  /* Start SGI hardware counters */
  if ((gen_start = ioctl(fd, PIOCENEVCTRS, (void *) &evctr_args)) < 0) {
    SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
  }
#endif
  ierr = PC_MLLogEventEnd(PC_ML_SetUpInit, pc, 0, 0, 0);                                                  CHKERRQ(ierr);

  /* Factorize the gradient matrix */
  ierr = PCMultiLevelReduce(pc);                                                                          CHKERRQ(ierr);

  /* Parallel support */
  if (numProcs > 1) {
    ierr = PC_MLLogEventBegin(PC_ML_SetUpParallel, pc, 0, 0, 0);                                          CHKERRQ(ierr);
    /* Get size information */
    ierr = PetscMalloc(numProcs     * sizeof(int), &nullCols);                                            CHKERRQ(ierr);
    ierr = PetscMalloc((numProcs+1) * sizeof(int), &ml->firstNullCol);                                    CHKERRQ(ierr);
    PetscLogObjectMemory(pc, (numProcs+1) * sizeof(int));
    MPI_Allreduce(&ml->numLocNullCols, &ml->numNullCols, 1, MPI_INT, MPI_SUM, pc->comm);
    MPI_Allgather(&ml->numLocNullCols, 1, MPI_INT, nullCols, 1, MPI_INT, pc->comm);
    for(proc = 1, ml->firstNullCol[0] = 0; proc <= numProcs; proc++) {
      ml->firstNullCol[proc] = nullCols[proc-1] + ml->firstNullCol[proc-1];
    }
    PetscLogInfo(pc, "Interface rows: %d Total rows: %d NullCols: %d\n", ml->numInterfaceRows, ml->tOrder->numVars, ml->numNullCols);

    /* Get all interface rows */
    ierr = PetscMalloc(ml->numInterfaceRows * sizeof(int), &interfaceRows);                               CHKERRQ(ierr);
    ierr = PetscMalloc(numProcs             * sizeof(int), &recvCounts);                                  CHKERRQ(ierr);
    for(proc = 0; proc < numProcs; proc++)
      recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
    MPI_Allgatherv(ml->interfaceRows, ml->numLocInterfaceRows,           MPI_INT,
                   interfaceRows,     recvCounts, ml->firstInterfaceRow, MPI_INT, pc->comm);
    ierr = PetscFree(recvCounts);                                                                         CHKERRQ(ierr);

    /* Get all the interface columns */
    ierr = PetscMalloc(ml->numNullCols    * sizeof(int), &interfaceCols);                                 CHKERRQ(ierr);
    if (ml->numLocNullCols > 0) {
      ierr = PetscMalloc(ml->numLocNullCols * sizeof(int), &cols);                                        CHKERRQ(ierr);
    }
    for(col = 0; col < ml->numLocNullCols; col++) {
      cols[col] = ml->nullCols[col] + ml->sOrder->firstVar[rank];
    }
    MPI_Allgatherv(cols,          ml->numLocNullCols,     MPI_INT,
                   interfaceCols, nullCols, ml->firstNullCol, MPI_INT, pc->comm);
    if (ml->numLocNullCols > 0) {
      ierr = PetscFree(cols);                                                                             CHKERRQ(ierr);
    }

    /* Get the rows for the range split onto processors like a column vector
         Here we are looping over all local columns (ml->sOrder->numLocVars)
       using the local column number (col). The number of null columns discovered
       (nullCol) should equals the number found in the serial factorization
       (ml->numLocNullCols), as the number of range columns (rangeCol) should
       agree with the local rank (ml->rank).
     */
    ierr = PetscMalloc(ml->sOrder->numLocVars * sizeof(int), &rangeRows);                                 CHKERRQ(ierr);
    for(col = 0, nullCol = 0, rangeCol = 0; col < ml->sOrder->numLocVars; col++) {
      if ((nullCol < ml->numLocNullCols) && (ml->nullCols[nullCol] == col)) {
        rangeRows[col] = interfaceRows[ml->firstNullCol[rank]+nullCol++];
      } else {
        rangeRows[col] = ml->interiorRows[ml->range[rangeCol++]];
      }
    }
    if ((rangeCol != ml->rank) || (nullCol != ml->numLocNullCols)) {
      PetscPrintf(PETSC_COMM_SELF, "[%d]rank: %d rangeCol: %d null: %d nullCol: %d\n",
                  rank, ml->rank, rangeCol, ml->numLocNullCols, nullCol);
      SETERRQ(PETSC_ERR_ARG_CORRUPT, "Invalid range space");
    }

    /* Renumber and enlarge range[] to account for interface rows */
    ml->localRank = ml->rank;
    if (ml->numNullCols < ml->firstInterfaceRow[rank])
      numNewRange = 0;
    else
      numNewRange = PetscMin(ml->firstInterfaceRow[rank+1], ml->numNullCols) - ml->firstInterfaceRow[rank];
    if (ml->rank + numNewRange > 0)
    {
      /* We maintain the range[] in local row numbers */
      ierr = PetscMalloc((ml->rank + numNewRange) * sizeof(int), &newRange);                              CHKERRQ(ierr);
      for(row = 0; row < ml->rank; row++)
        newRange[row] = ml->interiorRows[ml->range[row]] - ml->tOrder->firstVar[rank];
      for(row = 0; row < numNewRange; row++)
        newRange[ml->rank+row] = ml->interfaceRows[row] - ml->tOrder->firstVar[rank];
      ml->rank += numNewRange;
      if (ml->range != PETSC_NULL)
        {ierr = PetscFree(ml->range);                                                                     CHKERRQ(ierr);}
      ml->range = newRange;
    }
    MPI_Allreduce(&ml->rank, &ml->globalRank, 1, MPI_INT, MPI_SUM, pc->comm);

    /* Create work vectors for applying P in parallel */
    ierr = GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);                                      CHKERRQ(ierr);

    /* Create scatter from the solution vector to a local interior vector */
    ierr = ISCreateStride(PETSC_COMM_SELF, ml->numInteriorRows, 0, 1, &localIS);                          CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->numInteriorRows, ml->interiorRows, &globalIS);            CHKERRQ(ierr);
    ierr = VecCreateSeq(PETSC_COMM_SELF, ml->numInteriorRows, &ml->interiorRhs);                          CHKERRQ(ierr);
    ierr = VecScatterCreate(pc->vec, globalIS, ml->interiorRhs, localIS, &ml->interiorScatter);           CHKERRQ(ierr);
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);

    /* Create scatter from the solution vector to a local interface vector */
    ierr = ISCreateStride(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->firstInterfaceRow[rank], 1, &localIS); CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->numLocInterfaceRows, ml->interfaceRows, &globalIS);       CHKERRQ(ierr);
    ierr = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &ml->interfaceRhs);      CHKERRQ(ierr);
    ierr = VecScatterCreate(pc->vec, globalIS, ml->interfaceRhs, localIS, &ml->interfaceScatter);         CHKERRQ(ierr);
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);

#ifndef PETSC_HAVE_PLAPACK
    /* Create scatter from the solution vector to an interface vector */
    if (rank == 0) {
      ierr = ISCreateStride(PETSC_COMM_SELF, ml->numInterfaceRows, 0, 1, &localIS);                       CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->numInterfaceRows, interfaceRows, &globalIS);            CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF, ml->numInterfaceRows, &ml->locInterfaceRhs);                   CHKERRQ(ierr);
      ierr = VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter); CHKERRQ(ierr);
    } else {
      ierr = ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);                                          CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);                                         CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF, 0, &ml->locInterfaceRhs);                                      CHKERRQ(ierr);
      ierr = VecScatterCreate(pc->vec, globalIS, ml->locInterfaceRhs, localIS, &ml->locInterfaceScatter); CHKERRQ(ierr);
    }
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);
#endif

    /* Create scatter from a column vector to a column interface vector */
#ifdef PETSC_HAVE_PLAPACK
    ierr = ISCreateStride(PETSC_COMM_SELF, ml->numLocNullCols, ml->firstNullCol[rank], 1, &localIS);      CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->numLocNullCols, &interfaceCols[ml->firstNullCol[rank]], &globalIS);
                                                                                                          CHKERRQ(ierr);
    ierr = VecCreateMPI(pc->comm, ml->numLocNullCols, ml->numNullCols, &ml->interfaceColRhs);             CHKERRQ(ierr);
    ierr = VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
                                                                                                          CHKERRQ(ierr);
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);
#else
    if (rank == 0) {
      ierr = ISCreateStride(PETSC_COMM_SELF, ml->numNullCols, 0, 1, &localIS);                            CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->numNullCols, interfaceCols, &globalIS);                 CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF, ml->numNullCols, &ml->interfaceColRhs);                        CHKERRQ(ierr);
      ierr = VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
                                                                                                          CHKERRQ(ierr);
    } else {
      ierr = ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &localIS);                                          CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &globalIS);                                         CHKERRQ(ierr);
      ierr = VecCreateSeq(PETSC_COMM_SELF, 0, &ml->interfaceColRhs);                                      CHKERRQ(ierr);
      ierr = VecScatterCreate(ml->colWorkVec, globalIS, ml->interfaceColRhs, localIS, &ml->interfaceColScatter);
                                                                                                          CHKERRQ(ierr);
    }
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);
#endif

    /* Create scatter from a solution vector to a column vector */
    ierr = ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS); CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);                CHKERRQ(ierr);
    ierr = VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);               CHKERRQ(ierr);
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);

    /* Create sparse B_I */
    ierr = MatCreateMPIAIJ(pc->comm, ml->numLocInterfaceRows, ml->sOrder->numLocVars, ml->numInterfaceRows,
                           ml->sOrder->numVars, 0, diagInterfaceRows, 0, offdiagInterfaceRows, &ml->interfaceB);
                                                                                                          CHKERRQ(ierr);
    ierr = PetscFree(diagInterfaceRows);                                                                  CHKERRQ(ierr);
    ierr = PetscFree(offdiagInterfaceRows);                                                               CHKERRQ(ierr);

    for(row = 0; row < ml->numLocInterfaceRows; row++) {
      /* Copy row into B^p_I */
      row2 = row + ml->firstInterfaceRow[rank];
      ierr = MatGetRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);                             CHKERRQ(ierr);
      ierr = MatSetValues(ml->interfaceB, 1, &row2, rowLen, cols, vals, INSERT_VALUES);                   CHKERRQ(ierr);
      ierr = MatRestoreRow(ml->B, ml->interfaceRows[row], &rowLen, &cols, &vals);                         CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(ml->interfaceB, MAT_FINAL_ASSEMBLY);                                          CHKERRQ(ierr);
    ierr = MatAssemblyEnd(ml->interfaceB, MAT_FINAL_ASSEMBLY);                                            CHKERRQ(ierr);

    /* Get V_I: The rows of V corresponding to the zero singular values for each domain */
    ierr = VecDuplicateVecs(ml->colWorkVec, ml->numNullCols, &ml->interfaceV);                            CHKERRQ(ierr);
    for(col = 0; col < ml->numNullCols; col++) {
      tempV   = ml->interfaceV[col];
      if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
        nullCol = ml->nullCols[col-ml->firstNullCol[rank]]+ml->sOrder->firstVar[rank];
        ierr    = VecSetValues(tempV, 1, &nullCol, &one, INSERT_VALUES);                                  CHKERRQ(ierr);
      }
      ierr      = VecAssemblyBegin(tempV);                                                                CHKERRQ(ierr);
      ierr      = VecAssemblyEnd(tempV);                                                                  CHKERRQ(ierr);
      if ((col >= ml->firstNullCol[rank]) && (col < ml->firstNullCol[rank+1])) {
        ierr    = PCMultiLevelApplyV(pc, tempV, tempV);                                                   CHKERRQ(ierr);
      }
    }

#ifdef PETSC_HAVE_PLAPACK
    /* You MUST initialize all PLAPACK variables to PETSC_NULL */
    ml->PLAlayout    = PETSC_NULL;
    ml->interfaceQR  = PETSC_NULL;
    ml->interfaceTAU = PETSC_NULL;
    ml->PLArhsD      = PETSC_NULL;
    ml->PLArhsP      = PETSC_NULL;
    /* Create the template for vector and matrix alignment */
    ierr = PLA_Temp_create(ml->numNullCols, 0, &ml->PLAlayout);                                           CHKERRQ(ierr);
    /* Create the interface matrix B_\Gamma */
    ierr = PLA_Matrix_create(MPI_DOUBLE, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, 0, 0, &ml->interfaceQR);
                                                                                                          CHKERRQ(ierr);
    /* Create the vector of scaling factors Tau */
    ierr = PLA_Vector_create(MPI_DOUBLE, ml->numNullCols, ml->PLAlayout, 0, &ml->interfaceTAU);           CHKERRQ(ierr);
    /* Create the storage vectors for application of D and P */
    ierr = PLA_Vector_create(MPI_DOUBLE, ml->numNullCols,      ml->PLAlayout, 0, &ml->PLArhsD);           CHKERRQ(ierr);
    ierr = PLA_Vector_create(MPI_DOUBLE, ml->numInterfaceRows, ml->PLAlayout, 0, &ml->PLArhsP);           CHKERRQ(ierr);
    /* Create temporary space for the local rows of the interface matrix */
    ierr = PetscMalloc(ml->numLocInterfaceRows*ml->numNullCols * sizeof(double), &locB_I);                CHKERRQ(ierr);

    /* Construct B_I V_I */
    ierr  = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);                CHKERRQ(ierr);
    for(col = 0; col < ml->numNullCols; col++) {
      ierr = MatMult(ml->interfaceB, ml->interfaceV[col], tempB);                                         CHKERRQ(ierr);
      /* Copy into storage -- column-major order */
      ierr  = VecGetArray(tempB, &arrayB);                                                                CHKERRQ(ierr);
      ierr = PetscMemcpy(&locB_I[ml->numLocInterfaceRows*col], arrayB, ml->numLocInterfaceRows * sizeof(double));
                                                                                                          CHKERRQ(ierr);
      ierr = VecRestoreArray(tempB, &arrayB);                                                             CHKERRQ(ierr);
    }
    ierr = VecDestroy(tempB);                                                                             CHKERRQ(ierr);
    ierr = VecDestroyVecs(ml->interfaceV, ml->numNullCols);                                               CHKERRQ(ierr);

    /* Set the matrix entries */
    ierr = PLA_Obj_set_to_zero(ml->interfaceQR);                                                          CHKERRQ(ierr);
    ierr = PLA_API_begin();                                                                               CHKERRQ(ierr);
    ierr = PLA_Obj_API_open(ml->interfaceQR);                                                             CHKERRQ(ierr);
    ierr = PLA_API_axpy_matrix_to_global(ml->numLocInterfaceRows, ml->numNullCols, &one, locB_I,
                                         ml->numLocInterfaceRows, ml->interfaceQR, ml->firstInterfaceRow[rank], 0);
                                                                                                          CHKERRQ(ierr);
    ierr = PLA_Obj_API_close(ml->interfaceQR);                                                            CHKERRQ(ierr);
    ierr = PLA_API_end();                                                                                 CHKERRQ(ierr);
    ierr = PetscFree(locB_I);                                                                             CHKERRQ(ierr);
#else
    /* Allocate storage for the QR factorization */
    minSize = PetscMin(ml->numInterfaceRows, ml->numNullCols);
    ierr = PetscMalloc(ml->numInterfaceRows*ml->numNullCols * sizeof(double), &ml->interfaceQR);          CHKERRQ(ierr);
    ierr = PetscMalloc(minSize                              * sizeof(double), &ml->interfaceTAU);         CHKERRQ(ierr);
    PetscLogObjectMemory(pc, (ml->numInterfaceRows*ml->numNullCols + minSize)*sizeof(double));
    ierr = PetscMemzero(ml->interfaceQR,  ml->numInterfaceRows*ml->numNullCols * sizeof(double));         CHKERRQ(ierr);
    ierr = PetscMemzero(ml->interfaceTAU, minSize                              * sizeof(double));         CHKERRQ(ierr);

    /* Construct B_I V_I */
    ierr = VecCreateMPI(pc->comm, ml->numLocInterfaceRows, ml->numInterfaceRows, &tempB);                 CHKERRQ(ierr);
    for(col = 0; col < ml->numNullCols; col++) {
      ierr = MatMult(ml->interfaceB, ml->interfaceV[col], tempB);                                         CHKERRQ(ierr);
      /* Copy into storage -- column-major order */
      ierr = VecGetArray(tempB, &arrayB);                                                                 CHKERRQ(ierr);
      ierr = PetscMemcpy(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], arrayB,
                         ml->numLocInterfaceRows * sizeof(double));
                                                                                                          CHKERRQ(ierr);
      ierr = VecRestoreArray(tempB, &arrayB);                                                             CHKERRQ(ierr);
    }
    ierr = VecDestroy(tempB);                                                                             CHKERRQ(ierr);
    ierr = VecDestroyVecs(ml->interfaceV, ml->numNullCols);                                               CHKERRQ(ierr);

    /* Gather matrix / B^1_I V^1_I | ... | B^1_I V^p_I \
                     | B^2_I V^1_I | ... | B^2_I V^p_I |
                                     ...
                     \ B^p_I V^1_I | ... | B^p_I V^p_I /

       Note: It has already been put in column-major order
    */
    ierr = PetscMalloc(numProcs * sizeof(int), &recvCounts);                                              CHKERRQ(ierr);
    for(proc = 0; proc < numProcs; proc++)
      recvCounts[proc] = ml->firstInterfaceRow[proc+1] - ml->firstInterfaceRow[proc];
    for(col = 0; col < ml->numNullCols; col++) {
      ierr = MPI_Gatherv(&ml->interfaceQR[ml->numInterfaceRows*col+ml->firstInterfaceRow[rank]], ml->numLocInterfaceRows,
                         MPI_DOUBLE, &ml->interfaceQR[ml->numInterfaceRows*col], recvCounts, ml->firstInterfaceRow,
                         MPI_DOUBLE, 0, pc->comm);
                                                                                                          CHKERRQ(ierr);
    }
    ierr = PetscFree(recvCounts);                                                                         CHKERRQ(ierr);

    /* Allocate workspace -- Also used in applying P */
    ml->workLen = ml->numInterfaceRows;
    ierr = PetscMalloc(ml->workLen * sizeof(double), &ml->work);                                          CHKERRQ(ierr);
    PetscLogObjectMemory(pc, ml->workLen * sizeof(double));
#endif

    /* Do QR factorization on one processor */
    ierr = PC_MLLogEventBegin(PC_ML_QRFactorization, pc, 0, 0, 0);                                        CHKERRQ(ierr);
#ifdef PETSC_HAVE_PLAPACK
    ierr = PLA_QR(ml->interfaceQR, ml->interfaceTAU);                                                     CHKERRQ(ierr);
#else
    if (rank == 0) {
      LAgeqrf_(&ml->numInterfaceRows, &ml->numNullCols, ml->interfaceQR,
             &ml->numInterfaceRows, ml->interfaceTAU, ml->work, &ml->workLen, &ierr);
      if (ierr) SETERRQ(PETSC_ERR_LIB, "Invalid interface QR");
    }
#endif
    ierr = PC_MLLogEventEnd(PC_ML_QRFactorization, pc, 0, 0, 0);                                          CHKERRQ(ierr);

#ifdef PETSC_USE_BOPT_g
    /* Diagnostics */
    ierr = PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug_qr", &opt);                                      CHKERRQ(ierr);
    if (opt == PETSC_TRUE) {
      PetscReal *intQR;
      PetscReal *intTAU;
      int     r, c;
#ifdef PETSC_HAVE_PLAPACK
      PLA_Obj tau = PETSC_NULL;
      PLA_Obj qr  = PETSC_NULL;
      int     length, width, stride, ldim;

      ierr = PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numNullCols, 1, ml->PLAlayout, &tau);               CHKERRQ(ierr);
      ierr = PLA_Mscalar_create(MPI_DOUBLE, 0, 0, ml->numInterfaceRows, ml->numNullCols, ml->PLAlayout, &qr); CHKERRQ(ierr);
      ierr = PLA_Copy(ml->interfaceTAU, tau);                                                             CHKERRQ(ierr);
      ierr = PLA_Copy(ml->interfaceQR,  qr);                                                              CHKERRQ(ierr);
      ierr = PLA_Obj_local_info(tau, &length, &width, (void **) &intTAU, &stride, &ldim);                 CHKERRQ(ierr);
      if ((rank == 0) && ((length != ml->numNullCols) || (width != 1) || (stride != 1)))
        SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface tau vector");
      ierr = PLA_Obj_local_info(qr,  &length, &width, (void **) &intQR,  &stride, &ldim);                 CHKERRQ(ierr);
      if ((rank == 0) && ((length != ml->numInterfaceRows) || (width != ml->numNullCols) || (ldim != ml->numInterfaceRows)))
        SETERRQ(PETSC_ERR_LIB, "PLAPACK returned a bad interface qr matrix");
#else
      intTAU = ml->interfaceTAU;
      intQR  = ml->interfaceQR;
#endif
      if (rank == 0) {
        for(c = 0; c < ml->numNullCols; c++)
          PetscPrintf(pc->comm, "%.4g ", intTAU[c]);
        PetscPrintf(pc->comm, "\n");
        for(r = 0; r < ml->numInterfaceRows; r++) {
          for(c = 0; c < ml->numNullCols; c++)
            PetscPrintf(pc->comm, "%.4g ", intQR[ml->numInterfaceRows*c+r]);
          PetscPrintf(pc->comm, "\n");
        }
      }
#ifdef PETSC_HAVE_PLAPACK
      ierr = PLA_Obj_free(&tau);                                                                          CHKERRQ(ierr);
      ierr = PLA_Obj_free(&qr);                                                                           CHKERRQ(ierr);
#endif
    }
#endif

    /* Scatter Cleanup */
    ierr = PetscFree(rangeRows);                                                                          CHKERRQ(ierr);
    ierr = PetscFree(nullCols);                                                                           CHKERRQ(ierr);
    ierr = PetscFree(interfaceRows);                                                                      CHKERRQ(ierr);
    ierr = PetscFree(interfaceCols);                                                                      CHKERRQ(ierr);
    ierr = PC_MLLogEventEnd(PC_ML_SetUpParallel, pc, 0, 0, 0);                                            CHKERRQ(ierr);
  } else {
    /* Get the rows for the range split onto processors like a column vector */
    ierr = PetscMalloc(ml->rank * sizeof(int), &rangeRows);                                               CHKERRQ(ierr);
    for(row = 0; row < ml->rank; row++)
      rangeRows[row] = ml->range[row];
    ierr = GVecCreateRectangular(grid, ml->sOrder, &ml->colWorkVec);                                      CHKERRQ(ierr);

    /* Create scatter from a solution vector to a column vector */
    ierr = ISCreateStride(PETSC_COMM_SELF, ml->sOrder->numLocVars, ml->sOrder->firstVar[rank], 1, &localIS); CHKERRQ(ierr);
    ierr = ISCreateGeneral(PETSC_COMM_SELF, ml->sOrder->numLocVars, rangeRows, &globalIS);                CHKERRQ(ierr);
    ierr = VecScatterCreate(pc->vec, globalIS, ml->colWorkVec, localIS, &ml->rangeScatter);               CHKERRQ(ierr);
    ierr = ISDestroy(localIS);                                                                            CHKERRQ(ierr);
    ierr = ISDestroy(globalIS);                                                                           CHKERRQ(ierr);

    /* Cleanup */
    ierr = PetscFree(rangeRows);                                                                          CHKERRQ(ierr);
    ierr = VecDestroy(ml->colWorkVec);                                                                    CHKERRQ(ierr);
  }

  if (ml->diag != PETSC_NULL) {
    /* Recover original B */
    ierr = VecReciprocal(ml->diag);                                                                       CHKERRQ(ierr);
    ierr = MatDiagonalScale(ml->B, ml->diag, PETSC_NULL);                                                 CHKERRQ(ierr);
    ierr = VecReciprocal(ml->diag);                                                                       CHKERRQ(ierr);
  }

#if defined(PARCH_IRIX64) && !defined(PETSC_USE_BOPT_g)
  /* Read the hardware counter values */
  if ((gen_read = ioctl(fd, PIOCGETEVCTRS, (void *) &counts)) < 0) {
    SETERRQ(PETSC_ERR_LIB, "Could not access SGI hardware counters");
  }
  /* Generation number should be the same */
  if (gen_start != gen_read) SETERRQ(PETSC_ERR_LIB, "Lost SGI hardware counters");
  /* Release the counters */
  if ((ioctl(fd, PIOCRELEVCTRS)) < 0) SETERRQ(PETSC_ERR_LIB, "Could not free SGI hardware counters");
  ierr = close(fd);                                                                                       CHKERRQ(ierr);
  count0 = counts.hwp_evctr[event0];
  count1 = counts.hwp_evctr[event1];
  ierr = MPI_Reduce(&count0, &globalCount0, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);                  CHKERRQ(ierr);
  ierr = MPI_Reduce(&count1, &globalCount1, 1, MPI_LONG_LONG_INT, MPI_SUM, 0, pc->comm);                  CHKERRQ(ierr);
  /* Use hardware counter data */
  switch (event1) {
  case 21:
    PetscPrintf(pc->comm, "Flops: %ld\n", globalCount1);
    break;
  case 25:
    PetscPrintf(pc->comm, "Data cache misses: %ld\n", globalCount1);
    break;
  case 26:
    PetscPrintf(pc->comm, "Secondary data cache misses: %ld\n", globalCount1);
    break;
  }
#endif

  ierr = PetscOptionsHasName(PETSC_NULL, "-pc_ml_debug", &opt);                                           CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ierr = PCDebug_Multilevel(pc);                                                                        CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCPreSolve_Multilevel"
/*
  PCPreSolve_Multilevel - Modifies the right-hand side before the solve to
  account for the reduction of constrained (boundary) unknowns.

  Input Parameters:
+ pc  - The preconditioner context
. ksp - The Krylov solver (unused here)
. x   - The solution vector (unused here)
- rhs - The right-hand side, updated in place

  Sequence performed (following the comments in the body):
  1. Build g-related data:  -g = B^T u + B^T_Gamma f_Gamma
  2. Apply V^T and D^{-T} to obtain D^{-T} Z^T g
  3. Apply P_1 to obtain the reduced solution contribution ml->reduceSol
  4. Multiply by the system matrix and add (scaled by the BC multiplier)
     into rhs:  f --> f - A P_1 D^{-1} Z^T g
*/
static int PCPreSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
{
  PC_Multilevel *ml   = (PC_Multilevel *) pc->data;
  Grid           grid = ml->grid;
  GVec           bdReduceVec, bdReduceVec2;  /* work vectors in the s-ordering */
  GVec           reduceVec;                  /* work vector in the t-ordering */
  GMat           bdReduceMat;               /* Galerkin operator used in the OLD_REDUCTION path */
  VarOrdering    sOrder;                    /* subset ordering for the test field */
  PetscScalar    one = 1.0, reduceAlpha;
  int            bc;
  int            ierr;

  PetscFunctionBegin;
  /* Create -g = B^T u + B^T_\Gamma f_\Gamma */
  ierr = GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec);                                           CHKERRQ(ierr);
  ierr = VarOrderingCreateSubset(grid->reduceOrder, 1, &ml->tField, PETSC_FALSE, &sOrder);                CHKERRQ(ierr);
/* OLD_REDUCTION is hard-enabled here, so the matrix-based path below is the
   one actually compiled; the #else branch is the (disabled) matrix-free form */
#define OLD_REDUCTION
#ifdef OLD_REDUCTION
  ierr = GMatCreateRectangular(grid, sOrder, ml->sOrder, &bdReduceMat);                                   CHKERRQ(ierr);
  ierr = MatSetOption(bdReduceMat, MAT_NEW_NONZERO_ALLOCATION_ERR);                                       CHKERRQ(ierr);
  for(bc = 0; bc < grid->numBC; bc++) {
    /* Conditions on A */
    if (ml->tField == grid->bc[bc].field) {
      /* Assemble the divergence operator restricted to the constrained field */
      ierr = GMatEvaluateOperatorGalerkin(bdReduceMat, PETSC_NULL, 1, &ml->tField, &ml->sField, ml->divOp,
                                          1.0, MAT_FINAL_ASSEMBLY, ml);
                                                                                                          CHKERRQ(ierr);
    }
    /* Conditions on B^T only show up in the u equations, so they are already handled */
  }
  for(bc = 0; bc < grid->numPointBC; bc++) {
    /* Conditions on A */
    if (ml->tField == grid->pointBC[bc].field) {
      /* NOTE(review): both input arguments are grid->bdReduceVec here, while the
         matrix-free path below passes bdReduceVec for the first one — confirm
         this asymmetry is intentional */
      ierr = GVecEvaluateOperatorGalerkin(bdReduceVec, grid->bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
                                          ml->divOp, 1.0, ml);
                                                                                                          CHKERRQ(ierr);
    }
    /* Conditions on B^T only show up in the u equations, so they are already handled */
#ifdef PETSC_USE_BOPT_g
    /* Heap-corruption check in debug builds after each point-BC evaluation */
    ierr = PetscTrValid(__LINE__, __FUNCT__, __FILE__, __SDIR__);                                         CHKERRQ(ierr);
#endif
  }
#else
  /* Matrix-free evaluation (disabled); bdReduceVec is both input and output here */
  ierr = GVecEvaluateOperatorGalerkin(bdReduceVec, bdReduceVec, grid->bdReduceVec, ml->tField, ml->sField,
                                      ml->divOp, 1.0, ml);
                                                                                                          CHKERRQ(ierr);
#endif
  ierr = VarOrderingDestroy(sOrder);                                                                      CHKERRQ(ierr);
#ifdef OLD_REDUCTION
  /* Should be taken out when I fix the MF version */
  ierr = MatMult(bdReduceMat, grid->bdReduceVec, bdReduceVec);                                            CHKERRQ(ierr);
  ierr = GMatDestroy(bdReduceMat);                                                                        CHKERRQ(ierr);
#endif

  /* Add in the B^T u part */
  if (ml->nonlinearIterate != PETSC_NULL) {
    ierr = GVecCreateRectangular(grid, ml->sOrder, &bdReduceVec2);                                        CHKERRQ(ierr);
    ierr = PCMultiLevelApplyGradientTrans(pc, ml->nonlinearIterate, bdReduceVec2);                        CHKERRQ(ierr);
    ierr = VecAXPY(&one, bdReduceVec2, bdReduceVec);                                                      CHKERRQ(ierr);
    ierr = GVecDestroy(bdReduceVec2);                                                                     CHKERRQ(ierr);
  }

  /* Calculate -D^{-T} Z^T g = D^{-T} Z^T B^T_\Gamma f_\Gamma (in place in bdReduceVec) */
  ierr = PCMultiLevelApplyVTrans(pc, bdReduceVec, bdReduceVec);                                           CHKERRQ(ierr);
  ierr = PCMultiLevelApplyDInvTrans(pc, bdReduceVec, bdReduceVec);                                        CHKERRQ(ierr);

  /* Calculate -P_1 D^{-1} Z^T g */
  ierr = PCMultiLevelApplyP1(pc, bdReduceVec, ml->reduceSol);                                             CHKERRQ(ierr);

  /* Calculate -A P_1 D^{-1} Z^T g -- This does not seem like good design, but I will deal with it later */
  ierr = GVecCreateRectangular(grid, ml->tOrder, &reduceVec);                                             CHKERRQ(ierr);
  ierr = MatMult(pc->mat, ml->reduceSol, reduceVec);                                                      CHKERRQ(ierr);

  /* f --> f - A P_1 D^{-1} Z^T g (reduceAlpha is the grid's BC multiplier) */
  ierr = GridGetBCMultiplier(grid, &reduceAlpha);                                                         CHKERRQ(ierr);
  ierr = VecAXPY(&reduceAlpha, reduceVec, rhs);                                                           CHKERRQ(ierr);

  /* Cleanup */
  ierr = GVecDestroy(reduceVec);                                                                          CHKERRQ(ierr);
  ierr = GVecDestroy(bdReduceVec);                                                                        CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCPostSolve_Multilevel"
/*
  PCPostSolve_Multilevel - Post-solve hook for the multilevel preconditioner.

  Intentionally a no-op: the pre-solve modification of the right-hand side
  needs no corresponding fixup after the solve. Present so that the PC
  framework has a matching presolve/postsolve pair.
*/
static int PCPostSolve_Multilevel(PC pc, KSP ksp, Vec x, Vec rhs)
{
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCDestroy_Multilevel"
/*
  PCDestroy_Multilevel - Destroys the multilevel preconditioner context.

  Input Parameter:
. pc - The preconditioner context whose data is a PC_Multilevel

  Frees the level structures (only if setup built them, detected by
  numLevels >= 0), the optional viewers, and finally the context itself.
*/
static int PCDestroy_Multilevel(PC pc)
{
  PC_Multilevel *ml = (PC_Multilevel *) pc->data;
  int            ierr;

  PetscFunctionBegin;
  /* numLevels is initialized to -1 in PCCreate_Multilevel; >= 0 means setup ran */
  if (ml->numLevels >= 0) {
    ierr = PCDestroyStructures_Multilevel(pc);                                                            CHKERRQ(ierr);
  }
  if (ml->mathViewer != PETSC_NULL) {
    ierr = PetscViewerDestroy(ml->mathViewer);                                                            CHKERRQ(ierr);
  }
  if (ml->factorizationViewer != PETSC_NULL) {
    ierr = PetscViewerDestroy(ml->factorizationViewer);                                                   CHKERRQ(ierr);
  }
  /* Fix: PetscFree() returns an error code that was previously ignored */
  ierr = PetscFree(ml);                                                                                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCApply_Multilevel"
/*
  PCApply_Multilevel - Applies the multilevel preconditioner.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The preconditioned vector

  Delegates directly to PCMultiLevelApplyP.
*/
int PCApply_Multilevel(PC pc, Vec x, Vec y)
{
  int rc;

  PetscFunctionBegin;
  rc = PCMultiLevelApplyP(pc, x, y);
  CHKERRQ(rc);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCApplyTrans_Multilevel"
/*
  PCApplyTrans_Multilevel - Applies the transpose of the multilevel
  preconditioner.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The preconditioned vector

  Delegates directly to PCMultiLevelApplyPTrans.
*/
int PCApplyTrans_Multilevel(PC pc, Vec x, Vec y)
{
  int rc;

  PetscFunctionBegin;
  rc = PCMultiLevelApplyPTrans(pc, x, y);
  CHKERRQ(rc);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCApplySymmetricLeft_Multilevel"
/*
  PCApplySymmetricLeft_Multilevel - Applies the left factor of the
  symmetrically-split multilevel preconditioner.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The result vector

  Delegates directly to PCMultiLevelApplyP2Trans.
*/
int PCApplySymmetricLeft_Multilevel(PC pc, Vec x, Vec y)
{
  int rc;

  PetscFunctionBegin;
  rc = PCMultiLevelApplyP2Trans(pc, x, y);
  CHKERRQ(rc);
  PetscFunctionReturn(0);
}

#undef __FUNCT__  
#define __FUNCT__ "PCApplySymmetricRight_Multilevel"
/*
  PCApplySymmetricRight_Multilevel - Applies the right factor of the
  symmetrically-split multilevel preconditioner.

  Input Parameters:
+ pc - The preconditioner context
- x  - The input vector

  Output Parameter:
. y  - The result vector

  Delegates directly to PCMultiLevelApplyP2.
*/
int PCApplySymmetricRight_Multilevel(PC pc, Vec x, Vec y)
{
  int rc;

  PetscFunctionBegin;
  rc = PCMultiLevelApplyP2(pc, x, y);
  CHKERRQ(rc);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__  
#define __FUNCT__ "PCCreate_Multilevel"
/*
  PCCreate_Multilevel - Creates the context for the multilevel preconditioner
  and installs its function table on the PC object.

  Input Parameter:
. pc - The preconditioner object being created

  Every field of the new PC_Multilevel context receives an explicit sentinel
  (PETSC_NULL or -1) or default value so later phases can detect what has been
  constructed; e.g. PCDestroy_Multilevel tests numLevels >= 0.
*/
int PCCreate_Multilevel(PC pc)
{
  PC_Multilevel *ctx;
  int            ierr;

  PetscFunctionBegin;
  /* Ensure package-level registration (events) has happened */
  ierr = PCMultiLevelInitializePackage(PETSC_NULL);                                                       CHKERRQ(ierr);

  ierr = PetscNew(PC_Multilevel, &ctx);                                                                   CHKERRQ(ierr);
  PetscLogObjectMemory(pc, sizeof(PC_Multilevel));
  pc->data                     = (void *) ctx;

  /* Function table: entries left PETSC_NULL are not supported by this PC */
  pc->ops->setup               = PCSetUp_Multilevel;
  pc->ops->apply               = PCApply_Multilevel;
  pc->ops->applyrichardson     = PETSC_NULL;
  pc->ops->applyBA             = PETSC_NULL;
  pc->ops->applytranspose      = PCApplyTrans_Multilevel;
  pc->ops->applyBAtranspose    = PETSC_NULL;
  pc->ops->setfromoptions      = PETSC_NULL;
  pc->ops->presolve            = PCPreSolve_Multilevel;
  pc->ops->postsolve           = PCPostSolve_Multilevel;
  pc->ops->getfactoredmatrix   = PETSC_NULL;
  pc->ops->applysymmetricleft  = PCApplySymmetricLeft_Multilevel;
  pc->ops->applysymmetricright = PCApplySymmetricRight_Multilevel;
  pc->ops->setuponblocks       = PETSC_NULL;
  pc->ops->destroy             = PCDestroy_Multilevel;
  pc->ops->view                = PETSC_NULL;
  pc->modifysubmatrices        = PETSC_NULL;

  /* Grid and viewers */
  ctx->grid                = PETSC_NULL;
  ctx->useMath             = PETSC_FALSE;
  ctx->mathViewer          = PETSC_NULL;
  ctx->factorizationViewer = PETSC_NULL;

  /* Operators, fields, and orderings for the reduction */
  ctx->gradOp              = GRADIENT;
  ctx->divOp               = DIVERGENCE;
  ctx->sField              = -1;
  ctx->tField              = -1;
  ctx->sOrder              = PETSC_NULL;
  ctx->sLocOrder           = PETSC_NULL;
  ctx->tOrder              = PETSC_NULL;
  ctx->tLocOrder           = PETSC_NULL;
  ctx->gradAlpha           = 1.0;
  ctx->B                   = PETSC_NULL;
  ctx->locB                = PETSC_NULL;
  ctx->reduceSol           = PETSC_NULL;
  ctx->nonlinearIterate    = PETSC_NULL;
  ctx->diag                = PETSC_NULL;

  /* Mesh hierarchy */
  ctx->numMeshes           = -1;
  ctx->numElements         = PETSC_NULL;
  ctx->numVertices         = PETSC_NULL;
  ctx->vertices            = PETSC_NULL;
  ctx->numEdges            = -1;
  ctx->meshes              = PETSC_NULL;

  /* Partitioning data */
  ctx->numPartitions       = PETSC_NULL;
  ctx->numPartitionCols    = PETSC_NULL;
  ctx->colPartition        = PETSC_NULL;
  ctx->numPartitionRows    = PETSC_NULL;
  ctx->rowPartition        = PETSC_NULL;
  ctx->rank                = -1;
  ctx->localRank           = -1;
  ctx->range               = PETSC_NULL;
  ctx->rangeScatter        = PETSC_NULL;
  ctx->numLocNullCols      = -1;
  ctx->nullCols            = PETSC_NULL;

  /* Factorization parameters and storage */
  ctx->numRows             = -1;
  ctx->numCols             = -1;
  ctx->numLevels           = -1;
  ctx->maxLevels           = -1;
  ctx->QRthresh            = 0;
  ctx->DoQRFactor          = 2;
  ctx->zeroTol             = 1.0e-10;
  ctx->factors             = PETSC_NULL;
  ctx->grads               = PETSC_NULL;

  /* Interface/interior decomposition */
  ctx->globalRank          = -1;
  ctx->numNullCols         = -1;
  ctx->numLocInterfaceRows = -1;
  ctx->numInterfaceRows    = -1;
  ctx->interfaceRows       = PETSC_NULL;
  ctx->firstInterfaceRow   = PETSC_NULL;
  ctx->numInteriorRows     = -1;
  ctx->interiorRows        = PETSC_NULL;
  ctx->interfaceQR         = PETSC_NULL;
  ctx->interfaceTAU        = PETSC_NULL;
  ctx->interfaceV          = PETSC_NULL;
  ctx->interfaceB          = PETSC_NULL;
  ctx->interiorRhs         = PETSC_NULL;
  ctx->interiorScatter     = PETSC_NULL;
  ctx->interfaceRhs        = PETSC_NULL;
  ctx->interfaceScatter    = PETSC_NULL;
#ifndef PETSC_HAVE_PLAPACK
  ctx->locInterfaceRhs     = PETSC_NULL;
  ctx->locInterfaceScatter = PETSC_NULL;
#endif
  ctx->interfaceColRhs     = PETSC_NULL;
  ctx->interfaceColScatter = PETSC_NULL;
  ctx->reduceScatter       = PETSC_NULL;
  ctx->colWorkVec          = PETSC_NULL;
  ctx->globalColWorkVec    = PETSC_NULL;

  /* Scratch arrays */
  ctx->workLen             = -1;
  ctx->work                = PETSC_NULL;
  ctx->svdWorkLen          = -1;
  ctx->svdWork             = PETSC_NULL;
  PetscFunctionReturn(0);
}
EXTERN_C_END
