#ifdef PETSC_RCS_HEADER
static char vcid[] = "$Id: part2d.c,v 1.14 2000/01/31 17:40:21 knepley Exp $";
#endif

#include "src/mesh/impls/triangular/2d/2dimpl.h"         /*I "mesh.h" I*/
#ifdef PETSC_HAVE_PARMETIS
EXTERN_C_BEGIN
#include "parmetis.h"
EXTERN_C_END
#endif
#include "part2d.h"

#undef  __FUNCT__
#define __FUNCT__ "PartitionView_Triangular_2D_File"
static int PartitionView_Triangular_2D_File(Partition p, PetscViewer viewer) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;
  FILE                    *fd;
  int                      numLocElements = p->numLocElements;
  int                      numLocNodes    = q->numLocNodes;
  int                      numLocEdges    = q->numLocEdges;
  int                      i;
  int                      ierr;

  PetscFunctionBegin;
  PetscViewerASCIIPrintf(viewer, "Partition Object:\n");
  PetscViewerASCIIPrintf(viewer, "  Partition of triangular 2D grid with %d elements and %d nodes\n", p->numElements, q->numNodes);
  ierr = PetscViewerASCIIGetPointer(viewer, &fd);                                                         CHKERRQ(ierr);
  PetscSynchronizedFPrintf(p->comm, fd, "    Proc %d: %d elements %d nodes %d edges\n",
                           p->rank, numLocElements, numLocNodes, numLocEdges);
  PetscSynchronizedFlush(p->comm);
  if (p->ordering != PETSC_NULL) {
    PetscViewerASCIIPrintf(viewer, "  Global element renumbering:\n");
    ierr = AOView(p->ordering, viewer);                                                                   CHKERRQ(ierr);
  }
  if (q->nodeOrdering != PETSC_NULL) {
    PetscViewerASCIIPrintf(viewer, "  Global node renumbering:\n");
    ierr = AOView(q->nodeOrdering, viewer);                                                               CHKERRQ(ierr);
  }
  PetscSynchronizedFPrintf(p->comm, fd, "  %d ghost elements on proc %d\n", p->numOverlapElements - numLocElements, p->rank);
  for(i = 0; i < p->numOverlapElements - numLocElements; i++)
    PetscSynchronizedFPrintf(p->comm, fd, "  %d %d %d\n", i, p->ghostElements[i], p->ghostElementProcs[i]);
  PetscSynchronizedFlush(p->comm);
  PetscSynchronizedFPrintf(p->comm, fd, "  %d ghost nodes on proc %d\n", q->numOverlapNodes - numLocNodes, p->rank);
  for(i = 0; i < q->numOverlapNodes - numLocNodes; i++)
    PetscSynchronizedFPrintf(p->comm, fd, "  %d %d\n", i, q->ghostNodes[i]);
  PetscSynchronizedFlush(p->comm);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionView_Triangular_2D_Draw"
static int PartitionView_Triangular_2D_Draw(Partition p, PetscViewer v) {
  PetscFunctionBegin;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionView_Triangular_2D"
static int PartitionView_Triangular_2D(Partition p, PetscViewer viewer) {
  PetscTruth isascii, isdraw;
  int        ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject) viewer, PETSC_VIEWER_ASCII, &isascii);                            CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject) viewer, PETSC_VIEWER_DRAW,  &isdraw);                             CHKERRQ(ierr);
  if (isascii == PETSC_TRUE) {
    ierr = PartitionView_Triangular_2D_File(p, viewer);                                                   CHKERRQ(ierr);
  } else if (isdraw == PETSC_TRUE) {
    ierr = PartitionView_Triangular_2D_Draw(p, viewer);                                                   CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionViewFromOptions_Private"
static int PartitionViewFromOptions_Private(Partition part, char *title) {
  PetscViewer viewer;
  PetscDraw   draw;
  PetscTruth  opt;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHasName(part->prefix, "-part_view", &opt);                                           CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ierr = PartitionView(part, PETSC_NULL);                                                               CHKERRQ(ierr);
  }
  ierr = PetscOptionsHasName(part->prefix, "-part_view_draw", &opt);                                      CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ierr = PetscViewerDrawOpen(part->comm, 0, 0, 0, 0, 300, 300, &viewer);                                CHKERRQ(ierr);
    ierr = PetscViewerDrawGetDraw(viewer, 0, &draw);                                                      CHKERRQ(ierr);
    if (title != PETSC_NULL) {
      ierr = PetscDrawSetTitle(draw, title);                                                              CHKERRQ(ierr);
    }
    ierr = PartitionView(part, viewer);                                                                   CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);                                                                      CHKERRQ(ierr);
    ierr = PetscDrawPause(draw);                                                                          CHKERRQ(ierr);
    ierr = PetscViewerDestroy(viewer);                                                                    CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionDestroy_Triangular_2D"
static int PartitionDestroy_Triangular_2D(Partition p) {
  Partition_Triangular_2D *s = (Partition_Triangular_2D *) p->data;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscFree(p->firstElement);                                                                     CHKERRQ(ierr);
  if (p->ordering != PETSC_NULL)
    {ierr = AODestroy(p->ordering);                                                                      CHKERRQ(ierr);}
  if (p->ghostElements != PETSC_NULL)
    {ierr = PetscFree(p->ghostElements);                                                                 CHKERRQ(ierr);}
  if (p->ghostElementProcs != PETSC_NULL)
    {ierr = PetscFree(p->ghostElementProcs);                                                             CHKERRQ(ierr);}
  ierr = PetscFree(s->firstNode);                                                                        CHKERRQ(ierr);
  ierr = PetscFree(s->firstBdNode);                                                                      CHKERRQ(ierr);
  if (s->nodeOrdering != PETSC_NULL)
    {ierr = AODestroy(s->nodeOrdering);                                                                  CHKERRQ(ierr);}
  if (s->ghostNodes != PETSC_NULL)
    {ierr = PetscFree(s->ghostNodes);                                                                    CHKERRQ(ierr);}
  if (s->ghostNodeProcs != PETSC_NULL)
    {ierr = PetscFree(s->ghostNodeProcs);                                                                CHKERRQ(ierr);}
  if (s->ghostBdNodes != PETSC_NULL)
    {ierr = PetscFree(s->ghostBdNodes);                                                                  CHKERRQ(ierr);}
  ierr = PetscFree(s->firstEdge);                                                                        CHKERRQ(ierr);
  if (s->edgeOrdering != PETSC_NULL)
    {ierr = AODestroy(s->edgeOrdering);                                                                  CHKERRQ(ierr);}
  ierr = PetscFree(s);                                                                                   CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGhostNodeExchange_Triangular_2D"
static int PartitionGhostNodeExchange_Triangular_2D(Partition part, InsertMode addv, ScatterMode mode, int *locVars, int *ghostVars) {
  Partition_Triangular_2D *q    = (Partition_Triangular_2D *) part->data;
  Mesh                     mesh = part->mesh;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscGhostExchange(part->comm, q->numOverlapNodes - mesh->numNodes, q->ghostNodeProcs, q->ghostNodes, PETSC_INT,
                            q->firstNode, addv, mode, locVars, ghostVars);
  CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetTotalNodes_Triangular_2D"
static int PartitionGetTotalNodes_Triangular_2D(Partition p, int *size) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *size = q->numNodes;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetStartNode_Triangular_2D"
static int PartitionGetStartNode_Triangular_2D(Partition p, int *node) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *node = q->firstNode[p->rank];
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetEndNode_Triangular_2D"
static int PartitionGetEndNode_Triangular_2D(Partition p, int *node) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *node = q->firstNode[p->rank+1];
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNumNodes_Triangular_2D"
static int PartitionGetNumNodes_Triangular_2D(Partition p, int *size) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *size = q->numLocNodes;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNumOverlapNodes_Triangular_2D"
static int PartitionGetNumOverlapNodes_Triangular_2D(Partition p, int *size) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *size = q->numOverlapNodes;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGhostNodeIndex_Private"
int PartitionGhostNodeIndex_Private(Partition p, int node, int *gNode) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;
  int                      low, high, mid;

  PetscFunctionBegin;
  /* Use bisection since the array is assumed to be sorted */
  low  = 0;
  high = q->numOverlapNodes - (q->firstNode[p->rank+1] - q->firstNode[p->rank]) - 1;
  while (low <= high) {
    mid = (low + high)/2;
    if (node == q->ghostNodes[mid]) {
      *gNode = mid;
      PetscFunctionReturn(0);
    } else if (node < q->ghostNodes[mid]) {
      high = mid - 1;
    } else {
      low  = mid + 1;
    }
  }
  *gNode = -1;
  /* Flag for ghost node not present */
  PetscFunctionReturn(1);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGlobalToLocalNodeIndex_Triangular_2D"
static int PartitionGlobalToLocalNodeIndex_Triangular_2D(Partition p, int node, int *locNode) {
  Partition_Triangular_2D *q           = (Partition_Triangular_2D *) p->data;
  int                      numLocNodes = q->numLocNodes;
  int                      gNode; /* Local ghost node number */
  int                      ierr;

  PetscFunctionBegin;
  if (node < 0) {
    *locNode = node;
    PetscFunctionReturn(0);
  }
  /* Check for ghost node */
  if ((node < q->firstNode[p->rank]) || (node >= q->firstNode[p->rank+1])) {
    /* Search for canonical number */
    ierr = PartitionGhostNodeIndex_Private(p, node, &gNode);                                             CHKERRQ(ierr);
    *locNode = numLocNodes + gNode;
  } else {
    *locNode = node - q->firstNode[p->rank];
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionLocalToGlobalNodeIndex_Triangular_2D"
static int PartitionLocalToGlobalNodeIndex_Triangular_2D(Partition p, int locNode, int *node) {
  Partition_Triangular_2D *q           = (Partition_Triangular_2D *) p->data;
  int                      numLocNodes = q->numLocNodes;

  PetscFunctionBegin;
  if (locNode < 0) {
    *node = locNode;
    PetscFunctionReturn(0);
  }
  /* Check for ghost node */
  if (locNode >= numLocNodes) {
    *node = q->ghostNodes[locNode - numLocNodes];
  } else {
    *node = locNode + q->firstNode[p->rank];
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGlobalToGhostNodeIndex_Triangular_2D"
static int PartitionGlobalToGhostNodeIndex_Triangular_2D(Partition p, int node, int *ghostNode, int *ghostProc) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;
  int                      ierr;

  PetscFunctionBegin;
  if (node < 0) {
    *ghostNode = node;
    *ghostProc = -1;
    PetscFunctionReturn(0);
  }
  /* Check for ghost node */
  if ((node < q->firstNode[p->rank]) || (node >= q->firstNode[p->rank+1])) {
    ierr = PartitionGhostNodeIndex_Private(p, node, ghostNode);                                           CHKERRQ(ierr);
    *ghostProc = q->ghostNodeProcs[*ghostNode];
  } else {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Global node %d is not a ghost node", node);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGhostToGlobalNodeIndex_Triangular_2D"
static int PartitionGhostToGlobalNodeIndex_Triangular_2D(Partition p, int ghostNode, int *node, int *ghostProc) {
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  int                      numGhostNodes = q->numOverlapNodes - q->numLocNodes;

  PetscFunctionBegin;
  if (ghostNode < 0) {
    *node      = ghostNode;
    *ghostProc = -1;
    PetscFunctionReturn(0);
  }
  /* Check for ghost node */
  if (ghostNode < numGhostNodes) {
    *node      = q->ghostNodes[ghostNode];
    *ghostProc = q->ghostNodeProcs[ghostNode];
  } else {
    SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "Ghost node %d does not exist", ghostNode);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNodeOrdering_Triangular_2D"
static int PartitionGetNodeOrdering_Triangular_2D(Partition p, AO *order) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *order = q->nodeOrdering;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetTotalEdges_Triangular_2D"
static int PartitionGetTotalEdges_Triangular_2D(Partition p, int *size) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *size = q->numEdges;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetStartEdge_Triangular_2D"
static int PartitionGetStartEdge_Triangular_2D(Partition p, int *edge) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *edge = q->firstEdge[p->rank];
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetEndEdge_Triangular_2D"
static int PartitionGetEndEdge_Triangular_2D(Partition p, int *edge) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *edge = q->firstEdge[p->rank+1];
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNumEdges_Triangular_2D"
static int PartitionGetNumEdges_Triangular_2D(Partition p, int *size) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *size = q->numLocEdges;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNumOverlapEdges_Triangular_2D"
static int PartitionGetNumOverlapEdges_Triangular_2D(Partition p, int *size) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  /* We do not maintain ghost edges */
  *size = q->numLocEdges;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetEdgeOrdering_Triangular_2D"
static int PartitionGetEdgeOrdering_Triangular_2D(Partition p, AO *order) {
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;

  PetscFunctionBegin;
  *order = q->edgeOrdering;
  PetscFunctionReturn(0);
}

static struct _PartitionOps POps = {/* Generic Operations */
                                    PETSC_NULL/* PartitionSetup */,
                                    PETSC_NULL/* PartitionSetFromOptions */,
                                    PartitionView_Triangular_2D,
                                    PETSC_NULL/* PartitionCopy */,
                                    PETSC_NULL/* PartitionDuplicate */,
                                    PartitionDestroy_Triangular_2D,
                                    /* Partition-Specific Operations */
                                    PartitionGhostNodeExchange_Triangular_2D,
                                    /* Node Query Functions */
                                    PartitionGetTotalNodes_Triangular_2D,
                                    PartitionGetStartNode_Triangular_2D,
                                    PartitionGetEndNode_Triangular_2D,
                                    PartitionGetNumNodes_Triangular_2D,
                                    PartitionGetNumOverlapNodes_Triangular_2D,
                                    PartitionGlobalToLocalNodeIndex_Triangular_2D,
                                    PartitionLocalToGlobalNodeIndex_Triangular_2D,
                                    PartitionGlobalToGhostNodeIndex_Triangular_2D,
                                    PartitionGhostToGlobalNodeIndex_Triangular_2D,
                                    PartitionGetNodeOrdering_Triangular_2D,
                                    /* Face Query Functions */
                                    PartitionGetTotalElements,
                                    PartitionGetStartElement,
                                    PartitionGetEndElement,
                                    PartitionGetNumElements,
                                    PartitionGetNumOverlapElements,
                                    PartitionGlobalToLocalElementIndex,
                                    PartitionLocalToGlobalElementIndex,
                                    PartitionGetElementOrdering,
                                    /* Edge Query Functions */
                                    PartitionGetTotalEdges_Triangular_2D,
                                    PartitionGetStartEdge_Triangular_2D,
                                    PartitionGetEndEdge_Triangular_2D,
                                    PartitionGetNumEdges_Triangular_2D,
                                    PartitionGetNumOverlapEdges_Triangular_2D,
                                    PETSC_NULL/* PartitionGlobalToLocalEdgeIndex */,
                                    PETSC_NULL/* PartitionLocalToGlobalEdgeIndex */,
                                    PartitionGetEdgeOrdering_Triangular_2D};

#undef  __FUNCT__
#define __FUNCT__ "PartitionCalcCut_Private"
int PartitionCalcCut_Private(Partition p, int *cut)
{
  Mesh_Triangular *tri            = (Mesh_Triangular *) p->mesh->data;
  int              numLocElements = p->numLocElements;
  int             *neighbors      = tri->neighbors;
  int              locCut;        /* The number of edges of the dual crossing the partition from this domain */
  int              elem, neighbor;
  int              ierr;

  PetscFunctionBegin;
  for(elem = 0, locCut = 0; elem < numLocElements; elem++) {
    for(neighbor = 0; neighbor < 3; neighbor++) {
      if (neighbors[elem*3+neighbor] >= numLocElements)
        locCut++;
    }
  }
  ierr  = MPI_Allreduce(&locCut, cut, 1, MPI_INT, MPI_SUM, p->comm);                                     CHKERRQ(ierr);
  *cut /= 2;
  PetscFunctionReturn(0);
}

int PartitionDebugAO_Private(Partition p, int *nodeProcs)
{
  Mesh                     mesh        = p->mesh;
  Mesh_Triangular         *tri         = (Mesh_Triangular *) mesh->data;
  Partition_Triangular_2D *q           = (Partition_Triangular_2D *) p->data;
  int                      numCorners  = mesh->numCorners;
  int                      numElements = mesh->numFaces;
  int                     *elements    = tri->faces;
  int                      numNodes    = q->numNodes;
  int                      numProcs    = p->numProcs;
  int                      rank        = p->rank;
  int                     *support;
  int                     *temp;
  int                      proc, nProc, elem, nElem, sElem, corner, nCorner, node, degree, index;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(numProcs * sizeof(int), &temp);                                                      CHKERRQ(ierr);
  for(node = 0; node < numNodes; node++) {
    PetscSynchronizedPrintf(p->comm, " %d", nodeProcs[node]);
    PetscSynchronizedFlush(p->comm);
    PetscPrintf(p->comm, "\n");
    ierr = MPI_Allgather(&nodeProcs[node], 1, MPI_INT, temp, 1, MPI_INT, p->comm);                        CHKERRQ(ierr);
    for(proc = 0, index = 0; proc < numProcs; proc++) {
      if (temp[proc] == proc) index++;
    }

    /* If a node is not scheduled for a unique domain */
    if (index != 1) {
      for(elem = 0; elem < numElements; elem++) {
        for(corner = 0; corner < numCorners; corner++) {
          /* Locate an element containing the node */
          if (node != elements[elem*numCorners+corner])
            continue;
          
          /* Check the support of the node */
          PetscPrintf(PETSC_COMM_SELF, "[%d]elem: %d corner: %d node: %d\n", rank, elem, corner, node);
          ierr = MeshGetNodeSupport(mesh, node, elem, &degree, &support);                                 CHKERRQ(ierr);
          for(sElem = 0; sElem < degree; sElem++) {
            nElem = support[sElem];
            PetscPrintf(PETSC_COMM_SELF, "[%d]support[%d] = %d\n", rank, sElem, nElem);
            /* See if neighbor is in another domain */
            if (nElem >= numElements) {
              /* Check to see if node is contained in the neighboring element */
              for(nCorner = 0; nCorner < numCorners; nCorner++)
                if (elements[nElem*numCorners+nCorner] == node) {
                  nProc = p->ghostElementProcs[nElem-numElements];
                  PetscPrintf(PETSC_COMM_SELF, "[%d]Found in corner %d proc: %d\n", rank, nCorner, nProc);
                  break;
                }
            }
          }
          ierr = MeshRestoreNodeSupport(mesh, node, elem, &degree, &support);                             CHKERRQ(ierr);
          if (nodeProcs[node] < 0)
            nodeProcs[node] = rank;
          PetscPrintf(PETSC_COMM_SELF, "[%d]nodeProcs[%d]: %d\n", rank, node, nodeProcs[node]);
        }
      }
    }
  }
  ierr = PetscFree(temp);                                                                                 CHKERRQ(ierr);
  ierr = PetscBarrier((PetscObject) p);                                                                   CHKERRQ(ierr);
  PetscFunctionReturn(1);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionSortGhosts_Private"
/*
  PartitionSortGhosts_Private - This function sorts the ghost array and
  removes any duplicates.

  Input Parameters:
. p          - The Partition
. numGhosts  - The number of ghosts
. ghostArray - The ghost indices

  Output Paramters:
. numGhosts  - The new size of the ghost array
. ghostArray - The sorted ghost indices

.seealso:
*/
int PartitionSortGhosts_Private(Partition p, int *numGhosts, int *ghostArray, int **ghostPerm)
{
  int *perm, *temp;
  int  size;
  int  ghost, newGhost;
  int  ierr;

  PetscFunctionBegin;
  size = *numGhosts;
  ierr = PetscMalloc(size * sizeof(int), &perm);                                                          CHKERRQ(ierr);
  ierr = PetscMalloc(size * sizeof(int), &temp);                                                          CHKERRQ(ierr);

  /* Sort ghosts */
  for(ghost = 0; ghost < size; ghost++) perm[ghost] = ghost;
  ierr = PetscSortIntWithPermutation(size, ghostArray, perm);                                             CHKERRQ(ierr);

  /* Permute ghosts and eliminates duplicates */
  for(ghost = 0, newGhost = 0; ghost < size; ghost++) {
    if ((newGhost == 0) || (temp[newGhost-1] != ghostArray[perm[ghost]])) {
      /* Keep ghost */
      temp[newGhost++] = ghostArray[perm[ghost]];
    } else {
      /* Eliminate redundant ghost */
      ierr = PetscMemmove(&perm[ghost], &perm[ghost+1], (size - (ghost+1)) * sizeof(int));                CHKERRQ(ierr);
      ghost--;
      size--;
    }
  }
  for(ghost = 0; ghost < size; ghost++) {
    ghostArray[ghost] = temp[ghost];
  }
  ierr = PetscFree(temp);                                                                                 CHKERRQ(ierr);

  *numGhosts = size;
  *ghostPerm = perm;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNewGhostNodes_Serial"
int PartitionGetNewGhostNodes_Serial(Partition p, int *newProcNodes, int *newNodes)
{
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  int                      numLocNodes   = q->numLocNodes;
  int                      numGhostNodes = q->numOverlapNodes - numLocNodes;
  int                      numProcs      = p->numProcs;
  int                     *nodePerm;        /* The new permutation for the sorted ghost nodes */
  int                      numNewNodes;    /* Total number of new ghost nodes to add */
  int                     *temp;
  int                      proc, node, i;
  int                      ierr;

  PetscFunctionBegin;
  for(proc = 0, numNewNodes = 0; proc < numProcs; proc++)
    numNewNodes += newProcNodes[proc];

  /* Add in new ghost nodes */
  if (numNewNodes > 0) {
    ierr = PetscMalloc((numGhostNodes + numNewNodes) * sizeof(int), &temp);                               CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, q->ghostNodes, numGhostNodes * sizeof(int));                                 CHKERRQ(ierr);
    for(node = 0; node < numNewNodes; node++) {
      temp[numGhostNodes+node] = newNodes[node];
    }
    if (q->ghostNodes != PETSC_NULL) {
      ierr = PetscFree(q->ghostNodes);                                                                    CHKERRQ(ierr);
    }
    q->ghostNodes = temp;

    ierr = PetscMalloc((numGhostNodes + numNewNodes) * sizeof(int), &temp);                               CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, q->ghostNodeProcs, numGhostNodes * sizeof(int));                             CHKERRQ(ierr);
    for(proc = 0, node = 0; proc < numProcs; proc++) {
      for(i = 0; i < newProcNodes[proc]; i++)
        temp[numGhostNodes+(node++)] = proc;
    }
    if (q->ghostNodeProcs != PETSC_NULL) {
      ierr = PetscFree(q->ghostNodeProcs);                                                                CHKERRQ(ierr);
    }
    q->ghostNodeProcs = temp;

    /* Resort ghost nodes and remove duplicates */
    numGhostNodes += numNewNodes;
    ierr = PartitionSortGhosts_Private(p, &numGhostNodes, q->ghostNodes, &nodePerm);                      CHKERRQ(ierr);
    q->numOverlapNodes = numLocNodes + numGhostNodes;
    ierr = PetscMalloc(numGhostNodes * sizeof(int), &temp);                                               CHKERRQ(ierr);
    for(node = 0; node < numGhostNodes; node++) {
      temp[node] = q->ghostNodeProcs[nodePerm[node]];
    }
    ierr = PetscFree(q->ghostNodeProcs);                                                                  CHKERRQ(ierr);
    q->ghostNodeProcs = temp;
    ierr = PetscFree(nodePerm);                                                                           CHKERRQ(ierr);
  }
#ifdef PETSC_USE_BOPT_g
  /* Consistency check for ghost nodes */
  for(node = 0; node < numGhostNodes; node++) {
    if ((q->ghostNodes[node] <  q->firstNode[q->ghostNodeProcs[node]]) ||
        (q->ghostNodes[node] >= q->firstNode[q->ghostNodeProcs[node]+1])) {
      SETERRQ(PETSC_ERR_PLIB, "Invalid ghost node source processor");
    }
  }
#endif
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNewGhostNodes_Parallel"
int PartitionGetNewGhostNodes_Parallel(Partition p, int *sendGhostNodes, int *sendNodes, int *recvGhostNodes, int *recvNodes)
{
  Partition_Triangular_2D *q               = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh            = p->mesh;
  Mesh_Triangular         *tri             = (Mesh_Triangular *) mesh->data;
  int                      numLocNodes     = q->numLocNodes;
  int                     *firstNode       = q->firstNode;
  double                  *nodes           = tri->nodes;
  int                     *markers         = tri->markers;
  int                     *degrees         = tri->degrees;
  int                      numProcs        = p->numProcs;
  int                      rank            = p->rank;
  int                      numGhostNodes;
  int                     *nodePerm;        /* The new permutation for the sorted ghost nodes */
  int                      numSendNodes;    /* Total number of new ghost nodes to receive */
  int                      numRecvNodes;    /* Total number of new ghost nodes to send */
  int                     *sumSendNodes;    /* Prefix sums of sendGhostNodes */
  int                     *sumRecvNodes;    /* Prefix sums of recvGhostNodes */
  int                     *sendGhostCoords; /* Number of new ghost nodes needed from a given processor */
  int                     *recvGhostCoords; /* Number of new ghost nodes needed by a given processor */
  int                     *sumSendCoords;   /* Prefix sums of sendGhostCoords */
  int                     *sumRecvCoords;   /* Prefix sums of recvGhostCoords */
  double                  *sendCoords;      /* Coordinates of ghost nodes for other domains */
  double                  *recvCoords;      /* Coordinates of ghost nodes for this domains */
  int                     *sendMarkers;     /* Markers of ghost nodes for other domains */
  int                     *recvMarkers;     /* Markers of ghost nodes for this domains */
  int                     *sendDegrees;     /* Degrees of ghost nodes for other domains */
  int                     *recvDegrees;     /* Degrees of ghost nodes for this domains */
  int                     *offsets;         /* Offsets into the send array for each destination proc */
  int                     *temp;
  double                  *temp2;
  int                      proc, node, locNode, i;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(numProcs * sizeof(int), &sumSendNodes);                                              CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvNodes);                                              CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sendGhostCoords);                                           CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &recvGhostCoords);                                           CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumSendCoords);                                             CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvCoords);                                             CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &offsets);                                                   CHKERRQ(ierr);
  ierr = PetscMemzero(sumSendNodes, numProcs * sizeof(int));                                              CHKERRQ(ierr);
  ierr = PetscMemzero(sumRecvNodes, numProcs * sizeof(int));                                              CHKERRQ(ierr);
  ierr = PetscMemzero(offsets,      numProcs * sizeof(int));                                              CHKERRQ(ierr);

  /* Compute new ghost node offsets */
  for(proc = 1; proc < numProcs; proc++) {
    sumSendNodes[proc] = sumSendNodes[proc-1] + sendGhostNodes[proc-1];
    sumRecvNodes[proc] = sumRecvNodes[proc-1] + recvGhostNodes[proc-1];
  }
  numSendNodes = sumSendNodes[numProcs-1] + sendGhostNodes[numProcs-1];
  numRecvNodes = sumRecvNodes[numProcs-1] + recvGhostNodes[numProcs-1];

  /* Get numbers of ghost nodes to provide */
  ierr = MPI_Alltoallv(sendNodes, sendGhostNodes, sumSendNodes, MPI_INT,
                       recvNodes, recvGhostNodes, sumRecvNodes, MPI_INT, p->comm);
  CHKERRQ(ierr);

  /* Get node coordinates, markers, and degrees */
  for(proc = 0; proc < numProcs; proc++) {
    sendGhostCoords[proc] = sendGhostNodes[proc]*2;
    recvGhostCoords[proc] = recvGhostNodes[proc]*2;
    sumSendCoords[proc]   = sumSendNodes[proc]*2;
    sumRecvCoords[proc]   = sumRecvNodes[proc]*2;
  }
  if (numSendNodes) {
    ierr = PetscMalloc(numSendNodes*2 * sizeof(double), &recvCoords);                                     CHKERRQ(ierr);
    ierr = PetscMalloc(numSendNodes   * sizeof(int),    &recvMarkers);                                    CHKERRQ(ierr);
    ierr = PetscMalloc(numSendNodes   * sizeof(int),    &recvDegrees);                                    CHKERRQ(ierr);
  }
  if (numRecvNodes) {
    ierr = PetscMalloc(numRecvNodes*2 * sizeof(double), &sendCoords);                                     CHKERRQ(ierr);
    ierr = PetscMalloc(numRecvNodes   * sizeof(int),    &sendMarkers);                                    CHKERRQ(ierr);
    ierr = PetscMalloc(numRecvNodes   * sizeof(int),    &sendDegrees);                                    CHKERRQ(ierr);
    for(node = 0; node < numRecvNodes; node++) {
      locNode = recvNodes[node] - firstNode[rank];
#ifdef PETSC_USE_BOPT_g
      if ((locNode < 0) || (locNode >= numLocNodes)) {
        SETERRQ2(PETSC_ERR_PLIB, "Invalid ghost node %d should be in [0,%d)", locNode, numLocNodes);
      }
#endif
      for(i = 0; i < 2; i++)
        sendCoords[node*2+i] = nodes[locNode*2+i];
      sendMarkers[node] = markers[locNode];
      sendDegrees[node] = degrees[locNode];
    }
  }

  /* Communicate node coordinates and markers and degrees */
  ierr = MPI_Alltoallv(sendCoords,  recvGhostCoords, sumRecvCoords, MPI_DOUBLE,
                       recvCoords,  sendGhostCoords, sumSendCoords, MPI_DOUBLE, p->comm);
  CHKERRQ(ierr);
  ierr = MPI_Alltoallv(sendMarkers, recvGhostNodes,  sumRecvNodes,  MPI_INT,
                       recvMarkers, sendGhostNodes,  sumSendNodes,  MPI_INT, p->comm);
  CHKERRQ(ierr);
  ierr = MPI_Alltoallv(sendDegrees, recvGhostNodes,  sumRecvNodes,  MPI_INT,
                       recvDegrees, sendGhostNodes,  sumSendNodes,  MPI_INT, p->comm);
  CHKERRQ(ierr);

  /* Add in new ghost nodes */
  numGhostNodes = q->numOverlapNodes - numLocNodes;
  if (numSendNodes > 0) {
    ierr = PetscMalloc((numGhostNodes + numSendNodes) * sizeof(int), &temp);                              CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, q->ghostNodes, numGhostNodes * sizeof(int));                                 CHKERRQ(ierr);
    for(node = 0; node < numSendNodes; node++)
      temp[numGhostNodes+node] = sendNodes[node];
    if (q->ghostNodes != PETSC_NULL) {
      ierr = PetscFree(q->ghostNodes);                                                                    CHKERRQ(ierr);
    }
    q->ghostNodes = temp;

    ierr = PetscMalloc((numGhostNodes + numSendNodes) * sizeof(int), &temp);                              CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, q->ghostNodeProcs, numGhostNodes * sizeof(int));                             CHKERRQ(ierr);
    for(proc = 0, node = 0; proc < numProcs; proc++) {
      for(i = 0; i < sendGhostNodes[proc]; i++)
        temp[numGhostNodes+(node++)] = proc;
    }
    if (q->ghostNodeProcs != PETSC_NULL) {
      ierr = PetscFree(q->ghostNodeProcs);                                                                CHKERRQ(ierr);
    }
    q->ghostNodeProcs = temp;

    ierr = PetscMalloc((q->numOverlapNodes + numSendNodes)*2 * sizeof(double), &temp2);                   CHKERRQ(ierr);
    ierr = PetscMemcpy(temp2, nodes, q->numOverlapNodes*2 * sizeof(double));                              CHKERRQ(ierr);
    for(node = 0; node < numSendNodes*2; node++)
      temp2[q->numOverlapNodes*2+node] = recvCoords[node];
    ierr = PetscFree(nodes);
    tri->nodes = temp2;

    ierr = PetscMalloc((q->numOverlapNodes + numSendNodes) * sizeof(int), &temp);                         CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, markers, q->numOverlapNodes * sizeof(int));                                  CHKERRQ(ierr);
    for(node = 0; node < numSendNodes; node++)
      temp[q->numOverlapNodes+node] = recvMarkers[node];
    ierr = PetscFree(markers);
    tri->markers = temp;

    ierr = PetscMalloc((q->numOverlapNodes + numSendNodes) * sizeof(int), &temp);                         CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, degrees, q->numOverlapNodes * sizeof(int));                                  CHKERRQ(ierr);
    for(node = 0; node < numSendNodes; node++)
      temp[q->numOverlapNodes+node] = recvDegrees[node];
    ierr = PetscFree(degrees);
    tri->degrees = temp;
  }

  /* Resort ghost nodes and remove duplicates */
  numGhostNodes     += numSendNodes;
  ierr = PartitionSortGhosts_Private(p, &numGhostNodes, q->ghostNodes, &nodePerm);                        CHKERRQ(ierr);
  q->numOverlapNodes = numLocNodes + numGhostNodes;

  ierr = PetscMalloc(numGhostNodes * sizeof(int), &temp);                                                 CHKERRQ(ierr);
  for(node = 0; node < numGhostNodes; node++)
    temp[node] = q->ghostNodeProcs[nodePerm[node]];
  for(node = 0; node < numGhostNodes; node++)
    q->ghostNodeProcs[node] = temp[node];

  for(node = 0; node < numGhostNodes; node++)
    temp[node] = tri->markers[mesh->numNodes+nodePerm[node]];
  for(node = 0; node < numGhostNodes; node++)
    tri->markers[mesh->numNodes+node] = temp[node];

  for(node = 0; node < numGhostNodes; node++)
    temp[node] = tri->degrees[mesh->numNodes+nodePerm[node]];
  for(node = 0; node < numGhostNodes; node++)
    tri->degrees[mesh->numNodes+node] = temp[node];
  ierr = PetscFree(temp);     CHKERRQ(ierr);

  ierr = PetscMalloc(numGhostNodes*2 * sizeof(double), &temp2);                                           CHKERRQ(ierr);
  for(node = 0; node < numGhostNodes; node++) {
    temp2[node*2]   = tri->nodes[(mesh->numNodes+nodePerm[node])*2];
    temp2[node*2+1] = tri->nodes[(mesh->numNodes+nodePerm[node])*2+1];
  }
  for(node = 0; node < numGhostNodes; node++) {
    tri->nodes[(mesh->numNodes+node)*2]   = temp2[node*2];
    tri->nodes[(mesh->numNodes+node)*2+1] = temp2[node*2+1];
  }
  ierr = PetscFree(temp2);                                                                                CHKERRQ(ierr);
  ierr = PetscFree(nodePerm);                                                                             CHKERRQ(ierr);

#ifdef PETSC_USE_BOPT_g
  /* Consistency check for ghost nodes */
  for(node = 0; node < numGhostNodes; node++) {
    if ((q->ghostNodes[node] <  firstNode[q->ghostNodeProcs[node]]) ||
        (q->ghostNodes[node] >= firstNode[q->ghostNodeProcs[node]+1])) {
      SETERRQ(PETSC_ERR_PLIB, "Invalid ghost node source processor");
    }
  }
#endif

  /* Cleanup */
  ierr = PetscFree(sumSendNodes);                                                                         CHKERRQ(ierr);
  ierr = PetscFree(sendGhostCoords);                                                                      CHKERRQ(ierr);
  ierr = PetscFree(sumSendCoords);                                                                        CHKERRQ(ierr);
  ierr = PetscFree(offsets);                                                                              CHKERRQ(ierr);
  if (numSendNodes) {
    ierr = PetscFree(recvCoords);                                                                         CHKERRQ(ierr);
    ierr = PetscFree(recvMarkers);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(recvDegrees);                                                                        CHKERRQ(ierr);
  }
  ierr = PetscFree(sumRecvNodes);                                                                         CHKERRQ(ierr);
  ierr = PetscFree(recvGhostCoords);                                                                      CHKERRQ(ierr);
  ierr = PetscFree(sumRecvCoords);                                                                        CHKERRQ(ierr);
  if (numRecvNodes) {
    ierr = PetscFree(sendCoords);                                                                         CHKERRQ(ierr);
    ierr = PetscFree(sendMarkers);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(sendDegrees);                                                                        CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNewGhostElements_Serial"
int PartitionGetNewGhostElements_Serial(Partition p, int *newProcElements, int *newElements)
{
  int  numLocElements   = p->numLocElements;
  int  numGhostElements = p->numOverlapElements - numLocElements;
  int  numProcs         = p->numProcs;
  int *elemPerm;        /* The new permutation for the sorted ghost elements */
  int  numNewElements;  /* Total number of new ghost elements to add */
  int *temp;
  int  proc, elem, i;
  int  ierr;

  PetscFunctionBegin;
  for(proc = 0, numNewElements = 0; proc < numProcs; proc++)
    numNewElements += newProcElements[proc];

  /* Add in new ghost elements */
  if (numNewElements > 0) {
    ierr = PetscMalloc((numGhostElements + numNewElements) * sizeof(int), &temp);                         CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, p->ghostElements, numGhostElements * sizeof(int));                           CHKERRQ(ierr);
    for(elem = 0; elem < numNewElements; elem++)
      temp[numGhostElements+elem] = newElements[elem];
    if (p->ghostElements != PETSC_NULL) {
      ierr = PetscFree(p->ghostElements);                                                                 CHKERRQ(ierr);
    }
    p->ghostElements = temp;

    ierr = PetscMalloc((numGhostElements + numNewElements) * sizeof(int), &temp);                         CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, p->ghostElementProcs, numGhostElements * sizeof(int));                       CHKERRQ(ierr);
    for(proc = 0, elem = 0; proc < numProcs; proc++) {
      for(i = 0; i < newProcElements[proc]; i++)
        temp[numGhostElements+(elem++)] = proc;
    }
    if (p->ghostElementProcs != PETSC_NULL) {
      ierr = PetscFree(p->ghostElementProcs);                                                             CHKERRQ(ierr);
    }
    p->ghostElementProcs = temp;

    /* Resort ghost elements and remove duplicates */
    numGhostElements += numNewElements;
    ierr = PartitionSortGhosts_Private(p, &numGhostElements, p->ghostElements, &elemPerm);                CHKERRQ(ierr);
    p->numOverlapElements = numLocElements + numGhostElements;
    ierr = PetscMalloc(numGhostElements * sizeof(int), &temp);                                            CHKERRQ(ierr);
    for(elem = 0; elem < numGhostElements; elem++)
      temp[elem] = p->ghostElementProcs[elemPerm[elem]];
    ierr = PetscFree(p->ghostElementProcs);                                                               CHKERRQ(ierr);
    p->ghostElementProcs = temp;
    ierr = PetscFree(elemPerm);                                                                           CHKERRQ(ierr);
  }
#ifdef PETSC_USE_BOPT_g
  /* Consistency check for ghost elements */
  for(elem = 0; elem < numGhostElements; elem++) {
    if ((p->ghostElements[elem] <  p->firstElement[p->ghostElementProcs[elem]]) ||
        (p->ghostElements[elem] >= p->firstElement[p->ghostElementProcs[elem]+1])) {
      SETERRQ(PETSC_ERR_PLIB, "Invalid ghost element source processor");
    }
  }
#endif
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateElementMap_METIS"
/*
  PartitionCreateElementMap_METIS - This function creates a map from elements to domains,
  using METIS to minimize the cut and approximately balance the sizes.

  Input Parameters:
+ p           - The Partition
- numElements - The local number of elements

  Output Parameter:
. elementMap  - The map from nodes to domains

.seealso: PartitionNodes_Private()
*/
int PartitionCreateElementMap_METIS(Partition p, int numElements, int **elementMap)
{
#ifdef PETSC_HAVE_PARMETIS
  Mesh mesh = p->mesh;
  int       *elemProcs;     /* The processor assigned to each element */
  int       *elemOffsets;   /* The offsets into elemNeighbors of each element row for dual in CSR format */
  int       *elemNeighbors; /* The list of element neighbors for dual in CSR format */
  int       *edgeWeights;   /* The list of edge weights for dual in CSR format */
  int        weight;        /* A weight for constrained nodes */
  int        options[5];    /* The option flags for METIS */
  PetscTruth opt;
  int        ierr;

  PetscFunctionBegin;
  /* Create the dual graph in distributed CSR format */
  weight = 0;
  ierr   = PetscOptionsGetInt(mesh->prefix, "-mesh_partition_weight", &weight, &opt);                     CHKERRQ(ierr);
  ierr   = MeshCreateDualCSR(mesh, &elemOffsets, &elemNeighbors, &edgeWeights, weight);                   CHKERRQ(ierr);

  /* Partition graph */
  if (numElements != p->numLocElements) {
    SETERRQ2(PETSC_ERR_ARG_WRONG, "Incorrect input size %d for ParMETIS, should be %d", numElements, p->numLocElements);
  }
  ierr = PetscMalloc(numElements * sizeof(int), &elemProcs);                                              CHKERRQ(ierr);
  options[0] = 0;   /* Returns the edge cut */
  options[1] = 150; /* The folding factor, 0 = no folding */
  options[2] = 1;   /* Serial initial partition */
  options[3] = 0;   /* C style numbering */
  options[4] = 0;   /* No timing information */
  PARKMETIS(p->firstElement, elemOffsets, PETSC_NULL, elemNeighbors, PETSC_NULL, elemProcs, options, p->comm);

  /* Destroy dual graph */
  ierr = MeshDestroyDualCSR(mesh, elemOffsets, elemNeighbors, edgeWeights);                               CHKERRQ(ierr);

  *elementMap = elemProcs;
  PetscFunctionReturn(0);
#else
  SETERRQ(PETSC_ERR_SUP, "You must obtain George Karypis' ParMETIS software")
#endif
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateElementMap_NodeBased"
/*
  PartitionCreateElementMap_NodeBased - This function creates a map from elements to domains,
  using a previous partition of the nodes.

  Input Parameters:
+ p           - The Partition
- numElements - The global number of nodes

  Output Parameter:
. elementMap  - The map from nodes to domains

.seealso: PartitionNodes_Private()
*/
int PartitionCreateElementMap_NodeBased(Partition p, int numElements, int **elementMap)
{
  Partition_Triangular_2D *q            = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh         = p->mesh;
  Mesh_Triangular         *tri          = (Mesh_Triangular *) mesh->data;
  int                      numCorners   = mesh->numCorners;
  int                     *elements     = tri->faces;
  int                     *firstElement = p->firstElement;
  int                     *firstNode    = q->firstNode;
  int                      numProcs     = p->numProcs;
  int                      rank         = p->rank;
  int                      startElement = firstElement[rank];
  int                      endElement   = firstElement[rank+1];
  int                     *elemProcs;     /* The processor assigned to each element */
  int                      proc, elem, corner, node;
  int                      ierr;

  PetscFunctionBegin;
  if (numElements != p->numLocElements) {
    SETERRQ2(PETSC_ERR_ARG_WRONG, "Incorrect input size %d should be %d", numElements, p->numLocElements);
  }
  /* Count elements on this partition -- keep element if you are the lower numbered domain */
  ierr = PetscMalloc(numElements * sizeof(int), &elemProcs);                                              CHKERRQ(ierr);
  for(elem = 0; elem < numElements; elem++) {
    elemProcs[elem] = -1;
  }

  for(elem = startElement; elem < endElement; elem++) {
    for(corner = 0; corner < numCorners; corner++) {
      node = elements[elem*numCorners+corner];
      if ((node < firstNode[rank]) || (node >= firstNode[rank+1])) {
        /* Get the domain of the node */
        for(proc = 0; proc < numProcs; proc++) {
          if ((node >= firstNode[proc]) && (node < firstNode[proc+1])) break;
        }
        if ((elemProcs[elem-startElement] < 0) || (proc < elemProcs[elem-startElement]))
          elemProcs[elem-startElement] = proc;
      }
    }
    /* If no one else claims it, take the element */
    if (elemProcs[elem-startElement] < 0) {
      elemProcs[elem-startElement] = rank;
    }
  }

  *elementMap = elemProcs;
  PetscFunctionReturn(0);
}

/*
  PartitionCreateElementPartition_Private - This function uses the element map to create
  partition structures for element-based data.

  Input Parameters:
+ p              - The Partition
- elementMap     - The map from elements to domains

  Output Parameters:
. ordering       - The new element ordering

  Output Parameters in Partition_Triangular_2D:
+ numLocElements - The number of local elements
- firstElement   - The first elemnt in each domain

.seealso: PartitionElements_Private()
*/
#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateElementPartition_Private"
int PartitionCreateElementPartition_Private(Partition p, int *elementMap, AO *ordering)
{
  int              numLocElements = p->numLocElements; /* Number of local elements before partitioning */
  int             *firstElement   = p->firstElement;
  int              numProcs       = p->numProcs;
  int              rank           = p->rank;
  int             *partSendElements;     /* The number of elements sent to each processor for partitioning */
  int             *sumSendElements;      /* The prefix sums of partSendElements */
  int             *partRecvElements;     /* The number of elements received from each processor for partitioning */
  int             *sumRecvElements;      /* The prefix sums of partRecvElements */
  int             *offsets;              /* The offsets into the send and receive arrays */
  int             *sendBuffer;
  int             *AppOrdering, *PetscOrdering;
  int              proc, elem;
  int              ierr;

  PetscFunctionBegin;
  /* Initialize communication */
  ierr = PetscMalloc(numProcs * sizeof(int), &partSendElements);                                          CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumSendElements);                                           CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &partRecvElements);                                          CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &sumRecvElements);                                           CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs * sizeof(int), &offsets);                                                   CHKERRQ(ierr);
  ierr = PetscMemzero(partSendElements,  numProcs * sizeof(int));                                         CHKERRQ(ierr);
  ierr = PetscMemzero(sumSendElements,   numProcs * sizeof(int));                                         CHKERRQ(ierr);
  ierr = PetscMemzero(partRecvElements,  numProcs * sizeof(int));                                         CHKERRQ(ierr);
  ierr = PetscMemzero(sumRecvElements,   numProcs * sizeof(int));                                         CHKERRQ(ierr);
  ierr = PetscMemzero(offsets,           numProcs * sizeof(int));                                         CHKERRQ(ierr);

  /* Get sizes of interior element number blocks to send to each processor */
  for(elem = 0; elem < numLocElements; elem++) {
    partSendElements[elementMap[elem]]++;
  }

  /* Get sizes of interior element number blocks to receive from each processor */
  ierr = MPI_Alltoall(partSendElements, 1, MPI_INT, partRecvElements, 1, MPI_INT, p->comm);              CHKERRQ(ierr);

  /* Calculate offsets into the interior element number send array */
  for(proc = 1; proc < numProcs; proc++) {
    sumSendElements[proc] = sumSendElements[proc-1] + partSendElements[proc-1];
    offsets[proc]         = sumSendElements[proc];
  }

  /* Calculate offsets into the interior element number receive array */
  for(proc = 1; proc < numProcs; proc++) {
    sumRecvElements[proc] = sumRecvElements[proc-1] + partRecvElements[proc-1];
  }

  /* Send interior element numbers to each processor -- could prevent copying elements already there I think */
  p->numLocElements = sumRecvElements[numProcs-1] + partRecvElements[numProcs-1];
  ierr = PetscMalloc(numLocElements    * sizeof(int), &sendBuffer);                                       CHKERRQ(ierr);
  ierr = PetscMalloc(p->numLocElements * sizeof(int), &AppOrdering);                                      CHKERRQ(ierr);
  ierr = PetscMalloc(p->numLocElements * sizeof(int), &PetscOrdering);                                    CHKERRQ(ierr);
  for(elem = 0; elem < numLocElements; elem++) {
    sendBuffer[offsets[elementMap[elem]]++] = elem + firstElement[rank];
  }
  ierr = MPI_Alltoallv(sendBuffer,  partSendElements, sumSendElements, MPI_INT,
                       AppOrdering, partRecvElements, sumRecvElements, MPI_INT, p->comm);
  CHKERRQ(ierr);

  /* If the mesh was intially distributed, we would need to send the elements themselves here */

  /* Recompute size and offset of each domain */
  ierr = MPI_Allgather(&p->numLocElements, 1, MPI_INT, &firstElement[1], 1, MPI_INT, p->comm);           CHKERRQ(ierr);
  for(proc = 1, firstElement[0] = 0; proc <= numProcs; proc++) {
    firstElement[proc] = firstElement[proc] + firstElement[proc-1];
  }

  /* Create the global element reordering */
  for(elem = 0; elem < p->numLocElements; elem++) {
    /* This would be the time to do RCM on the local graph by reordering PetscOrdering[] */
    PetscOrdering[elem] = elem + firstElement[rank];
  }

  /* Cleanup */
  ierr = PetscFree(partSendElements);                                                                    CHKERRQ(ierr);
  ierr = PetscFree(sumSendElements);                                                                     CHKERRQ(ierr);
  ierr = PetscFree(partRecvElements);                                                                    CHKERRQ(ierr);
  ierr = PetscFree(sumRecvElements);                                                                     CHKERRQ(ierr);
  ierr = PetscFree(offsets);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(sendBuffer);                                                                          CHKERRQ(ierr);

  /* Create the global element reordering */
  ierr = AOCreateBasic(p->comm, p->numLocElements, AppOrdering, PetscOrdering, ordering);                CHKERRQ(ierr);
  PetscLogObjectParent(p, p->ordering);

  ierr = PetscFree(AppOrdering);                                                                         CHKERRQ(ierr);
  ierr = PetscFree(PetscOrdering);                                                                       CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionPermuteElements_Private"
/*
  PartitionPermuteElements_Private - This function permutes the data which is implicitly
  indexed by element number

  Input Parameter:
. p         - The Partition

  Output Parameter in Mesh_Triangular:
+ faces     - The nodes on each element
- neighbors - The neighbors of each element

.seealso: PartitionElements_Private()
*/
int PartitionPermuteElements_Private(Partition p)
{
  Mesh             mesh = p->mesh;
  Mesh_Triangular *tri  = (Mesh_Triangular *) mesh->data;
  int              ierr;

  PetscFunctionBegin;
  ierr = AOApplicationToPetscPermuteInt(p->ordering, mesh->numCorners, tri->faces);                       CHKERRQ(ierr);
  ierr = AOApplicationToPetscPermuteInt(p->ordering, 3,                tri->neighbors);                   CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionRenumberElements_Private"
/*
  PartitionRenumberElements_Private - This function renumbers the element-based data globally in
  order to make the canonical numbers sequential in each domain.

  Input Parameter:
. p         - The Partition

  Output Parameter in Mesh_Triangular:
. neighbors - The neighbors of each element

.seealso: PartitionElements_Private()
*/
int PartitionRenumberElements_Private(Partition p)
{
  Mesh_Triangular         *tri         = (Mesh_Triangular *) p->mesh->data;
  int                      numElements = p->numElements;
  int                     *neighbors   = tri->neighbors;
  int                      ierr;

  PetscFunctionBegin;
  ierr = AOApplicationToPetsc(p->ordering, numElements*3, neighbors);                                    CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCalcGhostElements_Private"
/*
  PartitionCalcGhostElements_Private - This function calculates the ghost elements.

  Input Parameters:
. p         - The Partition

  Output Parameters in Partition_Triangular_2D:

.seealso: PartitionElements_Private()
*/
int PartitionCalcGhostElements_Private(Partition p)
{
  Partition_Triangular_2D *q            = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh         = p->mesh;
  Mesh_Triangular         *tri          = (Mesh_Triangular *) mesh->data;
  int                      numCorners   = mesh->numCorners;
  int                      numElements  = p->numElements;
  int                     *elements     = tri->faces;
  int                     *neighbors    = tri->neighbors;
  int                     *firstElement = p->firstElement;
  int                      numProcs     = p->numProcs;
  int                      rank         = p->rank;
  int                      numLocNodes  = q->numLocNodes;
  int                      startNode    = q->firstNode[rank];
  PetscTruth               isNodePart   = q->isNodePartitioned;
  int                     *newProcElements; /* The number of new ghost elements from each processor */
  int                      numNewElements;  /* The number of new ghost elements */
  int                     *newElements;     /* The new ghost elements */
  int                     *offsets;         /* The offsets into the send and receive arrays */
  int                      degree;          /* The degree of a vertex */
  int                     *support;         /* The list of elements in the support of a basis function */
  int                     *elemMap;
  int                      proc, elem, bElem, sElem, nElem, corner, neighbor, node;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(numElements  * sizeof(int), &elemMap);                                               CHKERRQ(ierr);
  ierr = PetscMalloc(numProcs     * sizeof(int), &newProcElements);                                       CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &offsets);                                               CHKERRQ(ierr);
  ierr = PetscMemzero(newProcElements,  numProcs     * sizeof(int));                                      CHKERRQ(ierr);
  ierr = PetscMemzero(offsets,          (numProcs+1) * sizeof(int));                                      CHKERRQ(ierr);
  for(elem = 0; elem < numElements; elem++) {
    elemMap[elem] = -1;
  }
  for(elem = 0; elem < numElements; elem++) {
    if ((elem >= firstElement[rank]) && (elem < firstElement[rank+1])) {
      /* Find a boundary element */
      for(neighbor = 0; neighbor < 3; neighbor++) {
        bElem = neighbors[elem*3+neighbor];
        if ((bElem >= 0) && ((bElem < firstElement[rank]) || (bElem >= firstElement[rank+1])))
          break;
      }

      if (neighbor < 3) {
        /* Check the support of each vertex for off-processor elements */
        for(corner = 0; corner < numCorners; corner++) {
          node = elements[elem*numCorners+corner];
          ierr = MeshGetNodeSupport(mesh, node, elem, &degree, &support);                                 CHKERRQ(ierr);
          for(sElem = 0; sElem < degree; sElem++) {
            nElem = support[sElem];
            if (elemMap[nElem] >= 0) continue;
            for(proc = 0; proc < numProcs; proc++) {
              if ((proc != rank) && (nElem >= firstElement[proc]) && (nElem < firstElement[proc+1])) {
                elemMap[nElem] = proc;
                break;
              }
            }
          }
          ierr = MeshRestoreNodeSupport(mesh, node, elem, &degree, &support);                             CHKERRQ(ierr);
        }
      }
    } else if (isNodePart == PETSC_TRUE) {
      if (elemMap[elem] >= 0) continue;
      /* We may also need elements on which we have nodes, but are not attached to */
      for(corner = 0; corner < numCorners; corner++) {
        node = elements[elem*numCorners+corner] - startNode;
        if ((node >= 0) && (node < numLocNodes)) {
          for(proc = 0; proc < numProcs; proc++) {
            if ((elem >= firstElement[proc]) && (elem < firstElement[proc+1])) {
              elemMap[elem] = proc;
              break;
            }
          }
        }
      }
    }
  }

  /* Compute new ghost element offsets */
  for(elem = 0; elem < numElements; elem++) {
    if (elemMap[elem] >= 0) {
      newProcElements[elemMap[elem]]++;
    }
  }
  for(proc = 0, numNewElements = 0; proc < numProcs; proc++) {
    numNewElements  += newProcElements[proc];
    offsets[proc+1]  = offsets[proc] + newProcElements[proc];
  }

  /* Get ghost nodes */
  if (numNewElements > 0) {
    ierr = PetscMalloc(numNewElements * sizeof(int), &newElements);                                       CHKERRQ(ierr);
    for(elem = 0; elem < numElements; elem++) {
      if (elemMap[elem] >= 0) {
        newElements[offsets[elemMap[elem]]++] = elem;
      }
    }
  }
  for(proc = 1; proc < numProcs-1; proc++) {
    if (offsets[proc] - offsets[proc-1]  != newProcElements[proc]) {
      SETERRQ3(PETSC_ERR_PLIB, "Invalid number of ghost elements sent %d to proc %d should be %d",
               offsets[proc] - offsets[proc-1], proc, newProcElements[proc]);
    }
  }
  if (offsets[0] != newProcElements[0]) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid number of ghost elements sent %d to proc 0 should be %d",
             offsets[0], newProcElements[0]);
  }

  /* Add new ghosts */
  p->numOverlapElements = p->numLocElements;
  ierr = PartitionGetNewGhostElements_Serial(p, newProcElements, newElements);                           CHKERRQ(ierr);

  /* Cleanup */
  ierr = PetscFree(elemMap);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(newProcElements);                                                                     CHKERRQ(ierr);
  ierr = PetscFree(offsets);                                                                             CHKERRQ(ierr);
  if (numNewElements > 0) {
    ierr = PetscFree(newElements);                                                                       CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionDistributeElements_Private"
/*
  PartitionDistributeElements_Private - This function distributes the element-based data, and
  permutes arrays which are implicitly indexed by element number.

  Input Parameters:
. p         - The Partition

  Output Parameters in Mesh_Triangular:
+ faces     - The nodes on each element
- neighbors - The element neighbors

.seealso: PartitionElements_Private()
*/
int PartitionDistributeElements_Private(Partition p)
{
  Mesh             mesh               = p->mesh;
  Mesh_Triangular *tri                = (Mesh_Triangular *) mesh->data;
  int              numLocElements     = p->numLocElements;
  int              numOverlapElements = p->numOverlapElements;
  int              numGhostElements   = numOverlapElements - numLocElements;
  int              numCorners         = mesh->numCorners;
  int             *firstElement       = p->firstElement;
  int             *ghostElements      = p->ghostElements;
  int              rank               = p->rank;
  int             *temp;
  int              elem, corner, neighbor;
  int              ierr;

  /* Note here that we can use PetscMemcpy() for the interior variables because we already permuted the
     arrays so that ghost elements could be computed.
  */
  PetscFunctionBegin;
  mesh->numFaces = numLocElements;
  ierr = PetscMalloc(numOverlapElements*numCorners * sizeof(int), &temp);                                 CHKERRQ(ierr);
  /* Interior faces */
  ierr = PetscMemcpy(temp, &tri->faces[firstElement[rank]*numCorners], numLocElements*numCorners * sizeof(int)); CHKERRQ(ierr);
  /* Ghost faces */
  for(elem = 0; elem < numGhostElements; elem++) {
    for(corner = 0; corner < numCorners; corner++) {
      temp[(numLocElements+elem)*numCorners+corner] = tri->faces[ghostElements[elem]*numCorners+corner];
    }
  }
  ierr = PetscFree(tri->faces);                                                                           CHKERRQ(ierr);
  tri->faces = temp;
  PetscLogObjectMemory(p, numGhostElements*numCorners * sizeof(int));

  ierr = PetscMalloc(numOverlapElements*3 * sizeof(int), &temp);                                          CHKERRQ(ierr);
  /* Interior neighbors */
  ierr = PetscMemcpy(temp, &tri->neighbors[firstElement[rank]*3], numLocElements*3 * sizeof(int));        CHKERRQ(ierr);
  /* Ghost neighbors */
  for(elem = 0; elem < numGhostElements; elem++) {
    for(neighbor = 0; neighbor < 3; neighbor++) {
      temp[(numLocElements+elem)*3+neighbor] = tri->neighbors[ghostElements[elem]*3+neighbor];
    }
  }
  ierr = PetscFree(tri->neighbors);                                                                       CHKERRQ(ierr);
  tri->neighbors = temp;
  PetscLogObjectMemory(p, numGhostElements*3 * sizeof(int));

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionElementGlobalToLocal_Private"
int PartitionElementGlobalToLocal_Private(Partition p)
{
  Mesh_Triangular *tri                = (Mesh_Triangular *) p->mesh->data;
  int              numOverlapElements = p->numOverlapElements;
  int             *neighbors          = tri->neighbors;
  int              neighbor;
  int              ierr;

  PetscFunctionBegin;
  /* We indicate neighbors which are not interior or ghost by -2 since boundaries are -1 */
  for(neighbor = 0; neighbor < numOverlapElements*3; neighbor++) {
    ierr = PartitionGlobalToLocalElementIndex(p, neighbors[neighbor], &neighbors[neighbor]);             CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNewGhostNodes_Element"
int PartitionGetNewGhostNodes_Element(Partition p)
{
  Partition_Triangular_2D *q                = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh             = p->mesh;
  Mesh_Triangular         *tri              = (Mesh_Triangular *) mesh->data;
  int                      numCorners       = mesh->numCorners;
  int                     *elements         = tri->faces;
  int                     *firstElement     = p->firstElement;
  int                      numGhostElements = p->numOverlapElements - p->numLocElements;
  int                     *ghostElements    = p->ghostElements;
  int                      numNodes         = q->numNodes;
  int                     *firstNode        = q->firstNode;
  int                      numProcs         = p->numProcs;
  int                      rank             = p->rank;
  int                     *newProcNodes; /* Number of new ghost nodes needed from a given processor */
  int                      numNewNodes;  /* Total number of new ghost nodes to receive */
  int                     *newNodes;     /* New ghost nodes for this domain */
  int                     *offsets;      /* The offsets into newNodes[] */
  int                     *nodeMap;      /* The map of nodes to processors */
  int                      proc, elem, corner, node, gNode;
  int                      ierr;

  PetscFunctionBegin;
  if (q->isNodePartitioned == PETSC_FALSE)
    PetscFunctionReturn(0);
  /* Initialize communication */
  ierr = PetscMalloc(numProcs     * sizeof(int), &newProcNodes);                                          CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &offsets);                                               CHKERRQ(ierr);
  ierr = PetscMalloc(numNodes     * sizeof(int), &nodeMap);                                               CHKERRQ(ierr);
  ierr = PetscMemzero(newProcNodes, numProcs      * sizeof(int));                                         CHKERRQ(ierr);
  ierr = PetscMemzero(offsets,     (numProcs+1)   * sizeof(int));                                         CHKERRQ(ierr);
  for(node = 0; node < numNodes; node++) {
    nodeMap[node] = -1;
  }

  /* Check for new ghost nodes */
  for(elem = firstElement[rank]; elem < firstElement[rank+1]; elem++) {
    for(corner = 0; corner < numCorners; corner++) {
      node = elements[elem*numCorners+corner];
      if (nodeMap[node] >= 0) continue;

      if ((node < firstNode[rank]) || (node >= firstNode[rank+1])) {
        /* Get the domain of the node */
        for(proc = 0; proc < numProcs; proc++) {
          if ((node >= firstNode[proc]) && (node < firstNode[proc+1])) break;
        }
        /* Check for the presence of this node */
        if (PartitionGhostNodeIndex_Private(p, node, &gNode) && ((nodeMap[node] < 0) || (proc < nodeMap[node]))) {
          nodeMap[node] = proc;
        }
      }
    }
  }
  for(elem = 0; elem < numGhostElements; elem++) {
    for(corner = 0; corner < numCorners; corner++) {
      node = elements[ghostElements[elem]*numCorners+corner];
      if (nodeMap[node] >= 0) continue;

      if ((node < firstNode[rank]) || (node >= firstNode[rank+1])) {
        /* Get the domain of the node */
        for(proc = 0; proc < numProcs; proc++) {
          if ((node >= firstNode[proc]) && (node < firstNode[proc+1])) break;
        }
        /* Check for the presence of this node */
        if (PartitionGhostNodeIndex_Private(p, node, &gNode) && ((nodeMap[node] < 0) || (proc < nodeMap[node]))) {
          nodeMap[node] = proc;
        }
      }
    }
  }

  /* Compute new ghost node offsets */
  for(node = 0; node < numNodes; node++) {
    if (nodeMap[node] >= 0) {
      newProcNodes[nodeMap[node]]++;
    }
  }
  for(proc = 0, numNewNodes = 0; proc < numProcs; proc++) {
    numNewNodes   += newProcNodes[proc];
    offsets[proc+1] = offsets[proc] + newProcNodes[proc];
  }

  /* Get ghost nodes */
  if (numNewNodes > 0) {
    ierr = PetscMalloc(numNewNodes * sizeof(int), &newNodes);                                             CHKERRQ(ierr);
    for(node = 0; node < numNodes; node++) {
      if (nodeMap[node] >= 0) {
        newNodes[offsets[nodeMap[node]]++] = node;
      }
    }
  }
  for(proc = 1; proc < numProcs-1; proc++) {
    if (offsets[proc] - offsets[proc-1]  != newProcNodes[proc]) {
      SETERRQ3(PETSC_ERR_PLIB, "Invalid number of ghost nodes sent %d to proc %d should be %d",
               offsets[proc] - offsets[proc-1], proc, newProcNodes[proc]);
    }
  }
  if (offsets[0] != newProcNodes[0]) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid number of ghost nodes sent %d to proc 0 should be %d", offsets[0], newProcNodes[0]);
  }

  /* Add new ghosts */
  ierr = PartitionGetNewGhostNodes_Serial(p, newProcNodes, newNodes);                                    CHKERRQ(ierr);

  /* Cleanup */
  ierr = PetscFree(nodeMap);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(newProcNodes);                                                                        CHKERRQ(ierr);
  ierr = PetscFree(offsets);                                                                             CHKERRQ(ierr);
  if (numNewNodes) {
    ierr = PetscFree(newNodes);                                                                          CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGetNewGhostNodes_Edge"
int PartitionGetNewGhostNodes_Edge(Partition p)
{
  Partition_Triangular_2D *q         = (Partition_Triangular_2D *) p->data;
  Mesh_Triangular         *tri       = (Mesh_Triangular *) p->mesh->data;
  int                     *edges     = tri->edges;
  int                     *firstEdge = q->firstEdge;
  int                      numNodes  = q->numNodes;
  int                     *firstNode = q->firstNode;
  int                      numProcs  = p->numProcs;
  int                      rank      = p->rank;
  int                     *newProcNodes; /* Number of new ghost nodes needed from a given processor */
  int                      numNewNodes;  /* Total number of new ghost nodes to receive */
  int                     *newNodes;     /* New ghost nodes for this domain */
  int                     *offsets;      /* The offsets into newNodes[] */
  int                     *nodeMap;      /* The map of nodes to processors */
  int                      proc, edge, node, startNode, endNode, ghostNode, gNode;
  int                      ierr;

  PetscFunctionBegin;
  /* Initialize communication */
  ierr = PetscMalloc(numProcs     * sizeof(int), &newProcNodes);                                          CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &offsets);                                               CHKERRQ(ierr);
  ierr = PetscMalloc(numNodes     * sizeof(int), &nodeMap);                                               CHKERRQ(ierr);
  ierr = PetscMemzero(newProcNodes, numProcs    * sizeof(int));                                           CHKERRQ(ierr);
  ierr = PetscMemzero(offsets,     (numProcs+1) * sizeof(int));                                           CHKERRQ(ierr);
  for(node = 0; node < numNodes; node++) {
    nodeMap[node] = -1;
  }

  /* Check for new ghost nodes */
  for(edge = firstEdge[rank]; edge < firstEdge[rank+1]; edge++) {
    /* Check for new ghost node */
    startNode = edges[edge*2];
    endNode   = edges[edge*2+1];
    ghostNode = -1;
    if ((startNode < firstNode[rank]) || (startNode >= firstNode[rank+1])) {
      ghostNode = startNode;
    } else if ((endNode < firstNode[rank]) || (endNode >= firstNode[rank+1])) {
      ghostNode = endNode;
    }
    if (ghostNode >= 0) {
      /* Get the domain of the node */
      for(proc = 0; proc < numProcs; proc++) {
        if ((ghostNode >= firstNode[proc]) && (ghostNode < firstNode[proc+1])) break;
      }
      /* Check for the presence of this ghost node */
      if (PartitionGhostNodeIndex_Private(p, ghostNode, &gNode) && ((nodeMap[ghostNode] < 0) || (proc < nodeMap[ghostNode]))) {
        /* We must add this node as a ghost node */
        nodeMap[ghostNode] = proc;
      }
    }
  }

  /* Compute new ghost node offsets */
  for(node = 0; node < numNodes; node++) {
    if (nodeMap[node] >= 0) {
      newProcNodes[nodeMap[node]]++;
    }
  }
  for(proc = 0, numNewNodes = 0; proc < numProcs; proc++) {
    numNewNodes   += newProcNodes[proc];
    offsets[proc+1] = offsets[proc] + newProcNodes[proc];
  }

  /* Get ghost nodes */
  if (numNewNodes > 0) {
    ierr = PetscMalloc(numNewNodes * sizeof(int), &newNodes);                                             CHKERRQ(ierr);
    for(node = 0; node < numNodes; node++) {
      if (nodeMap[node] >= 0) {
        newNodes[offsets[nodeMap[node]]++] = node;
      }
    }
  }
  for(proc = 1; proc < numProcs-1; proc++) {
    if (offsets[proc] - offsets[proc-1]  != newProcNodes[proc]) {
      SETERRQ3(PETSC_ERR_PLIB, "Invalid number of ghost nodes sent %d to proc %d should be %d",
               offsets[proc] - offsets[proc-1], proc, newProcNodes[proc]);
    }
  }
  if (offsets[0] != newProcNodes[0]) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid number of ghost nodes sent %d to proc 0 should be %d", offsets[0], newProcNodes[0]);
  }

  /* Add new ghosts */
  ierr = PartitionGetNewGhostNodes_Serial(p, newProcNodes, newNodes);                                    CHKERRQ(ierr);

  /* Cleanup */
  ierr = PetscFree(nodeMap);                                                                             CHKERRQ(ierr);
  ierr = PetscFree(newProcNodes);                                                                        CHKERRQ(ierr);
  ierr = PetscFree(offsets);                                                                             CHKERRQ(ierr);
  if (numNewNodes) {
    ierr = PetscFree(newNodes);                                                                          CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionElements_Private"
int PartitionElements_Private(Partition p)
{
  int (*f)(Partition, int, int **);
  int  *elementMap; /* The map from elements to domains */
  int   ierr;

  PetscFunctionBegin;
  /* Create a new map of elements to domains */
  ierr = PetscObjectQueryFunction((PetscObject) p, "PartitionTriangular2D_CreateElementMap", (void (**)(void)) &f); CHKERRQ(ierr);
  ierr = (*f)(p, p->numLocElements, &elementMap);                                                        CHKERRQ(ierr);

#if 0
  /* Communicate interior elements */
  ierr = GridInteriorExchange(numLocElements, elementMap, p->firstElement);                              CHKERRQ(ierr);
#endif
  /* Create the element partition */
  ierr = PartitionCreateElementPartition_Private(p, elementMap, &p->ordering);                           CHKERRQ(ierr);
  ierr = PetscFree(elementMap);                                                                          CHKERRQ(ierr);

  /* Permute arrays implicitly indexed by element number */
  ierr = PartitionPermuteElements_Private(p);                                                            CHKERRQ(ierr);

  /* Globally renumber the elements to make canonical numbers sequential in each domain */
  ierr = PartitionRenumberElements_Private(p);                                                           CHKERRQ(ierr);

  /* Calculate ghosts */
  ierr = PartitionCalcGhostElements_Private(p);                                                          CHKERRQ(ierr);
  
  /* Check for new ghost nodes created by the element partition */
  ierr = PartitionGetNewGhostNodes_Element(p);                                                           CHKERRQ(ierr);

  p->isElementPartitioned = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateNodeMap_Simple_Seq"
/*
  PartitionCreateNodeMap_Simple_Seq - This function creates a map from nodes to domains,
  using a reordering of the serial mesh to reduce the bandwidth.

  Input Parameters:
+ p        - The Partition
- numNodes - The global number of nodes

  Output Parameter:
. nodeMap  - The map from nodes to domains

.seealso: PartitionNodes_Private()
*/
int PartitionCreateNodeMap_Simple_Seq(Partition p, int numNodes, int **nodeMap)
{
  Partition_Triangular_2D *q         = (Partition_Triangular_2D *) p->data;
  int                     *firstNode = q->firstNode;
  int                      rank      = p->rank;
  int                     *nodeProcs; /* The processor which each node will lie on */
  int                      node;
  int                      ierr;

  PetscFunctionBegin;
  /* Use the existing interior nodes */
  ierr = PetscMalloc(numNodes * sizeof(int), &nodeProcs);                                                 CHKERRQ(ierr);
  for(node = 0; node < numNodes; node++) {
    nodeProcs[node] = -1;
  }
  for(node = firstNode[rank]; node < firstNode[rank+1]; node++) {
    nodeProcs[node] = rank;
  }

  *nodeMap = nodeProcs;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateNodeMap_ElementBased"
/*
  PartitionCreateNodeMap_ElementBased - This function creates a map from nodes to domains,
  based upon a prior partition of the elements.

  Input Parameters:
+ p        - The Partition
- numNodes - The global number of nodes

  Output Parameter:
. nodeMap  - The map from nodes to domains

.seealso: PartitionNodes_Private()
*/
int PartitionCreateNodeMap_ElementBased(Partition p, int numNodes, int **nodeMap)
{
  Mesh             mesh               = p->mesh;
  Mesh_Triangular *tri                = (Mesh_Triangular *) mesh->data;
  int              numLocElements     = p->numLocElements;
  int              numGhostElements   = p->numOverlapElements - p->numLocElements;
  int              numCorners         = mesh->numCorners;
  int             *elements           = tri->faces;
  int             *firstElement       = p->firstElement;
  int             *ghostElements      = p->ghostElements;
  int             *ghostElementProcs  = p->ghostElementProcs;
  int              rank               = p->rank;
  int             *nodeProcs;     /* The processor which each node will lie on */
  int             *support;
  int              nProc, elem, sElem, nElem, nLocElem, gElem, corner, nCorner, node, degree;
  int              ierr;

  PetscFunctionBegin;
  /* Count nodes on this partition -- keep node if you are the lower numbered domain */
  ierr = PetscMalloc(numNodes * sizeof(int), &nodeProcs);                                                 CHKERRQ(ierr);
  for(node = 0; node < numNodes; node++) {
    nodeProcs[node] = -1;
  }

  for(elem = firstElement[rank]; elem < firstElement[rank+1]; elem++) {
    for(corner = 0; corner < numCorners; corner++) {
      node = elements[elem*numCorners+corner];

      /* Check the support of the node */
      ierr = MeshGetNodeSupport(mesh, node, elem, &degree, &support);                                     CHKERRQ(ierr);
      for(sElem = 0; sElem < degree; sElem++) {
        nElem = support[sElem];
        /* See if neighbor is in another domain */
        if ((nElem < firstElement[rank]) || (nElem >= firstElement[rank+1])) {
          /* Check to see if node is contained in the neighboring element */
          for(nCorner = 0; nCorner < numCorners; nCorner++) {
            if (elements[nElem*numCorners+nCorner] == node) {
              ierr  = PartitionGlobalToLocalElementIndex(p, nElem, &nLocElem);                            CHKERRQ(ierr);
              nProc = ghostElementProcs[nLocElem-numLocElements];
              /* Give the node to the lowest numbered domain */
              if ((nProc < rank) && ((nodeProcs[node] < 0) || (nProc < nodeProcs[node]))) {
                nodeProcs[node] = nProc;
              }
              break;
            }
          }
        }
      }
      ierr = MeshRestoreNodeSupport(mesh, node, elem, &degree, &support);                                 CHKERRQ(ierr);

      /* If no one else claims it, take the node */
      if (nodeProcs[node] < 0) {
        nodeProcs[node] = rank;
      }
    }
  }

  /* Now assign the ghost nodes from ghost elements (which we can never own) */
  for(gElem = 0; gElem < numGhostElements; gElem++) {
    for(corner = 0; corner < numCorners; corner++) {
      node = elements[ghostElements[gElem]*numCorners+corner];
      if (nodeProcs[node] < 0)
        nodeProcs[node] = ghostElementProcs[gElem];
    }
  }

  *nodeMap = nodeProcs;
  PetscFunctionReturn(0);
}

/*
  PartitionCreateNodePartition_Private - This function uses the node map to create
  partition structures for node-based data.

  Input Parameters:
+ p               - The Partition
- nodeMap         - The map from nodes to domains

  Output Parameter:
. ordering        - The new node ordering

  Output Parameters in Partition_Triangular_2D:
+ numLocNodes     - The number of local nodes
. numOverlapNodes - The number of local + ghost nodes
. firstNode       - The first node in each domain
- ghostNodes      - The global number for each ghost node

.seealso: PartitionNodes_Private()
*/
#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateNodePartition_Private"
int PartitionCreateNodePartition_Private(Partition p, int *nodeMap, AO *ordering)
{
  Partition_Triangular_2D *q        = (Partition_Triangular_2D *) p->data;
  int                      numNodes = q->numNodes;
  int                      numProcs = p->numProcs;
  int                      rank     = p->rank;
  int                      numGhostNodes; /* Number of ghost nodes for this domain */
  int                     *AppOrdering, *PetscOrdering;
  int                      proc, node, index, index2;
  int                      ierr;

  PetscFunctionBegin;
  /* Determine local and ghost sizes */
  for(node = 0, q->numLocNodes = 0, numGhostNodes = 0; node < numNodes; node++) {
    if (nodeMap[node] == rank) {
      q->numLocNodes++;
    } else if (nodeMap[node] >= 0) {
      numGhostNodes++;
    }
  }

  /* Recompute size and offset of each domain */
  ierr = MPI_Allgather(&q->numLocNodes, 1, MPI_INT, &q->firstNode[1], 1, MPI_INT, p->comm);              CHKERRQ(ierr);
  for(proc = 1, q->firstNode[0] = 0; proc <= numProcs; proc++) {
    q->firstNode[proc] = q->firstNode[proc] + q->firstNode[proc-1];
  }
  if (q->firstNode[numProcs] != numNodes) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid number of nodes %d should be %d", q->firstNode[numProcs], numNodes);
  }

  /* Setup ghost node structures */
  q->numOverlapNodes = q->numLocNodes + numGhostNodes;
  if (numGhostNodes > 0) {
    ierr = PetscMalloc(numGhostNodes * sizeof(int), &q->ghostNodes);                                      CHKERRQ(ierr);
  }

  /* Get indices for reordering */
  ierr = PetscMalloc(q->numLocNodes * sizeof(int), &AppOrdering);                                         CHKERRQ(ierr);
  ierr = PetscMalloc(q->numLocNodes * sizeof(int), &PetscOrdering);                                       CHKERRQ(ierr);
  for(node = 0; node < q->numLocNodes; node++) {
    /* This would be the time to do RCM on the local graph by reordering PetscOrdering[] */
    PetscOrdering[node] = q->firstNode[rank] + node;
  }
  for(node = 0, index = 0, index2 = 0; node < numNodes; node++) {
    if (nodeMap[node] == rank) {
      AppOrdering[index++]    = node;
    } else if (nodeMap[node] >= 0) {
      q->ghostNodes[index2++] = node;
    }
  }
  if (index  != q->numLocNodes) SETERRQ(PETSC_ERR_PLIB, "Invalid node renumbering");
  if (index2 != numGhostNodes)  SETERRQ(PETSC_ERR_PLIB, "Invalid ghost node renumbering");

  /* Create the global node reordering */
  ierr = AOCreateBasic(p->comm, q->numLocNodes, AppOrdering, PetscOrdering, ordering);
  if (ierr) {
    ierr = PartitionDebugAO_Private(p, nodeMap);                                                         CHKERRQ(ierr);
  }

  ierr = PetscFree(AppOrdering);                                                                         CHKERRQ(ierr);
  ierr = PetscFree(PetscOrdering);                                                                       CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionPermuteNodes_Private"
/*
  PartitionPermuteNodes_Private - This function permutes the data which is implicitly
  indexed by node number

  Input Parameter:
. p       - The Partition

  Output Parameter in Mesh_Triangular:
+ nodes   - The coordinates on each node
. markers - The node markers
- degrees - The degree of each node

.seealso: PartitionNodes_Private()
*/
int PartitionPermuteNodes_Private(Partition p)
{
  Partition_Triangular_2D *q    = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh = p->mesh;
  Mesh_Triangular         *tri  = (Mesh_Triangular *) mesh->data;
  int                      ierr;

  PetscFunctionBegin;
  ierr = AOApplicationToPetscPermuteReal(q->nodeOrdering, mesh->dim, tri->nodes);                        CHKERRQ(ierr);
  ierr = AOApplicationToPetscPermuteInt(q->nodeOrdering,    1,         tri->markers);                    CHKERRQ(ierr);
  ierr = AOApplicationToPetscPermuteInt(q->nodeOrdering,    1,         tri->degrees);                    CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionRenumberNodes_Private"
/*
  PartitionRenumberNodes_Private - This function renumbers the node-based data globally in
  order to make the canonical numbers sequential in each domain.

  Input Parameter:
. p          - The Partition

  Output Parameters in Mesh_Triangular:
+ faces      - The nodes in each element
. edges      - The nodes on each edge
- bdNodes    - The nodes on each boundary

  Output Parameter in Partition_Triangular_2D:
. ghostNodes - The global number of each ghost node

.seealso: PartitionNodes_Private()
*/
int PartitionRenumberNodes_Private(Partition p)
{
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh          = p->mesh;
  Mesh_Triangular         *tri           = (Mesh_Triangular *) mesh->data;
  int                      numElements   = p->numElements;
  int                      numCorners    = mesh->numCorners;
  int                     *faces         = tri->faces;
  int                      numEdges      = q->numEdges;
  int                     *edges         = tri->edges;
  int                      numBdNodes    = q->numBdNodes;
  int                     *bdNodes       = tri->bdNodes;
  int                      numGhostNodes = q->numOverlapNodes - q->numLocNodes;
  int                     *ghostNodes    = q->ghostNodes;
  int                      ierr;

  PetscFunctionBegin;
  ierr = AOApplicationToPetsc(q->nodeOrdering, numEdges*2,             edges);                           CHKERRQ(ierr);
  ierr = AOApplicationToPetsc(q->nodeOrdering, numElements*numCorners, faces);                           CHKERRQ(ierr);
  ierr = AOApplicationToPetsc(q->nodeOrdering, numBdNodes,             bdNodes);                         CHKERRQ(ierr);
  ierr = AOApplicationToPetsc(q->nodeOrdering, numGhostNodes,          ghostNodes);                      CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionDistributeNodes_Private"
/*
  PartitionDistributeNodes_Private - This function distributes the node-based data, and
  permutes arrays which are implicitly indexed by node number.

  Input Parameter:
. p       - The Partition

  Output Parameters in Mesh_Triangular:
+ nodes   - The node coordinates
. markers - The node markers
- degrees - The node degrees

.seealso: PartitionNodes_Private()
*/
int PartitionDistributeNodes_Private(Partition p)
{
  Partition_Triangular_2D *q               = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh            = p->mesh;
  Mesh_Triangular         *tri             = (Mesh_Triangular *) mesh->data;
  int                      dim             = mesh->dim;
  int                      numLocNodes     = q->numLocNodes;
  int                      numOverlapNodes = q->numOverlapNodes;
  int                      numGhostNodes   = numOverlapNodes - numLocNodes;
  int                     *firstNode       = q->firstNode;
  int                     *ghostNodes      = q->ghostNodes;
  int                      rank            = p->rank;
  int                     *temp;
  double                  *temp2;
  int                      node, c;
  int                      ierr;

  PetscFunctionBegin;
  mesh->numNodes = numLocNodes;
  ierr = PetscMalloc(numOverlapNodes*dim * sizeof(double), &temp2);                                       CHKERRQ(ierr);
  /* Interior nodes */
  ierr = PetscMemcpy(temp2, &tri->nodes[firstNode[rank]*dim], numLocNodes*dim * sizeof(double));          CHKERRQ(ierr);
  /* Ghost nodes */
  for(node = 0; node < numGhostNodes; node++) {
    for(c = 0; c < dim; c++) {
      temp2[(numLocNodes+node)*dim+c] = tri->nodes[ghostNodes[node]*dim+c];
    }
  }
  ierr = PetscFree(tri->nodes);                                                                           CHKERRQ(ierr);
  tri->nodes = temp2;
  PetscLogObjectMemory(p, numGhostNodes*dim * sizeof(double));

  ierr = PetscMalloc(numOverlapNodes * sizeof(int), &temp);                                               CHKERRQ(ierr);
  /* Interior markers */
  ierr = PetscMemcpy(temp, &tri->markers[firstNode[rank]], numLocNodes * sizeof(int));                    CHKERRQ(ierr);
  /* Ghost markers */
  for(node = 0; node < numGhostNodes; node++) {
    temp[numLocNodes+node] = tri->markers[ghostNodes[node]];
  }
  ierr = PetscFree(tri->markers);                                                                         CHKERRQ(ierr);
  tri->markers = temp;
  PetscLogObjectMemory(p, numGhostNodes * sizeof(int));

  ierr = PetscMalloc(numOverlapNodes * sizeof(int), &temp);                                               CHKERRQ(ierr);
  /* Interior degrees */
  ierr = PetscMemcpy(temp, &tri->degrees[firstNode[rank]], numLocNodes * sizeof(int));                    CHKERRQ(ierr);
  /* Ghost degrees */
  for(node = 0; node < numGhostNodes; node++) {
    temp[numLocNodes+node] = tri->degrees[ghostNodes[node]];
  }
  ierr = PetscFree(tri->degrees);                                                                         CHKERRQ(ierr);
  tri->degrees = temp;
  PetscLogObjectMemory(p, numGhostNodes * sizeof(int));

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionNodeCalcGhostProcs_Private"
/*
  PartitionNodeCalcGhostProcs_Private - This function determines the processor from
  which each ghost node comes.

  Input Parameter:
. p              - The Partition

  Output Parameter in Partition_Triangular_2D:
. ghostNodeProcs - The domain of each ghost node

.seealso: PartitionNodes_Private()
*/
int PartitionNodeCalcGhostProcs_Private(Partition p)
{
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  int                      numGhostNodes = q->numOverlapNodes - q->numLocNodes;
  int                     *ghostNodes    = q->ghostNodes;
  int                     *firstNode     = q->firstNode;
  int                      numProcs      = p->numProcs;
  int                     *nodePerm;
  int                      proc, node;
  int                      ierr;

  PetscFunctionBegin;
  if (numGhostNodes == 0)
    PetscFunctionReturn(0);

  /* Resort ghost nodes after renumbering */
  ierr = PartitionSortGhosts_Private(p, &numGhostNodes, ghostNodes, &nodePerm);                           CHKERRQ(ierr);
  ierr = PetscFree(nodePerm);                                                                             CHKERRQ(ierr);
  q->numOverlapNodes = q->numLocNodes + numGhostNodes;

  /* calculate ghost node domains */
  ierr = PetscMalloc(numGhostNodes * sizeof(int), &q->ghostNodeProcs);                                    CHKERRQ(ierr);
  for(node = 0; node < numGhostNodes; node++) {
    for(proc = 0; proc < numProcs; proc++) {
      if ((ghostNodes[node] >= firstNode[proc]) && (ghostNodes[node] <  firstNode[proc+1])) {
        q->ghostNodeProcs[node] = proc;
        break;
      }
    }
    if (proc == numProcs) SETERRQ2(PETSC_ERR_PLIB, "Invalid ghost node %d, global number %d", node, ghostNodes[node]);
  }
#ifdef PETSC_USE_BOPT_g
  for(node = 0; node < numGhostNodes; node++) {
    if ((ghostNodes[node] < firstNode[q->ghostNodeProcs[node]]) || (ghostNodes[node] >= firstNode[q->ghostNodeProcs[node]+1]))
      SETERRQ2(PETSC_ERR_LIB, "Invalid source processor %d on ghost node %d", q->ghostNodeProcs[node], node);
  }
#endif
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionNodes_Private"
int PartitionNodes_Private(Partition p)
{
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;
  int                    (*f)(Partition, int, int **);
  int                     *nodeMap; /* The map from nodes to domains */
  int                      ierr;

  PetscFunctionBegin;
  /* Create a new map of nodes to domains */
  ierr = PetscObjectQueryFunction((PetscObject) p, "PartitionTriangular2D_CreateNodeMap", (void (**)(void)) &f); CHKERRQ(ierr);
  ierr = (*f)(p, q->numNodes, &nodeMap);                                                                  CHKERRQ(ierr);

  /* Create the node partition */
  ierr = PartitionCreateNodePartition_Private(p, nodeMap, &q->nodeOrdering);                              CHKERRQ(ierr);
  ierr = PetscFree(nodeMap);                                                                              CHKERRQ(ierr);

  /* Permute arrays implicitly indexed by node number */
  ierr = PartitionPermuteNodes_Private(p);                                                                CHKERRQ(ierr);

  /* Globally renumber the nodes to make canonical numbers sequential in each domain */
  /* WARNING: We must resort ghost nodes after renumbering, but this is done anyway in edge partitioning */
  ierr = PartitionRenumberNodes_Private(p);                                                               CHKERRQ(ierr);

  /* Assign ghost node source processors */
  ierr = PartitionNodeCalcGhostProcs_Private(p);                                                          CHKERRQ(ierr);

  q->isNodePartitioned = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateEdgeMap_NodeBased"
/*
  PartitionCreateEdgeMap_NodeBased - This function creates a map from edges to domains,
  using a previous partition of the nodes.

  Input Parameters:
+ p        - The Partition
- numEdges - The global number of edges

  Output Parameter:
. edgeMap  - The map from edges to domains

.seealso: PartitionEdges_Private()
*/
int PartitionCreateEdgeMap_NodeBased(Partition p, int numEdges, int **edgeMap)
{
  Partition_Triangular_2D *q         = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh      = p->mesh;
  Mesh_Triangular         *tri       = (Mesh_Triangular *) mesh->data;
  int                     *edges     = tri->edges;
  int                     *firstNode = q->firstNode;
  int                      numProcs  = p->numProcs;
  int                      rank      = p->rank;
  int                      startProc = -1;
  int                      endProc   = -1;
  int                     *edgeProcs;     /* The processor assigned to each edge */
  int                      proc, edge, startNode, endNode;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc(numEdges * sizeof(int), &edgeProcs);                                                 CHKERRQ(ierr);
  for(edge = 0; edge < numEdges; edge++) {
    edgeProcs[edge] = -1;
  }

  /* Count edges on this partition -- keep edge if you are the lower numbered domain */
  for(edge = 0; edge < numEdges; edge++) {
    startNode = edges[edge*2];
    endNode   = edges[edge*2+1];

    if ((startNode >= firstNode[rank]) && (startNode < firstNode[rank+1])) {
      /* startNode is local */
      if ((endNode >= firstNode[rank]) && (endNode < firstNode[rank+1])) {
        /* endNode is local */
        edgeProcs[edge] = rank;
      } else {
        /* endNode is not local */
        for(proc = 0; proc < numProcs; proc++) {
          if ((endNode >= firstNode[proc]) && (endNode < firstNode[proc+1])) {
            endProc = proc;
            break;
          }
        }
        if (rank < endProc) {
          edgeProcs[edge] = rank;
        }
      }
    } else {
      /* startNode is not local */
      if ((endNode >= firstNode[rank]) && (endNode < firstNode[rank+1])) {
        /* endNode is local */
        for(proc = 0; proc < numProcs; proc++) {
          if ((startNode >= firstNode[proc]) && (startNode < firstNode[proc+1])) {
            startProc = proc;
            break;
          }
        }
        if (rank < startProc) {
          edgeProcs[edge] = rank;
        }
      }
    }
  }

  *edgeMap = edgeProcs;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateEdgePartition_Private"
int PartitionCreateEdgePartition_Private(Partition p, int *edgeMap, AO *ordering)
{
  Mesh                     mesh          = p->mesh;
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  int                      numEdges      = mesh->numEdges;
  int                      numProcs      = p->numProcs;
  int                      rank          = p->rank;
  int                     *AppOrdering   = PETSC_NULL;
  int                     *PetscOrdering = PETSC_NULL;
  int                      proc, edge, index;
  int                      ierr;

  PetscFunctionBegin;
  /* Determine local edges and new ghost nodes */
  for(edge = 0, q->numLocEdges = 0; edge < numEdges; edge++) {
    if (edgeMap[edge] == rank) {
      q->numLocEdges++;
    }
  }

  /* Recompute size and offset of each domain */
  ierr = MPI_Allgather(&q->numLocEdges, 1, MPI_INT, &q->firstEdge[1], 1, MPI_INT, p->comm);               CHKERRQ(ierr);
  for(proc = 1, q->firstEdge[0] = 0; proc <= numProcs; proc++)
    q->firstEdge[proc] = q->firstEdge[proc] + q->firstEdge[proc-1];
  if (q->firstEdge[numProcs] != q->numEdges) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid global number of edges %d should be %d", q->firstEdge[numProcs], q->numEdges);
  }

  /* Get indices for reordering */
  if (q->numLocEdges > 0) {
    ierr = PetscMalloc(q->numLocEdges * sizeof(int), &AppOrdering);                                       CHKERRQ(ierr);
    ierr = PetscMalloc(q->numLocEdges * sizeof(int), &PetscOrdering);                                     CHKERRQ(ierr);
  }
  for(edge = 0; edge < q->numLocEdges; edge++) {
    PetscOrdering[edge] = q->firstEdge[rank] + edge;
  }
  for(edge = 0, index = 0; edge < numEdges; edge++) {
    if (edgeMap[edge] == rank) {
      AppOrdering[index++] = edge;
    }
  }
  if (index != q->numLocEdges) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid number of local edges %d should be %d", index, q->numLocEdges);
  }

  /* Create the global edge reordering */
  ierr = AOCreateBasic(p->comm, q->numLocEdges, AppOrdering, PetscOrdering, ordering);                    CHKERRQ(ierr);

  if (q->numLocEdges > 0) {
    ierr = PetscFree(AppOrdering);                                                                        CHKERRQ(ierr);
    ierr = PetscFree(PetscOrdering);                                                                      CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionDistributeEdges_Private"
/*
  PartitionDistributeEdges_Private - This function distributes the edge-based data, and
  permutes arrays which are implicitly indexed by edge number.

  Input Parameter:
. p           - The Partition

  Output Parameters in Mesh_Triangular:
+ edges       - The nodes on each edge
- edgemarkers - The edge markers

.seealso: PartitionEdges_Private()
*/
int PartitionDistributeEdges_Private(Partition p) {
  Partition_Triangular_2D *q           = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh        = p->mesh;
  Mesh_Triangular         *tri         = (Mesh_Triangular *) mesh->data;
  int                      numLocEdges = q->numLocEdges;
  int                     *firstEdge   = q->firstEdge;
  int                      rank        = p->rank;
  int                     *temp        = PETSC_NULL;
  int                      ierr;

  PetscFunctionBegin;
  mesh->numEdges = numLocEdges;
  if (numLocEdges > 0) {
    ierr = PetscMalloc(numLocEdges*2 * sizeof(int), &temp);                                               CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, &tri->edges[firstEdge[rank]*2], numLocEdges*2 * sizeof(int));                CHKERRQ(ierr);
    ierr = PetscFree(tri->edges);                                                                         CHKERRQ(ierr);
  }
  tri->edges = temp;

  if (numLocEdges > 0) {
    ierr = PetscMalloc(numLocEdges * sizeof(int), &temp);                                                 CHKERRQ(ierr);
    ierr = PetscMemcpy(temp, &tri->edgemarkers[firstEdge[rank]], numLocEdges * sizeof(int));              CHKERRQ(ierr);
    ierr = PetscFree(tri->edgemarkers);                                                                   CHKERRQ(ierr);
  }
  tri->edgemarkers = temp;

  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionPermuteEdges_Private"
/*
  PartitionPermuteEdges_Private - This function permutes the data which is implicitly
  indexed by edge number

  Input Parameter:
. p           - The Partition

  Output Parameter in Mesh_Triangular:
+ edges       - The nodes on each edge
- edgemarkers - The edge markers

.seealso: PartitionEdges_Private()
*/
int PartitionPermuteEdges_Private(Partition p)
{
  Partition_Triangular_2D *q   = (Partition_Triangular_2D *) p->data;
  Mesh_Triangular         *tri = (Mesh_Triangular *) p->mesh->data;
  int                      ierr;

  PetscFunctionBegin;
  ierr = AOApplicationToPetscPermuteInt(q->edgeOrdering, 2, tri->edges);                                 CHKERRQ(ierr);
  ierr = AOApplicationToPetscPermuteInt(q->edgeOrdering, 1, tri->edgemarkers);                           CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionRenumberEdges_Private"
/*
  PartitionRenumberEdges_Private - This function renumbers the edge-based data globally in
  order to make the canonical numbers sequential in each domain.

  Input Parameter:
. p       - The Partition

  Output Parameter in Mesh_Triangular:
. bdEdges - The edges on each boundary

.seealso: PartitionEdges_Private()
*/
int PartitionRenumberEdges_Private(Partition p)
{
  Partition_Triangular_2D *q          = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh       = p->mesh;
  Mesh_Triangular         *tri        = (Mesh_Triangular *) mesh->data;
  int                      numBdEdges = mesh->numBdEdges;
  int                     *bdEdges    = tri->bdEdges;
  int                      ierr;

  PetscFunctionBegin;
  ierr = AOApplicationToPetsc(q->edgeOrdering, numBdEdges, bdEdges);                                     CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionEdgeGlobalToLocal_Private"
int PartitionEdgeGlobalToLocal_Private(Partition p)
{
  Partition_Triangular_2D *q           = (Partition_Triangular_2D *) p->data;
  Mesh_Triangular         *tri         = (Mesh_Triangular *) p->mesh->data;
  int                      numLocEdges = q->numLocEdges;
  int                     *edges       = tri->edges;
  int                      node;
  int                      ierr;

  PetscFunctionBegin;
  for(node = 0; node < numLocEdges*2; node++) {
    ierr = PartitionGlobalToLocalNodeIndex(p, edges[node], &edges[node]);                                CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionEdges_Private"
int PartitionEdges_Private(Partition p)
{
  Partition_Triangular_2D *q = (Partition_Triangular_2D *) p->data;
  int                    (*f)(Partition, int, int **);
  int                     *edgeMap; /* The map from edges to domains */
  int                      ierr;

  PetscFunctionBegin;
  /* Create a new map of nodes to domains */
  ierr = PetscObjectQueryFunction((PetscObject) p, "PartitionTriangular2D_CreateEdgeMap", (void (**)(void)) &f); CHKERRQ(ierr);
  ierr = (*f)(p, q->numEdges, &edgeMap);                                                                  CHKERRQ(ierr);

  /* Create the edge partition */
  ierr = PartitionCreateEdgePartition_Private(p, edgeMap, &q->edgeOrdering);                              CHKERRQ(ierr);
  ierr = PetscFree(edgeMap);                                                                              CHKERRQ(ierr);

  /* Permute arrays implicitly indexed by edge number */
  ierr = PartitionPermuteEdges_Private(p);                                                                CHKERRQ(ierr);

  /* Globally renumber the edges to make canonical numbers sequential in each domain */
  ierr = PartitionRenumberEdges_Private(p);                                                               CHKERRQ(ierr);

  /* Check for new ghost nodes created by the element partition */
  ierr = PartitionGetNewGhostNodes_Edge(p);                                                               CHKERRQ(ierr);

  q->isEdgePartitioned = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionBoundaryNodes_Private"
int PartitionBoundaryNodes_Private(Partition p)
{
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh          = p->mesh;
  Mesh_Triangular         *tri           = (Mesh_Triangular *) mesh->data;
  int                      numBdNodes    = mesh->numBdNodes;
  int                     *bdNodes       = tri->bdNodes;
  int                     *firstNode     = q->firstNode;
  int                      numProcs      = p->numProcs;
  int                      rank          = p->rank;
  int                      proc, node;
  int                      ierr;

  PetscFunctionBegin;
  q->numLocBdNodes = 0;
  for(node = 0; node < numBdNodes; node++) {
    if ((bdNodes[node] >= firstNode[rank]) && (bdNodes[node] < firstNode[rank+1]))
      q->numLocBdNodes++;
  }
  ierr = MPI_Allgather(&q->numLocBdNodes, 1, MPI_INT, &q->firstBdNode[1], 1, MPI_INT, p->comm);          CHKERRQ(ierr);
  q->firstBdNode[0] = 0;
  for(proc = 1; proc <= numProcs; proc++) {
    q->firstBdNode[proc] = q->firstBdNode[proc] + q->firstBdNode[proc-1];
  }
  if (q->firstBdNode[numProcs] != q->numBdNodes) {
    SETERRQ2(PETSC_ERR_PLIB, "Invalid number of boundary nodes %d should be %d", q->firstBdNode[numProcs], q->numBdNodes);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionDistributeBdNodes_Private"
/*
  PartitionDistributeBdNodes_Private - This function distributes the edge-based data, and
  permutes arrays which are implicitly indexed by edge number.

  Input Parameter:
. p           - The Partition

  Output Parameters in Mesh_Triangular:
+ edges       - The nodes on each edge
- edgemarkers - The edge markers

.seealso: PartitionBdNodes_Private()
*/
int PartitionDistributeBdNodes_Private(Partition p)
{
  Partition_Triangular_2D *q             = (Partition_Triangular_2D *) p->data;
  Mesh_Triangular         *tri           = (Mesh_Triangular *) p->mesh->data;
  int                      numLocNodes   = q->numLocNodes;
  int                      numGhostNodes = q->numOverlapNodes - q->numLocNodes;
  int                     *markers       = tri->markers;
  int                      numLocBdNodes = q->numLocBdNodes;
  int                      node, bdNode;
  int                      ierr;

  PetscFunctionBegin;
  /* Process ghost boundary nodes */
  q->numOverlapBdNodes = numLocBdNodes;
  for(node = 0; node < numGhostNodes; node++) {
    if (markers[numLocNodes+node] != 0)
      q->numOverlapBdNodes++;
  }
  if (q->numOverlapBdNodes > numLocBdNodes) {
    ierr = PetscMalloc((q->numOverlapBdNodes - numLocBdNodes) * sizeof(int), &q->ghostBdNodes);           CHKERRQ(ierr);
    for(node = 0, bdNode = 0; node < numGhostNodes; node++) {
      if (markers[numLocNodes+node] != 0)
        q->ghostBdNodes[bdNode++] = node;
    }
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionDistribute_Private"
int PartitionDistribute_Private(Partition p)
{
  int ierr;

  PetscFunctionBegin;
  /* Redistribute the elements and arrays implicitly numbered by element numbers */
  ierr = PartitionDistributeElements_Private(p);                                                         CHKERRQ(ierr);

  /* Redistribute the nodes and permute arrays implicitly numbered by node numbers */
  ierr = PartitionDistributeNodes_Private(p);                                                            CHKERRQ(ierr);

  /* Redistribute the edges and permute arrays implicitly numbered by edge numbers */
  ierr = PartitionDistributeEdges_Private(p);                                                            CHKERRQ(ierr);

  /* Store ghost boundary nodes */
  ierr = PartitionDistributeBdNodes_Private(p);                                                          CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionGlobalToLocal_Private"
int PartitionGlobalToLocal_Private(Partition p)
{
  Partition_Triangular_2D *q                  = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh               = p->mesh;
  Mesh_Triangular         *tri                = (Mesh_Triangular *) mesh->data;
  int                      numOverlapElements = p->numOverlapElements;
  int                      numCorners         = mesh->numCorners;
  int                     *faces              = tri->faces;
  int                     *neighbors          = tri->neighbors;
  int                      numLocEdges        = q->numLocEdges;
  int                     *edges              = tri->edges;
  int                      corner, neighbor, node;
  int                      ierr;

  PetscFunctionBegin;
  for(corner = 0; corner < numOverlapElements*numCorners; corner++) {
    ierr = PartitionGlobalToLocalNodeIndex(p, faces[corner], &faces[corner]);                            CHKERRQ(ierr);
  }
  /* We indicate neighbors which are not interior or ghost by -2 since boundaries are -1 */
  for(neighbor = 0; neighbor < numOverlapElements*3; neighbor++) {
    ierr = PartitionGlobalToLocalElementIndex(p, neighbors[neighbor], &neighbors[neighbor]);             CHKERRQ(ierr);
  }
  for(node = 0; node < numLocEdges*2; node++) {
    ierr = PartitionGlobalToLocalNodeIndex(p, edges[node], &edges[node]);                                CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef  __FUNCT__
#define __FUNCT__ "PartitionCreateTriangular2D"
/*@
  PartitionCreateTriangular2D - Creates a partition of a two dimensional unstructured grid.

  Collective on Mesh

  Input Parameters:
. mesh      - The mesh to be partitioned

  Output Paramter:
. partition - The partition

  Level: beginner

.keywords unstructured mesh, partition
.seealso MeshCreateTriangular2D
@*/
int PartitionCreateTriangular2D(Mesh mesh, Partition *part)
{
  int        numProcs;
  PetscTruth opt;
  int        ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(mesh->comm, &numProcs);                                                            CHKERRQ(ierr);
  ierr = PartitionCreate(mesh, part);                                                                     CHKERRQ(ierr);
  if (numProcs == 1) {
    ierr = PetscObjectComposeFunction((PetscObject) *part, "PartitionTriangular2D_Create_C", "PartitionCreate_Uni",
                                      (void (*)(void)) PartitionCreate_Uni);
    CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject) *part, "PartitionTriangular2D_Create_C", "PartitionCreate_ElementBased",
                                      (void (*)(void)) PartitionCreate_ElementBased);
    CHKERRQ(ierr);
    ierr = PetscOptionsHasName(mesh->prefix, "-part_node_based", &opt);                                   CHKERRQ(ierr);
    if (opt == PETSC_TRUE) {
      ierr = PetscObjectComposeFunction((PetscObject) *part, "PartitionTriangular2D_Create_C", "PartitionCreate_NodeBased",
                                        (void (*)(void)) PartitionCreate_NodeBased);
      CHKERRQ(ierr);
    }
  }
  ierr = PartitionSetType(*part, PARTITION_TRIANGULAR_2D);                                                CHKERRQ(ierr);

  ierr = PartitionViewFromOptions_Private(*part, "Partition");                                            CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef  __FUNCT__
#define __FUNCT__ "PartitionSerialize_Triangular_2D"
int PartitionSerialize_Triangular_2D(Mesh mesh, Partition *part, PetscViewer viewer, PetscTruth store)
{
  Partition                p;
  Partition_Triangular_2D *q;
  int                      fd;
  int                      numGhostElements, numGhostNodes, numGhostBdNodes, hasOrdering;
  int                      numProcs, rank;
  int                      one  = 1;
  int                      zero = 0;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscViewerBinaryGetDescriptor(viewer, &fd);                                                     CHKERRQ(ierr);
  if (store == PETSC_TRUE) {
    p = *part;
    numProcs = p->numProcs;
    numGhostElements = p->numOverlapElements - p->numLocElements;
    ierr = PetscBinaryWrite(fd, &p->numProcs,           1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &p->rank,               1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &p->numLocElements,     1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &p->numElements,        1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &p->numOverlapElements, 1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  p->firstElement,       numProcs+1,       PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  p->ghostElements,      numGhostElements, PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  p->ghostElementProcs,  numGhostElements, PETSC_INT, 0);                  CHKERRQ(ierr);
    if (p->ordering != PETSC_NULL) {
      ierr = PetscBinaryWrite(fd, &one,                 1,                PETSC_INT, 0);                  CHKERRQ(ierr);
      ierr = AOSerialize(p->comm, &p->ordering, viewer, store);                                           CHKERRQ(ierr);
    } else {
      ierr = PetscBinaryWrite(fd, &zero,                1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    }

    q    = (Partition_Triangular_2D *) (*part)->data;
    numGhostNodes   = q->numOverlapNodes   - q->numLocNodes;
    numGhostBdNodes = q->numOverlapBdNodes - q->numLocBdNodes;
    ierr = PetscBinaryWrite(fd, &q->numLocNodes,        1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numNodes,           1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numOverlapNodes,    1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  q->firstNode,          numProcs+1,       PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  q->ghostNodes,         numGhostNodes,    PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  q->ghostNodeProcs,     numGhostNodes,    PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numLocEdges,        1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numEdges,           1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  q->firstEdge,          numProcs+1,       PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numLocBdNodes,      1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numBdNodes,         1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd, &q->numOverlapBdNodes,  1,                PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  q->firstBdNode,        numProcs+1,       PETSC_INT, 0);                  CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,  q->ghostBdNodes,       numGhostBdNodes,  PETSC_INT, 0);                  CHKERRQ(ierr);
  } else {
    /* Create the partition context */
    ierr = PartitionCreate(mesh, &p);                                                                     CHKERRQ(ierr);
    ierr = PetscNew(Partition_Triangular_2D, &q);                                                         CHKERRQ(ierr);
    PetscLogObjectMemory(p, sizeof(Partition_Triangular_2D));
    ierr = PetscMemcpy(p->ops, &POps, sizeof(struct _PartitionOps));                                      CHKERRQ(ierr);
    ierr = PetscStrallocpy(PARTITION_TRIANGULAR_2D, &p->type_name);                                       CHKERRQ(ierr);
    p->data = (void *) q;

    ierr = MPI_Comm_size(p->comm, &numProcs);                                                             CHKERRQ(ierr);
    ierr = MPI_Comm_rank(p->comm, &rank);                                                                 CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &p->numProcs,           1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &p->rank,               1,                PETSC_INT);                      CHKERRQ(ierr);
    if (p->numProcs != numProcs) {
      SETERRQ2(PETSC_ERR_FILE_UNEXPECTED, "Invalid number of processors %d should be %d", numProcs, p->numProcs);
    }
    if (p->rank != rank) {
      SETERRQ2(PETSC_ERR_FILE_UNEXPECTED, "Invalid processor rank %d should be %d", rank, p->rank);
    }
    ierr = PetscBinaryRead(fd, &p->numLocElements,     1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &p->numElements,        1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &p->numOverlapElements, 1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscMalloc((numProcs+1) * sizeof(int), &p->firstElement);                                     CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,  p->firstElement,       numProcs+1,       PETSC_INT);                      CHKERRQ(ierr);
    numGhostElements = p->numOverlapElements - p->numLocElements;
    if (numGhostElements > 0) {
      ierr = PetscMalloc(numGhostElements * sizeof(int), &p->ghostElements);                              CHKERRQ(ierr);
      ierr = PetscMalloc(numGhostElements * sizeof(int), &p->ghostElementProcs);                          CHKERRQ(ierr);
    }
    ierr = PetscBinaryRead(fd,  p->ghostElements,      numGhostElements, PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,  p->ghostElementProcs,  numGhostElements, PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &hasOrdering,           1,                PETSC_INT);                      CHKERRQ(ierr);
    if (hasOrdering) {
      ierr = AOSerialize(p->comm, &p->ordering, viewer, store);                                           CHKERRQ(ierr);
    }

    q->ghostNodes        = PETSC_NULL;
    q->ghostNodeProcs    = PETSC_NULL;
    q->ghostBdNodes      = PETSC_NULL;
    q->nodeOrdering      = PETSC_NULL;
    q->edgeOrdering      = PETSC_NULL;

    ierr = PetscBinaryRead(fd, &q->numLocNodes,        1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numNodes,           1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numOverlapNodes,    1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscMalloc((numProcs+1) * sizeof(int), &q->firstNode);                                        CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,  q->firstNode,          numProcs+1,       PETSC_INT);                      CHKERRQ(ierr);
    numGhostNodes   = q->numOverlapNodes - q->numLocNodes;
    if (numGhostNodes > 0) {
      ierr = PetscMalloc(numGhostNodes * sizeof(int), &q->ghostNodes);                                    CHKERRQ(ierr);
      ierr = PetscMalloc(numGhostNodes * sizeof(int), &q->ghostNodeProcs);                                CHKERRQ(ierr);
    }
    ierr = PetscBinaryRead(fd,  q->ghostNodes,         numGhostNodes,    PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,  q->ghostNodeProcs,     numGhostNodes,    PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numLocEdges,        1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numEdges,           1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscMalloc((numProcs+1) * sizeof(int), &q->firstEdge);                                        CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,  q->firstEdge,          numProcs+1,       PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numLocBdNodes,      1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numBdNodes,         1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd, &q->numOverlapBdNodes,  1,                PETSC_INT);                      CHKERRQ(ierr);
    ierr = PetscMalloc((numProcs+1) * sizeof(int), &q->firstBdNode);                                      CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,  q->firstBdNode,        numProcs+1,       PETSC_INT);                      CHKERRQ(ierr);
    numGhostBdNodes = q->numOverlapBdNodes - q->numLocBdNodes;
    if (numGhostBdNodes) {
      ierr = PetscMalloc(numGhostBdNodes * sizeof(int), &q->ghostBdNodes);                                CHKERRQ(ierr);
    }
    ierr = PetscBinaryRead(fd,  q->ghostBdNodes,       numGhostBdNodes,  PETSC_INT);                      CHKERRQ(ierr);
    PetscLogObjectMemory(p, ((numProcs+1)*4 + numGhostElements*2 + numGhostNodes*2 + numGhostBdNodes)* sizeof(int));
    *part = p;
  }
  if (p->numProcs > 1) {
    ierr = AOSerialize(p->comm, &q->nodeOrdering, viewer, store);                                         CHKERRQ(ierr);
    ierr = AOSerialize(p->comm, &q->edgeOrdering, viewer, store);                                         CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef  __FUNCT__
#define __FUNCT__ "PartitionCreate_ElementBased"
int PartitionCreate_ElementBased(Partition p)
{
#ifdef PETSC_USE_BOPT_g
  int cut; /* The number of edges of the dual crossing the partition */
#endif
  int ierr;

  PetscFunctionBegin;

  /* Partition elements */
  ierr = PetscObjectComposeFunction((PetscObject) p, "PartitionTriangular2D_CreateElementMap",
                                    "PartitionCreateElementMap_METIS", (void (*)(void)) PartitionCreateElementMap_METIS);
  CHKERRQ(ierr);
  ierr = PartitionElements_Private(p);                                                                   CHKERRQ(ierr);

  /* Partition the nodes */
  ierr = PetscObjectComposeFunction((PetscObject) p, "PartitionTriangular2D_CreateNodeMap",
                                    "PartitionCreateNodeMap_ElementBased", (void (*)(void)) PartitionCreateNodeMap_ElementBased);
  CHKERRQ(ierr);
  ierr = PartitionNodes_Private(p);                                                                      CHKERRQ(ierr);

  /* Partition the edges -- Changes the ghost nodes */
  ierr = PetscObjectComposeFunction((PetscObject) p, "PartitionTriangular2D_CreateEdgeMap",
                                    "PartitionCreateEdgeMap_NodeBased", (void (*)(void)) PartitionCreateEdgeMap_NodeBased);
  CHKERRQ(ierr);
  ierr = PartitionEdges_Private(p);                                                                      CHKERRQ(ierr);

  /* Partition boundary nodes */
  ierr = PartitionBoundaryNodes_Private(p);                                                              CHKERRQ(ierr);

  /* Redistribute structures and arrays implicitly numbered by canonical numbers */
  ierr = PartitionDistribute_Private(p);                                                                 CHKERRQ(ierr);

  /* Change to local node numbers */
  ierr = PartitionGlobalToLocal_Private(p);                                                              CHKERRQ(ierr);

#ifdef PETSC_USE_BOPT_g
  /* Compute the size of the cut */
  ierr = PartitionCalcCut_Private(p, &cut);                                                              CHKERRQ(ierr);
  PetscLogInfo(p, "Size of cut: %d\n", cut);
#endif

  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef  __FUNCT__
#define __FUNCT__ "PartitionCreate_Uni"
int PartitionCreate_Uni(Partition p)
{
  Partition_Triangular_2D *q    = (Partition_Triangular_2D *) p->data;
  Mesh                     mesh = p->mesh;
  Mesh_Triangular         *tri  = (Mesh_Triangular *) mesh->data;
  PetscTruth               opt;
  int                      ierr;

  PetscFunctionBegin;
  ierr = PetscOptionsHasName(p->prefix, "-part_mesh_reorder", &opt);                                      CHKERRQ(ierr);
  if (opt == PETSC_TRUE) {
    ierr = MeshGetOrdering(mesh, MESH_ORDER_TRIANGULAR_2D_RCM, &q->nodeOrdering);                         CHKERRQ(ierr);
    PetscLogObjectParent(p, q->nodeOrdering);

    /* Permute arrays implicitly numbered by node numbers */
    ierr = AOApplicationToPetscPermuteReal(q->nodeOrdering, 2, tri->nodes);                               CHKERRQ(ierr);
    ierr = AOApplicationToPetscPermuteInt(q->nodeOrdering, 1, tri->markers);                              CHKERRQ(ierr);
    ierr = AOApplicationToPetscPermuteInt(q->nodeOrdering, 1, tri->degrees);                              CHKERRQ(ierr);
    /* Renumber arrays dependent on the canonical node numbering */
    ierr = AOApplicationToPetsc(q->nodeOrdering, mesh->numEdges*2,                       tri->edges);      CHKERRQ(ierr);
    ierr = AOApplicationToPetsc(q->nodeOrdering, p->numOverlapElements*mesh->numCorners, tri->faces);      CHKERRQ(ierr);
    ierr = AOApplicationToPetsc(q->nodeOrdering, mesh->numBdNodes,                       tri->bdNodes);    CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef  __FUNCT__
#define __FUNCT__ "PartitionCreate_NodeBased"
int PartitionCreate_NodeBased(Partition p) {
#ifdef PETSC_USE_BOPT_g
  int cut; /* The number of edges of the dual crossing the partition */
#endif
  int ierr;

  PetscFunctionBegin;
  /* Partition Nodes */
  ierr = PetscObjectComposeFunction((PetscObject) p, "PartitionTriangular2D_CreateNodeMap",
                                    "PartitionCreateNodeMap_Simple_Seq", (void (*)(void)) PartitionCreateNodeMap_Simple_Seq);
  CHKERRQ(ierr);
  ierr = PartitionNodes_Private(p);                                                                      CHKERRQ(ierr);

  /* Partition elements */
  ierr = PetscObjectComposeFunction((PetscObject) p, "PartitionTriangular2D_CreateElementMap",
                                    "PartitionCreateElementMap_NodeBased", (void (*)(void)) PartitionCreateElementMap_NodeBased);
  CHKERRQ(ierr);
  ierr = PartitionElements_Private(p);                                                                   CHKERRQ(ierr);

  /* Partition edges */
  ierr = PetscObjectComposeFunction((PetscObject) p, "PartitionTriangular2D_CreateEdgeMap",
                                    "PartitionCreateEdgeMap_NodeBased", (void (*)(void)) PartitionCreateEdgeMap_NodeBased);
  CHKERRQ(ierr);
  ierr = PartitionEdges_Private(p);                                                                      CHKERRQ(ierr);

  /* Partition boundary nodes */
  ierr = PartitionBoundaryNodes_Private(p);                                                              CHKERRQ(ierr);

  /* Redistribute structures and arrays implicitly numbered by canonical numbers */
  ierr = PartitionDistribute_Private(p);                                                                 CHKERRQ(ierr);

  /* Change to local node numbers */
  ierr = PartitionGlobalToLocal_Private(p);                                                              CHKERRQ(ierr);

#ifdef PETSC_USE_BOPT_g
  /* Compute the size of the cut */
  ierr = PartitionCalcCut_Private(p, &cut);                                                              CHKERRQ(ierr);
  PetscLogInfo(p, "Size of cut: %d\n", cut);
#endif

  PetscFunctionReturn(0);
}
EXTERN_C_END


EXTERN_C_BEGIN
#undef  __FUNCT__
#define __FUNCT__ "PartitionCreate_Triangular_2D"
int PartitionCreate_Triangular_2D(Partition p) {
  Partition_Triangular_2D *q;
  Mesh                     mesh = p->mesh;
  int                    (*f)(Partition);
  int                      numProcs, rank, rem;
  int                      proc;
  int                      ierr;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(p->comm, &numProcs);                                                               CHKERRQ(ierr);
  ierr = MPI_Comm_rank(p->comm, &rank);                                                                   CHKERRQ(ierr);

  ierr = PetscNew(Partition_Triangular_2D, &q);                                                           CHKERRQ(ierr);
  PetscLogObjectMemory(p, sizeof(Partition_Triangular_2D));
  ierr = PetscMemcpy(p->ops, &POps, sizeof(struct _PartitionOps));                                        CHKERRQ(ierr);
  p->data = (void *) q;
  ierr = PetscStrallocpy(PARTITION_SER_TRIANGULAR_2D_BINARY, &p->serialize_name);                         CHKERRQ(ierr);
  PetscLogObjectParent(mesh, p);

  /* Initialize structure */
  p->numProcs             = numProcs;
  p->rank                 = rank;
  p->isElementPartitioned = PETSC_FALSE;
  p->ordering             = PETSC_NULL;
  p->ghostElements        = PETSC_NULL;
  p->ghostElementProcs    = PETSC_NULL;
  q->isNodePartitioned    = PETSC_FALSE;
  q->isEdgePartitioned    = PETSC_FALSE;
  q->nodeOrdering         = PETSC_NULL;
  q->ghostNodes           = PETSC_NULL;
  q->ghostNodeProcs       = PETSC_NULL;
  q->edgeOrdering         = PETSC_NULL;
  q->ghostBdNodes         = PETSC_NULL;
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &p->firstElement);                                       CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &q->firstNode);                                          CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &q->firstBdNode);                                        CHKERRQ(ierr);
  ierr = PetscMalloc((numProcs+1) * sizeof(int), &q->firstEdge);                                          CHKERRQ(ierr);
  PetscLogObjectMemory(p, (numProcs+1)*4 * sizeof(int));
  ierr = PetscMemzero(p->firstElement, (numProcs+1) * sizeof(int));                                       CHKERRQ(ierr);
  ierr = PetscMemzero(q->firstNode,    (numProcs+1) * sizeof(int));                                       CHKERRQ(ierr);
  ierr = PetscMemzero(q->firstBdNode,  (numProcs+1) * sizeof(int));                                       CHKERRQ(ierr);
  ierr = PetscMemzero(q->firstEdge,    (numProcs+1) * sizeof(int));                                       CHKERRQ(ierr);

  /* Setup crude preliminary partition */
  for(proc = 0; proc < numProcs; proc++) {
    rem                   = (mesh->numFaces%numProcs);
    p->firstElement[proc] = (mesh->numFaces/numProcs)*proc + PetscMin(rem, proc);
    rem                   = (mesh->numNodes%numProcs);
    q->firstNode[proc]    = (mesh->numNodes/numProcs)*proc + PetscMin(rem, proc);
    rem                   = (mesh->numEdges%numProcs);
    q->firstEdge[proc]    = (mesh->numEdges/numProcs)*proc + PetscMin(rem, proc);
    rem                   = (mesh->numBdNodes%numProcs);
    q->firstBdNode[proc]  = (mesh->numBdNodes/numProcs)*proc + PetscMin(rem, proc);
  }
  p->firstElement[numProcs] = mesh->numFaces;
  q->firstNode[numProcs]    = mesh->numNodes;
  q->firstEdge[numProcs]    = mesh->numEdges;
  q->firstBdNode[numProcs]  = mesh->numBdNodes;

  p->numLocElements            = p->firstElement[rank+1] - p->firstElement[rank];
  p->numElements               = p->firstElement[numProcs];
  p->numOverlapElements        = p->numLocElements;
  q->numLocNodes               = q->firstNode[rank+1] - q->firstNode[rank];
  q->numNodes                  = q->firstNode[numProcs];
  q->numOverlapNodes           = q->numLocNodes;
  q->numLocEdges               = q->firstEdge[rank+1] - q->firstEdge[rank];
  q->numEdges                  = q->firstEdge[numProcs];
  q->numLocBdNodes             = q->firstBdNode[rank+1] - q->firstBdNode[rank];
  q->numBdNodes                = q->firstBdNode[numProcs];
  q->numOverlapBdNodes         = q->numLocBdNodes;

  /* Partition the mesh */
  ierr = PetscObjectQueryFunction((PetscObject) p,"PartitionTriangular2D_Create_C",(PetscVoidFunction) &f); CHKERRQ(ierr);
  ierr = (*f)(p);                                                                                         CHKERRQ(ierr);

  /* Recalculate derived quantites */
  ierr = MeshTriangular2DCalcAreas(mesh, PETSC_FALSE);                                                    CHKERRQ(ierr);
  ierr = MeshTriangular2DCalcAspectRatios(mesh, PETSC_FALSE);                                             CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
EXTERN_C_END
