Actual source code: sbaijfact.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  3: #include <petsc/private/kernels/blockinvert.h>
  4: #include <petscis.h>

  6: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
  7: {
  8:   Mat_SeqSBAIJ *fact = (Mat_SeqSBAIJ *)F->data;
  9:   MatScalar    *dd   = fact->a;
 10:   PetscInt      mbs = fact->mbs, bs = F->rmap->bs, i, nneg_tmp, npos_tmp, *fi = fact->diag;

 12:   PetscFunctionBegin;
 13:   PetscCheck(bs == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for bs: %" PetscInt_FMT " >1 yet", bs);

 15:   nneg_tmp = 0;
 16:   npos_tmp = 0;
 17:   for (i = 0; i < mbs; i++) {
 18:     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
 19:     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
 20:     fi++;
 21:   }
 22:   if (nneg) *nneg = nneg_tmp;
 23:   if (npos) *npos = npos_tmp;
 24:   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
 25:   PetscFunctionReturn(PETSC_SUCCESS);
 26: }

 28: /*
 29:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 30:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
 31: */
 32: static PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F, Mat A, IS perm, const MatFactorInfo *info)
 33: {
 34:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b;
 35:   const PetscInt *rip, *ai, *aj;
 36:   PetscInt        i, mbs = a->mbs, *jutmp, bs = A->rmap->bs, bs2 = a->bs2;
 37:   PetscInt        m, reallocs = 0, prow;
 38:   PetscInt       *jl, *q, jmin, jmax, juidx, nzk, qm, *iu, *ju, k, j, vj, umax, maxadd;
 39:   PetscReal       f = info->fill;
 40:   PetscBool       perm_identity;

 42:   PetscFunctionBegin;
 43:   /* check whether perm is the identity mapping */
 44:   PetscCall(ISIdentity(perm, &perm_identity));
 45:   PetscCall(ISGetIndices(perm, &rip));

 47:   if (perm_identity) { /* without permutation */
 48:     a->permute = PETSC_FALSE;

 50:     ai = a->i;
 51:     aj = a->j;
 52:   } else { /* non-trivial permutation */
 53:     a->permute = PETSC_TRUE;

 55:     PetscCall(MatReorderingSeqSBAIJ(A, perm));

 57:     ai = a->inew;
 58:     aj = a->jnew;
 59:   }

 61:   /* initialization */
 62:   PetscCall(PetscMalloc1(mbs + 1, &iu));
 63:   umax = (PetscInt)(f * ai[mbs] + 1);
 64:   umax += mbs + 1;
 65:   PetscCall(PetscMalloc1(umax, &ju));
 66:   iu[0] = mbs + 1;
 67:   juidx = mbs + 1; /* index for ju */
 68:   /* jl linked list for pivot row -- linked list for col index */
 69:   PetscCall(PetscMalloc2(mbs, &jl, mbs, &q));
 70:   for (i = 0; i < mbs; i++) {
 71:     jl[i] = mbs;
 72:     q[i]  = 0;
 73:   }

 75:   /* for each row k */
 76:   for (k = 0; k < mbs; k++) {
 77:     for (i = 0; i < mbs; i++) q[i] = 0; /* to be removed! */
 78:     nzk  = 0;                           /* num. of nz blocks in k-th block row with diagonal block excluded */
 79:     q[k] = mbs;
 80:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 81:     jmin = ai[rip[k]] + 1; /* exclude diag[k] */
 82:     jmax = ai[rip[k] + 1];
 83:     for (j = jmin; j < jmax; j++) {
 84:       vj = rip[aj[j]]; /* col. value */
 85:       if (vj > k) {
 86:         qm = k;
 87:         do {
 88:           m  = qm;
 89:           qm = q[m];
 90:         } while (qm < vj);
 91:         PetscCheck(qm != vj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Duplicate entry in A");
 92:         nzk++;
 93:         q[m]  = vj;
 94:         q[vj] = qm;
 95:       } /* if (vj > k) */
 96:     }   /* for (j=jmin; j<jmax; j++) */

 98:     /* modify nonzero structure of k-th row by computing fill-in
 99:        for each row i to be merged in */
100:     prow = k;
101:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */

103:     while (prow < k) {
104:       /* merge row prow into k-th row */
105:       jmin = iu[prow] + 1;
106:       jmax = iu[prow + 1];
107:       qm   = k;
108:       for (j = jmin; j < jmax; j++) {
109:         vj = ju[j];
110:         do {
111:           m  = qm;
112:           qm = q[m];
113:         } while (qm < vj);
114:         if (qm != vj) {
115:           nzk++;
116:           q[m]  = vj;
117:           q[vj] = qm;
118:           qm    = vj;
119:         }
120:       }
121:       prow = jl[prow]; /* next pivot row */
122:     }

124:     /* add k to row list for first nonzero element in k-th row */
125:     if (nzk > 0) {
126:       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
127:       jl[k] = jl[i];
128:       jl[i] = k;
129:     }
130:     iu[k + 1] = iu[k] + nzk;

132:     /* allocate more space to ju if needed */
133:     if (iu[k + 1] > umax) {
134:       /* estimate how much additional space we will need */
135:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
136:       /* just double the memory each time */
137:       maxadd = umax;
138:       if (maxadd < nzk) maxadd = (mbs - k) * (nzk + 1) / 2;
139:       umax += maxadd;

141:       /* allocate a longer ju */
142:       PetscCall(PetscMalloc1(umax, &jutmp));
143:       PetscCall(PetscArraycpy(jutmp, ju, iu[k]));
144:       PetscCall(PetscFree(ju));
145:       ju = jutmp;
146:       reallocs++; /* count how many times we realloc */
147:     }

149:     /* save nonzero structure of k-th row in ju */
150:     i = k;
151:     while (nzk--) {
152:       i           = q[i];
153:       ju[juidx++] = i;
154:     }
155:   }

157: #if defined(PETSC_USE_INFO)
158:   if (ai[mbs] != 0) {
159:     PetscReal af = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
160:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
161:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
162:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
163:     PetscCall(PetscInfo(A, "for best performance.\n"));
164:   } else {
165:     PetscCall(PetscInfo(A, "Empty matrix\n"));
166:   }
167: #endif

169:   PetscCall(ISRestoreIndices(perm, &rip));
170:   PetscCall(PetscFree2(jl, q));

172:   /* put together the new matrix */
173:   PetscCall(MatSeqSBAIJSetPreallocation(F, bs, MAT_SKIP_ALLOCATION, NULL));

175:   b               = (Mat_SeqSBAIJ *)(F)->data;
176:   b->singlemalloc = PETSC_FALSE;
177:   b->free_a       = PETSC_TRUE;
178:   b->free_ij      = PETSC_TRUE;

180:   PetscCall(PetscMalloc1((iu[mbs] + 1) * bs2, &b->a));
181:   b->j    = ju;
182:   b->i    = iu;
183:   b->diag = NULL;
184:   b->ilen = NULL;
185:   b->imax = NULL;
186:   b->row  = perm;

188:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

190:   PetscCall(PetscObjectReference((PetscObject)perm));

192:   b->icol = perm;
193:   PetscCall(PetscObjectReference((PetscObject)perm));
194:   PetscCall(PetscMalloc1(bs * mbs + bs, &b->solve_work));
195:   /* In b structure:  Free imax, ilen, old a, old j.
196:      Allocate idnew, solve_work, new a, new j */
197:   b->maxnz = b->nz = iu[mbs];

199:   (F)->info.factor_mallocs   = reallocs;
200:   (F)->info.fill_ratio_given = f;
201:   if (ai[mbs] != 0) {
202:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs]) / ((PetscReal)ai[mbs]);
203:   } else {
204:     (F)->info.fill_ratio_needed = 0.0;
205:   }
206:   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(F, perm_identity));
207:   PetscFunctionReturn(PETSC_SUCCESS);
208: }
209: /*
210:     Symbolic U^T*D*U factorization for SBAIJ format.
211:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
212: */
213: #include <petscbt.h>
214: #include <../src/mat/utils/freespace.h>
215: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
216: {
217:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
218:   Mat_SeqSBAIJ      *b;
219:   PetscBool          perm_identity, missing;
220:   PetscReal          fill = info->fill;
221:   const PetscInt    *rip, *ai = a->i, *aj = a->j;
222:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow;
223:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
224:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr, *udiag;
225:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
226:   PetscBT            lnkbt;

228:   PetscFunctionBegin;
229:   PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must be square matrix, rows %" PetscInt_FMT " columns %" PetscInt_FMT, A->rmap->n, A->cmap->n);
230:   PetscCall(MatMissingDiagonal(A, &missing, &i));
231:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
232:   if (bs > 1) {
233:     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact, A, perm, info));
234:     PetscFunctionReturn(PETSC_SUCCESS);
235:   }

237:   /* check whether perm is the identity mapping */
238:   PetscCall(ISIdentity(perm, &perm_identity));
239:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
240:   a->permute = PETSC_FALSE;
241:   PetscCall(ISGetIndices(perm, &rip));

243:   /* initialization */
244:   PetscCall(PetscMalloc1(mbs + 1, &ui));
245:   PetscCall(PetscMalloc1(mbs + 1, &udiag));
246:   ui[0] = 0;

248:   /* jl: linked list for storing indices of the pivot rows
249:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
250:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
251:   for (i = 0; i < mbs; i++) {
252:     jl[i] = mbs;
253:     il[i] = 0;
254:   }

256:   /* create and initialize a linked list for storing column indices of the active row k */
257:   nlnk = mbs + 1;
258:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

260:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
261:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
262:   current_space = free_space;

264:   for (k = 0; k < mbs; k++) { /* for each active row k */
265:     /* initialize lnk by the column indices of row rip[k] of A */
266:     nzk   = 0;
267:     ncols = ai[k + 1] - ai[k];
268:     PetscCheck(ncols, PETSC_COMM_SELF, PETSC_ERR_MAT_CH_ZRPVT, "Empty row %" PetscInt_FMT " in matrix ", k);
269:     for (j = 0; j < ncols; j++) {
270:       i       = *(aj + ai[k] + j);
271:       cols[j] = i;
272:     }
273:     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
274:     nzk += nlnk;

276:     /* update lnk by computing fill-in for each pivot row to be merged in */
277:     prow = jl[k]; /* 1st pivot row */

279:     while (prow < k) {
280:       nextprow = jl[prow];
281:       /* merge prow into k-th row */
282:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
283:       jmax   = ui[prow + 1];
284:       ncols  = jmax - jmin;
285:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
286:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
287:       nzk += nlnk;

289:       /* update il and jl for prow */
290:       if (jmin < jmax) {
291:         il[prow] = jmin;
292:         j        = *uj_ptr;
293:         jl[prow] = jl[j];
294:         jl[j]    = prow;
295:       }
296:       prow = nextprow;
297:     }

299:     /* if free space is not available, make more free space */
300:     if (current_space->local_remaining < nzk) {
301:       i = mbs - k + 1;                                   /* num of unfactored rows */
302:       i = PetscIntMultTruncate(i, PetscMin(nzk, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
303:       PetscCall(PetscFreeSpaceGet(i, &current_space));
304:       reallocs++;
305:     }

307:     /* copy data into free space, then initialize lnk */
308:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

310:     /* add the k-th row into il and jl */
311:     if (nzk > 1) {
312:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
313:       jl[k] = jl[i];
314:       jl[i] = k;
315:       il[k] = ui[k] + 1;
316:     }
317:     ui_ptr[k] = current_space->array;

319:     current_space->array += nzk;
320:     current_space->local_used += nzk;
321:     current_space->local_remaining -= nzk;

323:     ui[k + 1] = ui[k] + nzk;
324:   }

326:   PetscCall(ISRestoreIndices(perm, &rip));
327:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

329:   /* destroy list of free space and other temporary array(s) */
330:   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
331:   PetscCall(PetscFreeSpaceContiguous_Cholesky(&free_space, uj, mbs, ui, udiag)); /* store matrix factor */
332:   PetscCall(PetscLLDestroy(lnk, lnkbt));

334:   /* put together the new matrix in MATSEQSBAIJ format */
335:   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));

337:   b               = (Mat_SeqSBAIJ *)fact->data;
338:   b->singlemalloc = PETSC_FALSE;
339:   b->free_a       = PETSC_TRUE;
340:   b->free_ij      = PETSC_TRUE;

342:   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));

344:   b->j         = uj;
345:   b->i         = ui;
346:   b->diag      = udiag;
347:   b->free_diag = PETSC_TRUE;
348:   b->ilen      = NULL;
349:   b->imax      = NULL;
350:   b->row       = perm;
351:   b->icol      = perm;

353:   PetscCall(PetscObjectReference((PetscObject)perm));
354:   PetscCall(PetscObjectReference((PetscObject)perm));

356:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

358:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));

360:   b->maxnz = b->nz = ui[mbs];

362:   fact->info.factor_mallocs   = reallocs;
363:   fact->info.fill_ratio_given = fill;
364:   if (ai[mbs] != 0) {
365:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
366:   } else {
367:     fact->info.fill_ratio_needed = 0.0;
368:   }
369: #if defined(PETSC_USE_INFO)
370:   if (ai[mbs] != 0) {
371:     PetscReal af = fact->info.fill_ratio_needed;
372:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
373:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
374:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
375:   } else {
376:     PetscCall(PetscInfo(A, "Empty matrix\n"));
377:   }
378: #endif
379:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
380:   PetscFunctionReturn(PETSC_SUCCESS);
381: }

383: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact, Mat A, IS perm, const MatFactorInfo *info)
384: {
385:   Mat_SeqSBAIJ      *a = (Mat_SeqSBAIJ *)A->data;
386:   Mat_SeqSBAIJ      *b;
387:   PetscBool          perm_identity, missing;
388:   PetscReal          fill = info->fill;
389:   const PetscInt    *rip, *ai, *aj;
390:   PetscInt           i, mbs = a->mbs, bs = A->rmap->bs, reallocs = 0, prow, d;
391:   PetscInt          *jl, jmin, jmax, nzk, *ui, k, j, *il, nextprow;
392:   PetscInt           nlnk, *lnk, ncols, *cols, *uj, **ui_ptr, *uj_ptr;
393:   PetscFreeSpaceList free_space = NULL, current_space = NULL;
394:   PetscBT            lnkbt;

396:   PetscFunctionBegin;
397:   PetscCall(MatMissingDiagonal(A, &missing, &d));
398:   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, d);

400:   /*
401:    This code originally uses Modified Sparse Row (MSR) storage
402:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choice!
403:    Then it is rewritten so the factor B takes seqsbaij format. However the associated
404:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
405:    thus the original code in MSR format is still used for these cases.
406:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
407:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
408:   */
409:   if (bs > 1) {
410:     PetscCall(MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact, A, perm, info));
411:     PetscFunctionReturn(PETSC_SUCCESS);
412:   }

414:   /* check whether perm is the identity mapping */
415:   PetscCall(ISIdentity(perm, &perm_identity));
416:   PetscCheck(perm_identity, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix reordering is not supported for sbaij matrix. Use aij format");
417:   a->permute = PETSC_FALSE;
418:   ai         = a->i;
419:   aj         = a->j;
420:   PetscCall(ISGetIndices(perm, &rip));

422:   /* initialization */
423:   PetscCall(PetscMalloc1(mbs + 1, &ui));
424:   ui[0] = 0;

426:   /* jl: linked list for storing indices of the pivot rows
427:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
428:   PetscCall(PetscMalloc4(mbs, &ui_ptr, mbs, &il, mbs, &jl, mbs, &cols));
429:   for (i = 0; i < mbs; i++) {
430:     jl[i] = mbs;
431:     il[i] = 0;
432:   }

434:   /* create and initialize a linked list for storing column indices of the active row k */
435:   nlnk = mbs + 1;
436:   PetscCall(PetscLLCreate(mbs, mbs, nlnk, lnk, lnkbt));

438:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
439:   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(fill, ai[mbs] + 1), &free_space));
440:   current_space = free_space;

442:   for (k = 0; k < mbs; k++) { /* for each active row k */
443:     /* initialize lnk by the column indices of row rip[k] of A */
444:     nzk   = 0;
445:     ncols = ai[rip[k] + 1] - ai[rip[k]];
446:     for (j = 0; j < ncols; j++) {
447:       i       = *(aj + ai[rip[k]] + j);
448:       cols[j] = rip[i];
449:     }
450:     PetscCall(PetscLLAdd(ncols, cols, mbs, &nlnk, lnk, lnkbt));
451:     nzk += nlnk;

453:     /* update lnk by computing fill-in for each pivot row to be merged in */
454:     prow = jl[k]; /* 1st pivot row */

456:     while (prow < k) {
457:       nextprow = jl[prow];
458:       /* merge prow into k-th row */
459:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
460:       jmax   = ui[prow + 1];
461:       ncols  = jmax - jmin;
462:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
463:       PetscCall(PetscLLAddSorted(ncols, uj_ptr, mbs, &nlnk, lnk, lnkbt));
464:       nzk += nlnk;

466:       /* update il and jl for prow */
467:       if (jmin < jmax) {
468:         il[prow] = jmin;

470:         j        = *uj_ptr;
471:         jl[prow] = jl[j];
472:         jl[j]    = prow;
473:       }
474:       prow = nextprow;
475:     }

477:     /* if free space is not available, make more free space */
478:     if (current_space->local_remaining < nzk) {
479:       i = mbs - k + 1;                                                            /* num of unfactored rows */
480:       i = PetscMin(PetscIntMultTruncate(i, nzk), PetscIntMultTruncate(i, i - 1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
481:       PetscCall(PetscFreeSpaceGet(i, &current_space));
482:       reallocs++;
483:     }

485:     /* copy data into free space, then initialize lnk */
486:     PetscCall(PetscLLClean(mbs, mbs, nzk, lnk, current_space->array, lnkbt));

488:     /* add the k-th row into il and jl */
489:     if (nzk - 1 > 0) {
490:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
491:       jl[k] = jl[i];
492:       jl[i] = k;
493:       il[k] = ui[k] + 1;
494:     }
495:     ui_ptr[k] = current_space->array;

497:     current_space->array += nzk;
498:     current_space->local_used += nzk;
499:     current_space->local_remaining -= nzk;

501:     ui[k + 1] = ui[k] + nzk;
502:   }

504:   PetscCall(ISRestoreIndices(perm, &rip));
505:   PetscCall(PetscFree4(ui_ptr, il, jl, cols));

507:   /* destroy list of free space and other temporary array(s) */
508:   PetscCall(PetscMalloc1(ui[mbs] + 1, &uj));
509:   PetscCall(PetscFreeSpaceContiguous(&free_space, uj));
510:   PetscCall(PetscLLDestroy(lnk, lnkbt));

512:   /* put together the new matrix in MATSEQSBAIJ format */
513:   PetscCall(MatSeqSBAIJSetPreallocation(fact, bs, MAT_SKIP_ALLOCATION, NULL));

515:   b               = (Mat_SeqSBAIJ *)fact->data;
516:   b->singlemalloc = PETSC_FALSE;
517:   b->free_a       = PETSC_TRUE;
518:   b->free_ij      = PETSC_TRUE;

520:   PetscCall(PetscMalloc1(ui[mbs] + 1, &b->a));

522:   b->j    = uj;
523:   b->i    = ui;
524:   b->diag = NULL;
525:   b->ilen = NULL;
526:   b->imax = NULL;
527:   b->row  = perm;

529:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

531:   PetscCall(PetscObjectReference((PetscObject)perm));
532:   b->icol = perm;
533:   PetscCall(PetscObjectReference((PetscObject)perm));
534:   PetscCall(PetscMalloc1(mbs + 1, &b->solve_work));
535:   b->maxnz = b->nz = ui[mbs];

537:   fact->info.factor_mallocs   = reallocs;
538:   fact->info.fill_ratio_given = fill;
539:   if (ai[mbs] != 0) {
540:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs]) / ai[mbs];
541:   } else {
542:     fact->info.fill_ratio_needed = 0.0;
543:   }
544: #if defined(PETSC_USE_INFO)
545:   if (ai[mbs] != 0) {
546:     PetscReal af = fact->info.fill_ratio_needed;
547:     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)fill, (double)af));
548:     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
549:     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g) for best performance.\n", (double)af));
550:   } else {
551:     PetscCall(PetscInfo(A, "Empty matrix\n"));
552:   }
553: #endif
554:   PetscCall(MatSeqSBAIJSetNumericFactorization_inplace(fact, perm_identity));
555:   PetscFunctionReturn(PETSC_SUCCESS);
556: }

558: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C, Mat A, const MatFactorInfo *info)
559: {
560:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
561:   IS              perm = b->row;
562:   const PetscInt *ai, *aj, *perm_ptr, mbs = a->mbs, *bi = b->i, *bj = b->j;
563:   PetscInt        i, j;
564:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
565:   PetscInt        bs = A->rmap->bs, bs2 = a->bs2;
566:   MatScalar      *ba = b->a, *aa, *ap, *dk, *uik;
567:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr;
568:   MatScalar      *work;
569:   PetscInt       *pivots;
570:   PetscBool       allowzeropivot, zeropivotdetected;

572:   PetscFunctionBegin;
573:   /* initialization */
574:   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
575:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
576:   allowzeropivot = PetscNot(A->erroriffailure);

578:   il[0] = 0;
579:   for (i = 0; i < mbs; i++) jl[i] = mbs;

581:   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
582:   PetscCall(PetscMalloc1(bs, &pivots));

584:   PetscCall(ISGetIndices(perm, &perm_ptr));

586:   /* check permutation */
587:   if (!a->permute) {
588:     ai = a->i;
589:     aj = a->j;
590:     aa = a->a;
591:   } else {
592:     ai = a->inew;
593:     aj = a->jnew;
594:     PetscCall(PetscMalloc1(bs2 * ai[mbs], &aa));
595:     PetscCall(PetscArraycpy(aa, a->a, bs2 * ai[mbs]));
596:     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
597:     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));

599:     for (i = 0; i < mbs; i++) {
600:       jmin = ai[i];
601:       jmax = ai[i + 1];
602:       for (j = jmin; j < jmax; j++) {
603:         while (a2anew[j] != j) {
604:           k         = a2anew[j];
605:           a2anew[j] = a2anew[k];
606:           a2anew[k] = k;
607:           for (k1 = 0; k1 < bs2; k1++) {
608:             dk[k1]           = aa[k * bs2 + k1];
609:             aa[k * bs2 + k1] = aa[j * bs2 + k1];
610:             aa[j * bs2 + k1] = dk[k1];
611:           }
612:         }
613:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
614:         if (i > aj[j]) {
615:           ap = aa + j * bs2;                       /* ptr to the beginning of j-th block of aa */
616:           for (k = 0; k < bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
617:           for (k = 0; k < bs; k++) {               /* j-th block of aa <- dk^T */
618:             for (k1 = 0; k1 < bs; k1++) *ap++ = dk[k + bs * k1];
619:           }
620:         }
621:       }
622:     }
623:     PetscCall(PetscFree(a2anew));
624:   }

626:   /* for each row k */
627:   for (k = 0; k < mbs; k++) {
628:     /*initialize k-th row with elements nonzero in row perm(k) of A */
629:     jmin = ai[perm_ptr[k]];
630:     jmax = ai[perm_ptr[k] + 1];

632:     ap = aa + jmin * bs2;
633:     for (j = jmin; j < jmax; j++) {
634:       vj       = perm_ptr[aj[j]]; /* block col. index */
635:       rtmp_ptr = rtmp + vj * bs2;
636:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
637:     }

639:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
640:     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
641:     i = jl[k]; /* first row to be added to k_th row  */

643:     while (i < k) {
644:       nexti = jl[i]; /* next row to be added to k_th row */

646:       /* compute multiplier */
647:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

649:       /* uik = -inv(Di)*U_bar(i,k) */
650:       diag = ba + i * bs2;
651:       u    = ba + ili * bs2;
652:       PetscCall(PetscArrayzero(uik, bs2));
653:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

655:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
656:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
657:       PetscCall(PetscLogFlops(4.0 * bs * bs2));

659:       /* update -U(i,k) */
660:       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));

662:       /* add multiple of row i to k-th row ... */
663:       jmin = ili + 1;
664:       jmax = bi[i + 1];
665:       if (jmin < jmax) {
666:         for (j = jmin; j < jmax; j++) {
667:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
668:           rtmp_ptr = rtmp + bj[j] * bs2;
669:           u        = ba + j * bs2;
670:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
671:         }
672:         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));

674:         /* ... add i to row list for next nonzero entry */
675:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
676:         j     = bj[jmin];
677:         jl[i] = jl[j];
678:         jl[j] = i; /* update jl */
679:       }
680:       i = nexti;
681:     }

683:     /* save nonzero entries in k-th row of U ... */

685:     /* invert diagonal block */
686:     diag = ba + k * bs2;
687:     PetscCall(PetscArraycpy(diag, dk, bs2));

689:     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
690:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

692:     jmin = bi[k];
693:     jmax = bi[k + 1];
694:     if (jmin < jmax) {
695:       for (j = jmin; j < jmax; j++) {
696:         vj       = bj[j]; /* block col. index of U */
697:         u        = ba + j * bs2;
698:         rtmp_ptr = rtmp + vj * bs2;
699:         for (k1 = 0; k1 < bs2; k1++) {
700:           *u++        = *rtmp_ptr;
701:           *rtmp_ptr++ = 0.0;
702:         }
703:       }

705:       /* ... add k to row list for first nonzero entry in k-th row */
706:       il[k] = jmin;
707:       i     = bj[jmin];
708:       jl[k] = jl[i];
709:       jl[i] = k;
710:     }
711:   }

713:   PetscCall(PetscFree(rtmp));
714:   PetscCall(PetscFree2(il, jl));
715:   PetscCall(PetscFree3(dk, uik, work));
716:   PetscCall(PetscFree(pivots));
717:   if (a->permute) PetscCall(PetscFree(aa));

719:   PetscCall(ISRestoreIndices(perm, &perm_ptr));

721:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
722:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
723:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
724:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

726:   C->assembled    = PETSC_TRUE;
727:   C->preallocated = PETSC_TRUE;

729:   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
730:   PetscFunctionReturn(PETSC_SUCCESS);
731: }

733: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
734: {
735:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
736:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
737:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
738:   PetscInt      bs = A->rmap->bs, bs2 = a->bs2;
739:   MatScalar    *ba = b->a, *aa, *ap, *dk, *uik;
740:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
741:   MatScalar    *work;
742:   PetscInt     *pivots;
743:   PetscBool     allowzeropivot, zeropivotdetected;

745:   PetscFunctionBegin;
746:   PetscCall(PetscCalloc1(bs2 * mbs, &rtmp));
747:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
748:   il[0] = 0;
749:   for (i = 0; i < mbs; i++) jl[i] = mbs;

751:   PetscCall(PetscMalloc3(bs2, &dk, bs2, &uik, bs, &work));
752:   PetscCall(PetscMalloc1(bs, &pivots));
753:   allowzeropivot = PetscNot(A->erroriffailure);

755:   ai = a->i;
756:   aj = a->j;
757:   aa = a->a;

759:   /* for each row k */
760:   for (k = 0; k < mbs; k++) {
761:     /*initialize k-th row with elements nonzero in row k of A */
762:     jmin = ai[k];
763:     jmax = ai[k + 1];
764:     ap   = aa + jmin * bs2;
765:     for (j = jmin; j < jmax; j++) {
766:       vj       = aj[j]; /* block col. index */
767:       rtmp_ptr = rtmp + vj * bs2;
768:       for (i = 0; i < bs2; i++) *rtmp_ptr++ = *ap++;
769:     }

771:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
772:     PetscCall(PetscArraycpy(dk, rtmp + k * bs2, bs2));
773:     i = jl[k]; /* first row to be added to k_th row  */

775:     while (i < k) {
776:       nexti = jl[i]; /* next row to be added to k_th row */

778:       /* compute multiplier */
779:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

781:       /* uik = -inv(Di)*U_bar(i,k) */
782:       diag = ba + i * bs2;
783:       u    = ba + ili * bs2;
784:       PetscCall(PetscArrayzero(uik, bs2));
785:       PetscKernel_A_gets_A_minus_B_times_C(bs, uik, diag, u);

787:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
788:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, dk, uik, u);
789:       PetscCall(PetscLogFlops(2.0 * bs * bs2));

791:       /* update -U(i,k) */
792:       PetscCall(PetscArraycpy(ba + ili * bs2, uik, bs2));

794:       /* add multiple of row i to k-th row ... */
795:       jmin = ili + 1;
796:       jmax = bi[i + 1];
797:       if (jmin < jmax) {
798:         for (j = jmin; j < jmax; j++) {
799:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
800:           rtmp_ptr = rtmp + bj[j] * bs2;
801:           u        = ba + j * bs2;
802:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs, rtmp_ptr, uik, u);
803:         }
804:         PetscCall(PetscLogFlops(2.0 * bs * bs2 * (jmax - jmin)));

806:         /* ... add i to row list for next nonzero entry */
807:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
808:         j     = bj[jmin];
809:         jl[i] = jl[j];
810:         jl[j] = i; /* update jl */
811:       }
812:       i = nexti;
813:     }

815:     /* save nonzero entries in k-th row of U ... */

817:     /* invert diagonal block */
818:     diag = ba + k * bs2;
819:     PetscCall(PetscArraycpy(diag, dk, bs2));

821:     PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, pivots, work, allowzeropivot, &zeropivotdetected));
822:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

824:     jmin = bi[k];
825:     jmax = bi[k + 1];
826:     if (jmin < jmax) {
827:       for (j = jmin; j < jmax; j++) {
828:         vj       = bj[j]; /* block col. index of U */
829:         u        = ba + j * bs2;
830:         rtmp_ptr = rtmp + vj * bs2;
831:         for (k1 = 0; k1 < bs2; k1++) {
832:           *u++        = *rtmp_ptr;
833:           *rtmp_ptr++ = 0.0;
834:         }
835:       }

837:       /* ... add k to row list for first nonzero entry in k-th row */
838:       il[k] = jmin;
839:       i     = bj[jmin];
840:       jl[k] = jl[i];
841:       jl[i] = k;
842:     }
843:   }

845:   PetscCall(PetscFree(rtmp));
846:   PetscCall(PetscFree2(il, jl));
847:   PetscCall(PetscFree3(dk, uik, work));
848:   PetscCall(PetscFree(pivots));

850:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
851:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
852:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
853:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
854:   C->assembled           = PETSC_TRUE;
855:   C->preallocated        = PETSC_TRUE;

857:   PetscCall(PetscLogFlops(1.3333 * bs * bs2 * b->mbs)); /* from inverting diagonal blocks */
858:   PetscFunctionReturn(PETSC_SUCCESS);
859: }

861: /*
862:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
863:     Version for blocks 2 by 2.
864: */
865: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C, Mat A, const MatFactorInfo *info)
866: {
867:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
868:   IS              perm = b->row;
869:   const PetscInt *ai, *aj, *perm_ptr;
870:   PetscInt        i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
871:   PetscInt       *a2anew, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
872:   MatScalar      *ba = b->a, *aa, *ap;
873:   MatScalar      *u, *diag, *rtmp, *rtmp_ptr, dk[4], uik[4];
874:   PetscReal       shift = info->shiftamount;
875:   PetscBool       allowzeropivot, zeropivotdetected;

877:   PetscFunctionBegin;
878:   allowzeropivot = PetscNot(A->erroriffailure);

880:   /* initialization */
881:   /* il and jl record the first nonzero element in each row of the accessing
882:      window U(0:k, k:mbs-1).
883:      jl:    list of rows to be added to uneliminated rows
884:             i>= k: jl(i) is the first row to be added to row i
885:             i<  k: jl(i) is the row following row i in some list of rows
886:             jl(i) = mbs indicates the end of a list
887:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
888:             row i of U */
889:   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
890:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
891:   il[0] = 0;
892:   for (i = 0; i < mbs; i++) jl[i] = mbs;

894:   PetscCall(ISGetIndices(perm, &perm_ptr));

896:   /* check permutation */
897:   if (!a->permute) {
898:     ai = a->i;
899:     aj = a->j;
900:     aa = a->a;
901:   } else {
902:     ai = a->inew;
903:     aj = a->jnew;
904:     PetscCall(PetscMalloc1(4 * ai[mbs], &aa));
905:     PetscCall(PetscArraycpy(aa, a->a, 4 * ai[mbs]));
906:     PetscCall(PetscMalloc1(ai[mbs], &a2anew));
907:     PetscCall(PetscArraycpy(a2anew, a->a2anew, ai[mbs]));

909:     for (i = 0; i < mbs; i++) {
910:       jmin = ai[i];
911:       jmax = ai[i + 1];
912:       for (j = jmin; j < jmax; j++) {
913:         while (a2anew[j] != j) {
914:           k         = a2anew[j];
915:           a2anew[j] = a2anew[k];
916:           a2anew[k] = k;
917:           for (k1 = 0; k1 < 4; k1++) {
918:             dk[k1]         = aa[k * 4 + k1];
919:             aa[k * 4 + k1] = aa[j * 4 + k1];
920:             aa[j * 4 + k1] = dk[k1];
921:           }
922:         }
923:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
924:         if (i > aj[j]) {
925:           ap    = aa + j * 4; /* ptr to the beginning of the block */
926:           dk[1] = ap[1];      /* swap ap[1] and ap[2] */
927:           ap[1] = ap[2];
928:           ap[2] = dk[1];
929:         }
930:       }
931:     }
932:     PetscCall(PetscFree(a2anew));
933:   }

935:   /* for each row k */
936:   for (k = 0; k < mbs; k++) {
937:     /*initialize k-th row with elements nonzero in row perm(k) of A */
938:     jmin = ai[perm_ptr[k]];
939:     jmax = ai[perm_ptr[k] + 1];
940:     ap   = aa + jmin * 4;
941:     for (j = jmin; j < jmax; j++) {
942:       vj       = perm_ptr[aj[j]]; /* block col. index */
943:       rtmp_ptr = rtmp + vj * 4;
944:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
945:     }

947:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
948:     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
949:     i = jl[k]; /* first row to be added to k_th row  */

951:     while (i < k) {
952:       nexti = jl[i]; /* next row to be added to k_th row */

954:       /* compute multiplier */
955:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

957:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
958:       diag   = ba + i * 4;
959:       u      = ba + ili * 4;
960:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
961:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
962:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
963:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

965:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
966:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
967:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
968:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
969:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

971:       PetscCall(PetscLogFlops(16.0 * 2.0));

973:       /* update -U(i,k): ba[ili] = uik */
974:       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));

976:       /* add multiple of row i to k-th row ... */
977:       jmin = ili + 1;
978:       jmax = bi[i + 1];
979:       if (jmin < jmax) {
980:         for (j = jmin; j < jmax; j++) {
981:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
982:           rtmp_ptr = rtmp + bj[j] * 4;
983:           u        = ba + j * 4;
984:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
985:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
986:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
987:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
988:         }
989:         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));

991:         /* ... add i to row list for next nonzero entry */
992:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
993:         j     = bj[jmin];
994:         jl[i] = jl[j];
995:         jl[j] = i; /* update jl */
996:       }
997:       i = nexti;
998:     }

1000:     /* save nonzero entries in k-th row of U ... */

1002:     /* invert diagonal block */
1003:     diag = ba + k * 4;
1004:     PetscCall(PetscArraycpy(diag, dk, 4));
1005:     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1006:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1008:     jmin = bi[k];
1009:     jmax = bi[k + 1];
1010:     if (jmin < jmax) {
1011:       for (j = jmin; j < jmax; j++) {
1012:         vj       = bj[j]; /* block col. index of U */
1013:         u        = ba + j * 4;
1014:         rtmp_ptr = rtmp + vj * 4;
1015:         for (k1 = 0; k1 < 4; k1++) {
1016:           *u++        = *rtmp_ptr;
1017:           *rtmp_ptr++ = 0.0;
1018:         }
1019:       }

1021:       /* ... add k to row list for first nonzero entry in k-th row */
1022:       il[k] = jmin;
1023:       i     = bj[jmin];
1024:       jl[k] = jl[i];
1025:       jl[i] = k;
1026:     }
1027:   }

1029:   PetscCall(PetscFree(rtmp));
1030:   PetscCall(PetscFree2(il, jl));
1031:   if (a->permute) PetscCall(PetscFree(aa));
1032:   PetscCall(ISRestoreIndices(perm, &perm_ptr));

1034:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1035:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1036:   C->assembled           = PETSC_TRUE;
1037:   C->preallocated        = PETSC_TRUE;

1039:   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1040:   PetscFunctionReturn(PETSC_SUCCESS);
1041: }

1043: /*
1044:       Version for when blocks are 2 by 2 Using natural ordering
1045: */
1046: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C, Mat A, const MatFactorInfo *info)
1047: {
1048:   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1049:   PetscInt      i, j, mbs = a->mbs, *bi = b->i, *bj = b->j;
1050:   PetscInt     *ai, *aj, k, k1, jmin, jmax, *jl, *il, vj, nexti, ili;
1051:   MatScalar    *ba = b->a, *aa, *ap, dk[8], uik[8];
1052:   MatScalar    *u, *diag, *rtmp, *rtmp_ptr;
1053:   PetscReal     shift = info->shiftamount;
1054:   PetscBool     allowzeropivot, zeropivotdetected;

1056:   PetscFunctionBegin;
1057:   allowzeropivot = PetscNot(A->erroriffailure);

1059:   /* initialization */
1060:   /* il and jl record the first nonzero element in each row of the accessing
1061:      window U(0:k, k:mbs-1).
1062:      jl:    list of rows to be added to uneliminated rows
1063:             i>= k: jl(i) is the first row to be added to row i
1064:             i<  k: jl(i) is the row following row i in some list of rows
1065:             jl(i) = mbs indicates the end of a list
1066:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1067:             row i of U */
1068:   PetscCall(PetscCalloc1(4 * mbs, &rtmp));
1069:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));
1070:   il[0] = 0;
1071:   for (i = 0; i < mbs; i++) jl[i] = mbs;

1073:   ai = a->i;
1074:   aj = a->j;
1075:   aa = a->a;

1077:   /* for each row k */
1078:   for (k = 0; k < mbs; k++) {
1079:     /*initialize k-th row with elements nonzero in row k of A */
1080:     jmin = ai[k];
1081:     jmax = ai[k + 1];
1082:     ap   = aa + jmin * 4;
1083:     for (j = jmin; j < jmax; j++) {
1084:       vj       = aj[j]; /* block col. index */
1085:       rtmp_ptr = rtmp + vj * 4;
1086:       for (i = 0; i < 4; i++) *rtmp_ptr++ = *ap++;
1087:     }

1089:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1090:     PetscCall(PetscArraycpy(dk, rtmp + k * 4, 4));
1091:     i = jl[k]; /* first row to be added to k_th row  */

1093:     while (i < k) {
1094:       nexti = jl[i]; /* next row to be added to k_th row */

1096:       /* compute multiplier */
1097:       ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */

1099:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1100:       diag   = ba + i * 4;
1101:       u      = ba + ili * 4;
1102:       uik[0] = -(diag[0] * u[0] + diag[2] * u[1]);
1103:       uik[1] = -(diag[1] * u[0] + diag[3] * u[1]);
1104:       uik[2] = -(diag[0] * u[2] + diag[2] * u[3]);
1105:       uik[3] = -(diag[1] * u[2] + diag[3] * u[3]);

1107:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1108:       dk[0] += uik[0] * u[0] + uik[1] * u[1];
1109:       dk[1] += uik[2] * u[0] + uik[3] * u[1];
1110:       dk[2] += uik[0] * u[2] + uik[1] * u[3];
1111:       dk[3] += uik[2] * u[2] + uik[3] * u[3];

1113:       PetscCall(PetscLogFlops(16.0 * 2.0));

1115:       /* update -U(i,k): ba[ili] = uik */
1116:       PetscCall(PetscArraycpy(ba + ili * 4, uik, 4));

1118:       /* add multiple of row i to k-th row ... */
1119:       jmin = ili + 1;
1120:       jmax = bi[i + 1];
1121:       if (jmin < jmax) {
1122:         for (j = jmin; j < jmax; j++) {
1123:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1124:           rtmp_ptr = rtmp + bj[j] * 4;
1125:           u        = ba + j * 4;
1126:           rtmp_ptr[0] += uik[0] * u[0] + uik[1] * u[1];
1127:           rtmp_ptr[1] += uik[2] * u[0] + uik[3] * u[1];
1128:           rtmp_ptr[2] += uik[0] * u[2] + uik[1] * u[3];
1129:           rtmp_ptr[3] += uik[2] * u[2] + uik[3] * u[3];
1130:         }
1131:         PetscCall(PetscLogFlops(16.0 * (jmax - jmin)));

1133:         /* ... add i to row list for next nonzero entry */
1134:         il[i] = jmin; /* update il(i) in column k+1, ... mbs-1 */
1135:         j     = bj[jmin];
1136:         jl[i] = jl[j];
1137:         jl[j] = i; /* update jl */
1138:       }
1139:       i = nexti;
1140:     }

1142:     /* save nonzero entries in k-th row of U ... */

1144:     /* invert diagonal block */
1145:     diag = ba + k * 4;
1146:     PetscCall(PetscArraycpy(diag, dk, 4));
1147:     PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
1148:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1150:     jmin = bi[k];
1151:     jmax = bi[k + 1];
1152:     if (jmin < jmax) {
1153:       for (j = jmin; j < jmax; j++) {
1154:         vj       = bj[j]; /* block col. index of U */
1155:         u        = ba + j * 4;
1156:         rtmp_ptr = rtmp + vj * 4;
1157:         for (k1 = 0; k1 < 4; k1++) {
1158:           *u++        = *rtmp_ptr;
1159:           *rtmp_ptr++ = 0.0;
1160:         }
1161:       }

1163:       /* ... add k to row list for first nonzero entry in k-th row */
1164:       il[k] = jmin;
1165:       i     = bj[jmin];
1166:       jl[k] = jl[i];
1167:       jl[i] = k;
1168:     }
1169:   }

1171:   PetscCall(PetscFree(rtmp));
1172:   PetscCall(PetscFree2(il, jl));

1174:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1175:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1176:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1177:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1178:   C->assembled           = PETSC_TRUE;
1179:   C->preallocated        = PETSC_TRUE;

1181:   PetscCall(PetscLogFlops(1.3333 * 8 * b->mbs)); /* from inverting diagonal blocks */
1182:   PetscFunctionReturn(PETSC_SUCCESS);
1183: }

1185: /*
1186:     Numeric U^T*D*U factorization for SBAIJ format.
1187:     Version for blocks are 1 by 1.
1188: */
1189: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C, Mat A, const MatFactorInfo *info)
1190: {
1191:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1192:   IS              ip = b->row;
1193:   const PetscInt *ai, *aj, *rip;
1194:   PetscInt       *a2anew, i, j, mbs = a->mbs, *bi = b->i, *bj = b->j, *bcol;
1195:   PetscInt        k, jmin, jmax, *jl, *il, col, nexti, ili, nz;
1196:   MatScalar      *rtmp, *ba = b->a, *bval, *aa, dk, uikdi;
1197:   PetscReal       rs;
1198:   FactorShiftCtx  sctx;

1200:   PetscFunctionBegin;
1201:   /* MatPivotSetUp(): initialize shift context sctx */
1202:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1204:   PetscCall(ISGetIndices(ip, &rip));
1205:   if (!a->permute) {
1206:     ai = a->i;
1207:     aj = a->j;
1208:     aa = a->a;
1209:   } else {
1210:     ai = a->inew;
1211:     aj = a->jnew;
1212:     nz = ai[mbs];
1213:     PetscCall(PetscMalloc1(nz, &aa));
1214:     a2anew = a->a2anew;
1215:     bval   = a->a;
1216:     for (j = 0; j < nz; j++) aa[a2anew[j]] = *(bval++);
1217:   }

1219:   /* initialization */
1220:   /* il and jl record the first nonzero element in each row of the accessing
1221:      window U(0:k, k:mbs-1).
1222:      jl:    list of rows to be added to uneliminated rows
1223:             i>= k: jl(i) is the first row to be added to row i
1224:             i<  k: jl(i) is the row following row i in some list of rows
1225:             jl(i) = mbs indicates the end of a list
1226:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1227:             row i of U */
1228:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &jl));

1230:   do {
1231:     sctx.newshift = PETSC_FALSE;
1232:     il[0]         = 0;
1233:     for (i = 0; i < mbs; i++) {
1234:       rtmp[i] = 0.0;
1235:       jl[i]   = mbs;
1236:     }

1238:     for (k = 0; k < mbs; k++) {
1239:       /*initialize k-th row by the perm[k]-th row of A */
1240:       jmin = ai[rip[k]];
1241:       jmax = ai[rip[k] + 1];
1242:       bval = ba + bi[k];
1243:       for (j = jmin; j < jmax; j++) {
1244:         col       = rip[aj[j]];
1245:         rtmp[col] = aa[j];
1246:         *bval++   = 0.0; /* for in-place factorization */
1247:       }

1249:       /* shift the diagonal of the matrix */
1250:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1252:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1253:       dk = rtmp[k];
1254:       i  = jl[k]; /* first row to be added to k_th row  */

1256:       while (i < k) {
1257:         nexti = jl[i]; /* next row to be added to k_th row */

1259:         /* compute multiplier, update diag(k) and U(i,k) */
1260:         ili   = il[i];                /* index of first nonzero element in U(i,k:bms-1) */
1261:         uikdi = -ba[ili] * ba[bi[i]]; /* diagonal(k) */
1262:         dk += uikdi * ba[ili];
1263:         ba[ili] = uikdi; /* -U(i,k) */

1265:         /* add multiple of row i to k-th row */
1266:         jmin = ili + 1;
1267:         jmax = bi[i + 1];
1268:         if (jmin < jmax) {
1269:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1270:           PetscCall(PetscLogFlops(2.0 * (jmax - jmin)));

1272:           /* update il and jl for row i */
1273:           il[i] = jmin;
1274:           j     = bj[jmin];
1275:           jl[i] = jl[j];
1276:           jl[j] = i;
1277:         }
1278:         i = nexti;
1279:       }

1281:       /* shift the diagonals when zero pivot is detected */
1282:       /* compute rs=sum of abs(off-diagonal) */
1283:       rs   = 0.0;
1284:       jmin = bi[k] + 1;
1285:       nz   = bi[k + 1] - jmin;
1286:       if (nz) {
1287:         bcol = bj + jmin;
1288:         while (nz--) {
1289:           rs += PetscAbsScalar(rtmp[*bcol]);
1290:           bcol++;
1291:         }
1292:       }

1294:       sctx.rs = rs;
1295:       sctx.pv = dk;
1296:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1297:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1298:       dk = sctx.pv;

1300:       /* copy data into U(k,:) */
1301:       ba[bi[k]] = 1.0 / dk; /* U(k,k) */
1302:       jmin      = bi[k] + 1;
1303:       jmax      = bi[k + 1];
1304:       if (jmin < jmax) {
1305:         for (j = jmin; j < jmax; j++) {
1306:           col       = bj[j];
1307:           ba[j]     = rtmp[col];
1308:           rtmp[col] = 0.0;
1309:         }
1310:         /* add the k-th row into il and jl */
1311:         il[k] = jmin;
1312:         i     = bj[jmin];
1313:         jl[k] = jl[i];
1314:         jl[i] = k;
1315:       }
1316:     }
1317:   } while (sctx.newshift);
1318:   PetscCall(PetscFree3(rtmp, il, jl));
1319:   if (a->permute) PetscCall(PetscFree(aa));

1321:   PetscCall(ISRestoreIndices(ip, &rip));

1323:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1324:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1325:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1326:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1327:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1328:   C->assembled           = PETSC_TRUE;
1329:   C->preallocated        = PETSC_TRUE;

1331:   PetscCall(PetscLogFlops(C->rmap->N));
1332:   if (sctx.nshift) {
1333:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1334:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1335:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1336:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1337:     }
1338:   }
1339:   PetscFunctionReturn(PETSC_SUCCESS);
1340: }

1342: /*
1343:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1344:   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1345: */
1346: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B, Mat A, const MatFactorInfo *info)
1347: {
1348:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data;
1349:   Mat_SeqSBAIJ  *b = (Mat_SeqSBAIJ *)B->data;
1350:   PetscInt       i, j, mbs = A->rmap->n, *bi = b->i, *bj = b->j, *bdiag = b->diag, *bjtmp;
1351:   PetscInt      *ai = a->i, *aj = a->j, *ajtmp;
1352:   PetscInt       k, jmin, jmax, *c2r, *il, col, nexti, ili, nz;
1353:   MatScalar     *rtmp, *ba = b->a, *bval, *aa = a->a, dk, uikdi;
1354:   FactorShiftCtx sctx;
1355:   PetscReal      rs;
1356:   MatScalar      d, *v;

1358:   PetscFunctionBegin;
1359:   PetscCall(PetscMalloc3(mbs, &rtmp, mbs, &il, mbs, &c2r));

1361:   /* MatPivotSetUp(): initialize shift context sctx */
1362:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1364:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1365:     sctx.shift_top = info->zeropivot;

1367:     PetscCall(PetscArrayzero(rtmp, mbs));

1369:     for (i = 0; i < mbs; i++) {
1370:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1371:       d = (aa)[a->diag[i]];
1372:       rtmp[i] += -PetscRealPart(d); /* diagonal entry */
1373:       ajtmp = aj + ai[i] + 1;       /* exclude diagonal */
1374:       v     = aa + ai[i] + 1;
1375:       nz    = ai[i + 1] - ai[i] - 1;
1376:       for (j = 0; j < nz; j++) {
1377:         rtmp[i] += PetscAbsScalar(v[j]);
1378:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1379:       }
1380:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1381:     }
1382:     sctx.shift_top *= 1.1;
1383:     sctx.nshift_max = 5;
1384:     sctx.shift_lo   = 0.;
1385:     sctx.shift_hi   = 1.;
1386:   }

1388:   /* allocate working arrays
1389:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1390:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1391:   */
1392:   do {
1393:     sctx.newshift = PETSC_FALSE;

1395:     for (i = 0; i < mbs; i++) c2r[i] = mbs;
1396:     if (mbs) il[0] = 0;

1398:     for (k = 0; k < mbs; k++) {
1399:       /* zero rtmp */
1400:       nz    = bi[k + 1] - bi[k];
1401:       bjtmp = bj + bi[k];
1402:       for (j = 0; j < nz; j++) rtmp[bjtmp[j]] = 0.0;

1404:       /* load in initial unfactored row */
1405:       bval = ba + bi[k];
1406:       jmin = ai[k];
1407:       jmax = ai[k + 1];
1408:       for (j = jmin; j < jmax; j++) {
1409:         col       = aj[j];
1410:         rtmp[col] = aa[j];
1411:         *bval++   = 0.0; /* for in-place factorization */
1412:       }
1413:       /* shift the diagonal of the matrix: ZeropivotApply() */
1414:       rtmp[k] += sctx.shift_amount; /* shift the diagonal of the matrix */

1416:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1417:       dk = rtmp[k];
1418:       i  = c2r[k]; /* first row to be added to k_th row  */

1420:       while (i < k) {
1421:         nexti = c2r[i]; /* next row to be added to k_th row */

1423:         /* compute multiplier, update diag(k) and U(i,k) */
1424:         ili   = il[i];                   /* index of first nonzero element in U(i,k:bms-1) */
1425:         uikdi = -ba[ili] * ba[bdiag[i]]; /* diagonal(k) */
1426:         dk += uikdi * ba[ili];           /* update diag[k] */
1427:         ba[ili] = uikdi;                 /* -U(i,k) */

1429:         /* add multiple of row i to k-th row */
1430:         jmin = ili + 1;
1431:         jmax = bi[i + 1];
1432:         if (jmin < jmax) {
1433:           for (j = jmin; j < jmax; j++) rtmp[bj[j]] += uikdi * ba[j];
1434:           /* update il and c2r for row i */
1435:           il[i]  = jmin;
1436:           j      = bj[jmin];
1437:           c2r[i] = c2r[j];
1438:           c2r[j] = i;
1439:         }
1440:         i = nexti;
1441:       }

1443:       /* copy data into U(k,:) */
1444:       rs   = 0.0;
1445:       jmin = bi[k];
1446:       jmax = bi[k + 1] - 1;
1447:       if (jmin < jmax) {
1448:         for (j = jmin; j < jmax; j++) {
1449:           col   = bj[j];
1450:           ba[j] = rtmp[col];
1451:           rs += PetscAbsScalar(ba[j]);
1452:         }
1453:         /* add the k-th row into il and c2r */
1454:         il[k]  = jmin;
1455:         i      = bj[jmin];
1456:         c2r[k] = c2r[i];
1457:         c2r[i] = k;
1458:       }

1460:       sctx.rs = rs;
1461:       sctx.pv = dk;
1462:       PetscCall(MatPivotCheck(B, A, info, &sctx, k));
1463:       if (sctx.newshift) break;
1464:       dk = sctx.pv;

1466:       ba[bdiag[k]] = 1.0 / dk; /* U(k,k) */
1467:     }
1468:   } while (sctx.newshift);

1470:   PetscCall(PetscFree3(rtmp, il, c2r));

1472:   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1473:   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1474:   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1475:   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1476:   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1477:   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1479:   B->assembled    = PETSC_TRUE;
1480:   B->preallocated = PETSC_TRUE;

1482:   PetscCall(PetscLogFlops(B->rmap->n));

1484:   /* MatPivotView() */
1485:   if (sctx.nshift) {
1486:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1487:       PetscCall(PetscInfo(A, "number of shift_pd tries %" PetscInt_FMT ", shift_amount %g, diagonal shifted up by %e fraction top_value %e\n", sctx.nshift, (double)sctx.shift_amount, (double)sctx.shift_fraction, (double)sctx.shift_top));
1488:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1489:       PetscCall(PetscInfo(A, "number of shift_nz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1490:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1491:       PetscCall(PetscInfo(A, "number of shift_inblocks applied %" PetscInt_FMT ", each shift_amount %g\n", sctx.nshift, (double)info->shiftamount));
1492:     }
1493:   }
1494:   PetscFunctionReturn(PETSC_SUCCESS);
1495: }

1497: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C, Mat A, const MatFactorInfo *info)
1498: {
1499:   Mat_SeqSBAIJ  *a = (Mat_SeqSBAIJ *)A->data, *b = (Mat_SeqSBAIJ *)C->data;
1500:   PetscInt       i, j, mbs = a->mbs;
1501:   PetscInt      *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
1502:   PetscInt       k, jmin, *jl, *il, nexti, ili, *acol, *bcol, nz;
1503:   MatScalar     *rtmp, *ba = b->a, *aa = a->a, dk, uikdi, *aval, *bval;
1504:   PetscReal      rs;
1505:   FactorShiftCtx sctx;

1507:   PetscFunctionBegin;
1508:   /* MatPivotSetUp(): initialize shift context sctx */
1509:   PetscCall(PetscMemzero(&sctx, sizeof(FactorShiftCtx)));

1511:   /* initialization */
1512:   /* il and jl record the first nonzero element in each row of the accessing
1513:      window U(0:k, k:mbs-1).
1514:      jl:    list of rows to be added to uneliminated rows
1515:             i>= k: jl(i) is the first row to be added to row i
1516:             i<  k: jl(i) is the row following row i in some list of rows
1517:             jl(i) = mbs indicates the end of a list
1518:      il(i): points to the first nonzero element in U(i,k:mbs-1)
1519:   */
1520:   PetscCall(PetscMalloc1(mbs, &rtmp));
1521:   PetscCall(PetscMalloc2(mbs, &il, mbs, &jl));

1523:   do {
1524:     sctx.newshift = PETSC_FALSE;
1525:     il[0]         = 0;
1526:     for (i = 0; i < mbs; i++) {
1527:       rtmp[i] = 0.0;
1528:       jl[i]   = mbs;
1529:     }

1531:     for (k = 0; k < mbs; k++) {
1532:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1533:       nz   = ai[k + 1] - ai[k];
1534:       acol = aj + ai[k];
1535:       aval = aa + ai[k];
1536:       bval = ba + bi[k];
1537:       while (nz--) {
1538:         rtmp[*acol++] = *aval++;
1539:         *bval++       = 0.0; /* for in-place factorization */
1540:       }

1542:       /* shift the diagonal of the matrix */
1543:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1545:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1546:       dk = rtmp[k];
1547:       i  = jl[k]; /* first row to be added to k_th row  */

1549:       while (i < k) {
1550:         nexti = jl[i]; /* next row to be added to k_th row */
1551:         /* compute multiplier, update D(k) and U(i,k) */
1552:         ili   = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1553:         uikdi = -ba[ili] * ba[bi[i]];
1554:         dk += uikdi * ba[ili];
1555:         ba[ili] = uikdi; /* -U(i,k) */

1557:         /* add multiple of row i to k-th row ... */
1558:         jmin = ili + 1;
1559:         nz   = bi[i + 1] - jmin;
1560:         if (nz > 0) {
1561:           bcol = bj + jmin;
1562:           bval = ba + jmin;
1563:           PetscCall(PetscLogFlops(2.0 * nz));
1564:           while (nz--) rtmp[*bcol++] += uikdi * (*bval++);

1566:           /* update il and jl for i-th row */
1567:           il[i] = jmin;
1568:           j     = bj[jmin];
1569:           jl[i] = jl[j];
1570:           jl[j] = i;
1571:         }
1572:         i = nexti;
1573:       }

1575:       /* shift the diagonals when zero pivot is detected */
1576:       /* compute rs=sum of abs(off-diagonal) */
1577:       rs   = 0.0;
1578:       jmin = bi[k] + 1;
1579:       nz   = bi[k + 1] - jmin;
1580:       if (nz) {
1581:         bcol = bj + jmin;
1582:         while (nz--) {
1583:           rs += PetscAbsScalar(rtmp[*bcol]);
1584:           bcol++;
1585:         }
1586:       }

1588:       sctx.rs = rs;
1589:       sctx.pv = dk;
1590:       PetscCall(MatPivotCheck(C, A, info, &sctx, k));
1591:       if (sctx.newshift) break; /* sctx.shift_amount is updated */
1592:       dk = sctx.pv;

1594:       /* copy data into U(k,:) */
1595:       ba[bi[k]] = 1.0 / dk;
1596:       jmin      = bi[k] + 1;
1597:       nz        = bi[k + 1] - jmin;
1598:       if (nz) {
1599:         bcol = bj + jmin;
1600:         bval = ba + jmin;
1601:         while (nz--) {
1602:           *bval++       = rtmp[*bcol];
1603:           rtmp[*bcol++] = 0.0;
1604:         }
1605:         /* add k-th row into il and jl */
1606:         il[k] = jmin;
1607:         i     = bj[jmin];
1608:         jl[k] = jl[i];
1609:         jl[i] = k;
1610:       }
1611:     } /* end of for (k = 0; k<mbs; k++) */
1612:   } while (sctx.newshift);
1613:   PetscCall(PetscFree(rtmp));
1614:   PetscCall(PetscFree2(il, jl));

1616:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1617:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1618:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1619:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1620:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1622:   C->assembled    = PETSC_TRUE;
1623:   C->preallocated = PETSC_TRUE;

1625:   PetscCall(PetscLogFlops(C->rmap->N));
1626:   if (sctx.nshift) {
1627:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1628:       PetscCall(PetscInfo(A, "number of shiftnz tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1629:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1630:       PetscCall(PetscInfo(A, "number of shiftpd tries %" PetscInt_FMT ", shift_amount %g\n", sctx.nshift, (double)sctx.shift_amount));
1631:     }
1632:   }
1633:   PetscFunctionReturn(PETSC_SUCCESS);
1634: }

1636: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A, IS perm, const MatFactorInfo *info)
1637: {
1638:   Mat C;

1640:   PetscFunctionBegin;
1641:   PetscCall(MatGetFactor(A, "petsc", MAT_FACTOR_CHOLESKY, &C));
1642:   PetscCall(MatCholeskyFactorSymbolic(C, A, perm, info));
1643:   PetscCall(MatCholeskyFactorNumeric(C, A, info));

1645:   A->ops->solve          = C->ops->solve;
1646:   A->ops->solvetranspose = C->ops->solvetranspose;

1648:   PetscCall(MatHeaderMerge(A, &C));
1649:   PetscFunctionReturn(PETSC_SUCCESS);
1650: }