Commit 46013265 authored by Chris Cantwell

Added code to randomly cause failures to occur, up to the number of spares.

parent d48920a5
...@@ -97,34 +97,40 @@ int main(int argc, char *argv[]) ...@@ -97,34 +97,40 @@ int main(int argc, char *argv[])
// Zero field coefficients for initial guess for linear solver. // Zero field coefficients for initial guess for linear solver.
Vmath::Zero(field->GetNcoeffs(), field->UpdateCoeffs(), 1); Vmath::Zero(field->GetNcoeffs(), field->UpdateCoeffs(), 1);
int failureOccured = 0;
// int n = 0; int failureMax = session->GetCmdLineArgument<int>("spares");
// if (session->GetComm()->IsRecovering())
// {
//// session->GetComm()->StateRestore();
//// session->GetComm()->StateGet("n", n);
//// session->GetComm()->StateGet("Field", &field->UpdatePhys()[0], nq);
// n++;
// }
// Time integrate using backward Euler // Time integrate using backward Euler
for (int n = 0 ; n < nSteps; ++n) for (int n = 0 ; n < nSteps; ++n)
{ {
cout << "Time step: " << n << endl; cout << "Time step: " << n << endl;
try { try {
cout << "IsRecovering: " << session->GetComm()->IsRecovering() << endl;
double r = (double)rand()/RAND_MAX;
if (r < 0.001 && failureOccured < failureMax) {
cout << "COMMITTING SUICIDE!" << endl;
int blah;
cin >> blah;
raise(SIGKILL);
}
Vmath::Smul(nq, -1.0/delta_t/epsilon, field->GetPhys(), 1, Vmath::Smul(nq, -1.0/delta_t/epsilon, field->GetPhys(), 1,
field->UpdatePhys(), 1); field->UpdatePhys(), 1);
cout << "HelmSolve" << endl;
field->HelmSolve(field->GetPhys(), field->UpdateCoeffs(), field->HelmSolve(field->GetPhys(), field->UpdateCoeffs(),
NullFlagList, factors); NullFlagList, factors);
cout << "BwsTrans" << endl;
field->BwdTrans(field->GetCoeffs(), field->UpdatePhys()); field->BwdTrans(field->GetCoeffs(), field->UpdatePhys());
if (session->GetComm()->IsRecovering()) if (session->GetComm()->IsRecovering())
{ {
cout << "Restoring field data back to last step" << endl; cout << "Restoring field data back to last step" << endl;
session->GetComm()->StateGet("n", n); session->GetComm()->StateGet("n", n);
session->GetComm()->StateGet("Field", &field->UpdatePhys()[0], nq); session->GetComm()->StateGet("Field", &field->UpdatePhys()[0], nq);
session->GetComm()->StateGet("failures", failureOccured);
failureOccured++;
cout << "RESUMING AT TIMESTEP " << n << endl;
} }
else else
{ {
...@@ -132,11 +138,15 @@ int main(int argc, char *argv[]) ...@@ -132,11 +138,15 @@ int main(int argc, char *argv[])
session->GetComm()->StateAdd("Field", &field->GetPhys()[0], nq); session->GetComm()->StateAdd("Field", &field->GetPhys()[0], nq);
cout << "Add time step data" << endl; cout << "Add time step data" << endl;
session->GetComm()->StateAdd("n", n); session->GetComm()->StateAdd("n", n);
session->GetComm()->StateAdd("failures", failureOccured);
cout << "Commit state" << endl; cout << "Commit state" << endl;
session->GetComm()->StateCommit(); session->GetComm()->StateCommit();
} }
if (n == 0 || session->GetComm()->IsRecovering()) { if (n == 0 || session->GetComm()->IsRecovering()) {
if (session->GetComm()->IsRecovering()) {
failureOccured++;
}
cout << "Ending transaction log" << endl; cout << "Ending transaction log" << endl;
session->GetComm()->EndTransactionLog(); session->GetComm()->EndTransactionLog();
} }
...@@ -145,19 +155,12 @@ int main(int argc, char *argv[]) ...@@ -145,19 +155,12 @@ int main(int argc, char *argv[])
cout << "Caught an error - trying to invoke a spare." << endl; cout << "Caught an error - trying to invoke a spare." << endl;
int x = session->GetComm()->EnrolSpare(); int x = session->GetComm()->EnrolSpare();
cout << "Enroled spare, result: " << x << endl; cout << "Enroled spare, result: " << x << endl;
// --n; // need to roll back to previous time step here...
// Vmath::Smul(nq, -delta_t*epsilon, field->GetPhys(), 1,
// field->UpdatePhys(), 1);
// cout << "Recover state" << endl;
// session->GetComm()->StateGet("Field",
// &field->UpdatePhys()[0],
// field->GetTotPoints());
// session->GetComm()->StateGet("n", n);
// cout << "Finished recovering state" << endl;
cout << "Restoring last state" << endl; cout << "Restoring last state" << endl;
// session->GetComm()->StateRestore();
session->GetComm()->StateGet("n", n); session->GetComm()->StateGet("n", n);
session->GetComm()->StateGet("Field", &field->UpdatePhys()[0], nq); session->GetComm()->StateGet("Field", &field->UpdatePhys()[0], nq);
session->GetComm()->StateGet("failures", failureOccured);
failureOccured++;
cout << "RECOVERED TO TIMESTEP " << n << endl;
cout << "Completed restoring last state" << endl; cout << "Completed restoring last state" << endl;
} catch (...) { } catch (...) {
cout << "ERROR WHEN PERFORMING ENROLSPARE!!!" << endl; cout << "ERROR WHEN PERFORMING ENROLSPARE!!!" << endl;
...@@ -201,7 +204,8 @@ int main(int argc, char *argv[]) ...@@ -201,7 +204,8 @@ int main(int argc, char *argv[])
<< field->H1(field->GetPhys(), exact) << endl; << field->H1(field->GetPhys(), exact) << endl;
//-------------------------------------------- //--------------------------------------------
} }
int blah;
cin >> blah;
// Finalise session // Finalise session
session->Finalise(); session->Finalise();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment