00001
00002
00003
00005
00006
00007
00008
00009
00010
00011
00013
00014 #include "TROOT.h"
00015 #include "TApplication.h"
00016
00017 #include "TFndRun.h"
00018 #include "TThread.h"
00019 #include "TFndMonitorMan.h"
00020
00021 ClassImp(TFndMonitorMan)
00022
00023
00024 TFndMonitorMan::TFndMonitorMan():
00025 fStartProdThread(),fCheckProdThread(),fProducerCommand()
00026 {
00027 gROOT->Info("TFndMonitorMan::TFndMonitorMan","called");
00028 if(!fndrun) new TFndRun("MonitorGUI","Finuda");
00029 }
00030
00031
00032 TFndMonitorMan::~TFndMonitorMan(){
00033
00034 StopCheckRunningThread();
00035
00036 if(fStartProdThread){
00037 if(fStartProdThread->GetState() == TThread::kRunningState){
00038 Printf("killing \"Start new producer thread\"");
00039 fStartProdThread->Kill();
00040 TThread::Delete(fStartProdThread);
00041 delete fStartProdThread;
00042 }
00043 else Printf("Start new producer thread found, but not running");
00044 }
00045 else Printf("Start new producer thread not found");
00046 }
00047
00048
00049 void TFndMonitorMan::AutoRestartOnlineProd(Int_t mode){
00050
00051
00052
00053
00054 KillCurrentProducer();
00055 if(mode==0) RemoveMapFiles();
00056
00057 if(fStartProdThread){
00058 if(fStartProdThread->GetState() == TThread::kRunningState){
00059 Printf("killing \"Start new producer thread\"");
00060 fStartProdThread->Kill();
00061 TThread::Delete(fStartProdThread);
00062 delete fStartProdThread;
00063 }
00064 }
00065
00066 fStartProdThread = new TThread("Monitor_producer",RunProdThread,(void *)this);
00067
00068 gROOT->Info("SendProducerCommand","\"%s\"",fProducerCommand.Data());
00069 Printf("Sending command: \"%s\"",fProducerCommand.Data());
00070 fStartProdThread->Run();
00071 Printf("...done");
00072
00073 }
00074
00075
00076 void TFndMonitorMan::StopCheckRunningThread(){
00077
00078 if(fCheckProdThread){
00079 if(fCheckProdThread->GetState() == TThread::kRunningState){
00080 Printf("killing \"Check producer thread\"");
00081 fCheckProdThread->Kill();
00082 TThread::Delete(fCheckProdThread);
00083 delete fCheckProdThread;
00084 fCheckProdThread = 0;
00085 }
00086 else Printf("Start new producer thread found, but not running");
00087 }
00088 else Printf("Start new producer thread not found");
00089
00090 }
00091
00092
00093 void* TFndMonitorMan::CheckRunningProdThread(void *arg){
00094
00095
00096
00097
00098
00099
00100
00101 TThread::SetCancelOff();
00102 TThread::SetCancelDeferred();
00103
00104 TDatime start_time;
00105 start_time.Set();
00106 UInt_t stim = start_time.Convert();
00107
00108 UInt_t MaxDurationSec = 3600;
00109
00110
00111
00112
00113 while(1){
00114 TThread::CancelPoint();
00115
00116 TDatime cur_time;
00117 cur_time.Set();
00118 UInt_t eltim = cur_time.Convert() - stim;
00119 if(eltim < 5){
00120 usleep(700000);
00121 continue;
00122 }
00123
00124
00125 enum pr_st{
00126 pr_st_unknown = 0,
00127 pr_st_running = 1,
00128 pr_st_stopped = 2,
00129 pr_st_broken = 3
00130 };
00131 pr_st pr_st_val = pr_st_unknown;
00132
00133 Int_t gsys_res = gSystem->Exec("pgrep geb2hdt > /dev/null");
00134
00135 Bool_t is_stopfile_present = kTRUE;
00136 if(gSystem->AccessPathName(".froot_onlstopped") ) is_stopfile_present = kFALSE;
00137
00138 TString p_status = "unknown status";
00139 if(gsys_res == 0){
00140 pr_st_val = pr_st_running;
00141 p_status = "running";
00142 }
00143 else if(gsys_res != 0 && is_stopfile_present){
00144 pr_st_val = pr_st_stopped;
00145 p_status = "stopped";
00146 }
00147 else if(gsys_res != 0 && !is_stopfile_present){
00148 pr_st_val = pr_st_broken;
00149 p_status = "broken";
00150 }
00151
00152
00153 TString msg = "";
00154 msg.Form("*** CHECKING ONLINE PRODUCER: %s",p_status.Data());
00155 TString msg1 = "";
00156 msg1.Form(" (elapsed time: %u s)",eltim);
00157 if( pr_st_val != pr_st_stopped ) msg+=msg1;
00158 else{
00159 start_time.Set();
00160 stim = start_time.Convert();
00161 }
00162
00163 if(pr_st_val != pr_st_running) Printf(msg);
00164 if(eltim > MaxDurationSec) Printf("*** Online Process too long: restarting it.");
00165 if(pr_st_val == pr_st_broken || eltim > MaxDurationSec){
00166 Printf("\n\n\n Restarting online producer (automatic check is running)");
00167 gSystem->Exec("date");
00168 ((TFndMonitorMan *)arg) -> AutoRestartOnlineProd(1);
00169 start_time.Set();
00170 stim = start_time.Convert();
00171 }
00172
00173 usleep(10000000);
00174 }
00175 }
00176
00177
00178 void* TFndMonitorMan::RunProdThread(void *arg){
00179
00180
00181
00182 Printf("executing command...");
00183 gSystem->Exec(((TFndMonitorMan *)arg)->GetProducerCommand());
00184 return 0;
00185 }
00186
00187
00188
00189 void TFndMonitorMan::SendProducerCommand(TString appl,TString db_host,TString data_path,TString run_type,Int_t run_num,Int_t n_events){
00190
00191 TString run_name = BuildRunName(run_type,run_num);
00192
00193 fndrun->SetRunType(run_type);
00194 if(run_type.CompareTo("ONLM")) fndrun->SetRunNumber(run_num);
00195 else{
00196 fndrun->SetRunNumber(0);
00197 run_name = run_type;
00198 }
00199
00200
00201 fProducerCommand.Form("%s %s %s %s %d %d > %s.dat" ,appl.Data(),
00202 db_host.Data(),
00203 data_path.Data(),
00204 run_type.Data(),
00205 run_num,
00206 n_events,
00207 run_name.Data());
00208
00209 fStartProdThread = new TThread("Monitor_producer",RunProdThread,(void *)this);
00210
00211 gROOT->Info("SendProducerCommand","\"%s\"",fProducerCommand.Data());
00212 Printf("Sending command: \"%s\"",fProducerCommand.Data());
00213 fStartProdThread->Run();
00214 Printf("...done");
00215
00216
00217 if(run_type.CompareTo("ONLM") == 0){
00218 if(!fCheckProdThread) fCheckProdThread = new TThread("Monitor_controller",CheckRunningProdThread,(void *)this);
00219 if(fCheckProdThread->GetState() != TThread::kRunningState){
00220 Printf("Starting control for running producer");
00221 fCheckProdThread->Run();
00222 Printf("...done");
00223 }
00224 }
00225 }
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255 void TFndMonitorMan::KillCurrentProducer(){
00256
00257
00258
00259 Printf("Killing current process...");
00260
00261 TString pid_kill = TString("kill -9 ");
00262
00263 FILE *f = fopen(ExpandPathName(".froot_pid"),"r");
00264 Char_t file[200][200];
00265 if(!f){
00266 gROOT->Warning("TFndMonitorMan::KillCurrentProducer()","PID file not found");
00267 return;
00268 }
00269
00270 TString cur_line = TString();
00271 int l=0;
00272 const Char_t *prod_pid = 0;
00273 while (fgets(file[l],200,f)) {
00274 Char_t *line=file[l++];
00275 if(strlen(line)<2) continue;
00276 line[strlen(line)-1]='\0';
00277 cur_line = line;
00278 Int_t SpacePos = (Int_t) cur_line.Index(" ");
00279 if(!strncmp(cur_line.Data(),"prod:",SpacePos)){
00280 prod_pid = ((TSubString)(cur_line.SubString("",SpacePos+1))).Data();
00281 }
00282 }
00283 pid_kill += prod_pid;
00284 Printf(" ******* Killing current producer: sending command: \"%s\" *******",(Char_t *)(pid_kill.Data()));
00285 gSystem->Exec(pid_kill.Data());
00286 Printf(" ******* current producer killed *******");
00287 fclose(f);
00288
00289
00290
00291
00292 }
00293
00294
00295 void TFndMonitorMan::KillOtherProducers(){
00296
00297
00298
00299
00300 gSystem->Exec("pkill froot,geb2");
00301
00302 }
00303
00304
00305 void TFndMonitorMan::KillYourself(){
00306
00307
00308
00309 cout << "Killing application" << endl;
00310 gApplication->Terminate(0);
00311 }
00312
00313
00314 void TFndMonitorMan::RemoveMapFiles(){
00315
00316
00317
00318
00319
00320
00321 cout << "... removing used shared memories..." << endl;
00322 gSystem->Exec("rm -f $FND_SHR/*.map");
00323 gSystem->Exec("rm ./*.map");
00324 cout << "...done." << endl;
00325 }