GCOP
1.0
|
00001 #ifndef GCOP_SDDP_H 00002 #define GCOP_SDDP_H 00003 00006 #include "docp.h" 00007 #include <exception> // std::exception 00008 #include <stdexcept> 00009 #include <random> 00010 00011 namespace gcop { 00012 00013 using namespace std; 00014 using namespace Eigen; 00015 00016 template <typename T, int nx = Dynamic, int nu = Dynamic, int np = Dynamic> class SDdp : 00017 public Docp<T, nx, nu, np> { 00018 00019 typedef Matrix<double, nx, 1> Vectornd; 00020 typedef Matrix<double, nu, 1> Vectorcd; 00021 typedef Matrix<double, np, 1> Vectorpd; 00022 00023 typedef Matrix<double, nx, nx> Matrixnd; 00024 typedef Matrix<double, nx, nu> Matrixncd; 00025 typedef Matrix<double, nu, nx> Matrixcnd; 00026 typedef Matrix<double, nu, nu> Matrixcd; 00027 00028 //External render Function for trajectories: 00029 typedef void(RenderFunc)(int, vector<T>&); 00030 00031 public: 00050 SDdp(System<T, nx, nu, np> &sys, Cost<T, nx, nu, np> &cost, 00051 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 00052 Vectorpd *p = 0, bool update = true); 00053 00054 virtual ~SDdp(); 00055 00056 00063 void Iterate(); 00064 00068 void Forward(); 00069 00073 void Backward(); 00074 00078 void Linearize(); 00079 00080 int N; 00081 00082 double mu; 00083 double mu0; 00084 double dmu0; 00085 double mumax; 00086 double a; 00087 double current_a; 00088 00089 std::vector<Vectorcd> dus; 00090 std::vector<T> xss; 00091 std::vector<T> xsprev; 00092 00093 std::vector<Vectorcd> kus; 00094 std::vector<Vectorcd> du_sigma; 00095 std::vector<Matrixcnd> Kuxs; 00096 std::vector<Vectorcd> Qud; 00097 00098 std::default_random_engine randgenerator; 00099 00100 std::normal_distribution<double> normal_dist; 00101 //std::uniform_real_distribution<double> uniform_dist;///< Creates a normal distribution 00102 00103 Vectornd Lx; 00104 Matrixnd Lxx; 00105 Vectorcd Lu; 00106 Matrixcd Luu; 00107 00108 Matrixnd P; 00109 Vectornd v; 00110 00111 Vectorcd duscale; 00112 Vectornd dxscale; 00113 RenderFunc *external_render; 00114 00115 double V; 00116 Vector2d dV; 00117 00118 double s1; 00119 double s2; 00120 double b1; 00121 double b2; 00122 00123 char type; 00124 int Ns; 00125 00126 static const char PURE = 0; 00127 static const char DDP = 1; 00128 static const char LQS = 2; 00129 00130 /* 00131 class Fx : public Function<T, n, n> { 00132 public: 00133 Fx(System &sys) : 00134 Function<T>(sys.U.nspace), sys(sys) { 00135 } 00136 00137 void F(Vectornd &f, const T &x) { 00138 00139 00140 sys.X.L 00141 this->xa = xa; 00142 sys.FK(this->xa); 00143 sys.ID(f, t, *xb, this->xa, *u, h); 00144 } 00145 00146 Mbs &sys; 00147 00148 double t; 00149 const MbsState *xb; 00150 const Vectorcd *u; 00151 double h; 00152 }; 00153 00154 class Fu : public Function<VectorXd> { 00155 public: 00156 Fu(Mbs &sys) : 00157 Function<VectorXd>(sys.U), sys(sys) { 00158 } 00159 00160 void F(VectorXd &f, const VectorXd &u) { 00161 sys.ID(f, t, *xb, *xa, u, h); 00162 } 00163 00164 Mbs &sys; 00165 00166 double t; 00167 const MbsState *xa; 00168 const MbsState *xb; 00169 double h; 00170 }; 00171 00172 */ 00173 00174 bool pd(const Matrixnd &P) { 00175 LLT<Matrixnd> llt; 00176 llt.compute(P); 00177 return llt.info() == Eigen::Success; 00178 } 00179 00180 bool pdX(const MatrixXd &P) { 00181 LLT<MatrixXd> llt; 00182 llt.compute(P); 00183 return llt.info() == Eigen::Success; 00184 } 00185 00186 void Reset() 00187 { 00188 this->current_a = 1.0; 00189 this->mu = (1e-3); 00190 this-> mu0 = (1e-3); 00191 this->dmu0 = (2); 00192 this->a = (1); 00193 this->s1 = (0.1); 00194 this->s2 = (0.5); 00195 this->b1 = (0.25); 00196 this->b2 = (2); 00197 for(int count = 0; count < N; count++) 00198 { 00199 Kuxs[count].setZero(); 00200 kus[count].setZero(); 00201 } 00202 } 00203 00204 }; 00205 00206 using namespace std; 00207 using namespace Eigen; 00208 00209 template <typename T, int nx, int nu, int np> 00210 SDdp<T, nx, nu, np>::SDdp(System<T, nx, nu, np> &sys, 00211 Cost<T, nx, nu, np> &cost, 00212 vector<double> &ts, 00213 vector<T> &xs, 00214 vector<Matrix<double, nu, 1> > &us, 00215 Matrix<double, np, 1> *p, 00216 bool update) : Docp<T, nx, nu, np>(sys, cost, ts, xs, us, p, false), 00217 N(us.size()), 00218 mu(1e-3), mu0(1e-3), dmu0(2), mumax(1e6), a(1), current_a(1), 00219 dus(N), kus(N), Kuxs(N), Qud(N), du_sigma(N), 00220 s1(0.1), s2(0.5), b1(0.25), b2(2), 00221 type(LQS), 00222 normal_dist(0,1), 00223 //uniform_dist(-1,1), 00224 external_render(0), 00225 xss(xs), 00226 xsprev(xs), 00227 Ns(30) 00228 // normal_dist(0,0.02) 00229 //normal_dist(0,0.001) 00230 { 00231 assert(N > 0); 00232 assert(sys.X.n > 0); 00233 assert(sys.U.n > 0); 00234 00235 assert(ts.size() == N+1); 00236 assert(xs.size() == N+1); 00237 assert(xss.size() == N+1); 00238 assert(xsprev.size() == N+1); 00239 assert(us.size() == N); 00240 00241 if (nx == Dynamic || nu == Dynamic) { 00242 for (int i = 0; i < N; ++i) { 00243 dus[i].resize(sys.U.n); 00244 // As[i].resize(sys.X.n, sys.X.n); 00245 // Bs[i].resize(sys.X.n, sys.U.n); 00246 kus[i].resize(sys.U.n); 00247 Kuxs[i].resize(sys.U.n, sys.X.n); 00248 } 00249 00250 Lx.resize(sys.X.n); 00251 Lxx.resize(sys.X.n, sys.X.n); 00252 Lu.resize(sys.U.n); 00253 Luu.resize(sys.U.n, sys.U.n); 00254 00255 P.resize(sys.X.n, sys.X.n); 00256 v.resize(sys.X.n); 00257 } 00258 duscale = Vectorcd::Constant(0.02);//Default initialization of scale 00259 for(int i =0; i<N; ++i) 00260 { 00261 dus[i].setZero(); 00262 Kuxs[i].setZero();//Set initial feedback matrices to zero 00263 kus[i].setZero(); 00264 Qud[i] = Vectorcd::Constant(1.0/0.0004);//Set the inverse variance to the given value in the beginning 00265 du_sigma[i] = duscale;// Set the sigma to small value to begin with 00266 } 00267 dxscale = Vectornd::Constant(0.02);//Default initialization of scale 00268 //duscale.setZero(); 00269 cout<<"duscale: "<<duscale<<endl; 00270 if (update) { 00271 this->Update(false); 00272 00273 ++(this->nofevaluations); 00274 xss = xs;//Copy xs to xss in the beginning 00275 //this->Update(true);//#DEBUG 00276 //this->Linearize(); 00277 } 00278 } 00279 00280 template <typename T, int nx, int nu, int np> 00281 SDdp<T, nx, nu, np>::~SDdp() 00282 { 00283 } 00284 00285 template <typename T, int nx, int nu, int np> 00286 void SDdp<T, nx, nu, np>::Backward() { 00287 00288 typedef Matrix<double, nx, 1> Vectornd; 00289 typedef Matrix<double, nu, 1> Vectorcd; 00290 typedef Matrix<double, nx, nx> Matrixnd; 00291 typedef Matrix<double, nx, nu> Matrixncd; 00292 typedef Matrix<double, nu, nx> Matrixcnd; 00293 typedef Matrix<double, nu, nu> Matrixcd; 00294 00295 double t = this->ts.back(); 00296 const T &x = this->xs.back(); 00297 const Vectorcd &u = this->us.back(); 00298 double L = this->cost.L(t, x, u, 0, 0, &Lx, &Lxx); 00299 00300 V = L; 00301 dV.setZero(); 00302 00303 v = Lx; 00304 P = Lxx; 00305 00306 Vectornd Qx; 00307 Vectorcd Qu; 00308 Matrixnd Qxx; 00309 Matrixcd Quu; 00310 Matrixcd Quum; 00311 Matrixcnd Qux; 00312 00313 Matrixnd At; 00314 Matrixcnd Bt; 00315 00316 Matrixcd Ic = MatrixXd::Identity(this->sys.U.n, this->sys.U.n); 00317 00318 for (int k = N-1; k >=0; --k) { 00319 00320 t = this->ts[k]; 00321 double h = this->ts[k+1] - t; 00322 00323 const T &x = this->xs[k]; 00324 const Vectorcd &u = this->us[k]; 00325 double L = this->cost.L(t, x, u, h, 0, &Lx, &Lxx, &Lu, &Luu); 00326 00327 if (std::isnan(L)) { 00328 cout << "NAN" << " k=" << k << " Lx=" << Lx << " Lxx=" << Lxx << endl; 00329 getchar(); 00330 } 00331 00332 V += L; 00333 00334 const Matrixnd &A = this->As[k]; 00335 const Matrixncd &B = this->Bs[k]; 00336 Vectorcd &ku = kus[k]; 00337 Matrixcnd &Kux = Kuxs[k]; 00338 00339 At = A.transpose(); 00340 Bt = B.transpose(); 00341 00342 Qx = Lx + At*v; 00343 Qu = Lu + Bt*v; 00344 Qxx = Lxx + At*P*A; 00345 Quu = Luu + Bt*P*B; 00346 Qux = Bt*P*A; 00347 00348 double mu = this->mu; 00349 double dmu = 1; 00350 00351 if (this->debug) { 00352 if (!pd(P)) { 00353 cout << "P[" << k << "] is not pd =" << endl << P << endl; 00354 cout << "Luu=" << endl << Luu << endl; 00355 } 00356 00357 Matrixcd Pp = Bt*P*B; 00358 if (!pdX(Pp)) { 00359 cout << "B'PB is not positive definite:" << endl << Pp << endl; 00360 } 00361 } 00362 00363 LLT<Matrixcd> llt; 00364 00365 while (1) { 00366 Quum = Quu + mu*Ic; 00367 llt.compute(Quum); 00368 00369 // if OK, then reduce mu and break 00370 if (llt.info() == Eigen::Success) { 00371 // this is the standard quadratic rule specified by Tassa and Todorov 00372 dmu = min(1/this->dmu0, dmu/this->dmu0); 00373 if (mu*dmu > this->mu0) 00374 mu = mu*dmu; 00375 else 00376 mu = this->mu0; 00377 00378 if (this->debug) 00379 cout << "[I] SDdp::Backward: reduced mu=" << mu << " at k=" << k << endl; 00380 00381 //Store Diag value for inverse Variance: 00382 Qud[k] = Quum.diagonal(); 00383 cout<<"Qud: "<<Qud[k].transpose()<<endl; 00384 00385 00386 break; 00387 } 00388 00389 // if negative then increase mu 00390 dmu = max(this->dmu0, dmu*this->dmu0); 00391 mu = max(this->mu0, mu*dmu); 00392 00393 if (this->debug) { 00394 cout << "[I] SDdp::Backward: increased mu=" << mu << " at k=" << k << endl; 00395 cout << "[I] SDdp::Backward: Quu=" << Quu << endl; 00396 } 00397 00398 if (mu > mumax) { 00399 cout << "[W] SDdp::Backward: mu= " << mu << " exceeded maximum !" << endl; 00400 if (this->debug) 00401 getchar(); 00402 break; 00403 } 00404 } 00405 00406 if (mu > mumax) 00407 break; 00408 00409 ku = -llt.solve(Qu); 00410 Kux = -llt.solve(Qux); 00411 00412 assert(!std::isnan(ku[0])); 00413 assert(!std::isnan(Kux(0,0))); 00414 00415 v = Qx + Kux.transpose()*Qu; 00416 P = Qxx + Kux.transpose()*Qux; 00417 00418 dV[0] += ku.dot(Qu); 00419 dV[1] += ku.dot(Quu*ku/2); 00420 } 00421 00422 // if (debug) 00423 cout << "[I] SDdp::Backward: current V=" << V << endl; 00424 } 00425 00426 template <typename T, int nx, int nu, int np> 00427 void SDdp<T, nx, nu, np>::Forward() { 00428 //static int count_iterate = 0; 00429 //count_iterate++; 00430 00431 typedef Matrix<double, nx, 1> Vectornd; 00432 typedef Matrix<double, nu, 1> Vectorcd; 00433 typedef Matrix<double, nx, nx> Matrixnd; 00434 typedef Matrix<double, nx, nu> Matrixncd; 00435 typedef Matrix<double, nu, nx> Matrixcnd; 00436 typedef Matrix<double, nu, nu> Matrixcd; 00437 00438 // measured change in V 00439 double dVm = 1; 00440 00441 //double a = this->a; 00442 this->current_a = this->a;//Start with big step size 00443 00444 while (dVm > 0) { 00445 00446 Vectornd dx = VectorXd::Zero(this->sys.X.n); 00447 dx.setZero();//Redundancy 00448 T xn = this->xs[0]; 00449 this->sys.Reset(xn,this->ts[0]);//Reset to initial state 00450 Vectorcd un; 00451 00452 double Vm = 0; 00453 00454 for (int k = 0; k < N; ++k) { 00455 const Vectorcd &u = this->us[k]; 00456 Vectorcd &du = dus[k]; 00457 00458 const Vectorcd &ku = kus[k]; 00459 const Matrixcnd &Kux = Kuxs[k]; 00460 00461 du = current_a*ku + Kux*dx; 00462 un = u + du; 00463 00464 Rn<nu> &U = (Rn<nu>&)this->sys.U; 00465 if (U.bnd) { 00466 for (int j = 0; j < u.size(); ++j) 00467 if (un[j] < U.lb[j]) { 00468 //cout<<"I hit bound"<<endl;//[DEBUG] 00469 un[j] = U.lb[j]; 00470 du[j] = un[j] - u[j]; 00471 } else 00472 if (un[j] > U.ub[j]) { 00473 un[j] = U.ub[j]; 00474 du[j] = un[j] - u[j]; 00475 } 00476 } 00477 00478 /*if(count_iterate == 1) 00479 { 00480 //[DEBUG]: 00481 cout<<"du[ "<<k<<"]:\t"<<du.transpose()<<endl; 00482 cout<<"dx[ "<<k<<"]:\t"<<dx.transpose()<<endl; 00483 cout<<"ku[ "<<k<<"]:\t"<<ku.transpose()<<endl; 00484 cout<<"Kux[ "<<k<<"]:"<<endl<<Kux<<endl; 00485 } 00486 */ 00487 00488 const double &t = this->ts[k]; 00489 double h = this->ts[k+1] - t; 00490 00491 double L = this->cost.L(t, xn, un, h); 00492 Vm += L; 00493 00494 // update dx 00495 if (type == PURE) { 00496 dx = this->As[k]*dx + this->Bs[k]*du; 00497 this->sys.X.Retract(xn, xn, dx); 00498 } else { 00499 double h = this->ts[k+1] - t; 00500 //Adding nan catching : 00501 try 00502 { 00503 this->sys.Step(xn, un, h, this->p); 00504 } 00505 catch(std::exception &e) 00506 { 00507 //[DEBUG] Statement 00508 std::cerr << "exception caught: " << e.what() << '\n'; 00509 std::cout<<" Iteration counter: "<<k<<endl; 00510 std::cout<<" u: "<<u.transpose()<<endl; 00511 std::cout<<" du: "<<du.transpose()<<endl; 00512 //More debug statements: 00513 //std::cout<<" a: "<<a<<"\t ku: "<<ku.transpose()<<"\t dx: "<<dx.transpose()<<"\n kux: \n"<<Kux<<endl; 00514 //[Culprit dx] 00515 00516 throw std::runtime_error(std::string("Nan observed")); 00517 return; 00518 } 00519 //cout<<"dx: "<<dx.transpose()<<endl;//[DEBUG]//Previous iteration dx 00520 //std::cout<<" du: "<<du.transpose()<<endl;//[DEBUG] 00521 this->sys.X.Lift(dx, this->xs[k+1], xn); 00522 /*if((k == 29) || (k == 30) || (k == 31)) 00523 { 00524 //cout dx: 00525 cout<<"dx[ "<<(k+1)<<"]:\t"<<dx.transpose()<<endl; 00526 cout<<"un[ "<<(k+1)<<"]:\t"<<un.transpose()<<endl; 00527 cout<<"du[ "<<k<<"]:\t"<<du.transpose()<<endl; 00528 cout<<"Printing states ["<<(k+1)<<"]"<<endl; 00529 this->sys.print(this->xs[k+1]); 00530 cout<<"Printing xn: "<<endl; 00531 this->sys.print(xn); 00532 if(k == 31) 00533 { 00534 //exit(0); 00535 } 00536 } 00537 */ 00538 //cout<<"xs[ "<<(k+1)<<"]:\t"<<this->xs[k+1]<<endl; 00539 //cout<<"un[ "<<(k+1)<<"]:\t"<<un.transpose()<<endl; 00540 00541 // cout << xn.gs[0] << " " << xn.r << " " << xn.vs[0] << " " << xn.dr << endl; 00542 00543 // cout << this->xs[k+1].gs[0] << " " << this->xs[k+1].r << " " << this->xs[k+1].vs[0] << " " << this->xs[k+1].dr << endl; 00544 assert(!std::isnan(dx[0])); 00545 } 00546 } 00547 ++(this->nofevaluations); 00548 00549 double L = this->cost.L(this->ts[N], xn, un, 0); 00550 Vm += L; 00551 00552 if (this->debug) 00553 cout << "[I] SDdp::Forward: measured V=" << Vm << endl; 00554 00555 dVm = Vm - V; 00556 00557 if (dVm > 0) { 00558 current_a *= b1; 00559 if(current_a == 0) 00560 break; 00561 if (current_a < 1e-12) 00562 { 00563 current_a = 0; 00564 } 00565 if (this->debug) 00566 cout << "[I] SDdp::Forward: step-size reduced a=" << current_a << endl; 00567 00568 continue; 00569 } 00570 00571 /*double r = dVm/(current_a*dV[0] + current_a*current_a*dV[1]); 00572 if (r < s1) 00573 current_a = b1*current_a; 00574 else 00575 if (r >= s2) 00576 current_a = b2*current_a; 00577 00578 if (this->debug) 00579 cout << "[I] SDdp::Forward: step-size a=" << current_a << endl; 00580 */ 00581 } 00582 this->J = V + dVm;//Set the optimal cost after one iteration 00583 } 00584 00585 template <typename T, int nx, int nu, int np> 00586 void SDdp<T, nx, nu, np>::Iterate() { 00587 00588 Linearize();//Linearize with the current us 00589 Backward(); 00590 Forward(); 00591 for (int k = 0; k < N; ++k) 00592 this->us[k] += dus[k]; 00593 this->Update(false); 00594 ++(this->nofevaluations); 00595 //this->Update(true);//#DEBUG 00596 } 00597 template <typename T, int nx, int nu, int np> 00598 void SDdp<T, nx, nu, np>::Linearize(){ 00599 //static int count_iterate = 0; 00600 randgenerator.seed(370212); 00601 MatrixXd dusmatrix(nu*N,Ns); 00602 MatrixXd dxsmatrix(nx*(N+1),Ns); 00603 00604 //Vectorcd du; 00605 Vectorcd us1; 00606 00607 int count = 0; 00608 00609 //Create dus random matrix: 00610 //#pragma omp parallel for private(count) 00611 /*for(count = 0;count < Ns;count++) 00612 { 00613 for(int count1 = 0;count1 < N*nu;count1++) 00614 { 00615 //dusmatrix(count1,count) = normal_dist(randgenerator); 00616 int count_u = count1%nu;//find the index corresponding to du_scale 00617 dusmatrix(count1,count) = duscale(count_u)*uniform_dist(randgenerator); 00618 } 00619 } 00620 */ 00621 //cout<<dusmatrix<<endl;//#DEBUG 00622 //getchar(); 00623 00624 /*Find xsprev based on dus and us: 00625 this->sys.Reset(this->xs[0],this->ts[0]); 00626 for(int count_traj = 0; count_traj< N;count_traj++) 00627 { 00628 us1 = this->us[count_traj] - this->dus[count_traj]; 00629 this->sys.Step(xsprev[count_traj+1],us1,(this->ts[count_traj+1])-(this->ts[count_traj]), this->p); 00630 } 00631 */ 00632 00633 //dxsmatrix.block(0,0,nx,Ns).setZero();//Set zero first block 00634 Vectornd dx; 00635 T x0_sample; 00636 Rn<nu> &U = (Rn<nu>&)this->sys.U; 00637 //NonParallelizable code for now 00638 // First do some iterations to adjust the dus_scale(stdeviation of du) so that dx_stdeviation lies close to dx_scale (target stdeviation) across trajectory 00639 //Set all du_sigma to given default values: 00640 for(count = 0; count < N; count++) 00641 { 00642 du_sigma[count] = duscale;//Set to given default value 00643 } 00644 //double kp = 0.005; 00645 /* 00646 double kp = 0.002; 00647 int Ns1 = 15; 00648 vector<double> dxsquare(N+1, 0);//0 to N 00649 Vectornd dxscale1 = dxscale; 00650 for(int count_it = 0;count_it < 2;count_it++) 00651 { 00652 for(count = 0;count < Ns1; count++) 00653 { 00654 //Adding Variance to duscale to ensure common variance over the trajectory///////////////////: 00655 // First Sample: 00656 for(int count1 = 0; count1 < nx; count1++) 00657 { 00658 dx(count1) = dxscale1(count1)*normal_dist(randgenerator);//Adjust dx_scale 00659 } 00660 //dxsmatrix.block<nx,1>(0,count) = dx; 00661 this->sys.X.Retract(x0_sample, this->xs[0], dx); 00662 xss[0] = x0_sample; 00663 this->sys.Reset(x0_sample,this->ts[0]); 00664 for(int count_traj = 0; count_traj< N;count_traj++) 00665 { 00666 this->sys.X.Lift(dx, this->xs[count_traj], xss[count_traj]);//This is for feedback 00667 dxsquare[count_traj] += dx.squaredNorm(); 00668 //cout<<"dx: "<<dx.transpose()<<endl; 00669 //if(du_sigma[count_traj].maxCoeff() > 0.00011)//Only Use Feedback if variance is atleast 0.002 00670 { 00671 us1 = this->us[count_traj] + this->Kuxs[count_traj]*dx;//Before Update 00672 } 00673 */ 00674 /*else 00675 { 00676 us1 = this->us[count_traj];//No Feedback 00677 } 00678 */ 00679 /*for(int count_u = 0;count_u < nu; count_u++) 00680 { 00681 */ 00682 /*if(du_sigma[count_traj][count_u] < 0.002) 00683 du_sigma[count_traj][count_u] = 0.002; 00684 else if(du_sigma[count_traj][count_u] > 0.2) 00685 du_sigma[count_traj][count_u] = 0.2;//Bounds ond du_sigma 00686 */ 00687 /* 00688 us1[count_u] = us1[count_u] + du_sigma[count_traj][count_u]*normal_dist(randgenerator); 00689 } 00690 //cout<<"du_sigma: "<<du_sigma[count_traj].transpose()<<endl; 00691 //getchar(); 00692 this->sys.Step(xss[count_traj+1],us1,(this->ts[count_traj+1])-(this->ts[count_traj]), this->p); 00693 } 00694 this->sys.X.Lift(dx, this->xs[N], xss[N]);//This is for feedback 00695 dxsquare[N] += dx.squaredNorm(); 00696 //cout<<(dx.squaredNorm()/dxscale.squaredNorm()-1)<<endl; 00697 } 00698 cout<<(dxsquare[0]/(Ns1*dxscale1.squaredNorm()) - 1)<<endl; 00699 for(count = 1;count <= N; count++) 00700 { 00701 for(int count_sigma = 0;count_sigma < count; count_sigma++) 00702 { 00703 du_sigma[count_sigma] -= (kp*(count_sigma+1)/double(count))*Vectorcd::Constant(dxsquare[count]/(Ns1*dxscale.squaredNorm()) - 1); 00704 //cout<<"du_sigma, dx-dxscale["<<count_sigma<<"]: "<<du_sigma[count_sigma].transpose()<<endl; 00705 for(int count_u = 0;count_u < nu; count_u++) 00706 { 00707 if(du_sigma[count_sigma][count_u] < 0.005) 00708 du_sigma[count_sigma][count_u] = 0.005; 00709 else if(du_sigma[count_sigma][count_u] > 0.2) 00710 du_sigma[count_sigma][count_u] = 0.2;//Bounds ond du_sigma 00711 } 00712 } 00713 //dxscale1 -= (kp/double(count))*Vectornd::Constant(dxsquare[count]/(Ns1*dxscale.squaredNorm()) - 1); 00714 cout<<(dxsquare[count]/(Ns1*dxscale.squaredNorm()) - 1)<<endl; 00715 //set dxsquare back to zero: 00716 dxsquare[count] = 0; 00717 } 00718 dxsquare[0] = 0; 00719 for(count = 0; count< N; count++) 00720 { 00721 cout<<"du_sigma["<<count<<"]: "<<du_sigma[count].transpose()<<endl; 00722 } 00723 //cout<<(dx.squaredNorm()/dxscale.squaredNorm() - 1)<<endl; 00724 cout<<"------Iteration done----"<<endl; 00725 } 00726 */ 00727 //getchar(); 00728 for(count = 0;count < Ns;count++) 00729 { 00730 //Set to initial state perturbed by small amount: 00731 for(int count1 = 0; count1 < nx; count1++) 00732 { 00733 dx(count1) = this->dxscale(count1)*normal_dist(randgenerator);//Adjust dx_scale 00734 } 00735 dxsmatrix.block<nx,1>(0,count) = dx; 00736 //cout<<"dx0: "<<dx.transpose()<<endl; 00737 //cout<<dx.norm(); 00738 00739 this->sys.X.Retract(x0_sample, this->xs[0], dx); 00740 00741 xss[0] = x0_sample; 00742 this->sys.Reset(x0_sample,this->ts[0]); 00743 for(int count1 = 0;count1 < N;count1++) 00744 { 00745 //this->sys.X.Lift(dx, xsprev[count1], xss[count1]);//This is for feedback 00746 //us1 = this->us[count1] - this->dus[count1] + (this->current_a)*this->kus[count1] + this->Kuxs[count1]*dx;//Before Update 00747 this->sys.X.Lift(dx, this->xs[count1], xss[count1]);//This is for feedback 00748 dxsmatrix.block<nx,1>((count1)*nx,count) = dx; 00749 //if(du_sigma[count1].maxCoeff() > 0.00011)//Only Use Feedback if variance is greater than 0.0001(min) 00750 { 00751 us1 = this->us[count1] + this->Kuxs[count1]*dx;//Before Update 00752 //us1 = this->us[count1];//No Feedback 00753 //cout<<"Feedback: "<< (this->Kuxs[count1]*dx).transpose()<<endl; 00754 } 00755 /*else 00756 { 00757 us1 = this->us[count1];//No Feedback 00758 } 00759 */ 00760 // if(count_iterate != 2) 00761 { 00762 for(int count_u = 0;count_u < nu; count_u++) 00763 { 00764 //Make sure Qud is not smaller than some threshold: 00765 //dusmatrix(count1,count) = duscale(count_u)*uniform_dist(randgenerator); 00766 //if(Qud[count1][count_u]<(1.0/(duscale[count_u]*duscale[count_u]))) 00767 assert((du_sigma[count1][count_u] < 0.21)&&(du_sigma[count1][count_u]>0.00009)); 00768 us1[count_u] = us1[count_u] + du_sigma[count1][count_u]*normal_dist(randgenerator); 00769 //else 00770 //us1[count_u] = us1[count_u] + (1.0/sqrt(Qud[count1][count_u]))*normal_dist(randgenerator); 00771 } 00772 //getchar(); 00773 } 00774 //The sampled control should be within the control bounds of the system !!! 00775 if (U.bnd) { 00776 for (int count_u = 0; count_u < nu; ++count_u) 00777 if (us1[count_u] < U.lb[count_u]) { 00778 //cout<<"I hit bound"<<endl;//[DEBUG] 00779 us1[count_u] = U.lb[count_u]; 00780 // du[j] = un[j] - u[j]; 00781 } else 00782 if (us1[count_u] > U.ub[count_u]) { 00783 us1[count_u] = U.ub[count_u]; 00784 //du[j] = un[j] - u[j]; 00785 } 00786 } 00787 00788 dusmatrix.block<nu,1>(count1*nu, count) = us1 - this->us[count1];//Verify that this is zero without randomness #DEBUG 00789 //dumatrix.block<nu,1>(count1*nu,count) = us1; 00790 /*if(count_iterate == 2) 00791 { 00792 cout<<"us1, us: "<<us1.transpose()<<"\t , "<<(this->us[count1]).transpose()<<endl; 00793 //cout<<"dus: "<<(this->dus[count1]).transpose()<<"\t"<<.transpose()<<endl; 00794 cout<<"dx[ "<<count1<<"]:\t"<<dx.transpose()<<endl; 00795 cout<<"ku[ "<<count1<<"]:\t"<<this->kus[count1].transpose()<<endl; 00796 cout<<"Kux[ "<<count1<<"]:"<<endl<<this->Kuxs[count1]<<endl; 00797 cout<<"current_a"<<current_a<<endl; 00798 //cout<<"ku, Kxu: "<<endl<<(this->kus[count1])<<endl<<(this->Kuxs[count1])<<endl; 00799 } 00800 */ 00801 //cout<<"du, u: "<<du.transpose()<<"\t"<<(this->us[count1]).transpose()<<endl; 00802 this->sys.Step(xss[count1+1],us1,(this->ts[count1+1])-(this->ts[count1]), this->p); 00803 //this->sys.X.Lift(dx, this->xs[count1+1], xss[count1+1]);//This is for fitting Linear Model 00804 //cout<<" "<<dx.norm(); 00805 /*if(count_iterate == 2) 00806 { 00807 cout<<"dx: "<<count1<<"\t"<<dx.transpose()<<endl; 00808 //getchar(); 00809 } 00810 */ 00811 } 00812 ++(this->nofevaluations); 00813 //Final step: 00814 this->sys.X.Lift(dx, this->xs[N], xss[N]);//This is for feedback 00815 dxsmatrix.block<nx,1>((N)*nx,count) = dx; 00816 //cout<<endl; 00817 00818 //Render trajectory samples if external rendering function is provided: 00819 if(external_render) 00820 { 00821 external_render(count,xss);//ID for the sample trajectory 00822 } 00823 //if(count_iterate == 2) 00824 //getchar(); 00825 } 00826 00827 //Matrix<double, nx, nx+nu>Abs; 00828 //cout<<dxsmatrix<<endl;//#DEBUG 00829 //getchar(); 00830 MatrixXd Abs(nx+nu,nx); 00831 MatrixXd XUMatrix(nx+nu,Ns); 00832 00833 MatrixXd Xverifymatrix(nx,Ns);//Verify 00834 00835 //Compute As and Bs for every k: 00836 for(count = 0;count < N;count++) 00837 { 00838 XUMatrix<<dxsmatrix.block(count*nx,0,nx,Ns), dusmatrix.block(count*nu,0,nu,Ns); 00839 Abs = (XUMatrix*XUMatrix.transpose()).ldlt().solve(XUMatrix*dxsmatrix.block((count+1)*nx,0,nx,Ns).transpose()); 00840 // Compare As with true As #DEBUG: 00841 /*cout<<"Computed: As, Bs: "<<endl; 00842 cout<<endl<<Abs.block<nx,nx>(0,0).transpose()<<endl; 00843 cout<<endl<<Abs.block<nu,nx>(nx,0).transpose()<<endl; 00844 cout<<"True As, bs: "<<endl; 00845 cout<<endl<<this->As[count]<<endl; 00846 cout<<endl<<this->Bs[count]<<endl; 00847 cout<<"Diff As, bs: "<<endl; 00848 cout<<endl<<(Abs.block<nx,nx>(0,0).transpose() - this->As[count])<<endl; 00849 cout<<endl<<(Abs.block<nu,nx>(nx,0).transpose() - this->Bs[count])<<endl; 00850 */ 00851 // getchar(); 00852 00853 this->As[count] = Abs.block<nx,nx>(0,0).transpose(); 00854 this->Bs[count] = Abs.block<nu,nx>(nx,0).transpose(); 00855 00856 /******VERIFY**********/ 00857 Xverifymatrix = Abs.transpose()*XUMatrix; 00858 //cout<<endl<<"dXpredicted: "<<endl<<Xverifymatrix<<endl; 00859 //cout<<endl<<"True_dX: "<<dxsmatrix.block((count+1)*nx,0,nx,Ns)<<endl; 00860 cout<<endl<<"Error_predicted: "<<(Xverifymatrix - dxsmatrix.block((count+1)*nx,0,nx,Ns)).squaredNorm()<<endl; 00861 //cout<<endl<<Abs<<endl; 00862 //getchar(); 00863 00864 //Debug: Print: 00865 /* cout<<"As, Bs, Abs: "<<endl; 00866 cout<<endl<<Abs<<endl; 00867 cout<<endl<<this->As[count]<<endl; 00868 cout<<endl<<this->Bs[count]<<endl; 00869 cout<<"Comparing dxs[count+1] with As*dxs[count] + Bs[count]*du forall samples "<<endl; 00870 cout<<"Prediction - Actual: "<<endl; 00871 cout<<endl<<(Abs.transpose()*XUMatrix - dxsmatrix.block((count+1)*nx,0,nx,Ns))<<endl; 00872 getchar(); 00873 */ 00874 } 00875 //count_iterate++; 00876 } 00877 } 00878 00879 #endif