GCOP
1.0
|
00001 #ifndef GCOP_PDDP_H 00002 #define GCOP_PDDP_H 00003 00004 #include <Eigen/Dense> 00005 #include <vector> 00006 #include <type_traits> 00007 #include <algorithm> 00008 #include <iostream> 00009 #include "ddp.h" 00010 00011 namespace gcop { 00012 00013 template <typename T, int n = Dynamic, int c = Dynamic, int np = Dynamic> 00014 class PDdp : public Ddp<T, n, c, np> { 00015 00016 typedef Eigen::Matrix<double, n, 1> Vectornd; 00017 typedef Eigen::Matrix<double, c, 1> Vectorcd; 00018 00019 typedef Eigen::Matrix<double, n, n> Matrixnd; 00020 typedef Eigen::Matrix<double, n, c> Matrixncd; 00021 typedef Eigen::Matrix<double, c, n> Matrixcnd; 00022 typedef Eigen::Matrix<double, c, c> Matrixcd; 00023 00024 typedef Eigen::Matrix<double, np, 1> Vectormd; 00025 typedef Eigen::Matrix<double, np, np> Matrixmd; 00026 typedef Eigen::Matrix<double, n, np> Matrixnmd; 00027 typedef Eigen::Matrix<double, np, n> Matrixmnd; 00028 00029 typedef Eigen::Matrix<double, c, np> Matrixcmd; 00030 typedef Eigen::Matrix<double, np, c> Matrixmcd; 00031 00032 00033 public: 00034 00055 PDdp(System<T, n, c, np> &sys, Cost<T, n, c, np> &cost, 00056 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 00057 Vectormd &p, int dynParams = 0, bool update = true); 00058 00065 void Iterate(); 00066 00067 00072 void Update(bool der = true); 00073 00077 void Forward(); 00078 00082 void Backward(); 00083 00084 00085 int m; 00086 int dynParams; 00087 00088 VectorXd &p; 00089 VectorXd dp; 00090 00091 std::vector<Matrixnmd> Cs; 00092 00093 std::vector<MatrixXd> Kuxs; 00094 00095 VectorXd Lxa; 00096 MatrixXd Lxxa; 00097 Vectorcd Lu; 00098 Matrixcd Luu; 00099 MatrixXd Lxua; 00100 00101 VectorXd v; 00102 MatrixXd P; 00103 00104 double nu; 00105 double nu0; 00106 double dnu0; 00107 bool printDebug; 00108 00109 00110 bool pdX(const MatrixXd &P) { 00111 LLT<MatrixXd> llt; 00112 llt.compute(P); 00113 return llt.info() == Eigen::Success; 00114 } 00115 00116 }; 00117 00118 00119 using namespace std; 00120 using namespace Eigen; 00121 00122 template <typename T, int n, int c, int np> 00123 PDdp<T, n, c, np>::PDdp(System<T, n, c, np> &sys, 00124 Cost<T, n, c, np> &cost, 00125 vector<double> &ts, 00126 vector<T> &xs, 00127 vector<Matrix<double, c, 1> > &us, 00128 Matrix<double, np, 1> &p, int dynParams, bool update) : 00129 Ddp<T, n, c, np>(sys, cost, ts, xs, us, &p, false), 00130 m(p.size()), dynParams(dynParams), p(p), dp(m), Cs(this->N), 00131 nu(1e-3), nu0(1e-3), dnu0(2) { 00132 00133 Lxa.resize(sys.X.n + m); 00134 Lxxa.resize(sys.X.n + m, sys.X.n + m); 00135 Lxua.resize(sys.X.n + m, sys.U.n); 00136 Kuxs.resize(this->N); 00137 00138 for (int i = 0; i < this->N; ++i) { 00139 Kuxs[i] = MatrixXd::Zero(sys.U.n, sys.X.n + m); 00140 if (dynParams) 00141 Cs[i].resize(sys.X.n, dynParams); 00142 } 00143 00144 00145 if (update) 00146 Update(); 00147 printDebug = false; 00148 } 00149 00150 00151 template <typename T, int n, int c, int np> 00152 void PDdp<T, n, c, np>::Update(bool der) { 00153 00154 VectorXd pdyn; 00155 if (dynParams) 00156 pdyn = this->p.head(dynParams); 00157 for (int k = 0; k < this->N; ++k) { 00158 double h = this->ts[k+1] - this->ts[k]; 00159 if (der) 00160 this->sys.Step(this->xs[k+1], this->ts[k], this->xs[k], this->us[k], h, 00161 dynParams ? &pdyn : 0, 00162 &this->As[k], &this->Bs[k], dynParams ? &this->Cs[k] : 0); 00163 else 00164 this->sys.Step(this->xs[k+1], this->ts[k], this->xs[k], this->us[k], h, 00165 dynParams ? &pdyn : 0); 00166 } 00167 } 00168 00169 template <typename T, int n, int c, int np> 00170 void PDdp<T, n, c, np>::Backward() { 00171 00172 int N = this->us.size(); 00173 00174 typedef Matrix<double, n, 1> Vectornd; 00175 typedef Matrix<double, c, 1> Vectorcd; 00176 typedef Matrix<double, n, n> Matrixnd; 00177 typedef Matrix<double, n, c> Matrixncd; 00178 typedef Matrix<double, c, n> Matrixcnd; 00179 typedef Matrix<double, c, c> Matrixcd; 00180 typedef Matrix<double, Dynamic, 1> Vectormd; 00181 typedef Matrix<double, Dynamic, Dynamic> Matrixmd; 00182 typedef Matrix<double, n, Dynamic> Matrixnmd; 00183 typedef Matrix<double, Dynamic, n> Matrixmnd; 00184 00185 double t = this->ts.back(); 00186 const T &x = this->xs.back(); 00187 const Vectorcd &u = this->us.back(); 00188 00189 00190 Vectornd Lx; 00191 Matrixnd Lxx; 00192 Vectormd Lp; 00193 Matrixmd Lpp; 00194 Matrixmnd Lpx; 00195 Vectorcd Lu; 00196 Matrixcd Luu; 00197 Matrixncd Lxu; 00198 00199 Lp.resize(m); 00200 Lpp.resize(m,m); 00201 Lpx.resize(m,n); 00202 00203 double L = this->cost.L(t, x, u, 0, &p, 00204 &Lx, &Lxx, 0, 0, 0, 00205 &Lp, &Lpp, &Lpx); 00206 00207 this->V = L; 00208 this->dV.setZero(); 00209 00210 /* 00211 cout << "Lxa: " << Lxa.rows() << "x" << Lxa.cols() << endl; 00212 cout << "Lx: " << Lx.rows() << "x" << Lx.cols() << endl; 00213 cout << "Lp: " << Lp.rows() << "x" << Lp.cols() << endl; 00214 cout << "Lxxa: " << Lxxa.rows() << "x" << Lxxa.cols() << endl; 00215 cout << "Lxx: " << Lxx.rows() << "x" << Lxx.cols() << endl; 00216 cout << "Lpp: " << Lpp.rows() << "x" << Lpp.cols() << endl; 00217 cout << "Lpx: " << Lpx.rows() << "x" << Lpx.cols() << endl; 00218 */ 00219 00220 00221 Lxa.head(n) = Lx; 00222 Lxa.tail(m) = Lp; 00223 Lxxa.block(0,0,n,n) = Lxx; 00224 Lxxa.block(n,n,m,m) = Lpp; 00225 Lxxa.block(n,0,m,n) = Lpx; 00226 Lxxa.block(0,n,n,m) = Lpx.transpose(); 00227 00228 /* 00229 cout << "Lxa:" << endl << Lxa << endl; 00230 cout << "Lx:" << endl << Lx << endl; 00231 cout << "Lp:" << endl << Lp << endl; 00232 cout << "Lxxa:" << endl << Lxxa << endl; 00233 cout << "Lxx:" << endl << Lxx << endl; 00234 cout << "Lpp:" << endl << Lpp << endl; 00235 cout << "Lpx:" << endl << Lpx << endl; 00236 */ 00237 00238 v = Lxa; 00239 P = Lxxa; 00240 00241 //cout << "v start: " << endl << v << endl; 00242 //cout << "P start: " << endl << P << endl; 00243 00244 VectorXd Qx = VectorXd::Zero(this->sys.X.n + m); 00245 Vectorcd Qu; 00246 00247 MatrixXd Qxx = MatrixXd::Zero(this->sys.X.n + m, this->sys.X.n+m); 00248 Matrixcd Quu; 00249 Matrixcd Quum; 00250 MatrixXd Quxm = MatrixXd::Zero(this->sys.U.n, this->sys.X.n+m); 00251 MatrixXd Qux = MatrixXd::Zero(this->sys.U.n, this->sys.X.n+m); 00252 00253 MatrixXd Aat = MatrixXd::Zero(this->sys.X.n + m, this->sys.X.n+m); 00254 MatrixXd At = MatrixXd::Zero(this->sys.X.n, this->sys.X.n); 00255 MatrixXd Bt = MatrixXd::Zero(this->sys.U.n, this->sys.X.n); 00256 00257 Matrixcd Ic = MatrixXd::Identity(this->sys.U.n, this->sys.U.n); 00258 00259 for (int k = N - 1; k >=0; --k) { 00260 t = this->ts[k]; 00261 const T &x = this->xs[k]; 00262 const Vectorcd &u = this->us[k]; 00263 double h = this->ts[k+1] - this->ts[k]; 00264 assert(h >0); 00265 00266 double L = this->cost.L(t, x, u, h, &p, &Lx, &Lxx, &Lu, &Luu, &Lxu, &Lp, &Lpp, &Lpx); 00267 Lxa.head(n) = Lx; 00268 Lxa.tail(m) = Lp; 00269 Lxxa.block(0,0,n,n) = Lxx; 00270 Lxxa.block(n,n,m,m) = Lpp; 00271 Lxxa.block(n,0,m,n) = Lpx; 00272 Lxxa.block(0,n,n,m) = Lpx.transpose(); 00273 Lxua.setZero(); 00274 Lxua.block(0,0,n,c) = Lxu; 00275 00276 // cout << "L=" << L << endl; 00277 00278 00279 this->V += L; 00280 00281 MatrixXd A = this->As[k]; 00282 MatrixXd Aa = MatrixXd::Identity(this->sys.X.n + m, this->sys.X.n + m); 00283 Aa.block(0,0,n,n) = A; 00284 if(dynParams) 00285 Aa.block(0,n,n,dynParams) = this->Cs[k]; 00286 00287 MatrixXd B = this->Bs[k];//MatrixXd::Zero(this->sys.X.n + m, this->sys.U.n); 00288 //B.block(0,0,n,c) = this->Bs[k]; 00289 00290 Vectorcd &ku = this->kus[k]; 00291 MatrixXd &Kux = Kuxs[k]; 00292 00293 Aat = Aa.transpose(); 00294 At = A.transpose(); 00295 Bt = B.transpose(); 00296 00297 Qx = Lxa + Aat*v; 00298 Qu = Lu + Bt*v.head(n); 00299 00300 Qxx = Lxxa + Aat*P*Aa; 00301 Quu = Luu + Bt*P.block(0,0,n,n)*B; 00302 Qux.block(0,0,c,n) = Bt*P.block(0,0,n,n)*A; // assume Lux = 0 00303 Qux.block(0,n,c,m) = Bt*P.block(0,n,n,m); 00304 if(dynParams) 00305 Qux.block(0,n,c,dynParams) += Bt*P.block(0,0,n,n)*this->Cs[k]; 00306 00307 double dmu = 1; 00308 00309 LLT<Matrixcd> llt; 00310 00311 printDebug = false; 00312 00313 double mu = this->mu; 00314 if (this->debug) { 00315 if (!pdX(P)) { 00316 // cout << "P[" << k << "] is not pd =" << endl << P << endl; 00317 // cout << "Luu=" << endl << Luu << endl; 00318 } 00319 00320 Matrixcd Pp = Bt*P.block(0,0,n,n)*B; 00321 if (!pdX(Pp)) { 00322 // cout << "B'PB is not positive definite:" << endl << Pp << endl; 00323 } 00324 } 00325 00326 if(false) 00327 { 00328 cout << "k=" << endl << k << endl; 00329 //cout << "x:" << endl << x << endl; 00330 cout << "u:" << endl << u << endl; 00331 cout << "p:" << endl << p << endl; 00332 cout << "Qxx:" << endl << Qxx << endl; 00333 cout << "Quu:" << endl << Quu << endl; 00334 cout << "Qux:" << endl << Qux << endl; 00335 cout << "P:" << endl << P << endl; 00336 cout << "v:" << endl << v << endl; 00337 cout << "Lxxa: " << endl << Lxxa << endl; 00338 cout << "Lxx: " << endl << Lxx << endl; 00339 cout << "Lpp: " << endl << Lpp << endl; 00340 cout << "Lpx: " << endl << Lpx << endl; 00341 cout << "Lxa: " << endl << Lxa << endl; 00342 cout << "Luu: " << endl << Luu << endl; 00343 cout << "Lu: " << endl << Lu << endl; 00344 cout << "Aa: " << endl << Aa << endl; 00345 cout << "B: " << endl << B << endl; 00346 } 00347 00348 00349 while (1) { 00350 Quum = Quu + mu*Ic; 00351 00352 // From https://homes.cs.washington.edu/~todorov/papers/TassaIROS12.pdf 00353 //MatrixXd Pm = P+mu*MatrixXd::Identity(n+m,n+m); 00354 //Quum = Luu + Bt*Pm.block(0,0,n,n)*B; 00355 llt.compute(Quum); 00356 00357 // if OK, then reduce mu and break 00358 if (llt.info() == Eigen::Success) { 00359 00360 //Quxm.block(0,0,c,n) = Bt*Pm.block(0,0,n,n)*A; // assume Lux = 0 00361 //Quxm.block(0,n,c,m) = Bt*Pm.block(0,n,n,m); 00362 //if(dynParams) 00363 // Quxm.block(0,n,c,dynParams) += Bt*Pm.block(0,0,n,n)*this->Cs[k]; 00364 00365 // Tassa and Todorov recently proposed this quadratic rule, seems pretty good 00366 dmu = min(1/this->dmu0, dmu/this->dmu0); 00367 if (mu*dmu > this->mu0) 00368 { 00369 mu = mu*dmu; 00370 } 00371 else 00372 { 00373 mu = this->mu0; 00374 } 00375 break; 00376 } 00377 else 00378 { 00379 // if negative then increase mu 00380 dmu = max(this->dmu0, dmu*this->dmu0); 00381 mu = max(this->mu0, mu*dmu); 00382 } 00383 00384 if (this->debug) 00385 cout << "[I] PDdp::Backward: increased mu=" << mu << " at k=" << k << endl; 00386 printDebug = true; 00387 00388 if (mu > this->mumax) { 00389 cout << "[W] PDdp::Backward: mu= " << mu << " exceeded maximum !" << endl; 00390 if (this->debug) 00391 getchar(); 00392 break; 00393 } 00394 00395 } 00396 00397 00398 if(mu > this->mumax) 00399 break; 00400 00401 00402 ku = -llt.solve(Qu); 00403 Kux = -llt.solve(Qux); 00404 //Kux = -llt.solve(Quxm); 00405 00406 assert(!std::isnan(ku[0])); 00407 assert(!std::isnan(Kux(0,0))); 00408 00409 // Update used in https://homes.cs.washington.edu/~todorov/papers/TassaIROS12.pdf 00410 //v = Qx + Kux.transpose()*Quu*ku + Kux.transpose()*Qu + Qux.transpose()*ku; 00411 //P = Qxx + Kux.transpose()*Quu*Kux + Kux.transpose()*Qux + Qux.transpose()*Kux; 00412 00413 v = Qx + Kux.transpose()*Qu; 00414 P = Qxx + Kux.transpose()*Qux; 00415 this->dV[0] += ku.dot(Qu); 00416 this->dV[1] += ku.dot(Quu*ku/2); 00417 00418 } 00419 00420 if (this->debug) 00421 cout << "[I] PDdp::Backward: current V=" << this->V << endl; 00422 00423 } 00424 00425 template <int n = Dynamic, int c = Dynamic> 00426 Matrix<double, n, c> sym(const Matrix<double, n, c> &A) { return A+A.transpose(); }; 00427 00428 template <typename T, int n, int c, int np> 00429 void PDdp<T, n, c, np>::Forward() { 00430 00431 typedef Matrix<double, n, 1> Vectornd; 00432 typedef Matrix<double, c, 1> Vectorcd; 00433 typedef Matrix<double, n, n> Matrixnd; 00434 typedef Matrix<double, n, c> Matrixncd; 00435 typedef Matrix<double, c, n> Matrixcnd; 00436 typedef Matrix<double, c, c> Matrixcd; 00437 typedef Matrix<double, np, 1> Vectormd; 00438 typedef Matrix<double, np, np> Matrixmd; 00439 typedef Matrix<double, n, np> Matrixnmd; 00440 typedef Matrix<double, np, n> Matrixmnd; 00441 00442 // find dp 00443 int N = this->us.size(); 00444 00445 LLT<Matrixmd> llt; 00446 double nu = this->nu; 00447 double dnu = 1; 00448 Vectormd bp = v.tail(m); 00449 00450 while (1) { 00451 Matrixmd Apm = P.block(n,n,m,m); 00452 Apm.diagonal() = Apm.diagonal().array() + nu; 00453 00454 llt.compute(Apm); 00455 // if OK, then reduce mu and break 00456 if (llt.info() == Eigen::Success) { 00457 dnu = min(1/this->dnu0, dnu/this->dnu0); 00458 if (nu*dnu > this->nu0) 00459 nu *= dnu; 00460 else 00461 nu = this->nu0; 00462 break; 00463 } 00464 00465 // if negative then increase mu 00466 dnu = max(this->dnu0, dnu*this->dnu0); 00467 nu = max(this->nu0, nu*dnu); 00468 if (this->debug) 00469 { 00470 cout << "[I] PDdp::Forward: increased nu=" << nu << endl; 00471 } 00472 } 00473 00474 dp = -llt.solve(bp); 00475 00476 // measured change in V 00477 double dVm = 1; 00478 00479 double a = this->a; 00480 00481 Vectormd dp1 = dp; 00482 00483 while (dVm > 0) { 00484 VectorXd dxa = VectorXd::Zero(this->sys.X.n + m); 00485 Vectornd dx = VectorXd::Zero(this->sys.X.n); 00486 00487 dp = a*dp1; 00488 dxa.tail(m) = dp; 00489 // Vectornd dx = Vectornd::Zero(); 00490 T xn = this->xs[0]; 00491 Vectorcd un; 00492 Vectormd pn = p + dp; 00493 00494 VectorXd pdyn; 00495 if (dynParams) 00496 pdyn = pn.head(dynParams); 00497 00498 double Vm = 0; 00499 00500 for (int k = 0; k < N; ++k) { 00501 const Vectorcd &u = this->us[k]; 00502 Vectorcd &du = this->dus[k]; 00503 00504 du = a*this->kus[k] + Kuxs[k]*dxa; 00505 un = u + du; 00506 00507 //cout << "k=" << k << endl; 00508 //cout << "kus:" << endl << this->kus[k] << endl; 00509 //cout << "Kuxs:" << endl << Kuxs[k] << endl; 00510 00511 const double &t = this->ts[k]; 00512 double L = this->cost.L(t, xn, un, 0, &pn); 00513 Vm += L; 00514 00515 // update dx 00516 if (this->type == this->PURE) { 00517 dx = this->As[k]*dx + this->Bs[k]*du; 00518 if(dynParams) 00519 dx += Cs[k]*dp.head(dynParams); 00520 this->sys.X.Retract(xn, xn, dx); 00521 } else { 00522 double h = this->ts[k+1] - t; 00523 T xn_(xn); 00524 this->sys.Step(xn_, t, xn, un, h, dynParams ? &pdyn : 0); 00525 xn = xn_; 00526 this->sys.X.Lift(dx, this->xs[k+1], xn); 00527 } 00528 00529 dxa.head(n) = dx; 00530 } 00531 00532 double L = this->cost.L(this->ts[N], xn, un, 0, &pn); 00533 Vm += L; 00534 00535 if (this->debug) 00536 cout << "[I] PDdp::Forward: computed V=" << Vm << endl; 00537 00538 dVm = Vm - this->V; 00539 00540 if (dVm > 0) { 00541 a *= this->b1; 00542 if (a < 1e-12) 00543 break; 00544 if (this->debug) 00545 cout << "[I] PDdp::Forward: step-size reduced a=" << a << endl; 00546 //cout << "[I] PDdp::Forward: step-size reduced a=" << this->a << endl; 00547 00548 00549 } 00550 00551 //double r = dVm/(this->a*this->dV[0] + this->a*this->a*this->dV[1]); 00552 double r = dVm/(a*this->dV[0] + a*a*this->dV[1]); 00553 if (r < this->s1) 00554 a *= this->b1; 00555 else 00556 if (r >= this->s2) 00557 a *= this->b2; 00558 00559 if (this->debug) 00560 cout << "[I] PDdp::Forward: step-size a=" << a << endl; 00561 } 00562 } 00563 00564 template <typename T, int n, int c, int np> 00565 void PDdp<T, n, c, np>::Iterate() { 00566 00567 Backward(); 00568 Forward(); 00569 for (int k = 0; k < this->N; ++k) 00570 this->us[k] += this->dus[k]; 00571 p = p + dp; 00572 Update(); 00573 } 00574 00575 } 00576 00577 #endif 00578