GCOP: lib/algos/pddp.h Source File

Go to the documentation of this file.
00001 #ifndef GCOP_PDDP_H
00002 #define GCOP_PDDP_H
00003 
00004 #include <Eigen/Dense>
00005 #include <vector>
00006 #include <type_traits>
00007 #include <algorithm>
00008 #include <iostream>
00009 #include "ddp.h"
00010 
00011 namespace gcop {
00012   
00013   template <typename T, int n = Dynamic, int c = Dynamic, int np = Dynamic> 
00014     class PDdp : public Ddp<T, n, c, np> {
00015     
00016     typedef Eigen::Matrix<double, n, 1> Vectornd;
00017     typedef Eigen::Matrix<double, c, 1> Vectorcd;
00018 
00019     typedef Eigen::Matrix<double, n, n> Matrixnd;
00020     typedef Eigen::Matrix<double, n, c> Matrixncd;
00021     typedef Eigen::Matrix<double, c, n> Matrixcnd;
00022     typedef Eigen::Matrix<double, c, c> Matrixcd;  
00023     
00024     typedef Eigen::Matrix<double, np, 1> Vectormd;
00025     typedef Eigen::Matrix<double, np, np> Matrixmd;
00026     typedef Eigen::Matrix<double, n, np> Matrixnmd;
00027     typedef Eigen::Matrix<double, np, n> Matrixmnd;
00028 
00029     typedef Eigen::Matrix<double, c, np> Matrixcmd;
00030     typedef Eigen::Matrix<double, np, c> Matrixmcd;
00031     
00032 
00033   public:
00034 
00055     PDdp(System<T, n, c, np> &sys, Cost<T, n, c, np> &cost, 
00056           vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 
00057           Vectormd &p, int dynParams = 0, bool update = true);
00058     
00065     void Iterate();
00066 
00067 
00072     void Update(bool der = true);
00073     
00077     void Forward();
00078 
00082     void Backward();
00083 
00084 
00085     int m;         
00086     int dynParams; 
00087     
00088     VectorXd &p;  
00089     VectorXd dp;  
00090   
00091     std::vector<Matrixnmd> Cs;
00092 
00093     std::vector<MatrixXd> Kuxs;
00094 
00095     VectorXd Lxa;
00096     MatrixXd Lxxa;
00097     Vectorcd Lu;
00098     Matrixcd Luu;
00099     MatrixXd Lxua;
00100 
00101     VectorXd v;
00102     MatrixXd P;
00103     
00104     double nu;    
00105     double nu0;   
00106     double dnu0;  
00107     bool printDebug;
00108 
00109 
00110     bool pdX(const MatrixXd &P) {
00111       LLT<MatrixXd> llt;
00112       llt.compute(P);
00113       return llt.info() == Eigen::Success;
00114     }
00115 
00116   };
00117   
00118   
00119   using namespace std;
00120   using namespace Eigen;
00121   
00122   template <typename T, int n, int c, int np> 
00123     PDdp<T, n, c, np>::PDdp(System<T, n, c, np> &sys, 
00124                               Cost<T, n, c, np> &cost, 
00125                               vector<double> &ts, 
00126                               vector<T> &xs, 
00127                               vector<Matrix<double, c, 1> > &us,
00128                               Matrix<double, np, 1> &p, int dynParams, bool update) : 
00129     Ddp<T, n, c, np>(sys, cost, ts, xs, us, &p, false),
00130     m(p.size()), dynParams(dynParams), p(p), dp(m), Cs(this->N), 
00131     nu(1e-3), nu0(1e-3), dnu0(2) {
00132     
00133     Lxa.resize(sys.X.n + m);
00134     Lxxa.resize(sys.X.n + m, sys.X.n + m);
00135     Lxua.resize(sys.X.n + m, sys.U.n);
00136     Kuxs.resize(this->N);   
00137  
00138     for (int i = 0; i < this->N; ++i) {
00139       Kuxs[i] = MatrixXd::Zero(sys.U.n, sys.X.n + m);
00140       if (dynParams)
00141         Cs[i].resize(sys.X.n, dynParams);
00142     }
00143 
00144 
00145     if (update)
00146       Update();
00147     printDebug = false;
00148   }
00149   
00150 
00151   template <typename T, int n, int c, int np> 
00152     void PDdp<T, n, c, np>::Update(bool der) {
00153     
00154     VectorXd pdyn;
00155     if (dynParams)
00156       pdyn = this->p.head(dynParams);
00157     for (int k = 0; k < this->N; ++k) {
00158       double h = this->ts[k+1] - this->ts[k];
00159       if (der)
00160         this->sys.Step(this->xs[k+1], this->ts[k], this->xs[k], this->us[k], h, 
00161                        dynParams ? &pdyn : 0, 
00162                        &this->As[k], &this->Bs[k], dynParams ? &this->Cs[k] : 0);
00163       else
00164         this->sys.Step(this->xs[k+1], this->ts[k], this->xs[k], this->us[k], h, 
00165                        dynParams ? &pdyn : 0);
00166     }
00167   }
00168   
00169   template <typename T, int n, int c, int np> 
00170     void PDdp<T, n, c, np>::Backward() {
00171     
00172     int N = this->us.size();
00173 
00174     typedef Matrix<double, n, 1> Vectornd;
00175     typedef Matrix<double, c, 1> Vectorcd;
00176     typedef Matrix<double, n, n> Matrixnd;
00177     typedef Matrix<double, n, c> Matrixncd;
00178     typedef Matrix<double, c, n> Matrixcnd; 
00179     typedef Matrix<double, c, c> Matrixcd;  
00180     typedef Matrix<double, Dynamic, 1> Vectormd;
00181     typedef Matrix<double, Dynamic, Dynamic> Matrixmd;
00182     typedef Matrix<double, n, Dynamic> Matrixnmd;
00183     typedef Matrix<double, Dynamic, n> Matrixmnd;
00184     
00185     double t = this->ts.back();
00186     const T &x = this->xs.back();
00187     const Vectorcd &u = this->us.back();
00188 
00189 
00190     Vectornd Lx;
00191     Matrixnd Lxx;
00192     Vectormd Lp;
00193     Matrixmd Lpp;
00194     Matrixmnd Lpx;
00195     Vectorcd Lu;
00196     Matrixcd Luu;
00197     Matrixncd Lxu;
00198    
00199     Lp.resize(m);
00200     Lpp.resize(m,m);
00201     Lpx.resize(m,n);
00202 
00203     double L = this->cost.L(t, x, u, 0, &p, 
00204                             &Lx, &Lxx, 0, 0, 0, 
00205                             &Lp, &Lpp, &Lpx);
00206     
00207     this->V = L;
00208     this->dV.setZero();
00209 
00210     /*
00211     cout << "Lxa: " << Lxa.rows() << "x" << Lxa.cols() << endl;
00212     cout << "Lx: " << Lx.rows() << "x" << Lx.cols() << endl;
00213     cout << "Lp: " << Lp.rows() << "x" << Lp.cols() << endl;
00214     cout << "Lxxa: " << Lxxa.rows() << "x" << Lxxa.cols() << endl;
00215     cout << "Lxx: " << Lxx.rows() << "x" << Lxx.cols() << endl;
00216     cout << "Lpp: " << Lpp.rows() << "x" << Lpp.cols() << endl;
00217     cout << "Lpx: " << Lpx.rows() << "x" << Lpx.cols() << endl;
00218     */
00219 
00220   
00221     Lxa.head(n) = Lx;
00222     Lxa.tail(m) = Lp;    
00223     Lxxa.block(0,0,n,n) = Lxx;
00224     Lxxa.block(n,n,m,m) = Lpp;
00225     Lxxa.block(n,0,m,n) = Lpx;
00226     Lxxa.block(0,n,n,m) = Lpx.transpose();
00227 
00228     /* 
00229     cout << "Lxa:" << endl << Lxa << endl;
00230     cout << "Lx:" << endl << Lx << endl;
00231     cout << "Lp:" << endl << Lp << endl;
00232     cout << "Lxxa:" << endl << Lxxa << endl;
00233     cout << "Lxx:" << endl << Lxx << endl;
00234     cout << "Lpp:" << endl << Lpp << endl;
00235     cout << "Lpx:" << endl << Lpx << endl;
00236     */
00237 
00238     v = Lxa;
00239     P = Lxxa;
00240 
00241     //cout << "v start: " << endl << v << endl;
00242     //cout << "P start: " << endl << P << endl;
00243     
00244     VectorXd Qx = VectorXd::Zero(this->sys.X.n + m);
00245     Vectorcd Qu;  
00246     
00247     MatrixXd Qxx = MatrixXd::Zero(this->sys.X.n + m, this->sys.X.n+m);
00248     Matrixcd Quu;
00249     Matrixcd Quum;
00250     MatrixXd Quxm =  MatrixXd::Zero(this->sys.U.n, this->sys.X.n+m);
00251     MatrixXd Qux = MatrixXd::Zero(this->sys.U.n, this->sys.X.n+m);
00252 
00253     MatrixXd Aat = MatrixXd::Zero(this->sys.X.n + m, this->sys.X.n+m);
00254     MatrixXd At = MatrixXd::Zero(this->sys.X.n, this->sys.X.n);
00255     MatrixXd Bt = MatrixXd::Zero(this->sys.U.n, this->sys.X.n);
00256     
00257     Matrixcd Ic = MatrixXd::Identity(this->sys.U.n, this->sys.U.n);
00258 
00259     for (int k = N - 1; k >=0; --k) {
00260       t = this->ts[k];
00261       const T &x = this->xs[k];
00262       const Vectorcd &u = this->us[k];
00263       double h = this->ts[k+1] - this->ts[k];
00264       assert(h >0);
00265 
00266       double L = this->cost.L(t, x, u, h, &p, &Lx, &Lxx, &Lu, &Luu, &Lxu, &Lp, &Lpp, &Lpx);
00267       Lxa.head(n) = Lx;
00268       Lxa.tail(m) = Lp;    
00269       Lxxa.block(0,0,n,n) = Lxx;
00270       Lxxa.block(n,n,m,m) = Lpp;
00271       Lxxa.block(n,0,m,n) = Lpx;
00272       Lxxa.block(0,n,n,m) = Lpx.transpose();
00273       Lxua.setZero();
00274       Lxua.block(0,0,n,c) = Lxu;
00275 
00276       //      cout << "L=" << L << endl;
00277       
00278       
00279       this->V += L;
00280       
00281       MatrixXd A = this->As[k];
00282       MatrixXd Aa = MatrixXd::Identity(this->sys.X.n + m, this->sys.X.n + m);
00283       Aa.block(0,0,n,n) = A;
00284       if(dynParams)
00285         Aa.block(0,n,n,dynParams) = this->Cs[k];
00286 
00287       MatrixXd B = this->Bs[k];//MatrixXd::Zero(this->sys.X.n + m, this->sys.U.n);
00288       //B.block(0,0,n,c) = this->Bs[k];
00289 
00290       Vectorcd &ku = this->kus[k];
00291       MatrixXd &Kux = Kuxs[k];
00292       
00293       Aat = Aa.transpose();
00294       At = A.transpose();
00295       Bt = B.transpose();
00296       
00297       Qx = Lxa + Aat*v;
00298       Qu = Lu + Bt*v.head(n);
00299 
00300       Qxx = Lxxa + Aat*P*Aa;
00301       Quu = Luu + Bt*P.block(0,0,n,n)*B;
00302       Qux.block(0,0,c,n) =  Bt*P.block(0,0,n,n)*A;     // assume Lux = 0
00303       Qux.block(0,n,c,m) =  Bt*P.block(0,n,n,m);     
00304       if(dynParams)
00305         Qux.block(0,n,c,dynParams) +=  Bt*P.block(0,0,n,n)*this->Cs[k];     
00306 
00307       double dmu = 1;
00308       
00309       LLT<Matrixcd> llt;
00310       
00311       printDebug = false;
00312      
00313       double mu = this->mu;
00314       if (this->debug) {
00315         if (!pdX(P)) {
00316         //  cout << "P[" << k << "] is not pd =" << endl << P << endl;
00317         //  cout << "Luu=" << endl << Luu << endl;
00318         }
00319 
00320         Matrixcd Pp = Bt*P.block(0,0,n,n)*B;
00321         if (!pdX(Pp)) {
00322         //  cout << "B'PB is not positive definite:" << endl << Pp << endl;
00323         }
00324       }
00325 
00326       if(false)
00327       {
00328         cout << "k=" << endl << k << endl;
00329         //cout << "x:" << endl << x << endl;
00330         cout << "u:" << endl << u << endl;
00331         cout << "p:" << endl << p << endl;
00332         cout << "Qxx:" << endl << Qxx << endl;
00333         cout << "Quu:" << endl << Quu << endl;
00334         cout << "Qux:" << endl << Qux << endl;
00335         cout << "P:" << endl << P << endl;
00336         cout << "v:" << endl << v << endl;
00337         cout << "Lxxa: " << endl << Lxxa << endl;
00338         cout << "Lxx: " << endl << Lxx << endl;
00339         cout << "Lpp: " << endl << Lpp << endl;
00340         cout << "Lpx: " << endl << Lpx << endl;
00341         cout << "Lxa: " << endl << Lxa << endl;
00342         cout << "Luu: " << endl << Luu << endl;
00343         cout << "Lu: " << endl << Lu << endl;
00344         cout << "Aa: " << endl << Aa << endl;
00345         cout << "B: " << endl << B << endl;
00346       }
00347 
00348 
00349       while (1) {
00350         Quum = Quu + mu*Ic;
00351         
00352         // From https://homes.cs.washington.edu/~todorov/papers/TassaIROS12.pdf
00353         //MatrixXd Pm = P+mu*MatrixXd::Identity(n+m,n+m);
00354         //Quum = Luu + Bt*Pm.block(0,0,n,n)*B;
00355         llt.compute(Quum);
00356         
00357         // if OK, then reduce mu and break
00358         if (llt.info() == Eigen::Success) {
00359           
00360           //Quxm.block(0,0,c,n) =  Bt*Pm.block(0,0,n,n)*A;     // assume Lux = 0
00361           //Quxm.block(0,n,c,m) =  Bt*Pm.block(0,n,n,m);     
00362           //if(dynParams)
00363           //  Quxm.block(0,n,c,dynParams) +=  Bt*Pm.block(0,0,n,n)*this->Cs[k];     
00364 
00365           // Tassa and Todorov recently proposed this quadratic rule, seems pretty good
00366           dmu = min(1/this->dmu0, dmu/this->dmu0);
00367           if (mu*dmu > this->mu0)
00368           { 
00369              mu = mu*dmu;
00370           }
00371           else
00372           {
00373              mu = this->mu0;
00374           }
00375           break;
00376         }
00377         else
00378         {
00379           // if negative then increase mu
00380           dmu = max(this->dmu0, dmu*this->dmu0);
00381           mu = max(this->mu0, mu*dmu);   
00382         }
00383 
00384         if (this->debug)
00385           cout << "[I] PDdp::Backward: increased mu=" << mu << " at k=" << k << endl;
00386         printDebug = true;
00387 
00388         if (mu > this->mumax) {
00389           cout << "[W] PDdp::Backward: mu= " << mu << " exceeded maximum !" << endl;
00390           if (this->debug)
00391             getchar();
00392           break;
00393         }
00394 
00395       }
00396 
00397       
00398       if(mu > this->mumax)
00399         break;
00400 
00401 
00402       ku = -llt.solve(Qu);
00403       Kux = -llt.solve(Qux);
00404       //Kux = -llt.solve(Quxm);
00405 
00406       assert(!std::isnan(ku[0]));
00407       assert(!std::isnan(Kux(0,0)));
00408 
00409       // Update used in https://homes.cs.washington.edu/~todorov/papers/TassaIROS12.pdf
00410       //v = Qx + Kux.transpose()*Quu*ku + Kux.transpose()*Qu + Qux.transpose()*ku;
00411       //P = Qxx + Kux.transpose()*Quu*Kux + Kux.transpose()*Qux + Qux.transpose()*Kux;
00412  
00413       v = Qx + Kux.transpose()*Qu;
00414       P = Qxx + Kux.transpose()*Qux;
00415       this->dV[0] += ku.dot(Qu);
00416       this->dV[1] += ku.dot(Quu*ku/2);
00417 
00418     }
00419     
00420     if (this->debug)
00421       cout << "[I] PDdp::Backward: current V=" << this->V << endl;
00422     
00423   }
00424   
00425   template <int n = Dynamic, int c = Dynamic> 
00426     Matrix<double, n, c> sym(const Matrix<double, n, c> &A) { return A+A.transpose(); };
00427   
00428   template <typename T, int n, int c, int np> 
00429     void PDdp<T, n, c, np>::Forward() {
00430 
00431     typedef Matrix<double, n, 1> Vectornd;
00432     typedef Matrix<double, c, 1> Vectorcd;
00433     typedef Matrix<double, n, n> Matrixnd;
00434     typedef Matrix<double, n, c> Matrixncd;
00435     typedef Matrix<double, c, n> Matrixcnd;
00436     typedef Matrix<double, c, c> Matrixcd;  
00437     typedef Matrix<double, np, 1> Vectormd;
00438     typedef Matrix<double, np, np> Matrixmd;
00439     typedef Matrix<double, n, np> Matrixnmd;
00440     typedef Matrix<double, np, n> Matrixmnd;
00441 
00442     // find dp
00443     int N = this->us.size();
00444 
00445     LLT<Matrixmd> llt;
00446     double nu = this->nu;
00447     double dnu = 1;
00448     Vectormd bp = v.tail(m);
00449 
00450     while (1) {
00451       Matrixmd Apm = P.block(n,n,m,m);
00452       Apm.diagonal() = Apm.diagonal().array() + nu;
00453       
00454       llt.compute(Apm);
00455       // if OK, then reduce mu and break
00456       if (llt.info() == Eigen::Success) {
00457         dnu = min(1/this->dnu0, dnu/this->dnu0);
00458         if (nu*dnu > this->nu0)
00459           nu *= dnu;
00460         else
00461           nu = this->nu0;
00462         break;
00463       }
00464       
00465       // if negative then increase mu
00466       dnu = max(this->dnu0, dnu*this->dnu0);
00467       nu = max(this->nu0, nu*dnu);   
00468       if (this->debug)
00469       {
00470         cout << "[I] PDdp::Forward: increased nu=" << nu << endl;
00471       }
00472     }
00473 
00474     dp = -llt.solve(bp);
00475 
00476     // measured change in V
00477     double dVm = 1;      
00478 
00479     double a = this->a;
00480 
00481     Vectormd dp1 = dp;
00482 
00483     while (dVm > 0) {
00484       VectorXd dxa = VectorXd::Zero(this->sys.X.n + m);
00485       Vectornd dx = VectorXd::Zero(this->sys.X.n);
00486 
00487       dp = a*dp1;
00488       dxa.tail(m) = dp;
00489       //      Vectornd dx = Vectornd::Zero();
00490       T xn = this->xs[0];
00491       Vectorcd un;
00492       Vectormd pn = p + dp;
00493       
00494       VectorXd pdyn;
00495       if (dynParams)
00496         pdyn = pn.head(dynParams);
00497   
00498       double Vm = 0;
00499       
00500       for (int k = 0; k < N; ++k) {
00501         const Vectorcd &u = this->us[k];
00502         Vectorcd &du = this->dus[k];
00503         
00504         du = a*this->kus[k] + Kuxs[k]*dxa;
00505         un = u + du;
00506         
00507         //cout << "k=" << k << endl;
00508         //cout << "kus:" << endl << this->kus[k] << endl;
00509         //cout << "Kuxs:" << endl << Kuxs[k] << endl;
00510 
00511         const double &t = this->ts[k];
00512         double L = this->cost.L(t, xn, un, 0, &pn);
00513         Vm += L;
00514         
00515         // update dx
00516         if (this->type == this->PURE) {
00517           dx = this->As[k]*dx + this->Bs[k]*du;
00518           if(dynParams)
00519             dx += Cs[k]*dp.head(dynParams);
00520           this->sys.X.Retract(xn, xn, dx);
00521         } else {
00522           double h = this->ts[k+1] - t;
00523           T xn_(xn);
00524           this->sys.Step(xn_, t, xn, un, h, dynParams ? &pdyn : 0);
00525           xn = xn_;
00526           this->sys.X.Lift(dx, this->xs[k+1], xn);
00527         }
00528 
00529         dxa.head(n) = dx;
00530       }
00531       
00532       double L = this->cost.L(this->ts[N], xn, un, 0, &pn);
00533       Vm += L;
00534       
00535       if (this->debug)
00536         cout << "[I] PDdp::Forward: computed V=" << Vm << endl;
00537       
00538       dVm = Vm - this->V;
00539       
00540       if (dVm > 0) {
00541         a *= this->b1;
00542         if (a < 1e-12)
00543           break;
00544         if (this->debug)
00545           cout << "[I] PDdp::Forward: step-size reduced a=" << a << endl;
00546           //cout << "[I] PDdp::Forward: step-size reduced a=" << this->a << endl;
00547         
00548         
00549       }
00550       
00551       //double r = dVm/(this->a*this->dV[0] + this->a*this->a*this->dV[1]);
00552       double r = dVm/(a*this->dV[0] + a*a*this->dV[1]);
00553       if (r < this->s1)
00554         a *= this->b1;
00555       else 
00556         if (r >= this->s2)
00557           a *= this->b2;    
00558     
00559       if (this->debug)
00560         cout << "[I] PDdp::Forward: step-size a=" << a << endl;    
00561     }
00562   }
00563 
00564   template <typename T, int n, int c, int np> 
00565     void PDdp<T, n, c, np>::Iterate() {
00566 
00567     Backward();
00568     Forward();
00569     for (int k = 0; k < this->N; ++k)
00570       this->us[k] += this->dus[k];
00571     p = p + dp;
00572     Update();
00573   }
00574   
00575 }
00576 
00577 #endif
00578