GCOP: lib/algos/ddp.h Source File

Go to the documentation of this file.
00001 #ifndef GCOP_DDP_H
00002 #define GCOP_DDP_H
00003 
00004 #include "docp.h"
00005 #include <exception>      // std::exception
00006 #include <stdexcept>
00007 
00008 namespace gcop {
00009   
00010   using namespace std;
00011   using namespace Eigen;
00012   
00013   template <typename T, int nx = Dynamic, int nu = Dynamic, int np = Dynamic> class Ddp :
00014     public Docp<T, nx, nu, np> {
00015     
00016     typedef Matrix<double, nx, 1> Vectornd;
00017     typedef Matrix<double, nu, 1> Vectorcd;
00018     typedef Matrix<double, np, 1> Vectorpd;
00019 
00020     typedef Matrix<double, nx, nx> Matrixnd;
00021     typedef Matrix<double, nx, nu> Matrixncd;
00022     typedef Matrix<double, nu, nx> Matrixcnd;
00023     typedef Matrix<double, nu, nu> Matrixcd;
00024     
00025   public:
00044     Ddp(System<T, nx, nu, np> &sys, Cost<T, nx, nu, np> &cost, 
00045          vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 
00046          Vectorpd *p = 0, bool update = true);
00047     
00048     virtual ~Ddp();
00049 
00050 
00057     void Iterate();
00058 
00062     void Forward();
00063 
00067     void Backward();
00068         
00069     int N;        
00070     
00071     double mu;    
00072     double mu0;   
00073     double dmu0;  
00074     double mumax; 
00075     double a;     
00076     
00077     std::vector<Vectorcd> dus;  
00078     
00079     std::vector<Vectorcd> kus;
00080     std::vector<Matrixcnd> Kuxs;
00081     
00082     Vectornd Lx;
00083     Matrixnd Lxx;
00084     Vectorcd Lu;
00085     Matrixcd Luu;
00086     
00087     Matrixnd P;
00088     Vectornd v;
00089     
00090     double V;
00091     Vector2d dV;
00092 
00093     double s1;   
00094     double s2;   
00095     double b1;   
00096     double b2;   
00097     
00098     char type;   
00099     
00100     static const char PURE = 0;     
00101     static const char DDP = 1;      
00102     static const char LQS = 2;      
00103     
00104     /*
00105     class Fx : public Function<T, n, n> {
00106     public:
00107     Fx(System &sys) :
00108       Function<T>(sys.U.nspace), sys(sys) {
00109       }
00110       
00111       void F(Vectornd &f, const T &x) {
00112 
00113         
00114         sys.X.L
00115         this->xa = xa;
00116         sys.FK(this->xa);
00117         sys.ID(f, t, *xb, this->xa, *u, h);
00118       }
00119       
00120       Mbs &sys;
00121       
00122       double t;
00123       const MbsState *xb;
00124       const Vectorcd *u;
00125       double h;
00126     };
00127 
00128     class Fu : public Function<VectorXd> {
00129     public:
00130     Fu(Mbs &sys) :
00131       Function<VectorXd>(sys.U), sys(sys) {
00132       }
00133       
00134       void F(VectorXd &f, const VectorXd &u) {
00135         sys.ID(f, t, *xb, *xa, u, h);
00136       }
00137       
00138       Mbs &sys;
00139       
00140       double t;
00141       const MbsState *xa;
00142       const MbsState *xb;
00143       double h;
00144     };
00145 
00146     */
00147 
00148     bool pd(const Matrixnd &P) {
00149       LLT<Matrixnd> llt;     
00150       llt.compute(P);
00151       return llt.info() == Eigen::Success;
00152     }
00153 
00154     bool pdX(const MatrixXd &P) {
00155       LLT<MatrixXd> llt;     
00156       llt.compute(P);
00157       return llt.info() == Eigen::Success;
00158     }  
00159 
00160 
00161   };
00162 
00163   using namespace std;
00164   using namespace Eigen;
00165   
00166   template <typename T, int nx, int nu, int np> 
00167     Ddp<T, nx, nu, np>::Ddp(System<T, nx, nu, np> &sys, 
00168                             Cost<T, nx, nu, np> &cost, 
00169                             vector<double> &ts, 
00170                             vector<T> &xs, 
00171                             vector<Matrix<double, nu, 1> > &us,
00172                             Matrix<double, np, 1> *p,
00173                             bool update) : Docp<T, nx, nu, np>(sys, cost, ts, xs, us, p, update),
00174     N(us.size()), 
00175     mu(1e-3), mu0(1e-3), dmu0(2), mumax(1e6), a(1), 
00176     dus(N), kus(N), Kuxs(N), 
00177     s1(0.1), s2(0.5), b1(0.25), b2(2),
00178     type(LQS)
00179     {
00180       assert(N > 0);
00181       assert(sys.X.n > 0);
00182       assert(sys.U.n > 0);
00183 
00184       assert(ts.size() == N+1);
00185       assert(xs.size() == N+1);
00186       assert(us.size() == N);
00187 
00188       if (nx == Dynamic || nu == Dynamic) {
00189         for (int i = 0; i < N; ++i) {
00190           dus[i].resize(sys.U.n);
00191           //          As[i].resize(sys.X.n, sys.X.n);
00192           //          Bs[i].resize(sys.X.n, sys.U.n);
00193           kus[i].resize(sys.U.n);
00194           Kuxs[i].resize(sys.U.n, sys.X.n);
00195         }
00196 
00197         Lx.resize(sys.X.n);
00198         Lxx.resize(sys.X.n, sys.X.n);
00199         Lu.resize(sys.U.n);
00200         Luu.resize(sys.U.n, sys.U.n);
00201 
00202         P.resize(sys.X.n, sys.X.n);
00203         v.resize(sys.X.n);       
00204       }
00205 
00206       if (update) {
00207         this->Update();
00208       }
00209     }
00210   
00211   template <typename T, int nx, int nu, int np> 
00212     Ddp<T, nx, nu, np>::~Ddp()
00213     {
00214     }
00215     
00216   template <typename T, int nx, int nu, int np> 
00217     void Ddp<T, nx, nu, np>::Backward() {
00218 
00219     typedef Matrix<double, nx, 1> Vectornd;
00220     typedef Matrix<double, nu, 1> Vectorcd;
00221     typedef Matrix<double, nx, nx> Matrixnd;
00222     typedef Matrix<double, nx, nu> Matrixncd;
00223     typedef Matrix<double, nu, nx> Matrixcnd;
00224     typedef Matrix<double, nu, nu> Matrixcd;  
00225     
00226     double t = this->ts.back();
00227     const T &x = this->xs.back();
00228     const Vectorcd &u = this->us.back();
00229     double L = this->cost.L(t, x, u, 0, 0, &Lx, &Lxx);
00230     
00231     V = L;
00232     dV.setZero();
00233     
00234     v = Lx;
00235     P = Lxx;
00236     
00237     Vectornd Qx;
00238     Vectorcd Qu;    
00239     Matrixnd Qxx;
00240     Matrixcd Quu;
00241     Matrixcd Quum;
00242     Matrixcnd Qux;   
00243     
00244     Matrixnd At;
00245     Matrixcnd Bt;
00246     
00247     Matrixcd Ic = MatrixXd::Identity(this->sys.U.n, this->sys.U.n);
00248     
00249     for (int k = N-1; k >=0; --k) {
00250       
00251       t = this->ts[k];
00252       double h = this->ts[k+1] - t;
00253 
00254       const T &x = this->xs[k];
00255       const Vectorcd &u = this->us[k];
00256       double L = this->cost.L(t, x, u, h, 0, &Lx, &Lxx, &Lu, &Luu);
00257 
00258       if (std::isnan(L)) {
00259         cout << "NAN" << " k=" << k << " Lx=" << Lx << " Lxx=" << Lxx << endl;
00260         getchar();
00261       }
00262       
00263       V += L;
00264       
00265       const Matrixnd &A = this->As[k];
00266       const Matrixncd &B = this->Bs[k];
00267       Vectorcd &ku = kus[k];
00268       Matrixcnd &Kux = Kuxs[k];
00269       
00270       At = A.transpose();    
00271       Bt = B.transpose();
00272       
00273       Qx = Lx + At*v;
00274       Qu = Lu + Bt*v;
00275 
00276       Qxx = Lxx + At*P*A;
00277       Quu = Luu + Bt*P*B;
00278       Qux = Bt*P*A;
00279       
00280       double mu = this->mu;
00281       double dmu = 1;
00282      
00283       if (this->debug) {
00284       if (!pd(P)) {
00285         cout << "P[" << k << "] is not pd =" << endl << P << endl;
00286         cout << "Luu=" << endl << Luu << endl;
00287       }
00288 
00289       Matrixcd Pp = Bt*P*B;
00290       if (!pdX(Pp)) {
00291         cout << "B'PB is not positive definite:" << endl << Pp << endl;
00292       }
00293       }
00294 
00295       LLT<Matrixcd> llt;     
00296       
00297       while (1) {
00298         Quum = Quu + mu*Ic;
00299         llt.compute(Quum);
00300         
00301         // if OK, then reduce mu and break
00302         if (llt.info() == Eigen::Success) {
00303           // this is the standard quadratic rule specified by Tassa and Todorov
00304           dmu = min(1/this->dmu0, dmu/this->dmu0);
00305           if (mu*dmu > this->mu0)
00306             mu = mu*dmu;
00307           else
00308             mu = this->mu0;
00309           
00310         if (this->debug) 
00311           cout << "[I] Ddp::Backward: reduced mu=" << mu << " at k=" << k << endl;
00312 
00313 
00314           break;
00315         }
00316         
00317         // if negative then increase mu
00318         dmu = max(this->dmu0, dmu*this->dmu0);
00319         mu = max(this->mu0, mu*dmu);
00320         
00321         if (this->debug) {
00322           cout << "[I] Ddp::Backward: increased mu=" << mu << " at k=" << k << endl;
00323           cout << "[I] Ddp::Backward: Quu=" << Quu << endl;
00324         }
00325 
00326         if (mu > mumax) {
00327           cout << "[W] Ddp::Backward: mu= " << mu << " exceeded maximum !" << endl;          
00328           if (this->debug)
00329             getchar();
00330           break;
00331         }
00332       }
00333 
00334       if (mu > mumax)
00335         break;
00336       
00337       ku = -llt.solve(Qu);
00338       Kux = -llt.solve(Qux);
00339       
00340       assert(!std::isnan(ku[0]));
00341       assert(!std::isnan(Kux(0,0)));
00342 
00343       v = Qx + Kux.transpose()*Qu;
00344       P = Qxx + Kux.transpose()*Qux;
00345       
00346       dV[0] += ku.dot(Qu);
00347       dV[1] += ku.dot(Quu*ku/2);
00348     }
00349     
00350     //    if (debug)
00351     cout << "[I] Ddp::Backward: current V=" << V << endl;    
00352   }
00353 
00354   template <typename T, int nx, int nu, int np> 
00355     void Ddp<T, nx, nu, np>::Forward() {
00356 
00357     typedef Matrix<double, nx, 1> Vectornd;
00358     typedef Matrix<double, nu, 1> Vectorcd;
00359     typedef Matrix<double, nx, nx> Matrixnd;
00360     typedef Matrix<double, nx, nu> Matrixncd;
00361     typedef Matrix<double, nu, nx> Matrixcnd;
00362     typedef Matrix<double, nu, nu> Matrixcd;  
00363     
00364     // measured change in V
00365     double dVm = 1;
00366     
00367     double a = this->a;
00368     
00369     while (dVm > 0) {
00370 
00371       Vectornd dx = VectorXd::Zero(this->sys.X.n);
00372       dx.setZero();//Redundancy
00373       T xn = this->xs[0];
00374       this->sys.Reset(xn,this->ts[0]);//Reset to initial state
00375       Vectorcd un;
00376       
00377       double Vm = 0;
00378       
00379       for (int k = 0; k < N; ++k) {
00380         const Vectorcd &u = this->us[k];
00381         Vectorcd &du = dus[k];
00382         
00383         const Vectorcd &ku = kus[k];
00384         const Matrixcnd &Kux = Kuxs[k];
00385         
00386         du = a*ku + Kux*dx;
00387         un = u + du;
00388         //[DEBUG]:
00389         /*cout<<"du[ "<<k<<"]:\t"<<du.transpose()<<endl;
00390         cout<<"dx[ "<<k<<"]:\t"<<dx.transpose()<<endl;
00391         cout<<"ku[ "<<k<<"]:\t"<<ku.transpose()<<endl;
00392         cout<<"Kux[ "<<k<<"]:"<<endl<<Kux<<endl;
00393         */
00394         
00395         Rn<nu> &U = (Rn<nu>&)this->sys.U;
00396         if (U.bnd) {
00397           U.Bound(un, du, u);
00398           /*
00399           for (int j = 0; j < u.size(); ++j) 
00400             if (un[j] < U.lb[j]) {
00401               un[j] = U.lb[j];
00402               du[j] = un[j] - u[j];
00403             } else
00404               if (un[j] > U.ub[j]) {
00405                 un[j] = U.ub[j];
00406                 du[j] = un[j] - u[j];
00407               }
00408           */
00409         }
00410         
00411         const double &t = this->ts[k];
00412         double h = this->ts[k+1] - t;
00413 
00414         double L = this->cost.L(t, xn, un, h);
00415         Vm += L;
00416         
00417         // update dx
00418         if (type == PURE) {
00419           dx = this->As[k]*dx + this->Bs[k]*du;
00420           this->sys.X.Retract(xn, xn, dx);
00421         } else {
00422           double h = this->ts[k+1] - t;
00423           //Adding nan catching :
00424           try
00425           {
00426             this->sys.Step(xn, un, h, this->p);
00427           }
00428           catch(std::exception &e)
00429           {
00430             //[DEBUG] Statement
00431             std::cerr << "exception caught: " << e.what() << '\n';
00432             std::cout<<" Iteration counter: "<<k<<endl;
00433             std::cout<<" u: "<<u.transpose()<<endl;
00434             std::cout<<" du: "<<du.transpose()<<endl;
00435             //More debug statements:
00436             //std::cout<<" a: "<<a<<"\t ku: "<<ku.transpose()<<"\t dx: "<<dx.transpose()<<"\n kux: \n"<<Kux<<endl;
00437             //[Culprit dx]
00438 
00439             throw std::runtime_error(std::string("Nan observed"));
00440             return;
00441           }
00442           //cout<<"dx: "<<dx.transpose()<<endl;//[DEBUG]//Previous iteration dx
00443           //std::cout<<" du: "<<du.transpose()<<endl;//[DEBUG]
00444           this->sys.X.Lift(dx, this->xs[k+1], xn);
00445           /*if((k == 29) || (k == 30) || (k == 31))
00446           {
00447             //cout dx:
00448             cout<<"dx[ "<<(k+1)<<"]:\t"<<dx.transpose()<<endl;
00449             cout<<"un[ "<<(k+1)<<"]:\t"<<un.transpose()<<endl;
00450             cout<<"du[ "<<k<<"]:\t"<<du.transpose()<<endl;
00451             cout<<"Printing states ["<<(k+1)<<"]"<<endl;
00452             this->sys.print(this->xs[k+1]);
00453             cout<<"Printing xn: "<<endl;
00454             this->sys.print(xn);
00455             if(k == 31)
00456             {
00457               //exit(0);
00458             }
00459           }
00460           */
00461           //cout<<"xs[ "<<(k+1)<<"]:\t"<<this->xs[k+1]<<endl;
00462           //cout<<"un[ "<<(k+1)<<"]:\t"<<un.transpose()<<endl;
00463           
00464           //          cout << xn.gs[0] << " " << xn.r << " " << xn.vs[0] << " " << xn.dr << endl;
00465 
00466           //          cout << this->xs[k+1].gs[0] << " " << this->xs[k+1].r << " " << this->xs[k+1].vs[0] << " " << this->xs[k+1].dr << endl;
00467           assert(!std::isnan(dx[0]));
00468         }
00469       }
00470       
00471       double L = this->cost.L(this->ts[N], xn, un, 0);
00472       Vm += L;
00473       
00474       if (this->debug)
00475         cout << "[I] Ddp::Forward: measured V=" << Vm << endl;
00476       
00477       dVm = Vm - V;
00478       
00479       if (dVm > 0) {
00480         a *= b1;
00481         if (a < 1e-12)
00482           break;
00483         if (this->debug)
00484           cout << "[I] Ddp::Forward: step-size reduced a=" << a << endl;
00485         
00486         continue;
00487       }
00488       
00489       double r = dVm/(a*dV[0] + a*a*dV[1]);
00490       if (r < s1)
00491         a = b1*a;
00492       else 
00493         if (r >= s2) 
00494           a = b2*a;    
00495     
00496       if (this->debug)
00497         cout << "[I] Ddp::Forward: step-size a=" << a << endl;    
00498     }
00499     this->J = V + dVm;//Set the optimal cost after one iteration
00500   }
00501 
00502   template <typename T, int nx, int nu, int np> 
00503     void Ddp<T, nx, nu, np>::Iterate() {
00504     
00505     Backward();
00506     Forward();
00507     for (int k = 0; k < N; ++k)
00508       this->us[k] += dus[k];
00509     this->Update();
00510   }
00511 }
00512 
00513 #endif