GCOP: lib/algos/systemcewithsddp.h Source File

Go to the documentation of this file.
00001 #ifndef GCOP_SYSTEMCE_H
00002 #define GCOP_SYSTEMCE_H
00003 
00004 #include <Eigen/Dense>
00005 #include <vector>
00006 #include <type_traits>
00007 #include <algorithm>
00008 #include <iterator>
00009 #include "system.h"
00010 #include "cost.h"
00011 #include "ce.h"
00012 #include <cmath>
00013 #include "rn.h"
00014 #include <limits>
00015 #include "sddp.h"
00016 
00017 #include "tparam.h"
00018 
00019 namespace gcop {
00020   
00021   using namespace std;
00022   using namespace Eigen;
00023  
00024   template <typename T, int n = Dynamic, int c = Dynamic, int np = Dynamic, int ntp = Dynamic> class SystemCe {
00025     
00026     typedef Matrix<double, n, 1> Vectornd;
00027     typedef Matrix<double, c, 1> Vectorcd;
00028     typedef Matrix<double, np, 1> Vectormd;
00029 
00030     typedef Matrix<double, n, n> Matrixnd;
00031     typedef Matrix<double, n, c> Matrixncd;
00032     typedef Matrix<double, c, n> Matrixcnd;
00033     typedef Matrix<double, c, c> Matrixcd;  
00034 
00035     typedef Matrix<double, ntp, 1> Vectortpd;  
00036     
00037     //External render Function for trajectories:
00038     typedef void(RenderFunc)(int, vector<T>&);
00039 
00040     typedef SDdp<T, nx, nu, np> MySDdp;
00041 
00042   public:
00064     SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np> &cost, 
00065              vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p,
00066              vector<Vectorcd> &dus, vector<Vectorcd> &es, 
00067              bool update = true);
00068 
00091     SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np> &cost, Tparam<T, n, c, np, ntp> &tp,
00092              vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p,
00093              vector<Vectorcd> &dus, vector<Vectorcd> &es, 
00094              bool update = true);
00095 
00096     
00097     virtual ~SystemCe();
00098     
00099 
00103     void Iterate();
00104 
00111     double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true);
00112         
00113     void us2z(Vectortpd &z, const std::vector<Vectorcd> &us) const;    
00114 
00115     void z2us(std::vector<Vectorcd> &us, const Vectortpd &z) const;
00116 
00117     System<T, n, c, np> &sys;    
00118 
00119     Cost<T, n, c, np> &cost;     
00120 
00121     Tparam<T, n, c, np, ntp> *tp;     
00122 
00123     std::vector<double> &ts; 
00124 
00125     std::vector<T> &xs;      
00126 
00127     std::vector<Vectorcd> &us;      
00128 
00129     Vectormd *p;      
00130 
00131     std::vector<Vectorcd> &dus;      
00132 
00133     std::vector<T> xss;             
00134     
00135     std::vector<Vectorcd> uss;      
00136     
00137     int N;        
00138 
00139     Ce<ntp> ce;       
00140 
00141     int Ns;      
00142 
00143     double J;    
00144 
00145     int nofevaluations;
00146     
00147     bool debug;  
00148 
00149     RenderFunc *external_render;
00150 
00151   };
00152 
00153   using namespace std;
00154   using namespace Eigen;
00155   
00156   template <typename T, int n, int c, int np, int ntp> 
00157     SystemCe<T, n, c, np, ntp>::SystemCe(System<T, n, c, np> &sys, 
00158                                      Cost<T, n, c, np> &cost, 
00159                                      vector<double> &ts, 
00160                                      vector<T> &xs, 
00161                                      vector<Matrix<double, c, 1> > &us,
00162                                      Matrix<double, np, 1> *p,
00163                                      vector<Matrix<double, c, 1> > &dus,
00164                                      vector<Matrix<double, c, 1> > &es,
00165                                      bool update) : 
00166     sys(sys), cost(cost), tp(0),
00167     ts(ts), xs(xs), us(us), p(p), dus(dus), xss(xs), uss(us), N(us.size()), 
00168     ce(N*sys.U.n, 1), Ns(1000), debug(true), external_render(0), nofevaluations(0)
00169     {
00170       // do not use an external tparam, just assume discrete controls are the params
00171       
00172       if (ntp != Dynamic) {
00173         assert(ntp == N*sys.U.n);
00174         if (ntp > 16)
00175           cout << "[W] gcop::SystemCe::SystemCe: fixed Eigen size is not recommended above 16!" << endl;
00176       }
00177 
00178       assert(N > 0);
00179       assert(ts.size() == N+1);
00180       assert(xs.size() == N+1);
00181       assert(us.size() == N);
00182       assert(dus.size() == N);
00183       assert(xss.size() == N+1);
00184       assert(uss.size() == N);
00185 
00186       if (update) {
00187         J = Update(xs, us);
00188       } else {
00189         J = numeric_limits<double>::max();
00190       }
00191       
00192       us2z(ce.gmm.ns[0].mu, us);
00193       Vectortpd z;
00194       if (ntp == Dynamic)
00195         z.resize(sys.U.n*N);
00196   
00197       us2z(z, dus);
00198       ce.gmm.ns[0].P = z.asDiagonal();
00199       ce.gmm.Update();
00200 
00201       us2z(z, es);
00202       ce.S = z.asDiagonal();
00203     }
00204 
00205   template <typename T, int n, int c, int np, int ntp> 
00206     SystemCe<T, n, c, np, ntp>::SystemCe(System<T, n, c, np> &sys, 
00207                                          Cost<T, n, c, np> &cost,                              
00208                                          Tparam<T, n, c, np, ntp> &tp, 
00209                                          vector<double> &ts, 
00210                                          vector<T> &xs, 
00211                                          vector<Matrix<double, c, 1> > &us,
00212                                          Matrix<double, np, 1> *p,
00213                                          vector<Matrix<double, c, 1> > &dus,
00214                                          vector<Matrix<double, c, 1> > &es,
00215                                          bool update) : 
00216     sys(sys), cost(cost), tp(&tp), 
00217     ts(ts), xs(xs), us(us), p(p), dus(dus), xss(xs), uss(us), N(us.size()), 
00218     ce(tp.ntp, 1), Ns(1000), debug(true), external_render(0), nofevaluations(0)
00219     {
00220       /*
00221       if (ntp != Dynamic) {
00222         assert(ntp == N*sys.U.n);
00223         if (ntp > 16)
00224           cout << "[W] gcop::SystemCe::SystemCe: fixed Eigen size is not recommended above 16!" << endl;
00225       }
00226       */
00227 
00228       assert(N > 0);
00229       assert(ts.size() == N+1);
00230       assert(xs.size() == N+1);
00231       assert(us.size() == N);
00232       assert(dus.size() == N);
00233       assert(xss.size() == N+1);
00234       assert(uss.size() == N);
00235 
00236       if (update) {
00237         J = Update(xs, us);
00238       } else {
00239         J = numeric_limits<double>::max();
00240       }
00241     
00242       tp.To(ce.gmm.ns[0].mu, ts, xs, us, p);
00243             //      us2z(ce.gmm.ns[0].mu, us);
00244       Vectortpd z;
00245       if (ntp == Dynamic) {
00246         z.resize(tp.ntp);
00247       }  
00248 
00249 
00250       tp.To(z, ts, xs, dus, p);
00251       // us2z(z, dus);
00252       ce.gmm.ns[0].P = z.asDiagonal();
00253       ce.gmm.Update();
00254 
00255       //      us2z(z, es);
00256       tp.To(z, ts, xs, es, p);
00257       ce.S = z.asDiagonal();
00258     }
00259 
00260   
00261   template <typename T, int n, int c, int np, int ntp> 
00262     SystemCe<T, n, c, np, ntp>::~SystemCe()
00263     {
00264     }
00265 
00266   template <typename T, int n, int c, int np, int ntp> 
00267     void SystemCe<T, n, c, np, ntp>::us2z(Vectortpd &z, const std::vector<Vectorcd> &us) const
00268     {
00269       assert(us.size() > 0);
00270       assert(z.size() == us.size()*us[0].size());
00271       for (int i = 0; i < us.size(); ++i) {
00272         z.segment(i*sys.U.n, sys.U.n) = us[i];
00273       }
00274     }
00275   
00276   template <typename T, int n, int c, int np, int ntp> 
00277     void SystemCe<T, n, c, np, ntp>::z2us(std::vector<Vectorcd> &us, const Vectortpd &z) const
00278     {
00279       assert(us.size() > 0);
00280       assert(z.size() == us.size()*us[0].size());
00281       for (int i = 0; i < us.size(); ++i) {
00282         us[i] = z.segment(i*sys.U.n, sys.U.n);
00283       }
00284     }
00285 
00286   
00287   template <typename T, int n, int c, int np, int ntp> 
00288     double SystemCe<T, n, c, np, ntp>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) {    
00289     double J = 0;
00290     sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset to specific state and time.
00291 
00292     for (int k = 0; k < N; ++k) {
00293       double h = ts[k+1] - ts[k];
00294       sys.Step1(xs[k+1], us[k], h, p);
00295       if (evalCost) 
00296         J += cost.L(ts[k], xs[k], us[k], h, p);
00297       
00298     }
00299     if (evalCost)
00300       J += cost.L(ts[N], xs[N], us[N-1], 0, p);
00301     return J;
00302   }
00303 
00304   template <typename T, int n, int c, int np, int ntp> 
00305     void SystemCe<T, n, c, np, ntp>::Iterate() {
00306 
00307       if (ce.inc) 
00308       {
00309         Vectortpd z;
00310         if (ntp == Dynamic)
00311           if (tp)
00312             z.resize(tp->ntp);
00313           else 
00314             z.resize(us.size()*this->N);        
00315 
00316         ce.Sample(z);
00317         if (tp)
00318         {
00319           tp->From(ts, xss, uss, z, p);
00320           double cost_trajectory = 0;
00321           int N = uss.size();
00322           for(int k = 0;k < N; k++)
00323           { 
00324             cost_trajectory += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p);
00325           }
00326           cost_trajectory += cost.L(ts[N], xss[N], uss[N-1], 0, p);
00327           ce.AddSample(z, cost_trajectory);
00328         }
00329         else
00330         {
00331           z2us(uss, z);
00332           ce.AddSample(z, Update(xss, uss));
00333         }
00334       } 
00335       else 
00336       {
00337         ce.Reset();
00338 
00339         Vectortpd z;
00340         if (ntp == Dynamic)
00341           if (tp)
00342             z.resize(tp->ntp);
00343           else 
00344             z.resize(us.size()*this->N);        
00345 
00346         for (int j = 0; j < Ns; ++j) {
00347           ce.Sample(z);
00348           if (tp)
00349           {
00350             tp->From(ts, xss, uss, z, p);
00351             double cost_trajectory = 0;
00352             int N = uss.size();
00353             for(int k = 0;k < N; k++)
00354             { 
00355               cost_trajectory += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p);
00356             }
00357             cost_trajectory += cost.L(ts[N], xss[N], uss[N-1], 0, p);
00358             ce.AddSample(z, cost_trajectory);
00359           }
00360           else
00361           {
00362             z2us(uss, z);
00363             ce.AddSample(z, Update(xss, uss));
00364           }
00365 
00366           //#DEBUG Number of function evaluations:
00367           //cout<<++nofevaluations<<endl;
00368           ++nofevaluations;
00369 
00370           //Render trajectory samples if external rendering function is provided:
00371           if(external_render)
00372           {
00373             external_render(j,xss);//ID for the sample trajectory
00374           }
00375         }
00376       }
00377 
00378     // estimate distribution
00379     ce.Select();    
00380     //Create SDDP problem for elite samples
00381     if (tp)
00382     {
00383       tp->From(ts, xss, uss, ce.zps, p);
00384       double cost_trajectory = 0;
00385       int N = uss.size();
00386       for(int k = 0;k < N; k++)
00387       { 
00388         cost_trajectory += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p);
00389       }
00390       cost_trajectory += cost.L(ts[N], xss[N], uss[N-1], 0, p);
00391       ce.AddSample(z, cost_trajectory);
00392     }
00393     else
00394     {
00395       z2us(uss, z);
00396       ce.AddSample(z, Update(xss, uss));
00397     }
00398     MySDdp sddp(sys, cost, ts, xss, uss);
00399 
00400     if (!ce.Fit()) {
00401       cout << "[W] TrajectoryPrmSample::Sample: ce.Fit failed!" << endl;
00402     }
00403 
00404     if (ce.Jmin < J) {
00405       if (tp)
00406         tp->From(ts, xs, us, ce.zmin, p);
00407       else
00408         z2us(us, ce.zmin);
00409 
00410       Update(xs, us, false);
00411       J = ce.Jmin;
00412     }
00413 
00414     // construct trajectory using the first sample (this is the one with lowest cost)
00415     //    Traj(xs, ce.zps[0].first, x0);
00416     // cout << "Solution: xs[K]=" << xs[K].transpose() << " c=" << ce.cs[0] << endl;
00417   }
00418 }
00419 
00420 #endif