GCOP: lib/algos/systemce.h Source File

Go to the documentation of this file.
00001 // This file is part of libgcop, a library for Geometric Control, Optimization, and Planning (GCOP)
00002 //
00003 // Copyright (C) 2004-2014 Marin Kobilarov <marin(at)jhu.edu>
00004 //
00005 // This Source Code Form is subject to the terms of the Mozilla
00006 // Public License v. 2.0. If a copy of the MPL was not distributed
00007 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00008 
00009 #ifndef GCOP_SYSTEMCE_H
00010 #define GCOP_SYSTEMCE_H
00011 
00012 #include <Eigen/Dense>
00013 #include <vector>
00014 #include <type_traits>
00015 #include <algorithm>
00016 #include <iterator>
00017 #include "system.h"
00018 #include "cost.h"
00019 #include "ce.h"
00020 #include <cmath>
00021 #include "rn.h"
00022 #include <limits>
00023 
00024 #include "tparam.h"
00025 #include "creator.h"
00026 
00027 namespace gcop {
00028   
00029   using namespace std;
00030   using namespace Eigen;
00031  
00039   template <typename T, 
00040     int n = Dynamic, 
00041     int c = Dynamic, 
00042     int np = Dynamic, 
00043     int ntp = Dynamic,
00044     typename Tc = T> 
00045     class SystemCe {
00046     
00047     typedef Matrix<double, n, 1> Vectornd;
00048     typedef Matrix<double, c, 1> Vectorcd;
00049     typedef Matrix<double, np, 1> Vectormd;
00050 
00051     typedef Matrix<double, n, n> Matrixnd;
00052     typedef Matrix<double, n, c> Matrixncd;
00053     typedef Matrix<double, c, n> Matrixcnd;
00054     typedef Matrix<double, c, c> Matrixcd;  
00055 
00056     typedef Matrix<double, ntp, 1> Vectortpd;  
00057     typedef Matrix<double, ntp, ntp> Matrixtpd;  
00058     
00059     //External render Function for trajectories:
00060     typedef void(RenderFunc)(int, vector<T>&);
00061   public:
00079     SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, 
00080              vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p,
00081              vector<Vectorcd> &dus, vector<Vectorcd> &es, 
00082              bool update = true);
00083 
00105     SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, Tparam<T, n, c, np, ntp, Tc> &tp,
00106              vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p,
00107              vector<Vectorcd> &dus, vector<Vectorcd> &es, 
00108              bool update = true);
00109 
00131     SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, Tparam<T, n, c, np, ntp, Tc> &tp,
00132              vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p,
00133              VectorXd &dss, VectorXd &dess, 
00134              bool update = true);
00135 
00159     SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, Tparam<T, n, c, np, ntp, 
00160              Tc> &tp, Creator<Tc> *contextSampler,
00161              vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p,
00162              Vectortpd &mu0, Matrixtpd &P0, Matrixtpd &S, bool update = true);
00163 
00164     
00165     
00166     virtual ~SystemCe();
00167     
00168 
00172     void Iterate(bool updatexsfromus = true);
00173 
00180     double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true);
00181         
00182     void us2z(Vectortpd &z, const std::vector<Vectorcd> &us) const;    
00183 
00184     void z2us(std::vector<Vectorcd> &us, const Vectortpd &z) const;
00185 
00186     System<T, n, c, np> &sys;    
00187 
00188     Cost<T, n, c, np, Tc> &cost;     
00189 
00190     Tparam<T, n, c, np, ntp, Tc> *tp;     
00191 
00192     Creator<Tc> *contextSampler;     
00193 
00194     std::vector<double> &ts; 
00195 
00196     std::vector<T> &xs;      
00197 
00198     std::vector<Vectorcd> &us;      
00199 
00200     Vectormd *p;      
00201 
00202     std::vector<Vectorcd> &dus;      
00203 
00204     std::vector<T> xss;             
00205     
00206     std::vector<Vectorcd> uss;      
00207 
00208     std::vector< std::vector<T> > xsas;    
00209     
00210     std::vector< std::vector<Vectorcd> >usas;  
00211 
00212     std::vector< bool >bs;        
00213     
00214     int N;        
00215 
00216     Ce<ntp> ce;       
00217 
00218     int Ns;      
00219 
00220     double J;    
00221 
00222     bool enforceUpperBound; 
00223 
00224     bool updateUpperBound; 
00225 
00226     double Jub;  
00227 
00228     Vectortpd zmin; 
00229 
00230     int nofevaluations;
00231     
00232     bool debug;  
00233 
00234     // TODO: fix this: call it an external callback, etc... 
00235     RenderFunc *external_render;
00236 
00237   };
00238 
00239   using namespace std;
00240   using namespace Eigen;
00241   
00242   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00243     SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 
00244                                              Cost<T, n, c, np, Tc> &cost, 
00245                                              vector<double> &ts, 
00246                                              vector<T> &xs, 
00247                                              vector<Matrix<double, c, 1> > &us,
00248                                              Matrix<double, np, 1> *p,
00249                                              vector<Matrix<double, c, 1> > &dus,
00250                                              vector<Matrix<double, c, 1> > &es,
00251                                              bool update) : 
00252     sys(sys), cost(cost), tp(0), contextSampler(0),
00253     ts(ts), xs(xs), us(us), p(p), dus(dus), xss(xs), uss(us), N(us.size()), 
00254     ce(N*sys.U.n, 1), Ns(1000), 
00255     J(numeric_limits<double>::max()), 
00256     enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()),
00257     debug(true), external_render(0), nofevaluations(0)
00258     {
00259       // do not use an external tparam, just assume discrete controls are the params
00260       
00261       if (ntp != Dynamic) {
00262         assert(ntp == N*sys.U.n);
00263         if (ntp > 16)
00264           cout << "[W] gcop::SystemCe::SystemCe: fixed Eigen size is not recommended above 16!" << endl;
00265       }
00266 
00267       assert(N > 0);
00268       assert(ts.size() == N+1);
00269       assert(xs.size() == N+1);
00270       assert(us.size() == N);
00271       assert(dus.size() == N);
00272       assert(xss.size() == N+1);
00273       assert(uss.size() == N);
00274 
00275       if (update) {
00276         J = Update(xs, us);
00277       }
00278       
00279       us2z(ce.gmm.ns[0].mu, us);
00280       Vectortpd z;
00281       if (ntp == Dynamic)
00282         z.resize(sys.U.n*N);
00283   
00284       us2z(z, dus);
00285       ce.gmm.ns[0].P = (z.cwiseProduct()).asDiagonal();
00286       ce.gmm.Update();
00287 
00288       us2z(z, es);
00289       ce.S = z.asDiagonal();
00290     }
00291 
00292   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00293     SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 
00294                                              Cost<T, n, c, np, Tc> &cost, 
00295                                              Tparam<T, n, c, np, ntp, Tc> &tp,
00296                                              vector<double> &ts, 
00297                                              vector<T> &xs, 
00298                                              vector<Vectorcd> &us, 
00299                                              Vectormd *p,
00300                                              VectorXd &dss, 
00301                                              VectorXd &dess, 
00302                                              bool update) :
00303     sys(sys), cost(cost), tp(&tp), contextSampler(0),
00304     ts(ts), xs(xs), us(us), dus(us), p(p), xss(xs), uss(us), N(us.size()), 
00305     ce(tp.ntp, 1), Ns(1000), 
00306     J(numeric_limits<double>::max()), 
00307     enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()),
00308     debug(true), external_render(0), nofevaluations(0)
00309     {
00310 
00311       assert(N > 0);
00312       assert(ts.size() == N+1);
00313       assert(xs.size() == N+1);
00314       assert(us.size() == N);
00315       assert(xss.size() == N+1);
00316       assert(uss.size() == N);
00317       assert(dss.size() == tp.ntp);
00318       assert(dess.size() == tp.ntp);
00319 
00320       if (update) {
00321         J = Update(xs, us);
00322       }
00323     
00324       tp.To(ce.gmm.ns[0].mu, ts, xs, us, p);
00325 
00326       ce.gmm.ns[0].P = dss.asDiagonal();
00327       ce.gmm.Update();
00328 
00329       ce.S = dess.asDiagonal();
00330     }
00331 
00332   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00333     SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 
00334                                              Cost<T, n, c, np, Tc> &cost, 
00335                                              Tparam<T, n, c, np, ntp, Tc> &tp,
00336                                              Creator<Tc> *contextSampler,
00337                                              vector<double> &ts, 
00338                                              vector<T> &xs, 
00339                                              vector<Vectorcd> &us, 
00340                                              Vectormd *p,
00341                                              Vectortpd &mu0,
00342                                              Matrixtpd &P0,
00343                                              Matrixtpd &S, 
00344                                              bool update) :
00345     sys(sys), cost(cost), tp(&tp), contextSampler(contextSampler),
00346     ts(ts), xs(xs), us(us), dus(us), p(p), xss(xs), uss(us), N(us.size()), 
00347     ce(tp.ntp, 1), Ns(1000), 
00348     J(numeric_limits<double>::max()), 
00349     enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()),
00350     debug(true), external_render(0), nofevaluations(0)
00351     {
00352 
00353       assert(N > 0);
00354       assert(ts.size() == N+1);
00355       assert(xs.size() == N+1);
00356       assert(us.size() == N);
00357       assert(xss.size() == N+1);
00358       assert(uss.size() == N);
00359       assert(mu0.size() == tp.ntp);
00360       //      assert(tpCov.size() == tp.ntp);
00361       //      assert(tpS.size() == tp.ntp);
00362 
00363       //      if (update) {
00364       //        J = Update(xs, us);
00365       //      } else {
00366       //      }
00367       
00368       ce.gmm.ns[0].mu = mu0;
00369       ce.gmm.ns[0].P = P0;
00370       ce.S = S;
00371 
00372       ce.gmm.Update();
00373           
00374       //      tp.To(ce.gmm.ns[0].mu, ts, xs, us, p);
00375       if (contextSampler) {
00376         tp.SetContext( (*contextSampler)() );
00377       }
00378       tp.From(ts, xs, us, mu0, p);
00379     }
00380 
00381 
00382   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00383     SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 
00384                                          Cost<T, n, c, np, Tc> &cost,                              
00385                                          Tparam<T, n, c, np, ntp, Tc> &tp, 
00386                                          vector<double> &ts, 
00387                                          vector<T> &xs, 
00388                                          vector<Matrix<double, c, 1> > &us,
00389                                          Matrix<double, np, 1> *p,
00390                                          vector<Matrix<double, c, 1> > &dus,
00391                                          vector<Matrix<double, c, 1> > &es,
00392                                          bool update) : 
00393     sys(sys), cost(cost), tp(&tp), contextSampler(0),
00394     ts(ts), xs(xs), us(us), p(p), dus(dus), xss(xs), uss(us), N(us.size()), 
00395     ce(tp.ntp, 1), Ns(1000), 
00396     J(numeric_limits<double>::max()), 
00397     enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()),
00398     debug(true), external_render(0), nofevaluations(0)
00399     {
00400       /*
00401       if (ntp != Dynamic) {
00402         assert(ntp == N*sys.U.n);
00403         if (ntp > 16)
00404           cout << "[W] gcop::SystemCe::SystemCe: fixed Eigen size is not recommended above 16!" << endl;
00405       }
00406       */
00407 
00408       assert(N > 0);
00409       assert(ts.size() == N+1);
00410       assert(xs.size() == N+1);
00411       assert(us.size() == N);
00412       assert(dus.size() == N);
00413       assert(xss.size() == N+1);
00414       assert(uss.size() == N);
00415 
00416       if (update) {
00417         J = Update(xs, us);
00418       }
00419 
00420       // initialize mean using the provided initial trajectory and controls
00421       tp.To(ce.gmm.ns[0].mu, ts, xs, us, p);
00422       Vectortpd z;
00423       Vectortpd dz;
00424       if (ntp == Dynamic) {
00425         z.resize(tp.ntp);
00426         dz.resize(tp.ntp);
00427       }  
00428 
00429       // perturbed controls:  ups = us + dus
00430       vector<Matrix<double, c, 1> > ups(us.size());
00431       transform(us.begin(), us.end(), dus.begin(), ups.begin(),plus< Matrix<double, c, 1> >());
00432 
00433       // initialize the covariance using the provided variance in the initial controls
00434       tp.To(z, ts, xs, ups, p);
00435       dz = z - ce.gmm.ns[0].mu;
00436       ce.gmm.ns[0].P = (dz.cwiseProduct(dz)).asDiagonal();
00437       ce.gmm.Update();
00438 
00439       tp.To(z, ts, xs, es, p);
00440       ce.S = z.asDiagonal();
00441     }
00442 
00443   
00444   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00445     SystemCe<T, n, c, np, ntp, Tc>::~SystemCe()
00446     {
00447     }
00448 
00449   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00450     void SystemCe<T, n, c, np, ntp, Tc>::us2z(Vectortpd &z, const std::vector<Vectorcd> &us) const
00451     {
00452       assert(us.size() > 0);
00453       assert(z.size() == us.size()*us[0].size());
00454       for (int i = 0; i < us.size(); ++i) {
00455         z.segment(i*sys.U.n, sys.U.n) = us[i];
00456       }
00457     }
00458   
00459   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00460     void SystemCe<T, n, c, np, ntp, Tc>::z2us(std::vector<Vectorcd> &us, const Vectortpd &z) const
00461     {
00462       assert(us.size() > 0);
00463       assert(z.size() == us.size()*us[0].size());
00464       for (int i = 0; i < us.size(); ++i) {
00465         us[i] = z.segment(i*sys.U.n, sys.U.n);
00466       }
00467     }
00468 
00469   
00470   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00471     double SystemCe<T, n, c, np, ntp, Tc>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) {    
00472     double J = 0;
00473     sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset to specific state and time.
00474 
00475     for (int k = 0; k < N; ++k) {
00476       double h = ts[k+1] - ts[k];
00477       sys.Step(xs[k+1], us[k], h, p);
00478       if (evalCost) 
00479         J += cost.L(ts[k], xs[k], us[k], h, p);
00480       
00481     }
00482     if (evalCost)
00483       J += cost.L(ts[N], xs[N], us[N-1], 0, p);
00484     return J;
00485   }
00486 
00487   template <typename T, int n, int c, int np, int ntp, typename Tc> 
00488     void SystemCe<T, n, c, np, ntp, Tc>::Iterate(bool updatexsfromus) {
00489 
00490       if (ce.inc) 
00491       {
00492         Vectortpd z;
00493         if (ntp == Dynamic)
00494           if (tp)
00495             z.resize(tp->ntp);
00496           else 
00497             z.resize(us.size()*this->N);        
00498 
00499         ce.Sample(z);
00500         if (tp) {
00501           // set context (e.g. goal, environment, etc...) possibly using random sampling
00502           if (contextSampler) {
00503             Tc &context = (*contextSampler)();
00504             tp->SetContext(context);            
00505             cost.SetContext(context);
00506           }
00507           tp->From(ts, xss, uss, z, p);
00508           double cost_trajectory = 0;
00509           int N = uss.size();
00510           for(int k = 0;k < N; k++)
00511           { 
00512             cost_trajectory += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p);
00513           }
00514           cost_trajectory += cost.L(ts[N], xss[N], uss[N-1], 0, p);
00515           ce.AddSample(z, cost_trajectory);
00516         }
00517         else
00518         {
00519           z2us(uss, z);
00520           ce.AddSample(z, Update(xss, uss));
00521         }
00522 
00523         xsas.push_back(xss);
00524         usas.push_back(uss);
00525       } else   // non-incremental
00526       {
00527         // Jub is used to discard any samples with cost below Jub
00528         ce.Reset();
00529         xsas.clear();
00530         usas.clear();
00531         bs.clear();
00532 
00533         Vectortpd z;
00534         if (ntp == Dynamic)
00535           if (tp)
00536             z.resize(tp->ntp);
00537           else 
00538             z.resize(us.size()*this->N);
00539         
00540         for (int j = 0; j < Ns; ++j) {
00541           
00542           // sample context
00543           if (contextSampler) {
00544             Tc &context = (*contextSampler)();
00545             tp->SetContext(context);            
00546             cost.SetContext(context);
00547           }
00548           
00549           double J = 0;
00550           bool good = true;
00551 
00552           do {
00553             ce.Sample(z);       
00554             if (tp) {
00555               good = tp->From(ts, xss, uss, z, p);
00556               int N = uss.size();
00557               J = 0;
00558               for(int k = 0;k < N; k++) { 
00559                 J += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p);
00560               }
00561               J += cost.L(ts[N], xss[N], uss[N-1], 0, p);
00562             } else {
00563               z2us(uss, z);
00564               J = Update(xss, uss);
00565             }
00566           } while(enforceUpperBound && J > Jub);
00567           
00568           ce.AddSample(z, J);
00569           
00570           // add to list of all samples
00571           xsas.push_back(xss);
00572           usas.push_back(uss);
00573           bs.push_back(good);
00574 
00575           //#DEBUG Number of function evaluations:
00576           //cout<<++nofevaluations<<endl;
00577           ++nofevaluations;
00578 
00579           //Render trajectory samples if external rendering function is provided:
00580           if(external_render)
00581           {
00582             external_render(j,xss);//ID for the sample trajectory
00583           }
00584         }
00585         
00586         if (updateUpperBound) {
00587           cout << "Jub=" << Jub << " ce.Jmax=" << ce.Jmax << endl;          
00588           Jub = min(Jub, ce.Jmax);
00589           cout << "Jub=" << Jub << endl;
00590         }
00591       }
00592 
00593     // estimate distribution
00594     ce.Select();    
00595 
00596     if (!ce.Fit()) {
00597       cout << "[W] TrajectoryPrmSample::Sample: ce.Fit failed!" << endl;
00598     }
00599 
00600     if (ce.Jmin < J) {
00601       if (tp)
00602         tp->From(ts, xs, us, ce.zmin, p);
00603       else
00604         z2us(us, ce.zmin);
00605 
00606       if(updatexsfromus)
00607         Update(xs, us, false);
00608       J = ce.Jmin;
00609       zmin = ce.zmin;//Store the parameters also
00610     }
00611 
00612     // construct trajectory using the first sample (this is the one with lowest cost)
00613     //    Traj(xs, ce.zps[0].first, x0);
00614     // cout << "Solution: xs[K]=" << xs[K].transpose() << " c=" << ce.cs[0] << endl;
00615   }
00616 }
00617 
00618 #endif