GCOP: lib/algos/sddp.h Source File

Go to the documentation of this file.
00001 #ifndef GCOP_SDDP_H
00002 #define GCOP_SDDP_H
00003 
00006 #include "docp.h"
00007 #include <exception>      // std::exception
00008 #include <stdexcept>
00009 #include <random>
00010 
00011 namespace gcop {
00012   
00013   using namespace std;
00014   using namespace Eigen;
00015   
00016   template <typename T, int nx = Dynamic, int nu = Dynamic, int np = Dynamic> class SDdp :
00017     public Docp<T, nx, nu, np> {
00018     
00019     typedef Matrix<double, nx, 1> Vectornd;
00020     typedef Matrix<double, nu, 1> Vectorcd;
00021     typedef Matrix<double, np, 1> Vectorpd;
00022 
00023     typedef Matrix<double, nx, nx> Matrixnd;
00024     typedef Matrix<double, nx, nu> Matrixncd;
00025     typedef Matrix<double, nu, nx> Matrixcnd;
00026     typedef Matrix<double, nu, nu> Matrixcd;
00027 
00028     //External render Function for trajectories:
00029     typedef void(RenderFunc)(int, vector<T>&);
00030     
00031   public:
00050     SDdp(System<T, nx, nu, np> &sys, Cost<T, nx, nu, np> &cost, 
00051          vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 
00052          Vectorpd *p = 0, bool update = true);
00053     
00054     virtual ~SDdp();
00055 
00056 
00063     void Iterate();
00064 
00068     void Forward();
00069 
00073     void Backward();
00074 
00078     void Linearize();
00079         
00080     int N;        
00081     
00082     double mu;    
00083     double mu0;   
00084     double dmu0;  
00085     double mumax; 
00086     double a;     
00087     double current_a; 
00088     
00089     std::vector<Vectorcd> dus;  
00090     std::vector<T> xss;             
00091     std::vector<T> xsprev;          
00092     
00093     std::vector<Vectorcd> kus;
00094     std::vector<Vectorcd> du_sigma;
00095     std::vector<Matrixcnd> Kuxs;
00096     std::vector<Vectorcd> Qud; 
00097 
00098     std::default_random_engine randgenerator; 
00099 
00100     std::normal_distribution<double> normal_dist;
00101     //std::uniform_real_distribution<double> uniform_dist;///< Creates a normal distribution
00102 
00103     Vectornd Lx;
00104     Matrixnd Lxx;
00105     Vectorcd Lu;
00106     Matrixcd Luu;
00107     
00108     Matrixnd P;
00109     Vectornd v;
00110 
00111     Vectorcd duscale;
00112     Vectornd dxscale;
00113     RenderFunc *external_render;
00114     
00115     double V;
00116     Vector2d dV;
00117 
00118     double s1;   
00119     double s2;   
00120     double b1;   
00121     double b2;   
00122     
00123     char type;   
00124     int Ns; 
00125     
00126     static const char PURE = 0;     
00127     static const char DDP = 1;      
00128     static const char LQS = 2;      
00129     
00130     /*
00131     class Fx : public Function<T, n, n> {
00132     public:
00133     Fx(System &sys) :
00134       Function<T>(sys.U.nspace), sys(sys) {
00135       }
00136       
00137       void F(Vectornd &f, const T &x) {
00138 
00139         
00140         sys.X.L
00141         this->xa = xa;
00142         sys.FK(this->xa);
00143         sys.ID(f, t, *xb, this->xa, *u, h);
00144       }
00145       
00146       Mbs &sys;
00147       
00148       double t;
00149       const MbsState *xb;
00150       const Vectorcd *u;
00151       double h;
00152     };
00153 
00154     class Fu : public Function<VectorXd> {
00155     public:
00156     Fu(Mbs &sys) :
00157       Function<VectorXd>(sys.U), sys(sys) {
00158       }
00159       
00160       void F(VectorXd &f, const VectorXd &u) {
00161         sys.ID(f, t, *xb, *xa, u, h);
00162       }
00163       
00164       Mbs &sys;
00165       
00166       double t;
00167       const MbsState *xa;
00168       const MbsState *xb;
00169       double h;
00170     };
00171 
00172     */
00173 
00174     bool pd(const Matrixnd &P) {
00175       LLT<Matrixnd> llt;     
00176       llt.compute(P);
00177       return llt.info() == Eigen::Success;
00178     }
00179 
00180     bool pdX(const MatrixXd &P) {
00181       LLT<MatrixXd> llt;     
00182       llt.compute(P);
00183       return llt.info() == Eigen::Success;
00184     }  
00185 
00186     void Reset()
00187     {
00188       this->current_a = 1.0;
00189       this->mu = (1e-3);
00190       this-> mu0 = (1e-3); 
00191       this->dmu0 = (2); 
00192       this->a = (1); 
00193       this->s1 = (0.1); 
00194       this->s2 = (0.5); 
00195       this->b1 = (0.25); 
00196       this->b2 = (2);
00197       for(int count = 0; count < N; count++)
00198       {
00199         Kuxs[count].setZero();
00200         kus[count].setZero();
00201       }
00202     }
00203 
00204   };
00205 
00206   using namespace std;
00207   using namespace Eigen;
00208   
00209   template <typename T, int nx, int nu, int np> 
00210     SDdp<T, nx, nu, np>::SDdp(System<T, nx, nu, np> &sys, 
00211                             Cost<T, nx, nu, np> &cost, 
00212                             vector<double> &ts, 
00213                             vector<T> &xs, 
00214                             vector<Matrix<double, nu, 1> > &us,
00215                             Matrix<double, np, 1> *p,
00216                             bool update) : Docp<T, nx, nu, np>(sys, cost, ts, xs, us, p, false),
00217     N(us.size()), 
00218     mu(1e-3), mu0(1e-3), dmu0(2), mumax(1e6), a(1), current_a(1),
00219     dus(N), kus(N), Kuxs(N), Qud(N), du_sigma(N),
00220     s1(0.1), s2(0.5), b1(0.25), b2(2),
00221     type(LQS),
00222     normal_dist(0,1),
00223     //uniform_dist(-1,1),
00224     external_render(0),
00225     xss(xs),
00226     xsprev(xs),
00227     Ns(30)
00228    // normal_dist(0,0.02)
00229     //normal_dist(0,0.001)
00230     {
00231       assert(N > 0);
00232       assert(sys.X.n > 0);
00233       assert(sys.U.n > 0);
00234 
00235       assert(ts.size() == N+1);
00236       assert(xs.size() == N+1);
00237       assert(xss.size() == N+1);
00238       assert(xsprev.size() == N+1);
00239       assert(us.size() == N);
00240 
00241       if (nx == Dynamic || nu == Dynamic) {
00242         for (int i = 0; i < N; ++i) {
00243           dus[i].resize(sys.U.n);
00244           //          As[i].resize(sys.X.n, sys.X.n);
00245           //          Bs[i].resize(sys.X.n, sys.U.n);
00246           kus[i].resize(sys.U.n);
00247           Kuxs[i].resize(sys.U.n, sys.X.n);
00248         }
00249 
00250         Lx.resize(sys.X.n);
00251         Lxx.resize(sys.X.n, sys.X.n);
00252         Lu.resize(sys.U.n);
00253         Luu.resize(sys.U.n, sys.U.n);
00254 
00255         P.resize(sys.X.n, sys.X.n);
00256         v.resize(sys.X.n);       
00257       }
00258       duscale = Vectorcd::Constant(0.02);//Default initialization of scale
00259       for(int i =0; i<N; ++i)
00260       {
00261         dus[i].setZero();
00262         Kuxs[i].setZero();//Set initial feedback matrices to zero
00263         kus[i].setZero();
00264         Qud[i] = Vectorcd::Constant(1.0/0.0004);//Set the inverse variance to the given value in the beginning
00265         du_sigma[i] = duscale;// Set the sigma to small value to begin with
00266       }
00267       dxscale = Vectornd::Constant(0.02);//Default initialization of scale
00268       //duscale.setZero();
00269       cout<<"duscale: "<<duscale<<endl;
00270       if (update) {
00271         this->Update(false);
00272 
00273      ++(this->nofevaluations);
00274         xss = xs;//Copy xs to xss in the beginning
00275         //this->Update(true);//#DEBUG
00276         //this->Linearize();
00277       }
00278     }
00279   
00280   template <typename T, int nx, int nu, int np> 
00281     SDdp<T, nx, nu, np>::~SDdp()
00282     {
00283     }
00284     
00285   template <typename T, int nx, int nu, int np> 
00286     void SDdp<T, nx, nu, np>::Backward() {
00287 
00288     typedef Matrix<double, nx, 1> Vectornd;
00289     typedef Matrix<double, nu, 1> Vectorcd;
00290     typedef Matrix<double, nx, nx> Matrixnd;
00291     typedef Matrix<double, nx, nu> Matrixncd;
00292     typedef Matrix<double, nu, nx> Matrixcnd;
00293     typedef Matrix<double, nu, nu> Matrixcd;  
00294     
00295     double t = this->ts.back();
00296     const T &x = this->xs.back();
00297     const Vectorcd &u = this->us.back();
00298     double L = this->cost.L(t, x, u, 0, 0, &Lx, &Lxx);
00299     
00300     V = L;
00301     dV.setZero();
00302     
00303     v = Lx;
00304     P = Lxx;
00305     
00306     Vectornd Qx;
00307     Vectorcd Qu;    
00308     Matrixnd Qxx;
00309     Matrixcd Quu;
00310     Matrixcd Quum;
00311     Matrixcnd Qux;   
00312     
00313     Matrixnd At;
00314     Matrixcnd Bt;
00315     
00316     Matrixcd Ic = MatrixXd::Identity(this->sys.U.n, this->sys.U.n);
00317     
00318     for (int k = N-1; k >=0; --k) {
00319       
00320       t = this->ts[k];
00321       double h = this->ts[k+1] - t;
00322 
00323       const T &x = this->xs[k];
00324       const Vectorcd &u = this->us[k];
00325       double L = this->cost.L(t, x, u, h, 0, &Lx, &Lxx, &Lu, &Luu);
00326 
00327       if (std::isnan(L)) {
00328         cout << "NAN" << " k=" << k << " Lx=" << Lx << " Lxx=" << Lxx << endl;
00329         getchar();
00330       }
00331       
00332       V += L;
00333       
00334       const Matrixnd &A = this->As[k];
00335       const Matrixncd &B = this->Bs[k];
00336       Vectorcd &ku = kus[k];
00337       Matrixcnd &Kux = Kuxs[k];
00338       
00339       At = A.transpose();    
00340       Bt = B.transpose();
00341       
00342       Qx = Lx + At*v;
00343       Qu = Lu + Bt*v;
00344       Qxx = Lxx + At*P*A;
00345       Quu = Luu + Bt*P*B;
00346       Qux = Bt*P*A;
00347       
00348       double mu = this->mu;
00349       double dmu = 1;
00350      
00351       if (this->debug) {
00352       if (!pd(P)) {
00353         cout << "P[" << k << "] is not pd =" << endl << P << endl;
00354         cout << "Luu=" << endl << Luu << endl;
00355       }
00356 
00357       Matrixcd Pp = Bt*P*B;
00358       if (!pdX(Pp)) {
00359         cout << "B'PB is not positive definite:" << endl << Pp << endl;
00360       }
00361       }
00362 
00363       LLT<Matrixcd> llt;     
00364       
00365       while (1) {
00366         Quum = Quu + mu*Ic;
00367         llt.compute(Quum);
00368         
00369         // if OK, then reduce mu and break
00370         if (llt.info() == Eigen::Success) {
00371           // this is the standard quadratic rule specified by Tassa and Todorov
00372           dmu = min(1/this->dmu0, dmu/this->dmu0);
00373           if (mu*dmu > this->mu0)
00374             mu = mu*dmu;
00375           else
00376             mu = this->mu0;
00377           
00378         if (this->debug) 
00379           cout << "[I] SDdp::Backward: reduced mu=" << mu << " at k=" << k << endl;
00380 
00381         //Store Diag value for inverse Variance:
00382         Qud[k] = Quum.diagonal();
00383         cout<<"Qud: "<<Qud[k].transpose()<<endl;
00384 
00385 
00386           break;
00387         }
00388         
00389         // if negative then increase mu
00390         dmu = max(this->dmu0, dmu*this->dmu0);
00391         mu = max(this->mu0, mu*dmu);
00392         
00393         if (this->debug) {
00394           cout << "[I] SDdp::Backward: increased mu=" << mu << " at k=" << k << endl;
00395           cout << "[I] SDdp::Backward: Quu=" << Quu << endl;
00396         }
00397 
00398         if (mu > mumax) {
00399           cout << "[W] SDdp::Backward: mu= " << mu << " exceeded maximum !" << endl;          
00400           if (this->debug)
00401             getchar();
00402           break;
00403         }
00404       }
00405 
00406       if (mu > mumax)
00407         break;
00408       
00409       ku = -llt.solve(Qu);
00410       Kux = -llt.solve(Qux);
00411       
00412       assert(!std::isnan(ku[0]));
00413       assert(!std::isnan(Kux(0,0)));
00414 
00415       v = Qx + Kux.transpose()*Qu;
00416       P = Qxx + Kux.transpose()*Qux;
00417       
00418       dV[0] += ku.dot(Qu);
00419       dV[1] += ku.dot(Quu*ku/2);
00420     }
00421     
00422     //    if (debug)
00423     cout << "[I] SDdp::Backward: current V=" << V << endl;    
00424   }
00425 
00426   template <typename T, int nx, int nu, int np> 
00427     void SDdp<T, nx, nu, np>::Forward() {
00428     //static int count_iterate = 0;
00429     //count_iterate++;
00430 
00431     typedef Matrix<double, nx, 1> Vectornd;
00432     typedef Matrix<double, nu, 1> Vectorcd;
00433     typedef Matrix<double, nx, nx> Matrixnd;
00434     typedef Matrix<double, nx, nu> Matrixncd;
00435     typedef Matrix<double, nu, nx> Matrixcnd;
00436     typedef Matrix<double, nu, nu> Matrixcd;  
00437     
00438     // measured change in V
00439     double dVm = 1;
00440     
00441     //double a = this->a;
00442     this->current_a = this->a;//Start with big step size
00443     
00444     while (dVm > 0) {
00445 
00446       Vectornd dx = VectorXd::Zero(this->sys.X.n);
00447       dx.setZero();//Redundancy
00448       T xn = this->xs[0];
00449       this->sys.Reset(xn,this->ts[0]);//Reset to initial state
00450       Vectorcd un;
00451       
00452       double Vm = 0;
00453       
00454       for (int k = 0; k < N; ++k) {
00455         const Vectorcd &u = this->us[k];
00456         Vectorcd &du = dus[k];
00457         
00458         const Vectorcd &ku = kus[k];
00459         const Matrixcnd &Kux = Kuxs[k];
00460         
00461         du = current_a*ku + Kux*dx;
00462         un = u + du; 
00463         
00464         Rn<nu> &U = (Rn<nu>&)this->sys.U;
00465         if (U.bnd) {
00466           for (int j = 0; j < u.size(); ++j) 
00467             if (un[j] < U.lb[j]) {
00468               //cout<<"I hit bound"<<endl;//[DEBUG]
00469               un[j] = U.lb[j];
00470               du[j] = un[j] - u[j];
00471             } else
00472               if (un[j] > U.ub[j]) {
00473                 un[j] = U.ub[j];
00474                 du[j] = un[j] - u[j];
00475               }
00476         }
00477 
00478         /*if(count_iterate == 1)
00479         {
00480           //[DEBUG]:
00481           cout<<"du[ "<<k<<"]:\t"<<du.transpose()<<endl;
00482           cout<<"dx[ "<<k<<"]:\t"<<dx.transpose()<<endl;
00483           cout<<"ku[ "<<k<<"]:\t"<<ku.transpose()<<endl;
00484           cout<<"Kux[ "<<k<<"]:"<<endl<<Kux<<endl;
00485         }
00486         */
00487         
00488         const double &t = this->ts[k];
00489         double h = this->ts[k+1] - t;
00490 
00491         double L = this->cost.L(t, xn, un, h);
00492         Vm += L;
00493         
00494         // update dx
00495         if (type == PURE) {
00496           dx = this->As[k]*dx + this->Bs[k]*du;
00497           this->sys.X.Retract(xn, xn, dx);
00498         } else {
00499           double h = this->ts[k+1] - t;
00500           //Adding nan catching :
00501           try
00502           {
00503             this->sys.Step(xn, un, h, this->p);
00504           }
00505           catch(std::exception &e)
00506           {
00507             //[DEBUG] Statement
00508             std::cerr << "exception caught: " << e.what() << '\n';
00509             std::cout<<" Iteration counter: "<<k<<endl;
00510             std::cout<<" u: "<<u.transpose()<<endl;
00511             std::cout<<" du: "<<du.transpose()<<endl;
00512             //More debug statements:
00513             //std::cout<<" a: "<<a<<"\t ku: "<<ku.transpose()<<"\t dx: "<<dx.transpose()<<"\n kux: \n"<<Kux<<endl;
00514             //[Culprit dx]
00515 
00516             throw std::runtime_error(std::string("Nan observed"));
00517             return;
00518           }
00519           //cout<<"dx: "<<dx.transpose()<<endl;//[DEBUG]//Previous iteration dx
00520           //std::cout<<" du: "<<du.transpose()<<endl;//[DEBUG]
00521           this->sys.X.Lift(dx, this->xs[k+1], xn);
00522           /*if((k == 29) || (k == 30) || (k == 31))
00523           {
00524             //cout dx:
00525             cout<<"dx[ "<<(k+1)<<"]:\t"<<dx.transpose()<<endl;
00526             cout<<"un[ "<<(k+1)<<"]:\t"<<un.transpose()<<endl;
00527             cout<<"du[ "<<k<<"]:\t"<<du.transpose()<<endl;
00528             cout<<"Printing states ["<<(k+1)<<"]"<<endl;
00529             this->sys.print(this->xs[k+1]);
00530             cout<<"Printing xn: "<<endl;
00531             this->sys.print(xn);
00532             if(k == 31)
00533             {
00534               //exit(0);
00535             }
00536           }
00537           */
00538           //cout<<"xs[ "<<(k+1)<<"]:\t"<<this->xs[k+1]<<endl;
00539           //cout<<"un[ "<<(k+1)<<"]:\t"<<un.transpose()<<endl;
00540           
00541           //          cout << xn.gs[0] << " " << xn.r << " " << xn.vs[0] << " " << xn.dr << endl;
00542 
00543           //          cout << this->xs[k+1].gs[0] << " " << this->xs[k+1].r << " " << this->xs[k+1].vs[0] << " " << this->xs[k+1].dr << endl;
00544           assert(!std::isnan(dx[0]));
00545         }
00546       }
00547       ++(this->nofevaluations);
00548       
00549       double L = this->cost.L(this->ts[N], xn, un, 0);
00550       Vm += L;
00551       
00552       if (this->debug)
00553         cout << "[I] SDdp::Forward: measured V=" << Vm << endl;
00554       
00555       dVm = Vm - V;
00556       
00557       if (dVm > 0) {
00558        current_a *= b1;
00559        if(current_a == 0)
00560          break;
00561         if (current_a < 1e-12)
00562         {
00563           current_a = 0;
00564         }
00565         if (this->debug)
00566           cout << "[I] SDdp::Forward: step-size reduced a=" << current_a << endl;
00567         
00568         continue;
00569       }
00570       
00571       /*double r = dVm/(current_a*dV[0] + current_a*current_a*dV[1]);
00572       if (r < s1)
00573        current_a = b1*current_a;
00574       else 
00575         if (r >= s2) 
00576          current_a = b2*current_a;    
00577     
00578       if (this->debug)
00579         cout << "[I] SDdp::Forward: step-size a=" << current_a << endl;    
00580         */
00581     }
00582     this->J = V + dVm;//Set the optimal cost after one iteration
00583   }
00584 
00585   template <typename T, int nx, int nu, int np> 
00586     void SDdp<T, nx, nu, np>::Iterate() {
00587     
00588     Linearize();//Linearize with the current us
00589     Backward();
00590     Forward();
00591     for (int k = 0; k < N; ++k)
00592       this->us[k] += dus[k];
00593     this->Update(false);
00594     ++(this->nofevaluations);
00595     //this->Update(true);//#DEBUG
00596   }
00597  template <typename T, int nx, int nu, int np>
00598     void SDdp<T, nx, nu, np>::Linearize(){
00599       //static int count_iterate = 0;
00600       randgenerator.seed(370212);
00601       MatrixXd dusmatrix(nu*N,Ns);
00602       MatrixXd dxsmatrix(nx*(N+1),Ns);
00603 
00604       //Vectorcd du;
00605       Vectorcd us1;
00606 
00607       int count = 0;
00608 
00609       //Create dus random matrix:
00610 //#pragma omp parallel for private(count)
00611       /*for(count = 0;count < Ns;count++)
00612       {
00613         for(int count1 = 0;count1 < N*nu;count1++)
00614         {
00615           //dusmatrix(count1,count)  = normal_dist(randgenerator);
00616           int count_u = count1%nu;//find the index corresponding to du_scale
00617           dusmatrix(count1,count)  = duscale(count_u)*uniform_dist(randgenerator);
00618         }
00619       }
00620       */
00621       //cout<<dusmatrix<<endl;//#DEBUG
00622       //getchar();
00623 
00624       /*Find xsprev based on dus and us:
00625       this->sys.Reset(this->xs[0],this->ts[0]);
00626       for(int count_traj = 0; count_traj< N;count_traj++)
00627       {
00628         us1 = this->us[count_traj] - this->dus[count_traj];
00629         this->sys.Step(xsprev[count_traj+1],us1,(this->ts[count_traj+1])-(this->ts[count_traj]), this->p);
00630       }
00631       */
00632 
00633       //dxsmatrix.block(0,0,nx,Ns).setZero();//Set zero first block
00634       Vectornd dx;
00635       T x0_sample;
00636       Rn<nu> &U = (Rn<nu>&)this->sys.U;
00637       //NonParallelizable code for now
00638       // First do some iterations to adjust the dus_scale(stdeviation of du) so that dx_stdeviation lies close to dx_scale (target stdeviation) across trajectory
00639       //Set all du_sigma to given default values:
00640       for(count = 0; count < N; count++)
00641       {
00642         du_sigma[count] = duscale;//Set to given default value
00643       }
00644       //double kp = 0.005;
00645       /*
00646       double kp = 0.002;
00647       int Ns1 = 15;
00648       vector<double> dxsquare(N+1, 0);//0 to N
00649       Vectornd dxscale1 = dxscale;
00650       for(int count_it = 0;count_it < 2;count_it++)
00651       {
00652         for(count = 0;count < Ns1; count++)
00653         {
00654           //Adding Variance to duscale to ensure common variance over the trajectory///////////////////:
00655           // First Sample:
00656           for(int count1 = 0; count1 < nx; count1++)
00657           {
00658             dx(count1) = dxscale1(count1)*normal_dist(randgenerator);//Adjust dx_scale 
00659           }
00660           //dxsmatrix.block<nx,1>(0,count) = dx; 
00661           this->sys.X.Retract(x0_sample, this->xs[0], dx);
00662           xss[0] = x0_sample;
00663           this->sys.Reset(x0_sample,this->ts[0]);
00664           for(int count_traj = 0; count_traj< N;count_traj++)
00665           {
00666             this->sys.X.Lift(dx, this->xs[count_traj], xss[count_traj]);//This is for feedback 
00667             dxsquare[count_traj] += dx.squaredNorm();
00668             //cout<<"dx: "<<dx.transpose()<<endl;
00669             //if(du_sigma[count_traj].maxCoeff() > 0.00011)//Only Use Feedback if variance is atleast 0.002
00670             {
00671               us1 = this->us[count_traj] + this->Kuxs[count_traj]*dx;//Before Update
00672             }
00673             */
00674             /*else
00675             {
00676               us1 = this->us[count_traj];//No Feedback
00677             }
00678             */
00679             /*for(int count_u = 0;count_u < nu; count_u++)
00680             {
00681               */
00682               /*if(du_sigma[count_traj][count_u] < 0.002)
00683                 du_sigma[count_traj][count_u] = 0.002;
00684               else if(du_sigma[count_traj][count_u] > 0.2)
00685                 du_sigma[count_traj][count_u] = 0.2;//Bounds ond du_sigma
00686                 */
00687                 /*
00688               us1[count_u] = us1[count_u] + du_sigma[count_traj][count_u]*normal_dist(randgenerator);
00689             }
00690             //cout<<"du_sigma: "<<du_sigma[count_traj].transpose()<<endl;
00691             //getchar();
00692             this->sys.Step(xss[count_traj+1],us1,(this->ts[count_traj+1])-(this->ts[count_traj]), this->p);
00693           }
00694           this->sys.X.Lift(dx, this->xs[N], xss[N]);//This is for feedback
00695           dxsquare[N] += dx.squaredNorm();
00696           //cout<<(dx.squaredNorm()/dxscale.squaredNorm()-1)<<endl;
00697         }
00698         cout<<(dxsquare[0]/(Ns1*dxscale1.squaredNorm()) - 1)<<endl;
00699         for(count = 1;count <= N; count++)
00700         {
00701           for(int count_sigma = 0;count_sigma < count; count_sigma++)
00702           {
00703             du_sigma[count_sigma] -= (kp*(count_sigma+1)/double(count))*Vectorcd::Constant(dxsquare[count]/(Ns1*dxscale.squaredNorm()) - 1);
00704             //cout<<"du_sigma, dx-dxscale["<<count_sigma<<"]: "<<du_sigma[count_sigma].transpose()<<endl;
00705             for(int count_u = 0;count_u < nu; count_u++)
00706             {
00707               if(du_sigma[count_sigma][count_u] < 0.005)
00708                 du_sigma[count_sigma][count_u] = 0.005;
00709               else if(du_sigma[count_sigma][count_u] > 0.2)
00710                 du_sigma[count_sigma][count_u] = 0.2;//Bounds ond du_sigma
00711             }
00712           }
00713           //dxscale1 -= (kp/double(count))*Vectornd::Constant(dxsquare[count]/(Ns1*dxscale.squaredNorm()) - 1);
00714           cout<<(dxsquare[count]/(Ns1*dxscale.squaredNorm()) - 1)<<endl;
00715           //set dxsquare back to zero:
00716           dxsquare[count] = 0;
00717         }
00718         dxsquare[0] = 0;
00719         for(count = 0; count< N; count++)
00720         {
00721           cout<<"du_sigma["<<count<<"]: "<<du_sigma[count].transpose()<<endl;
00722         }
00723         //cout<<(dx.squaredNorm()/dxscale.squaredNorm() - 1)<<endl;
00724         cout<<"------Iteration done----"<<endl;
00725       }
00726         */
00727       //getchar();
00728       for(count = 0;count < Ns;count++)
00729       {
00730         //Set to initial state perturbed by small amount:
00731         for(int count1 = 0; count1 < nx; count1++)
00732         {
00733           dx(count1) = this->dxscale(count1)*normal_dist(randgenerator);//Adjust dx_scale 
00734         }
00735         dxsmatrix.block<nx,1>(0,count) = dx; 
00736         //cout<<"dx0: "<<dx.transpose()<<endl;
00737         //cout<<dx.norm();
00738 
00739         this->sys.X.Retract(x0_sample, this->xs[0], dx);
00740 
00741         xss[0] = x0_sample;
00742         this->sys.Reset(x0_sample,this->ts[0]);
00743         for(int count1 = 0;count1 < N;count1++)
00744         {
00745           //this->sys.X.Lift(dx, xsprev[count1], xss[count1]);//This is for feedback
00746           //us1 = this->us[count1] - this->dus[count1] + (this->current_a)*this->kus[count1] + this->Kuxs[count1]*dx;//Before Update
00747           this->sys.X.Lift(dx, this->xs[count1], xss[count1]);//This is for feedback
00748           dxsmatrix.block<nx,1>((count1)*nx,count) = dx; 
00749           //if(du_sigma[count1].maxCoeff() > 0.00011)//Only Use Feedback if variance is greater than 0.0001(min)
00750           {
00751             us1 = this->us[count1] + this->Kuxs[count1]*dx;//Before Update
00752             //us1 = this->us[count1];//No Feedback
00753             //cout<<"Feedback: "<< (this->Kuxs[count1]*dx).transpose()<<endl;
00754           }
00755           /*else
00756           {
00757             us1 = this->us[count1];//No Feedback
00758           }
00759           */
00760          // if(count_iterate != 2)
00761           {
00762             for(int count_u = 0;count_u < nu; count_u++)
00763             {
00764               //Make sure Qud is not smaller than some threshold:
00765               //dusmatrix(count1,count)  = duscale(count_u)*uniform_dist(randgenerator);
00766               //if(Qud[count1][count_u]<(1.0/(duscale[count_u]*duscale[count_u])))
00767               assert((du_sigma[count1][count_u] < 0.21)&&(du_sigma[count1][count_u]>0.00009));
00768               us1[count_u] = us1[count_u] + du_sigma[count1][count_u]*normal_dist(randgenerator);
00769               //else
00770                 //us1[count_u] = us1[count_u] + (1.0/sqrt(Qud[count1][count_u]))*normal_dist(randgenerator);
00771             }
00772             //getchar();
00773           }
00774           //The sampled control should be within the control bounds of the system !!!
00775           if (U.bnd) {
00776             for (int count_u = 0; count_u < nu; ++count_u)
00777               if (us1[count_u] < U.lb[count_u]) {
00778                 //cout<<"I hit bound"<<endl;//[DEBUG]
00779                 us1[count_u] = U.lb[count_u];
00780              //   du[j] = un[j] - u[j];
00781               } else
00782                 if (us1[count_u] > U.ub[count_u]) {
00783                   us1[count_u] = U.ub[count_u];
00784                   //du[j] = un[j] - u[j];
00785                 }
00786           }
00787 
00788           dusmatrix.block<nu,1>(count1*nu, count) = us1 - this->us[count1];//Verify that this is zero without randomness #DEBUG
00789           //dumatrix.block<nu,1>(count1*nu,count) = us1;
00790           /*if(count_iterate == 2)
00791           {
00792             cout<<"us1, us: "<<us1.transpose()<<"\t , "<<(this->us[count1]).transpose()<<endl;
00793             //cout<<"dus: "<<(this->dus[count1]).transpose()<<"\t"<<.transpose()<<endl;
00794             cout<<"dx[ "<<count1<<"]:\t"<<dx.transpose()<<endl;
00795             cout<<"ku[ "<<count1<<"]:\t"<<this->kus[count1].transpose()<<endl;
00796             cout<<"Kux[ "<<count1<<"]:"<<endl<<this->Kuxs[count1]<<endl;
00797             cout<<"current_a"<<current_a<<endl;
00798             //cout<<"ku, Kxu: "<<endl<<(this->kus[count1])<<endl<<(this->Kuxs[count1])<<endl;
00799           }
00800           */
00801           //cout<<"du, u: "<<du.transpose()<<"\t"<<(this->us[count1]).transpose()<<endl;
00802           this->sys.Step(xss[count1+1],us1,(this->ts[count1+1])-(this->ts[count1]), this->p);
00803           //this->sys.X.Lift(dx, this->xs[count1+1], xss[count1+1]);//This is for fitting Linear Model
00804           //cout<<" "<<dx.norm();
00805           /*if(count_iterate == 2)
00806           {
00807             cout<<"dx: "<<count1<<"\t"<<dx.transpose()<<endl; 
00808             //getchar();
00809           }
00810           */
00811         }
00812         ++(this->nofevaluations);
00813         //Final step:
00814         this->sys.X.Lift(dx, this->xs[N], xss[N]);//This is for feedback
00815         dxsmatrix.block<nx,1>((N)*nx,count) = dx; 
00816         //cout<<endl; 
00817 
00818         //Render trajectory samples if external rendering function is provided:
00819         if(external_render)
00820         {
00821           external_render(count,xss);//ID for the sample trajectory
00822         }
00823         //if(count_iterate == 2)
00824           //getchar();
00825       }
00826 
00827       //Matrix<double, nx, nx+nu>Abs;
00828       //cout<<dxsmatrix<<endl;//#DEBUG
00829       //getchar();
00830       MatrixXd Abs(nx+nu,nx);
00831       MatrixXd XUMatrix(nx+nu,Ns);
00832 
00833       MatrixXd Xverifymatrix(nx,Ns);//Verify 
00834 
00835       //Compute As and Bs for every k:
00836       for(count = 0;count < N;count++)
00837       {
00838         XUMatrix<<dxsmatrix.block(count*nx,0,nx,Ns), dusmatrix.block(count*nu,0,nu,Ns);
00839         Abs = (XUMatrix*XUMatrix.transpose()).ldlt().solve(XUMatrix*dxsmatrix.block((count+1)*nx,0,nx,Ns).transpose());
00840         // Compare As with true As #DEBUG: 
00841         /*cout<<"Computed: As, Bs: "<<endl;
00842         cout<<endl<<Abs.block<nx,nx>(0,0).transpose()<<endl;
00843         cout<<endl<<Abs.block<nu,nx>(nx,0).transpose()<<endl;
00844         cout<<"True As, bs: "<<endl;
00845         cout<<endl<<this->As[count]<<endl;
00846         cout<<endl<<this->Bs[count]<<endl;
00847         cout<<"Diff As, bs: "<<endl;
00848         cout<<endl<<(Abs.block<nx,nx>(0,0).transpose() - this->As[count])<<endl;
00849         cout<<endl<<(Abs.block<nu,nx>(nx,0).transpose() - this->Bs[count])<<endl;
00850         */
00851 //        getchar(); 
00852 
00853         this->As[count] = Abs.block<nx,nx>(0,0).transpose();
00854         this->Bs[count] = Abs.block<nu,nx>(nx,0).transpose();
00855 
00856         /******VERIFY**********/
00857         Xverifymatrix = Abs.transpose()*XUMatrix;
00858         //cout<<endl<<"dXpredicted: "<<endl<<Xverifymatrix<<endl;
00859         //cout<<endl<<"True_dX: "<<dxsmatrix.block((count+1)*nx,0,nx,Ns)<<endl;
00860         cout<<endl<<"Error_predicted: "<<(Xverifymatrix - dxsmatrix.block((count+1)*nx,0,nx,Ns)).squaredNorm()<<endl;
00861         //cout<<endl<<Abs<<endl;
00862         //getchar();
00863 
00864         //Debug: Print:
00865 /*        cout<<"As, Bs, Abs: "<<endl;
00866         cout<<endl<<Abs<<endl;
00867         cout<<endl<<this->As[count]<<endl;
00868         cout<<endl<<this->Bs[count]<<endl;
00869         cout<<"Comparing dxs[count+1] with As*dxs[count] + Bs[count]*du forall samples "<<endl;
00870         cout<<"Prediction - Actual: "<<endl;
00871         cout<<endl<<(Abs.transpose()*XUMatrix - dxsmatrix.block((count+1)*nx,0,nx,Ns))<<endl;
00872         getchar();
00873         */
00874       }
00875       //count_iterate++;
00876     }
00877 }
00878 
00879 #endif