GCOP: lib/algos/aspsa.h Source File

Go to the documentation of this file.
00001 #ifndef GCOP_ASPSA_H
00002 #define GCOP_ASPSA_H
00003 
00004 #include <Eigen/Dense>
00005 #include <Eigen/Cholesky>
00006 #include <vector>
00007 #include <type_traits>
00008 #include <algorithm>
00009 #include <iterator>
00010 #include "system.h"
00011 #include "cost.h"
00012 #include <cmath>
00013 #include "rn.h"
00014 #include <limits>
00015 #include <random>
00016 
00017 namespace gcop {
00018 
00019   using namespace std;
00020   using namespace Eigen;
00021 
00022   template <typename T, int n = Dynamic, int c = Dynamic, int _np = Dynamic> class ASPSA {
00023 
00024     typedef Matrix<double, n, 1> Vectornd;
00025     typedef Matrix<double, c, 1> Vectorcd;
00026     typedef Matrix<double, _np, 1> Vectormd;
00027     typedef Matrix<double, n, n> Matrixnd;
00028     typedef Matrix<double, n, c> Matrixncd;
00029     typedef Matrix<double, c, n> Matrixcnd;
00030     typedef Matrix<double, c, c> Matrixcd;  
00031 
00032                 typedef Matrix<double, _np, _np> Matrixmd;
00033                 typedef Matrix<double, n, _np> Matrixnmd;
00034                 typedef Matrix<double, _np, n> Matrixmnd;
00035 
00036 
00037     public:
00059     ASPSA(System<T, n, c, _np> &sys, Cost<T, n, c, _np> &cost, 
00060                       vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 
00061                       Vectormd *p = 0, 
00062                       bool update = true);
00063                 
00064     virtual ~ASPSA();
00065 
00066 
00070     void Iterate();
00071 
00072 
00079     double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true);
00080 
00081 
00082     System<T, n, c, _np> &sys;    
00083 
00084     Cost<T, n, c, _np> &cost;     
00085 
00086     std::vector<double> &ts; 
00087 
00088     std::vector<T> &xs;      
00089 
00090     std::vector<Vectorcd> &us;      
00091 
00092     std::vector<Vectorcd> dus1;      
00093     
00094     std::vector<Vectorcd> ustemp;      
00095 
00096     std::vector<Vectorcd> dus2;      
00097 
00098     std::vector<T> xss;             
00099 
00100     std::vector<Vectorcd> uss1;      
00101 
00102     std::vector<Vectorcd> uss2;      
00103     
00104     std::vector<Vectorcd> ghat;      
00105 
00106     std::vector<Vectorcd> ghatavg;      
00107 
00108     std::vector<Vectorcd> hessian_currest;      
00109 
00110     std::vector<Vectorcd> hessian_currestavg;      
00111 
00112     std::vector<Vectorcd> hessian_est;      
00113 
00114     std::vector<Vectorcd> hessian_estreg;      
00115 
00116     std::vector<Vectorcd> deltaG;      
00117 
00118     std::default_random_engine randgenerator; 
00119 
00120     //default constructor gives p = 0.5 benoulli
00121     std::bernoulli_distribution bernoulli_dist;     
00122 
00123     int N;        
00124 
00125     int Nit;      
00126 
00127     float toleranceJ; 
00128 
00129     float toleranceHessianmag; 
00130 
00131     struct Stepcoeffs{
00132       double a; 
00133 
00134       double a1; 
00135 
00136       double A;
00137 
00138       double A1;
00139 
00140       double alpha;
00141 
00142       double alpha1;
00143 
00144       double c1; 
00145 
00146       double c11; 
00147 
00148       double c2; 
00149 
00150       double gamma; 
00151 
00152       double gamma1; 
00153 
00154       double Navg;   
00155     } stepc ;
00156 
00157     double J;    
00158 
00159     bool debug;  
00160 
00161     int prevcount; 
00162 
00163   };
00164 
00165   using namespace std;
00166   using namespace Eigen;
00167 
00168   template <typename T, int n, int c, int _np> 
00169           ASPSA<T, n, c, _np>::ASPSA(System<T, n, c, _np> &sys, 
00170         Cost<T, n, c, _np> &cost, 
00171         vector<double> &ts, 
00172         vector<T> &xs, 
00173         vector<Matrix<double, c, 1> > &us,
00174                                 Matrix<double, _np, 1> *p,                                
00175         bool update) : 
00176           sys(sys), cost(cost), ts(ts), xs(xs), us(us), dus1(us), dus2(us), N(us.size()), xss(xs), uss1(us), uss2(us), ustemp(us)
00177       ,Nit(200), debug(true), prevcount(0), toleranceJ(0.0001), toleranceHessianmag(0.1)//Choosing a, A can be done adaptively TODO
00178   {
00179     assert(N > 0);
00180     assert(ts.size() == N+1);
00181     assert(xs.size() == N+1);
00182     assert(us.size() == N);
00183     //dus.resize(us.size());//Resizing the control variations to be same size as us
00184 
00185     stepc.a = 0.01;
00186     stepc.a1 = 1;
00187     stepc.A1 = 0;
00188     stepc.alpha1 = 1;
00189     stepc.gamma1 = 1.0/6.0;
00190     stepc.A = 0.1*Nit;
00191     stepc.c1 = 0.001;
00192     stepc.c11 = 2*0.001;
00193     stepc.c2 = 2*0.001;
00194     stepc.alpha = 0.602;
00195     stepc.gamma = 0.101;//Initialize the step coeffs
00196     stepc.Navg = 4;
00197 
00198     hessian_est.resize(N);//Resizing Hessian to be diagonal matrix
00199     hessian_currest.resize(N);
00200     hessian_currestavg.resize(N);
00201     hessian_estreg.resize(N);
00202     deltaG.resize(N);
00203     ghat.resize(N);
00204     ghatavg.resize(N);
00205     for(int count1 = 0;count1 < N;count1++)
00206     {
00207       hessian_est[count1] = 5000*Vectorcd::Ones();//Initialize hessian_est
00208     }
00209 
00210     if (update) {
00211       J = Update(xs, us);//Initial cost of the trajectory
00212     } else {
00213       J = numeric_limits<double>::max();
00214     }
00215   }
00216 
00217   template <typename T, int n, int c, int _np> 
00218           ASPSA<T, n, c, _np>::~ASPSA()
00219           {
00220           }
00221 
00222 
00223   template <typename T, int n, int c, int _np> 
00224           double ASPSA<T, n, c, _np>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) {    
00225           double J = 0;
00226           sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset themselves. Added Gowtham 8/2/14
00227           for (int k = 0; k < N; ++k) {
00228             double h = ts[k+1] - ts[k];
00229             sys.Step(xs[k+1], us[k], h);
00230             if (evalCost) 
00231               J += cost.L(ts[k], xs[k], us[k], h);
00232             
00233           }
00234           if (evalCost)
00235             J += cost.L(ts[N], xs[N], us[N-1], 0);
00236           return J;
00237         }
00238         
00239   template <typename T, int n, int c, int _np> 
00240           void ASPSA<T, n, c, _np>::Iterate() {
00241           //Reset seed of the random generator
00242           randgenerator.seed(370212);
00243           int ussize = us[0].rows();//Number of rows in each us
00244           if(debug)
00245             cout<<"Number of rows: "<<ussize<<endl;
00246           float ak, ck, cktilde;//step sizes for 1SPSA and 2SPSA
00247           float J1, J2, J3, J4, Jtemp;
00248           
00249           //1SPSA for Nit steps
00250           for(int k = 0;k < 40;k ++)
00251             {
00252               ak = stepc.a/pow((prevcount+k+1+stepc.A),(stepc.alpha));
00253               ck = stepc.c1/pow((prevcount+k+1),(stepc.gamma));
00254               if(debug)
00255                 cout<<"Step ak, ck: "<<ak<<"\t"<<ck<<"\t"<<stepc.c1<<"\t"<<pow((prevcount+k+1),(stepc.gamma))<<endl;
00256               for(int count1 = 0;count1 < N;count1++)
00257                 {
00258                   for(int count2 = 0; count2< ussize; count2++)
00259                     {
00260                       if(bernoulli_dist(randgenerator))
00261                         dus1[count1][count2] = 1;
00262                       else
00263                         dus1[count1][count2] = -1;
00264                     }//Perturbation Vector - dus
00265                   uss1[count1] = us[count1] + ck*dus1[count1];
00266                   uss2[count1] = us[count1] - ck*dus1[count1];
00267                   /*
00268                     if(debug)
00269                     {
00270                     cout<<"Uss1 #"<<count1<<": "<<uss1[count1].transpose()<<endl;
00271                     cout<<"Uss2 #"<<count1<<": "<<uss2[count1].transpose()<<endl;
00272                     }
00273                   */
00274                 }//Perturbed once thetahatk + ck deltak
00275               J1 = Update(xss, uss1);//Find the cost of the perturbed trajectory 1
00276               J2 = Update(xss, uss2);//Find the cost of the perturbed trajectory 2
00277               
00278               
00279               
00280               //Update the control vector based on the simultaneous gradient:
00281               for(int count1 =0; count1 < N;count1++)
00282                 {
00283                   ghat[count1] = ((J1 - J2)/(2*ck))*(dus1[count1].cwiseInverse());
00284                   us[count1] = us[count1] - ak*ghat[count1];
00285                 }
00286               if(debug)
00287                 {
00288                   J = Update(xs,us);
00289                   cout<<"J #"<<k<<"\t: "<<J<<endl;
00290                 }
00291             }
00292           J = Update(xs,us);
00293           cout<<"J "<<"\t: "<<J<<endl;
00294           getchar();
00295           
00296           prevcount += 40;//Adding the number of iterations done till now
00297           
00298           //2SPSA Begin:
00299           for(int k = 0;k < Nit;k++)
00300             {
00301               ak = stepc.a1/pow((prevcount+k+1+stepc.A1),(stepc.alpha1));
00302               ck = stepc.c11/pow((prevcount+k+1),(stepc.gamma1));
00303               cktilde = stepc.c2/pow((prevcount+k+1),(stepc.gamma1));
00304               if(debug)
00305                 cout<<"Step ak, ck, cktilde: "<<ak<<"\t"<<ck<<"\t"<<cktilde<<endl;
00306               //Averagind Gradient and Hessian information
00307               
00308               for(int count1 = 0;count1 < N;count1++)
00309                 {
00310                   hessian_currestavg[count1].setZero();//Initialize hessian avg to 0
00311                   ghatavg[count1].setZero();//initialize ghatavg
00312                 }
00313               
00314               for(int mavg = 1;mavg <= stepc.Navg;mavg++)
00315                 {
00316                   for(int count1 = 0;count1 < N;count1++)
00317                     {
00318                       for(int count2 = 0; count2< ussize; count2++)
00319                         {
00320                           if(bernoulli_dist(randgenerator))
00321                             dus1[count1][count2] = 1;
00322                           else
00323                             dus1[count1][count2] = -1;
00324                         }//Perturbation Vector - delta
00325                       uss1[count1] = us[count1] + ck*dus1[count1];
00326                       uss2[count1] = us[count1] - ck*dus1[count1];
00327                       /*
00328                         if(debug)
00329                         {
00330                         cout<<"Uss1 #"<<count1<<": "<<uss1[count1].transpose()<<endl;
00331                         cout<<"Uss2 #"<<count1<<": "<<uss2[count1].transpose()<<endl;
00332                         }
00333                       */
00334                     }//Perturbed once thetahatk +/- ck deltak
00335                   J1 = Update(xss, uss1);//Find the cost of the perturbed trajectory 1
00336                   J2 = Update(xss, uss2);//Find the cost of the perturbed trajectory 2
00337                   
00338                   
00339                   //Second perturbation to find the gradient at thethatk + ck deltak
00340                   for(int count1 = 0;count1 < N;count1++)
00341                     {
00342                       for(int count2 = 0; count2< ussize; count2++)
00343                         {
00344                           if(bernoulli_dist(randgenerator))
00345                             dus2[count1][count2] = 1;
00346                           else
00347                             dus2[count1][count2] = -1;
00348                         }//Perturbation Vector - deltatilde
00349                       uss1[count1] += cktilde*dus2[count1];
00350                       uss2[count1] += cktilde*dus2[count1];
00351                       /*if(debug)
00352                         {
00353                         cout<<"Uss1 #"<<count1<<": "<<uss1[count1].transpose()<<endl;
00354                         cout<<"Uss2 #"<<count1<<": "<<uss2[count1].transpose()<<endl;
00355               }
00356                       */
00357                     }
00358                   J3 = Update(xss, uss1);//Find the cost of the additional perturbations
00359                   J4 = Update(xss, uss2);
00360                   //Estimate the gradients:
00361                   //Gk1(thetahatk + ck deltak) =  (y(thethatk + ckdeltak + cktilde deltak) - y(thetahatk + ck deltak))/cktilde * deltak inverse
00362                   //float minhessianval = 1e4, minhesscount1 = 0;
00363                   //Finding average Hessian and Average Gradient
00364                   for(int count1 =0; count1 < N;count1++)
00365                     {
00366                       deltaG[count1] = ((J3 - J1 - (J4 - J2))/cktilde)*(dus2[count1].cwiseInverse());
00367                       hessian_currest[count1] = (1/(2*ck))*(deltaG[count1].cwiseProduct(dus1[count1].cwiseInverse()));//Considering only diagonal elements
00368                       /*if(debug
00369                         {
00370                         cout<<"hesscount2 #"<<count1<<":\n "<<hessian_currest[count1]<<endl;
00371                         }
00372                       */
00373                       //hessian_currest[count1] = 0.5*(hessian_currest[count1] + hessian_currest[count1].transpose());
00374                       
00375                       //Computing the averages:
00376                       ghat[count1] = ((J1 - J2)/(2*ck))*(dus1[count1].cwiseInverse());
00377                       ghatavg[count1] = (float(mavg-1)/float(mavg))*ghatavg[count1] + (1.0/float(mavg))*ghat[count1];//Estimating avg of the gradient
00378                       hessian_currestavg[count1] = (float(mavg-1)/float(mavg))*hessian_currestavg[count1] + (1.0/float(mavg))*hessian_currest[count1];//Estimating hessian avg 
00379                       /*  if(debug)
00380                                 {
00381                                 //cout<<"us[#"<<count1<<"]: "<<us[count1].transpose()<<endl;
00382                                 //cout<<"hesscount #"<<count1<<":\n "<<hessian_estreg[count1]<<endl;
00383                                 cout<<"hesscount2 #"<<count1<<":\n "<<hessian_currest[count1]<<endl;
00384                                 }*/
00385                     }
00386                   if(debug)
00387                     {
00388                       cout<<"J1,J2,J3,J4:"<<J1<<"\t"<<J2<<"\t"<<J3<<"\t"<<J4<<"\t"<<endl;
00389                       cout<<"Magof Hessian"<<(J3 - J1 - (J4 - J2))/(2*cktilde*ck)<<"\t First Gradient: "<<(J3 - J1)/(cktilde)<<"\t Second Gradient: "<<(J4 - J2)/(cktilde)<<endl;
00390                       cout<<"Magof Gradient"<<((J1 - J2)/(2*ck))<<endl;
00391                       getchar();
00392                     }
00393                   //          if(debug)
00394                   //getchar();//DEBUG STUFF
00395                 }//Averaging Done
00396               
00397               float magofHessian = abs((J3 - J1 - (J4 - J2))/(2*cktilde*ck));//Just checking one of it instead of the actual average TODO
00398               /*if(magofHessian >toleranceHessianmag)
00399           {
00400           cerr<<"Magnitude of Hessian Big: "<<magofHessian<<endl;
00401           for(int count1 =0; count1 < N;count1++)
00402           {
00403           ustemp[count1] = us[count1] - ak*ghatavg[count1];//Can add constraints here TODO
00404           }
00405           }
00406           else
00407           {
00408               */
00409               //Update the control vector based on the simultaneous gradient:
00410               for(int count1 =0; count1 < N;count1++)
00411                 {
00412                   hessian_est[count1] = (float(k+1)/float(k+2))*hessian_est[count1] + (1/float(k+2))*hessian_currestavg[count1];//Find the Current Hessian estimate using iterative process This is slightly modified to use the initial estimate 
00413                   //hessian_estreg[count1] = hessian_est[count1].cwiseAbs() + (0.001/(k+1)) * Vectorcd::Ones();//Regulated
00414                   hessian_estreg[count1] = hessian_est[count1].cwiseMax(toleranceHessianmag*Vectorcd::Ones());//Regulated
00415                   //hessian_estreg[count1] = Vectorcd::Ones();
00416                   ustemp[count1] = us[count1] - ak*(hessian_estreg[count1].cwiseInverse()).cwiseProduct(ghatavg[count1]);//Can add constraints here TODO
00417                   if(debug)
00418                     {
00419                       //cout<<"us[#"<<count1<<"]: "<<us[count1].transpose()<<endl;
00420                       //cout<<"Hess eig values: "<<hessian_estreg[count1].eigenvalues().transpose()<<endl;
00421                       cout<<"hesscountreg #"<<count1<<": "<<hessian_estreg[count1].transpose()<<endl;
00422                       cout<<"hesscountavgest #"<<count1<<": "<<hessian_currestavg[count1].transpose()<<endl;
00423                       //.part<Eigen::LowerTriangular>() 
00424                       //cout<<"hesscount2 #"<<count1<<":\n "<<hessian_currest[count1]<<endl;
00425                     }
00426                 }
00427               //}
00428               Jtemp = Update(xs,ustemp);//Evaluate the new cost
00429               if(Jtemp < (J + toleranceJ))
00430                 {
00431                   J = Jtemp;
00432                   us = ustemp;  
00433                 }//Else neglect
00434               else
00435                 {
00436                   cerr<<"Neglecting Jtemp, J: "<<Jtemp<<"\t"<<J<<endl;
00437                 }
00438               if(debug)
00439                 {
00440                   cout<<"J Jtemp J1,J2,J3,J4 #"<<k<<"\t: "<<J<<"\t"<<Jtemp<<"\t"<<J1<<"\t"<<J2<<"\t"<<J3<<"\t"<<J4<<endl;
00441                   getchar();
00442                 }
00443             }
00444           prevcount += Nit;
00445           //Terminal xs and J:
00446           J = Update(xs,us);
00447         }
00448 }
00449 
00450 #endif