GCOP  1.0
spsa.h
Go to the documentation of this file.
00001 #ifndef GCOP_SPSA_H
00002 #define GCOP_SPSA_H
00003 
00004 #include <Eigen/Dense>
00005 #include <vector>
00006 #include <type_traits>
00007 #include <algorithm>
00008 #include <iterator>
00009 #include "system.h"
00010 #include "cost.h"
00011 #include <cmath>
00012 #include "rn.h"
00013 #include <limits>
00014 #include <random>
00015 
00016 namespace gcop {
00017   
00018   using namespace std;
00019   using namespace Eigen;
00020  
00021   template <typename T, int n = Dynamic, int c = Dynamic, int _np = Dynamic> class SPSA {
00022     
00023     typedef Matrix<double, n, 1> Vectornd;
00024     typedef Matrix<double, c, 1> Vectorcd;
00025     typedef Matrix<double, _np, 1> Vectormd;
00026     typedef Matrix<double, n, n> Matrixnd;
00027     typedef Matrix<double, n, c> Matrixncd;
00028     typedef Matrix<double, c, n> Matrixcnd;
00029     typedef Matrix<double, c, c> Matrixcd;  
00030     
00031     typedef Matrix<double, _np, _np> Matrixmd;
00032     typedef Matrix<double, n, _np> Matrixnmd;
00033     typedef Matrix<double, _np, n> Matrixmnd;
00034 
00035 
00036   public:
00058     SPSA(System<T, n, c, _np> &sys, Cost<T, n, c, _np> &cost, 
00059          vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 
00060          Vectormd *p = 0,          
00061          bool update = true);
00062     
00063     virtual ~SPSA();
00064     
00065 
00069     void Iterate();
00070 
00071 
00078     double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true);
00079         
00080 
00081     System<T, n, c, _np> &sys;    
00082 
00083     Cost<T, n, c, _np> &cost;     
00084 
00085     std::vector<double> &ts; 
00086 
00087     std::vector<T> &xs;      
00088 
00089     std::vector<Vectorcd> &us;      
00090 
00091     std::vector<Vectorcd> dus;      
00092 
00093     std::vector<T> xss;             
00094     
00095     std::vector<Vectorcd> uss;      
00096 
00097     std::default_random_engine randgenerator; 
00098 
00099     //default constructor gives p = 0.5 benoulli
00100     std::bernoulli_distribution bernoulli_dist;     
00101 
00102     int N;        
00103 
00104     int Nit;      
00105 
00106     struct Stepcoeffs{
00107     double a; 
00108     
00109     double A;
00110 
00111     double alpha;
00112     
00113     double c1; 
00114 
00115     double gamma; 
00116     } stepc ;
00117 
00118     double J;    
00119     
00120     bool debug;  
00121 
00122     int prevcount; 
00123 
00124   };
00125 
00126   using namespace std;
00127   using namespace Eigen;
00128   
00129   template <typename T, int n, int c, int _np> 
00130     SPSA<T, n, c, _np>::SPSA(System<T, n, c, _np> &sys, 
00131                              Cost<T, n, c, _np> &cost, 
00132                              vector<double> &ts, 
00133                              vector<T> &xs, 
00134                              vector<Matrix<double, c, 1> > &us,
00135                              Matrix<double, _np, 1> *p,
00136                              bool update) : 
00137     sys(sys), cost(cost), ts(ts), xs(xs), us(us), N(us.size()), xss(xs), uss(us)
00138     ,Nit(200), debug(true), prevcount(0)//Choosing a, A can be done adaptively TODO
00139     {
00140       assert(N > 0);
00141       assert(ts.size() == N+1);
00142       assert(xs.size() == N+1);
00143       assert(us.size() == N);
00144       dus.resize(N);//Resizing the control variations to be same size as us
00145 
00146       stepc.a = 0.01;
00147       stepc.A = 0.1*Nit;
00148       stepc.c1 = 0.001;
00149       stepc.alpha = 0.602;
00150       stepc.gamma = 0.101;//Initialize the step coeffs
00151 
00152 
00153       if (update) {
00154         J = Update(xs, us);//Initial cost of the trajectory
00155       } else {
00156         J = numeric_limits<double>::max();
00157       }
00158     }
00159   
00160   template <typename T, int n, int c, int _np> 
00161     SPSA<T, n, c, _np>::~SPSA()
00162     {
00163     }
00164 
00165     
00166   template <typename T, int n, int c, int _np> 
00167     double SPSA<T, n, c, _np>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) {    
00168     double J = 0;
00169     sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset themselves. Added Gowtham 8/2/14
00170     for (int k = 0; k < N; ++k) {
00171       double h = ts[k+1] - ts[k];
00172       sys.Step(xs[k+1], us[k], h);
00173       if (evalCost) 
00174         J += cost.L(ts[k], xs[k], us[k], h);
00175       
00176     }
00177     if (evalCost)
00178       J += cost.L(ts[N], xs[N], us[N-1], 0);
00179     return J;
00180   }
00181 
00182   template <typename T, int n, int c, int _np> 
00183     void SPSA<T, n, c, _np>::Iterate() {
00184       //Reset seed of the random generator
00185       randgenerator.seed(370212);
00186       int ussize = us[0].rows();//Number of rows in each us
00187       if(debug)
00188         cout<<"Number of rows: "<<ussize<<endl;
00189       float ak, ck;//step sizes
00190       double J1, J2;
00191 
00192       for(int k = 0;k < Nit;k++)
00193       {
00194         ak = stepc.a/pow((prevcount+k+1+stepc.A),(stepc.alpha));
00195         ck = stepc.c1/pow((prevcount+k+1),(stepc.gamma));
00196         if(debug)
00197           cout<<"Step ak, ck: "<<ak<<"\t"<<ck<<"\t"<<stepc.c1<<"\t"<<pow((prevcount+k+1),(stepc.gamma))<<endl;
00198         for(int count1 = 0; count1 < N; count1++)
00199         {
00200           for(int count2 = 0; count2< ussize; count2++)
00201           {
00202             if(bernoulli_dist(randgenerator))
00203               dus[count1][count2] = 1;
00204             else
00205               dus[count1][count2] = -1;
00206           }
00207         }//Perturbation Vector - dus
00208         for(int count1 = 0;count1 < N;count1++)
00209         {
00210           uss[count1] = us[count1] + ck*dus[count1];
00211           if(debug)
00212             cout<<"Uss1 #"<<count1<<": "<<uss[count1].transpose()<<endl;
00213         }
00214         if(debug)
00215           cout<<"Xss[0]: "<<xss[0].transpose()<<endl;
00216         J1 = Update(xss, uss);//Find the cost of the perturbed trajectory 1
00217 
00218         for(int count1 = 0;count1 < N;count1++)
00219         {
00220           uss[count1] = us[count1] - ck*dus[count1];
00221           if(debug)
00222             cout<<"Uss2 #"<<count1<<": "<<uss[count1].transpose()<<endl;
00223         }
00224         if(debug)
00225           cout<<"Xss[0]: "<<xss[0].transpose()<<endl;
00226         J2 = Update(xss, uss);//Find the cost of the perturbed trajectory 2
00227         //Update the control vector based on the simultaneous gradient:
00228         for(int count1 =0; count1 < N;count1++)
00229         {
00230           us[count1] = us[count1] - ((ak/ck)*(J1 - J2)/2)*dus[count1].cwiseInverse();//This is ok only for the special case of bernoulli distribution with dus being +1 or -1 since element wise inverse gives the same values back again
00231         }
00232       }
00233       prevcount += Nit;
00234       //Terminal xs and J:
00235       J = Update(xs,us);
00236     }
00237 }
00238 
00239 #endif