GCOP
1.0
|
00001 #ifndef GCOP_SPSA_H 00002 #define GCOP_SPSA_H 00003 00004 #include <Eigen/Dense> 00005 #include <vector> 00006 #include <type_traits> 00007 #include <algorithm> 00008 #include <iterator> 00009 #include "system.h" 00010 #include "cost.h" 00011 #include <cmath> 00012 #include "rn.h" 00013 #include <limits> 00014 #include <random> 00015 00016 namespace gcop { 00017 00018 using namespace std; 00019 using namespace Eigen; 00020 00021 template <typename T, int n = Dynamic, int c = Dynamic, int _np = Dynamic> class SPSA { 00022 00023 typedef Matrix<double, n, 1> Vectornd; 00024 typedef Matrix<double, c, 1> Vectorcd; 00025 typedef Matrix<double, _np, 1> Vectormd; 00026 typedef Matrix<double, n, n> Matrixnd; 00027 typedef Matrix<double, n, c> Matrixncd; 00028 typedef Matrix<double, c, n> Matrixcnd; 00029 typedef Matrix<double, c, c> Matrixcd; 00030 00031 typedef Matrix<double, _np, _np> Matrixmd; 00032 typedef Matrix<double, n, _np> Matrixnmd; 00033 typedef Matrix<double, _np, n> Matrixmnd; 00034 00035 00036 public: 00058 SPSA(System<T, n, c, _np> &sys, Cost<T, n, c, _np> &cost, 00059 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 00060 Vectormd *p = 0, 00061 bool update = true); 00062 00063 virtual ~SPSA(); 00064 00065 00069 void Iterate(); 00070 00071 00078 double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true); 00079 00080 00081 System<T, n, c, _np> &sys; 00082 00083 Cost<T, n, c, _np> &cost; 00084 00085 std::vector<double> &ts; 00086 00087 std::vector<T> &xs; 00088 00089 std::vector<Vectorcd> &us; 00090 00091 std::vector<Vectorcd> dus; 00092 00093 std::vector<T> xss; 00094 00095 std::vector<Vectorcd> uss; 00096 00097 std::default_random_engine randgenerator; 00098 00099 //default constructor gives p = 0.5 benoulli 00100 std::bernoulli_distribution bernoulli_dist; 00101 00102 int N; 00103 00104 int Nit; 00105 00106 struct Stepcoeffs{ 00107 double a; 00108 00109 double A; 00110 00111 double alpha; 00112 00113 double c1; 00114 00115 double gamma; 00116 } stepc ; 00117 00118 double J; 00119 00120 bool debug; 00121 00122 int prevcount; 00123 00124 }; 00125 00126 using namespace std; 00127 using namespace Eigen; 00128 00129 template <typename T, int n, int c, int _np> 00130 SPSA<T, n, c, _np>::SPSA(System<T, n, c, _np> &sys, 00131 Cost<T, n, c, _np> &cost, 00132 vector<double> &ts, 00133 vector<T> &xs, 00134 vector<Matrix<double, c, 1> > &us, 00135 Matrix<double, _np, 1> *p, 00136 bool update) : 00137 sys(sys), cost(cost), ts(ts), xs(xs), us(us), N(us.size()), xss(xs), uss(us) 00138 ,Nit(200), debug(true), prevcount(0)//Choosing a, A can be done adaptively TODO 00139 { 00140 assert(N > 0); 00141 assert(ts.size() == N+1); 00142 assert(xs.size() == N+1); 00143 assert(us.size() == N); 00144 dus.resize(N);//Resizing the control variations to be same size as us 00145 00146 stepc.a = 0.01; 00147 stepc.A = 0.1*Nit; 00148 stepc.c1 = 0.001; 00149 stepc.alpha = 0.602; 00150 stepc.gamma = 0.101;//Initialize the step coeffs 00151 00152 00153 if (update) { 00154 J = Update(xs, us);//Initial cost of the trajectory 00155 } else { 00156 J = numeric_limits<double>::max(); 00157 } 00158 } 00159 00160 template <typename T, int n, int c, int _np> 00161 SPSA<T, n, c, _np>::~SPSA() 00162 { 00163 } 00164 00165 00166 template <typename T, int n, int c, int _np> 00167 double SPSA<T, n, c, _np>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) { 00168 double J = 0; 00169 sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset themselves. Added Gowtham 8/2/14 00170 for (int k = 0; k < N; ++k) { 00171 double h = ts[k+1] - ts[k]; 00172 sys.Step(xs[k+1], us[k], h); 00173 if (evalCost) 00174 J += cost.L(ts[k], xs[k], us[k], h); 00175 00176 } 00177 if (evalCost) 00178 J += cost.L(ts[N], xs[N], us[N-1], 0); 00179 return J; 00180 } 00181 00182 template <typename T, int n, int c, int _np> 00183 void SPSA<T, n, c, _np>::Iterate() { 00184 //Reset seed of the random generator 00185 randgenerator.seed(370212); 00186 int ussize = us[0].rows();//Number of rows in each us 00187 if(debug) 00188 cout<<"Number of rows: "<<ussize<<endl; 00189 float ak, ck;//step sizes 00190 double J1, J2; 00191 00192 for(int k = 0;k < Nit;k++) 00193 { 00194 ak = stepc.a/pow((prevcount+k+1+stepc.A),(stepc.alpha)); 00195 ck = stepc.c1/pow((prevcount+k+1),(stepc.gamma)); 00196 if(debug) 00197 cout<<"Step ak, ck: "<<ak<<"\t"<<ck<<"\t"<<stepc.c1<<"\t"<<pow((prevcount+k+1),(stepc.gamma))<<endl; 00198 for(int count1 = 0; count1 < N; count1++) 00199 { 00200 for(int count2 = 0; count2< ussize; count2++) 00201 { 00202 if(bernoulli_dist(randgenerator)) 00203 dus[count1][count2] = 1; 00204 else 00205 dus[count1][count2] = -1; 00206 } 00207 }//Perturbation Vector - dus 00208 for(int count1 = 0;count1 < N;count1++) 00209 { 00210 uss[count1] = us[count1] + ck*dus[count1]; 00211 if(debug) 00212 cout<<"Uss1 #"<<count1<<": "<<uss[count1].transpose()<<endl; 00213 } 00214 if(debug) 00215 cout<<"Xss[0]: "<<xss[0].transpose()<<endl; 00216 J1 = Update(xss, uss);//Find the cost of the perturbed trajectory 1 00217 00218 for(int count1 = 0;count1 < N;count1++) 00219 { 00220 uss[count1] = us[count1] - ck*dus[count1]; 00221 if(debug) 00222 cout<<"Uss2 #"<<count1<<": "<<uss[count1].transpose()<<endl; 00223 } 00224 if(debug) 00225 cout<<"Xss[0]: "<<xss[0].transpose()<<endl; 00226 J2 = Update(xss, uss);//Find the cost of the perturbed trajectory 2 00227 //Update the control vector based on the simultaneous gradient: 00228 for(int count1 =0; count1 < N;count1++) 00229 { 00230 us[count1] = us[count1] - ((ak/ck)*(J1 - J2)/2)*dus[count1].cwiseInverse();//This is ok only for the special case of bernoulli distribution with dus being +1 or -1 since element wise inverse gives the same values back again 00231 } 00232 } 00233 prevcount += Nit; 00234 //Terminal xs and J: 00235 J = Update(xs,us); 00236 } 00237 } 00238 00239 #endif