GCOP
1.0
|
00001 #ifndef GCOP_ASPSA_H 00002 #define GCOP_ASPSA_H 00003 00004 #include <Eigen/Dense> 00005 #include <Eigen/Cholesky> 00006 #include <vector> 00007 #include <type_traits> 00008 #include <algorithm> 00009 #include <iterator> 00010 #include "system.h" 00011 #include "cost.h" 00012 #include <cmath> 00013 #include "rn.h" 00014 #include <limits> 00015 #include <random> 00016 00017 namespace gcop { 00018 00019 using namespace std; 00020 using namespace Eigen; 00021 00022 template <typename T, int n = Dynamic, int c = Dynamic, int _np = Dynamic> class ASPSA { 00023 00024 typedef Matrix<double, n, 1> Vectornd; 00025 typedef Matrix<double, c, 1> Vectorcd; 00026 typedef Matrix<double, _np, 1> Vectormd; 00027 typedef Matrix<double, n, n> Matrixnd; 00028 typedef Matrix<double, n, c> Matrixncd; 00029 typedef Matrix<double, c, n> Matrixcnd; 00030 typedef Matrix<double, c, c> Matrixcd; 00031 00032 typedef Matrix<double, _np, _np> Matrixmd; 00033 typedef Matrix<double, n, _np> Matrixnmd; 00034 typedef Matrix<double, _np, n> Matrixmnd; 00035 00036 00037 public: 00059 ASPSA(System<T, n, c, _np> &sys, Cost<T, n, c, _np> &cost, 00060 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, 00061 Vectormd *p = 0, 00062 bool update = true); 00063 00064 virtual ~ASPSA(); 00065 00066 00070 void Iterate(); 00071 00072 00079 double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true); 00080 00081 00082 System<T, n, c, _np> &sys; 00083 00084 Cost<T, n, c, _np> &cost; 00085 00086 std::vector<double> &ts; 00087 00088 std::vector<T> &xs; 00089 00090 std::vector<Vectorcd> &us; 00091 00092 std::vector<Vectorcd> dus1; 00093 00094 std::vector<Vectorcd> ustemp; 00095 00096 std::vector<Vectorcd> dus2; 00097 00098 std::vector<T> xss; 00099 00100 std::vector<Vectorcd> uss1; 00101 00102 std::vector<Vectorcd> uss2; 00103 00104 std::vector<Vectorcd> ghat; 00105 00106 std::vector<Vectorcd> ghatavg; 00107 00108 std::vector<Vectorcd> hessian_currest; 00109 00110 std::vector<Vectorcd> hessian_currestavg; 00111 00112 std::vector<Vectorcd> hessian_est; 00113 00114 std::vector<Vectorcd> hessian_estreg; 00115 00116 std::vector<Vectorcd> deltaG; 00117 00118 std::default_random_engine randgenerator; 00119 00120 //default constructor gives p = 0.5 benoulli 00121 std::bernoulli_distribution bernoulli_dist; 00122 00123 int N; 00124 00125 int Nit; 00126 00127 float toleranceJ; 00128 00129 float toleranceHessianmag; 00130 00131 struct Stepcoeffs{ 00132 double a; 00133 00134 double a1; 00135 00136 double A; 00137 00138 double A1; 00139 00140 double alpha; 00141 00142 double alpha1; 00143 00144 double c1; 00145 00146 double c11; 00147 00148 double c2; 00149 00150 double gamma; 00151 00152 double gamma1; 00153 00154 double Navg; 00155 } stepc ; 00156 00157 double J; 00158 00159 bool debug; 00160 00161 int prevcount; 00162 00163 }; 00164 00165 using namespace std; 00166 using namespace Eigen; 00167 00168 template <typename T, int n, int c, int _np> 00169 ASPSA<T, n, c, _np>::ASPSA(System<T, n, c, _np> &sys, 00170 Cost<T, n, c, _np> &cost, 00171 vector<double> &ts, 00172 vector<T> &xs, 00173 vector<Matrix<double, c, 1> > &us, 00174 Matrix<double, _np, 1> *p, 00175 bool update) : 00176 sys(sys), cost(cost), ts(ts), xs(xs), us(us), dus1(us), dus2(us), N(us.size()), xss(xs), uss1(us), uss2(us), ustemp(us) 00177 ,Nit(200), debug(true), prevcount(0), toleranceJ(0.0001), toleranceHessianmag(0.1)//Choosing a, A can be done adaptively TODO 00178 { 00179 assert(N > 0); 00180 assert(ts.size() == N+1); 00181 assert(xs.size() == N+1); 00182 assert(us.size() == N); 00183 //dus.resize(us.size());//Resizing the control variations to be same size as us 00184 00185 stepc.a = 0.01; 00186 stepc.a1 = 1; 00187 stepc.A1 = 0; 00188 stepc.alpha1 = 1; 00189 stepc.gamma1 = 1.0/6.0; 00190 stepc.A = 0.1*Nit; 00191 stepc.c1 = 0.001; 00192 stepc.c11 = 2*0.001; 00193 stepc.c2 = 2*0.001; 00194 stepc.alpha = 0.602; 00195 stepc.gamma = 0.101;//Initialize the step coeffs 00196 stepc.Navg = 4; 00197 00198 hessian_est.resize(N);//Resizing Hessian to be diagonal matrix 00199 hessian_currest.resize(N); 00200 hessian_currestavg.resize(N); 00201 hessian_estreg.resize(N); 00202 deltaG.resize(N); 00203 ghat.resize(N); 00204 ghatavg.resize(N); 00205 for(int count1 = 0;count1 < N;count1++) 00206 { 00207 hessian_est[count1] = 5000*Vectorcd::Ones();//Initialize hessian_est 00208 } 00209 00210 if (update) { 00211 J = Update(xs, us);//Initial cost of the trajectory 00212 } else { 00213 J = numeric_limits<double>::max(); 00214 } 00215 } 00216 00217 template <typename T, int n, int c, int _np> 00218 ASPSA<T, n, c, _np>::~ASPSA() 00219 { 00220 } 00221 00222 00223 template <typename T, int n, int c, int _np> 00224 double ASPSA<T, n, c, _np>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) { 00225 double J = 0; 00226 sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset themselves. Added Gowtham 8/2/14 00227 for (int k = 0; k < N; ++k) { 00228 double h = ts[k+1] - ts[k]; 00229 sys.Step(xs[k+1], us[k], h); 00230 if (evalCost) 00231 J += cost.L(ts[k], xs[k], us[k], h); 00232 00233 } 00234 if (evalCost) 00235 J += cost.L(ts[N], xs[N], us[N-1], 0); 00236 return J; 00237 } 00238 00239 template <typename T, int n, int c, int _np> 00240 void ASPSA<T, n, c, _np>::Iterate() { 00241 //Reset seed of the random generator 00242 randgenerator.seed(370212); 00243 int ussize = us[0].rows();//Number of rows in each us 00244 if(debug) 00245 cout<<"Number of rows: "<<ussize<<endl; 00246 float ak, ck, cktilde;//step sizes for 1SPSA and 2SPSA 00247 float J1, J2, J3, J4, Jtemp; 00248 00249 //1SPSA for Nit steps 00250 for(int k = 0;k < 40;k ++) 00251 { 00252 ak = stepc.a/pow((prevcount+k+1+stepc.A),(stepc.alpha)); 00253 ck = stepc.c1/pow((prevcount+k+1),(stepc.gamma)); 00254 if(debug) 00255 cout<<"Step ak, ck: "<<ak<<"\t"<<ck<<"\t"<<stepc.c1<<"\t"<<pow((prevcount+k+1),(stepc.gamma))<<endl; 00256 for(int count1 = 0;count1 < N;count1++) 00257 { 00258 for(int count2 = 0; count2< ussize; count2++) 00259 { 00260 if(bernoulli_dist(randgenerator)) 00261 dus1[count1][count2] = 1; 00262 else 00263 dus1[count1][count2] = -1; 00264 }//Perturbation Vector - dus 00265 uss1[count1] = us[count1] + ck*dus1[count1]; 00266 uss2[count1] = us[count1] - ck*dus1[count1]; 00267 /* 00268 if(debug) 00269 { 00270 cout<<"Uss1 #"<<count1<<": "<<uss1[count1].transpose()<<endl; 00271 cout<<"Uss2 #"<<count1<<": "<<uss2[count1].transpose()<<endl; 00272 } 00273 */ 00274 }//Perturbed once thetahatk + ck deltak 00275 J1 = Update(xss, uss1);//Find the cost of the perturbed trajectory 1 00276 J2 = Update(xss, uss2);//Find the cost of the perturbed trajectory 2 00277 00278 00279 00280 //Update the control vector based on the simultaneous gradient: 00281 for(int count1 =0; count1 < N;count1++) 00282 { 00283 ghat[count1] = ((J1 - J2)/(2*ck))*(dus1[count1].cwiseInverse()); 00284 us[count1] = us[count1] - ak*ghat[count1]; 00285 } 00286 if(debug) 00287 { 00288 J = Update(xs,us); 00289 cout<<"J #"<<k<<"\t: "<<J<<endl; 00290 } 00291 } 00292 J = Update(xs,us); 00293 cout<<"J "<<"\t: "<<J<<endl; 00294 getchar(); 00295 00296 prevcount += 40;//Adding the number of iterations done till now 00297 00298 //2SPSA Begin: 00299 for(int k = 0;k < Nit;k++) 00300 { 00301 ak = stepc.a1/pow((prevcount+k+1+stepc.A1),(stepc.alpha1)); 00302 ck = stepc.c11/pow((prevcount+k+1),(stepc.gamma1)); 00303 cktilde = stepc.c2/pow((prevcount+k+1),(stepc.gamma1)); 00304 if(debug) 00305 cout<<"Step ak, ck, cktilde: "<<ak<<"\t"<<ck<<"\t"<<cktilde<<endl; 00306 //Averagind Gradient and Hessian information 00307 00308 for(int count1 = 0;count1 < N;count1++) 00309 { 00310 hessian_currestavg[count1].setZero();//Initialize hessian avg to 0 00311 ghatavg[count1].setZero();//initialize ghatavg 00312 } 00313 00314 for(int mavg = 1;mavg <= stepc.Navg;mavg++) 00315 { 00316 for(int count1 = 0;count1 < N;count1++) 00317 { 00318 for(int count2 = 0; count2< ussize; count2++) 00319 { 00320 if(bernoulli_dist(randgenerator)) 00321 dus1[count1][count2] = 1; 00322 else 00323 dus1[count1][count2] = -1; 00324 }//Perturbation Vector - delta 00325 uss1[count1] = us[count1] + ck*dus1[count1]; 00326 uss2[count1] = us[count1] - ck*dus1[count1]; 00327 /* 00328 if(debug) 00329 { 00330 cout<<"Uss1 #"<<count1<<": "<<uss1[count1].transpose()<<endl; 00331 cout<<"Uss2 #"<<count1<<": "<<uss2[count1].transpose()<<endl; 00332 } 00333 */ 00334 }//Perturbed once thetahatk +/- ck deltak 00335 J1 = Update(xss, uss1);//Find the cost of the perturbed trajectory 1 00336 J2 = Update(xss, uss2);//Find the cost of the perturbed trajectory 2 00337 00338 00339 //Second perturbation to find the gradient at thethatk + ck deltak 00340 for(int count1 = 0;count1 < N;count1++) 00341 { 00342 for(int count2 = 0; count2< ussize; count2++) 00343 { 00344 if(bernoulli_dist(randgenerator)) 00345 dus2[count1][count2] = 1; 00346 else 00347 dus2[count1][count2] = -1; 00348 }//Perturbation Vector - deltatilde 00349 uss1[count1] += cktilde*dus2[count1]; 00350 uss2[count1] += cktilde*dus2[count1]; 00351 /*if(debug) 00352 { 00353 cout<<"Uss1 #"<<count1<<": "<<uss1[count1].transpose()<<endl; 00354 cout<<"Uss2 #"<<count1<<": "<<uss2[count1].transpose()<<endl; 00355 } 00356 */ 00357 } 00358 J3 = Update(xss, uss1);//Find the cost of the additional perturbations 00359 J4 = Update(xss, uss2); 00360 //Estimate the gradients: 00361 //Gk1(thetahatk + ck deltak) = (y(thethatk + ckdeltak + cktilde deltak) - y(thetahatk + ck deltak))/cktilde * deltak inverse 00362 //float minhessianval = 1e4, minhesscount1 = 0; 00363 //Finding average Hessian and Average Gradient 00364 for(int count1 =0; count1 < N;count1++) 00365 { 00366 deltaG[count1] = ((J3 - J1 - (J4 - J2))/cktilde)*(dus2[count1].cwiseInverse()); 00367 hessian_currest[count1] = (1/(2*ck))*(deltaG[count1].cwiseProduct(dus1[count1].cwiseInverse()));//Considering only diagonal elements 00368 /*if(debug 00369 { 00370 cout<<"hesscount2 #"<<count1<<":\n "<<hessian_currest[count1]<<endl; 00371 } 00372 */ 00373 //hessian_currest[count1] = 0.5*(hessian_currest[count1] + hessian_currest[count1].transpose()); 00374 00375 //Computing the averages: 00376 ghat[count1] = ((J1 - J2)/(2*ck))*(dus1[count1].cwiseInverse()); 00377 ghatavg[count1] = (float(mavg-1)/float(mavg))*ghatavg[count1] + (1.0/float(mavg))*ghat[count1];//Estimating avg of the gradient 00378 hessian_currestavg[count1] = (float(mavg-1)/float(mavg))*hessian_currestavg[count1] + (1.0/float(mavg))*hessian_currest[count1];//Estimating hessian avg 00379 /* if(debug) 00380 { 00381 //cout<<"us[#"<<count1<<"]: "<<us[count1].transpose()<<endl; 00382 //cout<<"hesscount #"<<count1<<":\n "<<hessian_estreg[count1]<<endl; 00383 cout<<"hesscount2 #"<<count1<<":\n "<<hessian_currest[count1]<<endl; 00384 }*/ 00385 } 00386 if(debug) 00387 { 00388 cout<<"J1,J2,J3,J4:"<<J1<<"\t"<<J2<<"\t"<<J3<<"\t"<<J4<<"\t"<<endl; 00389 cout<<"Magof Hessian"<<(J3 - J1 - (J4 - J2))/(2*cktilde*ck)<<"\t First Gradient: "<<(J3 - J1)/(cktilde)<<"\t Second Gradient: "<<(J4 - J2)/(cktilde)<<endl; 00390 cout<<"Magof Gradient"<<((J1 - J2)/(2*ck))<<endl; 00391 getchar(); 00392 } 00393 // if(debug) 00394 //getchar();//DEBUG STUFF 00395 }//Averaging Done 00396 00397 float magofHessian = abs((J3 - J1 - (J4 - J2))/(2*cktilde*ck));//Just checking one of it instead of the actual average TODO 00398 /*if(magofHessian >toleranceHessianmag) 00399 { 00400 cerr<<"Magnitude of Hessian Big: "<<magofHessian<<endl; 00401 for(int count1 =0; count1 < N;count1++) 00402 { 00403 ustemp[count1] = us[count1] - ak*ghatavg[count1];//Can add constraints here TODO 00404 } 00405 } 00406 else 00407 { 00408 */ 00409 //Update the control vector based on the simultaneous gradient: 00410 for(int count1 =0; count1 < N;count1++) 00411 { 00412 hessian_est[count1] = (float(k+1)/float(k+2))*hessian_est[count1] + (1/float(k+2))*hessian_currestavg[count1];//Find the Current Hessian estimate using iterative process This is slightly modified to use the initial estimate 00413 //hessian_estreg[count1] = hessian_est[count1].cwiseAbs() + (0.001/(k+1)) * Vectorcd::Ones();//Regulated 00414 hessian_estreg[count1] = hessian_est[count1].cwiseMax(toleranceHessianmag*Vectorcd::Ones());//Regulated 00415 //hessian_estreg[count1] = Vectorcd::Ones(); 00416 ustemp[count1] = us[count1] - ak*(hessian_estreg[count1].cwiseInverse()).cwiseProduct(ghatavg[count1]);//Can add constraints here TODO 00417 if(debug) 00418 { 00419 //cout<<"us[#"<<count1<<"]: "<<us[count1].transpose()<<endl; 00420 //cout<<"Hess eig values: "<<hessian_estreg[count1].eigenvalues().transpose()<<endl; 00421 cout<<"hesscountreg #"<<count1<<": "<<hessian_estreg[count1].transpose()<<endl; 00422 cout<<"hesscountavgest #"<<count1<<": "<<hessian_currestavg[count1].transpose()<<endl; 00423 //.part<Eigen::LowerTriangular>() 00424 //cout<<"hesscount2 #"<<count1<<":\n "<<hessian_currest[count1]<<endl; 00425 } 00426 } 00427 //} 00428 Jtemp = Update(xs,ustemp);//Evaluate the new cost 00429 if(Jtemp < (J + toleranceJ)) 00430 { 00431 J = Jtemp; 00432 us = ustemp; 00433 }//Else neglect 00434 else 00435 { 00436 cerr<<"Neglecting Jtemp, J: "<<Jtemp<<"\t"<<J<<endl; 00437 } 00438 if(debug) 00439 { 00440 cout<<"J Jtemp J1,J2,J3,J4 #"<<k<<"\t: "<<J<<"\t"<<Jtemp<<"\t"<<J1<<"\t"<<J2<<"\t"<<J3<<"\t"<<J4<<endl; 00441 getchar(); 00442 } 00443 } 00444 prevcount += Nit; 00445 //Terminal xs and J: 00446 J = Update(xs,us); 00447 } 00448 } 00449 00450 #endif