GCOP
1.0
|
00001 // This file is part of libgcop, a library for Geometric Control, Optimization, and Planning (GCOP) 00002 // 00003 // Copyright (C) 2004-2014 Marin Kobilarov <marin(at)jhu.edu> 00004 // 00005 // This Source Code Form is subject to the terms of the Mozilla 00006 // Public License v. 2.0. If a copy of the MPL was not distributed 00007 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 00008 00009 #ifndef GCOP_SYSTEMCE_H 00010 #define GCOP_SYSTEMCE_H 00011 00012 #include <Eigen/Dense> 00013 #include <vector> 00014 #include <type_traits> 00015 #include <algorithm> 00016 #include <iterator> 00017 #include "system.h" 00018 #include "cost.h" 00019 #include "ce.h" 00020 #include <cmath> 00021 #include "rn.h" 00022 #include <limits> 00023 00024 #include "tparam.h" 00025 #include "creator.h" 00026 00027 namespace gcop { 00028 00029 using namespace std; 00030 using namespace Eigen; 00031 00039 template <typename T, 00040 int n = Dynamic, 00041 int c = Dynamic, 00042 int np = Dynamic, 00043 int ntp = Dynamic, 00044 typename Tc = T> 00045 class SystemCe { 00046 00047 typedef Matrix<double, n, 1> Vectornd; 00048 typedef Matrix<double, c, 1> Vectorcd; 00049 typedef Matrix<double, np, 1> Vectormd; 00050 00051 typedef Matrix<double, n, n> Matrixnd; 00052 typedef Matrix<double, n, c> Matrixncd; 00053 typedef Matrix<double, c, n> Matrixcnd; 00054 typedef Matrix<double, c, c> Matrixcd; 00055 00056 typedef Matrix<double, ntp, 1> Vectortpd; 00057 typedef Matrix<double, ntp, ntp> Matrixtpd; 00058 00059 //External render Function for trajectories: 00060 typedef void(RenderFunc)(int, vector<T>&); 00061 public: 00079 SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, 00080 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p, 00081 vector<Vectorcd> &dus, vector<Vectorcd> &es, 00082 bool update = true); 00083 00105 SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, Tparam<T, n, c, np, ntp, Tc> &tp, 00106 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p, 00107 vector<Vectorcd> &dus, vector<Vectorcd> &es, 00108 bool update = true); 00109 00131 SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, Tparam<T, n, c, np, ntp, Tc> &tp, 00132 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p, 00133 VectorXd &dss, VectorXd &dess, 00134 bool update = true); 00135 00159 SystemCe(System<T, n, c, np> &sys, Cost<T, n, c, np, Tc> &cost, Tparam<T, n, c, np, ntp, 00160 Tc> &tp, Creator<Tc> *contextSampler, 00161 vector<double> &ts, vector<T> &xs, vector<Vectorcd> &us, Vectormd *p, 00162 Vectortpd &mu0, Matrixtpd &P0, Matrixtpd &S, bool update = true); 00163 00164 00165 00166 virtual ~SystemCe(); 00167 00168 00172 void Iterate(bool updatexsfromus = true); 00173 00180 double Update(std::vector<T> &xs, const std::vector<Vectorcd> &us, bool evalCost = true); 00181 00182 void us2z(Vectortpd &z, const std::vector<Vectorcd> &us) const; 00183 00184 void z2us(std::vector<Vectorcd> &us, const Vectortpd &z) const; 00185 00186 System<T, n, c, np> &sys; 00187 00188 Cost<T, n, c, np, Tc> &cost; 00189 00190 Tparam<T, n, c, np, ntp, Tc> *tp; 00191 00192 Creator<Tc> *contextSampler; 00193 00194 std::vector<double> &ts; 00195 00196 std::vector<T> &xs; 00197 00198 std::vector<Vectorcd> &us; 00199 00200 Vectormd *p; 00201 00202 std::vector<Vectorcd> &dus; 00203 00204 std::vector<T> xss; 00205 00206 std::vector<Vectorcd> uss; 00207 00208 std::vector< std::vector<T> > xsas; 00209 00210 std::vector< std::vector<Vectorcd> >usas; 00211 00212 std::vector< bool >bs; 00213 00214 int N; 00215 00216 Ce<ntp> ce; 00217 00218 int Ns; 00219 00220 double J; 00221 00222 bool enforceUpperBound; 00223 00224 bool updateUpperBound; 00225 00226 double Jub; 00227 00228 Vectortpd zmin; 00229 00230 int nofevaluations; 00231 00232 bool debug; 00233 00234 // TODO: fix this: call it an external callback, etc... 00235 RenderFunc *external_render; 00236 00237 }; 00238 00239 using namespace std; 00240 using namespace Eigen; 00241 00242 template <typename T, int n, int c, int np, int ntp, typename Tc> 00243 SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 00244 Cost<T, n, c, np, Tc> &cost, 00245 vector<double> &ts, 00246 vector<T> &xs, 00247 vector<Matrix<double, c, 1> > &us, 00248 Matrix<double, np, 1> *p, 00249 vector<Matrix<double, c, 1> > &dus, 00250 vector<Matrix<double, c, 1> > &es, 00251 bool update) : 00252 sys(sys), cost(cost), tp(0), contextSampler(0), 00253 ts(ts), xs(xs), us(us), p(p), dus(dus), xss(xs), uss(us), N(us.size()), 00254 ce(N*sys.U.n, 1), Ns(1000), 00255 J(numeric_limits<double>::max()), 00256 enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()), 00257 debug(true), external_render(0), nofevaluations(0) 00258 { 00259 // do not use an external tparam, just assume discrete controls are the params 00260 00261 if (ntp != Dynamic) { 00262 assert(ntp == N*sys.U.n); 00263 if (ntp > 16) 00264 cout << "[W] gcop::SystemCe::SystemCe: fixed Eigen size is not recommended above 16!" << endl; 00265 } 00266 00267 assert(N > 0); 00268 assert(ts.size() == N+1); 00269 assert(xs.size() == N+1); 00270 assert(us.size() == N); 00271 assert(dus.size() == N); 00272 assert(xss.size() == N+1); 00273 assert(uss.size() == N); 00274 00275 if (update) { 00276 J = Update(xs, us); 00277 } 00278 00279 us2z(ce.gmm.ns[0].mu, us); 00280 Vectortpd z; 00281 if (ntp == Dynamic) 00282 z.resize(sys.U.n*N); 00283 00284 us2z(z, dus); 00285 ce.gmm.ns[0].P = (z.cwiseProduct()).asDiagonal(); 00286 ce.gmm.Update(); 00287 00288 us2z(z, es); 00289 ce.S = z.asDiagonal(); 00290 } 00291 00292 template <typename T, int n, int c, int np, int ntp, typename Tc> 00293 SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 00294 Cost<T, n, c, np, Tc> &cost, 00295 Tparam<T, n, c, np, ntp, Tc> &tp, 00296 vector<double> &ts, 00297 vector<T> &xs, 00298 vector<Vectorcd> &us, 00299 Vectormd *p, 00300 VectorXd &dss, 00301 VectorXd &dess, 00302 bool update) : 00303 sys(sys), cost(cost), tp(&tp), contextSampler(0), 00304 ts(ts), xs(xs), us(us), dus(us), p(p), xss(xs), uss(us), N(us.size()), 00305 ce(tp.ntp, 1), Ns(1000), 00306 J(numeric_limits<double>::max()), 00307 enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()), 00308 debug(true), external_render(0), nofevaluations(0) 00309 { 00310 00311 assert(N > 0); 00312 assert(ts.size() == N+1); 00313 assert(xs.size() == N+1); 00314 assert(us.size() == N); 00315 assert(xss.size() == N+1); 00316 assert(uss.size() == N); 00317 assert(dss.size() == tp.ntp); 00318 assert(dess.size() == tp.ntp); 00319 00320 if (update) { 00321 J = Update(xs, us); 00322 } 00323 00324 tp.To(ce.gmm.ns[0].mu, ts, xs, us, p); 00325 00326 ce.gmm.ns[0].P = dss.asDiagonal(); 00327 ce.gmm.Update(); 00328 00329 ce.S = dess.asDiagonal(); 00330 } 00331 00332 template <typename T, int n, int c, int np, int ntp, typename Tc> 00333 SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 00334 Cost<T, n, c, np, Tc> &cost, 00335 Tparam<T, n, c, np, ntp, Tc> &tp, 00336 Creator<Tc> *contextSampler, 00337 vector<double> &ts, 00338 vector<T> &xs, 00339 vector<Vectorcd> &us, 00340 Vectormd *p, 00341 Vectortpd &mu0, 00342 Matrixtpd &P0, 00343 Matrixtpd &S, 00344 bool update) : 00345 sys(sys), cost(cost), tp(&tp), contextSampler(contextSampler), 00346 ts(ts), xs(xs), us(us), dus(us), p(p), xss(xs), uss(us), N(us.size()), 00347 ce(tp.ntp, 1), Ns(1000), 00348 J(numeric_limits<double>::max()), 00349 enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()), 00350 debug(true), external_render(0), nofevaluations(0) 00351 { 00352 00353 assert(N > 0); 00354 assert(ts.size() == N+1); 00355 assert(xs.size() == N+1); 00356 assert(us.size() == N); 00357 assert(xss.size() == N+1); 00358 assert(uss.size() == N); 00359 assert(mu0.size() == tp.ntp); 00360 // assert(tpCov.size() == tp.ntp); 00361 // assert(tpS.size() == tp.ntp); 00362 00363 // if (update) { 00364 // J = Update(xs, us); 00365 // } else { 00366 // } 00367 00368 ce.gmm.ns[0].mu = mu0; 00369 ce.gmm.ns[0].P = P0; 00370 ce.S = S; 00371 00372 ce.gmm.Update(); 00373 00374 // tp.To(ce.gmm.ns[0].mu, ts, xs, us, p); 00375 if (contextSampler) { 00376 tp.SetContext( (*contextSampler)() ); 00377 } 00378 tp.From(ts, xs, us, mu0, p); 00379 } 00380 00381 00382 template <typename T, int n, int c, int np, int ntp, typename Tc> 00383 SystemCe<T, n, c, np, ntp, Tc>::SystemCe(System<T, n, c, np> &sys, 00384 Cost<T, n, c, np, Tc> &cost, 00385 Tparam<T, n, c, np, ntp, Tc> &tp, 00386 vector<double> &ts, 00387 vector<T> &xs, 00388 vector<Matrix<double, c, 1> > &us, 00389 Matrix<double, np, 1> *p, 00390 vector<Matrix<double, c, 1> > &dus, 00391 vector<Matrix<double, c, 1> > &es, 00392 bool update) : 00393 sys(sys), cost(cost), tp(&tp), contextSampler(0), 00394 ts(ts), xs(xs), us(us), p(p), dus(dus), xss(xs), uss(us), N(us.size()), 00395 ce(tp.ntp, 1), Ns(1000), 00396 J(numeric_limits<double>::max()), 00397 enforceUpperBound(true), updateUpperBound(true), Jub(numeric_limits<double>::max()), 00398 debug(true), external_render(0), nofevaluations(0) 00399 { 00400 /* 00401 if (ntp != Dynamic) { 00402 assert(ntp == N*sys.U.n); 00403 if (ntp > 16) 00404 cout << "[W] gcop::SystemCe::SystemCe: fixed Eigen size is not recommended above 16!" << endl; 00405 } 00406 */ 00407 00408 assert(N > 0); 00409 assert(ts.size() == N+1); 00410 assert(xs.size() == N+1); 00411 assert(us.size() == N); 00412 assert(dus.size() == N); 00413 assert(xss.size() == N+1); 00414 assert(uss.size() == N); 00415 00416 if (update) { 00417 J = Update(xs, us); 00418 } 00419 00420 // initialize mean using the provided initial trajectory and controls 00421 tp.To(ce.gmm.ns[0].mu, ts, xs, us, p); 00422 Vectortpd z; 00423 Vectortpd dz; 00424 if (ntp == Dynamic) { 00425 z.resize(tp.ntp); 00426 dz.resize(tp.ntp); 00427 } 00428 00429 // perturbed controls: ups = us + dus 00430 vector<Matrix<double, c, 1> > ups(us.size()); 00431 transform(us.begin(), us.end(), dus.begin(), ups.begin(),plus< Matrix<double, c, 1> >()); 00432 00433 // initialize the covariance using the provided variance in the initial controls 00434 tp.To(z, ts, xs, ups, p); 00435 dz = z - ce.gmm.ns[0].mu; 00436 ce.gmm.ns[0].P = (dz.cwiseProduct(dz)).asDiagonal(); 00437 ce.gmm.Update(); 00438 00439 tp.To(z, ts, xs, es, p); 00440 ce.S = z.asDiagonal(); 00441 } 00442 00443 00444 template <typename T, int n, int c, int np, int ntp, typename Tc> 00445 SystemCe<T, n, c, np, ntp, Tc>::~SystemCe() 00446 { 00447 } 00448 00449 template <typename T, int n, int c, int np, int ntp, typename Tc> 00450 void SystemCe<T, n, c, np, ntp, Tc>::us2z(Vectortpd &z, const std::vector<Vectorcd> &us) const 00451 { 00452 assert(us.size() > 0); 00453 assert(z.size() == us.size()*us[0].size()); 00454 for (int i = 0; i < us.size(); ++i) { 00455 z.segment(i*sys.U.n, sys.U.n) = us[i]; 00456 } 00457 } 00458 00459 template <typename T, int n, int c, int np, int ntp, typename Tc> 00460 void SystemCe<T, n, c, np, ntp, Tc>::z2us(std::vector<Vectorcd> &us, const Vectortpd &z) const 00461 { 00462 assert(us.size() > 0); 00463 assert(z.size() == us.size()*us[0].size()); 00464 for (int i = 0; i < us.size(); ++i) { 00465 us[i] = z.segment(i*sys.U.n, sys.U.n); 00466 } 00467 } 00468 00469 00470 template <typename T, int n, int c, int np, int ntp, typename Tc> 00471 double SystemCe<T, n, c, np, ntp, Tc>::Update(vector<T> &xs, const vector<Vectorcd> &us, bool evalCost) { 00472 double J = 0; 00473 sys.Reset(xs[0],ts[0]);//gives a chance for physics engines to reset to specific state and time. 00474 00475 for (int k = 0; k < N; ++k) { 00476 double h = ts[k+1] - ts[k]; 00477 sys.Step(xs[k+1], us[k], h, p); 00478 if (evalCost) 00479 J += cost.L(ts[k], xs[k], us[k], h, p); 00480 00481 } 00482 if (evalCost) 00483 J += cost.L(ts[N], xs[N], us[N-1], 0, p); 00484 return J; 00485 } 00486 00487 template <typename T, int n, int c, int np, int ntp, typename Tc> 00488 void SystemCe<T, n, c, np, ntp, Tc>::Iterate(bool updatexsfromus) { 00489 00490 if (ce.inc) 00491 { 00492 Vectortpd z; 00493 if (ntp == Dynamic) 00494 if (tp) 00495 z.resize(tp->ntp); 00496 else 00497 z.resize(us.size()*this->N); 00498 00499 ce.Sample(z); 00500 if (tp) { 00501 // set context (e.g. goal, environment, etc...) possibly using random sampling 00502 if (contextSampler) { 00503 Tc &context = (*contextSampler)(); 00504 tp->SetContext(context); 00505 cost.SetContext(context); 00506 } 00507 tp->From(ts, xss, uss, z, p); 00508 double cost_trajectory = 0; 00509 int N = uss.size(); 00510 for(int k = 0;k < N; k++) 00511 { 00512 cost_trajectory += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p); 00513 } 00514 cost_trajectory += cost.L(ts[N], xss[N], uss[N-1], 0, p); 00515 ce.AddSample(z, cost_trajectory); 00516 } 00517 else 00518 { 00519 z2us(uss, z); 00520 ce.AddSample(z, Update(xss, uss)); 00521 } 00522 00523 xsas.push_back(xss); 00524 usas.push_back(uss); 00525 } else // non-incremental 00526 { 00527 // Jub is used to discard any samples with cost below Jub 00528 ce.Reset(); 00529 xsas.clear(); 00530 usas.clear(); 00531 bs.clear(); 00532 00533 Vectortpd z; 00534 if (ntp == Dynamic) 00535 if (tp) 00536 z.resize(tp->ntp); 00537 else 00538 z.resize(us.size()*this->N); 00539 00540 for (int j = 0; j < Ns; ++j) { 00541 00542 // sample context 00543 if (contextSampler) { 00544 Tc &context = (*contextSampler)(); 00545 tp->SetContext(context); 00546 cost.SetContext(context); 00547 } 00548 00549 double J = 0; 00550 bool good = true; 00551 00552 do { 00553 ce.Sample(z); 00554 if (tp) { 00555 good = tp->From(ts, xss, uss, z, p); 00556 int N = uss.size(); 00557 J = 0; 00558 for(int k = 0;k < N; k++) { 00559 J += cost.L(ts[k], xss[k], uss[k], ts[k+1]-ts[k], p); 00560 } 00561 J += cost.L(ts[N], xss[N], uss[N-1], 0, p); 00562 } else { 00563 z2us(uss, z); 00564 J = Update(xss, uss); 00565 } 00566 } while(enforceUpperBound && J > Jub); 00567 00568 ce.AddSample(z, J); 00569 00570 // add to list of all samples 00571 xsas.push_back(xss); 00572 usas.push_back(uss); 00573 bs.push_back(good); 00574 00575 //#DEBUG Number of function evaluations: 00576 //cout<<++nofevaluations<<endl; 00577 ++nofevaluations; 00578 00579 //Render trajectory samples if external rendering function is provided: 00580 if(external_render) 00581 { 00582 external_render(j,xss);//ID for the sample trajectory 00583 } 00584 } 00585 00586 if (updateUpperBound) { 00587 cout << "Jub=" << Jub << " ce.Jmax=" << ce.Jmax << endl; 00588 Jub = min(Jub, ce.Jmax); 00589 cout << "Jub=" << Jub << endl; 00590 } 00591 } 00592 00593 // estimate distribution 00594 ce.Select(); 00595 00596 if (!ce.Fit()) { 00597 cout << "[W] TrajectoryPrmSample::Sample: ce.Fit failed!" << endl; 00598 } 00599 00600 if (ce.Jmin < J) { 00601 if (tp) 00602 tp->From(ts, xs, us, ce.zmin, p); 00603 else 00604 z2us(us, ce.zmin); 00605 00606 if(updatexsfromus) 00607 Update(xs, us, false); 00608 J = ce.Jmin; 00609 zmin = ce.zmin;//Store the parameters also 00610 } 00611 00612 // construct trajectory using the first sample (this is the one with lowest cost) 00613 // Traj(xs, ce.zps[0].first, x0); 00614 // cout << "Solution: xs[K]=" << xs[K].transpose() << " c=" << ce.cs[0] << endl; 00615 } 00616 } 00617 00618 #endif