00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include <vector>
00020 #include <valarray>
00021
00022 #include "MultiFunction.h"
00023
00024 #include "ErrorMeasure.h"
00025 #include "Dataset.h"
00026 #include "Sym.h"
00027 #include "FunDef.h"
00028 #include "sym_compile.h"
00029 #include "TargetInfo.h"
00030 #include "stats.h"
00031
00032 using namespace std;
00033
00034 #ifdef INTERVAL_DEBUG
00035
00036 #include <BoundsCheck.h>
00037 #include <FunDef.h>
00038
00039 vector<double> none;
00040 IntervalBoundsCheck bounds(none, none);
00041
00042 #endif
00043
00044
00045
00046 static double not_a_number = atof("nan");
00047
00048 class ErrorMeasureImpl {
00049 public:
00050 const Dataset& data;
00051 TargetInfo train_info;
00052
00053 ErrorMeasure::measure measure;
00054
00055 Scaling no_scaling;
00056
00057 ErrorMeasureImpl(const Dataset& d, double t_p, ErrorMeasure::measure m) : data(d), measure(m) {
00058
00059 #ifdef INTERVAL_DEBUG
00060 bounds = IntervalBoundsCheck(d.input_minima(), d.input_maxima());
00061 #endif
00062
00063 unsigned nrecords = d.n_records();
00064 unsigned cases = unsigned(t_p * nrecords);
00065
00066 valarray<double> t(cases);
00067
00068 for (unsigned i = 0; i < cases; ++i) {
00069 t[i] = data.get_target(i);
00070 }
00071
00072 train_info = TargetInfo(t);
00073 no_scaling = Scaling(new NoScaling);
00074 }
00075
00076 ErrorMeasure::result eval(const valarray<double>& y) {
00077
00078 ErrorMeasure::result result;
00079 result.scaling = no_scaling;
00080
00081
00082 switch(measure) {
00083 case ErrorMeasure::mean_squared:
00084 result.error = pow(train_info.targets() - y, 2.0).sum() / y.size();
00085 return result;
00086 case ErrorMeasure::absolute:
00087 result.error = abs(train_info.targets() - y).sum() / y.size();
00088 return result;
00089 case ErrorMeasure::mean_squared_scaled:
00090 result.scaling = ols(y, train_info);
00091 result.error = pow(train_info.targets() - result.scaling->transform(y), 2.0).sum() / y.size();
00092 return result;
00093 default:
00094 cerr << "Unknown measure encountered: " << measure << " " << __FILE__ << " " << __LINE__ << endl;
00095 }
00096
00097 return result;
00098 }
00099
00100 unsigned train_cases() const {
00101 return train_info.targets().size();
00102 }
00103
00104 vector<ErrorMeasure::result> multi_function_eval(const vector<Sym>& pop) {
00105
00106 if (pop.size() == 0) return vector<ErrorMeasure::result>();
00107
00108 multi_function all = compile(pop);
00109
00110 std::vector<double> y(pop.size());
00111
00112 Scaling noScaling = Scaling(new NoScaling);
00113
00114 const std::valarray<double>& t = train_info.targets();
00115
00116 cout << "Population size " << pop.size() << endl;
00117
00118 if (measure == ErrorMeasure::mean_squared_scaled) {
00119 std::vector<Var> var(pop.size());
00120 std::vector<Cov> cov(pop.size());
00121
00122 Var vart;
00123
00124 for (unsigned i = 0; i < t.size(); ++i) {
00125 vart.update(t[i]);
00126
00127 all(&data.get_inputs(i)[0], &y[0]);
00128
00129
00130 for (unsigned j = 0; j < pop.size(); ++j) {
00131 var[j].update(y[j]);
00132 cov[j].update(y[j], t[i]);
00133 }
00134 }
00135
00136 std::vector<ErrorMeasure::result> result(pop.size());
00137
00138 for (unsigned i = 0; i < pop.size(); ++i) {
00139
00140
00141 double b = cov[i].get_cov() / var[i].get_var();
00142
00143 if (!finite(b)) {
00144 result[i].scaling = noScaling;
00145 result[i].error = vart.get_var();
00146 continue;
00147 }
00148
00149 double a = vart.get_mean() - b * var[i].get_mean();
00150
00151 result[i].scaling = Scaling( new LinearScaling(a,b));
00152
00153
00154 double c = cov[i].get_cov();
00155 c *= c;
00156
00157 double err = vart.get_var() - c / var[i].get_var();
00158 result[i].error = err;
00159 if (!finite(err)) {
00160
00161 cout << "b " << b << endl;
00162 cout << "var t " << vart.get_var() << endl;
00163 cout << "var i " << var[i].get_var() << endl;
00164 cout << "cov " << cov[i].get_cov() << endl;
00165
00166 for (unsigned j = 0; j < t.size(); ++j) {
00167 all(&data.get_inputs(i)[0], &y[0]);
00168
00169
00170 cout << y[i] << ' ' << ::eval(pop[i], data.get_inputs(j)) << endl;
00171 }
00172
00173 exit(1);
00174 }
00175 }
00176
00177 return result;
00178 }
00179
00180
00181 std::vector<double> err(pop.size());
00182
00183 for (unsigned i = 0; i < train_cases(); ++i) {
00184
00185 all(&data.get_inputs(i)[0], &y[0]);
00186
00187
00188 for (unsigned j = 0; j < pop.size(); ++j) {
00189 double diff = y[j] - t[i];
00190 if (measure == ErrorMeasure::mean_squared) {
00191 err[j] += diff * diff;
00192 } else {
00193 err[j] += fabs(diff);
00194 }
00195
00196 }
00197
00198 }
00199
00200 std::vector<ErrorMeasure::result> result(pop.size());
00201
00202 double n = train_cases();
00203 for (unsigned i = 0; i < pop.size(); ++i) {
00204 result[i].error = err[i] / n;
00205 result[i].scaling = noScaling;
00206 }
00207
00208 return result;
00209
00210 }
00211
00212 vector<ErrorMeasure::result> single_function_eval(const vector<Sym> & pop) {
00213
00214 vector<single_function> funcs(pop.size());
00215 compile(pop, funcs);
00216
00217 valarray<double> y(train_cases());
00218 vector<ErrorMeasure::result> result(pop.size());
00219 for (unsigned i = 0; i < funcs.size(); ++i) {
00220 for (unsigned j = 0; j < train_cases(); ++j) {
00221 y[j] = funcs[i](&data.get_inputs(j)[0]);
00222 }
00223
00224 #ifdef INTERVAL_DEBUG
00225
00226 pair<double, double> b = bounds.calc_bounds(pop[i]);
00227
00228
00229 for (unsigned j = 0; j < y.size(); ++j) {
00230 if (y[j] < b.first -1e-4 || y[j] > b.second + 1e-4 || !finite(y[j])) {
00231 cout << "Error " << y[j] << " not in " << b.first << ' ' << b.second << endl;
00232 cout << "Function " << pop[i] << endl;
00233 exit(1);
00234 }
00235 }
00236 #endif
00237
00238 result[i] = eval(y);
00239 }
00240
00241 return result;
00242 }
00243
00244 vector<ErrorMeasure::result> calc_error(const vector<Sym>& pop) {
00245
00246
00247 #if USE_TR1
00248 typedef std::tr1::unordered_map<Sym, unsigned, HashSym> HashMap;
00249 #else
00250 typedef hash_map<Sym, unsigned, HashSym> HashMap;
00251 #endif
00252 HashMap clone_map;
00253 vector<Sym> decloned;
00254 decloned.reserve(pop.size());
00255
00256 for (unsigned i = 0; i < pop.size(); ++i) {
00257 HashMap::iterator it = clone_map.find(pop[i]);
00258
00259 if (it == clone_map.end()) {
00260 clone_map[ pop[i] ] = decloned.size();
00261 decloned.push_back(pop[i]);
00262 }
00263
00264 }
00265
00266
00267 vector<ErrorMeasure::result> dresult;
00268
00269 switch(measure) {
00270 case ErrorMeasure::mean_squared:
00271 case ErrorMeasure::absolute:
00272 dresult = multi_function_eval(decloned);
00273 break;
00274 case ErrorMeasure::mean_squared_scaled:
00275 dresult = multi_function_eval(decloned);
00276 break;
00277 }
00278
00279 vector<ErrorMeasure::result> result(pop.size());
00280 for (unsigned i = 0; i < result.size(); ++i) {
00281 result[i] = dresult[ clone_map[pop[i]] ];
00282 }
00283
00284 return result;
00285 }
00286
00287 };
00288
00289 ErrorMeasure::result::result() {
00290 error = 0.0;
00291 scaling = Scaling(0);
00292 }
00293
00294 bool ErrorMeasure::result::valid() const {
00295 return isfinite(error);
00296 }
00297
00298 ErrorMeasure::ErrorMeasure(const Dataset& data, double train_perc, measure meas) {
00299 pimpl = new ErrorMeasureImpl(data, train_perc, meas);
00300 }
00301
00302 ErrorMeasure::~ErrorMeasure() { delete pimpl; }
00303 ErrorMeasure::ErrorMeasure(const ErrorMeasure& that) { pimpl = new ErrorMeasureImpl(*that.pimpl); }
00304
00305
00306 ErrorMeasure::result ErrorMeasure::calc_error(Sym sym) {
00307
00308 single_function f = compile(sym);
00309
00310 valarray<double> y(pimpl->train_cases());
00311
00312 for (unsigned i = 0; i < y.size(); ++i) {
00313
00314 y[i] = f(&pimpl->data.get_inputs(i)[0]);
00315
00316 if (!finite(y[i])) {
00317 result res;
00318 res.scaling = Scaling(new NoScaling);
00319 res.error = not_a_number;
00320 return res;
00321 }
00322 }
00323
00324 return pimpl->eval(y);
00325 }
00326
00327 vector<ErrorMeasure::result> ErrorMeasure::calc_error(const vector<Sym>& syms) {
00328 return pimpl->calc_error(syms);
00329
00330 }
00331
00332 double ErrorMeasure::worst_performance() const {
00333
00334 if (pimpl->measure == mean_squared_scaled) {
00335 return pimpl->train_info.tvar();
00336 }
00337
00338 return 1e+20;
00339 }
00340