00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "Dataset.h"
00019 #include <fstream>
00020 #include <sstream>
00021
00022 #include <iostream>
00023
00024 using namespace std;
00025
/// Private implementation behind Dataset: stores the parsed records.
///
/// Every record is one line of whitespace-separated numbers. All fields but
/// the last are inputs; the last field is the target.
class DataSetImpl {
public:
    /// inputs[i][j] is the j-th input field of record i.
    std::vector< std::vector<double> > inputs;
    /// targets[i] is the final field of record i.
    std::vector<double> targets;

    /// Parses text records into `inputs` and `targets`, replacing any
    /// previously stored data.
    ///
    /// The field count is taken from the first record; every record must
    /// supply at least that many numeric fields (extra trailing fields are
    /// ignored). An empty `strings` simply clears the stored data.
    ///
    /// @param strings one text line per record.
    ///
    /// On a record with no fields, too few fields, or a non-numeric field,
    /// a message is written to stderr and the process exits with status 1.
    void read_data(const std::vector<std::string>& strings) {
        if (strings.empty()) {
            // Nothing to parse; never index strings[0] on an empty vector.
            inputs.clear();
            targets.clear();
            return;
        }

        // Count whitespace-separated tokens in the first record.
        std::istringstream cnt(strings[0]);
        std::size_t n = 0;
        for (std::string tok; cnt >> tok; ) {
            ++n;
        }

        // A record needs at least a target; without this check n - 1 would
        // wrap around and request an enormous allocation.
        if (n == 0) {
            report_short_record(0);
        }
        const std::size_t n_inputs = n - 1;

        // assign() (not resize()) so rows left over from an earlier load
        // cannot keep a stale width.
        inputs.assign(strings.size(), std::vector<double>(n_inputs));
        targets.assign(strings.size(), 0.0);

        for (std::size_t i = 0; i < strings.size(); ++i) {
            std::istringstream is(strings[i]);
            for (std::size_t j = 0; j < n_inputs; ++j) {
                is >> inputs[i][j];
            }
            is >> targets[i];

            // The fail state is sticky, so a single check after the last
            // extraction catches a failure in any field, including the
            // target itself.
            if (!is) {
                report_short_record(i);
            }
        }
    }

private:
    /// Reports a malformed record on stderr and terminates the process.
    static void report_short_record(std::size_t record) {
        std::cerr << "Too few targets in record " << record << std::endl;
        exit(1);
    }
};
00067
00068 Dataset::Dataset() { pimpl = new DataSetImpl; }
00069 Dataset::~Dataset() { delete pimpl; }
00070 Dataset::Dataset(const Dataset& that) { pimpl = new DataSetImpl(*that.pimpl); }
00071 Dataset& Dataset::operator=(const Dataset& that) { *pimpl = *that.pimpl; return *this; }
00072
00073 unsigned Dataset::n_records() const { return pimpl->targets.size(); }
00074 unsigned Dataset::n_fields() const { return pimpl->inputs[0].size(); }
00075 const std::vector<double>& Dataset::get_inputs(unsigned record) const { return pimpl->inputs[record]; }
00076 double Dataset::get_target(unsigned record) const { return pimpl->targets[record]; }
00077
00078 double error(string errstr);
00079
00080 void Dataset::load_data(std::string filename) {
00081 vector<string> strings;
00082
00083 ifstream is(filename.c_str());
00084
00085 for(;;) {
00086 string s;
00087 getline(is, s);
00088 if (!is) break;
00089
00090 if (s[0] == '#') continue;
00091
00092 strings.push_back(s);
00093 }
00094
00095 is.close();
00096
00097 if (strings.size() == 0) {
00098 error("No data could be loaded");
00099 }
00100
00101 pimpl->read_data(strings);
00102
00103 }
00104
00105 std::vector<double> Dataset::input_minima() const {
00106 vector<vector<double> >& in = pimpl->inputs;
00107
00108 vector<double> mn(in[0].size(), 1e+50);
00109 for (unsigned i = 0; i < in.size(); ++i) {
00110 for (unsigned j = 0; j < in[i].size(); ++j) {
00111 mn[j] = std::min(mn[j], in[i][j]);
00112 }
00113 }
00114
00115 return mn;
00116 }
00117
00118 vector<double> Dataset::input_maxima() const {
00119 vector<vector<double> >& in = pimpl->inputs;
00120
00121 vector<double> mx(in[0].size(), -1e+50);
00122 for (unsigned i = 0; i < in.size(); ++i) {
00123 for (unsigned j = 0; j < in[i].size(); ++j) {
00124 mx[j] = std::max(mx[j], in[i][j]);
00125 }
00126 }
00127
00128 return mx;
00129 }
00130
00131
00132
00133