#include "profile.h"

#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>

using namespace std;
namespace fs = boost::filesystem;

// NOTE(review): the copy of this file that was reviewed had lost every
// '<...>' span (template arguments and the names of two system headers).
// The container types below were reconstructed from how the members are
// used in this file; confirm them against the declarations in profile.h.

namespace delphin {

namespace {

// Field name -> column position within one relation (table).
typedef map<string, int> FieldTable;
// Relation (file) name -> its fields, as read from the "relations" file.
typedef map<string, FieldTable> RelationTable;
// Item id -> string value (i-input, p-input).
typedef map<tIid, string> StringTable;
// Item id -> integer value (i-length, readings, owning profile index).
typedef map<tIid, int> IntTable;
// Result id -> flag (1 when marked as preferred).
typedef map<int, int> GoldTable;
// Item id -> preferred results for that item.
typedef map<tIid, GoldTable> PreferenceTable;
// Input stream that can carry an optional gzip decompressor.
typedef boost::iostreams::filtering_stream<boost::iostreams::input> InputFilter;

// Deletes every pointer held by a container of owning raw pointers and
// empties the container.
template <typename Container>
void deleteAll(Container &owned)
{
  for (typename Container::iterator it = owned.begin();
       it != owned.end(); ++it)
    delete *it;
  owned.clear();
}

// Returns the index of the profile that 'item' was loaded from, or -1 when
// the item is unknown. Never inserts into 'index'.
int profileIndexOf(const IntTable &index, const tIid &item)
{
  IntTable::const_iterator where = index.find(item);
  return where == index.end() ? -1 : where->second;
}

// Reads a "relations" file. A line that starts in column 0 names a relation
// (text up to the first ':'); each following indented line names one field
// (text up to the first space), numbered from 0 in order of appearance.
// Returns a newly allocated table owned by the caller, or 0 when the file
// cannot be opened.
RelationTable *readRelations(const string &path)
{
  ifstream rfile(path.c_str());
  if (!rfile.is_open())
    return 0;

  RelationTable *table = new RelationTable;
  string line, fname;
  int fieldcount = 0;
  while (getline(rfile, line)) {
    const string::size_type first = line.find_first_not_of(" \t\n\v\f\r");
    if (first == string::npos)
      continue; // empty or whitespace-only line
    if (first == 0) { // relation (file) name
      fname = line.substr(0, line.find(':'));
      (*table)[fname] = FieldTable();
      fieldcount = 0;
    } else { // field of the current relation
      const string::size_type stop = line.find(' ', first);
      const string field = (stop == string::npos)
        ? line.substr(first) : line.substr(first, stop - first);
      (*table)[fname][field] = fieldcount++;
    }
  }
  return table;
}

} // anonymous namespace

// Loads the profile directory 'pn'. If the directory contains a file named
// "virtual", each non-empty line of that file names a sibling profile
// directory (optionally in double quotes) and all of them are loaded;
// otherwise 'pn' itself is the only profile. For every profile the
// relations, item, parse and preference files are read. Exits the process
// when the directory or a relations file is missing, or when no profile was
// loaded.
Profile::Profile(const string &pn)
{
  pname = pn;
  int profilecounter = 0;

  // remove trailing slash (but leave a bare "/" alone)
  if (pname.length() > 1 && pname[pname.length()-1] == '/')
    pname.erase(pname.length()-1, 1);

  fs::path ip(pname);
  if (!fs::is_directory(ip)) {
    cerr << "Profile path '" << ip << "' not found. Cannot continue" << endl;
    exit(1);
  }

  ifstream virtualf(string(ip.string()+"/virtual").c_str());
  if (virtualf.is_open()) { // virtual profile, so add actual profiles
    string entry;
    while (getline(virtualf, entry)) {
      if (entry.empty())
        continue;
      // erase quotes; an entry without a quote pair is used from the
      // character after the single quote, or whole when it has none
      const string::size_type open = entry.find('"');
      const string::size_type close = entry.rfind('"');
      string prof;
      if (open == string::npos)
        prof = entry;
      else if (close == open)
        prof = entry.substr(open + 1);
      else
        prof = entry.substr(open + 1, close - open - 1);
      profiles.push_back(fs::path(ip.parent_path()/prof));
    }
    virtualprofile = true;
  } else {
    profiles.push_back(ip); // not virtual
    virtualprofile = false;
  }

  for (vector<fs::path>::iterator piter = profiles.begin();
       piter != profiles.end(); ++piter) {
    const string dir = piter->string();

    // These are declared here and handed to openf, which attaches the file
    // to the filter; both are closed/reset after each file is read.
    InputFilter in;
    ifstream file;

    RelationTable *rptr = readRelations(dir + "/relations");
    if (!rptr) {
      cerr << "No relation file in profile. Exiting." << endl;
      exit(1);
    }
    relations.push_back(rptr);

    // The per-profile tables below are pushed even when their file cannot
    // be opened, so that every vector stays indexable by profile number.
    StringTable *iptr = new StringTable;
    IntTable *lptr = new IntTable;
    if (openf(in, file, dir + "/item")) {
      FieldTable &schema = (*rptr)["item"];
      const int idField = schema["i-id"];
      const int inputField = schema["i-input"];
      const int lengthField = schema["i-length"];
      string line;
      while (getline(in, line)) {
        tIid item_id = tIid();
        int ilen = 0;
        istringstream(getField(line, idField)) >> item_id;
        // the length must be parsed before it is stored
        istringstream(getField(line, lengthField)) >> ilen;
        iptr->insert(tIidSValue(item_id, getField(line, inputField)));
        lptr->insert(IntTable::value_type(item_id, ilen));
        itemToProfile.insert(tIidIValue(item_id, profilecounter));
      }
      file.close();
      in.reset();
    } else {
      cerr << "Problem opening item file in " << dir << endl;
    }
    items.push_back(iptr);
    lengths.push_back(lptr);

    IntTable *readingsptr = new IntTable;
    StringTable *pinputptr = new StringTable;
    PreferenceTable *prefptr = new PreferenceTable;
    if (openf(in, file, dir + "/parse")) {
      FieldTable &schema = (*rptr)["parse"];
      const int idField = schema["i-id"];
      const int readingsField = schema["readings"];
      const int inputField = schema["p-input"];
      string line;
      while (getline(in, line)) {
        tIid item_id = tIid();
        int num_readings = 0;
        istringstream(getField(line, idField)) >> item_id;
        istringstream(getField(line, readingsField)) >> num_readings;
        readingsptr->insert(tIidIValue(item_id, num_readings));
        pinputptr->insert(tIidSValue(item_id, getField(line, inputField)));
        prefptr->insert(PreferenceTable::value_type(item_id, GoldTable()));
      }
      file.close();
      in.reset();
    } else {
      cerr << "Problem opening parse file in " << dir << endl;
    }
    readings.push_back(readingsptr);
    inputs.push_back(pinputptr);

    if (openf(in, file, dir + "/preference")) {
      FieldTable &schema = (*rptr)["preference"];
      const int idField = schema["parse-id"];
      const int resultField = schema["result-id"];
      string line;
      while (getline(in, line)) {
        tIid item_id = tIid();
        int parse_id = 0;
        istringstream(getField(line, idField)) >> item_id;
        istringstream(getField(line, resultField)) >> parse_id;
        ((*prefptr)[item_id]).insert(tIidIValue(parse_id, 1));
      }
      file.close();
      in.reset();
    } else {
      cerr << "Problem opening preference file in " << dir
           << "; no gold preferences recorded" << endl;
    }
    preferences.push_back(prefptr);

    profilecounter++;
  }

  if (!profilecounter) {
    cerr << "No profiles were loaded from " << pname
         << ". Cannot continue." << endl;
    exit(1);
  }
  resultprofilecount = 0;
}

// Releases every per-profile table allocated by the constructor.
Profile::~Profile()
{
  deleteAll(items);
  deleteAll(inputs);
  deleteAll(lengths);
  deleteAll(preferences);
  deleteAll(relations);
  deleteAll(readings);
}

// Opens 'fname' for reading through 'in'. "<fname>.gz" is tried first and,
// when it opens, a gzip decompressor is pushed in front of the file;
// otherwise the plain file is tried. Returns false when neither can be
// opened, in which case nothing is pushed onto 'in'.
bool Profile::openf(
  boost::iostreams::filtering_stream<boost::iostreams::input> &in,
  ifstream &filestream, const string &fname)
{
  filestream.open(string(fname+".gz").c_str(),
    ios_base::in | ios_base::binary);
  if (filestream.is_open()) {
    in.push(boost::iostreams::gzip_decompressor());
    in.push(filestream);
    return true;
  }

  // The failed attempt above set failbit; clear it before trying again.
  filestream.clear();
  filestream.open(fname.c_str());
  if (!filestream.is_open())
    return false;
  in.push(filestream);
  return true;
}

// i-input (usually the raw string); empty string when the item is unknown.
string Profile::getItem(tIid item)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return string();
  StringTable::const_iterator entry = items[pidx]->find(item);
  return entry == items[pidx]->end() ? string() : entry->second;
}

// i-length; 0 when the item is unknown.
int Profile::getLength(tIid item)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return 0;
  IntTable::const_iterator entry = lengths[pidx]->find(item);
  return entry == lengths[pidx]->end() ? 0 : entry->second;
}

// p-input: parser input, after REPP and tagging; empty string when the
// item is unknown.
string Profile::getInput(tIid item)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return string();
  StringTable::const_iterator entry = inputs[pidx]->find(item);
  return entry == inputs[pidx]->end() ? string() : entry->second;
}

// Returns the lowest-numbered preferred result id recorded for 'item', or
// -1 when the item is unknown or has no preference.
int Profile::getPreference(tIid item)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return -1;
  const PreferenceTable &prefs = *preferences[pidx];
  PreferenceTable::const_iterator entry = prefs.find(item);
  if (entry == prefs.end() || entry->second.empty())
    return -1;
  return entry->second.begin()->first;
}

// Marks result 'parse' as preferred for 'item'. Unless 'append' is true the
// item's existing preferences are discarded first. Unknown items are
// ignored.
void Profile::setPreference(tIid &item, int &parse, bool append)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return;
  GoldTable &gold = (*preferences[pidx])[item];
  if (!append)
    gold.clear();
  gold.insert(tIIValue(parse, 1));
}

// True when result 'parse' is recorded as preferred for 'item'. Unknown
// items yield false; the lookup never adds entries to any table.
bool Profile::isGold(tIid item, int parse)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return false;
  const PreferenceTable &prefs = *preferences[pidx];
  PreferenceTable::const_iterator entry = prefs.find(item);
  if (entry == prefs.end())
    return false;
  GoldTable::const_iterator gold = entry->second.find(parse);
  return gold != entry->second.end() && gold->second == 1;
}

// Number of preferred results recorded for 'item' (0 for unknown items).
int Profile::numGold(tIid item)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return 0;
  const PreferenceTable &prefs = *preferences[pidx];
  PreferenceTable::const_iterator entry = prefs.find(item);
  return entry == prefs.end() ? 0 : static_cast<int>(entry->second.size());
}

// Adds the id of every loaded item, across all profiles, to '*itemset'.
void Profile::getItemIDs(set<tIid> *itemset)
{
  for (vector<StringTable *>::const_iterator pi_itr = items.begin();
       pi_itr != items.end(); ++pi_itr) {
    for (StringTable::const_iterator iitr = (*pi_itr)->begin();
         iitr != (*pi_itr)->end(); ++iitr)
      itemset->insert(iitr->first);
  }
}

// Number of readings recorded in the parse file for 'item', or -1 when the
// item is unknown.
int Profile::getReadings(tIid item)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0)
    return -1;
  IntTable::const_iterator entry = readings[pidx]->find(item);
  return entry == readings[pidx]->end() ? -1 : entry->second;
}

// Streaming access to the result files: each call returns the next result
// row as ((parse-id, result-id), field), where field is the "mrs" column
// when type == MRS and the "derivation" column otherwise. Profiles are read
// in order. Sentinel returns, all with an empty string:
//   (-1,-1) the first profile's result file could not be opened
//   (-2,-1) every profile has been read to the end
//   (-3,-1) a later profile's result file could not be opened
// After a sentinel the next call starts again from the first profile.
pair<pair<tIid,int>,string> Profile::getResult(ResultType type)
{
  typedef pair<tIid,int> tKey;
  typedef pair<tKey,string> tRow;

  if (resultfilter.empty()) { // first result (or restart after a sentinel)
    // keep the cursor in step with the file that is being opened
    resultprofilecount = 0;
    if (!openf(resultfilter, resultfile,
               string(profiles[0].string()+"/result"))) {
      cerr << "Couldn't open a result file in "
           << profiles[0].string() << endl;
      return tRow(tKey(-1,-1), string());
    }
  }

  string line;
  while (!getline(resultfilter, line)) {
    // current file is exhausted: move on to the next profile
    resultfile.close();
    resultfilter.reset();
    resultprofilecount++;
    if (static_cast<vector<fs::path>::size_type>(resultprofilecount)
        >= profiles.size()) { // read all profiles
      return tRow(tKey(-2,-1), string());
    }
    if (!openf(resultfilter, resultfile,
               string(profiles[resultprofilecount].string()+"/result"))) {
      cerr << "Couldn't open a result file in "
           << profiles[resultprofilecount].string() << endl;
      return tRow(tKey(-3,-1), string());
    }
  }

  FieldTable &schema = (*(relations[resultprofilecount]))["result"];
  tIid item_id = tIid();
  int parse_id = 0;
  istringstream(getField(line, schema["parse-id"])) >> item_id;
  istringstream(getField(line, schema["result-id"])) >> parse_id;
  const string rtype(type == MRS ? "mrs" : "derivation");
  return tRow(tKey(item_id, parse_id), getField(line, schema[rtype]));
}

// Random access to one result: scans the result file of the profile that
// owns 'item' for the row whose parse-id is 'item' and result-id is 'parse'
// and returns its "mrs" (type == MRS) or "derivation" column. Returns an
// empty string, after reporting on stderr, when the item is unknown, the
// file cannot be opened or no such row exists.
string Profile::getResult(tIid &item, int &parse, ResultType type)
{
  const int pidx = profileIndexOf(itemToProfile, item);
  if (pidx < 0) {
    cerr << "No item " << item << " in profile " << pname << endl;
    return string();
  }

  const string dir = profiles[pidx].string();
  InputFilter in;
  ifstream file;
  if (!openf(in, file, dir + "/result")) {
    cerr << "Problem opening result file in " << dir << endl;
    return string();
  }

  // Field positions come from the owning profile's relations, not from the
  // streaming cursor used by the other getResult overload.
  FieldTable &schema = (*(relations[pidx]))["result"];
  const int idField = schema["parse-id"];
  const int resultField = schema["result-id"];
  const int valueField = schema[type == MRS ? "mrs" : "derivation"];

  bool found = false;
  string value, line;
  while (getline(in, line)) {
    tIid item_id = tIid();
    int parse_id = 0;
    istringstream(getField(line, idField)) >> item_id;
    istringstream(getField(line, resultField)) >> parse_id;
    if (item_id == item && parse_id == parse) {
      value = getField(line, valueField);
      found = true;
      break;
    }
  }
  // detach the file from the filter before either object is destroyed
  file.close();
  in.reset();

  if (!found) {
    cerr << "result " << parse << " of item " << item
         << " not found in result file in " << dir << endl;
  }
  return value;
}

// Path this profile was created from, without a trailing slash.
string &Profile::getPath()
{
  return pname;
}

// True when the profile directory contained a "virtual" file.
bool Profile::isVirtual()
{
  return virtualprofile;
}

// Writes exact-match statistics to "<scorefile>.exact<n>": the fraction of
// items for which any of the result ids 0..n-1 is marked as preferred,
// followed by "<exact>/<seen>". Items with zero readings are skipped unless
// 'all' is true. The fraction is written as 0 when no item was counted.
void Profile::scoreProfile(const string &scorefile, bool all, int n)
{
  int total_exact = 0;
  int total_seen = 0;
  for (vector<StringTable *>::const_iterator piter = items.begin();
       piter != items.end(); ++piter) {
    for (StringTable::const_iterator iiter = (*piter)->begin();
         iiter != (*piter)->end(); ++iiter) {
      if (!all && getReadings(iiter->first) == 0)
        continue; // skip items without results if all==false
      ++total_seen;
      for (int r = 0; r < n; ++r) {
        if (isGold(iiter->first, r)) {
          ++total_exact;
          break;
        }
      }
    }
  }

  ostringstream outfile;
  outfile << scorefile << ".exact" << n;
  ofstream outf(outfile.str().c_str());
  if (!outf.is_open()) {
    cerr << "Problem opening score file " << outfile.str() << endl;
    return;
  }
  const float accuracy = total_seen
    ? static_cast<float>(total_exact) / total_seen : 0.0f;
  outf << accuracy << "\n" << total_exact << "/" << total_seen << endl;
  outf.close();
}

// Returns field number 'field' (0-based) of an '@'-delimited line. Reports
// the line on stderr and exits the process when the line has fewer than
// 'field' delimiters.
string Profile::getField(string line, int field)
{
  string::size_type start = 0;
  string::size_type end = line.find('@');
  int atcount = 0;
  for (; atcount < field && end != string::npos; ++atcount) {
    start = end + 1;
    end = line.find('@', start);
  }
  if (atcount != field) {
    cerr << "Mal-formed line: " << line << endl;
    cerr << "Insufficent delimiters." << endl;
    exit(1);
  }
  if (end == string::npos)
    return line.substr(start); // last field runs to the end of the line
  return line.substr(start, end - start);
}

} //namespace