Implement functionid merging
dspinellis committed Aug 28, 2024
1 parent c5bf9d2 commit aef84e6
Showing 1 changed file with 122 additions and 32 deletions.
154 changes: 122 additions & 32 deletions src/dbtoken.cpp
@@ -65,16 +65,23 @@ unify_and_clear(Dbtoken &found, Dbtoken &read)
read.clear();
}

static int line;
static int line_number;

static void
warn(const char *file_name, Tokid ti)
{
cerr << file_name << '(' << line << "): missing EC for file "
cerr << file_name << '(' << line_number << "): missing EC for file "
<< ti.get_fileid().get_id() << " offset "
<< (unsigned)ti.get_streampos() << "\n";
}

static void
warn(const char *file_name, const string &s)
{
cerr << file_name << '(' << line_number << "): malformed input: "
<< s << "\n";
}

/*
* Return the tokid with its negative fid
* ECs are supposed to be unique for each tokid.
@@ -101,12 +108,7 @@ Dbtoken::read_eclasses(const char *in_path)

ifstream input(in_path);
verify_open(in_path, input);
line = 0;

int dbid;
int fid;
unsigned long offset;
int len;
Eclass *ec = NULL, *prev_ec = NULL; // EC pointer as created / found
long ecid, prev_ecid = 0; // EC identifier read from file
enum {
@@ -126,15 +128,29 @@
s_found_ov, // A matched EC overflowed the read entry
} state = s_read_first;

Dbtoken found, read;
/*
* Invariant: always contains difference in the length of characters
* stored in the above "found" and "read" variables.
* Invariant: found_minus_read always contains the difference between
* the number of characters stored in the "found" and "read"
* variables.
*/
Dbtoken found, read;
int found_minus_read;
while (input >> dbid >> fid >> offset >> len >> ecid) {

line++;
string line_record;
line_number = 0;
while (getline(input, line_record)) {
line_number++;

istringstream line_stream(line_record);
int dbid;
int fid;
unsigned long offset;
int len;
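// Report and skip records that fail to parse.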
if (!(line_stream >> dbid >> fid >> offset >> len >> ecid)) {
warn(in_path, line_record);
continue;
}

// fids from second read part are stored -ve to avoid clashes
if (state != s_read_first)
fid = -fid;
@@ -143,7 +159,7 @@

state_process:
if (DP())
cout << "State " << state << " line " << line << " ti " << ti << '\n';
cout << "State " << state << " line_number " << line_number << " ti " << ti << '\n';
switch (state) {
case s_read_first:
if (dbid == 0) {
@@ -330,6 +346,20 @@ Dbtoken::write_eclasses(const char *out_path)
}
}

// Return the Eclass for the specified tokid or its twin.
// If it isn't found return NULL.
static Eclass *
check_ec(const Tokid &ti)
{
Eclass *ec = ti.check_ec();

if (ec != NULL)
return ec;

// Try the twin
return twin(ti).check_ec();
}

// Read identifiers from in_path and set the EC attributes
void
Dbtoken::read_ids(const char *in_path)
@@ -358,7 +388,7 @@ Dbtoken::read_ids(const char *in_path)
ifstream input(in_path);
verify_open(in_path, input);

line = 0;
line_number = 0;
while (input >> dbid >> fid >> offset >> ecid >> name
>> v_readonly
>> v_undefined_macro
@@ -376,18 +406,12 @@ Dbtoken::read_ids(const char *in_path)
>> v_lscope
>> v_unused) {

line++;
line_number++;
Tokid ti(Fileid(fid), offset);
int name_length = name.length();
int covered = 0;
while (covered < name_length) {
Eclass *ec = ti.check_ec();

if (ec == NULL) {
// Try the twin
ti = twin(ti);
ec = ti.check_ec();
}
Eclass *ec = check_ec(ti);

if (ec == NULL) {
warn(in_path, ti);
@@ -466,29 +490,26 @@ Dbtoken::write_ids(const char *in_path, const char *out_path)
verify_open(out_path, out);

string line_record;
line_number = 0;
while (getline(in, line_record)) {
line++;
line_number++;

istringstream line_stream(line_record);

int dbid;
int fid;
unsigned long offset;
unsigned long ecid;
string name;
line_stream >> dbid >> fid >> offset >> ecid >> name;
if (!(line_stream >> dbid >> fid >> offset >> ecid >> name)) {
warn(in_path, line_record);
continue;
}

Tokid ti(Fileid(fid), offset);
int name_length = name.length();
int covered = 0;
while (covered < name_length) {
Eclass *ec = ti.check_ec();

if (ec == NULL) {
// Try the twin
ti = twin(ti);
ec = ti.check_ec();
}
Eclass *ec = check_ec(ti);

if (ec == NULL) {
warn(in_path, ti);
@@ -514,4 +535,73 @@ Dbtoken::read_write_functionids(const char *in_path, const char *out_path)

ofstream out(out_path);
verify_open(out_path, out);

// Avoid duplicate entries
static set <int> dumped;
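// (Holds the ids of functions whose records have already been written in full.)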

string line_record;
line_number = 0;
int out_ordinal = 0;
int prev_functionid = -1;
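// out_ordinal numbers the EC records emitted for the current function;
// prev_functionid identifies the function whose records are being merged.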
while (getline(in, line_record)) {
line_number++;
if (DP())
cout << "Read: " << line_record << '\n';

istringstream line_stream(line_record);
int functionid, in_ordinal;
int fileid;
unsigned long offset;
int len;
if (!(line_stream >> functionid >> in_ordinal >> fileid >> offset >> len)) {
warn(in_path, line_record);
continue;
}

if (dumped.find(functionid) != dumped.end()) {
if (DP())
cout << "Skip dumped function " << functionid << '\n';
continue;
}

if (functionid != prev_functionid) {
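// Starting a new function: restart its output ordinal and
// mark the previous function as completely dumped.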
out_ordinal = 0;
if (prev_functionid != -1)
dumped.insert(prev_functionid);
prev_functionid = functionid;
}

Tokid ti(Fileid(fileid), offset);
int covered = 0;
while (covered < len) {
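// Each iteration writes one record for the equivalence class
// covering the next ec->get_len() bytes of the function's tokens.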
Eclass *ec = check_ec(ti);

if (ec == NULL) {
warn(in_path, ti);
goto next_line;
}

if (DP())
cout << "f: " << functionid
<< " len: " << len
<< " EC len: " << ec->get_len()
<< " in_ordinal: " << in_ordinal
<< " in_ordinal: " << in_ordinal
<< " out_ordinal: " << out_ordinal
<< " covered: " << covered
<< '\n';

out
<< functionid << ','
<< out_ordinal++ << ','
<< ptr_offset(ec)
<< '\n';

covered += ec->get_len();
ti += ec->get_len();
}
next_line:
continue;
}

}
