Project - Stage 2: Clone-Pruning Analysis Pass
Project - Stage 2: Clone-Pruning Analysis Pass
In Stage 1 of the project, I learned how to develop a basic pass in GCC. However, I may need to refine my methods for accurately counting the number of basic blocks and GIMPLE statements. In Stage 2 of the project, I was required to modify my pass to determine if a function had been cloned and to make decisions about whether it should be pruned.
Getting the testing code files
First, I got the testing code files from /public/spo600-test-clone.tgz. To get the file, I used the following command to unpack the tar archive file.
cd ~ ; tar xvf /public/spo600-test-clone.tgz
Then, a folder named spo600 will appear in our home directory, which will contain the code to test.
Identify functions that have been cloned
objdump -d clone-test-x86-prune | grep 'scale_samples' > 'prune_inspect'
The image shows two functions with the same name but different suffixes, along with a resolver. Both functions have the same assembly instructions. The .default version is the original, and .popcnt is the cloned version.Next, I tried to find the cloned function in the Pass. In the Pass, I tried to use a vector to store all the function names in the file.
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "cfg.h"
#include <vector>
namespace{
const pass_data pass_data_project = {
GIMPLE_PASS, /* type */
"pass_project", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
// Pass class
class pass_project : public gimple_opt_pass {
std::vector<const char*> functions;
public:
// Constructor
pass_project(gcc::context* ctxt) : gimple_opt_pass(pass_data_project, ctxt){};
unsigned int execute(function* fun) override{
cgraph_node* node = cgraph_node::get(fun->decl);
functions.push_back(node->name());
print_functions(dump_file);
return 0;
}
void print_functions(FILE* dump_file){
if(dump_file){
for(size_t i = 0; i < functions.size(); ++i){
fprintf(dump_file, "===== Function Name: %s =====\n\n", functions[i]);
}
}
}
};
}
//Custom pass creation function
gimple_opt_pass* make_pass_project(gcc::context* ctxt){
return new pass_project(ctxt);
}
After collecting all the function names, I compared them to identify the cloned functions. A name with a suffix usually indicates a cloned function. However, since resolvers also have suffixes, I had to check the suffix carefully to tell them apart.
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "cfg.h"
#include <vector>
namespace{
const pass_data pass_data_project = {
GIMPLE_PASS, /* type */
"pass_project", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
// Pass class
class pass_project : public gimple_opt_pass {
std::vector<const char*> functions;
public:
// Constructor
pass_project(gcc::context* ctxt) : gimple_opt_pass(pass_data_project, ctxt){};
unsigned int execute(function* fun) override{
cgraph_node* node = cgraph_node::get(fun->decl);
if(dump_file){
int funIndex = findClonedFunctionByName(node->name());
if(funIndex == EOF){
fprintf(dump_file, "===== This is not a cloned function =====\n\n");
}else{
fprintf(dump_file, "===== This %s maight be a clonded function =====\n"
"===== Another function with the same name: %s =====\n\n", node->name(), functions[funIndex]);
}
}
functions.push_back(node->name());
return 0;
}
// Function recevie the name of current checking function
// Return the function name index in the vector if the vector has the same function name but it is not a resolver.
// Reutrn -1 if no function name match.
int findClonedFunctionByName(const char* checkingFunName){
if(functions.empty()){
return -1;
}
for(size_t i = 0; i < functions.size(); ++i){
size_t j = 0;
for(; checkingFunName[j] != '.' || checkingFunName[j] != '\0' ; ++j){
if(functions[i][j] != checkingFunName[j]){
break;
}
}
if(checkingFunName[j] == '.'){
const char* resolver = "resolver";
const char* suffix = &checkingFunName[j + 1];
for(size_t k = 0; suffix[k] != '\0'; ++k){
if(suffix[k] != resolver[k]){
return i;
}
}
}
}
return -1;
}
};
}
//Custom pass creation function
gimple_opt_pass* make_pass_project(gcc::context* ctxt){
return new pass_project(ctxt);
}
After several attempts, I changed the code to use std::string instead of const char* to store function names. This made it easier to manage memory and compare strings.
To check if two functions are identical, I compared the number of basic blocks and the GIMPLE statements in each function. If both matched, the functions were likely identical.
Based on this comparison, I could decide whether a function should be pruned. For each function, a message is written to the dump file to indicate whether it should be pruned or not. Some parts of the process are also logged in the dump file for reference.
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "function.h"
#include "basic-block.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-pretty-print.h"
#include "cfg.h"
#include <string>
#include <vector>
#include <map>
namespace{
const pass_data pass_data_project = {
GIMPLE_PASS, /* type */
"pass_project", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
PROP_cfg, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
// Pass class
class pass_project : public gimple_opt_pass {
std::vector<std::string> functions;
std::map<std::string, std::vector<int>> gimpleCodes;
std::map<std::string, int> bbCnts;
public:
// Constructor
pass_project(gcc::context* ctxt) : gimple_opt_pass(pass_data_project, ctxt){};
unsigned int execute(function* fun) override{
cgraph_node* node = cgraph_node::get(fun->decl);
std::string currFunName = node->name();
basic_block bb;
int bb_cnt = 0;
int funIndex = findClonedFunction(currFunName);
if(dump_file){
if(funIndex != EOF){
fprintf(dump_file, "===== This %s is a cloned function =====\n", currFunName.c_str());
}else{
fprintf(dump_file, "===== This is not a clonded function =====\n");
}
}
// Store function names
functions.push_back(currFunName);
// Store Gimple code
FOR_EACH_BB_FN(bb, fun){
bb_cnt++;
for(gimple_stmt_iterator gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)){
gimple* stmt = gsi_stmt(gsi);
gimpleCodes[node->name()].push_back(gimple_code(stmt));
}
}
bbCnts[currFunName] = (bb_cnt);
if(dump_file){
if(funIndex == EOF){
fprintf(dump_file, "NOPRUNE: %s\n\n", removeSuffix(currFunName).c_str());
}else{
fprintf(dump_file, "%s: %s\n\n", isIdenticalFunction(currFunName, funIndex) ? "PRUNE" : "NOPRUNE", removeSuffix(currFunName).c_str());
}
}
return 0;
}
// Function recevie the name of current checking function
// Return the function name index in the vector if the vector has the same function name but it is not a resolver.
// Reutrn -1 if no function name match.
int findClonedFunction(std::string checkingFunName){
if(functions.empty()){
return -1;
}
size_t dotPos = checkingFunName.find('.');
std::string funName = checkingFunName.substr(0, dotPos);
std::string suffix = "";
if(dotPos != std::string::npos){
suffix = checkingFunName.substr(dotPos + 1);
}
for(size_t i = 0; i < functions.size(); ++i){
if(funName == functions[i] && suffix != "resolver"){
return i;
}
}
return -1;
}
bool isIdenticalFunction(std::string currFunName, int orgFunIndex){
fprintf(dump_file, "========== Comparing Original Function and the Cloned Function ==========\n");
fprintf(dump_file, "*** Compare Basic Block ***\n");
fprintf(dump_file, "Current Function Basic Block: %d\n"
"Original Function Basic Block: %d\n", bbCnts[currFunName], bbCnts[functions[orgFunIndex]]);
if(bbCnts[currFunName] != bbCnts[functions[orgFunIndex]]){
fprintf(dump_file, "*** Differnt amount of Basic Blocks ***\n");
return false;
}
fprintf(dump_file, "*** They have same amount of Basic Block ***\n");
fprintf(dump_file, "Cloned FUnction Gimple code(Current) | Orignal Function Gimple code\n");
for(size_t i = 0; i < gimpleCodes[currFunName].size(); ++i){
if(gimpleCodes[currFunName][i] == gimpleCodes[functions[orgFunIndex]][i]){
fprintf(dump_file, "%d | %d\n", gimpleCodes[currFunName][i], gimpleCodes[functions[orgFunIndex]][i]);
}else{
return false;
}
}
return true;
}
std::string removeSuffix(std::string fullName){
size_t dotPos = fullName.find('.');
return dotPos != std::string::npos ? fullName.substr(0, dotPos) : fullName;
}
};
}
//Custom pass creation function
gimple_opt_pass* make_pass_project(gcc::context* ctxt){
return new pass_project(ctxt);
}
The output in the dump file for the clone-text-x86-prune of the pass will look like the following:
Comments
Post a Comment