Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions include/Analysis/Clones/CloneFinder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "Analysis/Clones/HashDeepWalker.h"

#include <iostream>
#include <map>
#include <set>

namespace OPS {

namespace Clones {

/// Finds groups of similar subtrees ("clones") inside a translation unit.
///
/// getClones() walks the unit with a HashDeepWalker, asks it for buckets of
/// candidate subtrees, compares the candidates of each bucket pairwise and
/// records every pair whose similarity is above SimilarityThreshold.
class CloneFinder {
    size_t MassThreshold;       // passed to HashDeepWalker::getBuckets()
    double SimilarityThreshold; // a pair is a clone when its similarity is strictly greater than this

public:
    /// One group of subtrees that were found similar to each other.
    struct Clone {
        // subTreeSize of the subtree that created the group. Initialised in-class so that a
        // default-constructed Clone (e.g. created by map::operator[]) never holds an
        // indeterminate value.
        int size = 0;
        // Members of the group.
        set<shared_ptr<HashDeepWalker::SubTreeInfo>> refs;
        Clone() {}
        Clone(int s) :size(s), refs() {}
    };
private:

    // Clone groups, keyed by the hashCode of the subtree that created the group.
    map<size_t, Clone> clones;

    // Counts nodes shared by two subtrees (positional comparison; block statements are
    // delegated to isSeqSimilar).
    int basicTreeCompare(shared_ptr <HashDeepWalker::SubTreeInfo> t1, shared_ptr <HashDeepWalker::SubTreeInfo> t2);
    // Counts nodes shared by the children of two subtrees, pairing children by hash first.
    int isSeqSimilar(shared_ptr < HashDeepWalker::SubTreeInfo> t1, shared_ptr < HashDeepWalker::SubTreeInfo> t2);
    // True when the similarity of the two subtrees exceeds SimilarityThreshold.
    bool isSimilar(shared_ptr <HashDeepWalker::SubTreeInfo> t1, shared_ptr <HashDeepWalker::SubTreeInfo> t2);
    // Re-files group members whose parent subtrees are similar to each other.
    void cloneGeneralizing();
    // Removes n from the group stored under its hash, then does the same for all its descendants.
    void eraseByHash(shared_ptr < HashDeepWalker::SubTreeInfo> n);
    // Applies eraseByHash to every direct child of root.
    void eraseChildClones(shared_ptr < HashDeepWalker::SubTreeInfo> root);
    // Removes from the groups every subtree nested inside a recorded clone.
    void eraseSubClones();
    // Records s1 and s2 as members of the group keyed by s1's hash.
    void addClonePair(shared_ptr < HashDeepWalker::SubTreeInfo> s1, shared_ptr < HashDeepWalker::SubTreeInfo> s2);

public:
    /// @param mt mass threshold handed to HashDeepWalker::getBuckets()
    /// @param st similarity threshold; pairs must score strictly above it
    CloneFinder(size_t mt, double st) : MassThreshold(mt), SimilarityThreshold(st) {
        // `clones` is default-constructed empty; no explicit assignment is needed.
    }

    /// Detects clones in `unit`.
    /// @param unit            translation unit to analyse
    /// @param removeSubClones when true, subtrees nested inside a detected clone are
    ///                        removed from the result
    /// @return the detected clone groups
    vector<Clone> getClones(TranslationUnit& unit, bool removeSubClones=false);
};

}
}
121 changes: 121 additions & 0 deletions include/Analysis/Clones/HashDeepWalker.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#pragma once

#include "Reprise/Reprise.h"
#include "Frontend/Frontend.h"
#include <OPS_Core/IO.h>
#include <Reprise/Service/DeepWalker.h>

#include<iostream>
#include<memory>

using namespace std;
using namespace OPS;
using namespace OPS::Reprise;

/// Deep walker that records, for every node passed to processNode(), a
/// SubTreeInfo holding the node's hash, the size of its subtree and its
/// parent/children links.
class HashDeepWalker : public Service::DeepWalker
{
public:
    /// Per-node record produced by the walk.
    struct SubTreeInfo {
        RepriseBase* node;                      // node this record describes (not owned)
        size_t hashCode = 0;                    // hash accumulated while visiting the node
        size_t subTreeSize=0;                   // node count accumulated while visiting the node
        // NOTE(review): `parent` and `children` are both shared_ptr, so a parent and each
        // of its children keep one another alive (reference cycle). Switching `parent` to
        // weak_ptr would break the cycle but requires updating every user of `parent`.
        shared_ptr<SubTreeInfo> parent;         // enclosing record, null for a root
        vector<shared_ptr<SubTreeInfo>> children; // records created while visiting this node
        SubTreeInfo(RepriseBase* n):node(n) {}
        // h and s are size_t (they used to be int) so that a hash or size is stored
        // without being truncated on the way in.
        SubTreeInfo(RepriseBase* n, size_t h, size_t s) : node(n), hashCode(h), subTreeSize(s),children() {}
    };

protected:
    size_t h = 0;   // hash accumulator of the node currently being visited
    int size = 0;   // node-count accumulator of the node currently being visited

    shared_ptr<SubTreeInfo> currentNode;    // record of the node currently being visited

    vector< shared_ptr<SubTreeInfo>> nodes; // every record created so far, in completion order

    /// Visits node `n` by running `f` with fresh accumulators, then records the result.
    ///
    /// While `f` runs, `h` and `size` start at 0 and `currentNode` is the new record, so
    /// nested processNode() calls attach their records to it. Afterwards the record gets
    /// the accumulated hash/size, is appended to `nodes` and to the parent's children,
    /// and the parent's accumulators are restored and updated:
    /// size += subtree size + 1, h += subtree hash << 1.
    ///
    /// @param f callable taking no arguments that visits the contents of `n`
    /// @param n node being visited
    template<class Func>
    void processNode(Func f, RepriseBase* n)
    {
        // Save the enclosing node's accumulators and start from zero for this node.
        size_t th = h;
        int ts = size;
        h = 0;
        size = 0;

        shared_ptr <SubTreeInfo> tp = currentNode;
        shared_ptr <SubTreeInfo> sti = make_shared<SubTreeInfo>(n);
        currentNode = sti;

        f();


        sti->hashCode = h;
        sti->subTreeSize = size;

        nodes.push_back(sti);

        if (tp != nullptr)
        {
            tp->children.push_back(sti);
        }

        // Fold this subtree into the enclosing node's accumulators.
        size = ts + size;
        size++;
        size_t h2 = h << 1;
        h = th + h2;
        sti->parent = tp;
        currentNode = tp;
    }

public:
    // Visitors, one per node kind; defined out of line.
    void visit(ProgramUnit&);
    void visit(TranslationUnit&);

    //void visit(ProgramFragment&);

    void visit(Declarations&);
    void visit(VariableDeclaration&);
    void visit(TypeDeclaration&);
    void visit(SubroutineDeclaration&);

    void visit(BlockStatement&);
    void visit(ForStatement&);
    void visit(WhileStatement&);
    void visit(IfStatement&);
    void visit(PlainCaseLabel&);
    void visit(PlainSwitchStatement&);
    void visit(GotoStatement&);
    void visit(ReturnStatement&);
    void visit(ExpressionStatement&);
    void visit(ASMStatement&);
    void visit(EmptyStatement&);

    void visit(BasicType&);
    void visit(PtrType&);
    void visit(TypedefType&);
    void visit(ArrayType&);
    void visit(StructMemberDescriptor& structMember);
    void visit(StructType&);
    void visit(EnumMemberDescriptor&);
    void visit(EnumType&);
    void visit(ParameterDescriptor&);
    void visit(SubroutineType&);
    void visit(DeclaredType&);
    void visit(VectorType&);

    void visit(BasicLiteralExpression&);
    void visit(StrictLiteralExpression&);
    void visit(CompoundLiteralExpression&);
    void visit(ReferenceExpression&);
    void visit(SubroutineReferenceExpression&);
    void visit(StructAccessExpression&);
    void visit(EnumAccessExpression&);
    void visit(TypeCastExpression&);
    void visit(BasicCallExpression&);
    void visit(SubroutineCallExpression&);
    void visit(EmptyExpression&);

    // Groups recorded subtrees into buckets keyed by a size_t value.
    // NOTE(review): presumably keyed by hash and filtered by MassThreshold; the
    // definition is out of line - confirm there.
    map<size_t, vector<shared_ptr<HashDeepWalker::SubTreeInfo>>> getBuckets(int MassThreshold);

    // Current value of the node-count accumulator.
    int getSize() const { return size; }
    // Current value of the hash accumulator; returned as size_t so the hash is not truncated.
    size_t getHash() const { return h; }
};
4 changes: 4 additions & 0 deletions source/Analysis/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ ops_filesin(INTERFACE "../../include/Analysis/"
CalculationGraph/CalculationGraphBase.h
CalculationGraph/CalculationGraphBuilder.h
CalculationGraph/ExpressionGraph.h
Clones/HashDeepWalker.h
Clones/CloneFinder.h
CallGraph.h
ComplexOccurrenceAnalysis/GrouppedOccurrences.h
ConsistCheck/Conditions.h
Expand Down Expand Up @@ -86,6 +88,8 @@ ops_project(
CalculationGraph/CalculationGraph.cpp
CalculationGraph/CalculationGraphBase.cpp
CalculationGraph/CalculationGraphBuilder.cpp
Clones/HashDeepWalker.cpp
Clones/CloneFinder.cpp
CallGraph/CallGraph.cpp
ComplexOccurrenceAnalysis/GrouppedOccurrences.cpp
ConsistCheck/Conditions.cpp
Expand Down
186 changes: 186 additions & 0 deletions source/Analysis/Clones/CloneFinder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#include "Analysis/Clones/HashDeepWalker.h"
#include "Analysis/Clones/CloneFinder.h"

#include <iostream>
#include <map>
#include <set>

namespace OPS {

namespace Clones {


// Counts the nodes two subtrees have in common.
// Two block statements are compared through isSeqSimilar(); otherwise the roots
// contribute 1 when their dynamic types match and the children are compared
// pairwise by position, up to the shorter child list.
int CloneFinder::basicTreeCompare(shared_ptr <HashDeepWalker::SubTreeInfo> t1, shared_ptr <HashDeepWalker::SubTreeInfo> t2)
{
    const bool bothBlocks = t1->node->is_a<BlockStatement>() && t2->node->is_a<BlockStatement>();
    if (bothBlocks)
        return isSeqSimilar(t1, t2);

    int matched = (typeid(*t1->node) == typeid(*t2->node)) ? 1 : 0;

    const int pairCount = min(t1->children.size(), t2->children.size());
    int idx = 0;
    while (idx < pairCount)
    {
        matched += this->basicTreeCompare(t1->children[idx], t2->children[idx]);
        ++idx;
    }

    return matched;
}

// Counts the nodes shared by the child sequences of t1 and t2.
//
// Pass 1 pairs each child of t1 with the first still-unpaired child of t2 that has the
// same hash. Pass 2 pairs every remaining child of t1 with the first remaining child
// of t2. Every child of t2 is consumed by at most one pairing, so a single child can
// no longer be counted several times.
//
// Returns the sum of basicTreeCompare() over all pairings.
int CloneFinder::isSeqSimilar(shared_ptr < HashDeepWalker::SubTreeInfo> t1, shared_ptr < HashDeepWalker::SubTreeInfo> t2)
{
    const auto& left = t1->children;
    const auto& right = t2->children;

    // Flags instead of index sets: O(1) lookup. vector<char> avoids the vector<bool> proxy.
    vector<char> leftUsed(left.size(), 0);
    vector<char> rightUsed(right.size(), 0);

    int sharedNodes = 0;

    // Pass 1: children with identical hashes.
    for (size_t i = 0; i < left.size(); ++i)
    {
        for (size_t j = 0; j < right.size(); ++j)
        {
            if (rightUsed[j] || left[i]->hashCode != right[j]->hashCode)
            {
                continue;
            }

            sharedNodes += basicTreeCompare(left[i], right[j]);
            leftUsed[i] = 1;
            rightUsed[j] = 1;
            break;
        }
    }

    // Pass 2: whatever is left, paired in order of appearance.
    for (size_t i = 0; i < left.size(); ++i)
    {
        if (leftUsed[i])
        {
            continue;
        }

        for (size_t j = 0; j < right.size(); ++j)
        {
            if (rightUsed[j])
            {
                continue;
            }

            sharedNodes += basicTreeCompare(left[i], right[j]);
            rightUsed[j] = 1; // consume it so the next leftover does not reuse this child
            break;
        }
    }

    return sharedNodes;
}

// Decides whether two subtrees are similar enough to be reported as clones.
// similarity = 2*S / (2*S + (size1 - S) + (size2 - S)), where S is the number of
// shared nodes; the pair qualifies when similarity is strictly above the threshold.
bool CloneFinder::isSimilar(shared_ptr <HashDeepWalker::SubTreeInfo> t1, shared_ptr <HashDeepWalker::SubTreeInfo> t2)
{
    const double common = (double)basicTreeCompare(t1, t2);
    const double onlyInFirst = t1->subTreeSize - common;
    const double onlyInSecond = t2->subTreeSize - common;
    const double twiceCommon = 2 * common;

    const double score = twiceCommon / (twiceCommon + onlyInFirst + onlyInSecond);
    return score > SimilarityThreshold;
}

// For every clone group, looks at each ordered pair of distinct members that both have
// a parent. When the parents are similar, the two members are inserted into the group
// keyed by the first member's hash and then erased from the group being examined.
//
// The members are iterated from a snapshot: the loop body erases from the live set,
// which would otherwise invalidate the iterators (and element references) the loops
// are using.
//
// NOTE(review): the pair is re-filed under the member's own hash, not the parent's.
// When that is the key of the group being examined, the net effect is removal of both
// members. If the intent is to promote the *parents* to a clone pair, this needs a
// follow-up change - confirm with the author.
void CloneFinder::cloneGeneralizing()
{
    for (auto& clone : clones)
    {
        auto& members = clone.second.refs;
        const auto snapshot = members; // stable copy to iterate over

        for (const auto& i : snapshot)
        {
            for (const auto& j : snapshot)
            {
                if (i == j || !i->parent || !j->parent)
                {
                    continue;
                }

                // Skip members already moved out of this group by an earlier pair.
                if (members.count(i) == 0 || members.count(j) == 0)
                {
                    continue;
                }

                if (!isSimilar(i->parent, j->parent))
                {
                    continue;
                }

                // std::map insertion does not invalidate the iterator of the outer loop.
                auto target = clones.find(i->hashCode);
                if (target == clones.end())
                {
                    target = clones.emplace(i->hashCode, Clone(i->subTreeSize)).first;
                }
                target->second.refs.insert(i);
                target->second.refs.insert(j);
                members.erase(i);
                members.erase(j);
            }
        }
    }
}

// Removes n from the clone group stored under n's hash (if such a group exists) and
// empties the group when only one member remains. Then continues with n's children,
// so the whole subtree below n is processed.
void CloneFinder::eraseByHash(shared_ptr < HashDeepWalker::SubTreeInfo> n)
{
    const auto entry = clones.find(n->hashCode);
    if (entry != clones.end())
    {
        auto& members = entry->second.refs;
        members.erase(n);
        if (members.size() == 1)
        {
            members.clear();
        }
    }

    eraseChildClones(n);
}

// Runs eraseByHash() on each direct child of root, in order.
void CloneFinder::eraseChildClones(shared_ptr < HashDeepWalker::SubTreeInfo> root)
{
    for (const auto& child : root->children)
    {
        eraseByHash(child);
    }
}

void CloneFinder::eraseSubClones()
{
for (auto& clone : clones)
{
for (auto& ref : clone.second.refs)
{
eraseChildClones(ref);
}
}
}

// Records s1 and s2 as clones of each other. Both go into the group keyed by s1's
// hash; the group is created with s1's subtree size when it does not exist yet.
void CloneFinder::addClonePair(shared_ptr < HashDeepWalker::SubTreeInfo> s1, shared_ptr < HashDeepWalker::SubTreeInfo> s2)
{
    const size_t key = s1->hashCode;

    auto slot = clones.find(key);
    if (slot == clones.end())
    {
        slot = clones.emplace(key, Clone(s1->subTreeSize)).first;
    }

    auto& members = slot->second.refs;
    members.insert(s1);
    members.insert(s2);
}

// Detects clones in `unit`.
//
// The unit is walked with a HashDeepWalker, the candidate subtrees of each bucket are
// compared pairwise, and every similar pair is recorded. With removeSubClones set,
// subtrees nested inside a recorded clone are removed afterwards.
//
// @param unit            translation unit to analyse
// @param removeSubClones drop clones that lie inside another recorded clone
// @return one Clone per non-empty group found in `unit`
vector<CloneFinder::Clone> CloneFinder::getClones(TranslationUnit& unit, bool removeSubClones)
{
    // Each call builds fresh SubTreeInfo records; without this reset a second call would
    // mix them with the records of the previous walk.
    clones.clear();

    HashDeepWalker hdw;
    hdw.visit(unit);
    auto buckets = hdw.getBuckets(MassThreshold);

    for (auto& bucket : buckets)
    {
        auto& candidates = bucket.second;
        for (size_t i = 0; i < candidates.size(); ++i)
        {
            for (size_t j = i + 1; j < candidates.size(); ++j)
            {
                if (isSimilar(candidates[i], candidates[j]))
                {
                    addClonePair(candidates[i], candidates[j]);
                }
            }
        }
    }

    if (removeSubClones)
        eraseSubClones();

    vector<Clone> cloneLst;
    cloneLst.reserve(clones.size());

    for (const auto& c : clones)
    {
        // Sub-clone removal empties groups instead of deleting them; a group without
        // members is not a clone, so it is not reported.
        if (c.second.refs.empty())
        {
            continue;
        }
        cloneLst.push_back(c.second);
    }

    return cloneLst;
}
}
}
Loading