structCluster { // A cluster of network flows vector<vector<int> > member; // svae the flows contained in each cluster. unordered_map<int, int> medoids2index; // save the position(index) of the medoids in the flow. vector<int> index2medoids; // mapping the medoids index to flow index. vector<bool> none_medoids;
} cluster_init;
// The global variable vector<pair<int, pair<double, double>> > flow; // network flow network_packet net_packet[10000]; // network packet
int size_packet = 0; int k; // nums of medoids
classKMedoids { /* This class is designed to calculate the k-Medoids clustering results for a set of data. */ private: vector<vector<double> > Distance; // the distance between two flow pairs. Cluster &cluster; // vector<pair<double, double> > net_flow; int K; public: //Constructors //KMedoids(); KMedoids(vector<pair<int, pair<double, double>> > &flow, int k, Cluster &clust) : cluster(clust) { for (constauto &it : flow) { net_flow.emplace_back(make_pair(it.second.first, it.second.second)); } Distance.resize(net_flow.size(), vector<double>(net_flow.size())); init_dist(); K = k; } voidinit_dist();
doubleget_dist(int flow1, int flow2); doubleget_cost(Cluster &cluster, int medoid_index, int calc_mem); doubletotal_cost();
boolregroup(); voidexchange(int, int);
boolK_medoids_algorithm(); };
void KMedoids::exchange(int medoids_index, int mem) { // exchange the medoids by mem. int medoids = cluster.index2medoids[medoids_index]; if (medoids == mem) return ; cluster.medoids2index.erase(medoids); cluster.medoids2index.insert({mem, medoids_index}); cluster.index2medoids[medoids_index] = mem;
bool KMedoids::K_medoids_algorithm() { bool changed = true; int N = net_flow.size(); regroup(); while (changed) { flag:; changed = false; double last_cost = total_cost();
for (int h = 0; h < N; ++ h) { if (!cluster.none_medoids[h]) continue; // assert h is not medoids. int min_med = 0, idx;
for (int i = 0; i < K; ++ i) { // bool is_medoids = false; // for (const auto & it : cluster.member[i]) { // if (it == h) is_medoids = true; // } // if (!is_medoids) continue; int last_med = cluster.index2medoids[i]; exchange(i, h); // replace i by h. regroup(); double cur_cost = total_cost(); if (cur_cost < last_cost) { changed = true; goto flag ; } else { exchange(i, last_med); regroup(); } } } } // end while. returntrue; }
void KMedoids::init_dist() {
for (int i = 0; i < net_flow.size(); ++ i) { for (int j = 0; j < net_flow.size(); ++ j) { Distance[i][j] = abs(net_flow[i].first - net_flow[j].first) + abs(net_flow[i].second - net_flow[j].second); } } }
double KMedoids::get_dist(int flow1, int flow2) { return Distance[flow1][flow2]; }
double KMedoids::get_cost(Cluster &cluster, int medoid_index, int calc_mem) { double cost = 0; for (constauto &mem : cluster.member[medoid_index]){ // Enumerates all members that belong to the medoid cluster cost += Distance[calc_mem][mem]; } return cost; }
double KMedoids::total_cost() { double cost = 0; int medoid; for (int i = 0; i < K; ++ i) { medoid = cluster.index2medoids[i]; cost += get_cost(cluster, i, medoid); } return cost; }
bool KMedoids::regroup() { // classify each net_flow . for (auto &a : cluster.member){ a.clear(); }
int med = 0; double min_dist; for (int i = 0; i < net_flow.size(); ++ i) { min_dist = INT_MAX; for (int j = 0; j < net_flow.size(); ++ j) { if (Distance[i][j] < min_dist) { if (!cluster.none_medoids[j]) // assert j is the medoids. { min_dist = Distance[i][j]; med = j; } } }// end inerfor cluster.member[cluster.medoids2index[med]].emplace_back(i); }// end for;
returntrue; }
intpreProcess(){ /* this preprocessor function used to extract network flow from network packet. rule: a network flow includes at least TWO packets with same source address, source port, destination address, destination port, and protocol, */ stable_sort(net_packet, net_packet + size_packet);
int l = 0, r = 0; // L and R are used to find the left and right boundaries of a network flow, respectively. while (r < size_packet) { while (r < size_packet - 1 && net_packet[l] == net_packet[r + 1]) ++ r; if (r > l) { int idx = l; double aver_trans_time = 0, aver_len = 0; int denom = r - l; while (l < r) { aver_trans_time += (net_packet[l + 1].arri_time - net_packet[l].arri_time); aver_len += net_packet[l].pack_len; ++ l; } aver_len += net_packet[r].pack_len; aver_trans_time /= denom; aver_len /= (denom + 1); flow.emplace_back(make_pair(net_packet[idx].arri_time, make_pair(aver_trans_time, aver_len))); } l = r = r + 1; } sort(flow.begin(), flow.end()); return0; }
boolread_packet_helper(conststring &filepath){ /* read the network packet from file1.txt . Save the data to a global structure variable: net_packet. */ ifstream infile(filepath, ios::in); if (!infile) returnfalse; string s; getline(infile, s); // Discard the first line of text