diff --git a/iids/classifier/mlclassifiers.py b/iids/classifier/mlclassifiers.py
index 8c60e5a..fe044d3 100644
--- a/iids/classifier/mlclassifiers.py
+++ b/iids/classifier/mlclassifiers.py
@@ -1,12 +1,103 @@
 #import all the modules below
 from sklearn import tree
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.gaussian_process import GaussianProcessClassifier
+from sklearn.neural_network import MLPClassifier
+import joblib
+#import all necessary packages and modules
+
+
+class DecisionTree:
+    def __init__(self, data, features_list="all", criterion="gini", splitter="best", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, class_weight=None, ccp_alpha=0.0):
+        self.features_list = features_list
+        if self.features_list == "all":
+            self.data = data
+        else:
+            self.data = data[features_list]
+        self.model = DecisionTreeClassifier(criterion=criterion, splitter=splitter, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, random_state=random_state, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, class_weight=class_weight, ccp_alpha=ccp_alpha)
-class DecisionTree:
+
+    def train(self):
+        # X_train/X_test/y_train/y_test are module-level names populated by the
+        # preprocessing step (see Preprocessor.train_test in preprocessors.py)
+        self.model.fit(X_train, y_train)
+        score = self.model.score(X_test, y_test)
+        print(f'The score by {self.model} is {score}')
+        return self.model, score
+
+    def save_model(self, model, filename):
+        try:
+            joblib.dump(model, filename)
+            print("Model saved to disk")
+        except Exception as e:
+            raise IOError("Error saving model data to disk: {}".format(e))
+        return True
-    def __init__(self, data, features_list="all", criterion="gini", splitter="best", max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, class_weight=None, presort='deprecated', ccp_alpha=0.0):
+
+
+class RandomForest:
+    def __init__(self, data, features_list="all", n_estimators=100, criterion="gini", max_depth=2, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features="auto", max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, n_jobs=-1, random_state=0, verbose=1, class_weight='balanced', ccp_alpha=0.0, max_samples=None):
+        self.features_list = features_list
+        if self.features_list == "all":
+            self.data = data
+        else:
+            self.data = data[features_list]
+        self.model = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion, max_depth=max_depth, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, min_weight_fraction_leaf=min_weight_fraction_leaf, max_features=max_features, max_leaf_nodes=max_leaf_nodes, min_impurity_decrease=min_impurity_decrease, bootstrap=bootstrap, n_jobs=n_jobs, random_state=random_state, verbose=verbose, class_weight=class_weight, ccp_alpha=ccp_alpha, max_samples=max_samples)
+
+    def train(self):
+        self.model.fit(X_train, y_train)
+        score = self.model.score(X_test, y_test)
+        print(f'The score by {self.model} is {score}')
+        return self.model, score
+
+    def save_model(self, model, filename):
+        try:
+            joblib.dump(model, filename)
+            print("Model saved to disk")
+        except Exception as e:
+            raise IOError("Error saving model data to disk: {}".format(e))
+        return True
+
+
+class KNeighbors:
+    def __init__(self, data, features_list="all", n_neighbors=3, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None):
+        self.features_list = features_list
+        if self.features_list == "all":
+            self.data = data
+        else:
+            self.data = data[features_list]
+        self.model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, leaf_size=leaf_size, p=p, metric=metric, metric_params=metric_params, n_jobs=n_jobs)
+
+    def train(self):
+        self.model.fit(X_train, y_train)
+        score = self.model.score(X_test, y_test)
+        print(f'The score by {self.model} is {score}')
+        return self.model, score
+
+    def save_model(self, model, filename):
+        try:
+            joblib.dump(model, filename)
+            print("Model saved to disk")
+        except Exception as e:
+            raise IOError("Error saving model data to disk: {}".format(e))
+        return True
+
+
+class GaussianProcess:
+    def __init__(self, data, features_list="all", optimizer='fmin_l_bfgs_b', n_restarts_optimizer=0, max_iter_predict=100, warm_start=False, copy_X_train=True, random_state=None, multi_class='one_vs_rest', n_jobs=None):
         self.features_list = features_list
         if self.features_list == "all":
             self.data = data
@@ -14,8 +105,76 @@ def __init__(self, data, features_list="all", criterion="gini", splitter="best",
         else:
             self.data = data[features_list]
+        self.model = GaussianProcessClassifier(optimizer=optimizer, n_restarts_optimizer=n_restarts_optimizer, max_iter_predict=max_iter_predict, warm_start=warm_start, copy_X_train=copy_X_train, random_state=random_state, multi_class=multi_class, n_jobs=n_jobs)
+
+    def train(self):
+        self.model.fit(X_train, y_train)
+        score = self.model.score(X_test, y_test)
+        print(f'The score by {self.model} is {score}')
+        return self.model, score
+
+    def save_model(self, model, filename):
+        try:
+            joblib.dump(model, filename)
+            print("Model saved to disk")
+        except Exception as e:
+            raise IOError("Error saving model data to disk: {}".format(e))
+        return True
+
+
+class AdaBoost:
+    def __init__(self, data, features_list="all", base_estimator=None, n_estimators=50, learning_rate=1.0, algorithm='SAMME.R', random_state=None):
+        self.features_list = features_list
+        if self.features_list == "all":
+            self.data = data
+        else:
+            self.data = data[features_list]
+        self.model = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=n_estimators, learning_rate=learning_rate, algorithm=algorithm, random_state=random_state)
-    def train(self):
+
+    def train(self):
+        self.model.fit(X_train, y_train)
+        score = self.model.score(X_test, y_test)
+        print(f'The score by {self.model} is {score}')
+        return self.model, score
+
+    def save_model(self, model, filename):
+        try:
+            joblib.dump(model, filename)
+            print("Model saved to disk")
+        except Exception as e:
+            raise IOError("Error saving model data to disk: {}".format(e))
+        return True
+
+
+class MLP:
+    def __init__(self, data, features_list="all", hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=1, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=1000, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000):
+        self.features_list = features_list
+        if self.features_list == "all":
+            self.data = data
+        else:
+            self.data = data[features_list]
+        self.model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, activation=activation, solver=solver, alpha=alpha, batch_size=batch_size, learning_rate=learning_rate, learning_rate_init=learning_rate_init, power_t=power_t, max_iter=max_iter, shuffle=shuffle, random_state=random_state, tol=tol, verbose=verbose, warm_start=warm_start, momentum=momentum, nesterovs_momentum=nesterovs_momentum, early_stopping=early_stopping, validation_fraction=validation_fraction, beta_1=beta_1, beta_2=beta_2, epsilon=epsilon, n_iter_no_change=n_iter_no_change, max_fun=max_fun)
+
+    def train(self):
+        self.model.fit(X_train, y_train)
+        score = self.model.score(X_test, y_test)
+        print(f'The score by {self.model} is {score}')
+        return self.model, score
+
+    def save_model(self, model, filename):
+        try:
+            joblib.dump(model, filename)
+            print("Model saved to disk")
+        except Exception as e:
+            raise IOError("Error saving model data to disk: {}".format(e))
+        return True
+
+
+def load_model(filename):
+    "Counterpart of save_model; used by predict() in main/management/commands/get_data.py"
+    return joblib.load(filename)
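
Each wrapper above reproduces the same scikit-learn flow: split, fit, score, persist. For reference, that flow written directly against scikit-learn (the CSV path and label column are assumptions, not part of this patch):

    import joblib
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.tree import DecisionTreeClassifier

    data = pd.read_csv("dataset.csv")                   # hypothetical path
    X, y = data.drop(columns=["label"]), data["label"]  # assumed label column
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.4)

    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))                    # held-out accuracy
    joblib.dump(clf, "decision_tree.joblib")            # same persistence as save_model
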
diff --git a/iids/classifier/nnclassifiers.py b/iids/classifier/nnclassifiers.py
new file mode 100644
index 0000000..f4d994a
--- /dev/null
+++ b/iids/classifier/nnclassifiers.py
@@ -0,0 +1,195 @@
+#neural net classifiers
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+
+class CNN(nn.Module):
+    def __init__(self, in_dim=1, n_class=23):
+        super(CNN, self).__init__()
+
+        self.conv = nn.Sequential(
+            nn.Conv2d(in_dim, 6, 3, stride=1, padding=1),   # 8x8 -> 8x8
+            nn.BatchNorm2d(6),
+            nn.ReLU(True),
+            nn.Conv2d(6, 16, 3, stride=1, padding=0),       # 8x8 -> 6x6
+            nn.BatchNorm2d(16),
+            nn.ReLU(True),
+            nn.MaxPool2d(2, 2)                              # 6x6 -> 3x3
+        )
+
+        self.fc = nn.Sequential(
+            nn.Linear(144, 512),                            # 16 * 3 * 3 = 144
+            nn.Linear(512, 256),
+            nn.Linear(256, n_class)
+        )
+
+    def forward(self, x):
+        out = self.conv(x)
+        out = out.view(out.size(0), -1)
+        out = self.fc(out)
+        return out
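
The hard-coded 144 follows from the 1x8x8 inputs produced by the preprocessor: the padded 3x3 convolution keeps 8x8, the unpadded one yields 6x6, and 2x2 max-pooling leaves 16 channels of 3x3, i.e. 16 * 3 * 3 = 144. A quick shape check on random data:

    import torch
    from classifier.nnclassifiers import CNN

    x = torch.randn(4, 1, 8, 8)           # batch of four fake 8x8 records
    out = CNN(in_dim=1, n_class=23)(x)
    print(out.shape)                      # torch.Size([4, 23])
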
+
+
+class RNN(nn.Module):
+    def __init__(self, input_dim=1, hidden_dim=3, layer_dim=3, output_dim=1):
+        super(RNN, self).__init__()
+        self.hidden_dim = hidden_dim
+        self.layer_dim = layer_dim
+        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True, nonlinearity='relu')
+        self.fc = nn.Linear(hidden_dim, output_dim)
+
+    def forward(self, x):
+        # Zero initial hidden state: one per layer per batch element
+        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
+        out, hn = self.rnn(x, h0)
+        # Classify on the last time step only
+        out = self.fc(out[:, -1, :])
+        return out
+
+
+class Autoencoder(nn.Module):
+    def __init__(self):
+        super(Autoencoder, self).__init__()
+        self.encoder = nn.Sequential(
+            nn.Conv2d(3, 6, kernel_size=5),
+            nn.ReLU(True),
+            nn.Conv2d(6, 16, kernel_size=5),
+            nn.ReLU(True))
+        self.decoder = nn.Sequential(
+            nn.ConvTranspose2d(16, 6, kernel_size=5),
+            nn.ReLU(True),
+            nn.ConvTranspose2d(6, 3, kernel_size=5),
+            nn.ReLU(True))
+
+    def forward(self, x):
+        x = self.encoder(x)
+        x = self.decoder(x)
+        return x
+
+
+class SOM(nn.Module):
+    "Self-organising map; trained by calling forward(x, it) per sample, not by backprop"
+    def __init__(self, m, n, dim, niter, alpha=None, sigma=None):
+        super(SOM, self).__init__()
+        self.m = m
+        self.n = n
+        self.dim = dim
+        self.niter = niter
+        self.alpha = 0.3 if alpha is None else float(alpha)
+        self.sigma = max(m, n) / 2.0 if sigma is None else float(sigma)
+
+        self.weights = torch.randn(m * n, dim)
+        self.locations = torch.LongTensor(np.array(list(self.neuron_locations())))
+        self.pdist = nn.PairwiseDistance(p=2)
+
+    def get_weights(self):
+        return self.weights
+
+    def get_locations(self):
+        return self.locations
+
+    def neuron_locations(self):
+        for i in range(self.m):
+            for j in range(self.n):
+                yield np.array([i, j])
+
+    def map_vects(self, input_vects):
+        # Map each input vector to the grid location of its closest neuron
+        to_return = []
+        for vect in input_vects:
+            min_index = min([i for i in range(len(self.weights))],
+                            key=lambda x: np.linalg.norm(vect - self.weights[x]))
+            to_return.append(self.locations[min_index])
+        return to_return
+
+    def forward(self, x, it):
+        # Find the best-matching unit (BMU) for x ...
+        dists = self.pdist(torch.stack([x for i in range(self.m * self.n)]), self.weights)
+        _, bmu_index = torch.min(dists, 0)
+        bmu_loc = self.locations[bmu_index, :]
+        bmu_loc = bmu_loc.squeeze()
+        # ... then pull every neuron towards x, weighted by a neighbourhood
+        # function that decays with grid distance from the BMU and with the
+        # iteration count (this update completes the truncated original)
+        decay = 1.0 - it / self.niter
+        alpha_op = self.alpha * decay
+        sigma_op = self.sigma * decay
+        bmu_dist_sq = torch.sum((self.locations.float() - bmu_loc.float()) ** 2, 1)
+        neighbourhood = torch.exp(-bmu_dist_sq / (sigma_op ** 2 + 1e-8))
+        lr = (alpha_op * neighbourhood).unsqueeze(1)
+        self.weights = self.weights + lr * (x - self.weights)
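
Unlike the other modules here, SOM is not trained with a loss and optimizer; each forward(x, it) call is itself one update step. A minimal sketch of driving it (grid size and data are assumptions):

    import torch
    from classifier.nnclassifiers import SOM

    som = SOM(m=10, n=10, dim=41, niter=100)   # 10x10 grid over 41-d features
    data = torch.randn(500, 41)                # hypothetical feature vectors
    for it in range(som.niter):
        for x in data:
            som(x, it)                         # BMU search + neighbourhood update
    print(som.map_vects(data[:5]))             # grid coordinates per sample

The gradient-based models go through neural_train below instead.
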
+
+
+# Neural network parameters; these would be collected from the JSON config
+batch_size = 128
+learning_rate = 1e-2
+num_epoches = 5
+USE_GPU = torch.cuda.is_available()
+
+model = CNN(1, 23)  # would be set from the config
+#model = RNN()
+#model = Autoencoder()
+
+
+def neural_train():
+    global model
+
+    if USE_GPU:
+        model = model.cuda()
+
+    criterion = nn.CrossEntropyLoss()
+    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
+
+    for epoch in range(num_epoches):
+        print('epoch {}'.format(epoch + 1))
+        print('*' * 10)
+        running_loss = 0.0
+        running_acc = 0.0
+        # dataset is expected to expose TensorDatasets and DataLoaders built
+        # from the preprocessor output (see preprocessors.py)
+        for i, data in enumerate(dataset.train_dataloader, 1):
+            img, label = data
+            if USE_GPU:
+                img = img.cuda()
+                label = label.cuda()
+            # Forward pass
+            out = model(img)
+            loss = criterion(out, label)
+            running_loss += loss.item() * label.size(0)
+            _, pred = torch.max(out, 1)
+            running_acc += (pred == label).sum().item()
+            # Backward pass
+            optimizer.zero_grad()
+            loss.backward()
+            optimizer.step()
+
+        print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
+            epoch + 1, running_loss / len(dataset.train_dataset),
+            running_acc / len(dataset.train_dataset)))
+
+    model.eval()
+    eval_loss = 0
+    eval_acc = 0
+    with torch.no_grad():  # replaces the deprecated volatile=True Variables
+        for data in dataset.test_dataloader:
+            img, label = data
+            if USE_GPU:
+                img = img.cuda()
+                label = label.cuda()
+            out = model(img)
+            loss = criterion(out, label)
+            eval_loss += loss.item() * label.size(0)
+            _, pred = torch.max(out, 1)
+            eval_acc += (pred == label).sum().item()
+    print('Test Loss: {:.6f}, Acc: {:.6f}'.format(
+        eval_loss / len(dataset.test_dataset), eval_acc / len(dataset.test_dataset)))
+    torch.save(model, filepath)  # filepath is expected from the caller's config
+    print()
diff --git a/iids/iids/settings.py b/iids/iids/settings.py
index 609fe4c..b8a2213 100644
--- a/iids/iids/settings.py
+++ b/iids/iids/settings.py
@@ -37,6 +37,10 @@
     'django.contrib.sessions',
     'django.contrib.messages',
     'django.contrib.staticfiles',
+    'rest_framework',
+    'classifier',
+    'preprocessor',
+    'main',
 ]
 
 MIDDLEWARE = [
diff --git a/iids/iids/urls.py b/iids/iids/urls.py
index 6b9d60f..109bd2c 100644
--- a/iids/iids/urls.py
+++ b/iids/iids/urls.py
@@ -15,7 +15,8 @@
 """
 from django.contrib import admin
 from django.urls import path
-
+from main.views import *
 urlpatterns = [
     path('admin/', admin.site.urls),
 ]
diff --git a/iids/main/__init__.py b/iids/main/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/iids/main/admin.py b/iids/main/admin.py
new file mode 100644
index 0000000..8c38f3f
--- /dev/null
+++ b/iids/main/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
diff --git a/iids/main/apps.py b/iids/main/apps.py
new file mode 100644
index 0000000..833bff6
--- /dev/null
+++ b/iids/main/apps.py
@@ -0,0 +1,5 @@
+from django.apps import AppConfig
+
+
+class MainConfig(AppConfig):
+    name = 'main'
diff --git a/iids/main/management/commands/get_data.py b/iids/main/management/commands/get_data.py
new file mode 100644
index 0000000..e90b2ba
--- /dev/null
+++ b/iids/main/management/commands/get_data.py
@@ -0,0 +1,115 @@
+from django.core.management.base import BaseCommand, CommandError
+import json
+import shutil
+import zipfile
+import numpy as np
+import pandas as pd
+import torch
+from django.http import JsonResponse
+from preprocessor.preprocessors import *
+from classifier.mlclassifiers import *
+from classifier.nnclassifiers import *
+
+
+class Command(BaseCommand):
+
+    help = 'Provide the path of the dataset and config.json'
+
+    def add_arguments(self, parser):
+        # argparse 'type' must be a callable converter; these are all paths or
+        # plain strings, so the earlier type=dir / type=list were bugs
+        parser.add_argument('-d', '--dataset', type=str, help='Path of the dataset for training')
+        parser.add_argument('-c', '--config', type=str, help='Path of the model config file')
+        parser.add_argument('-m', '--model_type', type=str, help='"ml" for machine learning, "nn" for neural networks')
+        parser.add_argument('-i', '--input', type=str, help='Input data for prediction')
+
+    def handle(self, *args, **kwargs):
+        global dataset_path, config_path, model_type, data, dataset
+        dataset_path = kwargs['dataset']
+        config_path = kwargs['config']
+        model_type = kwargs['model_type']
+        input_data = kwargs['input']
+
+        data = pd.read_csv(dataset_path)
+        dataset = Preprocessor(data)  # kept global: predict() uses its encoders
+        if input_data is not None:
+            predict(input_data)
+        shutil.make_archive('model_config', 'zip', config_path)
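
With handle() wired up as above, a training run is kicked off through Django's management interface; the file names here are placeholders:

    python manage.py get_data -d dataset.csv -c config.json -m ml

Passing -i additionally routes one record through predict() below after preprocessing.
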
+
+
+def config(dir):
+    global response, model
+    with open(dir, "r") as f:
+        cfg = json.load(f)  # named cfg so it cannot shadow the dataframe
+
+    if cfg['model_type'] == 'nn':
+        if cfg['model_name'] == 'CNN':
+            model = CNN()
+        elif cfg['model_name'] == 'RNN':
+            model = RNN()
+        elif cfg['model_name'] == 'Autoencoder':
+            model = Autoencoder()
+        else:
+            response = 'Model doesn\'t exist'
+    elif cfg['model_type'] == 'ml':
+        if cfg['model_name'] == 'DecisionTree':
+            model = DecisionTree(data)
+        elif cfg['model_name'] == 'RandomForest':
+            model = RandomForest(data)
+        elif cfg['model_name'] == 'KNeighbors':
+            model = KNeighbors(data)
+        elif cfg['model_name'] == 'GaussianProcess':
+            model = GaussianProcess(data)
+        elif cfg['model_name'] == 'AdaBoost':
+            model = AdaBoost(data)
+        elif cfg['model_name'] == 'MLP':
+            model = MLP(data)
+        else:
+            response = 'Model doesn\'t exist'
+    else:
+        response = 'Model type not chosen'
+
+    if cfg['model_type'] == 'ml':
+        model.train()
+        model.save_model(model.model, config_path)
+    elif cfg['model_type'] == 'nn':
+        neural_train()  # trains and saves the global nn model (nnclassifiers.py)
+    zip_file = zipfile.ZipFile("/local/my_files/my_file.zip", "w")
+
+    return model, zip_file
+
+
+#@api_view(['GET'])
+def predict(input_data):
+    global model, response
+    config(config_path)
+    with open(config_path, "r") as f:
+        cfg = json.load(f)
+
+    if cfg['model_type'] == 'nn':
+        model = torch.load(config_path)  # config_path doubles as the saved-model path
+        _data = dataset.encode(input_data)
+        _data = torch.from_numpy(np.pad(_data, (0, 64 - len(_data)), 'constant').astype(np.float32)).reshape(-1, 1, 8, 8).cuda()
+        _out = int(torch.max(model(_data).data, 1)[1].cpu().numpy())
+        response = dataset.decode(_out, label=True)
+    elif cfg['model_type'] == 'ml':
+        model = load_model(config_path)
+        response = model.predict([input_data])
+    else:
+        response = 'Model type not chosen'
+
+    output = {'prediction': response}
+    return JsonResponse(output)
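
config() reads exactly two keys, so a minimal config.json looks like this (with "nn" plus "CNN", "RNN" or "Autoencoder" selecting the neural models instead):

    {
        "model_type": "ml",
        "model_name": "DecisionTree"
    }
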
diff --git a/iids/main/migrations/__init__.py b/iids/main/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/iids/main/models.py b/iids/main/models.py
new file mode 100644
index 0000000..71a8362
--- /dev/null
+++ b/iids/main/models.py
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.
diff --git a/iids/main/tests.py b/iids/main/tests.py
new file mode 100644
index 0000000..7ce503c
--- /dev/null
+++ b/iids/main/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
diff --git a/iids/main/views.py b/iids/main/views.py
new file mode 100644
index 0000000..f659703
--- /dev/null
+++ b/iids/main/views.py
@@ -0,0 +1,89 @@
+from django.shortcuts import render
+from django.http import HttpResponse, JsonResponse
+import torch
+from rest_framework.decorators import api_view
+from rest_framework.views import APIView
+from rest_framework.response import Response
+import pandas as pd
+from preprocessor.preprocessors import *
+from classifier.mlclassifiers import *
+from classifier.nnclassifiers import *
+# Create your views here.
+# pylint: disable=E1120
+# An earlier commented-out draft of config()/predict() lived here; the working
+# versions are in main/management/commands/get_data.py.
diff --git a/iids/preprocessor/preprocessors.py b/iids/preprocessor/preprocessors.py
index 42438f3..72d2962 100644
--- a/iids/preprocessor/preprocessors.py
+++ b/iids/preprocessor/preprocessors.py
@@ -1,5 +1,6 @@
-#Import all the modules
 from sklearn.preprocessing import LabelEncoder
+from sklearn.model_selection import train_test_split
+from torch.utils.data import TensorDataset
+import numpy as np
+import torch
 
 class Preprocessor:
@@ -7,12 +8,12 @@ class Preprocessor:
 
     def __init__(self, data):
         self.data = data
+        # One LabelEncoder per categorical field, fitted in __encode_data
+        self._encoder = {key: LabelEncoder() for key in ('protocal', 'service', 'flag', 'label')}
 
     def removeNullValues(self):
         "This method is used to remove null values from the dataset"
         return self.data.dropna()
 
-    def labelEncoder(self, columns):i
+    def labelEncoder(self, columns):
         "Label Encoder that converts categorical data"
         le = LabelEncoder()
         for each in columns:
-            self.data[str(each)] = le.fit_transform(df[str(each)])
+            self.data[str(each)] = le.fit_transform(self.data[str(each)])
 
@@ -20,3 +21,57 @@
         return self.data
+
+    def train_test(self, data):
+        global X_train, X_test, y_train, y_test
+        # The last column is assumed to hold the label
+        X = data.iloc[:, :-1]
+        y = data.iloc[:, -1]
+        X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.4)
+        return data
+
+    # for Neural Networks:
+    # convert the string fields into numbers and pad the 41-dimensional
+    # features into an 8x8 matrix
+    def __encode_data(self, data_X, data_y):
+        self._encoder['protocal'].fit(list(set(data_X[:, 1])))
+        self._encoder['service'].fit(list(set(data_X[:, 2])))
+        self._encoder['flag'].fit(list(set(data_X[:, 3])))
+        self._encoder['label'].fit(list(set(data_y)))
+        data_X[:, 1] = self._encoder['protocal'].transform(data_X[:, 1])
+        data_X[:, 2] = self._encoder['service'].transform(data_X[:, 2])
+        data_X[:, 3] = self._encoder['flag'].transform(data_X[:, 3])
+        data_X = np.pad(data_X, ((0, 0), (0, 64 - len(data_X[0]))), 'constant').reshape(-1, 1, 8, 8)
+        data_y = self._encoder['label'].transform(data_y)
+        return data_X, data_y
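
__encode_data pads each 41-feature record with zeros up to 64 values so it can be reshaped into the 1x8x8 grid the CNN consumes. The arithmetic in isolation (numpy only, one fake record):

    import numpy as np

    record = np.arange(41, dtype=np.float32)            # one fake 41-feature row
    padded = np.pad(record, (0, 64 - len(record)), 'constant')
    print(padded.reshape(1, 8, 8).shape)                # (1, 8, 8)
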
+
+    # Split the data into training and test sets and wrap them as TensorDataset objects
+    def __split_data_to_tensor(self, data_X, data_y):
+        X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.3)
+        train_dataset = TensorDataset(
+            torch.from_numpy(X_train.astype(np.float32)),
+            torch.from_numpy(y_train.astype(np.int64))   # np.int is deprecated
+        )
+        test_dataset = TensorDataset(
+            torch.from_numpy(X_test.astype(np.float32)),
+            torch.from_numpy(y_test.astype(np.int64))
+        )
+        return train_dataset, test_dataset
+
+    def decode(self, data, label=False):
+        if not label:
+            _data = list(data)
+            _data[1] = self._encoder['protocal'].inverse_transform([_data[1]])[0]
+            _data[2] = self._encoder['service'].inverse_transform([_data[2]])[0]
+            _data[3] = self._encoder['flag'].inverse_transform([_data[3]])[0]
+            return _data
+        return self._encoder['label'].inverse_transform([data])[0]
+
+    def encode(self, data, label=False):
+        if not label:
+            _data = list(data)
+            _data[1] = self._encoder['protocal'].transform([_data[1]])[0]
+            _data[2] = self._encoder['service'].transform([_data[2]])[0]
+            _data[3] = self._encoder['flag'].transform([_data[3]])[0]
+            return _data
+        return self._encoder['label'].transform([data])[0]
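
neural_train() in nnclassifiers.py dereferences dataset.train_dataset, dataset.test_dataset and the matching DataLoaders, but nothing in this patch builds them yet. A sketch of the missing glue, under the assumption that the two TensorDatasets come from __split_data_to_tensor above (the class name and batch size are placeholders):

    from torch.utils.data import DataLoader

    class DatasetBundle:
        "Hypothetical object shaped the way neural_train expects."
        def __init__(self, train_dataset, test_dataset, batch_size=128):
            self.train_dataset = train_dataset
            self.test_dataset = test_dataset
            self.train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            self.test_dataloader = DataLoader(test_dataset, batch_size=batch_size)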