import pandas as pd
import numpy as np
from pandas import Series,DataFrame
# Build the labeled baseline data set: read every CICFlowMeter flow CSV in
# BASELINE_FILES, stack them row-wise, set 'Label' to 0 on every row, and
# write the combined frame to disk.
BASELINE_DIR = "/home/z/Downloads/CUPID-Baselines-CICFlowMeter/"

# One entry per capture; to add or drop a capture, edit this list only.
BASELINE_FILES = [
    "042219_1000.pcap_Flow.csv",
    "042319_1000_00000_20190423090037.pcap_Flow.csv",
    "042319_1000_00001_20190423112114.pcap_Flow.csv",
    "042319_1000_00002_20190423142733.pcap_Flow.csv",
    "042319_1000_00003_20190423172324.pcap_Flow.csv",
    "042319_1000_00004_20190423204716.pcap_Flow.csv",
    "042319_1000_00005_20190423234420.pcap_Flow.csv",
    "042319_1000_00006_20190424024732.pcap_Flow.csv",
    "042319_1000_00007_20190424060257.pcap_Flow.csv",
    "042419_1000_00000_20190424090033.pcap_Flow.csv",
    "042419_1000_00001_20190424120953.pcap_Flow.csv",
    "042419_1000_00002_20190424152735.pcap_Flow.csv",
    "042419_1000_00003_20190424184622.pcap_Flow.csv",
    "042419_1000_00004_20190424212615.pcap_Flow.csv",
    "042419_1000_00005_20190425004512.pcap_Flow.csv",
    "042419_1000_00006_20190425033458.pcap_Flow.csv",
    "042419_1000_00007_20190425062020.pcap_Flow.csv",
    "102519_00000_20191024083134.pcap_Flow.csv",
    "102519_00001_20191024111356.pcap_Flow.csv",
    "102519_00002_20191024134814.pcap_Flow.csv",
    "102519_00003_20191024170045.pcap_Flow.csv",
    "102519_00004_20191024195649.pcap_Flow.csv",
    "102519_00005_20191024224425.pcap_Flow.csv",
    "102519_00006_20191025013841.pcap_Flow.csv",
    "102519_00007_20191025044455.pcap_Flow.csv",
    "102519_00008_20191025075121.pcap_Flow.csv",
]

# Files are concatenated in list order, so row order matches the order above.
df_baselines = pd.concat(
    [pd.read_csv(BASELINE_DIR + file_name, header=0) for file_name in BASELINE_FILES],
    axis=0,
)

# Every baseline flow receives the same label value.
df_baselines['Label'] = 0

# NOTE(review): to_csv writes the row index by default, and concat keeps each
# file's own index, so index values repeat in the output. Pass index=False
# here if downstream readers do not expect that column -- confirm first.
df_baselines.to_csv(r'/home/z/Python/jupyter-notebooks/CUPID-Baselines-Labeled.csv')
# Load the CUPID-Human-Surfing-Queen capture and set 'Label' to 0 on every row.
human_surfing_csv = "/home/z/Python/jupyter-notebooks/CUPID-Human-Surfing-Queen.csv"
df_human = pd.read_csv(human_surfing_csv, header=0).assign(Label=0)
# Import and Label Human DVWA
# Load the CUPID-Human-DVWA-Queen capture; its first row holds the column names.
dvwa_csv = "/home/z/Python/jupyter-notebooks/CUPID-Human-DVWA-Queen.csv"
df_dvwa = pd.read_csv(dvwa_csv, header=0)
# Labels are derived from the source IP. P2_DVWA and P10_DVWA have ip.src==10.10.10.2
def bad(x: object) -> int:
    """Return 1 if source IP *x* is labeled 1 in the DVWA capture, else 0.

    The addresses labeled 1 are '10.10.10.1' and '10.10.10.2'. Every other
    value, including non-string input such as None or NaN, yields 0.
    """
    # A tuple (not a set) keeps plain == comparison, so an unhashable value
    # is labeled 0 rather than raising TypeError.
    return 1 if x in ('10.10.10.1', '10.10.10.2') else 0
# Run every DVWA source IP through bad() and store the result in 'Label'.
dvwa_src_ips = df_dvwa['Src IP']
df_dvwa['Label'] = dvwa_src_ips.apply(bad)

# Stack the DVWA flows beneath the human-surfing flows, then save the result.
df_human = pd.concat([df_human, df_dvwa], axis=0)
human_labeled_csv = r'/home/z/Python/jupyter-notebooks/CUPID-Human-Labeled.csv'
df_human.to_csv(human_labeled_csv)

# Load the CUPID-Auto-Queen capture; its first row holds the column names.
auto_csv = "/home/z/Python/jupyter-notebooks/CUPID-Auto-Queen.csv"
df_auto = pd.read_csv(auto_csv, header=0)
# Label based on the IP addresses.
def bad(x: object) -> int:
    """Return 1 if source IP *x* is labeled 1 in the automated capture, else 0.

    The addresses labeled 1 are '10.10.10.13', '10.10.10.18', '10.10.10.19'
    and '0.0.0.0'. Every other value, including non-string input such as
    None or NaN, yields 0.
    """
    # A tuple (not a set) keeps plain == comparison, so an unhashable value
    # is labeled 0 rather than raising TypeError.
    flagged = ('10.10.10.13', '10.10.10.18', '10.10.10.19', '0.0.0.0')
    return 1 if x in flagged else 0
# Run every source IP of the automated capture through bad(), store the
# result in 'Label', and save the labeled frame.
auto_src_ips = df_auto['Src IP']
df_auto['Label'] = auto_src_ips.apply(bad)
auto_labeled_csv = r'/home/z/Python/jupyter-notebooks/CUPID-Auto-Labeled.csv'
df_auto.to_csv(auto_labeled_csv)

# Compare how many flows carry Label 1 versus Label 0. count() reports the
# non-null cells per column within each label group. This stays the last
# expression so a notebook displays the resulting table.
df_auto.groupby(by='Label').count()
Label | Flow ID | Src IP | Src Port | Dst IP | Dst Port | Protocol | Timestamp | Flow Duration | Total Fwd Packet | Total Bwd packets | ... | Fwd Act Data Pkts | Fwd Seg Size Min | Active Mean | Active Std | Active Max | Active Min | Idle Mean | Idle Std | Idle Max | Idle Min |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | ... | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 | 74695 |
1 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | ... | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 | 114873 |
2 rows × 83 columns