robbiecarroll.com

GIS | Data Science & Analysis | Coding & Automation | Deep Learning

Python Example 2

DATA

#Robert Carroll
#DSC615 Spring 2024

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math 

# Load the banknote-authentication table from CSV.
data_path = r'mypath\data_banknote_authentication.csv'
df = pd.read_csv(data_path)

# Expose every column as its own Series so the formulas further down can
# refer to the features (and the 0/1 class label) by name.
variance, skewness, kurtosis, entropy, counterfeit = (
    df[column]
    for column in ('variance', 'skewness', 'kurtosis', 'entropy', 'counterfeit')
)

# SEPARATE THE CURRENCIES & CALCULATE MEAN & STANDARD DEV__________________________________________________

# Separate the notes into two groups by class label:
# counterfeit == 0 -> real, counterfeit == 1 -> counterfeit.
feature_columns = ('variance', 'skewness', 'kurtosis', 'entropy')
is_real = df['counterfeit'] == 0
is_counterfeit = df['counterfeit'] == 1
real_df = df.loc[is_real]
counterfeit_df = df.loc[is_counterfeit]

# Real notes: per-feature mean, then per-feature standard deviation
# (pandas' default, i.e. the sample standard deviation with ddof=1).
avg_rl_vari, avg_rl_skew, avg_rl_kurt, avg_rl_entr = (
    real_df[column].mean() for column in feature_columns
)
std_rl_vari, std_rl_skew, std_rl_kurt, std_rl_entr = (
    real_df[column].std() for column in feature_columns
)

# Counterfeit notes: the same statistics, in the same feature order.
avg_ct_vari, avg_ct_skew, avg_ct_kurt, avg_ct_entr = (
    counterfeit_df[column].mean() for column in feature_columns
)
std_ct_vari, std_ct_skew, std_ct_kurt, std_ct_entr = (
    counterfeit_df[column].std() for column in feature_columns
)

# PROBABILITIES, FORMULAS, & ACCURACY__________________________________________________

# Euclidean distance from every note to each class centroid (the vector of
# per-class feature means). The Euclidean part of the assignment was skipped
# (per the professor, for graduate students); the distances are still computed.
sq_dist_rl = (
    (variance - avg_rl_vari) ** 2
    + (skewness - avg_rl_skew) ** 2
    + (kurtosis - avg_rl_kurt) ** 2
    + (entropy - avg_rl_entr) ** 2
)
euclid_dist_rl = np.sqrt(sq_dist_rl)

sq_dist_ct = (
    (variance - avg_ct_vari) ** 2
    + (skewness - avg_ct_skew) ** 2
    + (kurtosis - avg_ct_kurt) ** 2
    + (entropy - avg_ct_entr) ** 2
)
euclid_dist_ct = np.sqrt(sq_dist_ct)

def _gaussian_pdf(values, mean, std):
    """Return the normal density with the given mean/std at each of *values*.

    Computes ``1 / (std * sqrt(2*pi)) * exp(-0.5 * ((values - mean) / std)**2)``.
    *values* may be a scalar, NumPy array or pandas Series; the result has the
    same shape. *mean* and *std* are the distribution parameters.
    """
    z_score = (values - mean) / std
    return (1 / (std * np.sqrt(2 * np.pi))) * np.exp(-0.5 * z_score ** 2)


# Gaussian density of each feature under the "real" class statistics. The
# per-feature densities are multiplied together, i.e. the features are
# treated as independent when forming the overall likelihood of a note.
prob_gauss_vari_rl = _gaussian_pdf(variance, avg_rl_vari, std_rl_vari)
prob_gauss_skew_rl = _gaussian_pdf(skewness, avg_rl_skew, std_rl_skew)
prob_gauss_kurt_rl = _gaussian_pdf(kurtosis, avg_rl_kurt, std_rl_kurt)
prob_gauss_entr_rl = _gaussian_pdf(entropy, avg_rl_entr, std_rl_entr)
probability_real = prob_gauss_vari_rl * prob_gauss_skew_rl * prob_gauss_kurt_rl * prob_gauss_entr_rl

# Same computation under the "counterfeit" class statistics.
prob_gauss_vari_ct = _gaussian_pdf(variance, avg_ct_vari, std_ct_vari)
prob_gauss_skew_ct = _gaussian_pdf(skewness, avg_ct_skew, std_ct_skew)
prob_gauss_kurt_ct = _gaussian_pdf(kurtosis, avg_ct_kurt, std_ct_kurt)
prob_gauss_entr_ct = _gaussian_pdf(entropy, avg_ct_entr, std_ct_entr)
probability_counterfeit = prob_gauss_vari_ct * prob_gauss_skew_ct * prob_gauss_kurt_ct * prob_gauss_entr_ct

# Predict the class with the larger likelihood: 0 = real, 1 = counterfeit.
# The comparison runs element-wise over the whole Series at once, so it does
# not depend on the Series carrying 0..n-1 integer labels the way per-row
# `series[i]` lookups do. Ties and NaN comparisons fall to counterfeit (1),
# exactly as a strict `>` test does. `.tolist()` keeps the result a plain
# list of Python ints.
counterfeit_prediction = np.where(
    probability_real > probability_counterfeit, 0, 1
).tolist()

# Accuracy: share of notes whose predicted label equals the true label,
# expressed as a percentage rounded to two decimals.
correct_count = len([
    1
    for actual, predicted in zip(counterfeit, counterfeit_prediction)
    if actual == predicted
])
accuracy = round((correct_count / len(counterfeit)) * 100, 2)
#print(accuracy)

# RECLASSIFY__________________________________________________

# Tag every note with how its prediction compares with the true label.
# Keys are (true label, predicted label):
#   0 -> real note predicted real                (correct)
#   1 -> counterfeit note predicted counterfeit  (correct)
#   2 -> counterfeit note predicted real         (incorrect)
#   3 -> real note predicted counterfeit         (incorrect)
outcome_codes = {(0, 0): 0, (1, 1): 1, (1, 0): 2, (0, 1): 3}

# Pairs that match none of the four cases contribute no entry. The codes are
# collected straight into an array so they can index a colour table later.
new_classifications = np.array([
    outcome_codes[pair]
    for pair in zip(counterfeit, counterfeit_prediction)
    if pair in outcome_codes
])

# GRAPH AND PLOT STUFF__________________________________________________
                                                                        
# The six pairwise feature combinations to plot, as (x column, y column).
plots_labels = [
    ('variance', 'skewness'),
    ('variance', 'kurtosis'),
    ('variance', 'entropy'),
    ('skewness', 'kurtosis'),
    ('skewness', 'entropy'),
    ('kurtosis', 'entropy'),
]

# Derive the data tuples and the titles from the label pairs so the three
# lists always stay in the same order.
series_by_name = {
    'variance': variance,
    'skewness': skewness,
    'kurtosis': kurtosis,
    'entropy': entropy,
}
plots_data = [(series_by_name[x_name], series_by_name[y_name]) for x_name, y_name in plots_labels]
plot_titles = [f'{x_name} vs {y_name}' for x_name, y_name in plots_labels]

#plot the data in the list
for i in range(0, len(plots_data)):
    
    #var tuples
    plot_data = plots_data[i]
    plots_label = plots_labels[i]
    plot_title = plot_titles[i]
    
    #turn the tuples into individual vectors
    x = plot_data[0]
    y = plot_data[1]
    x_label = plots_label[0]
    y_label = plots_label[1]

    #create plot graph
    fig, ax = plt.subplots(1, figsize=(10,10))
    plt.grid(True)  
    plt.xlabel(x_label, fontsize=14)
    plt.ylabel(y_label, fontsize=14)
    plt.title(f'{plot_title}, accuracy {accuracy} %', fontsize=17)
    
    #compute real centroid
    real_df = df.loc[df['counterfeit'] == 0]
    centroidx_real = real_df[x_label].mean()
    centroidy_real = real_df[y_label].mean()
    
    #compute counterfeit centroid
    counterfeit_df = df.loc[df['counterfeit'] == 1]
    centroidx_counterfeit = counterfeit_df[x_label].mean()
    centroidy_counterfeit = counterfeit_df[y_label].mean()
        
    # assign categories & use colormap
    colormap = np.array(['#8888FF', '#FF8888', '#FFCCCC', '#CCCCFF'])

    #plot x & y values  
    plt.scatter(x, y, c=colormap[new_classifications], s=80)
    
    #plot real centroid
    plt.scatter(centroidx_real, centroidy_real, s=300, marker ="x", color = 'black', label = 'real')
    plt.text(centroidx_real, centroidy_real-1.5, 'real', ha = 'center', va = 'bottom', fontsize=17, color='black', weight="bold")

    #plot counterfeit centroid
    plt.scatter(centroidx_counterfeit, centroidy_counterfeit, s=300, marker ="x", color = 'black', label = 'counterfeit')
    plt.text(cen

© 2024 Robert Carroll