import warnings
# Suppress every warning for the rest of the session. This also hides
# deprecation notices that later library calls may emit.
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import Accuracy

# Absolute location of the Universal Bank CSV on the author's machine.
DATA_PATH = '/Users/mekki/Python_Projects_Datasets/Bank Loan Approval Prediction With Artificial Neural Nets/UniversalBank.csv'

# Read the file into a DataFrame and display it.
bank = pd.read_csv(DATA_PATH)
bank

# Column names, dtypes and non-null counts.
bank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  5000 non-null   int64  
 1   Age                 5000 non-null   int64  
 2   Experience          5000 non-null   int64  
 3   Income              5000 non-null   int64  
 4   ZIP Code            5000 non-null   int64  
 5   Family              5000 non-null   int64  
 6   CCAvg               5000 non-null   float64
 7   Education           5000 non-null   int64  
 8   Mortgage            5000 non-null   int64  
 9   Personal Loan       5000 non-null   int64  
 10  Securities Account  5000 non-null   int64  
 11  CD Account          5000 non-null   int64  
 12  Online              5000 non-null   int64  
 13  CreditCard          5000 non-null   int64  
dtypes: float64(1), int64(13)
memory usage: 547.0 KB

# Summary statistics, transposed so each column of `bank` is shown as a row.
bank.describe().transpose()

# Total number of missing cells across the whole frame.
bank.isna().sum().sum()

0

# Mean of the 'Age' column, rounded to two decimals.
x = np.round(bank['Age'].mean(), 2)
print(x, 'years old')

45.34 years old

# Rows whose 'CreditCard' flag equals 1, and how many there are.
has_card = bank['CreditCard'] == 1
creditcard = bank[has_card]
len(creditcard)

1470

# Credit-card holders as a percentage of all rows in `bank`.
card_fraction = len(creditcard) / len(bank)
creditcardperc = card_fraction * 100
print('% of customers who have credit cards is', creditcardperc, '%')

% of customers who have credit cards is 29.4 %

# Rows whose 'Personal Loan' flag equals 1, and how many there are.
took_loan = bank['Personal Loan'] == 1
personalloans = bank[took_loan]
len(personalloans)

480

# Rows with 'Personal Loan' == 1 as a percentage of all rows in `bank`.
personalloansperc=len(personalloans)/len(bank)*100
# The message previously read "personal lian"; the typo is corrected to "loan".
print('% of customers who accepted the personal loan is', personalloansperc, '%')

% of customers who accepted the personal lian is 9.6 %

# Count plot of the 'Personal Loan' flag, one palette colour per bar.
colors = ['#95BB8F', '#2B6E75']
ax = sns.countplot(x='Personal Loan', data=bank, palette=colors)
total = len(bank['Personal Loan'])

# Write each bar's share of all rows at the centre of the bar.
for bar in ax.patches:
    bar_height = bar.get_height()
    percentage = f'{100 * bar_height / total:.1f}%'
    # Black text on light bars, white text on dark bars (mean RGB vs. 0.5).
    brightness = np.average(bar.get_facecolor()[:3])
    text_color = 'black' if brightness > 0.5 else 'white'
    centre = (bar.get_x() + bar.get_width() / 2., bar_height / 2)
    ax.annotate(percentage, centre, ha='center', va='center',
                color=text_color, xytext=(0, 0), textcoords='offset points')
plt.show()

# Count plot of the 'Education' column, one palette colour per bar.
colors = ['#95BB8F', '#2B6E75', '#2D335D']
ax = sns.countplot(x='Education', data=bank, palette=colors)
total_education = len(bank['Education'])

# Label each bar with its share of all rows, nudged 10 points above centre.
for bar in ax.patches:
    percentage_education = 100 * bar.get_height() / total_education
    # Black text on light bars, white text on dark bars (mean RGB vs. 0.5).
    brightness = np.average(bar.get_facecolor()[:3])
    text_color = 'black' if brightness > 0.5 else 'white'
    centre = (bar.get_x() + bar.get_width() / 2., bar.get_height() / 2)
    ax.annotate(f'{percentage_education:.1f}%', centre, ha='center', va='center',
                color=text_color, xytext=(0, 10), textcoords='offset points')

plt.show()

# Wide (20x6) figure holding a count plot with one bar per 'Age' value.
plt.figure(figsize=(20, 6))
sns.countplot(x='Age', data=bank, palette='crest')

<Axes: xlabel='Age', ylabel='count'>

# Count plot of the 'CreditCard' flag with each bar labelled by its share of
# all rows. The helper names previously said "education" (copied from the
# Education plot); they now name the column actually being plotted.
colors=['#95BB8F','#2B6E75']
ax=sns.countplot(data=bank, x='CreditCard', palette=colors)
total_creditcard = len(bank['CreditCard'])
for p in ax.patches:
    percentage_creditcard = 100 * p.get_height() / total_creditcard
    # Black text on light bars, white text on dark bars (mean RGB vs. 0.5).
    text_color = 'black' if np.average(p.get_facecolor()[:3]) > 0.5 else 'white'
    ax.annotate(f'{percentage_creditcard:.1f}%', (p.get_x() + p.get_width() / 2., p.get_height() / 2),
                ha='center', va='center', color=text_color, xytext=(0, 10), textcoords='offset points')
plt.show()

# Income distribution: density-normalised histogram plus a KDE curve in a
# contrasting colour. `sns.distplot` is deprecated, so the two layers are
# drawn with its replacements: `histplot` for the bars and `kdeplot` for the
# curve. alpha/edgecolor approximate the old translucent, borderless bars.
sns.histplot(x=bank['Income'], stat='density', color='#95BB8F',
             alpha=0.4, edgecolor=(1, 1, 1, 0.4))
sns.kdeplot(x=bank['Income'], color='#2B6E75')

<Axes: xlabel='Income', ylabel='Density'>

# Rows whose 'Personal Loan' flag equals 0.
declined_loan = bank['Personal Loan'] == 0
no_personalloans = bank[declined_loan]

# Display the loan subset, then the no-loan subset.
personalloans

no_personalloans

# Summary statistics of the no-loan subset, one row per column.
no_personalloans.describe().transpose()

# Income per 'Personal Loan' group: a box plot with the individual rows
# overlaid as semi-transparent black markers.
# NOTE(review): swarmplot places one marker per row of `bank`; this may be
# slow on the full frame - confirm it is still wanted.
sns.boxplot(x='Personal Loan', y='Income', data=bank, color='#95BB8F')
sns.swarmplot(data=bank, x='Personal Loan', y='Income', color='black', alpha=0.5)

<Axes: xlabel='Personal Loan', ylabel='Income'>

# Box plot of 'Income' for each value of the 'Personal Loan' flag.
sns.boxplot(data=bank, x='Personal Loan', y='Income', color='#95BB8F')
# The title and axis labels were the placeholders 'A', 'B' and 'C'; they now
# name the plotted columns.
plt.title('Income by Personal Loan')
plt.xlabel('Personal Loan')
plt.ylabel('Income')
plt.show()

plt.figure(figsize=(10, 6))

# Violin plot of 'Income' for each value of the 'Personal Loan' flag, with
# quartile lines drawn inside each violin.
colors=['#95BB8F','#2B6E75']
sns.violinplot(x='Personal Loan', y='Income', data=bank, inner='quartile', palette=colors)

# The title and axis labels were the placeholders 'A', 'B' and 'C'; they now
# name the plotted columns.
plt.title('Income Distribution by Personal Loan')
plt.xlabel('Personal Loan')
plt.ylabel('Income')
plt.show()

plt.figure(figsize=(10,6))
# Overlaid income distributions of the loan and no-loan subsets, each
# normalised to a density on its own. `sns.distplot` is deprecated; this is
# the equivalent `histplot` call (density bars + KDE with cut=3, translucent
# bars with faint edges).
sns.histplot(x=personalloans['Income'], kde=True, stat='density', kde_kws={'cut': 3},
             color='#95BB8F', alpha=0.4, edgecolor=(1, 1, 1, 0.4))
sns.histplot(x=no_personalloans['Income'], kde=True, stat='density', kde_kws={'cut': 3},
             color='#2B6E75', alpha=0.4, edgecolor=(1, 1, 1, 0.4))

<Axes: xlabel='Income', ylabel='Density'>

# Pairwise grid over every column of `bank`: scatter plots off the diagonal
# (different colours above and below it) and histograms on the diagonal,
# which do not share a y-axis.
g = sns.PairGrid(bank, diag_sharey=False)
(
    g.map_upper(sns.scatterplot, color='#2D335D')
     .map_lower(sns.scatterplot, color='#95BB8F')
     .map_diag(sns.histplot, color='#2B6E75')
)

<seaborn.axisgrid.PairGrid at 0x174d39f50>

# Pairwise correlations between the columns of `bank`, drawn as an annotated
# heatmap on a 20x20 figure.
corrmat = bank.corr()
plt.figure(figsize=(20, 20))
sns.heatmap(corrmat, cmap='crest', annot=True)
plt.title('Correlation Matrix')
plt.show()

# Distribution of 'CCAvg': density-normalised histogram with a KDE curve.
# `sns.distplot` is deprecated; this is the equivalent `histplot` call
# (density bars + KDE with cut=3, translucent bars with faint edges).
sns.histplot(x=bank['CCAvg'], kde=True, stat='density', kde_kws={'cut': 3},
             color='#95BB8F', alpha=0.4, edgecolor=(1, 1, 1, 0.4))
plt.title('Distribution of the Average Credit Card Spending')
plt.show()

plt.figure(figsize=(10,6))
# Overlaid 'CCAvg' distributions of the loan and no-loan subsets, each
# normalised to a density on its own. `sns.distplot` is deprecated; this is
# the equivalent `histplot` call (density bars + KDE with cut=3, translucent
# bars with faint edges).
sns.histplot(x=personalloans['CCAvg'], kde=True, stat='density', kde_kws={'cut': 3},
             color='#95BB8F', alpha=0.4, edgecolor=(1, 1, 1, 0.4))
sns.histplot(x=no_personalloans['CCAvg'], kde=True, stat='density', kde_kws={'cut': 3},
             color='#2B6E75', alpha=0.4, edgecolor=(1, 1, 1, 0.4))

<Axes: xlabel='CCAvg', ylabel='Density'>

# Display the column labels of `bank`.
bank.columns

Index(['ID', 'Age', 'Experience', 'Income', 'ZIP Code', 'Family', 'CCAvg',
       'Education', 'Mortgage', 'Personal Loan', 'Securities Account',
       'CD Account', 'Online', 'CreditCard'],
      dtype='object')

# Display (number of rows, number of columns) of `bank`.
bank.shape

(5000, 14)

# Feature matrix: every column of `bank` except the target.
# NOTE(review): 'ID' and 'ZIP Code' stay in the features - confirm that this
# is intended.
target_column = 'Personal Loan'
x = bank.drop(target_column, axis=1)
x

# Target vector: the 'Personal Loan' flag.
y = bank[target_column]
y

0       0
1       0
2       0
3       0
4       0
       ..
4995    0
4996    0
4997    0
4998    0
4999    0
Name: Personal Loan, Length: 5000, dtype: int64

from tensorflow.keras.utils import to_categorical
# One-hot encode the integer target: each label becomes a row with a 1.0 in
# the column of its class (two columns for this 0/1 target).
y=to_categorical(y)
y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

from sklearn import metrics
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

# Hold out 10% of the rows for testing BEFORE fitting the scaler.
# Previously the scaler was fitted on all rows, so test-set statistics leaked
# into the training data. `stratify=y` keeps the class ratio of the one-hot
# target the same in both parts; `random_state` makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, stratify=y, random_state=42)

# Learn the mean/std from the training rows only, then apply that same
# transform to the held-out rows.
scaler_x=StandardScaler()
x_train=scaler_x.fit_transform(x_train)
x_test=scaler_x.transform(x_test)

# Keep `x` as a scaled array, as before, for any later code that reads it.
x=scaler_x.transform(x)

x_train.shape, x_test.shape, y_train.shape, y_test.shape

((4500, 13), (500, 13), (4500, 2), (500, 2))

#Create Keras Sequential model
ANN_model = keras.Sequential()

# Declare the input width from the training matrix instead of hard-coding 13,
# so the network still builds if the set of feature columns changes.
ANN_model.add(keras.Input(shape=(x_train.shape[1],)))

#Add Dense Layer
ANN_model.add(Dense(250, kernel_initializer='normal', activation='relu'))

# Hidden stack: each Dense layer is preceded by Dropout for regularisation.
ANN_model.add(Dropout(0.3))
ANN_model.add(Dense(500, activation='relu'))

ANN_model.add(Dropout(0.3))
ANN_model.add(Dense(500, activation='relu'))

ANN_model.add(Dropout(0.3))
ANN_model.add(Dense(500, activation='relu'))

# NOTE(review): this hidden layer uses a linear activation while the others
# use relu - confirm that this is intentional.
ANN_model.add(Dropout(0.4))
ANN_model.add(Dense(250, activation='linear'))

ANN_model.add(Dropout(0.4))

#Add Dense Layer with Softmax Activation
ANN_model.add(Dense(2, activation = 'softmax'))
#2 neurons because y is categorical

# Print the layer-by-layer summary with output shapes and parameter counts.
ANN_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense (Dense)               (None, 250)               3500      
                                                                 
 dropout (Dropout)           (None, 250)               0         
                                                                 
 dense_1 (Dense)             (None, 500)               125500    
                                                                 
 dropout_1 (Dropout)         (None, 500)               0         
                                                                 
 dense_2 (Dense)             (None, 500)               250500    
                                                                 
 dropout_2 (Dropout)         (None, 500)               0         
                                                                 
 dense_3 (Dense)             (None, 500)               250500    
                                                                 
 dropout_3 (Dropout)         (None, 500)               0         
                                                                 
 dense_4 (Dense)             (None, 250)               125250    
                                                                 
 dropout_4 (Dropout)         (None, 250)               0         
                                                                 
 dense_5 (Dense)             (None, 2)                 502       
                                                                 
=================================================================
Total params: 755752 (2.88 MB)
Trainable params: 755752 (2.88 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________

# Training configuration: Adam optimiser, categorical cross-entropy loss,
# accuracy reported as the metric.
ANN_model.compile(
    optimizer='Adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 20 epochs, holding back 20% of the training rows for validation;
# the per-epoch metrics are kept in `history`.
history = ANN_model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=20,
    verbose=1,
)

Epoch 1/20
113/113 [==============================] - 1s 4ms/step - loss: 0.1831 - accuracy: 0.9311 - val_loss: 0.1161 - val_accuracy: 0.9533
Epoch 2/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0981 - accuracy: 0.9636 - val_loss: 0.0829 - val_accuracy: 0.9678
Epoch 3/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0853 - accuracy: 0.9703 - val_loss: 0.0851 - val_accuracy: 0.9644
Epoch 4/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0736 - accuracy: 0.9739 - val_loss: 0.0906 - val_accuracy: 0.9644
Epoch 5/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0690 - accuracy: 0.9747 - val_loss: 0.0712 - val_accuracy: 0.9733
Epoch 6/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0652 - accuracy: 0.9767 - val_loss: 0.0910 - val_accuracy: 0.9678
Epoch 7/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0567 - accuracy: 0.9822 - val_loss: 0.0787 - val_accuracy: 0.9722
Epoch 8/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0618 - accuracy: 0.9792 - val_loss: 0.0647 - val_accuracy: 0.9722
Epoch 9/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0583 - accuracy: 0.9808 - val_loss: 0.0645 - val_accuracy: 0.9756
Epoch 10/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0499 - accuracy: 0.9836 - val_loss: 0.0686 - val_accuracy: 0.9733
Epoch 11/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0548 - accuracy: 0.9833 - val_loss: 0.0731 - val_accuracy: 0.9733
Epoch 12/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0479 - accuracy: 0.9842 - val_loss: 0.0608 - val_accuracy: 0.9744
Epoch 13/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0373 - accuracy: 0.9867 - val_loss: 0.0813 - val_accuracy: 0.9733
Epoch 14/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0401 - accuracy: 0.9869 - val_loss: 0.0865 - val_accuracy: 0.9778
Epoch 15/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0438 - accuracy: 0.9878 - val_loss: 0.0851 - val_accuracy: 0.9767
Epoch 16/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0381 - accuracy: 0.9839 - val_loss: 0.0706 - val_accuracy: 0.9744
Epoch 17/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0415 - accuracy: 0.9861 - val_loss: 0.0811 - val_accuracy: 0.9733
Epoch 18/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0355 - accuracy: 0.9881 - val_loss: 0.0734 - val_accuracy: 0.9722
Epoch 19/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0274 - accuracy: 0.9900 - val_loss: 0.0954 - val_accuracy: 0.9722
Epoch 20/20
113/113 [==============================] - 1s 6ms/step - loss: 0.0358 - accuracy: 0.9881 - val_loss: 0.0875 - val_accuracy: 0.9711

# Training and validation loss per epoch, with a legend in the upper right.
loss_curves = history.history
plt.plot(loss_curves['loss'], color='#95BB8F', label='train_loss')
plt.plot(loss_curves['val_loss'], color='#2B6E75', label='val_loss')
plt.legend(loc='upper right')
plt.show()

# Model output for every held-out row (one softmax vector per row).
predictions = ANN_model.predict(x_test)

# Predicted label = index of the largest entry in each output row.
predict = list(np.argmax(predictions, axis=1))

16/16 [==============================] - 0s 1ms/step

# Evaluate on the held-out rows; index 1 of the result is printed as accuracy.
result = ANN_model.evaluate(x_test, y_test)

print(f'Accuracy : {result[1]}')

16/16 [==============================] - 0s 1ms/step - loss: 0.0691 - accuracy: 0.9800
Accuracy : 0.9800000190734863

# Recover integer class labels from the one-hot rows of y_test (vectorised
# argmax instead of a per-row Python loop; the result is the same list).
y_original = list(np.argmax(y_test, axis=1))

# Rows are true labels, columns are predicted labels.
confusion_matrix = metrics.confusion_matrix(y_original, predict)
# fmt='d' prints the cell counts as integers; the default '.2g' format would
# render a count such as 453 as 4.5e+02.
sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='crest')

<Axes: >

from sklearn.metrics import classification_report

# Text report of per-class precision, recall, F1 and support for the
# held-out predictions.
report = classification_report(y_original, predict)
print(report)

              precision    recall  f1-score   support

           0       0.98      0.99      0.99       453
           1       0.93      0.85      0.89        47

    accuracy                           0.98       500
   macro avg       0.96      0.92      0.94       500
weighted avg       0.98      0.98      0.98       500

	count	mean	std	min	25%	50%	75%	max
ID	5000.0	2500.500000	1443.520003	1.0	1250.75	2500.5	3750.25	5000.0
Age	5000.0	45.338400	11.463166	23.0	35.00	45.0	55.00	67.0
Experience	5000.0	20.104600	11.467954	-3.0	10.00	20.0	30.00	43.0
Income	5000.0	73.774200	46.033729	8.0	39.00	64.0	98.00	224.0
ZIP Code	5000.0	93152.503000	2121.852197	9307.0	91911.00	93437.0	94608.00	96651.0
Family	5000.0	2.396400	1.147663	1.0	1.00	2.0	3.00	4.0
CCAvg	5000.0	1.937938	1.747659	0.0	0.70	1.5	2.50	10.0
Education	5000.0	1.881000	0.839869	1.0	1.00	2.0	3.00	3.0
Mortgage	5000.0	56.498800	101.713802	0.0	0.00	0.0	101.00	635.0
Personal Loan	5000.0	0.096000	0.294621	0.0	0.00	0.0	0.00	1.0
Securities Account	5000.0	0.104400	0.305809	0.0	0.00	0.0	0.00	1.0
CD Account	5000.0	0.060400	0.238250	0.0	0.00	0.0	0.00	1.0
Online	5000.0	0.596800	0.490589	0.0	0.00	1.0	1.00	1.0
CreditCard	5000.0	0.294000	0.455637	0.0	0.00	0.0	1.00	1.0

	count	mean	std	min	25%	50%	75%	max
ID	4520.0	2512.165487	1448.299331	1.0	1259.75	2518.5	3768.25	5000.0
Age	4520.0	45.367257	11.450427	23.0	35.00	45.0	55.00	67.0
Experience	4520.0	20.132301	11.456672	-3.0	10.00	20.0	30.00	43.0
Income	4520.0	66.237389	40.578534	8.0	35.00	59.0	84.00	224.0
ZIP Code	4520.0	93152.428761	2156.949654	9307.0	91911.00	93437.0	94608.00	96651.0
Family	4520.0	2.373451	1.148771	1.0	1.00	2.0	3.00	4.0
CCAvg	4520.0	1.729009	1.567647	0.0	0.60	1.4	2.30	8.8
Education	4520.0	1.843584	0.839975	1.0	1.00	2.0	3.00	3.0
Mortgage	4520.0	51.789381	92.038931	0.0	0.00	0.0	98.00	635.0
Personal Loan	4520.0	0.000000	0.000000	0.0	0.00	0.0	0.00	0.0
Securities Account	4520.0	0.102212	0.302961	0.0	0.00	0.0	0.00	1.0
CD Account	4520.0	0.035841	0.185913	0.0	0.00	0.0	0.00	1.0
Online	4520.0	0.595796	0.490792	0.0	0.00	1.0	1.00	1.0
CreditCard	4520.0	0.293584	0.455454	0.0	0.00	0.0	1.00	1.0

	ID	Age	Experience	Income	ZIP Code	Family	CCAvg	Education	Mortgage	Personal Loan	Securities Account	CD Account	Online	CreditCard
0	1	25	1	49	91107	4	1.6	1	0	0	1	0	0	0
1	2	45	19	34	90089	3	1.5	1	0	0	1	0	0	0
2	3	39	15	11	94720	1	1.0	1	0	0	0	0	0	0
3	4	35	9	100	94112	1	2.7	2	0	0	0	0	0	0
4	5	35	8	45	91330	4	1.0	2	0	0	0	0	0	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4995	4996	29	3	40	92697	1	1.9	3	0	0	0	0	1	0
4996	4997	30	4	15	92037	4	0.4	1	85	0	0	0	1	0
4997	4998	63	39	24	93023	2	0.3	3	0	0	0	0	0	0
4998	4999	65	40	49	90034	3	0.5	2	0	0	0	0	1	0
4999	5000	28	4	83	92612	3	0.8	1	0	0	0	0	1	1

	ID	Age	Experience	Income	ZIP Code	Family	CCAvg	Education	Mortgage	Personal Loan	Securities Account	CD Account	Online	CreditCard
9	10	34	9	180	93023	1	8.9	3	0	1	0	0	0	0
16	17	38	14	130	95010	4	4.7	3	134	1	0	0	0	0
18	19	46	21	193	91604	2	8.1	3	0	1	0	0	0	0
29	30	38	13	119	94104	1	3.3	2	0	1	0	1	1	1
38	39	42	18	141	94114	3	5.0	3	0	1	1	1	1	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4883	4884	38	13	129	92646	3	4.1	3	0	1	0	1	1	1
4927	4928	43	19	121	94720	1	0.7	2	0	1	0	1	1	1
4941	4942	28	4	112	90049	2	1.6	2	0	1	0	0	1	0
4962	4963	46	20	122	90065	3	3.0	3	0	1	0	1	1	1
4980	4981	29	5	135	95762	3	5.3	1	0	1	0	1	1	1