Home » Mortgage Default Demo Mortgage Default Demo
View the training data
Exploratory data analysis
Correlation chart
View the predicted data
import asyncio
#import pyodide
import numpy as np
import pandas as pd
import panel as pn
import matplotlib.pyplot as plt
import seaborn as sns
from pyodide.http import open_url
from panel.io.pyodide import show
def _read_remote_csv(path):
    """Open *path* with ``open_url`` and parse the response as CSV."""
    return pd.read_csv(open_url(path))


# Labelled data set used for the training/test split.
print('Downloading the training and test data . . .')
data_train = _read_remote_csv('/mtgdefault.csv')
# Second DataFrame constructed from the training data.
df = pd.DataFrame(data_train)

# Data set that is scored once a model has been fitted.
print('Downloading the validation data . . .')
data_test = _read_remote_csv('/mtgdefaulttest.csv')
print('View the training and test data')
#
source_table = pn.widgets.Tabulator(pagination='local', page_size=10)
#source_table = pn.widgets.Tabulator(pagination='remote', layout='fit_data_stretch', width=330, page_size=10)
source_table.value = data_train
#new
filename, button = source_table.download_menu(
text_kwargs={'name': 'Enter filename', 'value': 'default.csv'},
button_kwargs={'name': 'Download table'}
)
source_table = pn.Row(
pn.Column(filename, button),
source_table
)
#new end
document.getElementById('source_table').style.display = 'block'
await show(source_table, 'source_table')
print('View the correlation matrix')
import matplotlib.pyplot as plt
import numpy as np
x = np.random.randn(100)
y = np.random.randn(100)
fig, ax = plt.subplots()
ax.scatter(x, y)
fig
await show(fig, 'fig')
from sklearn.model_selection import train_test_split

# Features are every column except 'outcome' and 'ID'.
X_all = data_train.drop(columns=['outcome', 'ID'])
# The 'outcome' column is the prediction target.
y_all = data_train['outcome']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable between runs.
num_test = 0.20
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=num_test,
    random_state=23,
)
print('Split of training and test data complete . . .')
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.model_selection import GridSearchCV

# Base estimator whose hyper-parameters are selected by the grid search.
clf = RandomForestClassifier()

# Previously used, wider grid (noted by the original author as very accurate;
# the obsolete 'auto' option for max_features is omitted here):
#   n_estimators: [4, 6, 9]; max_features: ['log2', 'sqrt'];
#   criterion: ['entropy', 'gini']; max_depth: [2, 3, 5, 10];
#   min_samples_split: [2, 3, 5]; min_samples_leaf: [1, 5, 8]

# Current grid: a single value per parameter, so exactly one combination
# is evaluated.
parameters = {
    'n_estimators': [2],
    # 'sqrt' is what 'auto' meant for classifiers; 'auto' itself was
    # deprecated in scikit-learn 1.1 and is rejected from 1.3 onwards.
    'max_features': ['sqrt'],
    'criterion': ['entropy'],
    'max_depth': [1],
    'min_samples_split': [2],
    'min_samples_leaf': [5],
}

# Parameter combinations are compared by plain classification accuracy.
acc_scorer = make_scorer(accuracy_score)

print('Running the model on training data . . .')
# Run the cross-validated grid search on the training partition.
grid_obj = GridSearchCV(clf, parameters, scoring=acc_scorer)
grid_obj = grid_obj.fit(X_train, y_train)

# With the default refit=True the search has already refitted the winning
# configuration on all of X_train, so best_estimator_ is ready to use and a
# second fit would only repeat that work.
clf = grid_obj.best_estimator_

# Report accuracy on the held-out test partition.
predictions = clf.predict(X_test)
print(accuracy_score(y_test, predictions))
print('Making predictions on new data that does not have mortgage default status yet')
ids = data_test['ID']
cred = data_test['Credit_score']
predictions = clf.predict(data_test.drop('ID', axis=1))
output = pd.DataFrame({ 'ID' : ids, 'Credit_score' : cred, 'outcome': predictions })
#outputcsv = output.to_csv('predictions.csv', index = False)
#print(output.head())
table = pn.widgets.Tabulator(pagination='remote', page_size=10)
table.value = pd.DataFrame({ 'ID' : ids, 'Credit_score' : cred, 'outcome': predictions })
document.getElementById('table').style.display = 'block'
await show(table, 'table')
#await show(pred, 'pred')