※ From Session 6 onward, posts will be uploaded in a standardized format.
<Theory>
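In brief, logistic regression models the probability that an observation belongs to the positive class using the sigmoid (logistic) function, which is exactly what the practices below fit and plot:

p(x) = 1 / (1 + exp(-(b0 + b1*x)))

The fitting step estimates the coefficients b0 and b1 (and b1, b2, b3 in the multivariate credit-scoring case) from the training data.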
<Practice 1> Sigmoid_Function.py
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
# data
x1 = np.array([0, 0.6, 1.1, 1.5, 1.8, 2.5, 3, 3.1, 3.9, 4, 4.9, 5, 5.1])
y1 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
x2 = np.array([3, 3.8, 4.4, 5.2, 5.5, 6.5, 6, 6.1, 6.9, 7, 7.9, 8, 8.1])
y2 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# outcome is 0 or 1; the two classes overlap in x
X = np.array(np.concatenate([x1, x2])).reshape(-1, 1)
y = np.concatenate([y1, y2])
print(X)
print(y)
# fitting
model = LogisticRegression()  # b parameters estimated by an iterative solver (lbfgs by default)
model.fit(X, y)
print("b0 is: ", model.intercept_)
print("b1 is: ", model.coef_)
# plot
plt.plot(x1, y1, 'bo')  # class 0 points in blue
plt.plot(x2, y2, 'ro')  # class 1 points in red
# logistic (sigmoid) function using the fitted parameters
def logistic(classifier, x):
    return 1 / (1 + np.exp(-(classifier.intercept_ + classifier.coef_ * x)))
# plot the fitted sigmoid curve over the x range
for i in range(1, 120):
    plt.plot(i / 10.0 - 2, logistic(model, i / 10.0 - 2), 'go')
plt.axis([-2, 10, -0.5, 2])
plt.show()
# prediction
pred = model.predict([[1]])
print("Prediction: ", pred)
pred = model.predict_proba([[1]])
print("Probability: ", pred)
- Code output and analysis
<Practice 2> Credit_Scoring.py
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# Data
credit_data = pd.read_csv("credit_data.csv")
print(credit_data.head())
print(credit_data.describe())
print(credit_data.corr())
features = credit_data[["income", "age", "loan"]] # x_1, x_2, x_3
target = credit_data.default  # target variable y (default label: 0 or 1)
# data split: 30% of the data set for testing, 70% for training
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.3)
# fitting
model = LogisticRegression()  # b parameters estimated by an iterative solver (lbfgs by default)
model.fit(feature_train, target_train)
print(model.coef_)       # b1, b2, b3
print(model.intercept_)  # b0
# prediction
predictions = model.predict(feature_test)  # predict on the test data
print(confusion_matrix(target_test, predictions))
print(accuracy_score(target_test, predictions))
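For reference, the accuracy score can be recovered directly from the confusion matrix as (TN + TP) / total; a small hypothetical check, assuming the script above has been run:

# hypothetical check: recompute accuracy from the confusion matrix
import numpy as np
cm = confusion_matrix(target_test, predictions)
print(np.trace(cm) / cm.sum())  # (TN + TP) / total, should equal accuracy_score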
- Practice results and analysis
<Practice 3> CrossValidation.py
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
credit_data = pd.read_csv("credit_data.csv")
features = credit_data[["income", "age", "loan"]]
target = credit_data.default
# machine learning models handle arrays, not data frames
X = np.array(features).reshape(-1, 3)  # 2D array (3 columns)
y = np.array(target)                   # 1D array
model = LogisticRegression()
predicted = cross_validate(model, X, y, cv=5)  # cv: number of folds
print(np.mean(predicted['test_score']))
# besides test_score, other outputs and metrics are available (see scikit-learn.org)
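As the last comment notes, cross_validate can report more than the default test_score; a minimal sketch (assumed usage, not in the original script) requesting several metrics and the training scores at once:

# hypothetical example: request multiple metrics from cross_validate
results = cross_validate(model, X, y, cv=5,
                         scoring=['accuracy', 'precision', 'recall'],
                         return_train_score=True)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))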