※ From Session 6 onward, posts will be uploaded in a standardized format.
<Theory>
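In brief, logistic regression models the probability that an observation belongs to the positive class using the sigmoid (logistic) function, which is exactly what the practices below fit and plot:

p(x) = 1 / (1 + exp(-(b0 + b1*x)))

The fitting step estimates the coefficients b0 and b1 (and b1, b2, b3 in the multivariate credit-scoring case) from the training data.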
<Practice 1> Sigmoid_Function.py
import numpy as np
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
# data
x1 = np.array([0, 0.6, 1.1, 1.5, 1.8, 2.5, 3, 3.1, 3.9, 4, 4.9, 5, 5.1])
y1 = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
x2 = np.array([3, 3.8, 4.4, 5.2, 5.5, 6.5, 6, 6.1, 6.9, 7, 7.9, 8, 8.1])
y2 = np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# outcome is 0 or 1; the two classes overlap in x
X = np.array(np.concatenate([x1, x2])).reshape(-1, 1)
y = np.concatenate([y1, y2])
print(X)
print(y)
# fitting
model = LogisticRegression()  # b parameters estimated by an iterative solver (lbfgs by default)
model.fit(X, y)
print("b0 is: ", model.intercept_)
print("b1 is: ", model.coef_)
# plot
plt.plot(x1, y1, 'bo')  # class 0 points in blue
plt.plot(x2, y2, 'ro')  # class 1 points in red
# logistic (sigmoid) function using the fitted parameters
def logistic(classifier, x):
    return 1 / (1 + np.exp(-(classifier.intercept_ + classifier.coef_ * x)))
# plot the fitted sigmoid curve over the x range
for i in range(1, 120):
    plt.plot(i / 10.0 - 2, logistic(model, i / 10.0 - 2), 'go')
plt.axis([-2, 10, -0.5, 2])
plt.show()
# prediction
pred = model.predict([[1]])
print("Prediction: ", pred)
pred = model.predict_proba([[1]])
print("Probability: ", pred)
- Code output and analysis
<Practice 2> Credit_Scoring.py
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
# Data
credit_data = pd.read_csv("credit_data.csv")
print(credit_data.head())
print(credit_data.describe())
print(credit_data.corr())
features = credit_data[["income", "age", "loan"]] # x_1, x_2, x_3
target = credit_data.default  # target variable y (default label: 0 or 1)
# data split: 30% of the data set for testing, 70% for training
feature_train, feature_test, target_train, target_test = train_test_split(features, target, test_size=0.3)
# fitting
model = LogisticRegression()  # b parameters estimated by an iterative solver (lbfgs by default)
model.fit(feature_train, target_train)
print(model.coef_)       # b1, b2, b3
print(model.intercept_)  # b0
# prediction
predictions = model.predict(feature_test)  # predict on the test data
print(confusion_matrix(target_test, predictions))
print(accuracy_score(target_test, predictions))
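For reference, the accuracy score can be recovered directly from the confusion matrix as (TN + TP) / total; a small hypothetical check, assuming the script above has been run:

# hypothetical check: recompute accuracy from the confusion matrix
import numpy as np
cm = confusion_matrix(target_test, predictions)
print(np.trace(cm) / cm.sum())  # (TN + TP) / total, should equal accuracy_score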
- Practice results and analysis
<Practice 3> CrossValidation.py
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_validate
credit_data = pd.read_csv("credit_data.csv")
features = credit_data[["income", "age", "loan"]]
target = credit_data.default
# machine learning models handle arrays, not data frames
X = np.array(features).reshape(-1, 3)  # 2D array (3 columns)
y = np.array(target)                   # 1D array
model = LogisticRegression()
predicted = cross_validate(model, X, y, cv=5)  # cv: number of folds
print(np.mean(predicted['test_score']))
# besides test_score, other outputs and metrics are available (see scikit-learn.org)
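As the last comment notes, cross_validate can report more than the default test_score; a minimal sketch (assumed usage, not in the original script) requesting several metrics and the training scores at once:

# hypothetical example: request multiple metrics from cross_validate
results = cross_validate(model, X, y, cv=5,
                         scoring=['accuracy', 'precision', 'recall'],
                         return_train_score=True)
print(np.mean(results['test_accuracy']))
print(np.mean(results['test_precision']))
print(np.mean(results['test_recall']))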