1. SVM
The SVM is similar to logistic regression and boosting; the difference lies in the surrogate loss function. Logistic regression uses the logistic loss, boosting uses the exponential loss, and the SVM uses the hinge loss.
They are written respectively as:
1. logistic loss: log[1 + exp(-y f(x))]
2. exponential loss: exp(-y f(x))
3. hinge loss: [1 - y f(x)]+
(the trailing plus sign is a subscript denoting the positive part, i.e. [z]+ = max(z, 0))
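Since the code below fits a linear model f(x) = β'x, the update it implements comes straight from the subgradient of the hinge loss:

\[
\frac{\partial}{\partial \beta}\,\bigl[1 - y\,\beta^{\top}x\bigr]_{+}
= \begin{cases} -y\,x, & y\,\beta^{\top}x < 1, \\ 0, & y\,\beta^{\top}x \ge 1, \end{cases}
\]

so each descent step adds learning_rate times the sum of y_i x_i over the margin violators to β.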
Following the hinge loss above, gradient descent gives the Python and R code below.
The overall approach is sketched in the figure.
Data download: digits.csv
Code:
Python:
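A minimal numpy sketch of this hinge-loss gradient descent, mirroring the R implementation below (the names train / get_accuracy and the numpy dependency are assumptions, not from the original listing):

import numpy as np

def train(X, Y, num_iterations=1000, learning_rate=0.01, lamda=0.1):
    # lamda is the regularization coefficient
    n, d = X.shape
    X1 = np.hstack([np.ones((n, 1)), X])   # prepend an intercept column
    Y = 2 * Y - 1                          # map labels {0,1} -> {-1,+1}
    beta = np.zeros(d + 1)
    for _ in range(num_iterations):
        margin = Y * (X1 @ beta)
        v = margin < 1                     # samples violating the margin
        # hinge-loss subgradient step, summed over the violators
        beta += learning_rate * (Y[v] @ X1[v])
        beta[1:] -= lamda * beta[1:]       # shrink all but the intercept
    return beta

def get_accuracy(beta, X, Y):
    X1 = np.hstack([np.ones((X.shape[0], 1)), X])
    return np.mean(np.sign(X1 @ beta) == 2 * Y - 1)

It expects X as an (n, d) numpy array and Y as 0/1 labels, just like the R version.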
R:
library(data.table)  # provides fread, which quickly reads data from csv files
train <- function(X, Y, num_iterations=1000, learning_rate=0.01, lamda=0.1) # lamda is the regularization coefficient
{
  n = dim(X)[1]
  p = dim(X)[2] + 1
  X1 = cbind(rep(1, n), X)            # prepend an intercept column
  Y = 2*Y - 1                         # map labels {0,1} -> {-1,+1}
  beta = matrix(rep(0, p), nrow=p)
  for (i in 1:num_iterations)
  {
    S = X1 %*% beta
    db = S*Y < 1                      # samples violating the margin
    # One matrix product sums the subgradient over all violators:
    # replicate db*Y across the p columns, multiply elementwise with X1,
    # then collapse the rows with a 1 x n row vector of ones.
    dbeta = matrix(rep(1, n), nrow=1) %*% (matrix(db*Y, n, p) * X1)
    beta = beta + learning_rate*t(dbeta)
    # weight decay on everything except the intercept
    beta[2:p] = beta[2:p] - lamda*beta[2:p]
  }
  return(beta)
}
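Note the design choice in the last line of the loop: every coefficient except the intercept is shrunk by a factor of (1 - lamda) on each iteration, a multiplicative weight decay that plays the role of the L2 regularization term; the intercept is conventionally left unpenalized.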
getAccuracy <- function(beta, X, Y)
{
  numSamples = dim(X)[1]
  X1 = cbind(rep(1, numSamples), X)
  p = sign(X1 %*% beta)     # predicted labels in {-1, +1}
  Y = 2*Y - 1
  accuracy = mean(p == Y)   # fraction of samples classified correctly
  return(accuracy)
}
# load data
load_digits <- function(subset=NULL, normalize=TRUE) {
  # Load digits and labels from digits.csv.
  # Args:
  #   subset: A subset of digits from 0 to 9 to return.
  #           If not specified, all digits will be returned.
  #   normalize: Whether to normalize data values to between 0 and 1.
  # Returns:
  #   digits: Digits data matrix of the subset specified.
  #           The shape is (n, p), where
  #           n is the number of examples,
  #           p is the dimension of features.
  #   labels: Labels of the digits in an (n, ) array.
  #           Each label[i] is the label for digits[i, ].
  # load digits.csv, adopted from sklearn.
  df <- fread("digits.csv")
  df <- as.matrix(df)
  c <- dim(df)[2]                     # labels live in the last column
  # only keep the numbers we want.
  if (length(subset) > 0) {
    l_col <- df[, c]
    index = NULL
    for (i in 1:length(subset)) {
      number = subset[i]
      index = c(index, which(l_col == number))
    }
    index = sort(index)               # keep the rows in their original order
    df = df[index, ]
  }
  # convert to arrays: the features are every column except the label.
  digits = df[, -c]
  labels = df[, c]
  # Normalize digit values to between 0 and 1.
  if (normalize == TRUE) {
    digits = digits - min(digits)
    digits = digits/max(digits)
  }
  # Recode the labels in subset as 0, 1, ...
  if (length(subset) > 0) {
    for (i in 1:length(subset)) {
      labels[labels == subset[i]] = i - 1
    }
  }
  return(list(digits, labels))
}
split_samples <- function(digits, labels) {
  # Split the data into a training set (70%) and a testing set (30%).
  num_samples <- dim(digits)[1]
  num_training <- round(num_samples*0.7)
  indices = sample(1:num_samples, size = num_samples)   # random permutation
  training_idx <- indices[1:num_training]
  testing_idx <- indices[-(1:num_training)]
  return(list(digits[training_idx, ], labels[training_idx],
              digits[testing_idx, ], labels[testing_idx]))
}
#====================================
# Load digits and labels.
result = load_digits(subset=c(1, 7), normalize=TRUE)
digits = result[[1]]
labels = result[[2]]
result = split_samples(digits,labels)
training_digits = result[[1]]
training_labels = result[[2]]
testing_digits = result[[3]]
testing_labels = result[[4]]
# print dimensions
dim(training_digits)
dim(testing_digits)
# Train the SVM and display training/testing accuracy.
beta = train(training_digits, training_labels)
trainingaccuracy = getAccuracy(beta, training_digits, training_labels)
testingaccuracy = getAccuracy(beta, testing_digits, testing_labels)
print(trainingaccuracy)
print(testingaccuracy)