Table of Contents
  1. SVM
  • Code:
    1. Python
  • R
  • SVM via gradient descent

1. SVM

The SVM is similar to logistic regression and boosting; the difference lies in the surrogate loss function. Logistic regression uses the logistic loss, boosting uses the exponential loss, and the SVM uses the hinge loss.

They are written, respectively, as:

1. exponential loss: exp(-yf(x))

2. logistic loss: log[1+exp(-yf(x))]

3. hinge loss: [1-yf(x)]+

(the trailing plus sign is a subscript denoting the positive part, i.e. the value is clipped at zero from below)
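
A quick numerical comparison (values mine, natural log) shows what makes the hinge loss special. At a margin of yf(x) = 2, i.e. a point comfortably on the correct side:

exp(-2) ≈ 0.135,  log[1+exp(-2)] ≈ 0.127,  [1-2]+ = 0

Only the hinge loss is exactly zero once the margin exceeds 1, so such points drop out of the gradient entirely; the points that remain active are the support vectors.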

Based on the loss above, gradient descent gives the Python and R code below.

The idea is sketched in the figure.
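
To make explicit what the code implements (a sketch in the notation above; note the code applies the shrinkage with lamda alone, not learning_rate*lamda): folding the intercept into beta and writing f(x) for the linear score, the regularized hinge objective is

L(beta) = sum_i [1 - y_i f(x_i)]+ + (lamda/2)*||beta||^2

Each iteration steps against the subgradient of the hinge term and then shrinks every coordinate except the intercept:

beta <- beta + learning_rate * (sum of y_i*x_i over all i with y_i f(x_i) < 1), then beta <- (1 - lamda)*beta (intercept excluded)

Only margin violators contribute to the step; this is exactly the db mask computed in both implementations below.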

Data download: digits.csv
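
Before running anything, it is worth confirming that digits.csv has the layout the loading code assumes: feature (pixel) columns first and the digit label in the last column. That layout is inferred from the indexing in the code below, not from any documentation, so treat this as a sketch:

import pandas as pd

df = pd.read_csv('digits.csv')
print(df.shape)                 # (n, p+1): p pixel columns plus one label column
print(df.iloc[:, -1].unique())  # the digit labels present, drawn from 0-9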


Code:

Python

import numpy as np

np.random.seed(1)

def train(X, Y, num_iterations=1000, learning_rate=0.01, lamda=0.1):
    """Train a linear SVM on the hinge loss by gradient descent.
    lamda is the regularization coefficient."""
    (n, p) = X.shape                   # feature matrix: n samples, p features
    p = p + 1                          # one extra dimension for the intercept
    X1 = np.hstack((np.ones(n).reshape((n, 1)), X))  # prepend a column of ones
    Y = 2 * Y - 1                      # map labels from {0, 1} to {-1, +1}
    beta = np.zeros(p).reshape(p, 1)   # initialize the parameters
    for i in range(num_iterations):
        s = np.dot(X1, beta)
        db = s * Y < 1                 # margin violators: samples with y*f(x) < 1
        # tile repeats db*Y once along rows and p times along columns, i.e. it
        # copies the column db*Y p times so each violator's y_i multiplies its
        # entire feature row in X1
        dbeta = np.dot(np.ones(n), np.tile(db * Y, (1, p)) * X1)
        beta = beta + (learning_rate * dbeta).reshape(p, 1)
        beta[1:p] = beta[1:p] - lamda * beta[1:p]  # shrink all but the intercept
    return beta

def getAccuracy(beta, X, Y):
    (n, p) = X.shape
    X1 = np.hstack((np.ones(n).reshape((n, 1)), X))
    pred = np.dot(X1, beta)            # correct when pred and Y agree in sign
    Y = 2 * Y - 1
    num = 0
    for i in range(n):
        if pred[i] * Y[i] > 0:
            num = num + 1
    accuracy = num / n
    return accuracy

# load data
def load_digits(subset=None, normalize=True):
    """
    Load digits and labels from digits.csv.

    Args:
        subset: A subset of digits from 0 to 9 to return.
            If not specified, all digits will be returned.
        normalize: Whether to normalize data values to between 0 and 1.

    Returns:
        digits: Digits data matrix of the subset specified.
            The shape is (n, p), where
            n is the number of examples,
            p is the dimension of features.
        labels: Labels of the digits in an (n, ) array.
            Each label[i] is the label for data[i, :].
    """
    # load digits.csv, adopted from sklearn.
    import pandas as pd
    df = pd.read_csv('digits.csv')
    # only keep the numbers we want.
    if subset is not None:
        df = df[df.iloc[:, -1].isin(subset)]
    # convert to numpy arrays.
    digits = df.iloc[:, :-1].values.astype('float')
    labels = df.iloc[:, -1].values.astype('int')
    # Normalize digit values to between 0 and 1.
    if normalize:
        digits -= digits.min()
        digits /= digits.max()
    # Change the labels to 0 and 1.
    if subset is not None:
        for i in range(len(subset)):
            labels[labels == subset[i]] = i
    labels = labels.reshape((labels.shape[0], 1))
    return digits, labels

def split_samples(digits, labels):
    """Split the data into a training set (70%) and a testing set (30%)."""
    num_samples = digits.shape[0]
    num_training = round(num_samples * 0.7)
    indices = np.random.permutation(num_samples)
    training_idx, testing_idx = indices[:num_training], indices[num_training:]
    return (digits[training_idx], labels[training_idx],
            digits[testing_idx], labels[testing_idx])

#====================================
# Load digits and labels.
digits, labels = load_digits(subset=[3, 5], normalize=True)
training_digits, training_labels, testing_digits, testing_labels = split_samples(digits, labels)
print('# training', training_digits.shape[0])
print('# testing', testing_digits.shape[0])

# Train the SVM and display training/testing accuracy.
beta = train(training_digits, training_labels)
training_accuracy = getAccuracy(beta, training_digits, training_labels)
testing_accuracy = getAccuracy(beta, testing_digits, testing_labels)
print('Accuracy on training data: %f' % training_accuracy)
print('Accuracy on testing data: %f' % testing_accuracy)
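
As a quick sanity check that does not depend on digits.csv, the same train / getAccuracy pair can be exercised on synthetic, linearly separable data (a minimal sketch; the two Gaussian blobs and their separation are my choice):

import numpy as np

np.random.seed(0)
n = 200
# two Gaussian blobs on opposite sides of the origin, labelled 1 and 0
X = np.vstack((np.random.randn(n, 2) + 2, np.random.randn(n, 2) - 2))
Y = np.vstack((np.ones((n, 1)), np.zeros((n, 1))))

beta = train(X, Y)                           # train() and getAccuracy() as defined above
print('accuracy:', getAccuracy(beta, X, Y))  # should be close to 1.0 on separable data

Incidentally, the np.tile call in train is not strictly necessary: (db * Y) * X1 broadcasts the (n, 1) column across the p columns of X1 and produces the same array, and np.dot(np.ones(n), M) is simply M.sum(axis=0).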

R

    library(data.table) # allows us to use function fread,
    # which quickly reads data from csv files 
    
    
    train<-function(X,Y,num_iterations=1000,learning_rate=0.01,lamda=0.1)  #lamda is regularization coefficient
    {
      n=dim(X)[1]
      p=dim(X)[2]+1
      X1=cbind(rep(1,n),X)
      Y=2*Y-1
      beta=matrix(rep(0,p),nrow=p)
      for (i in 1:num_iterations)
      {
        S=X1%*%beta
    db=S*Y<1   # margin violators: samples with y*f(x) < 1
    # matrix() recycles db*Y column-wise across the p columns, so each
    # violator's y_i multiplies its entire feature row in X1
    dbeta = matrix(rep(1,n),nrow=1)%*%(matrix(db*Y,n,p)*X1)
    
        beta=beta+learning_rate*t(dbeta)
        beta[2:p]=beta[2:p]-lamda*beta[2:p]
      }
     return(beta)
    }
    
    
    getAccuracy <- function(beta, X, Y)
    {
      numSamples = dim(X)[1]
      X1 = cbind(rep(1, numSamples), X)
      p = sign(X1%*%beta)
      Y = 2*Y-1
      num = 0
      for(i in 1:numSamples)
      {
        if(p[i]==Y[i])
          num = num + 1
      }
      accuracy = num/numSamples
      return(accuracy)
    }
    # load data
    load_digits <- function(subset=NULL, normalize=TRUE) {
    
      #Load digits and labels from digits.csv.
    
      #Args:
  #subset: A subset of digits from 0 to 9 to return.
      #If not specified, all digits will be returned.
      #normalize: Whether to normalize data values to between 0 and 1.
    
      #Returns:
      #digits: Digits data matrix of the subset specified.
      #The shape is (n, p), where
      #n is the number of examples,
      #p is the dimension of features.
      #labels: Labels of the digits in an (n, ) array.
  #Each label[i] is the label for data[i, :]
    
      # load digits.csv, adopted from sklearn.
    
      df <- fread("digits.csv") 
      df <- as.matrix(df)
    
  last_col <- dim(df)[2]   # the label lives in the last column

  ## only keep the numbers we want.
  if (length(subset)>0) {

    l_col <- df[,last_col]
    index = NULL

    for (i in 1:length(subset)){

      number = subset[i]
      index = c(index,which(l_col == number))
    }
    index = sort(index)   # sort() returns a copy, so assign the result back
    df = df[index,]
  }

  # split into the feature matrix and the label vector.
  digits = df[,-last_col]
  labels = df[,last_col]
    
      # Normalize digit values to 0 and 1.
      if (normalize == TRUE) {
        digits = digits - min(digits)
        digits = digits/max(digits)}
    
    
  # Change the labels to 0 and 1 (skip when no subset was requested).
  if (length(subset) > 0) {
    for (i in 1:length(subset)) {
      labels[labels == subset[i]] = i-1
    }
  }
    
      return(list(digits, labels))
    
    }
    
    split_samples <- function(digits,labels) {
    
      # Split the data into a training set (70%) and a testing set (30%).
    
      num_samples <- dim(digits)[1]
      num_training <- round(num_samples*0.7)
      indices = sample(1:num_samples, size = num_samples)
      training_idx <- indices[1:num_training]
      testing_idx <- indices[-(1:num_training)]
    
      return (list(digits[training_idx,], labels[training_idx],
                   digits[testing_idx,], labels[testing_idx]))
    }
    
    #====================================
    # Load digits and labels.
    result = load_digits(subset=c(1, 7), normalize=TRUE)
    digits = result[[1]]
    labels = result[[2]]
    
    result = split_samples(digits,labels)
    training_digits = result[[1]]
    training_labels = result[[2]]
    testing_digits = result[[3]]
    testing_labels = result[[4]]
    
# print the number of training and testing samples
nrow(training_digits)
nrow(testing_digits)
    
# Train the SVM and display training/testing accuracy.
    beta = train(training_digits, training_labels)
    
training_accuracy = getAccuracy(beta, training_digits, training_labels)
testing_accuracy = getAccuracy(beta, testing_digits, testing_labels)
cat("Accuracy on training data:", training_accuracy, "\n")
cat("Accuracy on testing data:", testing_accuracy, "\n")