Support Vector Machines


# Much of the text below is quoted from
# http://blog.csdn.net/Felomeng/archive/2009/04/09/4058669.aspx and various other places.
from svm import *

# A problem with 3 classes
labels = [0, 1, 1, 2]
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
"""
Think of it as this grid:
B-C
| |
A-B
"""
problem = svm_problem(labels, samples)
# The struct svm_problem formalizes the problem:
# struct svm_problem
# {
#     int l;
#     double *y;
#     struct svm_node **x;
# };
#
# "l" is the number of training instances.
# "y" is an array holding their target values (integers for classification,
# reals for regression).
# "x" is an array of pointers, each pointing to one sparse training vector
# (i.e. an svm_node array).
#
# For example, given the training data:
#
# LABEL   ATTR1    ATTR2    ATTR3    ATTR4    ATTR5
# -----   -----    -----    -----    -----    -----
#   1      0        0.1      0.2      0        0
#   2      0        0.1      0.3     -1.2      0
#   1      0.4      0        0        0        0
#   2      0        0.1      0        1.4      0.5
#   3     -0.1     -0.2      0.1      1.1      0.1
#
# the svm_problem is built as:
# l = 5
# y -> 1 2 1 2 3
# x -> [ ] -> (2,0.1) (3,0.2) (-1,?)
#      [ ] -> (2,0.1) (3,0.3) (4,-1.2) (-1,?)
#      [ ] -> (1,0.4) (-1,?)
#      [ ] -> (2,0.1) (4,1.4) (5,0.5) (-1,?)
#      [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (-1,?)
#
# where each (index, value) pair is stored in the struct svm_node:
# struct svm_node
# {
#     int index;
#     double value;
# };
#
# An index of -1 marks the end of a vector. Note that indices must be in
# ascending order.

size = len(samples)
param = svm_parameter(C = 10, nr_weight = 2, weight_label = [1, 0], weight = [10, 1])
# The struct svm_parameter describes the parameters of an SVM:
# struct svm_parameter
# {
#     int svm_type;
#
# svm_type can be one of C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR or NU_SVR.
# C_SVC:        C-SVM classification
# NU_SVC:       nu-SVM classification
# ONE_CLASS:    one-class SVM
# EPSILON_SVR:  epsilon-SVM regression
# NU_SVR:       nu-SVM regression
#
#     int kernel_type;
#
# kernel_type can be one of LINEAR, POLY, RBF, SIGMOID.
# LINEAR:      u'*v
# POLY:        (gamma*u'*v + coef0)^degree
# RBF:         exp(-gamma*|u-v|^2)
# SIGMOID:     tanh(gamma*u'*v + coef0)
# PRECOMPUTED: kernel values in the training set file
#
#     int degree;        /* for poly */
#     double gamma;      /* for poly/rbf/sigmoid */
#     double coef0;      /* for poly/sigmoid */
#
#     /* these are for training only */
#     double cache_size; /* in MB */
#
# cache_size is the size of the kernel cache in MB. C is the cost of
# constraint violation (the penalty). eps is the stopping criterion
# (typically we use 0.00001 for nu-SVC and 0.001 for the other models).
# nu is the parameter of nu-SVM, nu-SVR and one-class SVM. p is the epsilon
# of the epsilon-insensitive loss in epsilon-SVM regression. shrinking = 1
# enables the shrinking heuristics, = 0 disables them. probability = 1
# produces a model with probability information, = 0 does not.
#
#     double eps;        /* stopping criteria */
#
#     double C;          /* for C_SVC, EPSILON_SVR, and NU_SVR */
#
#     int nr_weight;        /* for C_SVC */
#     int *weight_label;    /* for C_SVC */
#     double *weight;       /* for C_SVC */
#
# nr_weight, weight_label and weight are used to change the penalty factor
# of certain classes (set the weight to 1 for any class whose penalty should
# stay unchanged). This is especially useful when training a classifier on
# unbalanced input data, or with asymmetric misclassification costs.
#
# nr_weight is the number of elements in the arrays weight_label and weight.
# Each weight[i] corresponds to one weight_label[i], meaning that the
# penalty of class weight_label[i] is scaled by the factor weight[i].
#
# If you do not want to change the penalty of any class, just set nr_weight
# to 0.
#
# Note: this can be used to trade off recall against precision.
#
#     double nu;       /* for NU_SVC, ONE_CLASS, and NU_SVR */
#     double p;        /* for EPSILON_SVR */
#     int shrinking;   /* use the shrinking heuristics */
#     int probability; /* do probability estimates */
# };
#
# *NOTE* Because svm_model contains pointers into svm_problem, do not free
# the memory of an svm_problem while you are still using an svm_model
# produced from it by svm_train().
#
# *NOTE* To avoid wrong parameters, call svm_check_parameter() before
# calling svm_train().

# These are a few classic kernel functions. Some sources say that for
# high-dimensional data such as text, a simple linear kernel is both fast
# and good.
kernels = [LINEAR, POLY, RBF]
kname = ['linear', 'polynomial', 'rbf']
for name, k in zip(kname, kernels):
    print "---" * 10, "training:", name, "---" * 10
    param.kernel_type = k
    # This function builds and returns an SVM model from the given
    # parameters and training data.
    model = svm_model(problem, param)
    errors = 0
    print "=== classification ==="
    for i in range(size):
        thing = samples[i]
        # OK, this is the classification
        prediction = model.predict(thing)
        print "%s -> %s" % (thing, prediction)
        if labels[i] != prediction:
            errors = errors + 1
    print "kernel %s: error rate = %d / %d" % (kname[param.kernel_type], errors, size)
    print ">>>" * 10
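As an aside, the sparse (index, value) layout described in the comments above is easy to reproduce by hand. Here is a minimal stand-alone sketch (plain Python, no libsvm required; to_sparse is just an illustrative helper, not part of the libsvm API) that turns the dense example table into the svm_node-style pairs shown in the diagram:

# Convert a dense feature vector into svm_node-style (index, value) pairs:
# indices are 1-based and ascending, zeros are skipped, and (-1, None)
# plays the role of the index = -1 end-of-vector marker.
def to_sparse(row):
    nodes = [(i + 1, v) for i, v in enumerate(row) if v != 0]
    nodes.append((-1, None))
    return nodes

dense = [
    [0,    0.1,  0.2,  0,    0  ],
    [0,    0.1,  0.3, -1.2,  0  ],
    [0.4,  0,    0,    0,    0  ],
    [0,    0.1,  0,    1.4,  0.5],
    [-0.1, -0.2, 0.1,  1.1,  0.1],
]
for row in dense:
    print to_sparse(row)
# The first row prints [(2, 0.1), (3, 0.2), (-1, None)], matching the diagram.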
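The kernel formulas quoted in the comments are also easy to check numerically. A minimal sketch, assuming gamma = 0.5, coef0 = 0 and degree = 3 (illustrative values, not necessarily libsvm's defaults):

import math

# The four classic kernels, written exactly as the formulas quoted above.
# gamma, coef0 and degree are assumed values for illustration only.
gamma, coef0, degree = 0.5, 0.0, 3

def dot(u, v):
    return sum(a * b for a, b in zip(u, v))

def k_linear(u, v):
    return dot(u, v)                              # u'*v

def k_poly(u, v):
    return (gamma * dot(u, v) + coef0) ** degree  # (gamma*u'*v + coef0)^degree

def k_rbf(u, v):
    d2 = sum((a - b) ** 2 for a, b in zip(u, v))
    return math.exp(-gamma * d2)                  # exp(-gamma*|u-v|^2)

def k_sigmoid(u, v):
    return math.tanh(gamma * dot(u, v) + coef0)   # tanh(gamma*u'*v + coef0)

u, v = [0, 1], [1, 1]  # two of the training samples above
for kn, k in [('linear', k_linear), ('poly', k_poly), ('rbf', k_rbf), ('sigmoid', k_sigmoid)]:
    print "%-8s k(u, v) = %f" % (kn, k(u, v))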
print "---" * 10, "decision values:", name, "---" * 10
param = svm_parameter(kernel_type = RBF, C = 10)
model = svm_model(problem, param)
print "%s" % (samples[1])
print "number of classes:", model.get_nr_class()
# How libsvm works: it first fits a model to the training set, which forms a
# hyperplane in the high-dimensional mapped space. Each test point is then
# evaluated with f(x) = w*x + b: if the result is > 0 the point is labeled
# positive, otherwise negative. By that principle, every input should have
# its own f(x) value.
d = model.predict_values(samples[1])
for i in model.get_labels():
    for j in model.get_labels():
        if j > i:
            print "{%d, %d} = %9.5f" % (i, j, d[i, j])

print "---" * 10, "probability estimates:", name, "---" * 10
param = svm_parameter(kernel_type = RBF, C = 10, probability = 1)
model = svm_model(problem, param)
pred_label, pred_probability = model.predict_probability(samples[1])
print "%s" % (samples[1])
print "predicted class: %d" % (pred_label)
for i in model.get_labels():
    print "class %d probability %f" % (i, pred_probability[i])
print "sum of probabilities = %s" % sum(pred_probability.itervalues())

# PRECOMPUTED: kernel values in training_set_file,
# i.e. kernel values read from a training file
print "---" * 10, "precomputed kernel:", name, "---" * 10
samples = [[1, 0, 0, 0, 0], [2, 0, 1, 0, 1], [3, 0, 0, 1, 1], [4, 0, 1, 1, 2]]
problem = svm_problem(labels, samples)
param = svm_parameter(kernel_type = PRECOMPUTED, C = 10, nr_weight = 2, weight_label = [1, 0], weight = [10, 1])
model = svm_model(problem, param)
pred_label = model.predict(samples[0])
print "%s %s" % (samples[0], pred_label)
"""
Classification in libsvm depends on many parameters besides kernel_type. You
can tune them with the easy.py tool shipped with libsvm; the meaning of each
parameter is documented in the README inside the package, and also in
http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf. This tutorial is
also a good introduction: http://ntu.csie.org/~piaip/svm/svm_tutorial.html
There are quite a few good articles at
http://huangbo929.blog.edu.cn/sort/?act=search&keyword=SVM as well, for example:
http://huangbo929.blog.edu.cn/2008/64686.html  An introduction to SVM string kernels (2008-07-03 09:38)
http://huangbo929.blog.edu.cn/2008/64689.html  How to use a suffix-array-based string kernel with SVMs (2008-07-03 10:07)
http://huangbo929.blog.edu.cn/2008/64688.html  The fastest SVM learning algorithm (2008-07-03 10:04)
http://huangbo929.blog.edu.cn/2008/64687.html  An introduction to tree kernel SVMs (2008-07-03 09:41)
and so on.
"""
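A note on the PRECOMPUTED samples used above: each row is the 1-based instance ID followed by the kernel values K(x_i, x_1) ... K(x_i, x_l), and for this toy problem they are exactly the linear-kernel Gram matrix of the original samples. A short sketch that rebuilds them:

# Rebuild the PRECOMPUTED rows used above: each row is the 1-based instance
# ID followed by the kernel values K(x_i, x_1) ... K(x_i, x_l).
def dot(u, v):
    return sum(a * b for a, b in zip(u, v))

raw = [[0, 0], [0, 1], [1, 0], [1, 1]]  # the original training samples
precomputed = [[i + 1] + [dot(xi, xj) for xj in raw] for i, xi in enumerate(raw)]
print precomputed
# -> [[1, 0, 0, 0, 0], [2, 0, 1, 0, 1], [3, 0, 0, 1, 1], [4, 0, 1, 1, 2]]
# i.e. exactly the samples list passed with kernel_type = PRECOMPUTED above.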
------------------------------------------------------------
Program output
------------------------------------------------------------
~/svm/libsvm_study/python $ python svm_test.py
------------------------------ training: linear ------------------------------
.....*..*
optimization finished, #iter = 22
obj = -3.999999, rho = -0.999349
nSV = 3, nBSV = 0
*
optimization finished, #iter = 1
nu = 0.100000
obj = -1.000000, rho = -1.000000
nSV = 2, nBSV = 0
*
optimization finished, #iter = 2
obj = -4.000000, rho = -3.000000
nSV = 3, nBSV = 0
Total nSV = 4
=== classification ===
[0, 0] -> 0.0
[0, 1] -> 1.0
[1, 0] -> 1.0
[1, 1] -> 2.0
kernel linear: error rate = 0 / 4
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
------------------------------ training: polynomial ------------------------------
*
optimization finished, #iter = 2
obj = -16.875000, rho = 0.375000
nSV = 3, nBSV = 1
*
optimization finished, #iter = 1
nu = 0.200000
obj = -2.000000, rho = -1.000000
nSV = 2, nBSV = 0
.*.*
optimization finished, #iter = 6
obj = -2.461538, rho = -1.153547
nSV = 3, nBSV = 0
Total nSV = 4
=== classification ===
[0, 0] -> 1.0
[0, 1] -> 1.0
[1, 0] -> 1.0
[1, 1] -> 2.0
kernel polynomial: error rate = 1 / 4
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
------------------------------ training: rbf ------------------------------
..*.*
optimization finished, #iter = 9
obj = -4.247381, rho = 0.671181
nSV = 3, nBSV = 0
*
optimization finished, #iter = 1
nu = 0.158198
obj = -1.581977, rho = 0.000000
nSV = 2, nBSV = 0
.*.*
optimization finished, #iter = 7
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 4
=== classification ===
[0, 0] -> 0.0
[0, 1] -> 1.0
[1, 0] -> 1.0
[1, 1] -> 2.0
kernel rbf: error rate = 0 / 4
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
------------------------------ decision values: rbf ------------------------------
..*.*
optimization finished, #iter = 9
nu = 0.283131
obj = -4.247381, rho = 0.671181
nSV = 3, nBSV = 0
*
optimization finished, #iter = 1
nu = 0.158198
obj = -1.581977, rho = 0.000000
nSV = 2, nBSV = 0
.*.*
optimization finished, #iter = 7
nu = 0.283090
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 4
[0, 1]
number of classes: 3
{0, 1} =  -1.00013
{0, 2} =   0.00000
{1, 2} =   0.99936
------------------------------ probability estimates: rbf ------------------------------
.*.*
optimization finished, #iter = 7
nu = 0.283090
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 3
*
optimization finished, #iter = 1
nu = 0.254149
obj = -2.541494, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
.*.*
optimization finished, #iter = 7
nu = 0.283090
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 3
*
optimization finished, #iter = 1
nu = 0.254149
obj = -2.541494, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
..*.*
optimization finished, #iter = 9
nu = 0.283131
obj = -4.247381, rho = 0.671181
nSV = 3, nBSV = 0
*
optimization finished, #iter = 1
nu = 0.158198
obj = -1.581977, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
*
optimization finished, #iter = 1
nu = 0.158198
obj = -1.581977, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
*
optimization finished, #iter = 1
nu = 0.158198
obj = -1.581977, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
*
optimization finished, #iter = 1
nu = 0.158198
obj = -1.581977, rho = 0.000000
nSV = 2, nBSV = 0
.*.*
optimization finished, #iter = 7
nu = 0.283090
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 3
*
optimization finished, #iter = 1
nu = 0.254149
obj = -2.541494, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
.*.*
optimization finished, #iter = 7
nu = 0.283090
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 3
*
optimization finished, #iter = 1
nu = 0.254149
obj = -2.541494, rho = 0.000000
nSV = 2, nBSV = 0
Total nSV = 2
.*.*
optimization finished, #iter = 7
nu = 0.283090
obj = -4.247381, rho = -0.671133
nSV = 3, nBSV = 0
Total nSV = 4
[0, 1]
predicted class: 0
class 0 probability 0.400236
class 1 probability 0.199806
class 2 probability 0.399958
sum of probabilities = 1.0
------------------------------ precomputed kernel: rbf ------------------------------
.....*..*
optimization finished, #iter = 22
obj = -3.999999, rho = -0.999349
nSV = 3, nBSV = 0
*
optimization finished, #iter = 1
nu = 0.100000
obj = -1.000000, rho = -1.000000
nSV = 2, nBSV = 0
*
optimization finished, #iter = 2
obj = -4.000000, rho = -3.000000
nSV = 3, nBSV = 0
Total nSV = 4
[1, 0, 0, 0, 0] 0.0
zuroc@aragorn ~/svm/
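For what it's worth, the pairwise decision values printed in the "decision values" section are what libsvm's one-vs-one multi-class scheme votes on. A minimal sketch of that voting rule, assuming the usual convention that a positive f(x) votes for the first class of the pair, fed with the values from the run above:

# One-vs-one voting over the pairwise decision values printed above for
# samples[1] = [0, 1]; the numbers are copied from the program output.
dec = {(0, 1): -1.00013, (0, 2): 0.00000, (1, 2): 0.99936}

votes = {0: 0, 1: 0, 2: 0}
for (i, j), f in dec.items():
    if f > 0:          # assumed convention: f(x) > 0 votes for class i
        votes[i] += 1
    else:              # otherwise the vote goes to class j
        votes[j] += 1

winner = max(votes, key=votes.get)
print "votes:", votes, "-> predicted class", winner
# -> class 1, matching model.predict(samples[1]) in the classification runs.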

Reposted from: https://www.cnblogs.com/tankzhouqiang/archive/2009/11/17/1968350.html

