'''
案例:事件预测----加载event.txt,预测某个时间段是否会出现特殊事件。步骤如下:
1.数据预处理:
1>.读取文件,加载data数组,删除索引为1的列
2>.针对每一列做编码,离散数据使用LabelEncoder,连续的数字数据使用DigitEncoder(需要自定义),编码器需要保存
3>.整理数据集,划分测试集和训练集
2.训练SVM模型分类器
3.对测试集进行预测
4.自定义测试数据,实现事件预测
5.画图
'''
import numpy as np
import matplotlib.pyplot as mp
import sklearn.preprocessing as sp
import sklearn.model_selection as sm
import sklearn.svm as svm
import warnings
# Suppress every warning for the remainder of the script run.
warnings.filterwarnings('ignore')
class DigitEncoder:
    """Encoder for columns whose values are digit strings.

    Provides the ``fit_transform`` / ``transform`` / ``inverse_transform``
    methods that this script calls on its encoders, so it can stand in for
    ``LabelEncoder`` on numeric columns. The encoder is stateless: nothing
    is learned from the data.
    """

    def fit_transform(self, y):
        """Return ``y`` as 32-bit integers; identical to ``transform``."""
        return self.transform(y)

    def transform(self, y):
        """Convert digit strings to a 32-bit integer array.

        Args:
            y: array-like of values that can be parsed as integers.

        Returns:
            A numpy array of dtype ``'i4'``.
        """
        # asarray lets plain lists through; an ndarray is passed unchanged.
        return np.asarray(y).astype('i4')

    def inverse_transform(self, y):
        """Convert numeric values back to their string representation."""
        return np.asarray(y).astype('str')
# Load the raw records: each line of the file is one comma-separated row.
data = []
with open('./ml_data/event.txt', 'r') as f:
    for line in f:
        # Strip only the line terminator, so a final line without a trailing
        # newline does not lose its last character.
        line = line.rstrip('\n')
        # Skip empty lines; they would otherwise produce a ragged array.
        if line:
            data.append(line.split(','))
data = np.array(data)
# Drop the second column (index 1).
data = np.delete(data, 1, axis=1)
print(data.shape)
# Build the input set x and output set y, keeping one encoder per column so
# that new samples can be encoded the same way later.
encoders, x, y = [], [], []
data = data.T  # iterate column by column
last_row = len(data) - 1
for row, column in enumerate(data):
    # The first value of the column decides the encoder: digit strings use
    # DigitEncoder, anything else uses LabelEncoder.
    if column[0].isdigit():
        encoder = DigitEncoder()
    else:
        encoder = sp.LabelEncoder()
    if row < last_row:
        x.append(encoder.fit_transform(column))
    else:
        # The last column is the prediction target.
        y = encoder.fit_transform(column)
    encoders.append(encoder)
# Back to one sample per row.
x = np.array(x).T
# Split into training and test sets (25% held out, fixed seed).
train_x, test_x, train_y, test_y = sm.train_test_split(
    x, y, test_size=0.25, random_state=7)
# 5-fold cross-validation on the training set, scored with weighted F1.
model = svm.SVC(kernel='rbf', class_weight='balanced')
scores = sm.cross_val_score(
    model, train_x, train_y, cv=5, scoring='f1_weighted')
print('交叉验证平均得分:', scores.mean())
model.fit(train_x, train_y)
# Evaluate on the held-out test set: fraction of exactly matching labels.
pred_test_y = model.predict(test_x)
print('预测精度:', (test_y == pred_test_y).sum() / test_y.size)
# Hand-written samples to run through the trained model.
data = [['Tuesday', '13:30:00', '21', '23'],
        ['Thursday', '13:30:00', '21', '23']]
# Encode each feature column with the encoder saved for it. zip stops at the
# shorter sequence, so the trailing target encoder is not used here.
data = np.array(data).T
test_x = [encoder.transform(column)
          for encoder, column in zip(encoders, data)]
test_x = np.array(test_x).T
pred_test_y = model.predict(test_x)
# Map the predicted codes back to labels with the target column's encoder.
pred_test_y = encoders[-1].inverse_transform(pred_test_y)
print('预测结果为: ', pred_test_y)
输出结果:
(5040, 5)
交叉验证平均得分: 0.9458699461165295
预测精度: 0.9476190476190476
预测结果为: ['noevent' 'noevent']
'''
案例:交通流量预测(回归)。步骤如下:
1.数据预处理:
1>.读取文件,加载data数组(本例不需要删除列)
2>.针对每一列做编码,离散数据使用LabelEncoder,连续的数字数据使用DigitEncoder(需要自定义),编码器需要保存
3>.整理数据集,划分测试集和训练集
2.训练SVM模型回归器
3.对测试集进行预测
4.自定义测试数据,实现交通流量预测
5.画图
'''
import numpy as np
import matplotlib.pyplot as mp
import sklearn.preprocessing as sp
import sklearn.model_selection as sm
import sklearn.svm as svm
import warnings
import sklearn.metrics as mm
# Suppress every warning for the remainder of the script run.
warnings.filterwarnings('ignore')
class DigitEncoder:
    """Encoder for columns whose values are digit strings.

    Provides the ``fit_transform`` / ``transform`` / ``inverse_transform``
    methods that this script calls on its encoders, so it can stand in for
    ``LabelEncoder`` on numeric columns. The encoder is stateless: nothing
    is learned from the data.
    """

    def fit_transform(self, y):
        """Return ``y`` as 32-bit integers; identical to ``transform``."""
        return self.transform(y)

    def transform(self, y):
        """Convert digit strings to a 32-bit integer array.

        Args:
            y: array-like of values that can be parsed as integers.

        Returns:
            A numpy array of dtype ``'i4'``.
        """
        # asarray lets plain lists through; an ndarray is passed unchanged.
        return np.asarray(y).astype('i4')

    def inverse_transform(self, y):
        """Convert numeric values back to their string representation."""
        return np.asarray(y).astype('str')
# Load the raw records: each line of the file is one comma-separated row.
data = []
with open('./ml_data/traffic.txt', 'r') as f:
    for line in f:
        # Strip only the line terminator, so a final line without a trailing
        # newline does not lose its last character.
        line = line.rstrip('\n')
        # Skip empty lines; they would otherwise produce a ragged array.
        if line:
            data.append(line.split(','))
data = np.array(data)
# Build the input set x and output set y, keeping one encoder per column so
# that new samples can be encoded the same way later.
encoders, x, y = [], [], []
data = data.T  # iterate column by column
last_row = len(data) - 1
for row, column in enumerate(data):
    # The first value of the column decides the encoder: digit strings use
    # DigitEncoder, anything else uses LabelEncoder.
    if column[0].isdigit():
        encoder = DigitEncoder()
    else:
        encoder = sp.LabelEncoder()
    if row < last_row:
        x.append(encoder.fit_transform(column))
    else:
        # The last column is the regression target.
        y = encoder.fit_transform(column)
    encoders.append(encoder)
# Back to one sample per row.
x = np.array(x).T
# Split into training and test sets (25% held out, fixed seed).
train_x, test_x, train_y, test_y = sm.train_test_split(
    x, y, test_size=0.25, random_state=7)
# Support-vector regression with an RBF kernel. epsilon is the width of the
# tube around the prediction inside which training errors carry no penalty.
model = svm.SVR(kernel='rbf', C=10, epsilon=0.2)
model.fit(train_x, train_y)
# Evaluate on the held-out test set with the R^2 score.
pred_test_y = model.predict(test_x)
score = mm.r2_score(test_y, pred_test_y)
print('r2得分: ', score)
# Hand-written samples to run through the trained model.
data = [['Tuesday', '13:30', 'San Francisco', 'yes'],
        ['Thursday', '13:30', 'San Francisco', 'no']]
# Encode each feature column with the encoder saved for it. zip stops at the
# shorter sequence, so the trailing target encoder is not used here.
data = np.array(data).T
test_x = [encoder.transform(column)
          for encoder, column in zip(encoders, data)]
test_x = np.array(test_x).T
pred_test_y = model.predict(test_x)
# The target column's encoder turns the predictions back into the column's
# original representation. NOTE(review): the recorded output shows
# stringified floats, so this is presumably a DigitEncoder; confirm against
# the data file.
pred_test_y = encoders[-1].inverse_transform(pred_test_y)
print('预测结果为: ', pred_test_y)
预测结果:
r2得分: 0.6484595603352119
预测结果为: ['24.111978908657576' '23.61904092888905']
转载于:https://www.cnblogs.com/yuxiangyang/p/11203512.html
相关资源:SVM预测及回归