请找管理员授权。/funnyscript/edit_node_item.php
# -*- coding: utf-8 -*-
"""Draw a K-S curve from a CSV of model scores and true labels.

Usage:
    python draw_ks.py <scores.csv> <output.png>

Every non-blank CSV row must hold the predicted score in column 0 and the
true label in column 1. Labels are parsed with ``int(float(...))`` and the
label ``1`` is treated as the positive class.
"""
import csv
import copy
import numpy as np
import pandas as pd
import sys
import matplotlib as mpl
mpl.use('Agg')  # non-interactive backend: figures are only written to disk
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc  # ROC / AUC helpers

# Fail early with a usage message instead of falling through to open('')
# or savefig('') when an argument is missing.
if len(sys.argv) < 3:
    sys.exit('usage: %s <scores.csv> <output.png>' % sys.argv[0])
file = sys.argv[1]
file2 = sys.argv[2]

# Read both columns in a single pass over the file.
y_score = []
y_test = []
with open(file, 'r', newline='') as csvfile:
    for row in csv.reader(csvfile):
        if not row:
            # csv.reader yields [] for blank lines; they carry no data.
            continue
        y_score.append(row[0])
        y_test.append(row[1])

print(len(y_test))
print(len(y_score))
print(y_test[0:100])
print(y_score[0:100])

# Labels may be written as "1.0", so go through float before int.
y_test2 = [int(float(a)) for a in y_test]
y_score2 = [float(a) for a in y_score]

print(len(y_test2))
print(len(y_score2))
print(y_test2[0:100])
print(y_score2[0:100])


def plot_ks(y_test, y_score, positive_flag, output_path=None):
    """Plot the K-S curve for the given labels and scores and save it as PNG.

    The samples are sorted by score in descending order and the score found
    at each 5% step (5%, 10%, ..., 95%) is used as a threshold. For every
    threshold the true-positive rate (score > threshold among positives) and
    the false-positive rate (1 - share of negatives with score <= threshold)
    are computed; KS is the largest difference between the two.

    Args:
        y_test: sequence of true labels.
        y_score: sequence of predicted scores, same length as ``y_test``.
        positive_flag: label value that marks the positive class.
        output_path: where to write the PNG. Defaults to the module-level
            ``file2`` (the second command-line argument).

    Raises:
        ValueError: if the input is empty or contains only one class, in
            which case the rates are undefined.
    """
    if output_path is None:
        output_path = file2

    # Build the working data set.
    target_data = pd.DataFrame({'y_test': y_test, 'y_score': y_score})
    count = len(target_data)
    if count == 0:
        raise ValueError('plot_ks needs at least one sample')

    # Sort by score, highest first.
    target_data.sort_values(by='y_score', ascending=False, inplace=True)

    # Quantile cut points and the score found at each of them.
    cuts = np.arange(0.05, 1, 0.05)
    print(cuts)
    print(count)
    index = count * cuts
    print(index)
    scores = target_data.y_score.iloc[index.astype('int')]

    # Class membership and class sizes do not depend on the threshold,
    # so compute them once instead of on every loop iteration.
    is_positive = target_data.y_test == positive_flag
    is_negative = target_data.y_test != positive_flag
    positive = int(is_positive.sum())
    negative = int(is_negative.sum())
    if positive == 0 or negative == 0:
        raise ValueError(
            'plot_ks needs both positive and negative samples '
            '(positive=%d, negative=%d)' % (positive, negative))

    # Sensitivity and specificity at every threshold.
    Sensitivity = []
    Specificity = []
    Scores = []
    for score in scores:
        # Positives ranked strictly above the threshold.
        positive_recall = int((is_positive & (target_data.y_score > score)).sum())
        # Negatives at or below the threshold.
        negative_recall = int((is_negative & (target_data.y_score <= score)).sum())
        Sensitivity.append(positive_recall / positive)
        Specificity.append(negative_recall / negative)
        Scores.append(score)

    # Data to plot: y1 is the FPR, y2 the TPR, ks their difference.
    tpr = np.array(Sensitivity)
    fpr = 1 - np.array(Specificity)
    plot_data = pd.DataFrame({'cuts': cuts,
                              'y1': fpr,
                              'y2': tpr,
                              'score': np.array(Scores),
                              'ks': tpr - fpr})

    # Row where TPR - FPR is largest (positional index).
    max_ks_index = int(np.argmax(plot_data.ks.values))
    best = plot_data.iloc[max_ks_index]

    # Create exactly one figure; a bare plt.figure() beforehand would leave
    # an unused empty figure behind.
    plt.figure(figsize=(10, 10))
    x_axis = [0] + cuts.tolist() + [1]
    plt.plot(x_axis, [0] + plot_data.y1.tolist() + [1], label='FPR')
    plt.plot(x_axis, [0] + plot_data.y2.tolist() + [1], label='TPR')
    plt.plot(x_axis, [0] + plot_data.ks.tolist() + [0], label='ks')

    # Reference line at the maximum KS.
    plt.vlines(best['cuts'], ymin=best['y1'], ymax=best['y2'], linestyles='--')

    # Annotation with the KS value and the score at that cut.
    plt.text(x=best['cuts'] + 0.01,
             y=best['y1'] + best['ks'] / 2,
             s='KS= %.2f score=%.2f ' % (best['ks'], best['score']))

    # Legend, then write the image.
    plt.legend()
    plt.savefig(output_path, format='png')
    # The Agg backend cannot display anything, so release the figure
    # instead of calling plt.show().
    plt.close()


# Draw the K-S curve for the data loaded above.
plot_ks(y_test=y_test2, y_score=y_score2, positive_flag=1, output_path=file2)
ID=6114 draw_ks.py
保存
# -*- coding: utf-8 -*-
"""Draw a K-S curve from a CSV of model scores and true labels.

Usage:
    python draw_ks.py <scores.csv> <output.png>

Every non-blank CSV row must hold the predicted score in column 0 and the
true label in column 1. Labels are parsed with ``int(float(...))`` and the
label ``1`` is treated as the positive class.
"""
import csv
import copy
import numpy as np
import pandas as pd
import sys
import matplotlib as mpl
mpl.use('Agg')  # non-interactive backend: figures are only written to disk
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from sklearn.metrics import roc_curve, auc  # ROC / AUC helpers

# Fail early with a usage message instead of falling through to open('')
# or savefig('') when an argument is missing.
if len(sys.argv) < 3:
    sys.exit('usage: %s <scores.csv> <output.png>' % sys.argv[0])
file = sys.argv[1]
file2 = sys.argv[2]

# Read both columns in a single pass over the file.
y_score = []
y_test = []
with open(file, 'r', newline='') as csvfile:
    for row in csv.reader(csvfile):
        if not row:
            # csv.reader yields [] for blank lines; they carry no data.
            continue
        y_score.append(row[0])
        y_test.append(row[1])

print(len(y_test))
print(len(y_score))
print(y_test[0:100])
print(y_score[0:100])

# Labels may be written as "1.0", so go through float before int.
y_test2 = [int(float(a)) for a in y_test]
y_score2 = [float(a) for a in y_score]

print(len(y_test2))
print(len(y_score2))
print(y_test2[0:100])
print(y_score2[0:100])


def plot_ks(y_test, y_score, positive_flag, output_path=None):
    """Plot the K-S curve for the given labels and scores and save it as PNG.

    The samples are sorted by score in descending order and the score found
    at each 5% step (5%, 10%, ..., 95%) is used as a threshold. For every
    threshold the true-positive rate (score > threshold among positives) and
    the false-positive rate (1 - share of negatives with score <= threshold)
    are computed; KS is the largest difference between the two.

    Args:
        y_test: sequence of true labels.
        y_score: sequence of predicted scores, same length as ``y_test``.
        positive_flag: label value that marks the positive class.
        output_path: where to write the PNG. Defaults to the module-level
            ``file2`` (the second command-line argument).

    Raises:
        ValueError: if the input is empty or contains only one class, in
            which case the rates are undefined.
    """
    if output_path is None:
        output_path = file2

    # Build the working data set.
    target_data = pd.DataFrame({'y_test': y_test, 'y_score': y_score})
    count = len(target_data)
    if count == 0:
        raise ValueError('plot_ks needs at least one sample')

    # Sort by score, highest first.
    target_data.sort_values(by='y_score', ascending=False, inplace=True)

    # Quantile cut points and the score found at each of them.
    cuts = np.arange(0.05, 1, 0.05)
    print(cuts)
    print(count)
    index = count * cuts
    print(index)
    scores = target_data.y_score.iloc[index.astype('int')]

    # Class membership and class sizes do not depend on the threshold,
    # so compute them once instead of on every loop iteration.
    is_positive = target_data.y_test == positive_flag
    is_negative = target_data.y_test != positive_flag
    positive = int(is_positive.sum())
    negative = int(is_negative.sum())
    if positive == 0 or negative == 0:
        raise ValueError(
            'plot_ks needs both positive and negative samples '
            '(positive=%d, negative=%d)' % (positive, negative))

    # Sensitivity and specificity at every threshold.
    Sensitivity = []
    Specificity = []
    Scores = []
    for score in scores:
        # Positives ranked strictly above the threshold.
        positive_recall = int((is_positive & (target_data.y_score > score)).sum())
        # Negatives at or below the threshold.
        negative_recall = int((is_negative & (target_data.y_score <= score)).sum())
        Sensitivity.append(positive_recall / positive)
        Specificity.append(negative_recall / negative)
        Scores.append(score)

    # Data to plot: y1 is the FPR, y2 the TPR, ks their difference.
    tpr = np.array(Sensitivity)
    fpr = 1 - np.array(Specificity)
    plot_data = pd.DataFrame({'cuts': cuts,
                              'y1': fpr,
                              'y2': tpr,
                              'score': np.array(Scores),
                              'ks': tpr - fpr})

    # Row where TPR - FPR is largest (positional index).
    max_ks_index = int(np.argmax(plot_data.ks.values))
    best = plot_data.iloc[max_ks_index]

    # Create exactly one figure; a bare plt.figure() beforehand would leave
    # an unused empty figure behind.
    plt.figure(figsize=(10, 10))
    x_axis = [0] + cuts.tolist() + [1]
    plt.plot(x_axis, [0] + plot_data.y1.tolist() + [1], label='FPR')
    plt.plot(x_axis, [0] + plot_data.y2.tolist() + [1], label='TPR')
    plt.plot(x_axis, [0] + plot_data.ks.tolist() + [0], label='ks')

    # Reference line at the maximum KS.
    plt.vlines(best['cuts'], ymin=best['y1'], ymax=best['y2'], linestyles='--')

    # Annotation with the KS value and the score at that cut.
    plt.text(x=best['cuts'] + 0.01,
             y=best['y1'] + best['ks'] / 2,
             s='KS= %.2f score=%.2f ' % (best['ks'], best['score']))

    # Legend, then write the image.
    plt.legend()
    plt.savefig(output_path, format='png')
    # The Agg backend cannot display anything, so release the figure
    # instead of calling plt.show().
    plt.close()


# Draw the K-S curve for the data loaded above.
plot_ks(y_test=y_test2, y_score=y_score2, positive_flag=1, output_path=file2)