美国警方致命枪击案数据可视化分析(下)

it2022-05-05  107

# Second half of the US police fatal-shootings visualisation script.
#
# Relies on names created earlier in the file (not visible in this section):
#   pd, plt, sns                           - pandas / matplotlib.pyplot / seaborn
#   percent_over_25_completed_highSchool   - per-city high-school completion
#   share_race_city                        - per-city population share by race
#   sorted_data                            - per-state poverty ratio frame
#   kill                                   - individual shooting records

# ---------------------------------------------------------------------------
# High-school graduation rate per state
# ---------------------------------------------------------------------------
# '-' marks a missing value in the raw data; it is mapped to 0.0 exactly as
# before.  NOTE(review): zero placeholders pull the state averages down -
# confirm this is intended rather than dropping those rows.
# The result is assigned back to the frame: an inplace replace on the
# attribute-accessed Series is not guaranteed to reach the DataFrame.
percent_over_25_completed_highSchool['percent_completed_hs'] = (
    percent_over_25_completed_highSchool['percent_completed_hs']
    .replace(['-'], 0.0)
    .astype(float)
)

# sort=False keeps states in order of first appearance, matching unique().
state_highschool = percent_over_25_completed_highSchool.groupby(
    'Geographic Area', sort=False
)['percent_completed_hs'].mean()
area_list = list(state_highschool.index)
area_highschool = list(state_highschool.values)

# sorting (sort_values keeps the original row labels, like the reindex did)
data = pd.DataFrame({'area_list': area_list,
                     'area_highschool_ratio': area_highschool})
sorted_data2 = data.sort_values('area_highschool_ratio', ascending=True)

# visualization
plt.figure(figsize=(15, 10))
sns.barplot(x=sorted_data2['area_list'],
            y=sorted_data2['area_highschool_ratio'])
plt.xticks(rotation=90)
plt.xlabel('States')
plt.ylabel('High School Graduate Rate')
plt.title("Percentage of Given State's Population Above 25 that Has Graduated High School")
plt.show()

# ---------------------------------------------------------------------------
# Share of each state's population that is White, Black, Native American,
# Asian and Hispanic
# ---------------------------------------------------------------------------
print(share_race_city.head())

race_columns = ['share_white', 'share_black', 'share_native_american',
                'share_asian', 'share_hispanic']

# '-' and '(X)' are both placeholders for missing values in the raw data.
share_race_city.replace(['-', '(X)'], 0.0, inplace=True)
# Plain column assignment replaces the columns (and their dtype);
# `.loc[:, cols] = ...` writes into the existing object-dtype columns.
share_race_city[race_columns] = share_race_city[race_columns].astype(float)

state_shares = share_race_city.groupby(
    'Geographic area', sort=False
)[race_columns].mean()
area_list = list(state_shares.index)
share_white = list(state_shares['share_white'])
share_black = list(state_shares['share_black'])
share_native_american = list(state_shares['share_native_american'])
share_asian = list(state_shares['share_asian'])
share_hispanic = list(state_shares['share_hispanic'])

# visualization: the five bar layers are drawn on top of each other
f, ax = plt.subplots(figsize=(9, 15))
sns.barplot(x=share_white, y=area_list, color='green', alpha=0.5, label='White')
sns.barplot(x=share_black, y=area_list, color='blue', alpha=0.7, label='African American')
sns.barplot(x=share_native_american, y=area_list, color='cyan', alpha=0.6, label='Native American')
sns.barplot(x=share_asian, y=area_list, color='yellow', alpha=0.6, label='Asian')
sns.barplot(x=share_hispanic, y=area_list, color='red', alpha=0.6, label='Hispanic')

ax.legend(loc='lower right', frameon=True)  # legend visibility
ax.set(xlabel='Percentage of Races', ylabel='States',
       title="Percentage of State's Population According to Races ")
plt.show()

# ---------------------------------------------------------------------------
# High-school graduation rate vs. poverty rate per state
# ---------------------------------------------------------------------------
# Scale both ratios by their maximum so they share one axis.
sorted_data['area_poverty_ratio'] = (
    sorted_data['area_poverty_ratio'] / sorted_data['area_poverty_ratio'].max()
)
sorted_data2['area_highschool_ratio'] = (
    sorted_data2['area_highschool_ratio'] / sorted_data2['area_highschool_ratio'].max()
)
# concat(axis=1) aligns rows on the index.
# NOTE(review): assumes sorted_data uses the same per-state index as
# sorted_data2 - confirm against the code that builds sorted_data.
data = pd.concat([sorted_data, sorted_data2['area_highschool_ratio']], axis=1)
data.sort_values('area_poverty_ratio', inplace=True)

# visualize
f, ax1 = plt.subplots(figsize=(20, 10))
sns.pointplot(x='area_list', y='area_poverty_ratio', data=data, color='lime', alpha=0.8)
sns.pointplot(x='area_list', y='area_highschool_ratio', data=data, color='red', alpha=0.8)
plt.text(40, 0.6, 'high school graduate ratio', color='red', fontsize=17, style='italic')
plt.text(40, 0.55, 'poverty ratio', color='lime', fontsize=18, style='italic')
plt.xlabel('States', fontsize=15, color='blue')
plt.ylabel('Values', fontsize=15, color='blue')
plt.title('High School Graduate VS Poverty Rate', fontsize=20, color='blue')
plt.grid()
plt.show()

print(data.head())

# Joint KDE plot.  seaborn renamed `size` to `height` and requires x/y to be
# passed by keyword.
g = sns.jointplot(x=data.area_poverty_ratio, y=data.area_highschool_ratio,
                  kind='kde', height=7)
plt.savefig('graph.png')
plt.show()

print("data.head:", data.head())

g = sns.jointplot(x='area_poverty_ratio', y='area_highschool_ratio',
                  data=data, height=5, ratio=4, color="r")
plt.show()

# ---------------------------------------------------------------------------
# Pie chart: race distribution in the kill data
# ---------------------------------------------------------------------------
print(kill.race.head(15))
print(kill.race.value_counts())

# value_counts() already ignores missing values, so no dropna is needed.
race_counts = kill.race.value_counts()
labels = race_counts.index
sizes = race_counts.values
colors = ['grey', 'blue', 'yellow', 'green', 'brown']
# One explode entry per wedge; plt.pie raises if the lengths differ.
explode = [0] * len(sizes)

# visual
plt.figure(figsize=(7, 7))
plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%')
plt.title('Killed People According To Races:', color='blue', fontsize=15)
plt.show()

# Linear regression of graduation ratio on poverty ratio.
sns.lmplot(x='area_poverty_ratio', y='area_highschool_ratio', data=data)
plt.show()

print(data.head())

# Bivariate KDE; `fill` is the current name of the deprecated `shade`.
sns.kdeplot(x=data.area_poverty_ratio, y=data.area_highschool_ratio,
            fill=True, cut=3)
plt.show()

# distribution
pal = sns.cubehelix_palette(2, rot=-.5, dark=.3)
sns.violinplot(data=data, palette=pal, inner='points')
plt.show()

想获取 Python 学习资料的小伙伴可以加 QQ:728711576


最新回复(0)