# --- High-school graduation rate (population over 25), averaged per state ---
# '-' is treated as a placeholder for a missing value and counted as 0.0,
# so it lowers the state average instead of being excluded from it.
# The cleaned column is assigned back explicitly: calling
# `replace(..., inplace=True)` on a column reached through attribute access
# operates on an intermediate Series and is not guaranteed to update the frame.
percent_over_25_completed_highSchool['percent_completed_hs'] = (
    percent_over_25_completed_highSchool['percent_completed_hs']
    .replace(['-'], 0.0)
    .astype(float)
)

# States in order of first appearance in the data.
area_list = list(percent_over_25_completed_highSchool['Geographic Area'].unique())

# One grouped mean instead of re-filtering the whole frame once per state.
# `sort=False` keeps first-appearance order; `reindex` lines the result up
# with `area_list` exactly.
state_hs_mean = (
    percent_over_25_completed_highSchool
    .groupby('Geographic Area', sort=False)['percent_completed_hs']
    .mean()
    .reindex(area_list)
)
area_highschool = state_hs_mean.tolist()

# sorting: ascending by graduation rate; the original row labels are kept
# (not reset) on `sorted_data2`.
data = pd.DataFrame({'area_list': area_list, 'area_highschool_ratio': area_highschool})
new_index = (data['area_highschool_ratio'].sort_values(ascending=True)).index.values
sorted_data2 = data.reindex(new_index)

# visualization
plt.figure(figsize=(15, 10))
sns.barplot(x=sorted_data2['area_list'], y=sorted_data2['area_highschool_ratio'])
plt.xticks(rotation=90)  # state labels overlap unless rotated
plt.xlabel('States')
plt.ylabel('High School Graduate Rate')
plt.title("Percentage of Given State's Population Above 25 that Has Graduated High School")
plt.show()
print(share_race_city.head())

# --- Share of each state's population by race ---
# (White, Black, Native American, Asian and Hispanic)
race_columns = ['share_white', 'share_black', 'share_native_american',
                'share_asian', 'share_hispanic']

# '-' and '(X)' are treated as missing-value placeholders and counted as 0.0.
share_race_city.replace(['-'], 0.0, inplace=True)
share_race_city.replace(['(X)'], 0.0, inplace=True)

# Assign through `df[cols]` rather than `df.loc[:, cols]`: the `.loc[:, ...]`
# form writes into the existing columns and can leave them as object dtype,
# whereas this form replaces the columns with real float columns.
share_race_city[race_columns] = share_race_city[race_columns].astype(float)

# States in order of first appearance in the data.
area_list = list(share_race_city['Geographic area'].unique())

# Mean share per state for every race column in a single grouped pass.
state_race_mean = (
    share_race_city
    .groupby('Geographic area', sort=False)[race_columns]
    .mean()
    .reindex(area_list)
)
share_white = state_race_mean['share_white'].tolist()
share_black = state_race_mean['share_black'].tolist()
share_native_american = state_race_mean['share_native_american'].tolist()
share_asian = state_race_mean['share_asian'].tolist()
share_hispanic = state_race_mean['share_hispanic'].tolist()

# visualization: horizontal bars for each race drawn on the same axes,
# layered on top of one another with partial transparency.
f, ax = plt.subplots(figsize=(9, 15))
race_layers = [
    (share_white, 'green', 0.5, 'White'),
    (share_black, 'blue', 0.7, 'African American'),
    (share_native_american, 'cyan', 0.6, 'Native American'),
    (share_asian, 'yellow', 0.6, 'Asian'),
    (share_hispanic, 'red', 0.6, 'Hispanic'),
]
for race_share, bar_color, bar_alpha, race_label in race_layers:
    sns.barplot(x=race_share, y=area_list, color=bar_color, alpha=bar_alpha, label=race_label)

ax.legend(loc='lower right', frameon=True)  # show the legend inside a frame
ax.set(xlabel='Percentage of Races', ylabel='States', title="Percentage of State's Population According to Races ")
plt.show()
# --- High-school graduation rate vs. poverty rate per state ---
# Scale both series by their maximum so they can share one y-axis.
# `Series.max()` is used instead of the builtin `max()`: the builtin compares
# element by element and gives order-dependent results if a NaN is present.
# NOTE: both frames are modified in place, so re-running this section
# re-normalises already-normalised values (a no-op when the max is 1.0).
sorted_data['area_poverty_ratio'] = (
    sorted_data['area_poverty_ratio'] / sorted_data['area_poverty_ratio'].max()
)
sorted_data2['area_highschool_ratio'] = (
    sorted_data2['area_highschool_ratio'] / sorted_data2['area_highschool_ratio'].max()
)

# `concat(axis=1)` aligns rows on the index label.
# NOTE(review): this assumes the same index label refers to the same state in
# `sorted_data` and `sorted_data2` -- confirm against where `sorted_data` is built.
data = pd.concat([sorted_data, sorted_data2['area_highschool_ratio']], axis=1)
data.sort_values('area_poverty_ratio', inplace=True)

# visualize
f, ax1 = plt.subplots(figsize=(20, 10))
sns.pointplot(x='area_list', y='area_poverty_ratio', data=data, color='lime', alpha=0.8)
sns.pointplot(x='area_list', y='area_highschool_ratio', data=data, color='red', alpha=0.8)
# Hand-placed text labels serve as the legend (coordinates are in data units).
plt.text(40, 0.6, 'high school graduate ratio', color='red', fontsize=17, style='italic')
plt.text(40, 0.55, 'poverty ratio', color='lime', fontsize=18, style='italic')
plt.xlabel('States', fontsize=15, color='blue')
plt.ylabel('Values', fontsize=15, color='blue')
plt.title('High School Graduate VS Poverty Rate', fontsize=20, color='blue')
plt.grid()
plt.show()
# --- Joint distribution of poverty ratio and graduation ratio ---
print(data.head())
# x/y are passed by keyword and the figure size via `height`: current seaborn
# takes only `data` positionally and renamed the old `size` argument to `height`.
g = sns.jointplot(x=data.area_poverty_ratio, y=data.area_highschool_ratio, kind='kde', height=7)
plt.savefig('graph.png')  # must run before plt.show(), which ends the current figure
plt.show()

print("data.head:", data.head())
# Same relationship with the default joint-plot kind, columns looked up in `data`.
g = sns.jointplot(x='area_poverty_ratio', y='area_highschool_ratio', data=data, height=5, ratio=4, color="r")
plt.show()
# --- Pie chart: share of each race in the `kill` data ---
print(kill.race.head(15))
print(kill.race.value_counts())

# `value_counts()` already excludes NaN, so no separate dropna is needed.
# The previous `kill.race.dropna(inplace=True)` acted on a column Series and
# could not reliably change `kill` itself. Counting once keeps labels and
# sizes guaranteed to be in the same order.
race_counts = kill.race.value_counts()
labels = race_counts.index
sizes = race_counts.values

# Six colours so that six wedges each get a distinct one; matplotlib cycles
# through the list if there are more wedges than colours.
colors = ['grey', 'blue', 'yellow', 'green', 'brown', 'red']
# `explode` must have exactly one entry per wedge, so derive it from the data
# instead of hard-coding its length.
explode = [0] * len(sizes)

# visual
plt.figure(figsize=(7, 7))
plt.pie(sizes, explode=explode, labels=labels, colors=colors, autopct='%1.1f%%')
plt.title('Killed People According To Races:', color='blue', fontsize=15)
plt.show()
# --- Further views of poverty ratio vs. graduation ratio ---
# Scatter plot with a fitted linear regression line.
sns.lmplot(x='area_poverty_ratio', y='area_highschool_ratio', data=data)
plt.show()

print(data.head())
# Bivariate KDE. Both vectors are passed by keyword and the filled style is
# requested with `fill`: current seaborn takes only `data` positionally and
# has replaced the old `shade` argument with `fill`.
sns.kdeplot(x=data.area_poverty_ratio, y=data.area_highschool_ratio, fill=True, cut=3)
plt.show()

# distribution
# Wide-form violin plot with a two-colour cubehelix palette and individual
# observations drawn inside each violin.
# NOTE(review): `data` also holds the string column 'area_list'; presumably
# only the two ratio columns are meant to be plotted -- confirm.
pal = sns.cubehelix_palette(2, rot=-.5, dark=.3)
sns.violinplot(data=data, palette=pal, inner='points')
plt.show()
# Note: anyone looking for Python learning materials can join QQ 728711576.