美国警方致命枪击案数据可视化分析 上

it2022-05-05  144

# Exploratory plots for the US police fatal-shootings data set:
#   1. average poverty rate per geographic area (state), highest first;
#   2. the 15 most frequent name tokens among the people killed.
import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from collections import Counter

# Input data files are read from the "input/" directory.
# check_output is imported but never called in this part of the script; the
# import is kept in case a later part relies on it.
from subprocess import check_output

# Any results you write to the current directory are saved as output.

# Every CSV is read as windows-1252.
median_house_hold_in_come = pd.read_csv('input/MedianHouseholdIncome2015.csv', encoding="windows-1252")
percentage_people_below_poverty_level = pd.read_csv('input/PercentagePeopleBelowPovertyLevel.csv', encoding="windows-1252")
percent_over_25_completed_highSchool = pd.read_csv('input/PercentOver25CompletedHighSchool.csv', encoding="windows-1252")
share_race_city = pd.read_csv('input/ShareRaceByCity.csv', encoding="windows-1252")
kill = pd.read_csv('input/PoliceKillingsUS.csv', encoding="windows-1252")

# --- Poverty rate per geographic area ---------------------------------------
print(percentage_people_below_poverty_level.head())
# DataFrame.info() prints its report itself and returns None, so it must not be
# wrapped in print() (that emitted a stray "None" line).
percentage_people_below_poverty_level.info()
print(percentage_people_below_poverty_level['Geographic Area'].unique())

# Assign the cleaned column back explicitly. The previous chained
# `df.poverty_rate.replace(..., inplace=True)` acts on an intermediate Series
# and does not update the frame under pandas Copy-on-Write.
# NOTE(review): '-' is presumably a missing-value marker; counting it as 0.0
# pulls the per-area averages down. Kept as-is to preserve the existing chart;
# confirm whether NaN would be the better choice.
percentage_people_below_poverty_level['poverty_rate'] = (
    percentage_people_below_poverty_level['poverty_rate']
    .replace(['-'], 0.0)
    .astype(float)
)

# One grouped pass instead of re-filtering the whole frame once per area.
# sort=False keeps the areas in first-appearance order, like unique() did.
area_poverty = (
    percentage_people_below_poverty_level
    .groupby('Geographic Area', sort=False)['poverty_rate']
    .mean()
)
area_list = list(area_poverty.index)
area_poverty_ratio = area_poverty.tolist()

data = pd.DataFrame({'area_list': area_list, 'area_poverty_ratio': area_poverty_ratio})
new_index = data['area_poverty_ratio'].sort_values(ascending=False).index.values
sorted_data = data.reindex(new_index)

# visualization
plt.figure(figsize=(15, 10))
sns.barplot(x=sorted_data['area_list'], y=sorted_data['area_poverty_ratio'])
plt.xticks(rotation=45)
plt.xlabel('States')
plt.ylabel('Poverty Rate')
plt.title('Poverty Rate Given States')
plt.show()

# --- Most common name tokens -------------------------------------------------
print(kill.head())

# Skip missing names and the 'TK TK' entries (presumably a placeholder for an
# unknown name -- confirm against the data set's documentation).
names = kill['name'].dropna()
separate = names[names != 'TK TK'].str.split()

# Count every whitespace-separated token of every name. The previous
# `a, b = zip(*separate)` kept only the first two tokens of each name (zip
# stops at the shortest list), so anything after a middle name was dropped,
# and a single-token name made the two-way unpacking raise ValueError.
name_list = [token for tokens in separate for token in tokens]
name_count = Counter(name_list)
most_common_names = name_count.most_common(15)
x = [token for token, _ in most_common_names]
y = [count for _, count in most_common_names]

plt.figure(figsize=(15, 10))
ax = sns.barplot(x=x, y=y, palette=sns.cubehelix_palette(len(x)))
plt.xlabel('Name or Surname of killed people')
plt.ylabel('Frequency')
plt.title('Most common 15 Name or Surname of killed people')
plt.show()

想获取 Python 学习资料的小伙伴可以加 QQ:728711576


最新回复(0)