KeyError while using pandas in Python 2.7
I am using Python 2.7, and I get this problem when running the script from the command window. I have updated pandas and double-checked everything; please tell me how to resolve the problem. The Twitter data I am using was generated through tweepy.
import json
import pandas as pd
import matplotlib.pyplot as plt

tweets_data_path = 'c:/users/e2sn7cy/documents/github/twitter_data.txt'

# Parse the capture file one JSON document per line; lines that are not
# valid JSON are skipped.
tweets_data = []
with open(tweets_data_path, 'r') as tweets_file:
    for line in tweets_file:
        try:
            tweet = json.loads(line)
        except ValueError:
            continue
        tweets_data.append(tweet)

# print len(tweets_data)

# DataFrame that will hold one row per parsed record
tweets = pd.DataFrame()

# Adding columns.
# NOTE: these lookups are the statements the question is about: any parsed
# record that has no 'text' (or 'lang' / 'place') key raises KeyError here.
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
tweets['lang'] = map(lambda tweet: tweet['lang'], tweets_data)
tweets['country'] = map(
    lambda tweet: tweet['place']['country'] if tweet['place'] is not None else None,
    tweets_data)

# Adding charts: bar chart of the five most frequent tweet languages
tweets_by_lang = tweets['lang'].value_counts()

fig, ax = plt.subplots()
ax.tick_params(axis='x', labelsize=15)
ax.tick_params(axis='y', labelsize=10)
ax.set_xlabel('languages', fontsize=15)
ax.set_ylabel('number of tweets', fontsize=15)
ax.set_title('top 5 languages', fontsize=15, fontweight='bold')
tweets_by_lang[:5].plot(ax=ax, kind='bar', color='red')
error:
(venv) c:\users\e2sn7cy\documents\github\twitter-app>python twitter_analytics.py
Traceback (most recent call last):
  File "twitter_analytics.py", line 25, in <module>
    tweets['text'] = map(lambda tweet:tweet['text'] if tweet['text'] else '', tweets_data)
  File "twitter_analytics.py", line 25, in <lambda>
    tweets['text'] = map(lambda tweet:tweet['text'] if tweet['text'] else '', tweets_data)
KeyError: 'text'
twitter data
{"created_at":"wed jun 10 10:01:16 +0000 2015","id":608574643897778176,"id_str":"608574643897778176","text":"d\u00e9couvrez le top 10 des plus beaux buts de la ligue des champions ! #ldc \n\nvid\u00e9o > http:\/\/t.co\/qros6jgbya http:\/\/t.co\/gsbzkhpdwq","source":"\u003ca href=\"https:\/\/about.twitter.com\/products\/tweetdeck\" rel=\"nofollow\"\u003etweetdeck\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":18802382,"id_str":"18802382","name":"football.fr","screen_name":"football_fr","location":"","url":"http:\/\/www.football.fr","description":"compte twitter officiel de football.fr, site d'actualit\u00e9s de newsweb lagard\u00e8re active.","protected":false,"verified":true,"followers_count":15012,"friends_count":1290,"listed_count":165,"favourites_count":30,"statuses_count":19099,"created_at":"fri jan 09 14:35:24 +0000 2009","utc_offset":7200,"time_zone":"paris","geo_enabled":true,"lang":"fr","contributors_enabled":false,"is_translator":false,"profile_background_color":"c0deed","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/403703872\/h_h_h_h_.png","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/403703872\/h_h_h_h_.png","profile_background_tile":false,"profile_link_color":"0084b4","profile_sidebar_border_color":"c0deed","profile_sidebar_fill_color":"ddeef6","profile_text_color":"333333","profile_use_background_image":true,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/562565984548970496\/ggwgwybh_normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/562565984548970496\/ggwgwybh_normal.jpeg","profile_banner_url":"https:\/\/pbs.twimg.com\/profile_banners\/18802382\/1433626125","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":
null,"coordinates":null,"place":null,"contributors":null,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"ldc","indices":[68,72]}],"trends":[],"urls":[{"url":"http:\/\/t.co\/qros6jgbya","expanded_url":"http:\/\/www.football.fr\/ligue-des-champions\/articles\/ronaldo-messi-benzema-les-10-plus-beaux-buts-de-la-saison-video-668464\/","display_url":"football.fr\/ligue-des-cham\u2026","indices":[86,108]}],"user_mentions":[],"symbols":[],"media":[{"id":608574518903341056,"id_str":"608574518903341056","indices":[109,131],"media_url":"http:\/\/pbs.twimg.com\/media\/chixtpkwgaaqsxs.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/chixtpkwgaaqsxs.jpg","url":"http:\/\/t.co\/gsbzkhpdwq","display_url":"pic.twitter.com\/gsbzkhpdwq","expanded_url":"http:\/\/twitter.com\/football_fr\/status\/608574643897778176\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":226,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":600,"h":400,"resize":"fit"},"large":{"w":900,"h":600,"resize":"fit"}}}]},"extended_entities":{"media":[{"id":608574518903341056,"id_str":"608574518903341056","indices":[109,131],"media_url":"http:\/\/pbs.twimg.com\/media\/chixtpkwgaaqsxs.jpg","media_url_https":"https:\/\/pbs.twimg.com\/media\/chixtpkwgaaqsxs.jpg","url":"http:\/\/t.co\/gsbzkhpdwq","display_url":"pic.twitter.com\/gsbzkhpdwq","expanded_url":"http:\/\/twitter.com\/football_fr\/status\/608574643897778176\/photo\/1","type":"photo","sizes":{"small":{"w":340,"h":226,"resize":"fit"},"thumb":{"w":150,"h":150,"resize":"crop"},"medium":{"w":600,"h":400,"resize":"fit"},"large":{"w":900,"h":600,"resize":"fit"}}}]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"fr","timestamp_ms":"1433930476532"} {"created_at":"wed jun 10 10:01:16 +0000 2015","id":608574645676204032,"id_str":"608574645676204032","text":"#coach training manager opportunity lawn #tennis association http:\/\/t.co\/elubyqvjzj","source":"\u003ca 
href=\"http:\/\/www.hootsuite.com\" rel=\"nofollow\"\u003ehootsuite\u003c\/a\u003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":191782743,"id_str":"191782743","name":"jump in sport","screen_name":"jumpinsport","location":"lausanne, switzerland ","url":"http:\/\/www.jumpinsport.com\/","description":"we collect best sports related jobs top sources around world , show them in 1 place! follow great job opportunities.","protected":false,"verified":false,"followers_count":8279,"friends_count":882,"listed_count":168,"favourites_count":38,"statuses_count":30202,"created_at":"fri sep 17 10:18:02 +0000 2010","utc_offset":7200,"time_zone":"bern","geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"dbe9ed","profile_background_image_url":"http:\/\/pbs.twimg.com\/profile_background_images\/215611005\/logo_jump_3_square__4_.jpeg","profile_background_image_url_https":"https:\/\/pbs.twimg.com\/profile_background_images\/215611005\/logo_jump_3_square__4_.jpeg","profile_background_tile":false,"profile_link_color":"cc3366","profile_sidebar_border_color":"dbe9ed","profile_sidebar_fill_color":"e6f6f9","profile_text_color":"333333","profile_use_background_image":false,"profile_image_url":"http:\/\/pbs.twimg.com\/profile_images\/1267864780\/logo_jump_3_square__4__normal.jpeg","profile_image_url_https":"https:\/\/pbs.twimg.com\/profile_images\/1267864780\/logo_jump_3_square__4__normal.jpeg","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":0,"favorite_count":0,"entities":{"hashtags":[{"text":"coach","indices":[0,6]},{"text":"tennis","indices":[46,53]}],"trends":[],"urls":[{"url":"http:\/\/t.co\/elubyqvjzj","expanded_url":"http:\/\/www.jumpinsport.com\/
?q=node\/30549","display_url":"jumpinsport.com\/?q=node\/30549","indices":[66,88]}],"user_mentions":[],"symbols":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","timestamp_ms":"1433930476956"}
Here is the program through which I generated the Twitter data:
# Import the required classes from tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream

# Variables holding the Twitter API credentials (placeholder values)
access_token = "abc"
access_token_secret = "xyz"
consumer_key = "a123"
consumer_secret = "z123"


class StdOutListener(StreamListener):
    """Stream listener that prints everything it receives to stdout."""

    def on_data(self, data):
        # Print the raw message exactly as received.
        print(data)
        return True

    def on_error(self, status):
        # The status value is printed to stdout as well, so it ends up in the
        # same output as the received data when stdout is redirected.
        print(status)


if __name__ == '__main__':
    # Twitter authentication and connection to the streaming API
    listener = StdOutListener()
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    stream = Stream(auth, listener)

    # Filter the Twitter stream by keywords
    stream.filter(track=['cricket', 'football', 'tennis'])
After that, I used this command to capture the data in a text file:
python twitter_streaming.py > twitter_data.txt
There is no problem in doing:
import pandas as pd

# Assigning a plain list to a new column of an empty DataFrame works fine,
# so the column assignment itself is not what fails.
aux = pd.DataFrame()
aux['text'] = [0, 1, 2]
So the problem is with tweet['text'] (inside the map function) in:
tweets['text'] = map(lambda tweet: tweet['text'], tweets_data)
In at least one of the lines read from the file at tweets_data_path and loaded as JSON, there is no such field 'text'.
Also, as a side note, consider list comprehensions as an alternative to map and lambda:
array = [{'a': 1, 'b': 2}, {'a': 11, 'b': 22}]

# Two equivalent ways to extract the 'a' value of every dict:
map(lambda x: x['a'], array)   # map + lambda
[x['a'] for x in array]        # list comprehension
See how the last two lines output the same list, although I consider the latter easier to read.
update (workaround)
I thought the information above was enough for understanding the problem and finding a solution on your own (when learning a programming language, I think it is better to learn by doing).
At your request, here is a workaround: avoid appending a tweet to tweets_data when loading lines from the file if the line is incomplete. Something like:
# Keep only complete tweets: skip every record that lacks a field used later.
required_keys = ('text', 'lang', 'place')
for line in tweets_file:
    try:
        tweet = json.loads(line)
    except ValueError:
        # Blank or otherwise non-JSON line.
        continue
    if not isinstance(tweet, dict):
        # A line holding a bare JSON value (for example a number) parses
        # successfully but is not a tweet object.
        continue
    if not all(key in tweet for key in required_keys):
        continue
    # 'place' may be null; when it is present it must carry a 'country'.
    if tweet['place'] and 'country' not in tweet['place']:
        continue
    tweets_data.append(tweet)
Please note that this code has not been tested. Try to understand how it works and fix or adapt it to your needs.
Comments
Post a Comment