31. Airbnb Data

import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from urllib3.exceptions import ProtocolError
import pprint
from pandas.io.json import json_normalize
import pandas as pd
from urllib3.exceptions import ProtocolError
# Explore-tabs search endpoint, including the fixed query parameters of the
# "Cologne, Germany" homes search.
api_base = 'https://www.airbnb.com/api/v2/explore_tabs?version=1.3.8&_format=for_explore_search_web&experiences_per_grid=20&items_per_grid=18&guidebooks_per_grid=20&auto_ib=true&fetch_filters=true&has_zero_guest_treatment=false&is_guided_search=true&is_new_cards_experiment=true&luxury_pre_launch=false&query_understanding_enabled=true&show_groupings=true&supports_for_you_v3=true&timezone_offset=-360&client_session_id=82674acc-b274-41dc-8b00-499d5c2fea44&metadata_only=false&is_standard_search=true&refinement_paths%5B%5D=%2Fhomes&selected_tab_id=home_tab&place_id=ChIJ5S-raZElv0cR8HcqSvxgJwQ&allow_override%5B%5D=&s_tag=kke9hwU1&screen_size=medium&query=Cologne%2C+Germany&_intents=p1'
# Query-string suffix carrying the API key, currency and locale; it is
# appended to api_base when a request URL is built.
api_key = '&key=d306zoyjsyarp7ifhu67rjxn52tv0t20&currency=USD&locale=en'
# Query-string suffix for a fixed section offset.
# NOTE(review): not referenced by the other code visible here - confirm it is still needed.
api_section_offset = '&section_offset=4'
import requests
import json
# Retry settings for the scraping loop: [maximum number of attempts,
# back-off factor in seconds that is multiplied by the attempt count].
retry_lvl = [100, 0.1]

32. Key

# Download the Airbnb home page and read the API key from the JSON stored in
# its "_bootstrap-layout-init" meta tag.
from bs4 import BeautifulSoup
airbnb_home = requests.get("http://airbnb.com")
# Stop on an HTTP error instead of trying to parse an error page.
airbnb_home.raise_for_status()

soup = BeautifulSoup(airbnb_home.content, "html.parser")

metatags = soup.find_all('meta', id="_bootstrap-layout-init")
metacontents = [metatag["content"] for metatag in metatags]

# Without this check a missing tag only shows up as an opaque IndexError below.
if not metacontents:
    raise RuntimeError('meta tag "_bootstrap-layout-init" not found on the Airbnb home page')

metajson = json.loads(metacontents[0])

metajson

# NOTE(review): this replaces the api_key configured earlier, which also
# carried '&currency=USD&locale=en' - confirm dropping those is intended.
api_key = metajson['api_config']['key']
api_key = '&key='+api_key
print(api_key)
&key=d306zoyjsyarp7ifhu67rjxn52tv0t20

33. Explore API

# Request the search results with the configured base URL and key suffix,
# then decode the JSON body.
rsp = requests.get(api_base+api_key)
rsp_json = rsp.json()
# Number of top-level keys in the decoded response.
len(rsp_json)
2
rsp_json.keys()
dict_keys(['explore_tabs', 'metadata'])
"pagination_metadata": {
        "has_next_page": true,
        "items_offset": 18,
        "section_offset": 4,
        "search_session_id": "eee6b55b-100d-4314-800a-393bec147f25"
      }
rsp_json['explore_tabs'][0]['pagination_metadata']['has_next_page']
True
rsp_json['explore_tabs'][0]['home_tab_metadata'].keys()
dict_keys(['urgency_commitment', 'golden_ticket_urgency_commitment', 'listing_cards_urgency_commitment_metadata', 'listing_cards_price_line_urgency_commitment_metadata', 'messages', 'facets', 'overrides', 'search_feed_items', 'remarketing_ids', 'location', 'breadcrumbs', 'listings_count', 'search', 'geography', 'price_histogram', 'filters'])
len(rsp_json['explore_tabs'][0]['sections'])
4
rsp_json['explore_tabs'][0]['sections'][3].keys()
dict_keys(['backend_search_id', 'display_type', 'experiments_metadata', 'result_type', 'search_session_id', 'section_id', 'section_type_uid', 'is_paginated', 'bankai_section_id', 'refinements', 'inserts', 'listings', 'review_items', 'breadcrumbs'])
len(rsp_json['explore_tabs'][0]['sections'][3]['listings'])
12
# Look at the first listing of the fourth section, raw and flattened.
# The trailing ';' presumably suppresses the notebook output - confirm.
rsp_json['explore_tabs'][0]['sections'][3]['listings'][0];
# flatten_json is defined further down, in the "Functions" section.
flatten_json(rsp_json['explore_tabs'][0]['sections'][3]['listings'][0]);

34. Functions

# fields

# Attribute names that get the 'listing_' prefix in fields_default.
# 'host_languages' stays disabled, exactly as in the original list.
listing_fields = [
    'bathrooms', 'bedrooms', 'beds', 'person_capacity',
    'id',
    'is_business_travel_ready', 'is_fully_refundable',
    'is_host_highly_rated', 'is_rebookable', 'is_superhost',
    'lat', 'lng',
    'picture_count', 'preview_amenities', 'reviews_count',
    'star_rating', 'tier_id',
]

# Attribute names that get the 'pricing_quote_rate_with_service_fee_' prefix.
pricing_quote_rate_with_service_fee_fields = ['amount', 'currency']

# Full, ordered list of flattened column names kept for every listing:
# prefixed listing attributes, prefixed price attributes, then the rate type.
fields_default = (
    list(map('listing_{}'.format, listing_fields))
    + list(map('pricing_quote_rate_with_service_fee_{}'.format,
               pricing_quote_rate_with_service_fee_fields))
    + ['pricing_quote_rate_type']
)
fields_default
['listing_bathrooms',
 'listing_bedrooms',
 'listing_beds',
 'listing_person_capacity',
 'listing_id',
 'listing_is_business_travel_ready',
 'listing_is_fully_refundable',
 'listing_is_host_highly_rated',
 'listing_is_rebookable',
 'listing_is_superhost',
 'listing_lat',
 'listing_lng',
 'listing_picture_count',
 'listing_preview_amenities',
 'listing_reviews_count',
 'listing_star_rating',
 'listing_tier_id',
 'pricing_quote_rate_with_service_fee_amount',
 'pricing_quote_rate_with_service_fee_currency',
 'pricing_quote_rate_type']
def listing_parser(listing, fields = None):
    """Build a DataFrame holding the selected columns of flattened listings.

    Parameters
    ----------
    listing : list of dict
        One flat dict per listing (column name -> value).
    fields : list of str, optional
        Column names to keep, in this order.  When omitted, the standard
        listing and pricing columns (the same names as the module-level
        ``fields_default``) are used.

    Returns
    -------
    pandas.DataFrame
        A frame with exactly the requested columns.  A column that none of
        the records provides is filled with NaN instead of raising KeyError,
        so a page with incomplete listings no longer aborts a scraping run.
    """
    if fields is None:
        fields = [
            'listing_bathrooms', 'listing_bedrooms', 'listing_beds',
            'listing_person_capacity', 'listing_id',
            'listing_is_business_travel_ready', 'listing_is_fully_refundable',
            'listing_is_host_highly_rated', 'listing_is_rebookable',
            'listing_is_superhost', 'listing_lat', 'listing_lng',
            'listing_picture_count', 'listing_preview_amenities',
            'listing_reviews_count', 'listing_star_rating', 'listing_tier_id',
            'pricing_quote_rate_with_service_fee_amount',
            'pricing_quote_rate_with_service_fee_currency',
            'pricing_quote_rate_type',
        ]

    # reindex keeps the requested order and tolerates absent columns.
    return pd.DataFrame(listing).reindex(columns=fields)

        
    
    
def next_indicator(response_json):
    """Return the ``has_next_page`` value from the first explore tab's pagination metadata."""
    first_tab = response_json['explore_tabs'][0]
    pagination = first_tab['pagination_metadata']
    return pagination['has_next_page']
def flatten_json(y):
    """Flatten nested dicts and lists into one single-level dict.

    Dict keys and list positions along the path to a value are joined with
    ``_`` to form the flat key, e.g. ``{'a': {'b': [1, 2]}}`` becomes
    ``{'a_b_0': 1, 'a_b_1': 2}``.  Empty dicts and lists contribute no
    entries; a non-container argument is stored under the empty key ``''``.

    Subclasses of dict and list are flattened like the built-in types.
    """
    out = {}

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key, value in x.items():
                flatten(value, name + key + '_')
        elif isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, name + str(index) + '_')
        else:
            # Leaf value: drop the trailing '_' separator from the path.
            out[name[:-1]] = x

    flatten(y)
    return out

35. Demo Scraping

# Page through the explore API, starting at section offset 0, and collect the
# listings of every page into `df_total`.
from time import sleep  # back-off between retries; `sleep` was never imported


def _fetch_page(section_offset):
    """Request one result page, retrying on request errors.

    Makes up to retry_lvl[0] attempts and sleeps attempt * retry_lvl[1]
    seconds after each failed one.  Returns the response object, or None
    when every attempt failed.
    """
    url = api_base + api_key + '&section_offset=' + str(section_offset)
    for attempt in range(1, retry_lvl[0] + 1):
        try:
            # The timeout turns a stalled connection into a retryable error.
            return requests.get(url, timeout=30)
        except requests.exceptions.RequestException as err:
            print('request error', err)
            sleep(attempt * retry_lvl[1])
    return None


next_sec = True
sec_off = 0
page_frames = []

while next_sec:
    rsp = _fetch_page(sec_off)
    if rsp is None:
        # Stop here rather than parsing the previous page's response again.
        print('giving up at section offset', sec_off)
        break

    rsp_json = rsp.json()

    # As in the original cells, listings are read from the fourth section of
    # the first page and from the first section of every later page.
    # NOTE(review): confirm these positions against the current API response.
    section_index = 3 if sec_off == 0 else 0
    listing_array = [flatten_json(item) for item in rsp_json['explore_tabs'][0]['sections'][section_index]['listings'] ]

    df = listing_parser(listing_array)
    page_frames.append(df)

    sec_off = sec_off + 1

    next_sec = next_indicator(rsp_json)

if not page_frames:
    raise RuntimeError('no result page could be fetched')

# Newest page first, which is the row order the original page-by-page
# concatenation produced.  The first page is included once only; the original
# additionally concatenated a not-yet-defined `df` after the first request.
df_total = pd.concat(page_frames[::-1], join='inner')
    
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-254-21197f9853a9> in <module>()
     20     listing_array = [flatten_json(item) for item in rsp_json['explore_tabs'][0]['sections'][0]['listings'] ]
     21 
---> 22     df = listing_parser(listing_array)
     23     df_total = pd.concat([df, df_total], join='inner')
     24 

<ipython-input-217-392cc902e153> in listing_parser(listing, fields)
      3         fields = ['listing_bathrooms', 'listing_bedrooms', 'listing_beds', 'listing_person_capacity', 'listing_host_languages', 'listing_id', 'listing_is_business_travel_ready', 'listing_is_fully_refundable', 'listing_is_host_highly_rated', 'listing_is_rebookable', 'listing_is_superhost', 'listing_lat', 'listing_lng', 'listing_picture_count', 'listing_preview_amenities', 'listing_reviews_count', 'listing_star_rating', 'listing_tier_id', 'pricing_quote_rate_with_service_fee_amount', 'pricing_quote_rate_with_service_fee_currency', 'pricing_quote_rate_type']
      4 
----> 5     return pd.DataFrame(listing)[fields_default]
      6 
      7 

~/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2131         if isinstance(key, (Series, np.ndarray, Index, list)):
   2132             # either boolean or fancy integer index
-> 2133             return self._getitem_array(key)
   2134         elif isinstance(key, DataFrame):
   2135             return self._getitem_frame(key)

~/anaconda/lib/python3.6/site-packages/pandas/core/frame.py in _getitem_array(self, key)
   2175             return self._take(indexer, axis=0, convert=False)
   2176         else:
-> 2177             indexer = self.loc._convert_to_indexer(key, axis=1)
   2178             return self._take(indexer, axis=1, convert=True)
   2179 

~/anaconda/lib/python3.6/site-packages/pandas/core/indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
   1267                 if mask.any():
   1268                     raise KeyError('{mask} not in index'
-> 1269                                    .format(mask=objarr[mask]))
   1270 
   1271                 return _values_from_object(indexer)

KeyError: "['listing_lat' 'listing_lng'] not in index"
df_total
listing_bathrooms listing_bedrooms listing_beds listing_person_capacity listing_id listing_is_business_travel_ready listing_is_fully_refundable listing_is_host_highly_rated listing_is_rebookable listing_is_superhost listing_lat listing_lng listing_picture_count listing_preview_amenities listing_reviews_count listing_star_rating listing_tier_id pricing_quote_rate_with_service_fee_amount pricing_quote_rate_with_service_fee_currency pricing_quote_rate_type
0 1.0 1 1 2 2787889 False True False False True 50.941302 6.936276 24 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 165 4.5 0 76.0 USD nightly
1 1.0 1 1 1 24889964 False True False False False 50.932664 6.959956 9 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 1 NaN 0 37.0 USD nightly
2 1.0 0 1 2 23365913 False True True False True 50.921521 6.933439 12 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 36 5.0 0 44.0 USD nightly
3 1.0 1 2 3 277516 False True True False False 50.948113 6.950925 19 Kitchen 69 4.5 0 70.0 USD nightly
4 1.0 2 3 6 151877 False True False False True 50.918680 6.962310 34 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 197 4.5 0 96.0 USD nightly
5 1.0 1 2 2 10851686 False True True False True 50.918086 6.961938 6 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 65 5.0 0 37.0 USD nightly
6 1.0 1 2 3 18425009 False True False False False 50.951706 6.956479 60 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 57 4.5 0 58.0 USD nightly
7 1.0 1 2 3 21246774 False True True False True 50.938014 6.941539 14 Wifi, Hair dryer 58 5.0 0 82.0 USD nightly
8 1.0 1 1 2 10612377 False True False False False 50.941124 6.999401 15 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 74 4.5 0 26.0 USD nightly
9 1.0 0 2 2 23447819 False True True False False 50.934467 6.973998 5 36 5.0 0 35.0 USD nightly
10 1.0 1 1 2 18263865 False True True False True 50.929644 6.942007 2 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 75 5.0 0 34.0 USD nightly
11 1.0 1 1 2 13881993 False True True False False 50.941947 6.941310 38 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 68 5.0 0 72.0 USD nightly
12 1.5 1 1 3 4974414 False True True False True 50.927488 6.955648 28 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 147 5.0 0 67.0 USD nightly
13 1.0 1 1 2 15740991 False True True False False 50.934543 6.945216 7 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 66 4.5 0 35.0 USD nightly
14 1.0 0 3 7 22464711 False True True False True 50.930502 6.981885 38 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 46 5.0 0 22.0 USD nightly
15 1.0 1 1 2 3291488 False True False False True 50.925255 6.928739 10 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 66 4.5 0 35.0 USD nightly
16 1.0 1 1 2 5939146 False True False False False 50.939653 7.000305 5 Wifi 63 4.5 0 40.0 USD nightly
17 1.0 1 1 2 21429877 False True True False False 50.908204 6.924854 7 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 28 5.0 0 35.0 USD nightly
0 1.0 1 1 2 2787889 False True False False True 50.941302 6.936276 24 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 165 4.5 0 76.0 USD nightly
1 1.0 1 1 1 24889964 False True False False False 50.932664 6.959956 9 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 1 NaN 0 37.0 USD nightly
2 1.0 0 1 2 23365913 False True True False True 50.921521 6.933439 12 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 36 5.0 0 44.0 USD nightly
3 1.0 1 2 3 277516 False True True False False 50.948113 6.950925 19 Kitchen 69 4.5 0 70.0 USD nightly
4 1.0 2 3 6 151877 False True False False True 50.918680 6.962310 34 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 197 4.5 0 96.0 USD nightly
5 1.0 1 2 2 10851686 False True True False True 50.918086 6.961938 6 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 65 5.0 0 37.0 USD nightly
6 1.0 1 2 3 18425009 False True False False False 50.951706 6.956479 60 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 57 4.5 0 58.0 USD nightly
7 1.0 1 2 3 21246774 False True True False True 50.938014 6.941539 14 Wifi, Hair dryer 58 5.0 0 82.0 USD nightly
8 1.0 1 1 2 10612377 False True False False False 50.941124 6.999401 15 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 74 4.5 0 26.0 USD nightly
9 1.0 0 2 2 23447819 False True True False False 50.934467 6.973998 5 36 5.0 0 35.0 USD nightly
10 1.0 1 1 2 18263865 False True True False True 50.929644 6.942007 2 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 75 5.0 0 34.0 USD nightly
11 1.0 1 1 2 13881993 False True True False False 50.941947 6.941310 38 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 68 5.0 0 72.0 USD nightly
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
12 1.0 0 2 2 22780131 False True False False False 50.933009 6.950762 10 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 42 4.5 0 35.0 USD nightly
13 1.0 1 1 2 16669399 False True True False False 50.953993 6.952722 5 Wifi, Hair dryer, Laptop friendly workspace 151 5.0 0 23.0 USD nightly
14 1.0 1 1 2 2434296 False True True False True 50.949020 6.961224 23 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 250 5.0 0 58.0 USD nightly
15 1.0 1 3 3 611898 False True True False False 50.928164 6.943894 8 Wifi, Hair dryer 191 5.0 0 25.0 USD nightly
16 1.0 0 3 5 21360894 False True True False True 50.932803 7.001185 17 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 70 5.0 0 23.0 USD nightly
17 1.0 1 2 3 15441038 False True True False True 50.948075 6.951266 21 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 101 5.0 0 58.0 USD nightly
0 1.0 1 2 3 173230 False True True False True 50.949734 6.960503 24 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 299 4.5 0 62.0 USD nightly
1 1.0 1 3 5 1050191 False True False False False 50.930537 6.938651 17 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 269 4.5 0 35.0 USD nightly
2 1.0 0 1 2 22107173 False True True False True 50.939449 6.936492 15 Wifi, Kitchen, Hair dryer 69 5.0 0 53.0 USD nightly
3 1.0 1 1 2 13821383 False True False False False 50.921376 6.951496 6 Wifi, Kitchen 264 4.5 0 53.0 USD nightly
4 1.0 1 1 2 17401432 False True False False False 50.911821 6.943862 25 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 166 4.5 0 30.0 USD nightly
5 2.0 3 6 6 571118 False True True False True 50.935100 6.863792 55 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 305 5.0 0 32.0 USD nightly
6 1.0 1 1 2 1455896 False True True False False 50.933412 6.932869 15 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 350 4.5 0 50.0 USD nightly
7 1.0 1 1 2 7977357 False True True False False 50.931095 6.934439 22 Wifi, Kitchen, Hair dryer 289 4.5 0 29.0 USD nightly
8 1.0 1 1 2 3452377 False True False False False 50.920560 6.911326 21 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 232 4.5 0 41.0 USD nightly
9 1.0 1 3 3 19353511 False True True False False 50.921005 6.962108 18 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 89 5.0 0 46.0 USD nightly
10 1.0 1 1 2 4534029 False True False False False 50.948891 6.955226 11 Wifi 193 4.5 0 46.0 USD nightly
11 1.0 1 1 2 4562588 False True False False True 50.949547 6.924072 11 Wifi, Kitchen, Hair dryer 270 4.5 0 42.0 USD nightly
0 1.0 1 2 3 173230 False True True False True 50.949734 6.960503 24 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 299 4.5 0 62.0 USD nightly
1 1.0 1 3 5 1050191 False True False False False 50.930537 6.938651 17 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 269 4.5 0 35.0 USD nightly
2 1.0 0 1 2 22107173 False True True False True 50.939449 6.936492 15 Wifi, Kitchen, Hair dryer 69 5.0 0 53.0 USD nightly
3 1.0 1 1 2 13821383 False True False False False 50.921376 6.951496 6 Wifi, Kitchen 264 4.5 0 53.0 USD nightly
4 1.0 1 1 2 17401432 False True False False False 50.911821 6.943862 25 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 166 4.5 0 30.0 USD nightly
5 2.0 3 6 6 571118 False True True False True 50.935100 6.863792 55 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 305 5.0 0 32.0 USD nightly
6 1.0 1 1 2 1455896 False True True False False 50.933412 6.932869 15 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 350 4.5 0 50.0 USD nightly
7 1.0 1 1 2 7977357 False True True False False 50.931095 6.934439 22 Wifi, Kitchen, Hair dryer 289 4.5 0 29.0 USD nightly
8 1.0 1 1 2 3452377 False True False False False 50.920560 6.911326 21 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 232 4.5 0 41.0 USD nightly
9 1.0 1 3 3 19353511 False True True False False 50.921005 6.962108 18 Wifi, Kitchen, Hair dryer, Laptop friendly wor... 89 5.0 0 46.0 USD nightly
10 1.0 1 1 2 4534029 False True False False False 50.948891 6.955226 11 Wifi 193 4.5 0 46.0 USD nightly
11 1.0 1 1 2 4562588 False True False False True 50.949547 6.924072 11 Wifi, Kitchen, Hair dryer 270 4.5 0 42.0 USD nightly

9060 rows × 20 columns

# Write the collected listings to disk.  The target directory is created
# first because to_csv does not create missing directories.
os.makedirs('data', exist_ok=True)
df_total.to_csv('data/cologne.csv')