import pandas 

# Each yearly snapshot of the listings is a CSV file hosted on GitHub;
# pandas.read_csv can load a CSV directly from its URL.
listings_2019_csv = "https://raw.githubusercontent.com/joshlavitz/joshlavitz.github.io/main/listings2019.csv"
df_2019 = pandas.read_csv(listings_2019_csv)

df_2019  # Notebook preview of the raw 2019 listings


# Same again for the 2020 snapshot
listings_2020_csv = "https://raw.githubusercontent.com/joshlavitz/joshlavitz.github.io/main/listings.csv"
df_2020 = pandas.read_csv(listings_2020_csv)

df_2020  # Notebook preview of the raw 2020 listings


import json
import requests

# Download the neighbourhood boundaries (GeoJSON) with the requests library and
# decode the response body into Python objects via the response's .json() method.
url = 'https://raw.githubusercontent.com/joshlavitz/joshlavitz.github.io/main/neighbourhoods.geojson'
response = requests.get(url, timeout=30)  # Bounded wait: without a timeout a stalled connection hangs forever
response.raise_for_status()  # Report HTTP errors (404, 500, ...) here instead of as a confusing JSON decode failure
county_geo = response.json()


# Keeps only the columns used in the analysis and gives the verbose ones
# shorter names. The rename is chained onto the selection (instead of being
# applied in place afterwards) so that a brand-new dataframe is built; renaming
# a column subset in place is what makes pandas emit SettingWithCopyWarning.
df_2020 = (
    df_2020[['id', 'neighbourhood_cleansed',
             'latitude', 'longitude',
             'room_type', 'number_of_reviews',
             'review_scores_rating',
             'amenities', 'price']]
    .rename(columns={'neighbourhood_cleansed': 'neighborhood',
                     'number_of_reviews': 'num_reviews',
                     'review_scores_rating': 'rating'})
)

df_2020


# From the 2019 data only the listing id (the merge key) and the price are needed
df_2019 = df_2019.loc[:, ['id', 'price']]

# Inner join on the listing id: only rentals present in both years survive.
# 'price' is the one non-key column the two tables share, so the suffixes turn
# it into price_2020 (left table) and price_2019 (right table).
df = df_2020.merge(df_2019, how="inner", on='id', suffixes=('_2020', '_2019'))

df


# Prices are stored as text such as "$150.00". For each price column, strip the
# dollar sign and any commas from the whole column at once, parse what is left
# as a float, and finally store it as an integer number of dollars (astype(int)
# drops the fractional part).
for price_column in ('price_2020', 'price_2019'):
    price_text = (df[price_column]
                  .str.replace('$', '', regex=False)   # Removes the currency symbol
                  .str.replace(',', '', regex=False))  # Removes commas so float() can parse the number
    df[price_column] = price_text.astype(float).astype(int)


df.head() # Displays the first 5 listings


# Each 'amenities' cell is a list written out as text, e.g. '["Cable TV", "Shampoo"]'.
# Decoding it with json.loads (rather than slicing off the brackets and splitting
# on commas) matters for three reasons:
#   * the quotes and the space after each comma are not kept as part of the
#     names, so an amenity gets the same key wherever it sits in the list;
#   * an amenity whose name itself contains a comma stays in one piece;
#   * an empty list '[]' decodes to [] and is counted as 0 amenities, not 1.
# NOTE(review): assumes every cell is valid JSON, as the previewed rows are — confirm.
df['amenities'] = df['amenities'].apply(json.loads)
# Adds a column that contains the length of the list of amenities
df['num_amenities'] = df['amenities'].apply(len)

df.head() # Displays the first 5 listings


# Discards, in place, every listing (row) that has a missing value in any column
df.dropna(axis=0, how='any', inplace=True)


import folium
from folium.plugins import MarkerCluster
from folium import Marker

# Base map with the light 'cartodbpositron' tiles, centred on the coordinates below
location_map = folium.Map(tiles='cartodbpositron', location=[42.3, -71.057083], zoom_start=12)

clusters = MarkerCluster()

# One marker per listing in the table, placed at the listing's coordinates;
# clicking a marker shows the listing's Airbnb URL
for listing_id, lat, lon in zip(df['id'], df['latitude'], df['longitude']):
    listing_url = 'https://www.airbnb.com/rooms/' + str(listing_id)
    Marker([lat, lon], popup=listing_url).add_to(clusters)
clusters.add_to(location_map)

location_map # Displays the map with the marker clusters


from collections import Counter
import numpy as np
import plotly.express as px

# Counts how often each amenity occurs across all listings. A single Counter is
# updated in place for every listing; building a fresh Counter from the running
# total on each iteration would re-copy every key seen so far for every row.
all_amenities = Counter()
for amenity_list in df['amenities']:
    all_amenities.update(amenity_list)

# Reads the amenities with the top 10 counts into a dataframe. Passing columns=
# to the constructor also keeps this working when there is nothing to count.
most_amenities = all_amenities.most_common(10)
amenities_df = pandas.DataFrame(most_amenities, columns=['Amenities', 'Frequency'])

# Produces the bar plot of the top 10 amenities with their frequencies
amenities_df = amenities_df.sort_values(by='Frequency') # Sorts so that the most common amenity is at the top of the plot
fig = px.bar(amenities_df, x='Frequency', y='Amenities', orientation='h', title='10 Most Common Amenities Provided')
fig.show()


import plotly.graph_objects as go

# One box per year (2019 first, then 2020), built from that year's price column
fig = go.Figure(data=[go.Box(y=df['price_' + year].values, name=year)
                      for year in ('2019', '2020')])

# Titles for the figure and its axes; the legend would only repeat the x-axis labels
fig.update_layout(title='Distribution of Rental Price in 2019 and 2020',
                  xaxis_title='Year',
                  yaxis_title='Price ($)',
                  showlegend=False)

fig.show()


# Starts from the full table; every pass below rebinds trim_df to a filtered subset
trim_df = df

# Applies the 1.5 * IQR rule to one price column after the other. The 2020
# quartiles are therefore computed on rows that already passed the 2019 filter.
for column in ['price_2019', 'price_2020']:
  Q1 = trim_df[column].quantile(0.25)
  Q3 = trim_df[column].quantile(0.75)
  IQR = Q3 - Q1

  lower_fence = Q1 - IQR * 1.5
  upper_fence = Q3 + IQR * 1.5

  # Keeps only the rows whose price lies within the fences (both ends inclusive)
  trim_df = trim_df[trim_df[column].between(lower_fence, upper_fence)]

trim_df # Outputs the dataset without the major outliers


# Same pair of box plots as before, this time drawn from the outlier-trimmed table
fig = go.Figure(data=[go.Box(y=trim_df['price_' + year].values, name=year)
                      for year in ('2019', '2020')])

# Titles for the figure and its axes
fig.update_layout(title='Distribution of Rental Price in 2019 and 2020',
                  xaxis_title='Year',
                  yaxis_title='Price ($)',
                  showlegend=False)

fig.show()


# Defines a function that draws the choropleth map for the specified year
def choropleth_map(year):
    """Display a choropleth map of the mean rental price per neighborhood.

    Reads the module-level ``trim_df`` (listing prices) and ``county_geo``
    (neighbourhood boundaries as GeoJSON) and shows the resulting folium map
    with ``display``.

    Args:
        year: Year whose ``price_<year>`` column of ``trim_df`` is averaged.
            A string or an integer is accepted (``'2019'`` or ``2019``).

    Raises:
        KeyError: If ``trim_df`` has no ``price_<year>`` column.
    """
    # Normalises the year so that integers work in the string concatenations below
    year = str(year)

    # Calculates the average rental price for each neighborhood
    means = trim_df.groupby('neighborhood')['price_' + year].mean()

    # Defines the base map
    price_map = folium.Map(location=[42.3, -71.057083], tiles='cartodbpositron', zoom_start=11)

    # Defines the choropleth map's properties; each GeoJSON feature is matched
    # to a mean price through its 'neighbourhood' property
    choropleth = folium.Choropleth(
        geo_data=county_geo,
        data=means,
        key_on='feature.properties.neighbourhood',
        fill_color='YlGnBu',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Price ($)').add_to(price_map)

    # Shows the neighbourhood name (without a field label) when hovering over an area
    choropleth.geojson.add_child(folium.features.GeoJsonTooltip(['neighbourhood'], labels=False))

    # Sets the title for the map
    text = 'Choropleth Map of Airbnb Prices in Boston (' + year + ')'
    title_html = '''<p align="center" style="font-size:18px">{}</p>'''.format(text)
    price_map.get_root().html.add_child(folium.Element(title_html))

    # NOTE(review): display is not imported in the visible code; presumably it is
    # supplied by the notebook runtime — confirm before running this as a script.
    display(price_map) # Displays the map


# Displays the choropleth map for 2019 and then for 2020
for map_year in ('2019', '2020'):
    choropleth_map(map_year)


# One trace per year (2019, then 2020); within a trace the prices are split by
# room type along the x-axis, and individual points are not drawn
fig = go.Figure(data=[go.Box(x=trim_df['room_type'], y=trim_df['price_' + year],
                             name=year, boxpoints=False)
                      for year in ('2019', '2020')])

# Places the two years side by side for each room type and sets the titles
fig.update_layout(title='Distribution of Rental Price for Different Room Types',
                  xaxis_title='Room Type',
                  yaxis_title='Price ($)',
                  boxmode='group')

fig.show()


from plotly.subplots import make_subplots

# A single row of three scatter plots, each with its own title
fig = make_subplots(rows=1, cols=3, horizontal_spacing=0.1,
                    subplot_titles=("Price vs. # of Reviews", "Price vs. # of Amenities", "Price vs. Rating"))

# (column plotted on the x-axis, x-axis label) for each panel, left to right
panels = [('num_reviews', "# of Reviews"),
          ('num_amenities', "# of Amenities"),
          ('rating', "Rating")]

# Every panel plots the 2020 price against one of the variables above
for panel_col, (x_column, x_label) in enumerate(panels, start=1):
    fig.add_trace(go.Scatter(x=trim_df[x_column], y=trim_df['price_2020'], mode='markers'),
                  row=1, col=panel_col)
    fig.update_xaxes(title_text=x_label, row=1, col=panel_col)

# The y-axis title is set on the leftmost panel only; no legend is shown
fig.update_yaxes(title_text="Price ($)", row=1, col=1)
fig.update_layout(showlegend=False)

fig # Displays the figure when this is the last expression of a notebook cell


from scipy import stats

# Paired (related-samples) t-test between the 2020 and 2019 prices of the same listings
result = stats.ttest_rel(trim_df['price_2020'], trim_df['price_2019'])

# Reports the statistic rounded to 3 decimals and the p-value both exactly and rounded
report_lines = [
    'Test Result: ',
    't-statistic= ' + str(np.round(result.statistic, decimals=3)),
    'p-value= ' + str(result.pvalue) + ' ≈ ' + str(np.round(result.pvalue, decimals=3)),
    '',  # Leaves an empty line after the report
]
print('\n'.join(report_lines))

# Prints the mean price of each year (2019 first) for comparison
for year in ('2019', '2020'):
    print("Mean " + year + " Price ($): " + str(np.round(trim_df['price_' + year].mean(), decimals=2)))

Test Result: 
t-statistic= -12.708
p-value= 2.1061464045654373e-35 ≈ 0.0

Mean 2019 Price ($): 131.15
Mean 2020 Price ($): 115.44


from statsmodels.formula.api import ols

# Right-hand side shared by both models: the predictors enter additively (joined with +)
additive_terms = ' + '.join(['neighborhood', 'room_type', 'num_reviews', 'rating', 'num_amenities'])

# Multiple linear regression for the 2019 price, then the same specification for 2020
model_2019 = ols('price_2019 ~ ' + additive_terms, data=trim_df).fit()
model_2020 = ols('price_2020 ~ ' + additive_terms, data=trim_df).fit()

# Prints out the computed R squared value and F-test p-value of both models;
# an empty line separates the 2019 report from the 2020 one
for label, fitted, trailer in (('2019', model_2019, '\n'), ('2020', model_2020, '')):
    print(label + ' model:\n' +
          'R-squared value: ' + str(np.round(fitted.rsquared, decimals=3)) + '\n' +
          'F-test p-value: ' + str(fitted.f_pvalue) + ' ≈ ' + str(np.round(fitted.f_pvalue, decimals=3)) + trailer)

2019 model:
R-squared value: 0.539
F-test p-value: 1.570599321055285e-253 ≈ 0.0

2020 model:
R-squared value: 0.385
F-test p-value: 5.208932825861506e-152 ≈ 0.0


model_2020.summary() # Displays the full summary for the 2020 model


alpha = 0.10 # Defines significance level
p_values = model_2020.pvalues # p-value of every coefficient, indexed by the coefficient's name

# Names of the coefficients whose p-value is below the significance level
significant_predictors = p_values[p_values < alpha].index.tolist()

# Outputs the rounded coefficients of the significant predictors of the 2020 model in sorted order
model_2020.params[significant_predictors].sort_values().round(decimals=2)

room_type[T.Shared room]                  -102.29
room_type[T.Private room]                  -56.04
neighborhood[T.Hyde Park]                  -20.43
num_reviews                                 -0.06
rating                                       0.94
num_amenities                                1.32
neighborhood[T.Dorchester]                  13.05
neighborhood[T.Downtown]                    13.82
neighborhood[T.Jamaica Plain]               19.13
neighborhood[T.Beacon Hill]                 22.27
neighborhood[T.East Boston]                 24.71
neighborhood[T.South End]                   25.99
neighborhood[T.Back Bay]                    27.52
neighborhood[T.Mission Hill]                27.87
neighborhood[T.South Boston]                34.64
neighborhood[T.Bay Village]                 37.37
neighborhood[T.Chinatown]                   44.16
neighborhood[T.North End]                   51.16
neighborhood[T.Charlestown]                 51.54
neighborhood[T.Fenway]                      53.24
neighborhood[T.West End]                    58.15
neighborhood[T.South Boston Waterfront]     64.40
room_type[T.Hotel room]                     68.67
dtype: float64


# Right-hand side shared by both models. Joining the predictors with * instead
# of + tells the formula that the variables should interact with each other.
interaction_terms = '*'.join(['neighborhood', 'room_type', 'num_reviews', 'rating', 'num_amenities'])

# Multiple linear regression for the 2019 price, then the same specification for 2020
model_2019 = ols('price_2019 ~ ' + interaction_terms, data=trim_df).fit()
model_2020 = ols('price_2020 ~ ' + interaction_terms, data=trim_df).fit()

# Prints out the computed R squared value and F-test p-value of both models;
# an empty line separates the 2019 report from the 2020 one
for label, fitted, trailer in (('2019', model_2019, '\n'), ('2020', model_2020, '')):
    print(label + ' model:\n' +
          'R-squared value: ' + str(np.round(fitted.rsquared, decimals=3)) + '\n' +
          'F-test p-value: ' + str(fitted.f_pvalue) + ' ≈ ' + str(np.round(fitted.f_pvalue, decimals=3)) + trailer)

2019 model:
R-squared value: 0.668
F-test p-value: 5.255463015621847e-162 ≈ 0.0

2020 model:
R-squared value: 0.572
F-test p-value: 1.0085380063186761e-100 ≈ 0.0

	id	listing_url	scrape_id	last_scraped	name	summary	space	description	experiences_offered	neighborhood_overview	...	instant_bookable	is_business_travel_ready	cancellation_policy	require_guest_profile_picture	require_guest_phone_verification	calculated_host_listings_count	calculated_host_listings_count_entire_homes	calculated_host_listings_count_private_rooms	calculated_host_listings_count_shared_rooms	reviews_per_month
0	3781	https://www.airbnb.com/rooms/3781	20191018230017	2019-10-19	HARBORSIDE-Walk to subway	Fully separate apartment in a two apartment bu...	This is a totally separate apartment located o...	Fully separate apartment in a two apartment bu...	none	Mostly quiet ( no loud music, no crowed sidewa...	...	f	f	super_strict_30	f	f	2	2	0	0	0.29
1	5506	https://www.airbnb.com/rooms/5506	20191018230017	2019-10-19	$99 Special Private! Minutes to center!	Private guest room with private bath, You do n...	THE BEST Value in BOSTON!!* PRIVATE GUEST ...	Private guest room with private bath, You do n...	none	Peacful, Architecturally interesting, historic...	...	t	f	strict_14_with_grace_period	f	f	6	6	0	0	0.80
2	6695	https://www.airbnb.com/rooms/6695	20191018230017	2019-10-19	$99 Special!! Home Away! Condo	NaN	WELCOME * FULL PRIVATE APARTMENT In a His...	WELCOME * FULL PRIVATE APARTMENT In a His...	none	Peaceful, Architecturally interesting, histori...	...	t	f	strict_14_with_grace_period	f	f	6	6	0	0	0.89
3	6976	https://www.airbnb.com/rooms/6976	20191018230017	2019-10-19	Mexican Folk Art Showcase in Boston Neighborhood	Come stay with me in Boston's Roslindale neigh...	This is a well-maintained, two-family house bu...	Come stay with me in Boston's Roslindale neigh...	none	The LOCATION: Roslindale is a safe and diverse...	...	f	f	moderate	t	f	1	0	1	0	0.66
4	8789	https://www.airbnb.com/rooms/8789	20191018230017	2019-10-18	Curved Glass Studio/1bd facing Park	Bright, 1 bed with curved glass windows facing...	Fully Furnished studio with enclosed bedroom. ...	Bright, 1 bed with curved glass windows facing...	none	Beacon Hill is a historic neighborhood filled ...	...	f	f	strict_14_with_grace_period	f	f	10	10	0	0	0.38
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
5642	39461104	https://www.airbnb.com/rooms/39461104	20191018230017	2019-10-19	Convenient North End Studio w/ W/D + Gym near ...	Show up and start living from day one in Bosto...	Gorgeous furniture, fully-equipped kitchen, sm...	Show up and start living from day one in Bosto...	none	This furnished apartment is located in the Nor...	...	t	f	flexible	f	f	92	92	0	0	NaN
5643	39461138	https://www.airbnb.com/rooms/39461138	20191018230017	2019-10-19	Equipped North End Studio w/ W/D (BOS128)	Show up and start living from day one in Bosto...	Gorgeous furniture, fully-equipped kitchen, sm...	Show up and start living from day one in Bosto...	none	This furnished apartment is located in the Nor...	...	t	f	flexible	f	f	92	92	0	0	NaN
5644	39461190	https://www.airbnb.com/rooms/39461190	20191018230017	2019-10-19	Comfy North End Studio w/ Doorman + W/D near T...	Show up and start living from day one in Bosto...	Thoughtfully designed with bespoke finishes, m...	Show up and start living from day one in Bosto...	none	This furnished apartment is located in the Nor...	...	t	f	flexible	f	f	92	92	0	0	NaN
5645	39461223	https://www.airbnb.com/rooms/39461223	20191018230017	2019-10-19	Bespoke North End Studio w/ Gym + W/D near Nor...	Discover the best of Boston, with this studio ...	Thoughtfully designed with bespoke finishes, m...	Discover the best of Boston, with this studio ...	none	This furnished apartment is located in the Nor...	...	t	f	flexible	f	f	92	92	0	0	NaN
5646	39462969	https://www.airbnb.com/rooms/39462969	20191018230017	2019-10-19	Your Home in Back Bay!	Located on the corner of Gloucester & Newbury ...	The apartment is on the third floor - and ther...	Located on the corner of Gloucester & Newbury ...	none	The neighborhood is just fantastic! Five minut...	...	f	f	flexible	f	f	1	1	0	0	NaN

	id	listing_url	scrape_id	last_scraped	name	description	neighborhood_overview	picture_url	host_id	host_url	...	review_scores_communication	review_scores_location	review_scores_value	license	instant_bookable	calculated_host_listings_count	calculated_host_listings_count_entire_homes	calculated_host_listings_count_private_rooms	calculated_host_listings_count_shared_rooms	reviews_per_month
0	3781	https://www.airbnb.com/rooms/3781	20201024170420	2020-10-24	HARBORSIDE-Walk to subway	Fully separate apartment in a two apartment bu...	Mostly quiet ( no loud music, no crowed sidewa...	https://a0.muscache.com/pictures/24670/b2de044...	4804	https://www.airbnb.com/users/show/4804	...	10.0	10.0	10.0	NaN	f	1	1	0	0	0.26
1	5506	https://www.airbnb.com/rooms/5506	20201024170420	2020-10-24	$49 Special Private! Minutes to center!	Private guest room with private bath, You do n...	Peacful, Architecturally interesting, historic...	https://a0.muscache.com/pictures/1598e8b6-5a55...	8229	https://www.airbnb.com/users/show/8229	...	10.0	9.0	10.0	Exempt: This listing is a unit that has contra...	f	6	6	0	0	0.76
2	6695	https://www.airbnb.com/rooms/6695	20201024170420	2020-10-24	$99 Special!! Home Away! Condo	Comfortable, Fully Equipped private apartment...	Peaceful, Architecturally interesting, histori...	https://a0.muscache.com/pictures/38ac4797-e7a4...	8229	https://www.airbnb.com/users/show/8229	...	10.0	9.0	10.0	STR-404620	f	6	6	0	0	0.84
3	10730	https://www.airbnb.com/rooms/10730	20201024170420	2020-10-24	Bright 1bed facing Golden Dome	Bright, spacious unit, new galley kitchen, new...	Beacon Hill is located downtown and is conveni...	https://a0.muscache.com/pictures/miso/Hosting-...	26988	https://www.airbnb.com/users/show/26988	...	10.0	10.0	9.0	NaN	f	7	7	0	0	0.24
4	10813	https://www.airbnb.com/rooms/10813	20201024170420	2020-10-24	Back Bay Apt-blocks to subway, Newbury St, The...	Stunning Back Bay furnished studio apartment. ...	Wander around this quintessential neighborhood...	https://a0.muscache.com/pictures/20b5b9c9-e1f4...	38997	https://www.airbnb.com/users/show/38997	...	10.0	10.0	10.0	NaN	f	11	11	0	0	0.94
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
3249	46021420	https://www.airbnb.com/rooms/46021420	20201024170420	2020-10-24	Stunning 1BR in Downtown + 100 WalkScore \| Evo...	Whether you are just getting away for the week...	Downtown’s Theater District bustles with energ...	https://a0.muscache.com/pictures/956c6254-61ea...	212359760	https://www.airbnb.com/users/show/212359760	...	NaN	NaN	NaN	Exempt: This listing is a unit used for furnis...	t	43	43	0	0	NaN
3250	46021809	https://www.airbnb.com/rooms/46021809	20201024170420	2020-10-24	Spacious and Modern 2BD in the Heart of Boston	This is a modern 2 bed in The Heart of Boston<...	NaN	https://a0.muscache.com/pictures/70f28a8d-36e0...	2356643	https://www.airbnb.com/users/show/2356643	...	NaN	NaN	NaN	NaN	t	11	11	0	0	NaN
3251	46022872	https://www.airbnb.com/rooms/46022872	20201024170420	2020-10-24	Room in Large Brookline House, Phenomenal Loca...	Room A in 7 Bed, 3 Bath<br />Extremely spaciou...	Just off Harvard Ave, connecting Packards Corn...	https://a0.muscache.com/pictures/2114bef5-443a...	373050156	https://www.airbnb.com/users/show/373050156	...	NaN	NaN	NaN	NaN	t	2	0	2	0	NaN
3252	46024344	https://www.airbnb.com/rooms/46024344	20201024170420	2020-10-24	Furnished Room, Big Brookline House, Top Location	Room C in 7 Bed, 3 Bath apartment<br />Extreme...	Just off Harvard Ave, connecting Packards Corn...	https://a0.muscache.com/pictures/2114bef5-443a...	373050156	https://www.airbnb.com/users/show/373050156	...	NaN	NaN	NaN	NaN	t	2	0	2	0	NaN
3253	46025053	https://www.airbnb.com/rooms/46025053	20201024170420	2020-10-24	A place of your own \| Studio in Boston	Stay for 30+ nights (minimum nights and rates ...	NaN	https://a0.muscache.com/pictures/8860911a-df51...	359229620	https://www.airbnb.com/users/show/359229620	...	NaN	NaN	NaN	NaN	t	177	177	0	0	NaN

	id	neighborhood	latitude	longitude	room_type	num_reviews	rating	amenities	price
0	3781	East Boston	42.364130	-71.029910	Entire home/apt	17	99.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$150.00
1	5506	Roxbury	42.329810	-71.095590	Entire home/apt	107	95.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$145.00
2	6695	Roxbury	42.329940	-71.093510	Entire home/apt	115	96.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$169.00
3	10730	Downtown	42.358400	-71.061850	Entire home/apt	32	96.0	["Cable TV", "Smoke alarm", "TV", "Bed linens"...	$81.00
4	10813	Back Bay	42.350610	-71.087870	Entire home/apt	10	99.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$87.00
...	...	...	...	...	...	...	...	...	...
3249	46021420	Beacon Hill	42.353290	-71.065380	Entire home/apt	0	NaN	["Shower gel", "Shampoo", "Smoke alarm", "TV",...	$239.00
3250	46021809	Roxbury	42.330500	-71.071270	Entire home/apt	0	NaN	["Air conditioning", "Heating", "Laptop-friend...	$47.00
3251	46022872	Allston	42.347372	-71.130569	Private room	0	NaN	["Hangers", "Heating", "Laptop-friendly worksp...	$44.00
3252	46024344	Allston	42.348080	-71.129930	Private room	0	NaN	["Hangers", "Heating", "Laptop-friendly worksp...	$44.00
3253	46025053	East Boston	42.371010	-71.043770	Entire home/apt	0	NaN	["BBQ grill", "Shampoo", "Smoke alarm", "TV", ...	$147.00

	id	neighborhood	latitude	longitude	room_type	num_reviews	rating	amenities	price_2020	price_2019
0	3781	East Boston	42.36413	-71.02991	Entire home/apt	17	99.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$150.00	$125.00
1	5506	Roxbury	42.32981	-71.09559	Entire home/apt	107	95.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$145.00	$145.00
2	6695	Roxbury	42.32994	-71.09351	Entire home/apt	115	96.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$169.00	$169.00
3	10730	Downtown	42.35840	-71.06185	Entire home/apt	32	96.0	["Cable TV", "Smoke alarm", "TV", "Bed linens"...	$81.00	$150.00
4	10813	Back Bay	42.35061	-71.08787	Entire home/apt	10	99.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	$87.00	$179.00
...	...	...	...	...	...	...	...	...	...	...
2053	39445807	Back Bay	42.34645	-71.07803	Entire home/apt	2	100.0	["Shampoo", "Smoke alarm", "TV", "Bed linens",...	$125.00	$200.00
2054	39446774	Back Bay	42.34663	-71.07915	Entire home/apt	1	100.0	["Shower gel", "Cable TV", "Shampoo", "Smoke a...	$148.00	$245.00
2055	39447297	Back Bay	42.34635	-71.07792	Entire home/apt	0	NaN	["Garden or backyard", "Shampoo", "Smoke alarm...	$148.00	$245.00
2056	39447462	Back Bay	42.34603	-71.07920	Entire home/apt	0	NaN	["Shampoo", "Smoke alarm", "TV", "Private entr...	$148.00	$245.00
2057	39447565	Back Bay	42.34834	-71.08152	Entire home/apt	1	100.0	["Shampoo", "Smoke alarm", "TV", "Baking sheet...	$148.00	$245.00

	id	neighborhood	latitude	longitude	room_type	num_reviews	rating	amenities	price_2020	price_2019
0	3781	East Boston	42.36413	-71.02991	Entire home/apt	17	99.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	150	125
1	5506	Roxbury	42.32981	-71.09559	Entire home/apt	107	95.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	145	145
2	6695	Roxbury	42.32994	-71.09351	Entire home/apt	115	96.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	169	169
3	10730	Downtown	42.35840	-71.06185	Entire home/apt	32	96.0	["Cable TV", "Smoke alarm", "TV", "Bed linens"...	81	150
4	10813	Back Bay	42.35061	-71.08787	Entire home/apt	10	99.0	["Cable TV", "Shampoo", "Smoke alarm", "TV", "...	87	179

Analyzing the Prices of Boston Airbnb Rentals: What Affects Prices and Have Prices Changed Since the Pandemic?

Introduction

Python Libraries

Data Collection

Listings Data

GeoJSON Data

Data Cleaning and Curation

Dropping Unnecessary Columns

Combining the Dataframes into One

Converting Price Column to Int

Adding a Column for Number of Amenities

Dropping Missing Values

Exploratory Data Analysis

General Exploration

Plotting the Locations of Rentals

What Amenities are Most Common?

Exploring Price

Visualizing the Distribution of Prices

Excluding Outliers

Does Price Vary Depending on the Neighborhood?

Does Price Vary Depending on Room Type?

Does Price Vary Based on Other Variables?

Hypothesis Testing and Machine Learning

Are the prices in 2020 significantly different from 2019?

How Well Can We Predict Prices?

Basic Model

Comparing Regression Models from 2019 and 2020

Takeaways from 2020 Model for Predicting Prices

Can We Improve With an Interaction Model?

Conclusion

Dep. Variable:	price_2020	R-squared:	0.385
Model:	OLS	Adj. R-squared:	0.374
Method:	Least Squares	F-statistic:	34.70
Date:	Mon, 21 Dec 2020	Prob (F-statistic):	5.21e-152
Time:	09:17:12	Log-Likelihood:	-9027.8
No. Observations:	1692	AIC:	1.812e+04
Df Residuals:	1661	BIC:	1.829e+04
Df Model:	30
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Intercept	2.1291	18.099	0.118	0.906	-33.371	37.629
neighborhood[T.Back Bay]	27.5163	7.615	3.614	0.000	12.581	42.452
neighborhood[T.Bay Village]	37.3705	11.533	3.240	0.001	14.750	59.991
neighborhood[T.Beacon Hill]	22.2723	7.574	2.941	0.003	7.417	37.128
neighborhood[T.Brighton]	-1.3180	7.051	-0.187	0.852	-15.149	12.513
neighborhood[T.Charlestown]	51.5437	9.207	5.598	0.000	33.484	69.603
neighborhood[T.Chinatown]	44.1577	16.129	2.738	0.006	12.522	75.794
neighborhood[T.Dorchester]	13.0525	5.998	2.176	0.030	1.287	24.818
neighborhood[T.Downtown]	13.8168	7.179	1.925	0.054	-0.265	27.898
neighborhood[T.East Boston]	24.7075	7.144	3.458	0.001	10.695	38.720
neighborhood[T.Fenway]	53.2351	8.691	6.126	0.000	36.190	70.281
neighborhood[T.Hyde Park]	-20.4330	10.023	-2.039	0.042	-40.091	-0.775
neighborhood[T.Jamaica Plain]	19.1311	6.552	2.920	0.004	6.280	31.982
neighborhood[T.Leather District]	54.8785	50.976	1.077	0.282	-45.106	154.863
neighborhood[T.Longwood Medical Area]	-9.8361	36.202	-0.272	0.786	-80.842	61.170
neighborhood[T.Mattapan]	-6.0983	10.440	-0.584	0.559	-26.574	14.378
neighborhood[T.Mission Hill]	27.8666	10.265	2.715	0.007	7.732	48.001
neighborhood[T.North End]	51.1574	9.852	5.193	0.000	31.834	70.481
neighborhood[T.Roslindale]	6.0916	8.562	0.711	0.477	-10.702	22.885
neighborhood[T.Roxbury]	3.7661	6.502	0.579	0.563	-8.988	16.520
neighborhood[T.South Boston]	34.6359	7.453	4.648	0.000	20.019	49.253
neighborhood[T.South Boston Waterfront]	64.3980	19.859	3.243	0.001	25.446	103.350
neighborhood[T.South End]	25.9874	6.992	3.717	0.000	12.273	39.702
neighborhood[T.West End]	58.1533	13.648	4.261	0.000	31.384	84.923
neighborhood[T.West Roxbury]	6.2262	11.517	0.541	0.589	-16.364	28.816
room_type[T.Hotel room]	68.6711	12.257	5.603	0.000	44.630	92.712
room_type[T.Private room]	-56.0406	2.918	-19.203	0.000	-61.765	-50.316
room_type[T.Shared room]	-102.2883	19.421	-5.267	0.000	-140.380	-64.197
num_reviews	-0.0603	0.016	-3.693	0.000	-0.092	-0.028
rating	0.9422	0.182	5.176	0.000	0.585	1.299
num_amenities	1.3247	0.164	8.095	0.000	1.004	1.646

Omnibus:	304.363	Durbin-Watson:	1.796
Prob(Omnibus):	0.000	Jarque-Bera (JB):	547.118
Skew:	1.116	Prob(JB):	1.57e-119
Kurtosis:	4.668	Cond. No.	5.32e+03