import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/AB_NYC_2019.csv')

df.rename(columns={'host_id': 'host id'}, inplace=True)
df.rename(columns={'host_name': 'host name'}, inplace=True)
df.rename(columns={'neighbourhood_group': 'neighbourhood group'}, inplace=True)
df.rename(columns={'room_type': 'room type'}, inplace=True)
df.rename(columns={'minimum_nights': 'minimum nights'}, inplace=True)
df.rename(columns={'number_of_reviews': 'number of reviews'}, inplace=True)
df.rename(columns={'last_review': 'last review'}, inplace=True)
df.rename(columns={'reviews_per_month': 'reviews per month'}, inplace=True)
df.rename(columns={'calculated_host_listings_count': 'calculated host listings count'}, inplace=True)
df.rename(columns={'availability_365': 'availability 365'}, inplace=True)
df.head()

df.shape

(48895, 16)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48895 entries, 0 to 48894
Data columns (total 16 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              48895 non-null  int64  
 1   name                            48879 non-null  object 
 2   host id                         48895 non-null  int64  
 3   host name                       48874 non-null  object 
 4   neighbourhood group             48895 non-null  object 
 5   neighbourhood                   48895 non-null  object 
 6   latitude                        48895 non-null  float64
 7   longitude                       48895 non-null  float64
 8   room type                       48895 non-null  object 
 9   price                           48895 non-null  int64  
 10  minimum nights                  48895 non-null  int64  
 11  number of reviews               48895 non-null  int64  
 12  last review                     38843 non-null  object 
 13  reviews per month               38843 non-null  float64
 14  calculated host listings count  48895 non-null  int64  
 15  availability 365                48895 non-null  int64  
dtypes: float64(3), int64(7), object(6)
memory usage: 6.0+ MB

df.head()

df.duplicated().sum()

0

na_count = df.isna().sum()
na_ratio = df.isna().sum()/len(df)*100

nulls_df = pd.DataFrame({
    'Missing Count': na_count,
    'Missing Ratio': na_ratio
})

nulls_df

sns.heatmap(df.isnull())

<Axes: >

df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/AB_NYC_2019.csv')

print('Number of null values is :', df.isnull().sum().sum())

Number of null values is : 20141

non_numeric_columns = df.select_dtypes(exclude=['number']).columns

# Fill missing values for non-numeric columns with mode
df.loc[:, non_numeric_columns]=df[non_numeric_columns].fillna(df.mode().iloc[0])

print('Number of null values is :', df.isnull().sum().sum())

Number of null values is : 10052

numeric_columns = df.select_dtypes(include=['number']).columns

# Create a copy of the subset before filling missing values
# Filling missing values for numeric columns with mean
df.loc[:, numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].mean())

print('Number of null values is :', df.isnull().sum().sum())

Number of null values is : 0

#Now if we plot it, there should be no missing values
sns.heatmap(df.isnull())

<Axes: >

df.duplicated().sum()
# df.drop_duplicates(inplace=True)

0

df=pd.read_csv('/Users/mekki/Python_Projects_Datasets/AB_NYC_2019.csv')

df.shape #16 columns

(48895, 16)

df.rename(columns={'host_id': 'host id'}, inplace=True)
df.rename(columns={'host_name': 'host name'}, inplace=True)
df.rename(columns={'neighbourhood_group': 'neighbourhood group'}, inplace=True)
df.rename(columns={'room_type': 'room type'}, inplace=True)
df.rename(columns={'minimum_nights': 'minimum nights'}, inplace=True)
df.rename(columns={'number_of_reviews': 'number of reviews'}, inplace=True)
df.rename(columns={'last_review': 'last review'}, inplace=True)
df.rename(columns={'reviews_per_month': 'reviews per month'}, inplace=True)
df.rename(columns={'calculated_host_listings_count': 'calculated host listings count'}, inplace=True)
df.rename(columns={'availability_365': 'availability 365'}, inplace=True)

droplist=["host name","neighbourhood","latitude","longitude","room type","calculated host listings count","reviews per month"]
df2=df.drop(droplist, axis="columns")
df2.shape
#Dropped 7 columns as shape goes from 16 columns to 9 columns

(48895, 9)

df2.head(1)

df2.shape

(48895, 9)

df2.dropna(inplace=True)
df3=df2.dropna()
df3.shape

(38837, 9)

df3.head()

#Making sure that all data in 'last_review' are in the correct format
df3.loc[:,'last review']=pd.to_datetime(df3['last review']).dt.date
df3.head()

df4 = df3.query('price < 101 and `minimum nights` < 3 and `number of reviews` > 99 and `availability 365` > 179')
df4.shape

(554, 9)

df4.head()

df4.sort_values(by=["neighbourhood group", "last review"], inplace=True, ascending=[True, True])
df5 = df4.copy()
df5.head()

df4.sort_values(by=["neighbourhood group","price"], inplace=True, ascending=[True, True])
promotion=df4.copy()
promotion.head()

grouped_counts = promotion.groupby('neighbourhood group').size().reset_index(name='count')
grouped_counts = grouped_counts.sort_values(by='count', ascending=False)
grouped_counts.head()

data=pd.read_csv('/Users/mekki/Python_Projects_Datasets/AB_NYC_2019.csv')
data.shape

(48895, 16)

data

data.dropna(inplace=True)
data.drop_duplicates(inplace=True)
data.shape

(38821, 16)

data.rename(columns={'neighbourhood_group': 'neighbourhood group'}, inplace=True)
data.rename(columns={'minimum_nights': 'minimum nights'}, inplace=True)

data=data.query('price > 1000 & `minimum nights` <3')
data.shape

(47, 16)

data=data.query('`neighbourhood group`=="Brooklyn" or `neighbourhood group`=="Manhattan"')
data.shape

(46, 16)

data_final = data.copy()
data_final.sort_values(by=["neighbourhood group", "price"], inplace=True)
data_final.head()

fig, axes = plt.subplots(2, 1, figsize=(6, 12))
sns.barplot(data=data_final, x="neighbourhood group", y="price", hue='neighbourhood group', palette=['#cdb4db','#ffa3a5'], ax=axes[0])
axes[0].set_title('Average Prices of Units per Neighbourhood');
sns.histplot(data=data_final, x="neighbourhood group", y="price", cbar=True, color='#52b69a', ax=axes[1])
axes[1].set_title('Distribution of Prices within each Neighbourhood')

Text(0.5, 1.0, 'Distribution of Prices within each Neighbourhood')

	id	name	host id	host name	neighbourhood group	neighbourhood	latitude	longitude	room type	price	minimum nights	number of reviews	last review	reviews per month	calculated host listings count	availability 365
0	2539	Clean & quiet apt home by the park	2787	John	Brooklyn	Kensington	40.64749	-73.97237	Private room	149	1	9	2018-10-19	0.21	6	365
1	2595	Skylit Midtown Castle	2845	Jennifer	Manhattan	Midtown	40.75362	-73.98377	Entire home/apt	225	1	45	2019-05-21	0.38	2	355
2	3647	THE VILLAGE OF HARLEM....NEW YORK !	4632	Elisabeth	Manhattan	Harlem	40.80902	-73.94190	Private room	150	3	0	NaN	NaN	1	365
3	3831	Cozy Entire Floor of Brownstone	4869	LisaRoxanne	Brooklyn	Clinton Hill	40.68514	-73.95976	Entire home/apt	89	1	270	2019-07-05	4.64	1	194
4	5022	Entire Apt: Spacious Studio/Loft by central park	7192	Laura	Manhattan	East Harlem	40.79851	-73.94399	Entire home/apt	80	10	9	2018-11-19	0.10	1	0

	id	name	host id	host name	neighbourhood group	neighbourhood	latitude	longitude	room type	price	minimum nights	number of reviews	last review	reviews per month	calculated host listings count	availability 365
0	2539	Clean & quiet apt home by the park	2787	John	Brooklyn	Kensington	40.64749	-73.97237	Private room	149	1	9	2018-10-19	0.21	6	365
1	2595	Skylit Midtown Castle	2845	Jennifer	Manhattan	Midtown	40.75362	-73.98377	Entire home/apt	225	1	45	2019-05-21	0.38	2	355
2	3647	THE VILLAGE OF HARLEM....NEW YORK !	4632	Elisabeth	Manhattan	Harlem	40.80902	-73.94190	Private room	150	3	0	NaN	NaN	1	365
3	3831	Cozy Entire Floor of Brownstone	4869	LisaRoxanne	Brooklyn	Clinton Hill	40.68514	-73.95976	Entire home/apt	89	1	270	2019-07-05	4.64	1	194
4	5022	Entire Apt: Spacious Studio/Loft by central park	7192	Laura	Manhattan	East Harlem	40.79851	-73.94399	Entire home/apt	80	10	9	2018-11-19	0.10	1	0

	id	name	host id	neighbourhood group	price	minimum nights	number of reviews	last review	availability 365
0	2539	Clean & quiet apt home by the park	2787	Brooklyn	149	1	9	2018-10-19	365
1	2595	Skylit Midtown Castle	2845	Manhattan	225	1	45	2019-05-21	355
3	3831	Cozy Entire Floor of Brownstone	4869	Brooklyn	89	1	270	2019-07-05	194
4	5022	Entire Apt: Spacious Studio/Loft by central park	7192	Manhattan	80	10	9	2018-11-19	0
5	5099	Large Cozy 1 BR Apartment In Midtown East	7322	Manhattan	200	3	74	2019-06-22	129

	id	name	host id	neighbourhood group	price	minimum nights	number of reviews	last review	availability 365
0	2539	Clean & quiet apt home by the park	2787	Brooklyn	149	1	9	2018-10-19	365
1	2595	Skylit Midtown Castle	2845	Manhattan	225	1	45	2019-05-21	355
3	3831	Cozy Entire Floor of Brownstone	4869	Brooklyn	89	1	270	2019-07-05	194
4	5022	Entire Apt: Spacious Studio/Loft by central park	7192	Manhattan	80	10	9	2018-11-19	0
5	5099	Large Cozy 1 BR Apartment In Midtown East	7322	Manhattan	200	3	74	2019-06-22	129

	id	name	host id	neighbourhood group	price	minimum nights	number of reviews	last review	availability 365
3	3831	Cozy Entire Floor of Brownstone	4869	Brooklyn	89	1	270	2019-07-05	194
7	5178	Large Furnished Room Near B'way	8967	Manhattan	79	2	430	2019-06-24	220
13	6021	Wonderful Guest Bedroom in Manhattan for SINGLES	11528	Manhattan	85	2	113	2019-07-05	333
39	12048	LowerEastSide apt share shortterm 1	7549	Manhattan	40	1	214	2019-07-05	188
50	13808	Blue Room for 2 in Brownstone for $1350 monthly	54275	Brooklyn	80	1	112	2019-06-16	251

	Missing Count	Missing Ratio
id	0	0.000000
name	16	0.032723
host id	0	0.000000
host name	21	0.042949
neighbourhood group	0	0.000000
neighbourhood	0	0.000000
latitude	0	0.000000
longitude	0	0.000000
room type	0	0.000000
price	0	0.000000
minimum nights	0	0.000000
number of reviews	0	0.000000
last review	10052	20.558339
reviews per month	10052	20.558339
calculated host listings count	0	0.000000
availability 365	0	0.000000

	id	name	host id	neighbourhood group	price	minimum nights	number of reviews	last review	availability 365
11894	9260415	Comfy Room Near Bronx Zoo and NYBG!	48106825	Bronx	47	2	138	2019-05-20	353
11303	8772654	Friendly-colorful-cozy W/ breakfast	45990565	Bronx	33	2	103	2019-05-31	282
1578	715270	2 Beds/Queen & Full Beautiful Room 40 minsT.Sq...	3684360	Bronx	39	2	169	2019-06-12	306
22881	18540170	Great room across the street from the High bridge	128692351	Bronx	100	2	107	2019-06-14	344
18243	14301951	A beautiful cozy room with balcony	87370616	Bronx	75	2	110	2019-06-17	350

	id	name	host id	neighbourhood group	price	minimum nights	number of reviews	last review	availability 365
1965	858695	Very Large Private Room on quiet st	4494343	Bronx	30	2	291	2019-06-20	208
11303	8772654	Friendly-colorful-cozy W/ breakfast	45990565	Bronx	33	2	103	2019-05-31	282
1706	773041	Nice beautiful room In the Bronx	3684360	Bronx	38	1	187	2019-06-23	241
1578	715270	2 Beds/Queen & Full Beautiful Room 40 minsT.Sq...	3684360	Bronx	39	2	169	2019-06-12	306
15793	12774885	Spacious, cozy bedroom in a private home	69507287	Bronx	40	1	151	2019-07-01	338

	id	name	host_id	host_name	neighbourhood group	neighbourhood	latitude	longitude	room_type	price	minimum nights	number_of_reviews	last_review	reviews_per_month	calculated_host_listings_count	availability_365
23411	18948914	MODERN GARDEN 1BR IN NEWLY RENOVATED TOWNHOUSE	70804640	Akiko	Brooklyn	Bedford-Stuyvesant	40.68378	-73.95592	Entire home/apt	1067	2	64	2019-06-29	2.51	1	32
25064	20070296	Stunning Brooklyn Heights House, Manhattan 5 mins	5173627	Tara	Brooklyn	Brooklyn Heights	40.70136	-73.99372	Entire home/apt	1095	2	24	2019-03-24	1.01	1	240
10341	7915831	Lovely, Room/with Private bathroom	5719436	Leonie	Brooklyn	Flatbush	40.63982	-73.96634	Private room	1100	1	1	2016-09-11	0.03	1	365
2236	1056256	Beautiful eco triplex w/green roof. Free yoga/...	462379	Loretta	Brooklyn	Carroll Gardens	40.67881	-73.99379	Entire home/apt	1395	1	55	2019-06-02	0.73	2	362
3761	2261367	brooklyn 14 bedroom gated community	10416706	Tzvi	Brooklyn	Sea Gate	40.57645	-74.01065	Entire home/apt	1485	2	6	2019-06-30	0.24	1	260

	neighbourhood group	count
1	Brooklyn	231
2	Manhattan	143
3	Queens	133
0	Bronx	29
4	Staten Island	18