import pandas as pd
import numpy as np
# Load the California housing test split that ships with the Colab sample data.
df = pd.read_csv(
    filepath_or_buffer='/content/sample_data/california_housing_test.csv',
)
df = pd.DataFrame({'a':np.random.rand(10),
'b':np.random.randint(10, size=10),
'b':['London','Paris','New York','Istanbul',
Output of df.info(): <class 'pandas.core.frame.DataFrame'> RangeIndex: 3000 entries,
0 to 2999 Data columns (total 9 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 longitude 3000 non-null float64 1 latitude 3000 non-null float64 2 housing_median_age 3000 non-null float64 3 total_rooms 3000 non-null float64 4 total_bedrooms 3000 non-null float64 5 population 3000 non-null float64 6 households 3000 non-null float64 7 median_income 3000 non-null float64 8 median_house_value 3000 non-null float64 dtypes: float64(9) memory usage: 211.1 KB
# Missing-value handling options.
# None of these calls modify `df` in place: each returns a NEW object, so
# assign the result (e.g. `df = df.dropna()`) if the change should be kept.
df.dropna()  # drop every row that contains at least one NaN
df.fillna(0)  # fill NaN values with zeros
# fillna() needs an explicit fill value; passing the Series of per-column
# means fills each column's NaNs with that column's own mean.
df.fillna(df.mean(numeric_only=True))
df.replace(np.nan, 0)  # replace NaN with zero (same result as fillna(0))
# Replace NaN in a single column with that column's mean. 'total_rooms' is
# used as the example column; substitute whichever column is needed.
df['total_rooms'].replace(np.nan, df['total_rooms'].mean())
longitude -118.300 latitude 34.260 housing_median_age 43.000 total_rooms 1510.000 total_bedrooms 310.000 population 809.000 households 277.000 median_income 3.599 median_house_value 176500.000 Name: 1, dtype: float64
0 3885.0
1 1510.0
2 3589.0
Name: total_rooms, dtype: float64
# Install the profiling package. The leading `!` is a notebook shell escape,
# so this line only works inside IPython/Jupyter/Colab, not plain Python.
# NOTE(review): pandas-profiling has been renamed upstream to ydata-profiling;
# confirm which package this environment should be using.
!pip install pandas-profiling
from pandas_profiling import ProfileReport
# Build a profiling report object for the DataFrame `df`.
prof = ProfileReport(df)




