# This file only needs the pandas and seaborn libraries; for now, let's import only pandas.
import pandas as pd
# Reload the previously saved dataset from disk.
dropped = pd.read_csv(filepath_or_buffer="dropped.csv", engine="python")
# Sanity check: print the column summary to confirm nothing went missing
# while the file was being saved and read back.
dropped.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671205 entries, 0 to 671204
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype
---  ------              --------------   -----
 0   funded_amount       671205 non-null  float64
 1   loan_amount         671205 non-null  float64
 2   activity            671205 non-null  object
 3   sector              671205 non-null  object
 4   country_code        671197 non-null  object
 5   country             671205 non-null  object
 6   region              614405 non-null  object
 7   term_in_months      671205 non-null  float64
 8   lender_count        671205 non-null  int64
 9   repayment_interval  671205 non-null  object
 10  borrowers_total     666984 non-null  float64
 11  female              666984 non-null  float64
 12  male                666984 non-null  float64
dtypes: float64(6), int64(1), object(6)
memory usage: 66.6+ MB
# Also load the dataset that still contains the outliers, so that their effect
# on the pairplot can be seen.
dropped_w_o = pd.read_csv(filepath_or_buffer="dropped_w_o.csv", engine="python")
# Print the column summary of the data frame with outliers.
dropped_w_o.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 671205 entries, 0 to 671204
Data columns (total 14 columns):
 #   Column              Non-Null Count   Dtype
---  ------              --------------   -----
 0   funded_amount       671205 non-null  float64
 1   loan_amount         671205 non-null  float64
 2   activity            671205 non-null  object
 3   sector              671205 non-null  object
 4   country_code        671197 non-null  object
 5   country             671205 non-null  object
 6   region              614405 non-null  object
 7   term_in_months      671205 non-null  float64
 8   lender_count        671205 non-null  int64
 9   repayment_interval  671205 non-null  object
 10  borrowers_total     666984 non-null  float64
 11  b_g_percent_female  666984 non-null  float64
 12  female              666984 non-null  float64
 13  male                666984 non-null  float64
dtypes: float64(7), int64(1), object(6)
memory usage: 71.7+ MB
# seaborn is needed from this point on for plotting.
import seaborn as sns
# Pairplot of the data frame without outliers.
sns.pairplot(data=dropped)
<seaborn.axisgrid.PairGrid at 0x1d1544ae850>
# Pairplot of the data frame that still contains the outliers, for comparison.
sns.pairplot(data=dropped_w_o)
<seaborn.axisgrid.PairGrid at 0x1d14e465850>