1.1 Part 1 - Theory Questions

1.1.1 1.1) The 4 V's

Explain the 4 V's in your own words using an example. Don't forget to put the 4 V's into their thematic context.

When it became harder to handle data with conventional approaches and database systems, IBM data scientists identified the characteristic features of this unmanageable big data. What makes such data hard to work with in the old, easy way was described in four words; to call a dataset "Big Data", we check these four characteristics: Volume (the sheer amount of data), Variety (the different types and formats), Velocity (the speed at which new data arrives), and Veracity (how trustworthy the data is).

Only if all four of these conditions are met can we call the data "Big Data".

Sample Case 1: A company running an online shop to sell its products has gigabytes of data, depending on the diversity of the products. However, the company does not accept comments or input from customers in this shop, because it handles that in a separate system. Here we certainly have Volume, and perhaps Variety too, but if there is hardly any new input, which comes only from the company itself, and no frequent new products to multiply the volume, we cannot say there is Velocity. Since not all four V's are met here, the company does not need a Big Data analyst or scientist, and it does not need to change its system.

Sample Case 2: Now consider a security company that also hosts the servers for the cameras it installs. Let's say these cameras also have microphones, and the company has to keep the data for at least one year for later investigations. For:

1.1.2 1.2) Missing Values

a.) Explain, using an example of your own choosing, what missing values are.

Let's consider data from a poll that records whether people have children or are expecting one, their level of expectations in life and, relatedly, their happiness in life.

Let's have the following columns: sex, number_of_children, pregnancy, spouse_pregnancy, house/apartment, demand_to_improve_accomodation, car, demanding_car

Permanent Missing Values: First of all, we need to be prepared for any kind of data loss while the poll is being filled in, while the answers are copied into the computer, filed away, or transformed in an application.

Structural Missing Values: We should certainly expect missing values in the pregnancy and spouse_pregnancy columns, depending on the sex of the participant. We can understand that instead of answering "No", the participant simply skipped the field, because it is logically understood that the field should contain no information.

The data type of missing values can be: NaN (a float), None (an object), NaT (for datetimes), or a placeholder such as an empty string.
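A minimal sketch of how pandas represents such gaps, using made-up rows with the poll columns from above:

```python
import numpy as np
import pandas as pd

# Made-up poll answers: the male participant skips the pregnancy field,
# so it is structurally missing rather than an explicit "No"
df = pd.DataFrame({
    "sex": ["f", "m", "f"],
    "number_of_children": [1, 0, 2],
    "pregnancy": ["yes", np.nan, "no"],
})

# pandas stores the gap as NaN; mixing strings and NaN gives an object column
print(df["pregnancy"].isnull().sum())  # 1 structurally missing entry
print(df["pregnancy"].dtype)           # object
```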

b.) Why do missing values have to be dealt with during data analysis?

As the data types of missing values show, they can cause problems when we apply mathematical operations to the columns, or they can distort the statistical measures (mean, median, mode, etc.) of the whole column. The results of the data analysis would then not be trustworthy.

c.) How can missing values be identified in data analysis?

We can use a few standard calls to check the values in the columns and get familiar with the data.

df.head()          : to see the first 5 rows
df.tail(3)         : to see the last 3 rows
df.info()          : to check whether the expected type and the actual type match
                     (expected type: integer, actual type: float => there may be NaNs)
df.isnull().sum()  : to find the missing values per column and sum them up
df.nunique()       : to check the number of unique items before we look at them
df['col'].unique() : to see the unique values of a column (unique() exists on a Series, not on a DataFrame)

Some libraries also offer dedicated plots (e.g. missingno => msno.bar, msno.matrix, msno.heatmap, etc.).
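Put together on a toy frame (column names invented for the illustration), the checks above look like this:

```python
import numpy as np
import pandas as pd

# Toy frame with one gap in each column
df = pd.DataFrame({
    "age": [25, np.nan, 31, 40],
    "city": ["Berlin", "Hamburg", None, "Berlin"],
})

df.info()                    # "age" shows float64 although ages are integers -> hints at NaNs
print(df.isnull().sum())     # missing values per column: age 1, city 1
print(df["city"].nunique())  # 2 distinct cities (NaN is not counted)
print(df["city"].unique())   # the distinct values themselves, including None
```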




1.2 Part 2 - Practice

You work as a data scientist in a start-up. You opened for business a year ago and now want to take the next step and expand your services. Your business model is running a platform where people who have a business idea, but not the required money, can register and collect money for their project within a given time frame. On the other side you have investors who would like to put their money into projects and are looking for investments. As an intermediary, your platform brings borrowers and lenders together. You earn your money through a commission on every project that lands on your platform.

Your data basis is the history of your platform. All projects are completed, i.e. the time to collect money for a project has expired. Your business model provides that the collected funds are paid out even if the target amount was not reached.

There are NO duplicates in the dataset.

The split dataset contains the following columns (incl. their meaning):

Our two files seem to have the same columns; let's check it.

Since we know that there are no duplicates, we can simply concatenate them one after the other.
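A sketch of the check-and-concat step; the two small frames stand in for the real CSV files, whose names are not given here:

```python
import pandas as pd

# Stand-ins for the two split files (in the real notebook they come from pd.read_csv)
df_a = pd.DataFrame({"id": [1, 2], "funded_amount": [300, 500]})
df_b = pd.DataFrame({"id": [3, 4], "funded_amount": [250, 1000]})

# Same columns in the same order?
assert list(df_a.columns) == list(df_b.columns)

# No duplicates in the data, so we simply stack one frame after the other
df_funding = pd.concat([df_a, df_b], ignore_index=True)
print(len(df_funding))  # 4
```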

There are null values in the columns "use", "country_code", "region" and "borrower_genders".

We check the number of unique values per column.

Evaluation of the columns' unique values:

1. We would expect the number of "country_code" values and the number of "country" values to be the same, but here they differ by one.

2. The counts of distinct "term_in_months" and "borrower_genders" values seem higher than expected.

Let's check their unique values.

We see the values are ordered, and the "nan" value belongs to Namibia.

So we check whether NaN belongs to Namibia; if yes, we replace it with "NA", Namibia's ISO country code.
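A sketch of that check, assuming the frame is called df_funding as later on (the three rows are invented): Namibia's country code is literally "NA", which pandas parses as NaN by default when reading a CSV.

```python
import numpy as np
import pandas as pd

# Invented rows; in the real data the NaN country_code appears on Namibia rows
df_funding = pd.DataFrame({
    "country": ["Kenya", "Namibia", "Peru"],
    "country_code": ["KE", np.nan, "PE"],
})

# Does every missing country_code belong to Namibia?
mask = df_funding["country_code"].isnull()
if (df_funding.loc[mask, "country"] == "Namibia").all():
    # the string "NA" was parsed as NaN when reading the CSV
    df_funding.loc[mask, "country_code"] = "NA"

print(df_funding["country_code"].isnull().sum())  # 0
```

Alternatively, passing `keep_default_na=False` (together with an explicit `na_values` list) to `pd.read_csv` avoids the mis-parse at read time.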

The relation between the missing values:

It seems there is a relation between the "funded_amount" and "lender_count" columns.

What we see in both columns is that they have no missing values...

But what about noisy data?

Ideas:

- Our income depends on how big the loan amounts are.

        So it would be good to see where we earn more.

- We can also compare the sectors: which one helps us more in which country.

- How good are the terms in each country and sector?
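These ideas translate to simple groupby aggregations; a sketch on invented rows (the real frame has these columns per project):

```python
import pandas as pd

# Invented loans for illustration
df = pd.DataFrame({
    "country": ["Kenya", "Kenya", "Peru", "Peru"],
    "sector": ["Food", "Retail", "Food", "Retail"],
    "funded_amount": [500, 300, 800, 200],
})

# Where do the big loan amounts (and hence our commission) come from?
by_country = df.groupby("country")["funded_amount"].sum().sort_values(ascending=False)

# Which sector contributes most within each country?
by_sector = df.groupby(["country", "sector"])["funded_amount"].sum()

print(by_country)
print(by_sector)
```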

Let's make some changes to the DataFrame so that we can get a first comparison of our data with a pairplot.

In case the amount values are in the local currencies:

To use an API for our exchange-rate conversion, we need the requests library.

pip install requests

import requests
response = requests.get("https://api.exchangerate-api.com/v4/latest/EUR")


            to inspect the API response

import json

def jprint(obj):
    # create a formatted string of the Python JSON object
    text = json.dumps(obj, sort_keys=True, indent=4)
    print(text)

jprint(response.json())


            how we access a single currency rate

response.json()['rates']['TRY']


            converting the funded amounts from the local currencies into EUR

rates = response.json()['rates']
# ZWD is not in the API's rate table, so mark it as missing instead of dividing by zero
df_funding['euro_converted'] = [rates[c] if c != 'ZWD' else float('nan') for c in df_funding['currency']]
df_funding['euro_converted'] = df_funding['funded_amount'] / df_funding['euro_converted']
df_funding.head()

Let's drop some columns we won't need.

Let's check our outliers.

I'd say capping at 55,000 suppresses the values too much, so I will set the upper limit a little higher.
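One way to put a number on "a little higher" is the IQR rule with a relaxed fence; a sketch on invented amounts (using 3·IQR instead of the classic 1.5·IQR is my assumption, not the document's):

```python
import pandas as pd

s = pd.Series([200, 350, 500, 700, 900, 1200, 55000])  # invented funded amounts

q1, q3 = s.quantile(0.25), s.quantile(0.75)
iqr = q3 - q1
upper = q3 + 1.5 * iqr          # classic IQR fence
upper_relaxed = q3 + 3.0 * iqr  # "a little higher", so fewer values get suppressed

# Winsorize: clip the extreme values down to the relaxed upper limit
s_capped = s.clip(upper=upper_relaxed)
print(s_capped.max())
```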

Now we are at the end of the outliers part.

Before creating our first visuals, we save the DataFrame to a CSV and continue in another file.
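The save step itself is a one-liner; the file name here is illustrative:

```python
import pandas as pd

# Tiny stand-in for the cleaned frame
df_funding = pd.DataFrame({"id": [1, 2], "euro_converted": [270.5, 450.0]})

# index=False keeps the row index out of the file, so re-reading adds no extra column
df_funding.to_csv("kiva_loans_cleaned.csv", index=False)

df_check = pd.read_csv("kiva_loans_cleaned.csv")
print(df_check.shape)  # (2, 2)
```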