v2['Order']=v2.groupby(['NID']).Month.transform(lambda x : pd.factorize(x)[0]+1)
Rename Columns
df.rename(columns={'W':'A', 'X':'B', etc})
Pie Chart
import matplotlib.pyplot as plt
fig1, ax1 = plt.subplots()
labels = v2.index
ax1.pie(v[["cnt","Pdate"]].pivot_table(index="cnt" , aggfunc="count" ), autopct='%1.1f%%',labels=labels,
shadow=True, startangle=90)
ax1.axis('equal') # Equal aspect ratio ensures that pie is drawn as a circle.
plt.show()
Retention
g = pd.DataFrame(df['Frequency'].value_counts())
g
h = pd.DataFrame(columns = ['F', 'Retention Rate'])
for i in range(291):
h.loc[i] = [i + 1, (g['Frequency'].iloc[i+1:].sum()/(g['Frequency'].iloc[i:].sum()))]
h.plot(x = 'F', y = 'Retention Rate', figsize=[16,8], legend=False);
plt.xlim(0,60)
# plt.xticks([1,2,3,4,5,6,7,8,9,10,15,20,30,40,50,60]);
# plt.yticks([0,0.20,0.40,0.45,0.50,0.55,0.60,0.65,0.70,0.75,0.80,0.85,0.90,0.95,1.0]);
plt.xlabel('Order Number')
plt.ylabel('Retention Rate')
h.plot(x = 'F', y = 'Retention Rate', figsize=[16,8], legend=False);
plt.xlim(0,60)
plt.xticks([1,2,3,4,5,6,7,8,9,10,20,30,40,50,60]);
plt.yticks([0,0.20,0.40,0.50,0.56,0.60,0.70,0.80,0.90,1.0]);
plt.xlabel('Order Number')
plt.ylabel('Retention Rate')
plt.axvline(x = 2, linewidth = 2, color = 'grey')
plt.axhline(y = 0.56, linewidth = 2, color = 'grey')
plt.axvline(x = 5, linewidth = 2, color = 'cyan')
plt.axhline(y = 0.71, linewidth = 2, color = 'cyan')
plt.axvline(x = 9, linewidth = 2, color = 'green')
plt.axhline(y = 0.8, linewidth = 2, color = 'green')
m = df.groupby(['Level','Company','Item'])['Value'].sum().unstack(level=['Company','Item'])
m = m.assign(total=m.sum(1))
m = m.stack(level='Company')
m = m.assign(total=m.sum(1))
m = m.stack(level='Item')
#df['new column name'] = df['column name'].apply(lambda x: 'value if condition is met' if x condition else 'value if condition is not met')
df['equal_or_lower_than_4?'] = df['set_of_numbers'].apply(lambda x: 'True' if x <= 4 else 'False')
h = ret.copy()
for j in range(len(h.columns)):
for i in range(len(h)):
h.iloc[i,j] = ret.iloc[i+1:,j].sum()/(ret.iloc[i:,j].sum())
h.reset_index(inplace = True)
h
h = pd.DataFrame(columns = ['F', 'cnt'])
for i in range(10):
h.loc[i] = [i + 1, (ret['DistCount'].iloc[i+1:].sum()/(ret['DistCount'].iloc[i:].sum()))]
h
clv = pd.DataFrame(columns = ['F','clv'])
for i in range(10):
clv.loc[i+1] = [i+1,v2[v2['Order'] == i+1]['paym'].sum()/v2[v2['Order'] == i+1]['NID'].nunique()]
clv = clv.merge(h,on = 'F')
cl = 0
for i in range(10):
cl = cl +clv.iloc[i,1] * clv.iloc[i-1,2]
cl = cl + clv.iloc[0,1]
cl
F
clv
Ret
1
1.81209
22%
2
3.14298
52%
3
4.18291
65%
4
4.41992
70%
5
4.63124
57%
6
5.80514
48%
7
3.34229
86%
8
6.49992
25%
9
4.48067
67%
10
6.0045
0%
Import all csv files from a folder
import pandas as pd
import os
path = os.getcwd()
files = os.listdir(path)
files_csv = [f for f in files if f[-3:] == 'csv']
df = pd.DataFrame()
for f in files_csv:
data = pd.read_csv(f)
df = df.append(data)