## Prerequisites

Run this cell to prepare the environment. This step is obligatory.

In [None]:
!pip install retentioneering

In [None]:
import retentioneering
import pandas as pd

## Stattests

The full text of the [Stattests](https://doc.retentioneering.com/stable/doc/user_guides/stattests.html) user guide is available on the retentioneering website.


## Loading data

In [None]:
import numpy as np
from retentioneering import datasets

# Load the `simple_shop` dataset from `retentioneering.datasets`.
# `stream` is the object the cells below call `.to_dataframe()` and `.stattests()` on.
stream = datasets.load_simple_shop()

## General stattests usage

In [None]:
# Export the stream to a DataFrame and collect the distinct user ids.
data = stream.to_dataframe()
users = data['user_id'].unique()

# Cut the id array at its midpoint: the first group gets the first half,
# the second group gets the remainder.
index_separator = len(users) // 2
user_groups = (users[:index_separator], users[index_separator:])

# Show both groups, one per line.
print(*user_groups, sep='\n')

[219483890 964964743 629881394 ... 901422808 523047643 724268790]
[315196393 443659932 865093748 ... 965024600 831491833 962761227]


In [None]:
def cart_share(df):
    """Return the fraction of rows in ``df`` whose ``event`` equals ``'cart'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to inspect; must contain an ``event`` column.

    Returns
    -------
    float
        Number of ``'cart'`` rows divided by the total number of rows.

    Raises
    ------
    ZeroDivisionError
        If ``df`` has no rows.
    """
    is_cart = df['event'] == 'cart'
    # Cast the NumPy count to a plain int so the result is a built-in float.
    return int(is_cart.sum()) / len(df)

In [None]:
# Sanity-check the metric on one user: take the id at position 378 of the
# first group (apparently an arbitrary pick) and compute the share of
# 'cart' events among that user's rows.
some_user = user_groups[0][378]
cart_share(data[data['user_id'] == some_user])

0.15384615384615385

In [None]:
# Compare the `cart_share` metric between the two user groups with a t-test.
# `func` is presumably evaluated on each user's events separately; confirm
# against the Stattests user guide.
# The stored output reports mean ± SD and n per group, the p-value and the
# power of the test.
stream.stattests(
    groups=user_groups,
    func=cart_share,
    group_names=['random_group_1', 'random_group_2'],
    test='ttest'
)

random_group_1 (mean ± SD): 0.075 ± 0.095, n = 1875
random_group_2 (mean ± SD): 0.078 ± 0.102, n = 1876
'random_group_1' is greater than 'random_group_2' with p-value: 0.21369
power of the test: 8.85%


<retentioneering.tooling.stattests.stattests.StatTests at 0x7f5a977465e0>

### Test power

In [None]:
# Same comparison as the previous t-test, but with `alpha=0.01` passed
# explicitly (presumably the significance level; see the Stattests user guide).
# In the stored outputs the p-value is unchanged, while the reported power
# of the test drops from 8.85% to 2.11%.
stream.stattests(
    groups=user_groups,
    func=cart_share,
    group_names=['random_group_1', 'random_group_2'],
    test='ttest',
    alpha=0.01
)

random_group_1 (mean ± SD): 0.075 ± 0.095, n = 1875
random_group_2 (mean ± SD): 0.078 ± 0.102, n = 1876
'random_group_1' is greater than 'random_group_2' with p-value: 0.21369
power of the test: 2.11%


<retentioneering.tooling.stattests.stattests.StatTests at 0x7f5a97746fd0>

### Categorical variables


In [None]:
# Ids of users that have at least one 'product1' / 'product2' event.
user_group_1 = set(data[data['event'] == 'product1']['user_id'])
user_group_2 = set(data[data['event'] == 'product2']['user_id'])

# Make the groups mutually exclusive by dropping users present in both.
# Both differences are computed from the ORIGINAL sets before either name is
# rebound. Subtracting the overlap from `user_group_1` first and then
# recomputing `user_group_1 & user_group_2` yields an empty set, which would
# leave the shared users in `user_group_2`.
# NOTE(review): outputs stored before this fix may report a larger size for
# the second group; re-run the cells below to refresh them.
user_group_1, user_group_2 = (
    user_group_1 - user_group_2,
    user_group_2 - user_group_1,
)

In [None]:
def cart_count(df):
    """Bucket the number of ``'cart'`` rows in ``df`` into a category label.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to inspect; must contain an ``event`` column.

    Returns
    -------
    str
        ``'0'``, ``'1'`` or ``'2'`` when there are at most two ``'cart'``
        rows, otherwise ``'>2'``.
    """
    n_carts = int((df['event'] == 'cart').sum())
    return '>2' if n_carts > 2 else str(n_carts)

# Try the categorical metric on a single user: the id at position 378 of the
# first half of `users` (the same index used in the `cart_share` example).
some_user = user_groups[0][378]
cart_count(data[data['user_id'] == some_user])

'2'

In [None]:
# A second single-user check, this time for the id at position 379 of the
# first half of `users`.
some_user = user_groups[0][379]
cart_count(data[data['user_id'] == some_user])

'0'

### Chi2 contingency test

In [None]:
# `cart_count` returns category labels ('0', '1', '2', '>2') rather than
# numbers, so the 'chi2_contingency' test is requested here.
# The groups are the users with a 'product1' event and the users with a
# 'product2' event.
stream.stattests(
    groups=(user_group_1, user_group_2),
    func=cart_count,
    group_names=('product_1_group', 'product_2_group'),
    test='chi2_contingency'
)

product_1_group (size): n = 580
product_2_group (size): n = 1430
Group difference test with p-value: 0.00000


<retentioneering.tooling.stattests.stattests.StatTests at 0x7f5a983deca0>