{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "056e9ac4-0910-4a86-a85f-c3f95ddc4cb0", "metadata": { "tags": [] }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "9d179665-48fa-4f45-b623-0e08ef2fbb27", "metadata": { "tags": [] }, "outputs": [], "source": [ "!pip install retentioneering" ] }, { "cell_type": "code", "execution_count": 3, "id": "f937e528-df49-4609-a1e5-a60e4561d2a7", "metadata": { "tags": [] }, "outputs": [], "source": [ "from retentioneering import datasets\n", "from retentioneering.eventstream import Eventstream\n", "stream = datasets.load_simple_shop()" ] }, { "cell_type": "markdown", "id": "2b1e46d2-b743-4842-998c-54e17ef2ff8c", "metadata": {}, "source": [ "## Basic example" ] }, { "cell_type": "code", "execution_count": 4, "id": "76830741-8a54-4ffa-b209-2a1477abef70", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 user_iduser_id_sharecountcount_shareavg_countsequence_typeuser_id_sample
Sequence       
path_end3 7511.003 7510.091.00other[572097469]
path_start3 7511.003 7510.091.00other[572097469]
catalog3 6110.9614 5180.364.02other[962547996]
main2 3850.645 6350.142.36other[670686056]
cart1 9240.512 8420.071.48other[656119954]
product21 4300.382 1720.051.52other[90644948]
delivery_choice1 3560.361 6860.041.24other[46668818]
product11 1220.301 5150.041.35other[858537514]
payment_choice9580.261 1070.031.16other[859867220]
delivery_courier7480.208340.021.11other[768244430]
payment_done6530.177060.021.08other[182629547]
payment_card5210.145650.011.08other[965424287]
delivery_pickup4690.135060.011.08other[120190752]
payment_cash1900.051970.001.04other[898482504]
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.sequences()" ] }, { "cell_type": "markdown", "id": "30f46bf4-ec3a-4ceb-9c77-8b64ebec9cb1", "metadata": {}, "source": [ "## Tuning the arguments" ] }, { "cell_type": "code", "execution_count": 5, "id": "574a611d-f45b-4963-8663-b10d90bbbd78", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 countcount_sharesession_id_sharesequence_typesession_id_sample
Sequence     
catalog->catalog4 8570.060.40loop['122145952_1', '168248042_1', '872837948_1']
main->catalog4 0640.050.51other['462395474_2', '736698499_2', '819489198_1']
session_start->main3 7680.040.58other['950168808_1', '816437104_1', '154140118_5']
path_start->session_start3 7510.040.58other['607824880_1', '343679414_1', '891177460_1']
session_end->path_end3 7510.040.58other['607824880_1', '343679414_2', '891177460_1']
catalog->session_end2 8520.030.44other['598153538_1', '190589169_1', '57179757_1']
path_start->session_start->catalog2 6860.030.42other['316559524_1', '380327213_1', '226199400_1']
session_start->catalog2 6860.030.42other['316559524_1', '380327213_1', '226199400_1']
session_start->main->catalog2 6190.030.41other['873271748_2', '15921702_2', '550059631_3']
catalog->product22 1720.030.27other['422594385_1', '895368058_1', '166354605_1']
catalog->session_end->path_end1 7200.020.27other['412253944_1', '887001560_1', '159129921_1']
catalog->cart1 7090.020.24other['409502554_1', '264978536_1', '969062882_1']
cart->delivery_choice1 6860.020.25other['118445583_1', '168248042_1', '490005311_1']
catalog->catalog->catalog1 6510.020.16loop['63228235_1', '779990437_1', '684454528_1']
catalog->product11 5150.020.20other['435687574_1', '617601620_1', '317869590_1']
catalog->main1 4130.020.18other['275248696_2', '734462138_1', '50010643_1']
main->catalog->catalog1 3610.020.20other['959704236_4', '729793240_1', '485499827_1']
catalog->product2->catalog1 2980.020.16cycle['126514366_3', '4635402_1', '558399454_2']
product2->catalog1 2980.020.16other['126514366_3', '4635402_1', '558399454_2']
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream\\\n", " .split_sessions(timeout=(30, 'm'))\\\n", " .sequences(\n", " ngram_range=(2, 3),\n", " weight_col='session_id',\n", " metrics=['count', 'count_share', 'paths_share'],\n", " threshold=['count', 1200],\n", " sorting=['count_share', False],\n", " heatmap_cols=['session_id_share'],\n", " sample_size=3\n", " )" ] }, { "cell_type": "markdown", "id": "05a0ccee-8879-4160-a827-7285dd683be0", "metadata": {}, "source": [ "## Comparing groups" ] }, { "cell_type": "code", "execution_count": 6, "id": "ff832330-8105-4d15-8b54-d069c1035f56", "metadata": { "tags": [] }, "outputs": [], "source": [ "np.random.seed(111)\n", "users = set(stream.to_dataframe()['user_id'])\n", "group1 = set(np.random.choice(list(users), size=len(users)//2))\n", "group2 = users - group1" ] }, { "cell_type": "code", "execution_count": 7, "id": "f549fae7-e844-46dc-9f6e-06c464ba0c9c", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
 user_id_sharecount_sharesequence_typeuser_id_sample
 ABdelta_absdelta_relABdelta_absdelta_relAB
Sequence           
product10.310.290.020.080.040.040.000.08other[138113721][953962586]
main0.640.630.010.020.150.140.010.05other[221205779][263197489]
path_end1.001.000.000.000.090.09-0.00-0.00other[95158569][91859222]
path_start1.001.000.000.000.090.09-0.00-0.00other[95158569][91859222]
payment_cash0.050.050.000.020.000.01-0.00-0.04other[68568059][738009902]
\n" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Compare sequence metrics between the two user groups, labelled A and B.\n", "stream.sequences(\n", "    groups=[group1, group2],\n", "    group_names=['A', 'B'],\n", "    metrics=['paths_share', 'count_share'],\n", "    threshold=[('user_id_share', 'delta_abs'), 0],\n", "    sorting=[('count_share', 'delta_rel'), False]\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "rete", "language": "python", "name": "rete" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 5 }