{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "vtGqS-SRKnlx" }, "source": [ "## Prerequisites\n", "\n", "Run the cell below to prepare the environment. This step is obligatory." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 46390, "status": "ok", "timestamp": 1682533467826, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "bieK6_UAZ94I", "outputId": "180531d1-1adb-4299-ba62-dd724465cd66", "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "# Use the %pip magic (not !pip) so the package is installed into the running kernel's environment.\n", "%pip install retentioneering" ] }, { "cell_type": "markdown", "metadata": { "id": "ehhXXP78mkKN" }, "source": [ "The full text of the [Eventstream user guide](https://doc.retentioneering.com/release3/doc/user_guides/eventstream.html) is available on the retentioneering website." ] }, { "cell_type": "markdown", "metadata": { "id": "5lNAjTawnauQ" }, "source": [ "## Eventstream creation" ] }, { "cell_type": "markdown", "metadata": { "id": "aTSWoRC3neE1" }, "source": [ "### Default field names" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "id": "Qnv8CLLknifp" }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "df1 = pd.DataFrame(\n", " [\n", " ['user_1', 'A', '2023-01-01 00:00:00'],\n", " ['user_1', 'B', '2023-01-01 00:00:01'],\n", " ['user_2', 'B', '2023-01-01 00:00:02'],\n", " ['user_2', 'A', '2023-01-01 00:00:03'],\n", " ['user_2', 'A', '2023-01-01 00:00:04'],\n", " ],\n", " columns=['user_id', 'event', 'timestamp']\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "RKad2rLIn7AX" }, "outputs": [], "source": [ "from retentioneering.eventstream import Eventstream\n", "\n", "stream1 = Eventstream(df1)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 250 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp":
1682533511878, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "GvdbciRron1h", "outputId": "5f7d8db2-6188-43cb-f03e-eab737321966" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0e6c37953-a47c-474d-b2bb-c805b5eadcd4path_start0path_start2023-01-01 00:00:00user_1
1e6c37953-a47c-474d-b2bb-c805b5eadcd4raw0A2023-01-01 00:00:00user_1
245fce2bb-536e-4e05-a998-5ceec13fabb8raw1B2023-01-01 00:00:01user_1
345fce2bb-536e-4e05-a998-5ceec13fabb8path_end1path_end2023-01-01 00:00:01user_1
4f43a320b-77b3-4973-a809-783f32df1e2fpath_start2path_start2023-01-01 00:00:02user_2
5f43a320b-77b3-4973-a809-783f32df1e2fraw2B2023-01-01 00:00:02user_2
65beb2751-7ba5-4071-872c-81db191a6fd7raw3A2023-01-01 00:00:03user_2
7778efbbd-48e6-40cb-8051-ad68a1d8c7a4raw4A2023-01-01 00:00:04user_2
8778efbbd-48e6-40cb-8051-ad68a1d8c7a4path_end4path_end2023-01-01 00:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 e6c37953-a47c-474d-b2bb-c805b5eadcd4 path_start 0 path_start \n", "1 e6c37953-a47c-474d-b2bb-c805b5eadcd4 raw 0 A \n", "2 45fce2bb-536e-4e05-a998-5ceec13fabb8 raw 1 B \n", "3 45fce2bb-536e-4e05-a998-5ceec13fabb8 path_end 1 path_end \n", "4 f43a320b-77b3-4973-a809-783f32df1e2f path_start 2 path_start \n", "5 f43a320b-77b3-4973-a809-783f32df1e2f raw 2 B \n", "6 5beb2751-7ba5-4071-872c-81db191a6fd7 raw 3 A \n", "7 778efbbd-48e6-40cb-8051-ad68a1d8c7a4 raw 4 A \n", "8 778efbbd-48e6-40cb-8051-ad68a1d8c7a4 path_end 4 path_end \n", "\n", " timestamp user_id \n", "0 2023-01-01 00:00:00 user_1 \n", "1 2023-01-01 00:00:00 user_1 \n", "2 2023-01-01 00:00:01 user_1 \n", "3 2023-01-01 00:00:01 user_1 \n", "4 2023-01-01 00:00:02 user_2 \n", "5 2023-01-01 00:00:02 user_2 \n", "6 2023-01-01 00:00:03 user_2 \n", "7 2023-01-01 00:00:04 user_2 \n", "8 2023-01-01 00:00:04 user_2 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream1.to_dataframe()" ] }, { "cell_type": "markdown", "metadata": { "id": "wgEuD1LbpIch" }, "source": [ "### Custom field names" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 187 }, "executionInfo": { "elapsed": 864, "status": "ok", "timestamp": 1682533522926, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "MahQ47jZpWQ9", "outputId": "906f96fc-cf3b-4d7b-f70c-0a66e6179680" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0e719c52f-5677-49c2-a02d-d3ec7eb376c1path_start0path_start2023-01-01 00:00:00user_1
1e719c52f-5677-49c2-a02d-d3ec7eb376c1raw0A2023-01-01 00:00:00user_1
27cfa78bd-37ee-4f0d-9fc9-ffb858fac940raw1B2023-01-01 00:00:01user_1
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 e719c52f-5677-49c2-a02d-d3ec7eb376c1 path_start 0 path_start \n", "1 e719c52f-5677-49c2-a02d-d3ec7eb376c1 raw 0 A \n", "2 7cfa78bd-37ee-4f0d-9fc9-ffb858fac940 raw 1 B \n", "\n", " timestamp user_id \n", "0 2023-01-01 00:00:00 user_1 \n", "1 2023-01-01 00:00:00 user_1 \n", "2 2023-01-01 00:00:01 user_1 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = pd.DataFrame(\n", " [\n", " ['user_1', 'A', '2023-01-01 00:00:00'],\n", " ['user_1', 'B', '2023-01-01 00:00:01'],\n", " ['user_2', 'B', '2023-01-01 00:00:02'],\n", " ['user_2', 'A', '2023-01-01 00:00:03'],\n", " ['user_2', 'A', '2023-01-01 00:00:04']\n", " ],\n", " columns=['client_id', 'action', 'datetime']\n", ")\n", "\n", "raw_data_schema = {\n", " 'user_id': 'client_id',\n", " 'event_name': 'action',\n", " 'event_timestamp': 'datetime'\n", "}\n", "\n", "stream2 = Eventstream(df2, raw_data_schema=raw_data_schema)\n", "stream2.to_dataframe().head(3)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 240 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1682533616073, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "nF_GgR1Kpt5S", "outputId": "bebbf70d-24bf-4a7a-fba1-ec513e255ad3" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
0091d0318-0c3b-4e91-9d70-f55652717886path_start0path_start2023-01-01 00:00:00user_1session_1
1091d0318-0c3b-4e91-9d70-f55652717886raw0A2023-01-01 00:00:00user_1session_1
2ceecfa3b-1521-46d3-95b0-4c4b0c074bd4raw1B2023-01-01 00:00:01user_1session_1
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 091d0318-0c3b-4e91-9d70-f55652717886 path_start 0 path_start \n", "1 091d0318-0c3b-4e91-9d70-f55652717886 raw 0 A \n", "2 ceecfa3b-1521-46d3-95b0-4c4b0c074bd4 raw 1 B \n", "\n", " timestamp user_id session_id \n", "0 2023-01-01 00:00:00 user_1 session_1 \n", "1 2023-01-01 00:00:00 user_1 session_1 \n", "2 2023-01-01 00:00:01 user_1 session_1 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = pd.DataFrame(\n", " [\n", " ['user_1', 'A', '2023-01-01 00:00:00', 'session_1', 'mobile'],\n", " ['user_1', 'B', '2023-01-01 00:00:01', 'session_1', 'mobile'],\n", " ['user_2', 'B', '2023-01-01 00:00:02', 'session_2', 'desktop'],\n", " ['user_2', 'A', '2023-01-01 00:00:03', 'session_3', 'desktop'],\n", " ['user_2', 'A', '2023-01-01 00:00:04', 'session_3', 'desktop']\n", " ],\n", " columns=['client_id', 'action', 'datetime', 'session', 'device']\n", " )\n", "raw_data_schema = {\n", " 'user_id': 'client_id',\n", " 'event_name': 'action',\n", " 'event_timestamp': 'datetime',\n", " 'custom_cols': [\n", " {\n", " 'raw_data_col': 'session',\n", " 'custom_col': 'session_id'\n", " }\n", " ]\n", "}\n", "\n", "stream3 = Eventstream(df3, raw_data_schema=raw_data_schema)\n", "stream3.to_dataframe().head(3)" ] }, { "cell_type": "markdown", "metadata": { "id": "wEdtu-T5vUQO" }, "source": [ "### Eventstream field names" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 240 }, "executionInfo": { "elapsed": 988, "status": "ok", "timestamp": 1682533622903, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "hH6YUPxZvXj8", "outputId": "ad3eb187-36a7-476f-e34f-6c170d48ddea" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexactiondatetimeclient_id
081d042d4-ae4b-42af-9691-b87b2f2ed489path_start0path_start2023-01-01 00:00:00user_1
181d042d4-ae4b-42af-9691-b87b2f2ed489raw0A2023-01-01 00:00:00user_1
2aba7b523-2be6-4b06-8fc5-793e3597c41fraw1B2023-01-01 00:00:01user_1
\n", "
" ], "text/plain": [ " event_id event_type event_index action \\\n", "0 81d042d4-ae4b-42af-9691-b87b2f2ed489 path_start 0 path_start \n", "1 81d042d4-ae4b-42af-9691-b87b2f2ed489 raw 0 A \n", "2 aba7b523-2be6-4b06-8fc5-793e3597c41f raw 1 B \n", "\n", " datetime client_id \n", "0 2023-01-01 00:00:00 user_1 \n", "1 2023-01-01 00:00:00 user_1 \n", "2 2023-01-01 00:00:01 user_1 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from retentioneering.eventstream import EventstreamSchema\n", "\n", "new_eventstream_schema = EventstreamSchema(\n", " user_id='client_id',\n", " event_name='action',\n", " event_timestamp='datetime'\n", ")\n", "\n", "stream1_new_schema = Eventstream(df1, schema=new_eventstream_schema)\n", "stream1_new_schema.to_dataframe().head(3)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1682533623512, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "NuayqnFGyLuU", "outputId": "c84dde70-e1b2-4fd8-b46d-66fb914cca5f" }, "outputs": [ { "data": { "text/plain": [ "EventstreamSchema(event_id='event_id', event_type='event_type', event_index='event_index', event_name='action', event_timestamp='datetime', user_id='client_id', custom_cols=[])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream1_new_schema.schema" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Timestamp column format" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "df1_1 = pd.DataFrame(\n", " [\n", " ['user_1', 'A', '2023-01-01 00:00:00+02:00'],\n", " ['user_1', 'B', '2023-01-01 00:00:01+02:00'],\n", " ['user_2', 'B', '2023-01-01 00:00:02+04:00'],\n", " ['user_2', 'A', '2023-01-01 00:00:03+04:00'],\n", " ['user_2', 'A', '2023-01-01 00:00:04+02:00'],\n", " 
],\n", " columns=['user_id', 'event', 'timestamp']\n", " )" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/eventstream/eventstream.py:689: FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n", " events[self.schema.event_timestamp] = pd.to_datetime(events[self.schema.event_timestamp])\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0ae6ad401-2658-4080-99d3-f8b6250795ddpath_start0path_start2023-01-01 00:00:00user_1
1ae6ad401-2658-4080-99d3-f8b6250795ddraw0A2023-01-01 00:00:00user_1
26d23ba48-0805-4a44-8a3a-68fcf1fb474craw1B2023-01-01 00:00:01user_1
36d23ba48-0805-4a44-8a3a-68fcf1fb474cpath_end1path_end2023-01-01 00:00:01user_1
42e59a0cc-48e1-4ead-ad79-ffa70c6d3d64path_start2path_start2023-01-01 00:00:02user_2
52e59a0cc-48e1-4ead-ad79-ffa70c6d3d64raw2B2023-01-01 00:00:02user_2
6099120eb-b9b2-492d-b87a-e2055d013898raw3A2023-01-01 00:00:03user_2
7f505fa28-4dfe-4bcd-80da-5c4ea05d0e02raw4A2023-01-01 00:00:04user_2
8f505fa28-4dfe-4bcd-80da-5c4ea05d0e02path_end4path_end2023-01-01 00:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 ae6ad401-2658-4080-99d3-f8b6250795dd path_start 0 path_start \n", "1 ae6ad401-2658-4080-99d3-f8b6250795dd raw 0 A \n", "2 6d23ba48-0805-4a44-8a3a-68fcf1fb474c raw 1 B \n", "3 6d23ba48-0805-4a44-8a3a-68fcf1fb474c path_end 1 path_end \n", "4 2e59a0cc-48e1-4ead-ad79-ffa70c6d3d64 path_start 2 path_start \n", "5 2e59a0cc-48e1-4ead-ad79-ffa70c6d3d64 raw 2 B \n", "6 099120eb-b9b2-492d-b87a-e2055d013898 raw 3 A \n", "7 f505fa28-4dfe-4bcd-80da-5c4ea05d0e02 raw 4 A \n", "8 f505fa28-4dfe-4bcd-80da-5c4ea05d0e02 path_end 4 path_end \n", "\n", " timestamp user_id \n", "0 2023-01-01 00:00:00 user_1 \n", "1 2023-01-01 00:00:00 user_1 \n", "2 2023-01-01 00:00:01 user_1 \n", "3 2023-01-01 00:00:01 user_1 \n", "4 2023-01-01 00:00:02 user_2 \n", "5 2023-01-01 00:00:02 user_2 \n", "6 2023-01-01 00:00:03 user_2 \n", "7 2023-01-01 00:00:04 user_2 \n", "8 2023-01-01 00:00:04 user_2 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream_local = Eventstream(df1_1, convert_tz='local')\n", "stream_local.to_dataframe()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/eventstream/eventstream.py:689: FutureWarning: In a future version of pandas, parsing datetimes with mixed time zones will raise an error unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour and silence this warning. To create a `Series` with mixed offsets and `object` dtype, please use `apply` and `datetime.datetime.strptime`\n", " events[self.schema.event_timestamp] = pd.to_datetime(events[self.schema.event_timestamp])\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
08034815d-8e2a-46da-bfc1-51ea3f293cd3path_start0path_start2022-12-31 20:00:02user_2
18034815d-8e2a-46da-bfc1-51ea3f293cd3raw0B2022-12-31 20:00:02user_2
2986885b4-5815-4d7b-a706-35f358a95eb8raw1A2022-12-31 20:00:03user_2
37449c3ae-a6ed-454f-8e48-b37f8dfe83e6path_start2path_start2022-12-31 22:00:00user_1
47449c3ae-a6ed-454f-8e48-b37f8dfe83e6raw2A2022-12-31 22:00:00user_1
563529402-fd3d-489e-9e0e-9ee595f5db86raw3B2022-12-31 22:00:01user_1
663529402-fd3d-489e-9e0e-9ee595f5db86path_end3path_end2022-12-31 22:00:01user_1
75095eed1-4bd2-428c-9d3a-527b36827faeraw4A2022-12-31 22:00:04user_2
85095eed1-4bd2-428c-9d3a-527b36827faepath_end4path_end2022-12-31 22:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 8034815d-8e2a-46da-bfc1-51ea3f293cd3 path_start 0 path_start \n", "1 8034815d-8e2a-46da-bfc1-51ea3f293cd3 raw 0 B \n", "2 986885b4-5815-4d7b-a706-35f358a95eb8 raw 1 A \n", "3 7449c3ae-a6ed-454f-8e48-b37f8dfe83e6 path_start 2 path_start \n", "4 7449c3ae-a6ed-454f-8e48-b37f8dfe83e6 raw 2 A \n", "5 63529402-fd3d-489e-9e0e-9ee595f5db86 raw 3 B \n", "6 63529402-fd3d-489e-9e0e-9ee595f5db86 path_end 3 path_end \n", "7 5095eed1-4bd2-428c-9d3a-527b36827fae raw 4 A \n", "8 5095eed1-4bd2-428c-9d3a-527b36827fae path_end 4 path_end \n", "\n", " timestamp user_id \n", "0 2022-12-31 20:00:02 user_2 \n", "1 2022-12-31 20:00:02 user_2 \n", "2 2022-12-31 20:00:03 user_2 \n", "3 2022-12-31 22:00:00 user_1 \n", "4 2022-12-31 22:00:00 user_1 \n", "5 2022-12-31 22:00:01 user_1 \n", "6 2022-12-31 22:00:01 user_1 \n", "7 2022-12-31 22:00:04 user_2 \n", "8 2022-12-31 22:00:04 user_2 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream_utc = Eventstream(df1_1, convert_tz='UTC')\n", "stream_utc.to_dataframe()" ] }, { "cell_type": "markdown", "metadata": { "id": "Lo-EDNjup0y3" }, "source": [ "### User sampling" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 1177, "status": "ok", "timestamp": 1682533628796, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "xvC0Y23cqYwB", "outputId": "406b908f-e00c-4f15-ac96-15dfb2d7570a" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Original number of the events: 32283\n", "Sampled number of the events: 4048\n", "Original unique users number: 3751\n", "Sampled unique users number: 375\n" ] } ], "source": [ "from retentioneering import datasets\n", "\n", "simple_shop_df = datasets.load_simple_shop(as_dataframe=True)\n", "\n", "# Build the eventstream from a 10% sample of users; the fixed seed keeps the sample reproducible.\n", "sampled_stream = Eventstream(\n",
"    simple_shop_df,\n", "    user_sample_size=0.1,\n", "    user_sample_seed=42\n", ")\n", "\n", "# Materialize the sampled eventstream once and reuse it for both statistics below.\n", "sampled_df = sampled_stream.to_dataframe()\n", "\n", "print('Original number of the events:', len(simple_shop_df))\n", "print('Sampled number of the events:', len(sampled_df))\n", "\n", "unique_users_original = simple_shop_df['user_id'].nunique()\n", "unique_users_sampled = sampled_df['user_id'].nunique()\n", "\n", "print('Original unique users number: ', unique_users_original)\n", "print('Sampled unique users number: ', unique_users_sampled)" ] }, { "cell_type": "markdown", "metadata": { "id": "i9CxVJyezztC" }, "source": [ "\n", "### Displaying eventstream" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 337 }, "executionInfo": { "elapsed": 13, "status": "ok", "timestamp": 1682533633144, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "p4KUwnZh0BvG", "outputId": "8436d30d-b016-4417-f8bd-ad8943dff4d7" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
0091d0318-0c3b-4e91-9d70-f55652717886path_start0path_start2023-01-01 00:00:00user_1session_1
1091d0318-0c3b-4e91-9d70-f55652717886raw0A2023-01-01 00:00:00user_1session_1
2ceecfa3b-1521-46d3-95b0-4c4b0c074bd4raw1B2023-01-01 00:00:01user_1session_1
3ceecfa3b-1521-46d3-95b0-4c4b0c074bd4path_end1path_end2023-01-01 00:00:01user_1session_1
4d5c32957-f74d-4304-8ee2-45dfd935f89apath_start2path_start2023-01-01 00:00:02user_2session_2
5d5c32957-f74d-4304-8ee2-45dfd935f89araw2B2023-01-01 00:00:02user_2session_2
6ddfe3a33-18fd-40d2-a302-55fd1cb6d646raw3A2023-01-01 00:00:03user_2session_3
7e9dff7e2-fcb8-4354-b2cd-db9d68caaa0araw4A2023-01-01 00:00:04user_2session_3
8e9dff7e2-fcb8-4354-b2cd-db9d68caaa0apath_end4path_end2023-01-01 00:00:04user_2session_3
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 091d0318-0c3b-4e91-9d70-f55652717886 path_start 0 path_start \n", "1 091d0318-0c3b-4e91-9d70-f55652717886 raw 0 A \n", "2 ceecfa3b-1521-46d3-95b0-4c4b0c074bd4 raw 1 B \n", "3 ceecfa3b-1521-46d3-95b0-4c4b0c074bd4 path_end 1 path_end \n", "4 d5c32957-f74d-4304-8ee2-45dfd935f89a path_start 2 path_start \n", "5 d5c32957-f74d-4304-8ee2-45dfd935f89a raw 2 B \n", "6 ddfe3a33-18fd-40d2-a302-55fd1cb6d646 raw 3 A \n", "7 e9dff7e2-fcb8-4354-b2cd-db9d68caaa0a raw 4 A \n", "8 e9dff7e2-fcb8-4354-b2cd-db9d68caaa0a path_end 4 path_end \n", "\n", " timestamp user_id session_id \n", "0 2023-01-01 00:00:00 user_1 session_1 \n", "1 2023-01-01 00:00:00 user_1 session_1 \n", "2 2023-01-01 00:00:01 user_1 session_1 \n", "3 2023-01-01 00:00:01 user_1 session_1 \n", "4 2023-01-01 00:00:02 user_2 session_2 \n", "5 2023-01-01 00:00:02 user_2 session_2 \n", "6 2023-01-01 00:00:03 user_2 session_3 \n", "7 2023-01-01 00:00:04 user_2 session_3 \n", "8 2023-01-01 00:00:04 user_2 session_3 " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream3.to_dataframe()" ] }, { "cell_type": "markdown", "metadata": { "id": "0En63MqeFDWP" }, "source": [ "## Eventstream index and reindex" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
037f57a8e-8f63-4ad7-9cd8-56144c88c8c7path_start0path_start2023-01-01 00:00:00user_1
137f57a8e-8f63-4ad7-9cd8-56144c88c8c7raw0A2023-01-01 00:00:00user_1
2b373b7b8-0f6c-49ca-bc8e-4aef491050efraw1B2023-01-01 00:00:00user_1
3b373b7b8-0f6c-49ca-bc8e-4aef491050efpath_end1path_end2023-01-01 00:00:00user_1
4b41d1e67-93c6-44b8-b48a-33812c1031b1path_start2path_start2023-01-01 00:00:03user_2
5b41d1e67-93c6-44b8-b48a-33812c1031b1raw2B2023-01-01 00:00:03user_2
610098a3e-f672-4696-a2a0-caf0ba5aab6eraw3A2023-01-01 00:00:03user_2
7df4a481e-ffdd-4abf-8997-7c6fa494e875raw4A2023-01-01 00:00:04user_2
8df4a481e-ffdd-4abf-8997-7c6fa494e875path_end4path_end2023-01-01 00:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 37f57a8e-8f63-4ad7-9cd8-56144c88c8c7 path_start 0 path_start \n", "1 37f57a8e-8f63-4ad7-9cd8-56144c88c8c7 raw 0 A \n", "2 b373b7b8-0f6c-49ca-bc8e-4aef491050ef raw 1 B \n", "3 b373b7b8-0f6c-49ca-bc8e-4aef491050ef path_end 1 path_end \n", "4 b41d1e67-93c6-44b8-b48a-33812c1031b1 path_start 2 path_start \n", "5 b41d1e67-93c6-44b8-b48a-33812c1031b1 raw 2 B \n", "6 10098a3e-f672-4696-a2a0-caf0ba5aab6e raw 3 A \n", "7 df4a481e-ffdd-4abf-8997-7c6fa494e875 raw 4 A \n", "8 df4a481e-ffdd-4abf-8997-7c6fa494e875 path_end 4 path_end \n", "\n", " timestamp user_id \n", "0 2023-01-01 00:00:00 user_1 \n", "1 2023-01-01 00:00:00 user_1 \n", "2 2023-01-01 00:00:00 user_1 \n", "3 2023-01-01 00:00:00 user_1 \n", "4 2023-01-01 00:00:03 user_2 \n", "5 2023-01-01 00:00:03 user_2 \n", "6 2023-01-01 00:00:03 user_2 \n", "7 2023-01-01 00:00:04 user_2 \n", "8 2023-01-01 00:00:04 user_2 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4 = pd.DataFrame(\n", " [\n", " ['user_1', 'A', '2023-01-01 00:00:00'],\n", " ['user_1', 'B', '2023-01-01 00:00:00'],\n", " ['user_2', 'B', '2023-01-01 00:00:03'],\n", " ['user_2', 'A', '2023-01-01 00:00:03'],\n", " ['user_2', 'A', '2023-01-01 00:00:04']\n", " ],\n", " columns=['user_id', 'event', 'timestamp']\n", ")\n", "\n", "stream4 = Eventstream(df4)\n", "stream4.to_dataframe()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
006ad3b46-0b4e-4111-9a36-a65971e90de4path_start0path_start2023-01-01 00:00:00user_1
106ad3b46-0b4e-4111-9a36-a65971e90de4raw0B2023-01-01 00:00:00user_1
210aa32eb-19ce-4986-8046-7de2aa7a1cearaw1A2023-01-01 00:00:00user_1
310aa32eb-19ce-4986-8046-7de2aa7a1ceapath_end1path_end2023-01-01 00:00:00user_1
4280bbcc9-a67b-40ec-81b4-e57ebc641930path_start2path_start2023-01-01 00:00:03user_2
5280bbcc9-a67b-40ec-81b4-e57ebc641930raw2B2023-01-01 00:00:03user_2
683b218a4-9fa6-4da5-ae97-ffb27d0f4fd5raw3A2023-01-01 00:00:03user_2
7038c7455-8c39-493c-9692-d3e001f3e924raw4A2023-01-01 00:00:04user_2
8038c7455-8c39-493c-9692-d3e001f3e924path_end4path_end2023-01-01 00:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 06ad3b46-0b4e-4111-9a36-a65971e90de4 path_start 0 path_start \n", "1 06ad3b46-0b4e-4111-9a36-a65971e90de4 raw 0 B \n", "2 10aa32eb-19ce-4986-8046-7de2aa7a1cea raw 1 A \n", "3 10aa32eb-19ce-4986-8046-7de2aa7a1cea path_end 1 path_end \n", "4 280bbcc9-a67b-40ec-81b4-e57ebc641930 path_start 2 path_start \n", "5 280bbcc9-a67b-40ec-81b4-e57ebc641930 raw 2 B \n", "6 83b218a4-9fa6-4da5-ae97-ffb27d0f4fd5 raw 3 A \n", "7 038c7455-8c39-493c-9692-d3e001f3e924 raw 4 A \n", "8 038c7455-8c39-493c-9692-d3e001f3e924 path_end 4 path_end \n", "\n", " timestamp user_id \n", "0 2023-01-01 00:00:00 user_1 \n", "1 2023-01-01 00:00:00 user_1 \n", "2 2023-01-01 00:00:00 user_1 \n", "3 2023-01-01 00:00:00 user_1 \n", "4 2023-01-01 00:00:03 user_2 \n", "5 2023-01-01 00:00:03 user_2 \n", "6 2023-01-01 00:00:03 user_2 \n", "7 2023-01-01 00:00:04 user_2 \n", "8 2023-01-01 00:00:04 user_2 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Eventstream(df4, events_order=[\"B\", \"A\"]).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "id": "Qye9ZuiJFgVZ" }, "outputs": [], "source": [ "IndexOrder = [\n", " \"profile\",\n", " \"path_start\",\n", " \"new_user\",\n", " \"existing_user\",\n", " \"cropped_left\",\n", " \"session_start\",\n", " \"session_start_cropped\",\n", " \"group_alias\",\n", " \"raw\",\n", " \"raw_sleep\",\n", " None,\n", " \"synthetic\",\n", " \"synthetic_sleep\",\n", " \"positive_target\",\n", " \"negative_target\",\n", " \"session_end_cropped\",\n", " \"session_end\",\n", " \"session_sleep\",\n", " \"cropped_right\",\n", " \"absent_user\",\n", " \"lost_user\",\n", " \"path_end\"\n", "]" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 489 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1682534665066, "user": { "displayName": "Anatoliy 
Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "uHPBO_fsIKSC", "outputId": "1ded345e-fd0c-4b6c-e649-99b38293521f" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
02cf0da85-4fd6-405d-8631-adb5fe16ec9fpath_start0path_start2023-01-01 00:00:00user_1
119267cab-22da-4d05-9c52-e48a7042e3cbraw0A2023-01-01 00:00:00user_1
2d0554de7-ffc3-4b42-9ced-3124356b60daraw1B2023-01-01 00:00:00user_1
3d0554de7-ffc3-4b42-9ced-3124356b60dapositive_target1positive_target_B2023-01-01 00:00:00user_1
42114017e-4672-4bd2-973f-3584508404f4path_end1path_end2023-01-01 00:00:00user_1
59f127e9a-b3b6-4f2b-bbd3-310c49774419path_start2path_start2023-01-01 00:00:03user_2
66f8dc678-6fbd-4880-8ec4-99cb4ef6c727raw2B2023-01-01 00:00:03user_2
76f8dc678-6fbd-4880-8ec4-99cb4ef6c727positive_target2positive_target_B2023-01-01 00:00:03user_2
8fa56cae5-77c5-40ff-8a9b-71725ac7f5dfraw3A2023-01-01 00:00:03user_2
92869bb04-f5e9-4a3b-8701-73497e794f50raw4A2023-01-01 00:00:04user_2
10c73cf1b4-0f38-4276-8cd9-15e9e64cf880path_end4path_end2023-01-01 00:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 2cf0da85-4fd6-405d-8631-adb5fe16ec9f path_start 0 \n", "1 19267cab-22da-4d05-9c52-e48a7042e3cb raw 0 \n", "2 d0554de7-ffc3-4b42-9ced-3124356b60da raw 1 \n", "3 d0554de7-ffc3-4b42-9ced-3124356b60da positive_target 1 \n", "4 2114017e-4672-4bd2-973f-3584508404f4 path_end 1 \n", "5 9f127e9a-b3b6-4f2b-bbd3-310c49774419 path_start 2 \n", "6 6f8dc678-6fbd-4880-8ec4-99cb4ef6c727 raw 2 \n", "7 6f8dc678-6fbd-4880-8ec4-99cb4ef6c727 positive_target 2 \n", "8 fa56cae5-77c5-40ff-8a9b-71725ac7f5df raw 3 \n", "9 2869bb04-f5e9-4a3b-8701-73497e794f50 raw 4 \n", "10 c73cf1b4-0f38-4276-8cd9-15e9e64cf880 path_end 4 \n", "\n", " event timestamp user_id \n", "0 path_start 2023-01-01 00:00:00 user_1 \n", "1 A 2023-01-01 00:00:00 user_1 \n", "2 B 2023-01-01 00:00:00 user_1 \n", "3 positive_target_B 2023-01-01 00:00:00 user_1 \n", "4 path_end 2023-01-01 00:00:00 user_1 \n", "5 path_start 2023-01-01 00:00:03 user_2 \n", "6 B 2023-01-01 00:00:03 user_2 \n", "7 positive_target_B 2023-01-01 00:00:03 user_2 \n", "8 A 2023-01-01 00:00:03 user_2 \n", "9 A 2023-01-01 00:00:04 user_2 \n", "10 path_end 2023-01-01 00:00:04 user_2 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "add_events_stream = stream4.add_positive_events(targets=['B'])\n", "add_events_stream.to_dataframe()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 434 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1682534703220, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "eLBIvTv5Icy6", "outputId": "45d16359-284f-4471-90c8-61cd9471ad47" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
02cf0da85-4fd6-405d-8631-adb5fe16ec9fpath_start0path_start2023-01-01 00:00:00user_1
119267cab-22da-4d05-9c52-e48a7042e3cbraw0A2023-01-01 00:00:00user_1
2d0554de7-ffc3-4b42-9ced-3124356b60dapositive_target1positive_target_B2023-01-01 00:00:00user_1
3d0554de7-ffc3-4b42-9ced-3124356b60daraw1B2023-01-01 00:00:00user_1
42114017e-4672-4bd2-973f-3584508404f4path_end1path_end2023-01-01 00:00:00user_1
59f127e9a-b3b6-4f2b-bbd3-310c49774419path_start2path_start2023-01-01 00:00:03user_2
66f8dc678-6fbd-4880-8ec4-99cb4ef6c727positive_target2positive_target_B2023-01-01 00:00:03user_2
76f8dc678-6fbd-4880-8ec4-99cb4ef6c727raw2B2023-01-01 00:00:03user_2
8fa56cae5-77c5-40ff-8a9b-71725ac7f5dfraw3A2023-01-01 00:00:03user_2
92869bb04-f5e9-4a3b-8701-73497e794f50raw4A2023-01-01 00:00:04user_2
10c73cf1b4-0f38-4276-8cd9-15e9e64cf880path_end4path_end2023-01-01 00:00:04user_2
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 2cf0da85-4fd6-405d-8631-adb5fe16ec9f path_start 0 \n", "1 19267cab-22da-4d05-9c52-e48a7042e3cb raw 0 \n", "2 d0554de7-ffc3-4b42-9ced-3124356b60da positive_target 1 \n", "3 d0554de7-ffc3-4b42-9ced-3124356b60da raw 1 \n", "4 2114017e-4672-4bd2-973f-3584508404f4 path_end 1 \n", "5 9f127e9a-b3b6-4f2b-bbd3-310c49774419 path_start 2 \n", "6 6f8dc678-6fbd-4880-8ec4-99cb4ef6c727 positive_target 2 \n", "7 6f8dc678-6fbd-4880-8ec4-99cb4ef6c727 raw 2 \n", "8 fa56cae5-77c5-40ff-8a9b-71725ac7f5df raw 3 \n", "9 2869bb04-f5e9-4a3b-8701-73497e794f50 raw 4 \n", "10 c73cf1b4-0f38-4276-8cd9-15e9e64cf880 path_end 4 \n", "\n", " event timestamp user_id \n", "0 path_start 2023-01-01 00:00:00 user_1 \n", "1 A 2023-01-01 00:00:00 user_1 \n", "2 positive_target_B 2023-01-01 00:00:00 user_1 \n", "3 B 2023-01-01 00:00:00 user_1 \n", "4 path_end 2023-01-01 00:00:00 user_1 \n", "5 path_start 2023-01-01 00:00:03 user_2 \n", "6 positive_target_B 2023-01-01 00:00:03 user_2 \n", "7 B 2023-01-01 00:00:03 user_2 \n", "8 A 2023-01-01 00:00:03 user_2 \n", "9 A 2023-01-01 00:00:04 user_2 \n", "10 path_end 2023-01-01 00:00:04 user_2 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "custom_sorting = [\n", " 'profile',\n", " 'path_start',\n", " 'new_user',\n", " 'existing_user',\n", " 'cropped_left',\n", " 'session_start',\n", " 'session_start_cropped',\n", " 'group_alias',\n", " 'positive_target',\n", " 'raw',\n", " 'raw_sleep',\n", " None,\n", " 'synthetic',\n", " 'synthetic_sleep',\n", " 'negative_target',\n", " 'session_end_cropped',\n", " 'session_end',\n", " 'session_sleep',\n", " 'cropped_right',\n", " 'absent_user',\n", " 'lost_user',\n", " 'path_end'\n", "]\n", "\n", "add_events_stream.index_order = custom_sorting\n", "add_events_stream.index_events()\n", "add_events_stream.to_dataframe()" ] }, { "cell_type": "markdown", "metadata": { "id": "bGmRhXMhSCS8" }, "source": [ "## Descriptive 
methods" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 337 }, "executionInfo": { "elapsed": 1442, "status": "ok", "timestamp": 1682534832955, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "d1RZ51Hd1C_w", "outputId": "dee65fdf-c5ca-48f3-e1e2-425d54cd18be" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/data_processors_lib/split_sessions.py:318: FutureWarning: The provided callable is currently using SeriesGroupBy.cumsum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"cumsum\" instead.\n", " df[session_col] = df.groupby(user_col)[self.IS_SESSION_START_COL].transform(np.cumsum)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
0c7809e68-2b29-4432-a4a0-e88d9728b234path_start0path_start2019-11-01 17:59:13.273932219483890219483890_1
1a6c58408-8ad4-4523-b15a-d03b80a54da5session_start0session_start2019-11-01 17:59:13.273932219483890219483890_1
2c7809e68-2b29-4432-a4a0-e88d9728b234raw0catalog2019-11-01 17:59:13.273932219483890219483890_1
33e1772a5-03c7-4471-b3c2-0bd551ce787draw1product12019-11-01 17:59:28.459271219483890219483890_1
44f95e0b7-90a6-44ba-bda8-a603ac7d51b8raw2cart2019-11-01 17:59:29.502214219483890219483890_1
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 c7809e68-2b29-4432-a4a0-e88d9728b234 path_start 0 \n", "1 a6c58408-8ad4-4523-b15a-d03b80a54da5 session_start 0 \n", "2 c7809e68-2b29-4432-a4a0-e88d9728b234 raw 0 \n", "3 3e1772a5-03c7-4471-b3c2-0bd551ce787d raw 1 \n", "4 4f95e0b7-90a6-44ba-bda8-a603ac7d51b8 raw 2 \n", "\n", " event timestamp user_id session_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "1 session_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "2 catalog 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "3 product1 2019-11-01 17:59:28.459271 219483890 219483890_1 \n", "4 cart 2019-11-01 17:59:29.502214 219483890 219483890_1 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from retentioneering import datasets\n", "\n", "stream_with_sessions = datasets\\\n", " .load_simple_shop()\\\n", " .split_sessions(timeout=(30, 'm'))\n", "\n", "stream_with_sessions.to_dataframe().head()" ] }, { "cell_type": "markdown", "metadata": { "id": "w9SLW02d1Nk6" }, "source": [ "### General statistics" ] }, { "cell_type": "markdown", "metadata": { "id": "NHe2F9OXJjaE" }, "source": [ "#### Describe" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 896 }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1682534895622, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "1KB_Nfs34LEJ", "outputId": "baf72aae-20aa-4371-c5ed-3db16de51168" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
value
categorymetric
overallunique_users3751
unique_events16
unique_sessions6454
eventstream_start2019-11-01 17:59:13
eventstream_end2020-04-29 12:48:07
eventstream_length179 days 18:48:53
path_length_timemean9 days 11:15:18
std23 days 02:52:25
median0 days 00:01:21
min0 days 00:00:00
max149 days 04:51:05
path_length_stepsmean14.05
std11.43
median11.0
min5
max124
session_length_timemean0 days 00:00:52
std0 days 00:01:08
median0 days 00:00:30
min0 days 00:00:00
max0 days 00:23:44
session_length_stepsmean8.16
std4.28
median7.0
min3
max55
\n", "
" ], "text/plain": [ " value\n", "category metric \n", "overall unique_users 3751\n", " unique_events 16\n", " unique_sessions 6454\n", " eventstream_start 2019-11-01 17:59:13\n", " eventstream_end 2020-04-29 12:48:07\n", " eventstream_length 179 days 18:48:53\n", "path_length_time mean 9 days 11:15:18\n", " std 23 days 02:52:25\n", " median 0 days 00:01:21\n", " min 0 days 00:00:00\n", " max 149 days 04:51:05\n", "path_length_steps mean 14.05\n", " std 11.43\n", " median 11.0\n", " min 5\n", " max 124\n", "session_length_time mean 0 days 00:00:52\n", " std 0 days 00:01:08\n", " median 0 days 00:00:30\n", " min 0 days 00:00:00\n", " max 0 days 00:23:44\n", "session_length_steps mean 8.16\n", " std 4.28\n", " median 7.0\n", " min 3\n", " max 55" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream_with_sessions.describe()" ] }, { "cell_type": "markdown", "metadata": { "id": "7q6kjYjdMIgz" }, "source": [ "#### Describe events\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 952 }, "executionInfo": { "elapsed": 650, "status": "ok", "timestamp": 1682535411485, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "9SkeBrB1Nr6m", "outputId": "6aafafa7-30c3-4d22-98df-da2237c87650" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
basic_statisticstime_to_FO_user_wisesteps_to_FO_user_wise
number_of_occurrencesunique_usersnumber_of_occurrences_sharedunique_users_sharedmeanstdmedianminmaxmeanstdmedianminmax
event
cart284219240.070.513 days 08:59:13.85157461011 days 19:28:46.3977788610 days 00:00:56.0029960 days 00:00:00.890726118 days 16:11:35.9602695.514.094.0242
catalog1451836110.360.960 days 05:44:21.3312838483 days 03:22:31.7924221240 days 00:00:000 days 00:00:00100 days 08:19:50.9809731.300.571.018
delivery_choice168613560.040.365 days 09:18:08.14255879615 days 03:19:14.5417664380 days 00:01:12.2237155000 days 00:00:02.553428118 days 16:11:37.4822790017.785.566.0350
delivery_courier8347480.020.206 days 18:14:54.97579740616 days 17:51:38.6931154300 days 00:01:28.3531260 days 00:00:05.975916118 days 16:11:37.8828879.966.848.0446
delivery_pickup5064690.010.137 days 21:12:16.55208045418 days 22:51:54.2670169830 days 00:01:34.4536000 days 00:00:05.710733114 days 01:24:05.69857010.518.068.0472
main563523850.140.643 days 20:15:36.4226630989 days 02:58:22.7360615590 days 00:00:06.5597210 days 00:00:0097 days 21:24:23.4798533.002.942.0121
path_end375137510.091.009 days 11:15:17.57568597723 days 02:52:24.8518657360 days 00:01:20.8492910 days 00:00:00149 days 04:51:04.7234109999.619.107.02101
path_start375137510.091.000 days 00:00:000 days 00:00:000 days 00:00:000 days 00:00:000 days 00:00:000.000.000.000
payment_card5655210.010.146 days 21:42:26.48090631717 days 18:52:32.5674225290 days 00:01:40.0956100 days 00:00:07.867798138 days 04:51:25.10996312.147.3410.0666
payment_cash1971900.000.0513 days 23:17:25.22800537924 days 00:00:01.7538874760 days 00:02:18.1192585000 days 00:00:10.290188118 days 16:11:39.43880599915.1511.1010.5674
payment_choice11079580.030.266 days 12:49:38.25180176517 days 02:54:50.6384150620 days 00:01:23.5653610 days 00:00:06.065129999118 days 16:11:38.85197010.426.378.0553
payment_done7066530.020.177 days 01:37:54.22871669117 days 09:09:59.9950920460 days 00:01:34.0830540 days 00:00:08.051634115 days 09:18:59.00547013.218.2911.0685
product1151511220.040.305 days 23:49:43.21096572316 days 04:36:13.1088877440 days 00:00:50.1929190 days 00:00:00.001065118 days 19:38:39.6731306.466.044.0262
product2217214300.050.384 days 06:13:23.87090039513 days 03:26:16.7984015830 days 00:00:34.2870910 days 00:00:00.016249126 days 23:36:44.7373760015.324.514.0237
\n", "
" ], "text/plain": [ " basic_statistics \\\n", " number_of_occurrences unique_users \n", "event \n", "cart 2842 1924 \n", "catalog 14518 3611 \n", "delivery_choice 1686 1356 \n", "delivery_courier 834 748 \n", "delivery_pickup 506 469 \n", "main 5635 2385 \n", "path_end 3751 3751 \n", "path_start 3751 3751 \n", "payment_card 565 521 \n", "payment_cash 197 190 \n", "payment_choice 1107 958 \n", "payment_done 706 653 \n", "product1 1515 1122 \n", "product2 2172 1430 \n", "\n", " \\\n", " number_of_occurrences_shared unique_users_shared \n", "event \n", "cart 0.07 0.51 \n", "catalog 0.36 0.96 \n", "delivery_choice 0.04 0.36 \n", "delivery_courier 0.02 0.20 \n", "delivery_pickup 0.01 0.13 \n", "main 0.14 0.64 \n", "path_end 0.09 1.00 \n", "path_start 0.09 1.00 \n", "payment_card 0.01 0.14 \n", "payment_cash 0.00 0.05 \n", "payment_choice 0.03 0.26 \n", "payment_done 0.02 0.17 \n", "product1 0.04 0.30 \n", "product2 0.05 0.38 \n", "\n", " time_to_FO_user_wise \\\n", " mean std \n", "event \n", "cart 3 days 08:59:13.851574610 11 days 19:28:46.397778861 \n", "catalog 0 days 05:44:21.331283848 3 days 03:22:31.792422124 \n", "delivery_choice 5 days 09:18:08.142558796 15 days 03:19:14.541766438 \n", "delivery_courier 6 days 18:14:54.975797406 16 days 17:51:38.693115430 \n", "delivery_pickup 7 days 21:12:16.552080454 18 days 22:51:54.267016983 \n", "main 3 days 20:15:36.422663098 9 days 02:58:22.736061559 \n", "path_end 9 days 11:15:17.575685977 23 days 02:52:24.851865736 \n", "path_start 0 days 00:00:00 0 days 00:00:00 \n", "payment_card 6 days 21:42:26.480906317 17 days 18:52:32.567422529 \n", "payment_cash 13 days 23:17:25.228005379 24 days 00:00:01.753887476 \n", "payment_choice 6 days 12:49:38.251801765 17 days 02:54:50.638415062 \n", "payment_done 7 days 01:37:54.228716691 17 days 09:09:59.995092046 \n", "product1 5 days 23:49:43.210965723 16 days 04:36:13.108887744 \n", "product2 4 days 06:13:23.870900395 13 days 03:26:16.798401583 \n", "\n", " \\\n", " median min \n", 
"event \n", "cart 0 days 00:00:56.002996 0 days 00:00:00.890726 \n", "catalog 0 days 00:00:00 0 days 00:00:00 \n", "delivery_choice 0 days 00:01:12.223715500 0 days 00:00:02.553428 \n", "delivery_courier 0 days 00:01:28.353126 0 days 00:00:05.975916 \n", "delivery_pickup 0 days 00:01:34.453600 0 days 00:00:05.710733 \n", "main 0 days 00:00:06.559721 0 days 00:00:00 \n", "path_end 0 days 00:01:20.849291 0 days 00:00:00 \n", "path_start 0 days 00:00:00 0 days 00:00:00 \n", "payment_card 0 days 00:01:40.095610 0 days 00:00:07.867798 \n", "payment_cash 0 days 00:02:18.119258500 0 days 00:00:10.290188 \n", "payment_choice 0 days 00:01:23.565361 0 days 00:00:06.065129999 \n", "payment_done 0 days 00:01:34.083054 0 days 00:00:08.051634 \n", "product1 0 days 00:00:50.192919 0 days 00:00:00.001065 \n", "product2 0 days 00:00:34.287091 0 days 00:00:00.016249 \n", "\n", " steps_to_FO_user_wise \\\n", " max mean std \n", "event \n", "cart 118 days 16:11:35.960269 5.51 4.09 \n", "catalog 100 days 08:19:50.980973 1.30 0.57 \n", "delivery_choice 118 days 16:11:37.482279001 7.78 5.56 \n", "delivery_courier 118 days 16:11:37.882887 9.96 6.84 \n", "delivery_pickup 114 days 01:24:05.698570 10.51 8.06 \n", "main 97 days 21:24:23.479853 3.00 2.94 \n", "path_end 149 days 04:51:04.723410999 9.61 9.10 \n", "path_start 0 days 00:00:00 0.00 0.00 \n", "payment_card 138 days 04:51:25.109963 12.14 7.34 \n", "payment_cash 118 days 16:11:39.438805999 15.15 11.10 \n", "payment_choice 118 days 16:11:38.851970 10.42 6.37 \n", "payment_done 115 days 09:18:59.005470 13.21 8.29 \n", "product1 118 days 19:38:39.673130 6.46 6.04 \n", "product2 126 days 23:36:44.737376001 5.32 4.51 \n", "\n", " \n", " median min max \n", "event \n", "cart 4.0 2 42 \n", "catalog 1.0 1 8 \n", "delivery_choice 6.0 3 50 \n", "delivery_courier 8.0 4 46 \n", "delivery_pickup 8.0 4 72 \n", "main 2.0 1 21 \n", "path_end 7.0 2 101 \n", "path_start 0.0 0 0 \n", "payment_card 10.0 6 66 \n", "payment_cash 10.5 6 74 \n", 
"payment_choice 8.0 5 53 \n", "payment_done 11.0 6 85 \n", "product1 4.0 2 62 \n", "product2 4.0 2 37 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream = datasets.load_simple_shop()\n", "stream.describe_events()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 488 }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1682535468578, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "Isw57wZlRKmc", "outputId": "14b6772c-f89d-4ea6-ecb0-3ff5f6084739" }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
eventcartpayment_done
basic_statisticsnumber_of_occurrences2842706
unique_users1924653
number_of_occurrences_shared0.070.02
unique_users_shared0.510.17
time_to_FO_user_wisemean3 days 08:59:13.8515746107 days 01:37:54.228716691
std11 days 19:28:46.39777886117 days 09:09:59.995092046
median0 days 00:00:56.0029960 days 00:01:34.083054
min0 days 00:00:00.8907260 days 00:00:08.051634
max118 days 16:11:35.960269115 days 09:18:59.005470
steps_to_FO_user_wisemean5.5113.21
std4.098.29
median4.011.0
min26
max4285
\n", "
" ], "text/plain": [ "event cart \\\n", "basic_statistics number_of_occurrences 2842 \n", " unique_users 1924 \n", " number_of_occurrences_shared 0.07 \n", " unique_users_shared 0.51 \n", "time_to_FO_user_wise mean 3 days 08:59:13.851574610 \n", " std 11 days 19:28:46.397778861 \n", " median 0 days 00:00:56.002996 \n", " min 0 days 00:00:00.890726 \n", " max 118 days 16:11:35.960269 \n", "steps_to_FO_user_wise mean 5.51 \n", " std 4.09 \n", " median 4.0 \n", " min 2 \n", " max 42 \n", "\n", "event payment_done \n", "basic_statistics number_of_occurrences 706 \n", " unique_users 653 \n", " number_of_occurrences_shared 0.02 \n", " unique_users_shared 0.17 \n", "time_to_FO_user_wise mean 7 days 01:37:54.228716691 \n", " std 17 days 09:09:59.995092046 \n", " median 0 days 00:01:34.083054 \n", " min 0 days 00:00:08.051634 \n", " max 115 days 09:18:59.005470 \n", "steps_to_FO_user_wise mean 13.21 \n", " std 8.29 \n", " median 11.0 \n", " min 6 \n", " max 85 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Limit the report to two events and transpose it so each event becomes a column.\n", "stream.describe_events(event_list=['payment_done', 'cart']).T" ] }, { "cell_type": "markdown", "metadata": { "id": "k7v2zN1p4Rkj" }, "source": [ "### Time-based histograms" ] }, { "cell_type": "markdown", "metadata": { "id": "20wdTUNL5WJs" }, "source": [ "#### User lifetime" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 759, "status": "ok", "timestamp": 1682535876902, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "5ILZ2Lwu4hTg", "outputId": "d3872853-ed28-4b11-b996-967712276f3c" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.user_lifetime_hist();" ] }, { "cell_type": "markdown", "metadata": { "id": "HTW7lgU5S3c_" }, "source": [ "#### Timedelta between two events\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 453 }, "executionInfo": { "elapsed": 994, "status": "ok", "timestamp": 1682536647332, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "oSmM7jrpT1-1", "outputId": "0aa5bd30-3209-41f0-9155-234f06ed44f1" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.timedelta_hist(log_scale=True, timedelta_unit='m');" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 944 }, "executionInfo": { "elapsed": 5377, "status": "ok", "timestamp": 1682536659137, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "oOmuXJjxWNcU", "outputId": "c0d1964a-f4ac-4a34-8492-a4968d9e205f" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/data_processors_lib/split_sessions.py:318: FutureWarning: The provided callable is currently using SeriesGroupBy.cumsum. In a future version of pandas, the provided callable will be used directly. To keep current behavior pass the string \"cumsum\" instead.\n", " df[session_col] = df.groupby(user_col)[self.IS_SESSION_START_COL].transform(np.cumsum)\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream_with_synthetic = (\n", "    datasets.load_simple_shop()\n", "    .add_start_end_events()\n", "    .split_sessions(timeout=(30, 'm'))\n", ")\n", "\n", "# Same histogram twice: default event selection first, then raw events only.\n", "stream_with_synthetic.timedelta_hist(log_scale=True, timedelta_unit='m')\n", "stream_with_synthetic.timedelta_hist(raw_events_only=True, log_scale=True, timedelta_unit='m');" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 937, "status": "ok", "timestamp": 1682539478849, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "3Ca9XUOVXbRK", "outputId": "d14d127e-521b-42c3-ddbb-c4dcdc162386" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.timedelta_hist(event_pair=('product1', 'cart'), timedelta_unit='m');" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 1117, "status": "ok", "timestamp": 1682536989414, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "_XDrIryfXmWM", "outputId": "3bddc560-849b-425d-9076-04a6e2f85573" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.timedelta_hist(\n", " event_pair=('product1', 'cart'),\n", " timedelta_unit='m',\n", " adjacent_events_only=False\n", ");" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1682539467236, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "Q62Lpl6LKQBg", "outputId": "d49dc1ea-021b-4c52-95fe-8103de202f39" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/tooling/timedelta_hist/timedelta_hist.py:109: FutureWarning: Logical ops (and, or, xor) between Pandas objects and dtype-less sequences (e.g. list, tuple) are deprecated and will raise in a future version. Wrap the object in a Series, Index, or np.array before operating instead.\n", " idx &= series >= series.quantile(self.lower_cutoff_quantile)\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAeAAAAEyCAYAAAAiIgOxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAAsTAAALEwEAmpwYAAAjcElEQVR4nO3debwcVZ338c+XhLBLEogIWQygMqKjiFcF3BAcRHRkdNCAiIBgUB8dt0cGdJ6BcfAZd3EnUTEoiCDihgqibCoKBmRfFBFIIJDLIpsIxPzmj3OaVJrue/tuffp2fd+vV79u1antV3Wq6td1qm6XIgIzMzPrrnVKB2BmZlZHTsBmZmYFOAGbmZkV4ARsZmZWgBOwmZlZAU7AZmZmBYwpAUu6WtKu4xPKkMuZLykkTe1g3IMk/Wocl32TpJe3GfZiSdeP17KsHEk/lXTgOM9zPUnXSNpyPOfb4bKPlnRit5c7GUh6l6SPFVjuA5K26XDckPSUiY6pV5Y72Uial+tzSpvhHR1/QybgvIDGZ7Wkhyr9+0fEMyLivFGuQ1dM5A4VEb+MiO06iMEnwzGa6G0YEa+MiBPGebYLgQsiYgWApCWSDhrnZUyo5uNH0jRJp+UvptH8BXwk6yjpvIn4At9qX2la1leA/SU9cbyXPZSI2DgibhzrfMb7IsNGLiJuyfX597HMZ8gEnBewcURsDNwC/HOl7KSxLNjGRyetAjY2Y9jGbwO+OQ7Ll6Reul30K+BNwO2lA2nWSV1FxN+AnwJvnviIbLLqxrl1rE3QjzXP5m+d35F0oqT7JV0p6WmSjpS0UtIySXtUpt1U0tckrZB0q6RjGpfzkqZI+qSkOyXdCLyqabltp20a74LceXm+al8gaYakMyQNSrond88ZZlV3kHSFpHslnSJp/Tz/XSUtryzv33M890u6XtLukvYEPggsyDFcnsfdStIPJd0t6QZJb63MZwNJJ+T4rpV0eNNybsrLugJ4UNJUSUdI+lNe9jWSXlsZ/yBJv5b0GUl/kXSjpF1y+bJcPx03v0r6B0ln59ivl/SGXP4CSbdX60LSa3OcSFqnEuddkk6VNDMPa9xmOFDSLbnuP5SHtdyGLeK6Ke9v1+Rt9/VKXQ1Z70pXSIe22F53AUd3um0q85sHbANc1GZ4YxlfyPvVdZJ2b4rnI5J+DfwV2CbX2e/y+L+TtEtl/K0lnZ/r/2xg88qwtfbTyrZqHLtTJH2wsv9cImmuWhw/EfFIRBwbEb8CxvTtvymeljHkYZ/N++l9ufzFlemOVroiP1HSfaQvPcPuK8B5NJ1XhojtfEn/mrtfmPfTV+X+3SVdVhn3LUrH7D2SzpL05Mqwx1oTJG0m6Ud5nX6ndA5rvqp9uaQ/Kh2zX1TydOA4YOe8fn9pE/PMvP/flmP5fmXYW5XOOXcrnYO2ajOPx46J3L/WlXden3fkGO+X9N+StpV0YV6vUyVNy+PuKmm5pPcrnW9WSDq4k+3fJrYlko6p9A97Ls7lnZyDDpF0C3DOEMtf67boUMffkCKiow9wE/DydmWkk9TfgFcAU4FvAH8GPgSsC7wV+HNl2u8Bi4CNgCcCFwOH5WFvA64D5gIzgXOBAKZ2MO1BwK8qywngKZX+zYB/BTYENgG+A3x/mPW+GNgqx3It8LY8bFdgee7eDlgGbJX75wPbVrbNiU3zvQD4ErA+sAMwCOyWh30UOB+YAcwBrmgspxLTZXn7bJDLXp9jXAdYADwIbFnZJquAg4EpwDGkFo0vAusBewD3Axt3sB9slNfz4FzPzwHuBLbPw/8E/FNl/O8AR+TudwO/zeu0Xq7DkyvbK0jNgxsAzwYeBp7ebhu2qaurWLPf/Bo4ppN6J52QD23aXu/K67hBp8dJZX6vAq4eYnhjGe8lHR8LgHuBmZV4bgGekWPYArgHOCD375f7N8vj/wb4dN6uL8n1eWLzftrm2P0AcCVpH1be9o35rnX8NM1jObDrSLdNm3kNFcObcv1
NBd5PuvJev7JfPAr8C2nf36DDfWVH4O4OY/sw8Pnc/UHSPv6xyrDP5u69gRuAp+dY/wO4sNW5CPh2/mwIbE86pprPW2cA04F5pPPDnq3OcW1i/jFwCukcsi7w0ly+G+l43THvK58n3SZpFeN55GOi1XLzuD8AnkDaTx8GfkH64rkpcA1wYGUfXJW317rAXqQvljNGub8sIR/bzfs4Q5+LOzkHfYN0nmt73FfGbeSktsffkOsxghW+ieET8NmVYf8MPABMyf2b5ICnk04mD1dXkHRCOTd3n0NOcrl/j8bKdjBtq52k5QkkD98BuGeY9X5Tpf/jwHEtKv0pwErg5cC6TfM4uloZpATxd2CTStn/AEty943AKyrDDuXxCfgtw9TXZcDelW3yx8qwf8zbZYtK2V3ADh3sBwuAXzaVLQKOyt3HAMdX6vxB4Mm5/1pg98p0W5JOnlNZs0PPqQy/GNi31TYcoq6q+81ewJ86qXcen4Bv6fTYaDP//YHfDjH8IOA2QE3re0Alng9Xhh0AXNw0j9/k+cwjndw2qgz7Fp0n4Osb+0qLOLuVgNvG0GLce4BnV/aLC5qGd7KvPBX4e4fL2x24InefSToef5v7zwdel7t/ChxSmW4dUpJp7P9BOk9Myfv9dpVxj+Hx560XVfpPZc0X2YMYIgGTjqvVtEhuwNeAj1f6N86xzG+ubzpLwC+s9F8C/Hul/1PAsZV98CFywsplK4GdRrm/LKF9Ah7qXNzJOWibDpbfGHcqwxx/Q33G+77SHZXuh4A7Y81N6ofy342BJ5O+Ba3IzSt/IZ3EGw9FbEX6BtNwc6V7uGmHJGlDSYsk3ZybrC4ApqvN02xZ9V7XX/M6rCUibgDeQzr4V0r6drumHdL63R0R91fKbgZmV4ZX17/a3bJM0pslXVbZJs9k7WaQ5rohIprLHrdeLTwZeEFjOXlZ+wNPysO/BbxO0nrA64BLI+LmyrTfq0x3LemLyBaV+Q+7rYfRvN9sBaOq91bb/DFa+wHFeS1GuYf0BWQot0Y+WpvjbRHDVqx9HDTGn52H3RMRDzYN69Rc0lVdSW1jkPR/c7PuvXm/2ZS19+0h66qNTUgtDp34DfA0SVuQvrh9A5graXPg+aR9CdL+/dnK/n036Wp+dtP8ZpFO3MMd46M9FuaSzi/3tBi21n4UEQ+Qvnw3x9ip5nPIUOeUuyJiVaW/5Top/XdJ49i6eqQBDXMu7uQcNNL9adTHX6kHO5aRrmI3j4jp+fOEiHhGHr6CtBM1zBvBtMN5P6mJ4gUR8QRScwGkA2VMIuJbEfEiUiUH0PhXh2ga9TZgpqTqCXoecGvuXkFqImmobovHFtfoyPeZvgK8k9RsN53UFDvmdWphGXB+ZdtPj/RQ3tsBIuIa0s73SuCNpIRcnfaVTdOuHxG3Pm4pj9e8Ddtp3m9uy90jrfchlxeVBxQj4pYWo1wBbK2hH+SYLam6/Gq8zTHcRtqvqhr7zApghqSNmoY1PEhq6gTS/VZSEmhYBmw7RJzd0DIGpfu9hwNvIF3RTSclzup2a66rTvaVpwPt7g+vPbOIv5Ku7t4NXBURjwAXAu8jtbDcWVmHw5r27w0i4sKmWQ6SrpiGO8bbhjTM8GWk88v0FsPW2o/yPrMZa849VWvtN6z5kj2hIv13SePYandeHzK2Ic7FnZyDOj3XNAx3/LVVJAFH+reMnwGfkvSEfGN8W0kvzaOcCvybpDmSZgBHjGDaZneQ7kk0bEL6ZvaXfPP9qPFYJ0nbSdotX/n9LS9jdSWG+cpPskbEMtIB/D+S1pf0LOAQoPGvE6cCRyo9ODSblFiHshFppxnMsRxMugIe7bocJOmmNoPPIF0NHCBp3fx5ntLDIQ3fIp2sXkK619pwHPCR/IUBSbMk7d1hWGttwyH8n7zfzCQ9f3BKLp+Qem8nIpaT7gc+f4jRnkjaz9eV9HpSUvhJm3F/Qtrub1R66G4B6d7hGbmFYSnwX0r/JvQi0i2ghj8
A60t6laR1Sfcm16sM/yrw35KequRZkjbLw5qPH5T+v3n93Dst78OP+yKjNQ+qzB9iGwwXwyakZDUITJX0n6R7jkPpZF95KanJuBHrEklLhhj/fNJxeH7uP6+pH9L+faSkZ+R5bprrdS25VfB04OjcMvMPjOyJ7DuAOcoPOLWY/wrSun0pn0PWldT4wnkycLCkHfK56v8DF0XETS1mdRmpNWtDpYfHDhlBjBPtMmAvpYfNnkS64gWGPReP5RzUUgfHX1sl/7XhzcA00o36e4DTSO3xkK7mziJ9Q72UtLN2Om2zo4ETcpPDG4BjSQ9q3Em6GX/muKxNOqF9NM/3dtLJ9cg8rJGE7pJ0ae7ej3Qf4TbSQ2VHRcTP87APk+6v/Rn4OWn9Hm634HzV+SlSU9kdpHu8vx7DusxtN31uNt8D2DfHfjvp22X1hH4y6QR3TuXqAOCzwA+Bn0m6n7T9X9BhTK22YSvfIn1Bu5HUpNl4UvJYJqbeh7KIdO+2nYtI9yLvBD4C7BMRd7UaMZe/mnQlfxfpqvDVle37RtK2vJv05eIblWnvBd5BSnK3kq4eqk9Ff5r0pe9nwH2k+4Qb5GFHs/bxA+l+7UOkZsuzcnfz1Tmk/ehmWl9dNWsXw1mkuvpDntffGL6JcMh9JX952Auo/t93230+O5/0ZeCCNv1ExPdIx8K3lW5zXEVqCWrlnaSm9NtJ/6p2MkMc403OAa4Gbpd0Z5txDiDd27yOdD/0PTnGnwP/D/gu6cptW9Kx3MpngEdI55QTgF7619NvkvLDTaR95pTKsKHOxWM5Bw2l7fE3FK19C8p6kaS3kx5GaneVP97L+xnw7oi4thvLGy/5qv3QyheZovI38N+THvpY0TTsIFKsLyoRWzdI+g9gMCIWlY6lStK7gLkRcXjun0Y6mT8rIh4tFNPHgCdFxIEllm9l+EccepDSTxduQ7qifSrpqucL3Vp+ROwx/Fg2nIh4mNRMXEsRcczwY3VfRHy+qf8RUvN/1+Rm52mkf716Hql599AhJ7K+4wTcm6aRmi+3Bv5C+n/BL5UMyMzG1SakZuetSE28nyL9T631CEn7k87DzW4ewUO/Qy/DTdBmZmbd10u/L2tmZlYbTsBmZmYF+B5wBzbffPOYP39+6TDMzCaNSy655M6ImDX8mPXlBNyB+fPns3Tp0tJhmJlNGpJG8nOoteQmaDMzswKcgM3MzApwAjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzczMCnACnmCz585D0qg+s+fOKx2+mZlNEP8W9AS7bfkyFiy6cFTTnnLYLuMcjZmZ9QpfAZuZmRXgBGxmZlaAE7CZmVkBTsBmZmYFOAGbmZkV4ARsZmZWgBOwmZlZAU7AZmZmBfRtApZ0vKSVkq5qKn+XpOskXS3p46XiMzOzeuvbBAwsAfasFkh6GbA38OyIeAbwyQJxmZmZ9W8CjogLgLubit8OfDQiHs7jrOx6YGZmZvRxAm7jacCLJV0k6XxJzysdkJmZ1VPdXsYwFZgJ7AQ8DzhV0jYREc0jSloILASYN89vJTIzs/FVtyvg5cDpkVwMrAY2bzViRCyOiIGIGJg1a1ZXgzQzs/5XtwT8feBlAJKeBkwD7iwZkJmZ1VPfNkFLOhnYFdhc0nLgKOB44Pj8r0mPAAe2an42MzObaH2bgCNivzaD3tTVQMzMzFqoWxO0mZlZT3ACNjMzK8AJ2MzMrAAnYDMzswKcgM3MzApwAjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzczMCnACNjMzK8AJ2MzMrAAnYDMzswKcgM3MzApwAjYzMyugbxOwpOMlrZR0VYth75cUkjYvEZuZmVnfJmBgCbBnc6GkucAewC3dDsjMzKyhbxNwRFwA3N1i0GeAw4HobkRmZmZr9G0CbkXS3sCtEXF56VjMzKzeppYOoFskbQh
8kNT83Mn4C4GFAPPmzZvAyMzMrI7qdAW8LbA1cLmkm4A5wKWSntRq5IhYHBEDETEwa9asLoZpZmZ1UJsr4Ii4Enhioz8n4YGIuLNYUGZmVlt9ewUs6WTgN8B2kpZLOqR0TGZmZg19ewUcEfsNM3x+l0IxMzN7nL69AjYzM+tlTsBmZmYFOAGbmZkV4ARsZmZWgBOwmZlZAU7AZmZmBTgBm5mZFeAEbGZmVoATsJmZWQFOwGZmZgU4AZuZmRXgBGxmZlaAE7CZmVkBTsBmZmYFOAGbmZkV4ARsZmZWgBOwmZlZAU7AZmZmBfRtApZ0vKSVkq6qlH1C0nWSrpD0PUnTC4ZoZmY11rcJGFgC7NlUdjbwzIh4FvAH4MhuB2VmZgZ9nIAj4gLg7qayn0XEqtz7W2BO1wMzMzOjjxNwB94C/LR0EGZmVk+1TMCSPgSsAk4aYpyFkpZKWjo4ONi94MbJ7LnzkDTqz+y580qvgplZX5taOoBuk3QQ8Gpg94iIduNFxGJgMcDAwEDb8XrVbcuXsWDRhaOe/pTDdhnHaMzMrFmtErCkPYHDgZdGxF9Lx2NmZvXVt03Qkk4GfgNsJ2m5pEOALwCbAGdLukzScUWDNDOz2urbK+CI2K9F8de6HoiZmVkLfXsFbGZm1sucgM3MzApwAjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAe9k6U0f9NqOSy/ablMzMhte3P0XZF1avGvUbjcb8NqOSyzYzqwFfAZuZmRXgBGxmZlaAE7CZmVkBTsBmZmYFOAGbmZkV4ARsZmZWgBOwmZlZAU7AZmZmBfRtApZ0vKSVkq6qlM2UdLakP+a/M0rGaGZm9dW3CRhYAuzZVHYE8IuIeCrwi9xvZmbWdX2bgCPiAuDupuK9gRNy9wnAv3QzJjMzs4a+TcBtbBERK3L37cAWJYMxM7P6qlsCfkxEBBDthktaKGmppKWDg4NdjMzMzOqg5xOwpBd2UtahOyRtmeexJbCy3YgRsTgiBiJiYNasWaNcnJmZWWs9n4CBz3dY1okfAgfm7gOBH4xyPmZmZmPSs+8DlrQzsAswS9L7KoOeAEzpYPqTgV2BzSUtB44CPgqcKukQ4GbgDeMdt5mZWSd6NgED04CNSTFuUim/D9hnuIkjYr82g3Yfe2hmZmZj07MJOCLOB86XtCQibi4dj5mZ2Xjq2QRcsZ6kxcB8KvFGxG7FIjIzMxujyZCAvwMcB3wV+HvhWMzMzMbFZEjAqyLiy6WDMDMzG0+T4d+QfiTpHZK2zC9TmClpZumgzMzMxmIyXAE3/m/3A5WyALYpEIuZmdm46PkEHBFbl47BzMxsvPV8Apb05lblEfGNbsdiZmY2Xno+AQPPq3SvT/ohjUsBJ2AzM5u0ej4BR8S7qv2SpgPfLhONmZnZ+JgMT0E3exDwfWEzM5vUev4KWNKPWPPe3inA04FTy0VkZmY2dj2fgIFPVrpXATdHxPJSwZiZmY2Hnm+Czi9luI70RqQZwCNlIzIzMxu7nk/Akt4AXAy8nvT+3oskDfs6QjMzs142GZqgPwQ8LyJWAkiaBfwcOK1oVGZmZmPQ81fAwDqN5JvdxeSI28zMrK3JcAV8pqSzgJNz/wLgJwXjMTMzG7OeTcCSngJsEREfkPQ64EV50G+Ak8Y47/cCh5L+velK4OCI+NtY5mlmZjYSvdyUeyxwH0BEnB4R74uI9wHfy8NGRdJs4N+AgYh4Jul/i/cdc7RmZmYj0MsJeIuIuLK5MJfNH+O8pwIbSJoKbAjcNsb5mZmZjUgvJ+DpQwzbYLQzjYhbST/ucQuwArg3In422vmZmZmNRi8n4KWS3tpcKOlQ4JLRzlTSDGBv0u9JbwVsJOlNLcZbKGmppKWDg4OjXZyZmVlLPfsQFvAe4HuS9mdNwh0ApgGvHcN8Xw78OSIGASSdDuwCnFgdKSIWA4sBBgYGonkmZmZmY9GzCTgi7gB
2kfQy4Jm5+McRcc4YZ30LsJOkDYGHSO8XXjrGeZqZmY1Izybghog4Fzh3HOd3kaTTgEtJL3f4PflK18zMrFt6PgFPhIg4CjiqdBxmZlZfvfwQlpmZWd9yAjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzczMCnACtvG3zlQkjeoze+680tGbmXVFLX+K0ibY6lUsWHThqCY95bBdxjkYM7Pe5CtgMzOzApyAzczMCnACNjMzK8AJ2MzMrAAnYDMzswKcgM3MzApwAjYzMyvACdjMzKyAWiZgSdMlnSbpOknXStq5dExmZlYvdf0lrM8CZ0bEPpKmARuWDsjMzOqldglY0qbAS4CDACLiEeCRkjGZmVn91LEJemtgEPi6pN9L+qqkjUoHZWZm9VLHBDwV2BH4ckQ8B3gQOKJ5JEkLJS2VtHRwcLDbMZqZWZ+rYwJeDiyPiIty/2mkhLyWiFgcEQMRMTBr1qyuBmhmZv2vdgk4Im4HlknaLhftDlxTMCQzM6uh2j2Elb0LOCk/AX0jcHDheMzMrGZqmYAj4jJgoHQcZmZWX7VrgjYzM+sFTsBmZmYFOAGbmZkV4ARsZmZWgBOwmZlZAU7AZmZmBTgBm5mZFeAEbGZmVoATsPWWdaYiaVSf2XPnFQt79tx5kzJuMyunlr+EZT1s9SoWLLpwVJOectgu4xxM525bvmxSxm1m5fgK2MzMrAAnYDMzswKcgM3MzApwAjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMroLYJWNIUSb+XdEbpWMzMrH5qm4CBdwPXlg7CzMzqqZYJWNIc4FXAV0vHYmZm9VTLBAwcCxwOrC4ch5mZ1VTtErCkVwMrI+KSYcZbKGmppKWDg4Ndis7MzOqidgkYeCHwGkk3Ad8GdpN0YvNIEbE4IgYiYmDWrFndjtHMzPpc7RJwRBwZEXMiYj6wL3BORLypcFhmZlYztUvAZmZmvWBq6QBKiojzgPMKh2FmZjXkK2AzM7MCnIDNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzczMCnACNjMzK8AJ2MzMrAAnYDMzswKcgM1KW2cqkkb1mT13XunozWyUav1TlGY9YfUqFiy6cFSTnnLYLuMcjJl1i6+AzczMCnACNjMzK8AJ2MzMrAAnYDMzswKcgM3MzApwAjYzMyvACdjMzKyA2iVgSXMlnSvpGklXS3p36ZjMzKx+6vhDHKuA90fEpZI2AS6RdHZEXFM6MDMzq4/aXQFHxIqIuDR33w9cC8wuG5WZmdVN7RJwlaT5wHOAiwqHYmZmNVPbBCxpY+C7wHsi4r4WwxdKWipp6eDgYPcDtJEbw0sNpk5bf9TTSiq95mZtzZ47zy/76FF1vAeMpHVJyfekiDi91TgRsRhYDDAwMBBdDM9Ga4wvNRjttI3pzXrRbcuX+WUfPap2V8BKlytfA66NiE+XjsfMzOqpdgkYeCFwALCbpMvyZ6/SQZmZWb3Urgk6In4F+KadmZkVVccrYDMzs+KcgM3MzApwAjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzSazgm+AGsubcsbyhp6xxF0q5pLLtt5Vu5+iNOsrk/QNUGN9Q0+Jt/uMJeaSy/YbjXqXr4DNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzczMCnACNjMzK8AJ2MzMrAAnYDMzswJqmYAl7Snpekk3SDqidDxmZlY/tUvAkqYAXwReCWwP7Cdp+7JRmZlZ3dQuAQPPB26IiBsj4hHg28DehWMyM7OaqWMCng0sq/Qvz2VmZmZdo4goHUNXSdoH2DMiDs39BwAviIh3No23EFiYe7cDrm+a1ebAnRMcbmn9vo5ev8mv39dxMq/fkyNiVukgelkd34Z0KzC30j8nl60lIhYDi9vNRNLSiBgY//B6R7+vo9dv8uv3dez39au7OjZ
B/w54qqStJU0D9gV+WDgmMzOrmdpdAUfEKknvBM4CpgDHR8TVhcMyM7OaqV0CBoiInwA/GeNs2jZP95F+X0ev3+TX7+vY7+tXa7V7CMvMzKwX1PEesJmZWXFOwKMwmX7KUtJcSedKukbS1ZLenctnSjpb0h/z3xm5XJI+l9ftCkk7VuZ1YB7/j5IOrJQ/V9KVeZrPSVKB9Zwi6feSzsj9W0u6KMd0Sn7gDknr5f4b8vD5lXkcmcuvl/SKSnnx+pY0XdJpkq6TdK2knfupDiW9N++fV0k6WdL6k70OJR0vaaWkqyplE15n7ZZhPSgi/BnBh/Tg1p+AbYBpwOXA9qXjGiLeLYEdc/cmwB9IP8H5ceCIXH4E8LHcvRfwU0DATsBFuXwmcGP+OyN3z8jDLs7jKk/7ygLr+T7gW8AZuf9UYN/cfRzw9tz9DuC43L0vcEru3j7X5XrA1rmOp/RKfQMnAIfm7mnA9H6pQ9IP4fwZ2KBSdwdN9joEXgLsCFxVKZvwOmu3DH9671M8gMn2AXYGzqr0HwkcWTquEcT/A+CfSD8ssmUu2xK4PncvAvarjH99Hr4fsKhSviiXbQlcVylfa7wurdMc4BfAbsAZ+YR0JzC1uc5IT7/vnLun5vHUXI+N8XqhvoFNc4JSU3lf1CFrfp1uZq6TM4BX9EMdAvNZOwFPeJ21W4Y/vfdxE/TITdqfssxNdc8BLgK2iIgVedDtwBa5u936DVW+vEV5Nx0LHA6szv2bAX+JiFUtYnpsPfLwe/P4I13vbtoaGAS+npvZvyppI/qkDiPiVuCTwC3AClKdXEJ/1WFDN+qs3TKsxzgB14SkjYHvAu+JiPuqwyJ9VZ6Uj8NLejWwMiIuKR3LBJpKasr8ckQ8B3iQ1LT4mElehzNIL0TZGtgK2AjYs2hQXdCNOpvM+0UdOAGPXEc/ZdlLJK1LSr4nRcTpufgOSVvm4VsCK3N5u/UbqnxOi/JueSHwGkk3kd5stRvwWWC6pMb/uVdjemw98vBNgbsY+Xp303JgeURclPtPIyXkfqnDlwN/jojBiHgUOJ1Ur/1Uhw3dqLN2y7Ae4wQ8cpPqpyzzk5FfA66NiE9XBv0QaDxReSDp3nCj/M35qcydgHtzc9ZZwB6SZuQrlj1I99VWAPdJ2ikv682VeU24iDgyIuZExHxSXZwTEfsD5wL7tFm/xnrvk8ePXL5vfsJ2a+CppIdcitd3RNwOLJO0XS7aHbiGPqlDUtPzTpI2zMtvrF/f1GFFN+qs3TKs15S+CT0ZP6QnFv9AerLyQ6XjGSbWF5GaoK4ALsufvUj3zH4B/BH4OTAzjy/gi3ndrgQGKvN6C3BD/hxcKR8ArsrTfIGmh4W6uK67suYp6G1IJ98bgO8A6+Xy9XP/DXn4NpXpP5TX4XoqTwH3Qn0DOwBLcz1+n/REbN/UIfBfwHU5hm+SnmSe1HUInEy6p/0oqRXjkG7UWbtl+NN7H/8SlpmZWQFugjYzMyvACdjMzKwAJ2AzM7MCnIDNzMwKcAI2MzMrwAnYbBQkbSbpsvy5XdKtufsBSV8qHR9A/snK7XP3B0vHY2Zr878hmY2RpKOBByLik6VjaUfSAxGxcek4zGwNXwGbjSNJu2rNO4mPlnSCpF9KulnS6yR9PL/D9cz8E6GN97qeL+kSSWc1fkawab5LJO1T6X+gsrzztOZdwSdV3gt7nqQBSR8FNshX6CdJ2kjSjyVdrvT+3QXDrFNH62FmI+MEbDaxtiX9PvVrgBOBcyPiH4GHgFfl5PV5YJ+IeC5wPPCRES7jOcB7SO/D3Yb0O8qPiYgjgIciYodIP9O5J3BbRDw7Ip4JnAkg6cOSXjOa9RhhvGZGesuKmU2cn0bEo5KuJL0Y/sxcfiXpXbHbAc8Ezs4XrlNIP184EhdHxHIASZfl+f5qiPGvBD4l6WOkn+78JUBE/OcY1sPMRsgJ2GxiPQwQEaslPRprHrpYTTr+BFwdETsPM59V5BYrSesA05q
Xkf2dYY7riPiDpB1Jv498jKRfRMSHx7geZjZCboI2K+t6YJaknSG9OlLSM1qMdxPw3Nz9GmCk910frdxz3gr4a0ScCHyC9GpDM+syf3M1KygiHskPV31O0qakY/JY4OqmUb8C/EDS5aTm3wdHuKjFwBWSLgW+AXxC0mrSm3reDukeMLA0Inr29Zpm/cT/hmRmZlaAm6DNzMwKcAI2MzMrwAnYzMysACdgMzOzApyAzczMCnACNjMzK8AJ2MzMrAAnYDMzswL+Fwwiqzs1YI2NAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.timedelta_hist(\n", " event_pair=('product1', 'cart'),\n", " timedelta_unit='m',\n", " adjacent_events_only=False,\n", " lower_cutoff_quantile=0.9\n", " );" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 496, "status": "ok", "timestamp": 1682539468246, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "3OPKdBOrKcc_", "outputId": "cbdc983f-e548-420d-c23e-bad68fe471a3" }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/tooling/timedelta_hist/timedelta_hist.py:107: FutureWarning: Logical ops (and, or, xor) between Pandas objects and dtype-less sequences (e.g. list, tuple) are deprecated and will raise in a future version. Wrap the object in a Series, Index, or np.array before operating instead.\n", " idx &= series <= series.quantile(self.upper_cutoff_quantile)\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.timedelta_hist(\n", " event_pair=('product1', 'cart'),\n", " timedelta_unit='m',\n", " adjacent_events_only=False,\n", " upper_cutoff_quantile=0.1\n", " );" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 491, "status": "ok", "timestamp": 1682539470356, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "Cq6NAvPEYGw_", "outputId": "da06bb64-3cd8-43eb-ef45-c90b3ffe181d" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream_with_synthetic\\\n", " .timedelta_hist(\n", " event_pair=('product1', 'cart'),\n", " timedelta_unit='m',\n", " adjacent_events_only=False,\n", " weight_col='session_id'\n", " );" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1682540317182, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "NxJnm-HMYYfX", "outputId": "5e51583a-da64-4c64-bf09-eb73285e4157" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream\\\n", " .timedelta_hist(\n", " event_pair=('main', 'catalog'),\n", " timedelta_unit='m',\n", " adjacent_events_only=False,\n", " weight_col='user_id',\n", " time_agg='mean'\n", " );" ] }, { "cell_type": "markdown", "metadata": { "id": "U-fHCaabYqu-" }, "source": [ "#### Eventstream global events\n" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 449 }, "executionInfo": { "elapsed": 880, "status": "ok", "timestamp": 1682540766203, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "DZsJ-_deZlGL", "outputId": "53303527-e812-4465-a6f3-90ef4257227a" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream_with_synthetic\\\n", " .timedelta_hist(\n", " event_pair=('eventstream_start', 'path_end'),\n", " timedelta_unit='h',\n", " adjacent_events_only=False\n", " );" ] }, { "cell_type": "markdown", "metadata": { "id": "eT5TUd5O5Pp_" }, "source": [ "### Events intensity" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 429 }, "executionInfo": { "elapsed": 895, "status": "ok", "timestamp": 1682540857450, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "5hURSECJ8ixm", "outputId": "97d3a3bc-4f98-47a5-fd2b-fc81a96b81e6" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream.event_timestamp_hist();" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 484 }, "executionInfo": { "elapsed": 1371, "status": "ok", "timestamp": 1682540882764, "user": { "displayName": "Anatoliy Zaykovskiy", "userId": "11710160701524337708" }, "user_tz": -180 }, "id": "L5cLS-Z08qqX", "outputId": "321e2187-b465-4cf6-a6f7-7399c236ba01" }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "stream\\\n", " .add_start_end_events()\\\n", " .event_timestamp_hist(event_list=['path_start']);" ] }, { "cell_type": "markdown", "metadata": { "jupyter": { "outputs_hidden": false }, "pycharm": { "is_executing": true } }, "source": [ "## Path metrics" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
path_lengthhas_carttime_to_cartcart_countactive_days
12291534True6 days 01:22:39.09042212
46345812FalseNaT01
147590716True23 days 13:03:45.21350912
15766263FalseNaT01
21123387FalseNaT01
..................
9992751097True0 days 00:00:08.92158511
99964290510True0 days 00:00:41.32085611
99991455419True0 days 00:02:58.00300911
9999161636True0 days 00:00:08.27419621
9999419673FalseNaT01
\n", "

3751 rows × 5 columns

\n", "
" ], "text/plain": [ " path_length has_cart time_to_cart cart_count \\\n", "122915 34 True 6 days 01:22:39.090422 1 \n", "463458 12 False NaT 0 \n", "1475907 16 True 23 days 13:03:45.213509 1 \n", "1576626 3 False NaT 0 \n", "2112338 7 False NaT 0 \n", "... ... ... ... ... \n", "999275109 7 True 0 days 00:00:08.921585 1 \n", "999642905 10 True 0 days 00:00:41.320856 1 \n", "999914554 19 True 0 days 00:02:58.003009 1 \n", "999916163 6 True 0 days 00:00:08.274196 2 \n", "999941967 3 False NaT 0 \n", "\n", " active_days \n", "122915 2 \n", "463458 1 \n", "1475907 2 \n", "1576626 1 \n", "2112338 1 \n", "... ... \n", "999275109 1 \n", "999642905 1 \n", "999914554 1 \n", "999916163 1 \n", "999941967 1 \n", "\n", "[3751 rows x 5 columns]" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Each entry pairs a metric definition with the name of its output column.\n", "metrics = [\n", "    # Metrics given as strings.\n", "    ('len', 'path_length'),\n", "    ('has:cart', 'has_cart'),\n", "    ('time_to:cart', 'time_to_cart'),\n", "    # Callable metric: how many rows of the received frame are 'cart' events.\n", "    (lambda path_df: path_df['event'].eq('cart').sum(), 'cart_count'),\n", "    # NamedAgg metric: number of distinct calendar dates in 'timestamp'.\n", "    (\n", "        pd.NamedAgg(\n", "            column='timestamp',\n", "            aggfunc=lambda timestamps: len(timestamps.dt.date.unique()),\n", "        ),\n", "        'active_days',\n", "    ),\n", "]\n", "\n", "stream.path_metrics(metrics)" ] } ], "metadata": { "colab": { "provenance": [ { "file_id": "1Gcj_hfBODZj4noEHMXl5O-ij4gcs5qzn", "timestamp": 1671629341415 } ] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", 
"function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }