{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "1qcdnuAet_El" }, "source": [ "# Prerequisites\n", "Run the cell below to prepare the environment. This step is required." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 25578, "status": "ok", "timestamp": 1683188242647, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "bieK6_UAZ94I", "outputId": "9979688d-1f14-4a2f-d733-c261b3cce373", "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "!pip install retentioneering" ] }, { "cell_type": "markdown", "metadata": { "id": "rwHSi8hf1jlc" }, "source": [ "The full text of the [TransitionGraph user guide](https://doc.retentioneering.com/stable/doc/user_guides/transition_graph.html) is available on the retentioneering website." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# TransitionGraph" ] }, { "cell_type": "markdown", "metadata": { "id": "B_l0s_3Ax_Dx" }, "source": [ "## Loading data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "executionInfo": { "elapsed": 7, "status": "ok", "timestamp": 1683188260267, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "FJdD7NNXl_lV" }, "outputs": [], "source": [ "import retentioneering\n", "import pandas as pd\n", "\n", "from retentioneering import datasets\n", "\n", "stream = datasets.load_simple_shop()" ] }, { "cell_type": "markdown", "metadata": { "id": "DRYrsb_08DIY" }, "source": [ "## A basic example" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 392, "status": "ok", "timestamp": 1683188260654, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "3wllHdNvZJJV", "outputId": "066f9a72-23a7-4437-ebe0-78cb4fcbcdfa" }, 
"outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "stream.transition_graph();" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Transition graph parameters" ] }, { "cell_type": "markdown", "metadata": { "id": "1uPdv0rcGqMq" }, "source": [ "### Setting the weight options" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1683188260654, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "x1TI8L2EHQO0", "outputId": "fc3e41d5-471a-4bb2-f0c4-e208bb3d157d" }, "outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "stream.transition_graph(\n", " edges_norm_type='node',\n", " edges_weight_col='user_id'\n", ");" ] }, { "cell_type": "markdown", "metadata": { "id": "RwMnbDHMHozf" }, "source": [ "### Thresholds" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 311, "status": "ok", "timestamp": 1683188260959, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "5wGC_mp1H_OH", "outputId": "6d4b8d21-91bc-4974-c440-1b71795dd8ee" }, "outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "stream.transition_graph(\n", " edges_norm_type='node',\n", " edges_weight_col='user_id',\n", " edges_threshold={'user_id': 0.12},\n", " nodes_threshold={'event_id': 500}\n", ");" ] }, { "cell_type": "markdown", "metadata": { "id": "qCpWRTFVIN5j" }, "source": [ "### Color settings" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": 
"https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 1243, "status": "ok", "timestamp": 1683188262200, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "qVfwLcY1I0Ov", "outputId": "a8c9e404-e819-442b-d16b-d92a02e03a14" }, "outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "stream\\\n", " .transition_graph(\n", " targets={\n", " 'positive': ['payment_done', 'cart'],\n", " 'negative': 'path_end',\n", " 'source': 'path_start'\n", " }\n", " );" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Custom node colors: event name -> color\n", "nodes_custom_colors = {\n", " 'product1': 'gold',\n", " 'product2': 'gold',\n", " 'cart': 'green'\n", "}\n", "\n", "# Custom edge colors: (source event, target event) -> color\n", "edges_custom_colors = {\n", " ('path_start', 'catalog'): '#cc29c4',\n", " ('path_start', 'main'): '#cc29c4',\n", "}\n", "\n", "stream\\\n", " .transition_graph(\n", " nodes_custom_colors=nodes_custom_colors,\n", " edges_custom_colors=edges_custom_colors,\n", " targets={'negative': 'path_end'}\n", " )" ] }, { "cell_type": "markdown", "metadata": { "id": "aUcCs_2tIOCH" }, "source": [ "### Graph settings\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 19, "status": "ok", "timestamp": 1683188262200, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "pr3bw0HOJqy-", "outputId": "554b685a-d510-47a8-9980-475e66486fb2", "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.transition_graph(\n", " edges_norm_type='node',\n", " show_weights=True,\n", " show_percents=True,\n", " 
show_nodes_names=True,\n", " show_all_edges_for_targets=False,\n", " show_nodes_without_links=False\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import and export graph layout" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/v.kukushkin/rete/retentioneering-tools-new-arch/retentioneering/tooling/transition_graph/transition_graph.py:957: UserWarning: Failed to load layout dump\n", " warnings.warn(f\"Failed to load layout dump\")\n" ] }, { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path_link = '/path/to/node_params.json'\n", "stream.transition_graph(layout_dump=path_link)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Export the modified eventstream" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "tg = stream.transition_graph()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0fa074790-7666-461d-b2fc-81b1c4c1529apath_start0path_start2019-11-01 17:59:13.273932219483890
1fa074790-7666-461d-b2fc-81b1c4c1529araw0catalog2019-11-01 17:59:13.273932219483890
2457a2250-36f4-4a1f-b840-20a00480e0d4raw1product12019-11-01 17:59:28.459271219483890
320bf8be0-06e3-490d-9c56-bbc892e0d260raw2cart2019-11-01 17:59:29.502214219483890
4fda93414-8937-43c2-a19d-fc7f412c8020raw3catalog2019-11-01 17:59:32.557029219483890
.....................
3978077132615-0d52-4562-a83a-af89dc86a264raw32279catalog2020-04-29 12:47:40.975732501098384
39781166da77e-efe8-4829-a917-33117b468fc7raw32280catalog2020-04-29 12:48:01.809577501098384
39782da930f61-0594-4b83-832b-efd68c26f1a2raw32281main2020-04-29 12:48:01.938488501098384
3978395ebf86f-6f92-4eea-b4ce-011a3e299291raw32282catalog2020-04-29 12:48:06.595390501098384
3978495ebf86f-6f92-4eea-b4ce-011a3e299291path_end32282path_end2020-04-29 12:48:06.595390501098384
\n", "

39785 rows × 6 columns

\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 fa074790-7666-461d-b2fc-81b1c4c1529a path_start 0 \n", "1 fa074790-7666-461d-b2fc-81b1c4c1529a raw 0 \n", "2 457a2250-36f4-4a1f-b840-20a00480e0d4 raw 1 \n", "3 20bf8be0-06e3-490d-9c56-bbc892e0d260 raw 2 \n", "4 fda93414-8937-43c2-a19d-fc7f412c8020 raw 3 \n", "... ... ... ... \n", "39780 77132615-0d52-4562-a83a-af89dc86a264 raw 32279 \n", "39781 166da77e-efe8-4829-a917-33117b468fc7 raw 32280 \n", "39782 da930f61-0594-4b83-832b-efd68c26f1a2 raw 32281 \n", "39783 95ebf86f-6f92-4eea-b4ce-011a3e299291 raw 32282 \n", "39784 95ebf86f-6f92-4eea-b4ce-011a3e299291 path_end 32282 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "... ... ... ... \n", "39780 catalog 2020-04-29 12:47:40.975732 501098384 \n", "39781 catalog 2020-04-29 12:48:01.809577 501098384 \n", "39782 main 2020-04-29 12:48:01.938488 501098384 \n", "39783 catalog 2020-04-29 12:48:06.595390 501098384 \n", "39784 path_end 2020-04-29 12:48:06.595390 501098384 \n", "\n", "[39785 rows x 6 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# group some nodes in GUI and perform recalculation before running this cell\n", "tg.recalculation_result.to_dataframe()" ] }, { "cell_type": "markdown", "metadata": { "id": "6FFm6C3TbOZW" }, "source": [ "## Transition matrix" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 19, "status": "ok", "timestamp": 1683188262201, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "0kOE5qD3cZ6H", "outputId": "cafa5be9-303e-4cd5-d310-9c6cf45bf9be" }, 
"outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cartcatalogdelivery_choicemainpath_endproduct1product2delivery_courierdelivery_pickuppayment_choicepath_startpayment_cardpayment_donepayment_cash
cart0.0005200.2484410.7047820.1060290.1824320.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
catalog0.3666570.5549710.0000000.4098590.4763220.3107170.3960120.0000000.000000.0000000.00.0000000.0000000.00000
delivery_choice0.0000000.1268440.0000000.0501470.0715340.0000000.0000000.5516220.345870.0000000.00.0000000.0000000.00000
main0.0000000.8448640.0000000.2528300.2218030.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
path_end0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
product10.3841350.5525850.0000000.1016040.1541890.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
product20.4069930.6531470.0000000.0615380.0860140.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
delivery_courier0.0000000.0000000.0000000.0454550.0641710.0000000.0000000.0000000.000000.9131020.00.0000000.0000000.00000
delivery_pickup0.0000000.0000000.0000000.1172710.2025590.0000000.0000000.0000000.000000.7078890.00.0000000.0000000.00000
payment_choice0.0000000.1127350.0000000.0427970.0981210.0000000.0000000.0000000.000000.0000000.00.5438410.0981210.19833
path_start0.0000000.7160760.0000000.2839240.0000000.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
payment_card0.0000000.0000000.0000000.0383880.0729370.0000000.0000000.0000000.000000.0000000.00.0000000.9117080.00000
payment_done0.0000000.0000000.0000000.3705970.6692190.0000000.0000000.0000000.000000.0000000.00.0000000.0000000.00000
payment_cash0.0000000.0000000.0000000.2368420.2421050.0000000.0000000.0000000.000000.0000000.00.0000000.5368420.00000
\n", "
" ], "text/plain": [ " cart catalog delivery_choice main path_end \\\n", "cart 0.000520 0.248441 0.704782 0.106029 0.182432 \n", "catalog 0.366657 0.554971 0.000000 0.409859 0.476322 \n", "delivery_choice 0.000000 0.126844 0.000000 0.050147 0.071534 \n", "main 0.000000 0.844864 0.000000 0.252830 0.221803 \n", "path_end 0.000000 0.000000 0.000000 0.000000 0.000000 \n", "product1 0.384135 0.552585 0.000000 0.101604 0.154189 \n", "product2 0.406993 0.653147 0.000000 0.061538 0.086014 \n", "delivery_courier 0.000000 0.000000 0.000000 0.045455 0.064171 \n", "delivery_pickup 0.000000 0.000000 0.000000 0.117271 0.202559 \n", "payment_choice 0.000000 0.112735 0.000000 0.042797 0.098121 \n", "path_start 0.000000 0.716076 0.000000 0.283924 0.000000 \n", "payment_card 0.000000 0.000000 0.000000 0.038388 0.072937 \n", "payment_done 0.000000 0.000000 0.000000 0.370597 0.669219 \n", "payment_cash 0.000000 0.000000 0.000000 0.236842 0.242105 \n", "\n", " product1 product2 delivery_courier delivery_pickup \\\n", "cart 0.000000 0.000000 0.000000 0.00000 \n", "catalog 0.310717 0.396012 0.000000 0.00000 \n", "delivery_choice 0.000000 0.000000 0.551622 0.34587 \n", "main 0.000000 0.000000 0.000000 0.00000 \n", "path_end 0.000000 0.000000 0.000000 0.00000 \n", "product1 0.000000 0.000000 0.000000 0.00000 \n", "product2 0.000000 0.000000 0.000000 0.00000 \n", "delivery_courier 0.000000 0.000000 0.000000 0.00000 \n", "delivery_pickup 0.000000 0.000000 0.000000 0.00000 \n", "payment_choice 0.000000 0.000000 0.000000 0.00000 \n", "path_start 0.000000 0.000000 0.000000 0.00000 \n", "payment_card 0.000000 0.000000 0.000000 0.00000 \n", "payment_done 0.000000 0.000000 0.000000 0.00000 \n", "payment_cash 0.000000 0.000000 0.000000 0.00000 \n", "\n", " payment_choice path_start payment_card payment_done \\\n", "cart 0.000000 0.0 0.000000 0.000000 \n", "catalog 0.000000 0.0 0.000000 0.000000 \n", "delivery_choice 0.000000 0.0 0.000000 0.000000 \n", "main 0.000000 0.0 0.000000 0.000000 \n", 
"path_end 0.000000 0.0 0.000000 0.000000 \n", "product1 0.000000 0.0 0.000000 0.000000 \n", "product2 0.000000 0.0 0.000000 0.000000 \n", "delivery_courier 0.913102 0.0 0.000000 0.000000 \n", "delivery_pickup 0.707889 0.0 0.000000 0.000000 \n", "payment_choice 0.000000 0.0 0.543841 0.098121 \n", "path_start 0.000000 0.0 0.000000 0.000000 \n", "payment_card 0.000000 0.0 0.000000 0.911708 \n", "payment_done 0.000000 0.0 0.000000 0.000000 \n", "payment_cash 0.000000 0.0 0.000000 0.536842 \n", "\n", " payment_cash \n", "cart 0.00000 \n", "catalog 0.00000 \n", "delivery_choice 0.00000 \n", "main 0.00000 \n", "path_end 0.00000 \n", "product1 0.00000 \n", "product2 0.00000 \n", "delivery_courier 0.00000 \n", "delivery_pickup 0.00000 \n", "payment_choice 0.19833 \n", "path_start 0.00000 \n", "payment_card 0.00000 \n", "payment_done 0.00000 \n", "payment_cash 0.00000 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.transition_matrix(norm_type='node', weight_col='user_id')" ] }, { "cell_type": "markdown", "metadata": { "id": "yPm321DlbOjv" }, "source": [ "## Using a separate instance" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 427, "status": "ok", "timestamp": 1683188262610, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "FXOVZ8J-dEjR", "outputId": "11468aea-f86f-4b8c-9aa2-ff44852621b8", "tags": [] }, "outputs": [ { "data": { "text/html": [ "\n", "" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from retentioneering.tooling.transition_graph import TransitionGraph\n", "\n", "tg = TransitionGraph(stream)\n", " \n", "tg.plot(\n", " edges_norm_type='node',\n", " edges_weight_col='user_id',\n", " edges_threshold={'user_id': 0.12},\n", " nodes_threshold={'event_id': 500},\n", " targets={'positive': 
['payment_done', 'cart']}\n", ")" ] } ], "metadata": { "colab": { "provenance": [ { "file_id": "1NkFbLAdIQ_3XqHnSObM_WtDOmlu6ig_Q", "timestamp": 1671020860970 }, { "file_id": "1DBNz_5rl_xErD-g5tZYZP8CMniLOI9Zn", "timestamp": 1670969297847 } ], "toc_visible": true }, "kernelspec": { "display_name": "rete", "language": "python", "name": "rete" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.15" } }, "nbformat": 4, "nbformat_minor": 4 }