{ "cells": [ { "cell_type": "markdown", "id": "bff3d155", "metadata": { "id": "2K4KCsDcK85_" }, "source": [ "# Data processors user guide" ] }, { "cell_type": "markdown", "id": "a24490e7", "metadata": { "id": "TdnGPZWzSxZe" }, "source": [ "The full text of the [Data processors](https://doc.retentioneering.com/stable/doc/user_guides/dataprocessors.html) user guide is available on the retentioneering website." ] }, { "cell_type": "markdown", "id": "53f0f536", "metadata": { "id": "0666939c" }, "source": [ "## Prerequisites\n", "\n", "Run this cell to prepare the environment. This step is obligatory." ] }, { "cell_type": "code", "execution_count": 1, "id": "5cd3f14f", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 41572, "status": "ok", "timestamp": 1683202215227, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "eKWKwFpiRj0R", "outputId": "ffd7455c-41b8-496e-da41-319bec80d8bd", "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "# %pip (not !pip) installs into the environment of the running kernel.\n", "%pip install retentioneering" ] }, { "cell_type": "markdown", "id": "65c92e18", "metadata": { "id": "Zm2DAiT_Sa7L" }, "source": [ "## Creating an eventstream" ] }, { "cell_type": "code", "execution_count": 2, "id": "115e20e1", "metadata": { "executionInfo": { "elapsed": 364, "status": "ok", "timestamp": 1683202244341, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "KK0JITft614a", "pycharm": { "is_executing": true }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "from retentioneering import datasets\n", "from retentioneering.eventstream import Eventstream\n", "\n", "stream = datasets.load_simple_shop()" ] }, { "cell_type": "markdown", "id": "5dba6a11", "metadata": { "id": "Bvb0h5RAvdWe" }, "source": [ "## What is a data processor?\n" ] }, { "cell_type": "markdown", "id": "9bd6683b", "metadata": { "id": "sdQf1DHIdds0" }, "source": [ "## Helpers and chaining 
usage" ] }, { "cell_type": "code", "execution_count": 3, "id": "d4236dbf", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 519 }, "executionInfo": { "elapsed": 2691, "status": "ok", "timestamp": 1683202247024, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "uXsQCXxAIlOT", "outputId": "2f4179e2-bce0-45dc-a046-3b2c544aedda", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890219483890_1
116b77e72-7633-4e4b-8fcd-a066dded8983session_start0session_start2019-11-01 17:59:13.273932219483890219483890_1
266f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890219483890_1
31a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890219483890_1
4dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890219483890_1
513f2b0a9-b2f4-4ffa-b0d1-446f12f6c570raw3catalog2019-11-01 17:59:32.557029219483890219483890_1
6cd4dbcda-a43d-44e2-900b-6e6db9b2faf0session_end3session_end2019-11-01 17:59:32.557029219483890219483890_1
339257f88a71-eb11-486d-9a0f-e23a7a5233casession_start2096session_start2019-12-06 16:22:57.484842219483890219483890_2
3393bd14ea53-9688-466d-87b2-0494ace4e011raw2096main2019-12-06 16:22:57.484842219483890219483890_2
33947ef1bf49-c88b-4b40-b424-c7ac53a267a6raw2097catalog2019-12-06 16:23:01.331109219483890219483890_2
3395e1800047-f1c9-48c7-a49a-80c3a87853e7raw2098catalog2019-12-06 16:23:48.116617219483890219483890_2
33961b6571bb-fbd7-4fb9-92c6-a6af7e153e89session_end2098session_end2019-12-06 16:23:48.116617219483890219483890_2
7311f8040e30-a37a-4e0a-b862-d74dd5c5e947session_start4542session_start2020-01-06 22:10:13.635011219483890219483890_3
7312d329f192-401c-44c3-8a1e-b0d4da680b6craw4542main2020-01-06 22:10:13.635011219483890219483890_3
731359e82765-c236-4f5f-b05c-73ce6efdc84braw4543catalog2020-01-06 22:10:15.228575219483890219483890_3
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 \n", "1 16b77e72-7633-4e4b-8fcd-a066dded8983 session_start 0 \n", "2 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 \n", "3 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 \n", "4 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 \n", "5 13f2b0a9-b2f4-4ffa-b0d1-446f12f6c570 raw 3 \n", "6 cd4dbcda-a43d-44e2-900b-6e6db9b2faf0 session_end 3 \n", "3392 57f88a71-eb11-486d-9a0f-e23a7a5233ca session_start 2096 \n", "3393 bd14ea53-9688-466d-87b2-0494ace4e011 raw 2096 \n", "3394 7ef1bf49-c88b-4b40-b424-c7ac53a267a6 raw 2097 \n", "3395 e1800047-f1c9-48c7-a49a-80c3a87853e7 raw 2098 \n", "3396 1b6571bb-fbd7-4fb9-92c6-a6af7e153e89 session_end 2098 \n", "7311 f8040e30-a37a-4e0a-b862-d74dd5c5e947 session_start 4542 \n", "7312 d329f192-401c-44c3-8a1e-b0d4da680b6c raw 4542 \n", "7313 59e82765-c236-4f5f-b05c-73ce6efdc84b raw 4543 \n", "\n", " event timestamp user_id session_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "1 session_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "2 catalog 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "3 product1 2019-11-01 17:59:28.459271 219483890 219483890_1 \n", "4 cart 2019-11-01 17:59:29.502214 219483890 219483890_1 \n", "5 catalog 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "6 session_end 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "3392 session_start 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "3393 main 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "3394 catalog 2019-12-06 16:23:01.331109 219483890 219483890_2 \n", "3395 catalog 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "3396 session_end 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "7311 session_start 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "7312 main 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "7313 catalog 2020-01-06 22:10:15.228575 219483890 219483890_3 " ] }, 
"execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Split paths into sessions by timeout, then preview the first 15 events of one user.\n", "res = (\n", "    stream\n", "    .split_sessions(timeout=(10, 'm'))\n", "    .to_dataframe()\n", ")\n", "res.loc[res['user_id'] == 219483890].head(15)" ] }, { "cell_type": "markdown", "id": "3747ff7e", "metadata": { "id": "Zuf43I0-u36P" }, "source": [ "## Data processors library" ] }, { "cell_type": "markdown", "id": "a0a29db4", "metadata": { "id": "829dd269" }, "source": [ "### Adding processors" ] }, { "cell_type": "markdown", "id": "4ca42902", "metadata": { "id": "pkJG5M7Y21m8" }, "source": [ "#### AddStartEndEvents" ] }, { "cell_type": "code", "execution_count": 4, "id": "a7308a80", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 613 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1683202247025, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "54409a0d", "outputId": "9a47db76-1b85-4b8b-ef12-807d3b6b098b", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890
166f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890
21a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890
3dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890
413f2b0a9-b2f4-4ffa-b0d1-446f12f6c570raw3catalog2019-11-01 17:59:32.557029219483890
2566bd14ea53-9688-466d-87b2-0494ace4e011raw2096main2019-12-06 16:22:57.484842219483890
25677ef1bf49-c88b-4b40-b424-c7ac53a267a6raw2097catalog2019-12-06 16:23:01.331109219483890
2568e1800047-f1c9-48c7-a49a-80c3a87853e7raw2098catalog2019-12-06 16:23:48.116617219483890
5427d329f192-401c-44c3-8a1e-b0d4da680b6craw4542main2020-01-06 22:10:13.635011219483890
542859e82765-c236-4f5f-b05c-73ce6efdc84braw4543catalog2020-01-06 22:10:15.228575219483890
542985f5bb33-7223-4c41-884a-544d7d1fefbfraw4544cart2020-01-06 22:10:42.309028219483890
5430ba1eb7bb-71d2-4ab0-8649-5e236cc15cc4raw4545catalog2020-01-06 22:10:52.255859219483890
54318577fdb0-8c24-4c64-992e-a92a0ecfc03draw4546product12020-01-06 22:11:01.709800219483890
5432c87ddbdf-e0eb-40ff-b3cc-71a53aef8d62raw4547catalog2020-01-06 22:11:02.899490219483890
54335d7091d0-e8ec-4974-9dcf-f8336bb2b92draw4548catalog2020-01-06 22:11:28.271366219483890
9689098dbd63-8c5b-4842-bb44-bad07923f13draw8215main2020-02-14 21:04:49.450696219483890
969083896884-2fd2-4567-9d29-791aa1d45795raw8216catalog2020-02-14 21:04:51.717127219483890
969183896884-2fd2-4567-9d29-791aa1d45795path_end8216path_end2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 \n", "1 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 \n", "2 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 \n", "3 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 \n", "4 13f2b0a9-b2f4-4ffa-b0d1-446f12f6c570 raw 3 \n", "2566 bd14ea53-9688-466d-87b2-0494ace4e011 raw 2096 \n", "2567 7ef1bf49-c88b-4b40-b424-c7ac53a267a6 raw 2097 \n", "2568 e1800047-f1c9-48c7-a49a-80c3a87853e7 raw 2098 \n", "5427 d329f192-401c-44c3-8a1e-b0d4da680b6c raw 4542 \n", "5428 59e82765-c236-4f5f-b05c-73ce6efdc84b raw 4543 \n", "5429 85f5bb33-7223-4c41-884a-544d7d1fefbf raw 4544 \n", "5430 ba1eb7bb-71d2-4ab0-8649-5e236cc15cc4 raw 4545 \n", "5431 8577fdb0-8c24-4c64-992e-a92a0ecfc03d raw 4546 \n", "5432 c87ddbdf-e0eb-40ff-b3cc-71a53aef8d62 raw 4547 \n", "5433 5d7091d0-e8ec-4974-9dcf-f8336bb2b92d raw 4548 \n", "9689 098dbd63-8c5b-4842-bb44-bad07923f13d raw 8215 \n", "9690 83896884-2fd2-4567-9d29-791aa1d45795 raw 8216 \n", "9691 83896884-2fd2-4567-9d29-791aa1d45795 path_end 8216 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2566 main 2019-12-06 16:22:57.484842 219483890 \n", "2567 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2568 catalog 2019-12-06 16:23:48.116617 219483890 \n", "5427 main 2020-01-06 22:10:13.635011 219483890 \n", "5428 catalog 2020-01-06 22:10:15.228575 219483890 \n", "5429 cart 2020-01-06 22:10:42.309028 219483890 \n", "5430 catalog 2020-01-06 22:10:52.255859 219483890 \n", "5431 product1 2020-01-06 22:11:01.709800 219483890 \n", "5432 catalog 2020-01-06 22:11:02.899490 219483890 \n", "5433 catalog 2020-01-06 22:11:28.271366 219483890 \n", "9689 main 2020-02-14 21:04:49.450696 219483890 \n", "9690 catalog 
2020-02-14 21:04:51.717127 219483890 \n", "9691 path_end 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Apply the AddStartEndEvents processor and show the whole path of one user.\n", "res = (\n", "    stream\n", "    .add_start_end_events()\n", "    .to_dataframe()\n", ")\n", "res.loc[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "537f5f49", "metadata": { "id": "eCd-ueYXPtiV" }, "source": [ "#### SplitSessions" ] }, { "cell_type": "markdown", "id": "6cdc5192-8a43-452d-a548-6d59b7460931", "metadata": {}, "source": [ "##### timeout delimiter" ] }, { "cell_type": "code", "execution_count": 5, "id": "557409a4", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 802 }, "executionInfo": { "elapsed": 721, "status": "ok", "timestamp": 1683202247740, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "084e702d", "outputId": "f27858c1-8777-4832-df3b-c2cb5b1eaea3", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890219483890_1
10f3fb872-72b6-4aba-8e1d-80f0d99cbcf5session_start0session_start2019-11-01 17:59:13.273932219483890219483890_1
266f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890219483890_1
31a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890219483890_1
4dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890219483890_1
513f2b0a9-b2f4-4ffa-b0d1-446f12f6c570raw3catalog2019-11-01 17:59:32.557029219483890219483890_1
656228b93-4648-4276-931b-0644829cd8f5session_end3session_end2019-11-01 17:59:32.557029219483890219483890_1
3392685656a2-5c2b-4879-a3ae-cf948aea8271session_start2096session_start2019-12-06 16:22:57.484842219483890219483890_2
3393bd14ea53-9688-466d-87b2-0494ace4e011raw2096main2019-12-06 16:22:57.484842219483890219483890_2
33947ef1bf49-c88b-4b40-b424-c7ac53a267a6raw2097catalog2019-12-06 16:23:01.331109219483890219483890_2
3395e1800047-f1c9-48c7-a49a-80c3a87853e7raw2098catalog2019-12-06 16:23:48.116617219483890219483890_2
3396f7470a17-e6a1-4aaa-aa19-6540cfd5a956session_end2098session_end2019-12-06 16:23:48.116617219483890219483890_2
7311acec84c0-5cd6-4658-aa91-8afd9e376931session_start4542session_start2020-01-06 22:10:13.635011219483890219483890_3
7312d329f192-401c-44c3-8a1e-b0d4da680b6craw4542main2020-01-06 22:10:13.635011219483890219483890_3
731359e82765-c236-4f5f-b05c-73ce6efdc84braw4543catalog2020-01-06 22:10:15.228575219483890219483890_3
731485f5bb33-7223-4c41-884a-544d7d1fefbfraw4544cart2020-01-06 22:10:42.309028219483890219483890_3
7315ba1eb7bb-71d2-4ab0-8649-5e236cc15cc4raw4545catalog2020-01-06 22:10:52.255859219483890219483890_3
73168577fdb0-8c24-4c64-992e-a92a0ecfc03draw4546product12020-01-06 22:11:01.709800219483890219483890_3
7317c87ddbdf-e0eb-40ff-b3cc-71a53aef8d62raw4547catalog2020-01-06 22:11:02.899490219483890219483890_3
73185d7091d0-e8ec-4974-9dcf-f8336bb2b92draw4548catalog2020-01-06 22:11:28.271366219483890219483890_3
7319a286c046-ee77-4e2f-bdd0-022fdf3c0ca4session_end4548session_end2020-01-06 22:11:28.271366219483890219483890_3
13259622fcb6c-e735-402d-b559-ab52382d14bdsession_start8215session_start2020-02-14 21:04:49.450696219483890219483890_4
13260098dbd63-8c5b-4842-bb44-bad07923f13draw8215main2020-02-14 21:04:49.450696219483890219483890_4
1326183896884-2fd2-4567-9d29-791aa1d45795raw8216catalog2020-02-14 21:04:51.717127219483890219483890_4
13262fcb1e57d-5f0d-43a9-afbd-109366e26b4dsession_end8216session_end2020-02-14 21:04:51.717127219483890219483890_4
1326383896884-2fd2-4567-9d29-791aa1d45795path_end8216path_end2020-02-14 21:04:51.717127219483890219483890_4
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 \n", "1 0f3fb872-72b6-4aba-8e1d-80f0d99cbcf5 session_start 0 \n", "2 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 \n", "3 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 \n", "4 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 \n", "5 13f2b0a9-b2f4-4ffa-b0d1-446f12f6c570 raw 3 \n", "6 56228b93-4648-4276-931b-0644829cd8f5 session_end 3 \n", "3392 685656a2-5c2b-4879-a3ae-cf948aea8271 session_start 2096 \n", "3393 bd14ea53-9688-466d-87b2-0494ace4e011 raw 2096 \n", "3394 7ef1bf49-c88b-4b40-b424-c7ac53a267a6 raw 2097 \n", "3395 e1800047-f1c9-48c7-a49a-80c3a87853e7 raw 2098 \n", "3396 f7470a17-e6a1-4aaa-aa19-6540cfd5a956 session_end 2098 \n", "7311 acec84c0-5cd6-4658-aa91-8afd9e376931 session_start 4542 \n", "7312 d329f192-401c-44c3-8a1e-b0d4da680b6c raw 4542 \n", "7313 59e82765-c236-4f5f-b05c-73ce6efdc84b raw 4543 \n", "7314 85f5bb33-7223-4c41-884a-544d7d1fefbf raw 4544 \n", "7315 ba1eb7bb-71d2-4ab0-8649-5e236cc15cc4 raw 4545 \n", "7316 8577fdb0-8c24-4c64-992e-a92a0ecfc03d raw 4546 \n", "7317 c87ddbdf-e0eb-40ff-b3cc-71a53aef8d62 raw 4547 \n", "7318 5d7091d0-e8ec-4974-9dcf-f8336bb2b92d raw 4548 \n", "7319 a286c046-ee77-4e2f-bdd0-022fdf3c0ca4 session_end 4548 \n", "13259 622fcb6c-e735-402d-b559-ab52382d14bd session_start 8215 \n", "13260 098dbd63-8c5b-4842-bb44-bad07923f13d raw 8215 \n", "13261 83896884-2fd2-4567-9d29-791aa1d45795 raw 8216 \n", "13262 fcb1e57d-5f0d-43a9-afbd-109366e26b4d session_end 8216 \n", "13263 83896884-2fd2-4567-9d29-791aa1d45795 path_end 8216 \n", "\n", " event timestamp user_id session_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "1 session_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "2 catalog 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "3 product1 2019-11-01 17:59:28.459271 219483890 219483890_1 \n", "4 cart 2019-11-01 17:59:29.502214 219483890 219483890_1 \n", "5 catalog 2019-11-01 
17:59:32.557029 219483890 219483890_1 \n", "6 session_end 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "3392 session_start 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "3393 main 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "3394 catalog 2019-12-06 16:23:01.331109 219483890 219483890_2 \n", "3395 catalog 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "3396 session_end 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "7311 session_start 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "7312 main 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "7313 catalog 2020-01-06 22:10:15.228575 219483890 219483890_3 \n", "7314 cart 2020-01-06 22:10:42.309028 219483890 219483890_3 \n", "7315 catalog 2020-01-06 22:10:52.255859 219483890 219483890_3 \n", "7316 product1 2020-01-06 22:11:01.709800 219483890 219483890_3 \n", "7317 catalog 2020-01-06 22:11:02.899490 219483890 219483890_3 \n", "7318 catalog 2020-01-06 22:11:28.271366 219483890 219483890_3 \n", "7319 session_end 2020-01-06 22:11:28.271366 219483890 219483890_3 \n", "13259 session_start 2020-02-14 21:04:49.450696 219483890 219483890_4 \n", "13260 main 2020-02-14 21:04:49.450696 219483890 219483890_4 \n", "13261 catalog 2020-02-14 21:04:51.717127 219483890 219483890_4 \n", "13262 session_end 2020-02-14 21:04:51.717127 219483890 219483890_4 \n", "13263 path_end 2020-02-14 21:04:51.717127 219483890 219483890_4 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Split paths into sessions by timeout and show every event of one user.\n", "res = (\n", "    stream\n", "    .split_sessions(timeout=(10, 'm'))\n", "    .to_dataframe()\n", ")\n", "res.loc[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "3a675a23-c635-4ad4-b850-f62e337b6693", "metadata": {}, "source": [ "##### single delimiting event" ] }, { "cell_type": "code", "execution_count": 6, "id": "d7f91794-b9c5-4cd6-9fe4-e833b844beed", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_ideventtimestampsession_id
0111path_start2023-01-01 00:00:00111_0
1111session_start2023-01-01 00:00:00111_1
2111session_end2023-01-01 00:00:00111_0
3111A2023-01-01 00:00:01111_1
4111B2023-01-01 00:00:02111_1
5111session_end2023-01-01 00:00:02111_1
6111session_start2023-01-01 00:00:04111_2
7111C2023-01-01 00:00:04111_2
8111session_end2023-01-01 00:00:04111_2
9111path_end2023-01-01 00:00:04111_2
\n", "
" ], "text/plain": [ " user_id event timestamp session_id\n", "0 111 path_start 2023-01-01 00:00:00 111_0\n", "1 111 session_start 2023-01-01 00:00:00 111_1\n", "2 111 session_end 2023-01-01 00:00:00 111_0\n", "3 111 A 2023-01-01 00:00:01 111_1\n", "4 111 B 2023-01-01 00:00:02 111_1\n", "5 111 session_end 2023-01-01 00:00:02 111_1\n", "6 111 session_start 2023-01-01 00:00:04 111_2\n", "7 111 C 2023-01-01 00:00:04 111_2\n", "8 111 session_end 2023-01-01 00:00:04 111_2\n", "9 111 path_end 2023-01-01 00:00:04 111_2" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Toy log in which one event name, session_delimiter, is passed as the delimiter.\n", "df = pd.DataFrame(\n", "    data=[\n", "        [111, \"session_delimiter\", \"2023-01-01 00:00:00\"],\n", "        [111, \"A\", \"2023-01-01 00:00:01\"],\n", "        [111, \"B\", \"2023-01-01 00:00:02\"],\n", "        [111, \"session_delimiter\", \"2023-01-01 00:00:04\"],\n", "        [111, \"C\", \"2023-01-01 00:00:04\"],\n", "    ],\n", "    columns=[\"user_id\", \"event\", \"timestamp\"],\n", ")\n", "(\n", "    Eventstream(df)\n", "    .split_sessions(delimiter_events=['session_delimiter'])\n", "    .to_dataframe()\n", "    .sort_values(['user_id', 'event_index'])\n", "    .loc[:, ['user_id', 'event', 'timestamp', 'session_id']]\n", ")" ] }, { "cell_type": "markdown", "id": "c40c7e80-24d7-483c-ae27-e3a7340695a2", "metadata": {}, "source": [ "##### paired delimiting event" ] }, { "cell_type": "code", "execution_count": 7, "id": "306a2844-6af2-43f8-8b0f-7416be1c71f6", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_ideventtimestampsession_id
0111path_start2023-01-01 00:00:00111_0
1111session_start2023-01-01 00:00:00111_1
2111A2023-01-01 00:00:01111_1
3111B2023-01-01 00:00:02111_1
4111session_end2023-01-01 00:00:02111_1
5111session_start2023-01-01 00:00:04111_2
6111C2023-01-01 00:00:04111_2
7111session_end2023-01-01 00:00:04111_2
8111path_end2023-01-01 00:00:04111_2
\n", "
" ], "text/plain": [ " user_id event timestamp session_id\n", "0 111 path_start 2023-01-01 00:00:00 111_0\n", "1 111 session_start 2023-01-01 00:00:00 111_1\n", "2 111 A 2023-01-01 00:00:01 111_1\n", "3 111 B 2023-01-01 00:00:02 111_1\n", "4 111 session_end 2023-01-01 00:00:02 111_1\n", "5 111 session_start 2023-01-01 00:00:04 111_2\n", "6 111 C 2023-01-01 00:00:04 111_2\n", "7 111 session_end 2023-01-01 00:00:04 111_2\n", "8 111 path_end 2023-01-01 00:00:04 111_2" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Toy log in which custom_start / custom_end events are passed as a pair of delimiters.\n", "df = pd.DataFrame(\n", "    data=[\n", "        [111, \"custom_start\", \"2023-01-01 00:00:00\"],\n", "        [111, \"A\", \"2023-01-01 00:00:01\"],\n", "        [111, \"B\", \"2023-01-01 00:00:02\"],\n", "        [111, \"custom_end\", \"2023-01-01 00:00:02\"],\n", "        [111, \"custom_start\", \"2023-01-01 00:00:04\"],\n", "        [111, \"C\", \"2023-01-01 00:00:04\"],\n", "        [111, \"custom_end\", \"2023-01-01 00:00:04\"],\n", "    ],\n", "    columns=[\"user_id\", \"event\", \"timestamp\"],\n", ")\n", "dummy_stream = Eventstream(df)\n", "(\n", "    dummy_stream\n", "    .split_sessions(delimiter_events=['custom_start', 'custom_end'])\n", "    .to_dataframe()\n", "    .sort_values(['user_id', 'event_index'])\n", "    .loc[:, ['user_id', 'event', 'timestamp', 'session_id']]\n", ")" ] }, { "cell_type": "markdown", "id": "2bf003c1-01f6-4013-8fc3-2c10582daf3c", "metadata": {}, "source": [ "##### custom session column" ] }, { "cell_type": "code", "execution_count": 8, "id": "5554d4dd-cbff-4045-8ee7-e2900d050135", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_ideventtimestampsession_idcustom_ses_id
0111path_start2023-01-01 00:00:01111_1session_1
1111session_start2023-01-01 00:00:01111_1session_1
2111A2023-01-01 00:00:01111_1session_1
3111B2023-01-01 00:00:02111_1session_1
4111session_end2023-01-01 00:00:02111_1session_1
5111session_start2023-01-01 00:00:03111_2session_2
6111C2023-01-01 00:00:03111_2session_2
7111D2023-01-01 00:00:04111_2session_2
8111session_end2023-01-01 00:00:04111_2session_2
9111path_end2023-01-01 00:00:04111_2session_2
\n", "
" ], "text/plain": [ " user_id event timestamp session_id custom_ses_id\n", "0 111 path_start 2023-01-01 00:00:01 111_1 session_1\n", "1 111 session_start 2023-01-01 00:00:01 111_1 session_1\n", "2 111 A 2023-01-01 00:00:01 111_1 session_1\n", "3 111 B 2023-01-01 00:00:02 111_1 session_1\n", "4 111 session_end 2023-01-01 00:00:02 111_1 session_1\n", "5 111 session_start 2023-01-01 00:00:03 111_2 session_2\n", "6 111 C 2023-01-01 00:00:03 111_2 session_2\n", "7 111 D 2023-01-01 00:00:04 111_2 session_2\n", "8 111 session_end 2023-01-01 00:00:04 111_2 session_2\n", "9 111 path_end 2023-01-01 00:00:04 111_2 session_2" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Toy log that carries its own session identifier in the custom_ses_id column.\n", "df = pd.DataFrame(\n", "    data=[\n", "        [111, \"A\", \"2023-01-01 00:00:01\", \"session_1\"],\n", "        [111, \"B\", \"2023-01-01 00:00:02\", \"session_1\"],\n", "        [111, \"C\", \"2023-01-01 00:00:03\", \"session_2\"],\n", "        [111, \"D\", \"2023-01-01 00:00:04\", \"session_2\"],\n", "    ],\n", "    columns=[\"user_id\", \"event\", \"timestamp\", \"custom_ses_id\"],\n", ")\n", "# Map the raw custom_ses_id column to a custom column of the same name.\n", "raw_data_schema = {\n", "    \"custom_cols\": [\n", "        {\"raw_data_col\": \"custom_ses_id\", \"custom_col\": \"custom_ses_id\"},\n", "    ],\n", "}\n", "dummy_stream = Eventstream(df, raw_data_schema=raw_data_schema)\n", "(\n", "    dummy_stream\n", "    .split_sessions(delimiter_col=\"custom_ses_id\")\n", "    .to_dataframe()\n", "    .sort_values([\"user_id\", \"event_index\"])\n", "    .loc[:, [\"user_id\", \"event\", \"timestamp\", \"session_id\", \"custom_ses_id\"]]\n", ")" ] }, { "cell_type": "markdown", "id": "48554ba0", "metadata": { "id": "_e5K-yFLOnAL" }, "source": [ "#### LabelNewUsers" ] }, { "cell_type": "code", "execution_count": 9, "id": "637b20ee", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 266, "status": "ok", "timestamp": 1683202248002, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "78be028a", "outputId": 
"31b3dd35-f078-4ad6-f168-5404dd962ac5", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890
166f48c7c-bb89-4de8-88d9-4c8226a3dfe0new_user0new_user2019-11-01 17:59:13.273932219483890
266f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890
31a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890
4dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 path_start \n", "1 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 new_user 0 new_user \n", "2 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 catalog \n", "3 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 product1 \n", "4 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 cart \n", "\n", " timestamp user_id \n", "0 2019-11-01 17:59:13.273932 219483890 \n", "1 2019-11-01 17:59:13.273932 219483890 \n", "2 2019-11-01 17:59:13.273932 219483890 \n", "3 2019-11-01 17:59:28.459271 219483890 \n", "4 2019-11-01 17:59:29.502214 219483890 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pass an explicit list of new users, then preview a user from that list.\n", "new_users = [219483890, 964964743, 965024600]\n", "res = (\n", "    stream\n", "    .label_new_users(new_users_list=new_users)\n", "    .to_dataframe()\n", ")\n", "res.loc[res['user_id'] == 219483890].head()" ] }, { "cell_type": "code", "execution_count": 10, "id": "fc13e19e", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1683202248003, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "892ce5f5", "outputId": "f5086658-2a85-4202-d192-b8a010849771", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
190891a9aaa3d-b99c-4535-9b9e-31dc45c31288path_start14768path_start2020-04-02 05:36:04.896839501098384
190901a9aaa3d-b99c-4535-9b9e-31dc45c31288existing_user14768existing_user2020-04-02 05:36:04.896839501098384
190911a9aaa3d-b99c-4535-9b9e-31dc45c31288raw14768main2020-04-02 05:36:04.896839501098384
19092fdf942d2-6963-47a9-a676-33fce9ae9598raw14769catalog2020-04-02 05:36:05.371141501098384
19093dc9b2f40-5473-42d4-990b-fb72f71c6022raw14770main2020-04-02 05:36:40.814504501098384
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "19089 1a9aaa3d-b99c-4535-9b9e-31dc45c31288 path_start 14768 \n", "19090 1a9aaa3d-b99c-4535-9b9e-31dc45c31288 existing_user 14768 \n", "19091 1a9aaa3d-b99c-4535-9b9e-31dc45c31288 raw 14768 \n", "19092 fdf942d2-6963-47a9-a676-33fce9ae9598 raw 14769 \n", "19093 dc9b2f40-5473-42d4-990b-fb72f71c6022 raw 14770 \n", "\n", " event timestamp user_id \n", "19089 path_start 2020-04-02 05:36:04.896839 501098384 \n", "19090 existing_user 2020-04-02 05:36:04.896839 501098384 \n", "19091 main 2020-04-02 05:36:04.896839 501098384 \n", "19092 catalog 2020-04-02 05:36:05.371141 501098384 \n", "19093 main 2020-04-02 05:36:40.814504 501098384 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 501098384].head()" ] }, { "cell_type": "markdown", "id": "c03b4d11", "metadata": { "id": "fEH11q4-Tk7D" }, "source": [ "#### LabelLostUsers" ] }, { "cell_type": "code", "execution_count": 11, "id": "032cb8eb", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 1025, "status": "ok", "timestamp": 1683202249020, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "8a507b33", "outputId": "a4a8f437-ff06-4a3a-f316-041dc13dbbca", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
57665d7091d0-e8ec-4974-9dcf-f8336bb2b92draw4548catalog2020-01-06 22:11:28.271366219483890
10282098dbd63-8c5b-4842-bb44-bad07923f13draw8215main2020-02-14 21:04:49.450696219483890
1028383896884-2fd2-4567-9d29-791aa1d45795raw8216catalog2020-02-14 21:04:51.717127219483890
1028483896884-2fd2-4567-9d29-791aa1d45795lost_user8216lost_user2020-02-14 21:04:51.717127219483890
1028583896884-2fd2-4567-9d29-791aa1d45795path_end8216path_end2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "5766 5d7091d0-e8ec-4974-9dcf-f8336bb2b92d raw 4548 \n", "10282 098dbd63-8c5b-4842-bb44-bad07923f13d raw 8215 \n", "10283 83896884-2fd2-4567-9d29-791aa1d45795 raw 8216 \n", "10284 83896884-2fd2-4567-9d29-791aa1d45795 lost_user 8216 \n", "10285 83896884-2fd2-4567-9d29-791aa1d45795 path_end 8216 \n", "\n", " event timestamp user_id \n", "5766 catalog 2020-01-06 22:11:28.271366 219483890 \n", "10282 main 2020-02-14 21:04:49.450696 219483890 \n", "10283 catalog 2020-02-14 21:04:51.717127 219483890 \n", "10284 lost_user 2020-02-14 21:04:51.717127 219483890 \n", "10285 path_end 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lost_users_list = [219483890, 964964743, 965024600]\n", "res = stream.label_lost_users(lost_users_list=lost_users_list).to_dataframe()\n", "res[res['user_id'] == 219483890].tail()" ] }, { "cell_type": "code", "execution_count": 12, "id": "a454f04f", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 21, "status": "ok", "timestamp": 1683202249022, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "WHUHp-uRVqc1", "outputId": "495f54ec-ccf5-4b75-ca12-3ad3b5df27dd", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
4353142ff0327-d17e-42c7-b116-66c447f46848raw32280catalog2020-04-29 12:48:01.809577501098384
43532d79171c2-8164-4e74-b306-0c531583716eraw32281main2020-04-29 12:48:01.938488501098384
4353391d3aca3-8031-4782-80ec-0f0abd1d68a1raw32282catalog2020-04-29 12:48:06.595390501098384
4353491d3aca3-8031-4782-80ec-0f0abd1d68a1absent_user32282absent_user2020-04-29 12:48:06.595390501098384
4353591d3aca3-8031-4782-80ec-0f0abd1d68a1path_end32282path_end2020-04-29 12:48:06.595390501098384
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "43531 42ff0327-d17e-42c7-b116-66c447f46848 raw 32280 \n", "43532 d79171c2-8164-4e74-b306-0c531583716e raw 32281 \n", "43533 91d3aca3-8031-4782-80ec-0f0abd1d68a1 raw 32282 \n", "43534 91d3aca3-8031-4782-80ec-0f0abd1d68a1 absent_user 32282 \n", "43535 91d3aca3-8031-4782-80ec-0f0abd1d68a1 path_end 32282 \n", "\n", " event timestamp user_id \n", "43531 catalog 2020-04-29 12:48:01.809577 501098384 \n", "43532 main 2020-04-29 12:48:01.938488 501098384 \n", "43533 catalog 2020-04-29 12:48:06.595390 501098384 \n", "43534 absent_user 2020-04-29 12:48:06.595390 501098384 \n", "43535 path_end 2020-04-29 12:48:06.595390 501098384 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 501098384].tail()" ] }, { "cell_type": "code", "execution_count": 13, "id": "ba4adec4", "metadata": { "executionInfo": { "elapsed": 19, "status": "ok", "timestamp": 1683202249022, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "44f99fee", "tags": [] }, "outputs": [], "source": [ "res = stream.label_lost_users(timeout=(30, 'D')).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 14, "id": "d363b1a1", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 20, "status": "ok", "timestamp": 1683202249023, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "jxysYxu6bDac", "outputId": "b19d7c0b-afa9-4f52-e854-30860b1f0c9f", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Timestamp('2020-04-29 12:48:06.595390')" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['timestamp'].max()" ] }, { "cell_type": "code", "execution_count": 15, "id": "dab04f17", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 15, 
"status": "ok", "timestamp": 1683202249024, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "Dk-3_rwIa8HZ", "outputId": "bf77c9d2-66bd-4f0c-ba0a-a7a6aa188432", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
51999137f1-9316-4d9f-969e-e3781fdedca4path_start47path_start2019-11-02 01:14:08.664850495985018
52999137f1-9316-4d9f-969e-e3781fdedca4raw47catalog2019-11-02 01:14:08.664850495985018
539e37b313-2536-4a52-8b28-a67da83db18araw48cart2019-11-02 01:14:37.435643495985018
549e37b313-2536-4a52-8b28-a67da83db18alost_user48lost_user2019-11-02 01:14:37.435643495985018
559e37b313-2536-4a52-8b28-a67da83db18apath_end48path_end2019-11-02 01:14:37.435643495985018
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "51 999137f1-9316-4d9f-969e-e3781fdedca4 path_start 47 path_start \n", "52 999137f1-9316-4d9f-969e-e3781fdedca4 raw 47 catalog \n", "53 9e37b313-2536-4a52-8b28-a67da83db18a raw 48 cart \n", "54 9e37b313-2536-4a52-8b28-a67da83db18a lost_user 48 lost_user \n", "55 9e37b313-2536-4a52-8b28-a67da83db18a path_end 48 path_end \n", "\n", " timestamp user_id \n", "51 2019-11-02 01:14:08.664850 495985018 \n", "52 2019-11-02 01:14:08.664850 495985018 \n", "53 2019-11-02 01:14:37.435643 495985018 \n", "54 2019-11-02 01:14:37.435643 495985018 \n", "55 2019-11-02 01:14:37.435643 495985018 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 495985018]" ] }, { "cell_type": "code", "execution_count": 16, "id": "798aca6a", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 425 }, "executionInfo": { "elapsed": 14, "status": "ok", "timestamp": 1683202249024, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "MqYBfIIhcV3R", "outputId": "18d98cb7-3ee6-4902-d2d0-9c2c6d64fe59", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
29525095d2d6e-969d-49c2-b161-05580f7a297apath_start22394path_start2020-04-15 21:02:36.903678819489198
29526095d2d6e-969d-49c2-b161-05580f7a297araw22394main2020-04-15 21:02:36.903678819489198
295273ef986a9-1032-4028-a43e-2ef003f744c6raw22395catalog2020-04-15 21:02:37.658557819489198
29528df5c68ca-56af-49b1-bdfb-9c20530da455raw22396catalog2020-04-15 21:02:48.699804819489198
29529512434e5-eb39-4631-8a1f-fffcac3f431eraw22397product22020-04-15 21:02:51.173118819489198
29532176711f2-f2ce-4c24-91c0-1499418344b0raw22399catalog2020-04-15 21:03:05.813046819489198
29534de5eba5c-bef1-406b-9699-15c28c1bd5b5raw22401cart2020-04-15 21:03:35.216033819489198
29540d60cefd1-df17-4ebb-8344-e70528b50f27raw22404delivery_choice2020-04-15 21:03:40.745520819489198
29541fa3025a2-6148-4d45-a92b-02984b44b85craw22405delivery_pickup2020-04-15 21:03:46.448349819489198
29542fd4a6b2b-5883-45d6-b891-f800f9de8b6craw22406payment_choice2020-04-15 21:03:46.575300819489198
29543a39b3138-d592-4ee6-a3f2-96d975152473raw22407payment_card2020-04-15 21:03:46.862126819489198
29544dff2e3d5-a083-4311-9168-053f32128224raw22408payment_done2020-04-15 21:03:47.074946819489198
29545dff2e3d5-a083-4311-9168-053f32128224absent_user22408absent_user2020-04-15 21:03:47.074946819489198
29546dff2e3d5-a083-4311-9168-053f32128224path_end22408path_end2020-04-15 21:03:47.074946819489198
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "29525 095d2d6e-969d-49c2-b161-05580f7a297a path_start 22394 \n", "29526 095d2d6e-969d-49c2-b161-05580f7a297a raw 22394 \n", "29527 3ef986a9-1032-4028-a43e-2ef003f744c6 raw 22395 \n", "29528 df5c68ca-56af-49b1-bdfb-9c20530da455 raw 22396 \n", "29529 512434e5-eb39-4631-8a1f-fffcac3f431e raw 22397 \n", "29532 176711f2-f2ce-4c24-91c0-1499418344b0 raw 22399 \n", "29534 de5eba5c-bef1-406b-9699-15c28c1bd5b5 raw 22401 \n", "29540 d60cefd1-df17-4ebb-8344-e70528b50f27 raw 22404 \n", "29541 fa3025a2-6148-4d45-a92b-02984b44b85c raw 22405 \n", "29542 fd4a6b2b-5883-45d6-b891-f800f9de8b6c raw 22406 \n", "29543 a39b3138-d592-4ee6-a3f2-96d975152473 raw 22407 \n", "29544 dff2e3d5-a083-4311-9168-053f32128224 raw 22408 \n", "29545 dff2e3d5-a083-4311-9168-053f32128224 absent_user 22408 \n", "29546 dff2e3d5-a083-4311-9168-053f32128224 path_end 22408 \n", "\n", " event timestamp user_id \n", "29525 path_start 2020-04-15 21:02:36.903678 819489198 \n", "29526 main 2020-04-15 21:02:36.903678 819489198 \n", "29527 catalog 2020-04-15 21:02:37.658557 819489198 \n", "29528 catalog 2020-04-15 21:02:48.699804 819489198 \n", "29529 product2 2020-04-15 21:02:51.173118 819489198 \n", "29532 catalog 2020-04-15 21:03:05.813046 819489198 \n", "29534 cart 2020-04-15 21:03:35.216033 819489198 \n", "29540 delivery_choice 2020-04-15 21:03:40.745520 819489198 \n", "29541 delivery_pickup 2020-04-15 21:03:46.448349 819489198 \n", "29542 payment_choice 2020-04-15 21:03:46.575300 819489198 \n", "29543 payment_card 2020-04-15 21:03:46.862126 819489198 \n", "29544 payment_done 2020-04-15 21:03:47.074946 819489198 \n", "29545 absent_user 2020-04-15 21:03:47.074946 819489198 \n", "29546 path_end 2020-04-15 21:03:47.074946 819489198 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 819489198]" ] }, { "cell_type": "markdown", "id": "b2dab007", "metadata": { "id": "Zaox_z_rdfN_" }, 
"source": [ "#### AddPositiveEvents" ] }, { "cell_type": "code", "execution_count": 17, "id": "dcf9fa06", "metadata": { "executionInfo": { "elapsed": 252, "status": "ok", "timestamp": 1683202249263, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "e3085bf2", "tags": [] }, "outputs": [], "source": [ "positive_events = ['cart', 'payment_done']\n", "res = stream.add_positive_events(\n", " targets=positive_events\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 18, "id": "ca07590b", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 582 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1683202249263, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "ac0c040a", "outputId": "8658076d-e712-4c41-bff9-e230465e2e17", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0b3cfcc8e-45ea-43da-86b5-f629f066edcdpath_start0path_start2019-11-01 17:59:13.273932219483890
1ff9aef51-ac0a-4f28-b180-5bf2594cb316raw0catalog2019-11-01 17:59:13.273932219483890
211d21f53-5058-4141-8e27-cc7cd1198eb3raw1product12019-11-01 17:59:28.459271219483890
3a1194898-0973-4190-9561-4dbc00ee8b48raw2cart2019-11-01 17:59:29.502214219483890
4a1194898-0973-4190-9561-4dbc00ee8b48positive_target2positive_target_cart2019-11-01 17:59:29.502214219483890
563f87bb5-19ce-4482-a877-9210a26499c4raw3catalog2019-11-01 17:59:32.557029219483890
271436301165-07c7-44b8-8ca1-a393a223c630raw2096main2019-12-06 16:22:57.484842219483890
2715a96b35df-cb62-4172-88eb-c3c3f83d3f07raw2097catalog2019-12-06 16:23:01.331109219483890
2716628246e3-7192-4855-8dc1-649efe6636acraw2098catalog2019-12-06 16:23:48.116617219483890
5705e98ac714-fbdf-4ea6-ba6f-f5e7708f7280raw4542main2020-01-06 22:10:13.635011219483890
57060ef45045-e18a-49a6-8296-a04ba5d5b6f8raw4543catalog2020-01-06 22:10:15.228575219483890
57071ec9e6e6-60de-42f7-a151-96795b60d0d8raw4544cart2020-01-06 22:10:42.309028219483890
5708f5cbf21a-2ad6-4395-b66f-8b8cfa6724d0raw4545catalog2020-01-06 22:10:52.255859219483890
5709308aba0e-99d8-4757-bbb4-60186fe6088draw4546product12020-01-06 22:11:01.709800219483890
571000d630fe-f54f-49c4-852b-637463aebc41raw4547catalog2020-01-06 22:11:02.899490219483890
5711d9b1f739-2a38-437b-9097-822e75854c65raw4548catalog2020-01-06 22:11:28.271366219483890
10139f32a12f2-932e-4730-843c-16ae4cd5cb85raw8215main2020-02-14 21:04:49.450696219483890
10140c1bade3f-7137-4fdb-b2ee-383b6ba41dccraw8216catalog2020-02-14 21:04:51.717127219483890
101411d72e981-b1a0-4498-9cfc-64096289492bpath_end8216path_end2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 b3cfcc8e-45ea-43da-86b5-f629f066edcd path_start 0 \n", "1 ff9aef51-ac0a-4f28-b180-5bf2594cb316 raw 0 \n", "2 11d21f53-5058-4141-8e27-cc7cd1198eb3 raw 1 \n", "3 a1194898-0973-4190-9561-4dbc00ee8b48 raw 2 \n", "4 a1194898-0973-4190-9561-4dbc00ee8b48 positive_target 2 \n", "5 63f87bb5-19ce-4482-a877-9210a26499c4 raw 3 \n", "2714 36301165-07c7-44b8-8ca1-a393a223c630 raw 2096 \n", "2715 a96b35df-cb62-4172-88eb-c3c3f83d3f07 raw 2097 \n", "2716 628246e3-7192-4855-8dc1-649efe6636ac raw 2098 \n", "5705 e98ac714-fbdf-4ea6-ba6f-f5e7708f7280 raw 4542 \n", "5706 0ef45045-e18a-49a6-8296-a04ba5d5b6f8 raw 4543 \n", "5707 1ec9e6e6-60de-42f7-a151-96795b60d0d8 raw 4544 \n", "5708 f5cbf21a-2ad6-4395-b66f-8b8cfa6724d0 raw 4545 \n", "5709 308aba0e-99d8-4757-bbb4-60186fe6088d raw 4546 \n", "5710 00d630fe-f54f-49c4-852b-637463aebc41 raw 4547 \n", "5711 d9b1f739-2a38-437b-9097-822e75854c65 raw 4548 \n", "10139 f32a12f2-932e-4730-843c-16ae4cd5cb85 raw 8215 \n", "10140 c1bade3f-7137-4fdb-b2ee-383b6ba41dcc raw 8216 \n", "10141 1d72e981-b1a0-4498-9cfc-64096289492b path_end 8216 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 positive_target_cart 2019-11-01 17:59:29.502214 219483890 \n", "5 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2714 main 2019-12-06 16:22:57.484842 219483890 \n", "2715 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2716 catalog 2019-12-06 16:23:48.116617 219483890 \n", "5705 main 2020-01-06 22:10:13.635011 219483890 \n", "5706 catalog 2020-01-06 22:10:15.228575 219483890 \n", "5707 cart 2020-01-06 22:10:42.309028 219483890 \n", "5708 catalog 2020-01-06 22:10:52.255859 219483890 \n", "5709 product1 2020-01-06 22:11:01.709800 219483890 \n", "5710 catalog 2020-01-06 22:11:02.899490 219483890 
\n", "5711 catalog 2020-01-06 22:11:28.271366 219483890 \n", "10139 main 2020-02-14 21:04:49.450696 219483890 \n", "10140 catalog 2020-02-14 21:04:51.717127 219483890 \n", "10141 path_end 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "code", "execution_count": 19, "id": "442001b3", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 174 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1683202249264, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "5f154748", "outputId": "8b197e5d-8430-4761-f3ba-3570c5879969", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
754be8a46a-4463-4268-8978-739cb6dbcd09path_start60path_start2019-11-02 07:28:07.28554124427596
762d23e50c-aea2-4bcc-a316-1f91439e907draw60main2019-11-02 07:28:07.28554124427596
77dca1467a-511b-4747-8c0d-cff0d9a9b99araw61catalog2019-11-02 07:28:14.31985024427596
78f7a4cd26-6364-4486-aaac-cf5e5adaedc7raw62catalog2019-11-02 07:29:08.30133324427596
79e1f8a956-f98b-4857-bb4c-0895dc41d828raw63catalog2019-11-02 07:29:41.84839624427596
804633035e-853e-429c-8721-2b6be2a4fdb2path_end63path_end2019-11-02 07:29:41.84839624427596
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "75 4be8a46a-4463-4268-8978-739cb6dbcd09 path_start 60 path_start \n", "76 2d23e50c-aea2-4bcc-a316-1f91439e907d raw 60 main \n", "77 dca1467a-511b-4747-8c0d-cff0d9a9b99a raw 61 catalog \n", "78 f7a4cd26-6364-4486-aaac-cf5e5adaedc7 raw 62 catalog \n", "79 e1f8a956-f98b-4857-bb4c-0895dc41d828 raw 63 catalog \n", "80 4633035e-853e-429c-8721-2b6be2a4fdb2 path_end 63 path_end \n", "\n", " timestamp user_id \n", "75 2019-11-02 07:28:07.285541 24427596 \n", "76 2019-11-02 07:28:07.285541 24427596 \n", "77 2019-11-02 07:28:14.319850 24427596 \n", "78 2019-11-02 07:29:08.301333 24427596 \n", "79 2019-11-02 07:29:41.848396 24427596 \n", "80 2019-11-02 07:29:41.848396 24427596 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 24427596]" ] }, { "cell_type": "code", "execution_count": 20, "id": "ac627d7b", "metadata": { "executionInfo": { "elapsed": 250, "status": "ok", "timestamp": 1683202249509, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "raAvh_BFqK2i", "tags": [] }, "outputs": [], "source": [ "def custom_func(eventstream, targets) -> pd.DataFrame:\n", "\n", " event_col = eventstream.schema.event_name\n", " df = eventstream.to_dataframe()\n", "\n", " return df[df[event_col].isin(targets)]\n", "\n", "res = stream.add_positive_events(\n", " targets=positive_events,\n", " func=custom_func\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 21, "id": "a023a8ba", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 613 }, "executionInfo": { "elapsed": 260, "status": "ok", "timestamp": 1683202249767, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "n1j7mtHdqeoM", "outputId": "9dbc9448-63b1-4bc3-ef55-aff1842ecd09", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0e474f5c7-13ed-4adc-99e7-1ccc79cd8f08path_start0path_start2019-11-01 17:59:13.273932219483890
14583f92a-e2ce-4dae-8c36-1551713bc387raw0catalog2019-11-01 17:59:13.273932219483890
2b383388a-fcaa-4fe4-b261-b19305be33efraw1product12019-11-01 17:59:28.459271219483890
3840eab3d-0ff3-420b-b40a-22ffae6e2288raw2cart2019-11-01 17:59:29.502214219483890
4840eab3d-0ff3-420b-b40a-22ffae6e2288positive_target2positive_target_cart2019-11-01 17:59:29.502214219483890
5ec59652a-45f1-4621-9f2a-0749e5385aferaw3catalog2019-11-01 17:59:32.557029219483890
2810a2107536-9673-470f-ad9d-754990b8f30craw2096main2019-12-06 16:22:57.484842219483890
281119785679-dca7-4834-be1c-bb3b7a5a0698raw2097catalog2019-12-06 16:23:01.331109219483890
2812811193b0-05a4-43b7-af58-08e70061d02araw2098catalog2019-12-06 16:23:48.116617219483890
5926bc01debb-96d9-40d7-b2bb-8c28fb2f6c02raw4542main2020-01-06 22:10:13.635011219483890
5927dcaaac82-fa4c-43c5-bf85-751bc3f6cee7raw4543catalog2020-01-06 22:10:15.228575219483890
5928bf61f0ae-d2fa-4942-bd46-a4f50dd4db47raw4544cart2020-01-06 22:10:42.309028219483890
5929bf61f0ae-d2fa-4942-bd46-a4f50dd4db47positive_target4544positive_target_cart2020-01-06 22:10:42.309028219483890
5930a876bd20-b994-45d9-8b99-926b3e2dc57fraw4545catalog2020-01-06 22:10:52.255859219483890
5931d00a90cd-a40b-4493-87e9-81d157089534raw4546product12020-01-06 22:11:01.709800219483890
59329d175bdd-f9be-4ed1-a2aa-5bf581962e37raw4547catalog2020-01-06 22:11:02.899490219483890
59334cf358cf-29e6-4eff-a397-7a9bb66952daraw4548catalog2020-01-06 22:11:28.271366219483890
1057227788259-df5c-4f79-81b2-46a464eabd04raw8215main2020-02-14 21:04:49.450696219483890
10573f333f2fb-a766-4eb0-bbfc-d172a9c542dbraw8216catalog2020-02-14 21:04:51.717127219483890
10574fff5c4b7-c18b-4005-b5bc-765491c44f97path_end8216path_end2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 e474f5c7-13ed-4adc-99e7-1ccc79cd8f08 path_start 0 \n", "1 4583f92a-e2ce-4dae-8c36-1551713bc387 raw 0 \n", "2 b383388a-fcaa-4fe4-b261-b19305be33ef raw 1 \n", "3 840eab3d-0ff3-420b-b40a-22ffae6e2288 raw 2 \n", "4 840eab3d-0ff3-420b-b40a-22ffae6e2288 positive_target 2 \n", "5 ec59652a-45f1-4621-9f2a-0749e5385afe raw 3 \n", "2810 a2107536-9673-470f-ad9d-754990b8f30c raw 2096 \n", "2811 19785679-dca7-4834-be1c-bb3b7a5a0698 raw 2097 \n", "2812 811193b0-05a4-43b7-af58-08e70061d02a raw 2098 \n", "5926 bc01debb-96d9-40d7-b2bb-8c28fb2f6c02 raw 4542 \n", "5927 dcaaac82-fa4c-43c5-bf85-751bc3f6cee7 raw 4543 \n", "5928 bf61f0ae-d2fa-4942-bd46-a4f50dd4db47 raw 4544 \n", "5929 bf61f0ae-d2fa-4942-bd46-a4f50dd4db47 positive_target 4544 \n", "5930 a876bd20-b994-45d9-8b99-926b3e2dc57f raw 4545 \n", "5931 d00a90cd-a40b-4493-87e9-81d157089534 raw 4546 \n", "5932 9d175bdd-f9be-4ed1-a2aa-5bf581962e37 raw 4547 \n", "5933 4cf358cf-29e6-4eff-a397-7a9bb66952da raw 4548 \n", "10572 27788259-df5c-4f79-81b2-46a464eabd04 raw 8215 \n", "10573 f333f2fb-a766-4eb0-bbfc-d172a9c542db raw 8216 \n", "10574 fff5c4b7-c18b-4005-b5bc-765491c44f97 path_end 8216 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 positive_target_cart 2019-11-01 17:59:29.502214 219483890 \n", "5 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2810 main 2019-12-06 16:22:57.484842 219483890 \n", "2811 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2812 catalog 2019-12-06 16:23:48.116617 219483890 \n", "5926 main 2020-01-06 22:10:13.635011 219483890 \n", "5927 catalog 2020-01-06 22:10:15.228575 219483890 \n", "5928 cart 2020-01-06 22:10:42.309028 219483890 \n", "5929 positive_target_cart 2020-01-06 22:10:42.309028 219483890 \n", "5930 catalog 2020-01-06 
22:10:52.255859 219483890 \n", "5931 product1 2020-01-06 22:11:01.709800 219483890 \n", "5932 catalog 2020-01-06 22:11:02.899490 219483890 \n", "5933 catalog 2020-01-06 22:11:28.271366 219483890 \n", "10572 main 2020-02-14 21:04:49.450696 219483890 \n", "10573 catalog 2020-02-14 21:04:51.717127 219483890 \n", "10574 path_end 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "6c14278e", "metadata": { "id": "Dsy2d3d4kgPo" }, "source": [ "#### AddNegativeEvents" ] }, { "cell_type": "code", "execution_count": 22, "id": "43fe9601", "metadata": { "executionInfo": { "elapsed": 255, "status": "ok", "timestamp": 1683202250018, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "NuZkvgl3DgO7", "tags": [] }, "outputs": [], "source": [ "negative_events = ['delivery_courier']\n", "\n", "res = stream.add_negative_events(\n", " targets=negative_events\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 23, "id": "b17d4327", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 269 }, "executionInfo": { "elapsed": 7, "status": "ok", "timestamp": 1683202250018, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "8d6e4fa4", "outputId": "aeb2fb97-dcbb-4b25-a329-a49aac6ab6ee", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
371ea61a7b-aaaf-4396-a1d6-de1832c34e48raw33product22019-11-01 22:35:33.142711629881394
407be57d4e-10cf-4843-b133-66b78dc92c18raw36cart2019-11-01 22:35:50.437706629881394
42e9e5ce4d-6e6e-4ab6-9ff0-e6df52aa7319raw38delivery_choice2019-11-01 22:35:57.649549629881394
43ca50768a-b4db-45dd-bd8a-01fe22a3bfebraw39delivery_courier2019-11-01 22:36:02.009271629881394
44ca50768a-b4db-45dd-bd8a-01fe22a3bfebnegative_target39negative_target_delivery_courier2019-11-01 22:36:02.009271629881394
48a90e7550-4949-458d-99ec-d52240b2a3d1raw42payment_choice2019-11-01 22:36:02.243274629881394
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "37 1ea61a7b-aaaf-4396-a1d6-de1832c34e48 raw 33 \n", "40 7be57d4e-10cf-4843-b133-66b78dc92c18 raw 36 \n", "42 e9e5ce4d-6e6e-4ab6-9ff0-e6df52aa7319 raw 38 \n", "43 ca50768a-b4db-45dd-bd8a-01fe22a3bfeb raw 39 \n", "44 ca50768a-b4db-45dd-bd8a-01fe22a3bfeb negative_target 39 \n", "48 a90e7550-4949-458d-99ec-d52240b2a3d1 raw 42 \n", "\n", " event timestamp user_id \n", "37 product2 2019-11-01 22:35:33.142711 629881394 \n", "40 cart 2019-11-01 22:35:50.437706 629881394 \n", "42 delivery_choice 2019-11-01 22:35:57.649549 629881394 \n", "43 delivery_courier 2019-11-01 22:36:02.009271 629881394 \n", "44 negative_target_delivery_courier 2019-11-01 22:36:02.009271 629881394 \n", "48 payment_choice 2019-11-01 22:36:02.243274 629881394 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 629881394].loc[36:48]" ] }, { "cell_type": "markdown", "id": "45402690", "metadata": { "id": "AHKODj-tpTA-" }, "source": [ "#### LabelCroppedPaths" ] }, { "cell_type": "code", "execution_count": 24, "id": "a9038764", "metadata": { "colab": { "background_save": true }, "executionInfo": { "elapsed": 442, "status": "ok", "timestamp": 1683202250455, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "ea101f85", "tags": [] }, "outputs": [], "source": [ "params = {\n", " 'left_cutoff': (4, 'D'),\n", " 'right_cutoff': (3, 'D')\n", "}\n", "\n", "res = stream.label_cropped_paths(**params).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 25, "id": "a64226c8", "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202250455, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "9bb12ad4", "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ "Eventstream start: 2019-11-01 17:59:13.273932\n", "Eventstream end: 2020-04-29 12:48:06.595390\n" ] } ], "source": [ "print('Eventstream start: {}'.format(res.timestamp.min()))\n", "print('Eventstream end: {}'.format(res.timestamp.max()))" ] }, { "cell_type": "code", "execution_count": 26, "id": "a6b083e9", "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1683202250456, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "9062db7a", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
51999137f1-9316-4d9f-969e-e3781fdedca4path_start47path_start2019-11-02 01:14:08.664850495985018
52999137f1-9316-4d9f-969e-e3781fdedca4cropped_left47cropped_left2019-11-02 01:14:08.664850495985018
53999137f1-9316-4d9f-969e-e3781fdedca4raw47catalog2019-11-02 01:14:08.664850495985018
549e37b313-2536-4a52-8b28-a67da83db18araw48cart2019-11-02 01:14:37.435643495985018
559e37b313-2536-4a52-8b28-a67da83db18apath_end48path_end2019-11-02 01:14:37.435643495985018
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "51 999137f1-9316-4d9f-969e-e3781fdedca4 path_start 47 \n", "52 999137f1-9316-4d9f-969e-e3781fdedca4 cropped_left 47 \n", "53 999137f1-9316-4d9f-969e-e3781fdedca4 raw 47 \n", "54 9e37b313-2536-4a52-8b28-a67da83db18a raw 48 \n", "55 9e37b313-2536-4a52-8b28-a67da83db18a path_end 48 \n", "\n", " event timestamp user_id \n", "51 path_start 2019-11-02 01:14:08.664850 495985018 \n", "52 cropped_left 2019-11-02 01:14:08.664850 495985018 \n", "53 catalog 2019-11-02 01:14:08.664850 495985018 \n", "54 cart 2019-11-02 01:14:37.435643 495985018 \n", "55 path_end 2019-11-02 01:14:37.435643 495985018 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 495985018]" ] }, { "cell_type": "code", "execution_count": 27, "id": "49fb8111", "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 237 }, "executionInfo": { "elapsed": 7, "status": "ok", "timestamp": 1683202250456, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "b2814dfd", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
40028b23acb30-1775-47d4-a12e-fe9770bb8ce3path_start32258path_start2020-04-29 12:24:21.538805831491833
40029b23acb30-1775-47d4-a12e-fe9770bb8ce3raw32258catalog2020-04-29 12:24:21.538805831491833
40030977a2deb-b62f-4c5e-909d-5d1e1c392fc3raw32259catalog2020-04-29 12:24:33.841264831491833
40031a4b1d5fd-2494-4e0a-b20f-58bf3c6fb9b1raw32260product22020-04-29 12:24:39.415424831491833
400324ed1e625-2fff-4978-ab66-ccab5ed2fdfcraw32261cart2020-04-29 12:24:59.928499831491833
40033653ed2cc-e52e-469f-a8c0-57f069fea29fraw32262catalog2020-04-29 12:25:06.262205831491833
40034653ed2cc-e52e-469f-a8c0-57f069fea29fcropped_right32262cropped_right2020-04-29 12:25:06.262205831491833
40035653ed2cc-e52e-469f-a8c0-57f069fea29fpath_end32262path_end2020-04-29 12:25:06.262205831491833
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "40028 b23acb30-1775-47d4-a12e-fe9770bb8ce3 path_start 32258 \n", "40029 b23acb30-1775-47d4-a12e-fe9770bb8ce3 raw 32258 \n", "40030 977a2deb-b62f-4c5e-909d-5d1e1c392fc3 raw 32259 \n", "40031 a4b1d5fd-2494-4e0a-b20f-58bf3c6fb9b1 raw 32260 \n", "40032 4ed1e625-2fff-4978-ab66-ccab5ed2fdfc raw 32261 \n", "40033 653ed2cc-e52e-469f-a8c0-57f069fea29f raw 32262 \n", "40034 653ed2cc-e52e-469f-a8c0-57f069fea29f cropped_right 32262 \n", "40035 653ed2cc-e52e-469f-a8c0-57f069fea29f path_end 32262 \n", "\n", " event timestamp user_id \n", "40028 path_start 2020-04-29 12:24:21.538805 831491833 \n", "40029 catalog 2020-04-29 12:24:21.538805 831491833 \n", "40030 catalog 2020-04-29 12:24:33.841264 831491833 \n", "40031 product2 2020-04-29 12:24:39.415424 831491833 \n", "40032 cart 2020-04-29 12:24:59.928499 831491833 \n", "40033 catalog 2020-04-29 12:25:06.262205 831491833 \n", "40034 cropped_right 2020-04-29 12:25:06.262205 831491833 \n", "40035 path_end 2020-04-29 12:25:06.262205 831491833 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 831491833]" ] }, { "cell_type": "markdown", "id": "6df7d930", "metadata": { "id": "e3f107b8" }, "source": [ "### Removing processors" ] }, { "cell_type": "markdown", "id": "3dff09d1", "metadata": { "id": "JvdFDkSdEJ6N" }, "source": [ "#### FilterEvents" ] }, { "cell_type": "code", "execution_count": 28, "id": "4f9fe247", "metadata": { "executionInfo": { "elapsed": 1947, "status": "ok", "timestamp": 1683202252397, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "9f6232c9", "tags": [] }, "outputs": [], "source": [ "def save_specific_users(df, schema):\n", " users_to_save = [219483890, 964964743, 965024600]\n", " return df[schema.user_id].isin(users_to_save)\n", "\n", "res = stream.filter_events(func=save_specific_users).to_dataframe()" ] }, { "cell_type": "code", 
"execution_count": 29, "id": "2f6972df", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202252398, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "YHeFnaqoF9ml", "outputId": "25bf26b9-e299-45a4-c95c-fb0afbf97e12", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "array([219483890, 964964743, 965024600])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['user_id'].unique().astype(int)" ] }, { "cell_type": "code", "execution_count": 30, "id": "2fd9ebe2", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1683202252398, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "77215f06", "outputId": "bad0bc27-154a-48d0-e60b-2b15e2ca27c2", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "catalog 14518\n", "main 5635\n", "Name: event, dtype: int64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.to_dataframe()\\\n", " ['event']\\\n", " .value_counts()\\\n", " [lambda s: s.index.isin(['catalog', 'main'])]" ] }, { "cell_type": "code", "execution_count": 31, "id": "a6d09576", "metadata": { "executionInfo": { "elapsed": 2448, "status": "ok", "timestamp": 1683202254843, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "066908e9", "tags": [] }, "outputs": [], "source": [ "def exclude_events(df, schema):\n", " events_to_exclude = ['catalog', 'main']\n", " return ~df[schema.event_name].isin(events_to_exclude)\n", "\n", "res = stream.filter_events(func=exclude_events).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 32, "id": "737001f9", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 8, 
"status": "ok", "timestamp": 1683202254843, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "5889ff41", "outputId": "728c3db4-2210-465d-d1f1-917176402ad5", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Series([], Name: event, dtype: int64)" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['event']\\\n", " .value_counts()\\\n", " [lambda s: s.index.isin(['catalog', 'main'])]" ] }, { "cell_type": "markdown", "id": "2753a754", "metadata": { "id": "0o0cznzfJAjO" }, "source": [ "#### DropPaths" ] }, { "cell_type": "code", "execution_count": 33, "id": "ecbfe814", "metadata": { "executionInfo": { "elapsed": 953, "status": "ok", "timestamp": 1683202255792, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "3f345d14", "tags": [] }, "outputs": [], "source": [ "res = stream.drop_paths(min_steps=25).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 34, "id": "90589065", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202255793, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "a466cd41", "outputId": "d672640f-e215-46ac-8a39-b71b00eb70c7", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0faab069d-3d78-49b2-aeee-b8ee32646b34path_start7path_start2019-11-01 22:28:54.791683629881394
1faab069d-3d78-49b2-aeee-b8ee32646b34raw7main2019-11-01 22:28:54.791683629881394
2775dd323-bb8e-4d93-89c5-aa9f3879182araw9catalog2019-11-01 22:29:01.049513629881394
3e38453d7-f7ff-4cb9-8b28-b71854aba7c4raw11catalog2019-11-01 22:29:32.322458629881394
406a1521b-8449-48fa-aa46-e2c143c4eba9raw13catalog2019-11-01 22:30:09.450839629881394
570ddfbab-ea29-42cd-bd37-3d6bcfd2b9d3raw14catalog2019-11-01 22:31:05.565762629881394
6d23f9b60-823f-4d5b-ac15-c598a01cf932raw15main2019-11-01 22:31:08.333560629881394
701afe727-9206-45d7-a132-658278277912raw16catalog2019-11-01 22:31:09.010626629881394
8bfbb6659-eb96-49ec-b9a4-f558f4ad5953raw17product12019-11-01 22:31:10.416231629881394
925212bf7-ad65-4b5b-8f59-783d9822ea56raw18catalog2019-11-01 22:31:43.019527629881394
104ef0a0e8-9b4a-4cf4-a1f4-af96e4a98eb6raw19catalog2019-11-01 22:32:01.596163629881394
1184b5ba70-b8a8-4f3d-967d-b2663e50d0c7raw20product12019-11-01 22:32:07.348536629881394
12407bc43e-78a0-4d8e-810b-75521ded0f4fraw21catalog2019-11-01 22:32:10.165568629881394
13a37a2989-04e8-4bbb-b8ce-962d61d54c8craw22product22019-11-01 22:32:16.496241629881394
14b8cc93cf-33ef-48c8-aa01-be1d4ce8bd67raw23catalog2019-11-01 22:33:17.682468629881394
158f9114e6-cc5a-46d5-955f-2bc9d9a2297draw24product12019-11-01 22:33:19.961115629881394
1656d79e1f-b41e-435d-b582-bc35067c0e48raw25catalog2019-11-01 22:33:35.460345629881394
17f30cdd3e-f826-41a3-a958-0a4a5418395fraw27catalog2019-11-01 22:34:02.301293629881394
1855aa87f0-7385-49d4-a3ad-9dcc20b535c8raw29product22019-11-01 22:34:32.362386629881394
197c8655ef-c50b-41c2-9fbc-ec8e1f116248raw31catalog2019-11-01 22:35:01.462515629881394
20477f5b30-ff98-4a96-abd2-8ed7c49e17c2raw33product22019-11-01 22:35:33.142711629881394
21a5d18359-5bff-4e54-8f56-7c926abec884raw36cart2019-11-01 22:35:50.437706629881394
2283565b1c-2952-455d-be0a-f7fbe1c83ff1raw38delivery_choice2019-11-01 22:35:57.649549629881394
238e751012-9bd8-4d25-a7e4-f61ba64e0c52raw39delivery_courier2019-11-01 22:36:02.009271629881394
2436e2a973-0aa3-4c2a-a80a-1ae7a0e330bdraw42payment_choice2019-11-01 22:36:02.243274629881394
2557ebc8c3-fcc9-466e-a42c-a2792ba74592raw44payment_cash2019-11-01 22:36:03.415201629881394
266035880f-559e-4371-b1ed-86844b8a6ecbraw45payment_done2019-11-01 22:36:03.999697629881394
11845e065d5-d9dd-46f5-82eb-2d571e13b23braw317main2019-11-07 12:40:46.004674629881394
11977a957e9-44e2-4445-a6d1-15d08fecd5b1raw319catalog2019-11-07 12:40:55.724185629881394
12011b16917-cfab-4fd4-a1d8-894793f6313draw322cart2019-11-07 12:41:04.107187629881394
3561f157c36-83b0-4205-afa9-0c53fdc65ff8raw1230main2019-11-25 23:16:56.317624629881394
357df5e2705-e84d-4a8f-a5f6-fadc9fb57a86raw1231catalog2019-11-25 23:17:16.266242629881394
35894b869a2-8c01-4aba-a85a-1415f8401ac4raw1232catalog2019-11-25 23:17:48.747304629881394
2331a52c1249-3bb2-4089-9424-c7cd652b27dfraw5922main2020-01-23 18:56:23.445236629881394
2332e14366c6-e93c-4d01-9505-3715ca5660d3raw5923catalog2020-01-23 18:56:30.461624629881394
4061865aadc9-1f26-491a-bc8e-e0f2a6070fd1raw10013main2020-03-01 23:23:43.392597629881394
4062ce3d9317-677d-4f76-935b-2feb33d2e4c6raw10014catalog2020-03-01 23:23:44.755693629881394
40638f14e9c6-a4c6-4880-b390-9c391e38e32eraw10015main2020-03-01 23:23:51.625991629881394
4414d6407690-5f9f-4ec4-ba0a-34ee642fac95raw10754main2020-03-07 12:05:23.938242629881394
4415444c5cd6-f000-474c-9a89-917a8778190craw10755catalog2020-03-07 12:05:32.476046629881394
5362b671bcb7-f293-4e2b-b67e-dc11ef57f2abraw12937main2020-03-26 13:00:47.844886629881394
536305138fb0-1c11-4854-9c7f-51f60b2436d7raw12938catalog2020-03-26 13:00:49.531644629881394
557745ff8d67-3b91-4349-9550-e4f7e8b312e8raw13436main2020-03-30 00:11:47.920536629881394
55781fe22a22-e5a4-4c77-af3b-d7428efb2645raw13456main2020-03-30 03:17:46.909845629881394
5579463a6fee-ff8a-44af-a9a3-1e870cffbf88raw13457catalog2020-03-30 03:17:49.918774629881394
55804e97d962-368b-4a19-aebf-dc02c32b16d5raw13458catalog2020-03-30 03:18:17.835191629881394
5581c8a4be8b-baea-4c84-9575-1e1c2f83b6d1raw13459catalog2020-03-30 03:18:53.715887629881394
5582302d6418-d86c-4368-8f79-f12f5a31bdc5raw13460catalog2020-03-30 03:19:36.034262629881394
5583d10ccb6a-e563-46c7-be6f-661d09b5301craw13461catalog2020-03-30 03:19:59.515094629881394
5584d10ccb6a-e563-46c7-be6f-661d09b5301cpath_end13461path_end2020-03-30 03:19:59.515094629881394
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 faab069d-3d78-49b2-aeee-b8ee32646b34 path_start 7 \n", "1 faab069d-3d78-49b2-aeee-b8ee32646b34 raw 7 \n", "2 775dd323-bb8e-4d93-89c5-aa9f3879182a raw 9 \n", "3 e38453d7-f7ff-4cb9-8b28-b71854aba7c4 raw 11 \n", "4 06a1521b-8449-48fa-aa46-e2c143c4eba9 raw 13 \n", "5 70ddfbab-ea29-42cd-bd37-3d6bcfd2b9d3 raw 14 \n", "6 d23f9b60-823f-4d5b-ac15-c598a01cf932 raw 15 \n", "7 01afe727-9206-45d7-a132-658278277912 raw 16 \n", "8 bfbb6659-eb96-49ec-b9a4-f558f4ad5953 raw 17 \n", "9 25212bf7-ad65-4b5b-8f59-783d9822ea56 raw 18 \n", "10 4ef0a0e8-9b4a-4cf4-a1f4-af96e4a98eb6 raw 19 \n", "11 84b5ba70-b8a8-4f3d-967d-b2663e50d0c7 raw 20 \n", "12 407bc43e-78a0-4d8e-810b-75521ded0f4f raw 21 \n", "13 a37a2989-04e8-4bbb-b8ce-962d61d54c8c raw 22 \n", "14 b8cc93cf-33ef-48c8-aa01-be1d4ce8bd67 raw 23 \n", "15 8f9114e6-cc5a-46d5-955f-2bc9d9a2297d raw 24 \n", "16 56d79e1f-b41e-435d-b582-bc35067c0e48 raw 25 \n", "17 f30cdd3e-f826-41a3-a958-0a4a5418395f raw 27 \n", "18 55aa87f0-7385-49d4-a3ad-9dcc20b535c8 raw 29 \n", "19 7c8655ef-c50b-41c2-9fbc-ec8e1f116248 raw 31 \n", "20 477f5b30-ff98-4a96-abd2-8ed7c49e17c2 raw 33 \n", "21 a5d18359-5bff-4e54-8f56-7c926abec884 raw 36 \n", "22 83565b1c-2952-455d-be0a-f7fbe1c83ff1 raw 38 \n", "23 8e751012-9bd8-4d25-a7e4-f61ba64e0c52 raw 39 \n", "24 36e2a973-0aa3-4c2a-a80a-1ae7a0e330bd raw 42 \n", "25 57ebc8c3-fcc9-466e-a42c-a2792ba74592 raw 44 \n", "26 6035880f-559e-4371-b1ed-86844b8a6ecb raw 45 \n", "118 45e065d5-d9dd-46f5-82eb-2d571e13b23b raw 317 \n", "119 77a957e9-44e2-4445-a6d1-15d08fecd5b1 raw 319 \n", "120 11b16917-cfab-4fd4-a1d8-894793f6313d raw 322 \n", "356 1f157c36-83b0-4205-afa9-0c53fdc65ff8 raw 1230 \n", "357 df5e2705-e84d-4a8f-a5f6-fadc9fb57a86 raw 1231 \n", "358 94b869a2-8c01-4aba-a85a-1415f8401ac4 raw 1232 \n", "2331 a52c1249-3bb2-4089-9424-c7cd652b27df raw 5922 \n", "2332 e14366c6-e93c-4d01-9505-3715ca5660d3 raw 5923 \n", "4061 865aadc9-1f26-491a-bc8e-e0f2a6070fd1 raw 10013 \n", "4062 
ce3d9317-677d-4f76-935b-2feb33d2e4c6 raw 10014 \n", "4063 8f14e9c6-a4c6-4880-b390-9c391e38e32e raw 10015 \n", "4414 d6407690-5f9f-4ec4-ba0a-34ee642fac95 raw 10754 \n", "4415 444c5cd6-f000-474c-9a89-917a8778190c raw 10755 \n", "5362 b671bcb7-f293-4e2b-b67e-dc11ef57f2ab raw 12937 \n", "5363 05138fb0-1c11-4854-9c7f-51f60b2436d7 raw 12938 \n", "5577 45ff8d67-3b91-4349-9550-e4f7e8b312e8 raw 13436 \n", "5578 1fe22a22-e5a4-4c77-af3b-d7428efb2645 raw 13456 \n", "5579 463a6fee-ff8a-44af-a9a3-1e870cffbf88 raw 13457 \n", "5580 4e97d962-368b-4a19-aebf-dc02c32b16d5 raw 13458 \n", "5581 c8a4be8b-baea-4c84-9575-1e1c2f83b6d1 raw 13459 \n", "5582 302d6418-d86c-4368-8f79-f12f5a31bdc5 raw 13460 \n", "5583 d10ccb6a-e563-46c7-be6f-661d09b5301c raw 13461 \n", "5584 d10ccb6a-e563-46c7-be6f-661d09b5301c path_end 13461 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 22:28:54.791683 629881394 \n", "1 main 2019-11-01 22:28:54.791683 629881394 \n", "2 catalog 2019-11-01 22:29:01.049513 629881394 \n", "3 catalog 2019-11-01 22:29:32.322458 629881394 \n", "4 catalog 2019-11-01 22:30:09.450839 629881394 \n", "5 catalog 2019-11-01 22:31:05.565762 629881394 \n", "6 main 2019-11-01 22:31:08.333560 629881394 \n", "7 catalog 2019-11-01 22:31:09.010626 629881394 \n", "8 product1 2019-11-01 22:31:10.416231 629881394 \n", "9 catalog 2019-11-01 22:31:43.019527 629881394 \n", "10 catalog 2019-11-01 22:32:01.596163 629881394 \n", "11 product1 2019-11-01 22:32:07.348536 629881394 \n", "12 catalog 2019-11-01 22:32:10.165568 629881394 \n", "13 product2 2019-11-01 22:32:16.496241 629881394 \n", "14 catalog 2019-11-01 22:33:17.682468 629881394 \n", "15 product1 2019-11-01 22:33:19.961115 629881394 \n", "16 catalog 2019-11-01 22:33:35.460345 629881394 \n", "17 catalog 2019-11-01 22:34:02.301293 629881394 \n", "18 product2 2019-11-01 22:34:32.362386 629881394 \n", "19 catalog 2019-11-01 22:35:01.462515 629881394 \n", "20 product2 2019-11-01 22:35:33.142711 629881394 \n", "21 cart 2019-11-01 
22:35:50.437706 629881394 \n", "22 delivery_choice 2019-11-01 22:35:57.649549 629881394 \n", "23 delivery_courier 2019-11-01 22:36:02.009271 629881394 \n", "24 payment_choice 2019-11-01 22:36:02.243274 629881394 \n", "25 payment_cash 2019-11-01 22:36:03.415201 629881394 \n", "26 payment_done 2019-11-01 22:36:03.999697 629881394 \n", "118 main 2019-11-07 12:40:46.004674 629881394 \n", "119 catalog 2019-11-07 12:40:55.724185 629881394 \n", "120 cart 2019-11-07 12:41:04.107187 629881394 \n", "356 main 2019-11-25 23:16:56.317624 629881394 \n", "357 catalog 2019-11-25 23:17:16.266242 629881394 \n", "358 catalog 2019-11-25 23:17:48.747304 629881394 \n", "2331 main 2020-01-23 18:56:23.445236 629881394 \n", "2332 catalog 2020-01-23 18:56:30.461624 629881394 \n", "4061 main 2020-03-01 23:23:43.392597 629881394 \n", "4062 catalog 2020-03-01 23:23:44.755693 629881394 \n", "4063 main 2020-03-01 23:23:51.625991 629881394 \n", "4414 main 2020-03-07 12:05:23.938242 629881394 \n", "4415 catalog 2020-03-07 12:05:32.476046 629881394 \n", "5362 main 2020-03-26 13:00:47.844886 629881394 \n", "5363 catalog 2020-03-26 13:00:49.531644 629881394 \n", "5577 main 2020-03-30 00:11:47.920536 629881394 \n", "5578 main 2020-03-30 03:17:46.909845 629881394 \n", "5579 catalog 2020-03-30 03:17:49.918774 629881394 \n", "5580 catalog 2020-03-30 03:18:17.835191 629881394 \n", "5581 catalog 2020-03-30 03:18:53.715887 629881394 \n", "5582 catalog 2020-03-30 03:19:36.034262 629881394 \n", "5583 catalog 2020-03-30 03:19:59.515094 629881394 \n", "5584 path_end 2020-03-30 03:19:59.515094 629881394 " ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 629881394]" ] }, { "cell_type": "code", "execution_count": 35, "id": "04312afc", "metadata": { "executionInfo": { "elapsed": 1159, "status": "ok", "timestamp": 1683202256946, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "ecf77028", "tags": 
[] }, "outputs": [], "source": [ "res = stream.drop_paths(min_time=(1, 'M')).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 36, "id": "61a53bf9", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 112 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1683202256946, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "21332803", "outputId": "59e00ad5-434d-4b10-ad68-b10c90043f25", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
5ef3f8502-1bd3-4fb2-a285-a33fc80ad9aepath_start4path_start2019-11-01 21:38:19.283663964964743
6ef3f8502-1bd3-4fb2-a285-a33fc80ad9aeraw4catalog2019-11-01 21:38:19.283663964964743
71f94de87-e9b6-472e-997c-1dc2a9fee367raw5cart2019-11-01 21:38:36.761221964964743
89ef1ef56-658f-455e-bdfb-8e6315d151ddraw6delivery_choice2019-11-01 21:38:37.564693964964743
123583b22098-bcc5-436e-915d-92794a935637raw2275main2019-12-09 01:42:22.801831964964743
123641f40816-ba2e-4829-8a41-e53415897508raw2276catalog2019-12-09 01:42:23.617764964964743
1237cc22204f-7d6e-43ed-b0a8-4f204cec78c6raw2277product22019-12-09 01:42:56.877340964964743
1238e0995a5a-549e-4536-bf50-7350a11fccafraw2278catalog2019-12-09 01:43:05.436223964964743
12390987e6eb-b194-407d-9058-1c275a30ce99raw2279catalog2019-12-09 01:43:36.923178964964743
124059542694-71a2-4bba-be0f-f38ee5086b39raw2280product22019-12-09 01:43:41.174195964964743
12413aac5020-32b9-496b-b83d-5c5c0f392fa0raw2281cart2019-12-09 01:43:57.325569964964743
12422f70f20a-7ad0-482c-bd72-54fe640bd7e7raw2282delivery_choice2019-12-09 01:43:57.486518964964743
124345483162-3e01-42a4-8338-a461e2b7fbferaw2283delivery_pickup2019-12-09 01:43:57.766850964964743
124445483162-3e01-42a4-8338-a461e2b7fbfepath_end2283path_end2019-12-09 01:43:57.766850964964743
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "5 ef3f8502-1bd3-4fb2-a285-a33fc80ad9ae path_start 4 \n", "6 ef3f8502-1bd3-4fb2-a285-a33fc80ad9ae raw 4 \n", "7 1f94de87-e9b6-472e-997c-1dc2a9fee367 raw 5 \n", "8 9ef1ef56-658f-455e-bdfb-8e6315d151dd raw 6 \n", "1235 83b22098-bcc5-436e-915d-92794a935637 raw 2275 \n", "1236 41f40816-ba2e-4829-8a41-e53415897508 raw 2276 \n", "1237 cc22204f-7d6e-43ed-b0a8-4f204cec78c6 raw 2277 \n", "1238 e0995a5a-549e-4536-bf50-7350a11fccaf raw 2278 \n", "1239 0987e6eb-b194-407d-9058-1c275a30ce99 raw 2279 \n", "1240 59542694-71a2-4bba-be0f-f38ee5086b39 raw 2280 \n", "1241 3aac5020-32b9-496b-b83d-5c5c0f392fa0 raw 2281 \n", "1242 2f70f20a-7ad0-482c-bd72-54fe640bd7e7 raw 2282 \n", "1243 45483162-3e01-42a4-8338-a461e2b7fbfe raw 2283 \n", "1244 45483162-3e01-42a4-8338-a461e2b7fbfe path_end 2283 \n", "\n", " event timestamp user_id \n", "5 path_start 2019-11-01 21:38:19.283663 964964743 \n", "6 catalog 2019-11-01 21:38:19.283663 964964743 \n", "7 cart 2019-11-01 21:38:36.761221 964964743 \n", "8 delivery_choice 2019-11-01 21:38:37.564693 964964743 \n", "1235 main 2019-12-09 01:42:22.801831 964964743 \n", "1236 catalog 2019-12-09 01:42:23.617764 964964743 \n", "1237 product2 2019-12-09 01:42:56.877340 964964743 \n", "1238 catalog 2019-12-09 01:43:05.436223 964964743 \n", "1239 catalog 2019-12-09 01:43:36.923178 964964743 \n", "1240 product2 2019-12-09 01:43:41.174195 964964743 \n", "1241 cart 2019-12-09 01:43:57.325569 964964743 \n", "1242 delivery_choice 2019-12-09 01:43:57.486518 964964743 \n", "1243 delivery_pickup 2019-12-09 01:43:57.766850 964964743 \n", "1244 path_end 2019-12-09 01:43:57.766850 964964743 " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 964964743]" ] }, { "cell_type": "markdown", "id": "e0acdec5", "metadata": { "id": "7NGDhhJVPLX5" }, "source": [ "#### TruncatePaths" ] }, { "cell_type": "code", "execution_count": 37, "id": "5256a8d0", 
"metadata": { "executionInfo": { "elapsed": 6136, "status": "ok", "timestamp": 1683202263075, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "8eb2156a", "tags": [] }, "outputs": [], "source": [ "res = stream.truncate_paths(\n", " drop_before='cart',\n", " shift_before=-2\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 38, "id": "62a47ef7", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 551 }, "executionInfo": { "elapsed": 30, "status": "ok", "timestamp": 1683202263075, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "3245a6fe", "outputId": "2bef0472-9df1-4e17-e2f3-51c39eb4d5db", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890
166f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890
21a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890
3dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890
413f2b0a9-b2f4-4ffa-b0d1-446f12f6c570raw3catalog2019-11-01 17:59:32.557029219483890
1980bd14ea53-9688-466d-87b2-0494ace4e011raw2096main2019-12-06 16:22:57.484842219483890
19817ef1bf49-c88b-4b40-b424-c7ac53a267a6raw2097catalog2019-12-06 16:23:01.331109219483890
1982e1800047-f1c9-48c7-a49a-80c3a87853e7raw2098catalog2019-12-06 16:23:48.116617219483890
4223d329f192-401c-44c3-8a1e-b0d4da680b6craw4542main2020-01-06 22:10:13.635011219483890
422459e82765-c236-4f5f-b05c-73ce6efdc84braw4543catalog2020-01-06 22:10:15.228575219483890
422585f5bb33-7223-4c41-884a-544d7d1fefbfraw4544cart2020-01-06 22:10:42.309028219483890
4226ba1eb7bb-71d2-4ab0-8649-5e236cc15cc4raw4545catalog2020-01-06 22:10:52.255859219483890
42278577fdb0-8c24-4c64-992e-a92a0ecfc03draw4546product12020-01-06 22:11:01.709800219483890
4228c87ddbdf-e0eb-40ff-b3cc-71a53aef8d62raw4547catalog2020-01-06 22:11:02.899490219483890
42295d7091d0-e8ec-4974-9dcf-f8336bb2b92draw4548catalog2020-01-06 22:11:28.271366219483890
7788098dbd63-8c5b-4842-bb44-bad07923f13draw8215main2020-02-14 21:04:49.450696219483890
778983896884-2fd2-4567-9d29-791aa1d45795raw8216catalog2020-02-14 21:04:51.717127219483890
779083896884-2fd2-4567-9d29-791aa1d45795path_end8216path_end2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 \n", "1 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 \n", "2 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 \n", "3 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 \n", "4 13f2b0a9-b2f4-4ffa-b0d1-446f12f6c570 raw 3 \n", "1980 bd14ea53-9688-466d-87b2-0494ace4e011 raw 2096 \n", "1981 7ef1bf49-c88b-4b40-b424-c7ac53a267a6 raw 2097 \n", "1982 e1800047-f1c9-48c7-a49a-80c3a87853e7 raw 2098 \n", "4223 d329f192-401c-44c3-8a1e-b0d4da680b6c raw 4542 \n", "4224 59e82765-c236-4f5f-b05c-73ce6efdc84b raw 4543 \n", "4225 85f5bb33-7223-4c41-884a-544d7d1fefbf raw 4544 \n", "4226 ba1eb7bb-71d2-4ab0-8649-5e236cc15cc4 raw 4545 \n", "4227 8577fdb0-8c24-4c64-992e-a92a0ecfc03d raw 4546 \n", "4228 c87ddbdf-e0eb-40ff-b3cc-71a53aef8d62 raw 4547 \n", "4229 5d7091d0-e8ec-4974-9dcf-f8336bb2b92d raw 4548 \n", "7788 098dbd63-8c5b-4842-bb44-bad07923f13d raw 8215 \n", "7789 83896884-2fd2-4567-9d29-791aa1d45795 raw 8216 \n", "7790 83896884-2fd2-4567-9d29-791aa1d45795 path_end 8216 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "1980 main 2019-12-06 16:22:57.484842 219483890 \n", "1981 catalog 2019-12-06 16:23:01.331109 219483890 \n", "1982 catalog 2019-12-06 16:23:48.116617 219483890 \n", "4223 main 2020-01-06 22:10:13.635011 219483890 \n", "4224 catalog 2020-01-06 22:10:15.228575 219483890 \n", "4225 cart 2020-01-06 22:10:42.309028 219483890 \n", "4226 catalog 2020-01-06 22:10:52.255859 219483890 \n", "4227 product1 2020-01-06 22:11:01.709800 219483890 \n", "4228 catalog 2020-01-06 22:11:02.899490 219483890 \n", "4229 catalog 2020-01-06 22:11:28.271366 219483890 \n", "7788 main 2020-02-14 21:04:49.450696 219483890 \n", "7789 catalog 
2020-02-14 21:04:51.717127 219483890 \n", "7790 path_end 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "code", "execution_count": 39, "id": "eabff395", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 174 }, "executionInfo": { "elapsed": 27, "status": "ok", "timestamp": 1683202263075, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "c4b22d33", "outputId": "d9188c02-2681-47e5-d9c2-a4218e856890", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
371e79e27f-5b47-43d1-b542-462e6ec7953bpath_start60path_start2019-11-02 07:28:07.28554124427596
381e79e27f-5b47-43d1-b542-462e6ec7953braw60main2019-11-02 07:28:07.28554124427596
398eed4edd-291c-458a-8bc0-28710a7fca6fraw61catalog2019-11-02 07:28:14.31985024427596
4022e66711-1f43-4e34-818d-c814542d5c7araw62catalog2019-11-02 07:29:08.30133324427596
41632f12a8-d984-47b4-8e3b-68764c694383raw63catalog2019-11-02 07:29:41.84839624427596
42632f12a8-d984-47b4-8e3b-68764c694383path_end63path_end2019-11-02 07:29:41.84839624427596
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "37 1e79e27f-5b47-43d1-b542-462e6ec7953b path_start 60 path_start \n", "38 1e79e27f-5b47-43d1-b542-462e6ec7953b raw 60 main \n", "39 8eed4edd-291c-458a-8bc0-28710a7fca6f raw 61 catalog \n", "40 22e66711-1f43-4e34-818d-c814542d5c7a raw 62 catalog \n", "41 632f12a8-d984-47b4-8e3b-68764c694383 raw 63 catalog \n", "42 632f12a8-d984-47b4-8e3b-68764c694383 path_end 63 path_end \n", "\n", " timestamp user_id \n", "37 2019-11-02 07:28:07.285541 24427596 \n", "38 2019-11-02 07:28:07.285541 24427596 \n", "39 2019-11-02 07:28:14.319850 24427596 \n", "40 2019-11-02 07:29:08.301333 24427596 \n", "41 2019-11-02 07:29:41.848396 24427596 \n", "42 2019-11-02 07:29:41.848396 24427596 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 24427596]" ] }, { "cell_type": "code", "execution_count": 40, "id": "a80b552c", "metadata": { "executionInfo": { "elapsed": 12140, "status": "ok", "timestamp": 1683202275190, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "1c967d10", "tags": [] }, "outputs": [], "source": [ "res = stream.truncate_paths(\n", " drop_after='cart',\n", " occurrence_after=\"last\"\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 41, "id": "cb92283f", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "executionInfo": { "elapsed": 33, "status": "ok", "timestamp": 1683202275191, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "cf8aa845", "outputId": "2098d943-6bb5-4e5e-a817-c452359cfaf7", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890
166f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890
21a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890
3dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890
413f2b0a9-b2f4-4ffa-b0d1-446f12f6c570raw3catalog2019-11-01 17:59:32.557029219483890
2183bd14ea53-9688-466d-87b2-0494ace4e011raw2096main2019-12-06 16:22:57.484842219483890
21847ef1bf49-c88b-4b40-b424-c7ac53a267a6raw2097catalog2019-12-06 16:23:01.331109219483890
2185e1800047-f1c9-48c7-a49a-80c3a87853e7raw2098catalog2019-12-06 16:23:48.116617219483890
4623d329f192-401c-44c3-8a1e-b0d4da680b6craw4542main2020-01-06 22:10:13.635011219483890
462459e82765-c236-4f5f-b05c-73ce6efdc84braw4543catalog2020-01-06 22:10:15.228575219483890
462585f5bb33-7223-4c41-884a-544d7d1fefbfraw4544cart2020-01-06 22:10:42.309028219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 \n", "1 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 \n", "2 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 \n", "3 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 \n", "4 13f2b0a9-b2f4-4ffa-b0d1-446f12f6c570 raw 3 \n", "2183 bd14ea53-9688-466d-87b2-0494ace4e011 raw 2096 \n", "2184 7ef1bf49-c88b-4b40-b424-c7ac53a267a6 raw 2097 \n", "2185 e1800047-f1c9-48c7-a49a-80c3a87853e7 raw 2098 \n", "4623 d329f192-401c-44c3-8a1e-b0d4da680b6c raw 4542 \n", "4624 59e82765-c236-4f5f-b05c-73ce6efdc84b raw 4543 \n", "4625 85f5bb33-7223-4c41-884a-544d7d1fefbf raw 4544 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2183 main 2019-12-06 16:22:57.484842 219483890 \n", "2184 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2185 catalog 2019-12-06 16:23:48.116617 219483890 \n", "4623 main 2020-01-06 22:10:13.635011 219483890 \n", "4624 catalog 2020-01-06 22:10:15.228575 219483890 \n", "4625 cart 2020-01-06 22:10:42.309028 219483890 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "cedba87b", "metadata": { "id": "3e7ed632" }, "source": [ "### Editing processors" ] }, { "cell_type": "markdown", "id": "9f52f455", "metadata": { "id": "SJkjuACbWoPJ" }, "source": [ "#### GroupEvents" ] }, { "cell_type": "markdown", "id": "45320158", "metadata": { "id": "a3ddebb9" }, "source": [ "With ``GroupEvents``, we can group events based on the event name. 
Suppose\n", "we need to assign a common name ``product`` to events ``product1`` and\n", "``product2``:" ] }, { "cell_type": "code", "execution_count": 42, "id": "c7b5fad5", "metadata": { "executionInfo": { "elapsed": 29, "status": "ok", "timestamp": 1683202275191, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "206ac0da", "tags": [] }, "outputs": [], "source": [ "def group_events(df, schema):\n", " events_to_group = ['product1', 'product2']\n", " return df[schema.event_name].isin(events_to_group)\n", "\n", "params = {\n", " 'event_name': 'product',\n", " 'func': group_events\n", "}\n", "\n", "res = stream.group_events(**params).to_dataframe()" ] }, { "cell_type": "markdown", "id": "f54324fc", "metadata": { "id": "1tAdSeRbdwZY" }, "source": [ "As we can see, user ``456870964`` now has two ``product`` events\n", "(``event_index=132, 135``) with ``event_type='group_alias'``." ] }, { "cell_type": "code", "execution_count": 43, "id": "ee93a2dc", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "executionInfo": { "elapsed": 28, "status": "ok", "timestamp": 1683202275191, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "0B4vS0xdd1s1", "outputId": "921e7162-d5b0-488c-bf14-68cc39528be5", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
160ea591a5a-61fd-4f7b-b120-638cfeace55fpath_start129path_start2019-11-03 11:46:55.411714456870964
161ea591a5a-61fd-4f7b-b120-638cfeace55fraw129catalog2019-11-03 11:46:55.411714456870964
162c06e61ab-9b12-470d-a8bd-958321a89d23raw130catalog2019-11-03 11:47:46.131302456870964
163f14a959c-d810-4ba5-a8bf-35ef8d30b7a4raw131catalog2019-11-03 11:47:58.401143456870964
164e535652a-6036-4029-bbe8-829e1a6298b5group_alias132product2019-11-03 11:48:43.243587456870964
1654e175e40-dd60-4b03-84dd-21fd1c089c1eraw133cart2019-11-03 11:49:17.050519456870964
1668e7438a2-7ee7-403f-ba77-8c4a495d2787raw134catalog2019-11-03 11:49:17.516398456870964
1672ec87c38-2d08-4465-8a43-a1db673fba35group_alias135product2019-11-03 11:49:28.927721456870964
16864b3b4ed-4e7b-4605-bf18-1d27f38f0938raw136catalog2019-11-03 11:49:30.788195456870964
16964b3b4ed-4e7b-4605-bf18-1d27f38f0938path_end136path_end2019-11-03 11:49:30.788195456870964
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "160 ea591a5a-61fd-4f7b-b120-638cfeace55f path_start 129 \n", "161 ea591a5a-61fd-4f7b-b120-638cfeace55f raw 129 \n", "162 c06e61ab-9b12-470d-a8bd-958321a89d23 raw 130 \n", "163 f14a959c-d810-4ba5-a8bf-35ef8d30b7a4 raw 131 \n", "164 e535652a-6036-4029-bbe8-829e1a6298b5 group_alias 132 \n", "165 4e175e40-dd60-4b03-84dd-21fd1c089c1e raw 133 \n", "166 8e7438a2-7ee7-403f-ba77-8c4a495d2787 raw 134 \n", "167 2ec87c38-2d08-4465-8a43-a1db673fba35 group_alias 135 \n", "168 64b3b4ed-4e7b-4605-bf18-1d27f38f0938 raw 136 \n", "169 64b3b4ed-4e7b-4605-bf18-1d27f38f0938 path_end 136 \n", "\n", " event timestamp user_id \n", "160 path_start 2019-11-03 11:46:55.411714 456870964 \n", "161 catalog 2019-11-03 11:46:55.411714 456870964 \n", "162 catalog 2019-11-03 11:47:46.131302 456870964 \n", "163 catalog 2019-11-03 11:47:58.401143 456870964 \n", "164 product 2019-11-03 11:48:43.243587 456870964 \n", "165 cart 2019-11-03 11:49:17.050519 456870964 \n", "166 catalog 2019-11-03 11:49:17.516398 456870964 \n", "167 product 2019-11-03 11:49:28.927721 456870964 \n", "168 catalog 2019-11-03 11:49:30.788195 456870964 \n", "169 path_end 2019-11-03 11:49:30.788195 456870964 " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 456870964]" ] }, { "cell_type": "markdown", "id": "b5408068", "metadata": { "id": "vjrKADA9ecuY" }, "source": [ "Previously, both events were named\n", "``product1`` and ``product2`` and had ``raw`` event types:" ] }, { "cell_type": "code", "execution_count": 44, "id": "35576635", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "executionInfo": { "elapsed": 28, "status": "ok", "timestamp": 1683202275192, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "BzLnXqL3d9LM", "outputId": "d0667464-05e9-418e-a56c-aee213afbd3b", "tags": [] }, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
160ea591a5a-61fd-4f7b-b120-638cfeace55fpath_start129path_start2019-11-03 11:46:55.411714456870964
161ea591a5a-61fd-4f7b-b120-638cfeace55fraw129catalog2019-11-03 11:46:55.411714456870964
162c06e61ab-9b12-470d-a8bd-958321a89d23raw130catalog2019-11-03 11:47:46.131302456870964
163f14a959c-d810-4ba5-a8bf-35ef8d30b7a4raw131catalog2019-11-03 11:47:58.401143456870964
164e535652a-6036-4029-bbe8-829e1a6298b5raw132product12019-11-03 11:48:43.243587456870964
1654e175e40-dd60-4b03-84dd-21fd1c089c1eraw133cart2019-11-03 11:49:17.050519456870964
1668e7438a2-7ee7-403f-ba77-8c4a495d2787raw134catalog2019-11-03 11:49:17.516398456870964
1672ec87c38-2d08-4465-8a43-a1db673fba35raw135product22019-11-03 11:49:28.927721456870964
16864b3b4ed-4e7b-4605-bf18-1d27f38f0938raw136catalog2019-11-03 11:49:30.788195456870964
16964b3b4ed-4e7b-4605-bf18-1d27f38f0938path_end136path_end2019-11-03 11:49:30.788195456870964
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "160 ea591a5a-61fd-4f7b-b120-638cfeace55f path_start 129 \n", "161 ea591a5a-61fd-4f7b-b120-638cfeace55f raw 129 \n", "162 c06e61ab-9b12-470d-a8bd-958321a89d23 raw 130 \n", "163 f14a959c-d810-4ba5-a8bf-35ef8d30b7a4 raw 131 \n", "164 e535652a-6036-4029-bbe8-829e1a6298b5 raw 132 \n", "165 4e175e40-dd60-4b03-84dd-21fd1c089c1e raw 133 \n", "166 8e7438a2-7ee7-403f-ba77-8c4a495d2787 raw 134 \n", "167 2ec87c38-2d08-4465-8a43-a1db673fba35 raw 135 \n", "168 64b3b4ed-4e7b-4605-bf18-1d27f38f0938 raw 136 \n", "169 64b3b4ed-4e7b-4605-bf18-1d27f38f0938 path_end 136 \n", "\n", " event timestamp user_id \n", "160 path_start 2019-11-03 11:46:55.411714 456870964 \n", "161 catalog 2019-11-03 11:46:55.411714 456870964 \n", "162 catalog 2019-11-03 11:47:46.131302 456870964 \n", "163 catalog 2019-11-03 11:47:58.401143 456870964 \n", "164 product1 2019-11-03 11:48:43.243587 456870964 \n", "165 cart 2019-11-03 11:49:17.050519 456870964 \n", "166 catalog 2019-11-03 11:49:17.516398 456870964 \n", "167 product2 2019-11-03 11:49:28.927721 456870964 \n", "168 catalog 2019-11-03 11:49:30.788195 456870964 \n", "169 path_end 2019-11-03 11:49:30.788195 456870964 " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.to_dataframe().query('user_id == 456870964')" ] }, { "cell_type": "markdown", "id": "0a5e5e9a-0520-4d10-b488-2a4e3817359f", "metadata": {}, "source": [ "#### GroupEventsBulk" ] }, { "cell_type": "markdown", "id": "7f8c70bc-7a91-4d79-b56f-7154f797716c", "metadata": {}, "source": [ "Similar to `GroupEvents`, but allows applying multiple grouping rules simultaneously." 
] }, { "cell_type": "code", "execution_count": 45, "id": "633457b5-3dee-4fa9-b821-6c2af4824162", "metadata": {}, "outputs": [], "source": [ "res = stream.group_events_bulk(\n", " [\n", " {\n", " 'event_name': 'product',\n", " 'event_type': 'group_product',\n", " 'func': lambda _df: _df['event'].str.startswith('product')\n", " },\n", " {\n", " 'event_name': 'delivery',\n", " 'func': lambda _df: _df['event'].str.startswith('delivery')\n", " }\n", " ]\n", ").to_dataframe()" ] }, { "cell_type": "code", "execution_count": 46, "id": "ffb4633b-daa4-4b3e-8c42-22ca70c0ed36", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
2882833944c49-501e-4c8a-b653-7d7b487855a9path_start23620path_start2020-04-17 23:55:39.85671783322888
2882933944c49-501e-4c8a-b653-7d7b487855a9raw23620catalog2020-04-17 23:55:39.85671783322888
28830b4a67d8c-3e93-4ca3-afb8-a4753206c640group_product23621product2020-04-17 23:55:45.48219683322888
28831e56ba71e-dad3-4aaa-b7bc-124c8366309eraw23622catalog2020-04-17 23:55:45.52225083322888
28832a7f18a56-91af-4c07-9b87-f1d883721ba9group_product23623product2020-04-17 23:55:57.45880983322888
2883380b47b04-2f15-4ce5-b494-44c3137d4211raw23624cart2020-04-17 23:56:48.67434483322888
28834cf3358c7-d5c5-4106-84ee-1842703ece22group_alias23625delivery2020-04-17 23:56:49.38072883322888
2883524580b02-c897-4f25-b6d0-e3a2f441f437group_alias23626delivery2020-04-17 23:56:51.31263283322888
2883624580b02-c897-4f25-b6d0-e3a2f441f437path_end23626path_end2020-04-17 23:56:51.31263283322888
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "28828 33944c49-501e-4c8a-b653-7d7b487855a9 path_start 23620 \n", "28829 33944c49-501e-4c8a-b653-7d7b487855a9 raw 23620 \n", "28830 b4a67d8c-3e93-4ca3-afb8-a4753206c640 group_product 23621 \n", "28831 e56ba71e-dad3-4aaa-b7bc-124c8366309e raw 23622 \n", "28832 a7f18a56-91af-4c07-9b87-f1d883721ba9 group_product 23623 \n", "28833 80b47b04-2f15-4ce5-b494-44c3137d4211 raw 23624 \n", "28834 cf3358c7-d5c5-4106-84ee-1842703ece22 group_alias 23625 \n", "28835 24580b02-c897-4f25-b6d0-e3a2f441f437 group_alias 23626 \n", "28836 24580b02-c897-4f25-b6d0-e3a2f441f437 path_end 23626 \n", "\n", " event timestamp user_id \n", "28828 path_start 2020-04-17 23:55:39.856717 83322888 \n", "28829 catalog 2020-04-17 23:55:39.856717 83322888 \n", "28830 product 2020-04-17 23:55:45.482196 83322888 \n", "28831 catalog 2020-04-17 23:55:45.522250 83322888 \n", "28832 product 2020-04-17 23:55:57.458809 83322888 \n", "28833 cart 2020-04-17 23:56:48.674344 83322888 \n", "28834 delivery 2020-04-17 23:56:49.380728 83322888 \n", "28835 delivery 2020-04-17 23:56:51.312632 83322888 \n", "28836 path_end 2020-04-17 23:56:51.312632 83322888 " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 83322888]" ] }, { "cell_type": "code", "execution_count": 47, "id": "fee10ca7-cf4d-41ae-ab5f-575f63c907e1", "metadata": {}, "outputs": [], "source": [ "res = stream.group_events_bulk(\n", " {\n", " 'product': lambda _df: _df['event'].str.startswith('product'),\n", " 'delivery': lambda _df: _df['event'].str.startswith('delivery')\n", " }\n", ").to_dataframe()" ] }, { "cell_type": "code", "execution_count": 48, "id": "f16c9400-285b-4bff-a40d-118e1f61ddf3", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
2882833944c49-501e-4c8a-b653-7d7b487855a9path_start23620path_start2020-04-17 23:55:39.85671783322888
2882933944c49-501e-4c8a-b653-7d7b487855a9raw23620catalog2020-04-17 23:55:39.85671783322888
28830b4a67d8c-3e93-4ca3-afb8-a4753206c640group_alias23621product2020-04-17 23:55:45.48219683322888
28831e56ba71e-dad3-4aaa-b7bc-124c8366309eraw23622catalog2020-04-17 23:55:45.52225083322888
28832a7f18a56-91af-4c07-9b87-f1d883721ba9group_alias23623product2020-04-17 23:55:57.45880983322888
2883380b47b04-2f15-4ce5-b494-44c3137d4211raw23624cart2020-04-17 23:56:48.67434483322888
28834cf3358c7-d5c5-4106-84ee-1842703ece22group_alias23625delivery2020-04-17 23:56:49.38072883322888
2883524580b02-c897-4f25-b6d0-e3a2f441f437group_alias23626delivery2020-04-17 23:56:51.31263283322888
2883624580b02-c897-4f25-b6d0-e3a2f441f437path_end23626path_end2020-04-17 23:56:51.31263283322888
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "28828 33944c49-501e-4c8a-b653-7d7b487855a9 path_start 23620 \n", "28829 33944c49-501e-4c8a-b653-7d7b487855a9 raw 23620 \n", "28830 b4a67d8c-3e93-4ca3-afb8-a4753206c640 group_alias 23621 \n", "28831 e56ba71e-dad3-4aaa-b7bc-124c8366309e raw 23622 \n", "28832 a7f18a56-91af-4c07-9b87-f1d883721ba9 group_alias 23623 \n", "28833 80b47b04-2f15-4ce5-b494-44c3137d4211 raw 23624 \n", "28834 cf3358c7-d5c5-4106-84ee-1842703ece22 group_alias 23625 \n", "28835 24580b02-c897-4f25-b6d0-e3a2f441f437 group_alias 23626 \n", "28836 24580b02-c897-4f25-b6d0-e3a2f441f437 path_end 23626 \n", "\n", " event timestamp user_id \n", "28828 path_start 2020-04-17 23:55:39.856717 83322888 \n", "28829 catalog 2020-04-17 23:55:39.856717 83322888 \n", "28830 product 2020-04-17 23:55:45.482196 83322888 \n", "28831 catalog 2020-04-17 23:55:45.522250 83322888 \n", "28832 product 2020-04-17 23:55:57.458809 83322888 \n", "28833 cart 2020-04-17 23:56:48.674344 83322888 \n", "28834 delivery 2020-04-17 23:56:49.380728 83322888 \n", "28835 delivery 2020-04-17 23:56:51.312632 83322888 \n", "28836 path_end 2020-04-17 23:56:51.312632 83322888 " ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 83322888]" ] }, { "cell_type": "markdown", "id": "4d87c941", "metadata": { "id": "4TsI3HFLg7Z_" }, "source": [ "#### CollapseLoops" ] }, { "cell_type": "code", "execution_count": 49, "id": "b3c8e7a7", "metadata": { "executionInfo": { "elapsed": 672, "status": "ok", "timestamp": 1683202275838, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "45a48d47", "tags": [] }, "outputs": [], "source": [ "res = stream.collapse_loops(suffix='loop', time_agg='max').to_dataframe()" ] }, { "cell_type": "code", "execution_count": 50, "id": "de01d74d", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { 
"elapsed": 454, "status": "ok", "timestamp": 1683202276288, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "LYnmqr3MjcnL", "outputId": "3072b66c-2638-4a1b-b338-b747fa61d2be", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
4015167dd3a9-9b87-481a-9cc6-8c7ee9d5917cpath_start3327path_start2019-12-24 12:58:04.8912492112338
4016167dd3a9-9b87-481a-9cc6-8c7ee9d5917craw3327main2019-12-24 12:58:04.8912492112338
4017c8afeb3b-f12d-4f41-941d-81634adf48d8raw3328catalog2019-12-24 12:58:08.0969232112338
40189c9874fa-5728-4c2c-b9f6-4a9c75027077raw3329catalog2019-12-24 12:58:16.4295522112338
40195efb4a14-a77b-43b1-b06c-286ea2b0a745raw3330catalog2019-12-24 12:58:44.9651042112338
40205a01f957-fb07-449e-8121-fdbe1de4e0a5raw3331main2019-12-24 12:58:52.9848532112338
40215a01f957-fb07-449e-8121-fdbe1de4e0a5path_end3331path_end2019-12-24 12:58:52.9848532112338
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "4015 167dd3a9-9b87-481a-9cc6-8c7ee9d5917c path_start 3327 \n", "4016 167dd3a9-9b87-481a-9cc6-8c7ee9d5917c raw 3327 \n", "4017 c8afeb3b-f12d-4f41-941d-81634adf48d8 raw 3328 \n", "4018 9c9874fa-5728-4c2c-b9f6-4a9c75027077 raw 3329 \n", "4019 5efb4a14-a77b-43b1-b06c-286ea2b0a745 raw 3330 \n", "4020 5a01f957-fb07-449e-8121-fdbe1de4e0a5 raw 3331 \n", "4021 5a01f957-fb07-449e-8121-fdbe1de4e0a5 path_end 3331 \n", "\n", " event timestamp user_id \n", "4015 path_start 2019-12-24 12:58:04.891249 2112338 \n", "4016 main 2019-12-24 12:58:04.891249 2112338 \n", "4017 catalog 2019-12-24 12:58:08.096923 2112338 \n", "4018 catalog 2019-12-24 12:58:16.429552 2112338 \n", "4019 catalog 2019-12-24 12:58:44.965104 2112338 \n", "4020 main 2019-12-24 12:58:52.984853 2112338 \n", "4021 path_end 2019-12-24 12:58:52.984853 2112338 " ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.to_dataframe().query('user_id == 2112338')" ] }, { "cell_type": "code", "execution_count": 51, "id": "b0880e7c", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202276289, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "u5jHmoWejF4P", "outputId": "c8626cad-9de9-46aa-873c-176c2f13604b", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
3141167dd3a9-9b87-481a-9cc6-8c7ee9d5917cpath_start3327path_start2019-12-24 12:58:04.8912492112338
3142167dd3a9-9b87-481a-9cc6-8c7ee9d5917craw3327main2019-12-24 12:58:04.8912492112338
3143b3446101-0bf2-4aa7-a275-71b02f0dcd15group_alias3330catalog_loop2019-12-24 12:58:44.9651042112338
31445a01f957-fb07-449e-8121-fdbe1de4e0a5raw3331main2019-12-24 12:58:52.9848532112338
31455a01f957-fb07-449e-8121-fdbe1de4e0a5path_end3331path_end2019-12-24 12:58:52.9848532112338
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "3141 167dd3a9-9b87-481a-9cc6-8c7ee9d5917c path_start 3327 \n", "3142 167dd3a9-9b87-481a-9cc6-8c7ee9d5917c raw 3327 \n", "3143 b3446101-0bf2-4aa7-a275-71b02f0dcd15 group_alias 3330 \n", "3144 5a01f957-fb07-449e-8121-fdbe1de4e0a5 raw 3331 \n", "3145 5a01f957-fb07-449e-8121-fdbe1de4e0a5 path_end 3331 \n", "\n", " event timestamp user_id \n", "3141 path_start 2019-12-24 12:58:04.891249 2112338 \n", "3142 main 2019-12-24 12:58:04.891249 2112338 \n", "3143 catalog_loop 2019-12-24 12:58:44.965104 2112338 \n", "3144 main 2019-12-24 12:58:52.984853 2112338 \n", "3145 path_end 2019-12-24 12:58:52.984853 2112338 " ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 2112338]" ] }, { "cell_type": "code", "execution_count": 52, "id": "c77b5469", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 868, "status": "ok", "timestamp": 1683202277150, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "0ea8e922", "outputId": "ad6102bb-7f8a-42a9-a172-ade7edfba88c", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
3151167dd3a9-9b87-481a-9cc6-8c7ee9d5917cpath_start3327path_start2019-12-24 12:58:04.8912490002112338
3152167dd3a9-9b87-481a-9cc6-8c7ee9d5917craw3327main2019-12-24 12:58:04.8912490002112338
315390da9bbe-fea4-4959-9d41-69f6b8f7820bgroup_alias3329catalog_loop_32019-12-24 12:58:23.1638597122112338
31545a01f957-fb07-449e-8121-fdbe1de4e0a5raw3331main2019-12-24 12:58:52.9848530002112338
31555a01f957-fb07-449e-8121-fdbe1de4e0a5path_end3331path_end2019-12-24 12:58:52.9848530002112338
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "3151 167dd3a9-9b87-481a-9cc6-8c7ee9d5917c path_start 3327 \n", "3152 167dd3a9-9b87-481a-9cc6-8c7ee9d5917c raw 3327 \n", "3153 90da9bbe-fea4-4959-9d41-69f6b8f7820b group_alias 3329 \n", "3154 5a01f957-fb07-449e-8121-fdbe1de4e0a5 raw 3331 \n", "3155 5a01f957-fb07-449e-8121-fdbe1de4e0a5 path_end 3331 \n", "\n", " event timestamp user_id \n", "3151 path_start 2019-12-24 12:58:04.891249000 2112338 \n", "3152 main 2019-12-24 12:58:04.891249000 2112338 \n", "3153 catalog_loop_3 2019-12-24 12:58:23.163859712 2112338 \n", "3154 main 2019-12-24 12:58:52.984853000 2112338 \n", "3155 path_end 2019-12-24 12:58:52.984853000 2112338 " ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "params = {\n", " 'suffix': 'count',\n", " 'time_agg': 'mean'\n", "}\n", "\n", "res = stream.collapse_loops(**params).to_dataframe()\n", "res[res['user_id'] == 2112338]" ] }, { "cell_type": "markdown", "id": "69fa8043-96b5-4a6b-95fc-e8d98de4c2bd", "metadata": {}, "source": [ "## Pipe" ] }, { "cell_type": "code", "execution_count": 53, "id": "29569b11-228d-4bd7-978d-a8e104b7b55c", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idnew_column
066f48c7c-bb89-4de8-88d9-4c8226a3dfe0path_start0path_start2019-11-01 17:59:13.273932219483890100
166f48c7c-bb89-4de8-88d9-4c8226a3dfe0raw0catalog2019-11-01 17:59:13.273932219483890100
21a1748eb-0063-4d9d-aaea-ba564ad70b1draw1product12019-11-01 17:59:28.459271219483890100
3dd691ad2-5ab3-4450-83d8-21485a565abbraw2cart2019-11-01 17:59:29.502214219483890100
413f2b0a9-b2f4-4ffa-b0d1-446f12f6c570raw3catalog2019-11-01 17:59:32.557029219483890100
........................
397801ab7adfe-6b74-42c5-a2ee-b5ef69348719raw32279catalog2020-04-29 12:47:40.975732501098384100
3978142ff0327-d17e-42c7-b116-66c447f46848raw32280catalog2020-04-29 12:48:01.809577501098384100
39782d79171c2-8164-4e74-b306-0c531583716eraw32281main2020-04-29 12:48:01.938488501098384100
3978391d3aca3-8031-4782-80ec-0f0abd1d68a1raw32282catalog2020-04-29 12:48:06.595390501098384100
3978491d3aca3-8031-4782-80ec-0f0abd1d68a1path_end32282path_end2020-04-29 12:48:06.595390501098384100
\n", "

39785 rows × 7 columns

\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 path_start 0 \n", "1 66f48c7c-bb89-4de8-88d9-4c8226a3dfe0 raw 0 \n", "2 1a1748eb-0063-4d9d-aaea-ba564ad70b1d raw 1 \n", "3 dd691ad2-5ab3-4450-83d8-21485a565abb raw 2 \n", "4 13f2b0a9-b2f4-4ffa-b0d1-446f12f6c570 raw 3 \n", "... ... ... ... \n", "39780 1ab7adfe-6b74-42c5-a2ee-b5ef69348719 raw 32279 \n", "39781 42ff0327-d17e-42c7-b116-66c447f46848 raw 32280 \n", "39782 d79171c2-8164-4e74-b306-0c531583716e raw 32281 \n", "39783 91d3aca3-8031-4782-80ec-0f0abd1d68a1 raw 32282 \n", "39784 91d3aca3-8031-4782-80ec-0f0abd1d68a1 path_end 32282 \n", "\n", " event timestamp user_id new_column \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 100 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 100 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 100 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 100 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 100 \n", "... ... ... ... ... \n", "39780 catalog 2020-04-29 12:47:40.975732 501098384 100 \n", "39781 catalog 2020-04-29 12:48:01.809577 501098384 100 \n", "39782 main 2020-04-29 12:48:01.938488 501098384 100 \n", "39783 catalog 2020-04-29 12:48:06.595390 501098384 100 \n", "39784 path_end 2020-04-29 12:48:06.595390 501098384 100 \n", "\n", "[39785 rows x 7 columns]" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.pipe(lambda _df: _df.assign(new_column=100))\\\n", " .to_dataframe()" ] } ], "metadata": { "colab": { "name": "", "provenance": [ { "file_id": "1QlOf2MtJ3lE9cTOlTtHkPY4npVLdxFhB", "timestamp": 1671649889781 } ], "toc_visible": true, "version": "" }, "kernelspec": { "display_name": "rete", "language": "python", "name": "rete" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": 
"3.8.15" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }