{ "cells": [ { "cell_type": "markdown", "id": "bff3d155", "metadata": { "id": "2K4KCsDcK85_" }, "source": [ "# Data processors user guide" ] }, { "cell_type": "markdown", "id": "a24490e7", "metadata": { "id": "TdnGPZWzSxZe" }, "source": [ "The full text of the [Data processors](https://doc.retentioneering.com/release3/doc/user_guides/dataprocessors.html) user guide is available on the retentioneering website." ] }, { "cell_type": "markdown", "id": "53f0f536", "metadata": { "id": "0666939c" }, "source": [ "## Prerequisites\n", "\n", "Run this cell to prepare the environment. This step is obligatory." ] }, { "cell_type": "code", "execution_count": null, "id": "5cd3f14f", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 41572, "status": "ok", "timestamp": 1683202215227, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "eKWKwFpiRj0R", "outputId": "ffd7455c-41b8-496e-da41-319bec80d8bd", "pycharm": { "is_executing": true } }, "outputs": [], "source": [ "!pip install retentioneering" ] }, { "cell_type": "markdown", "id": "65c92e18", "metadata": { "id": "Zm2DAiT_Sa7L" }, "source": [ "## Creating an eventstream" ] }, { "cell_type": "code", "execution_count": 1, "id": "115e20e1", "metadata": { "executionInfo": { "elapsed": 364, "status": "ok", "timestamp": 1683202244341, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "KK0JITft614a", "pycharm": { "is_executing": true }, "tags": [] }, "outputs": [], "source": [ "import pandas as pd\n", "from retentioneering import datasets\n", "from retentioneering.eventstream import Eventstream\n", "\n", "stream = datasets.load_simple_shop()" ] }, { "cell_type": "markdown", "id": "5dba6a11", "metadata": { "id": "Bvb0h5RAvdWe" }, "source": [ "## What is a data processor?\n" ] }, { "cell_type": "markdown", "id": "9bd6683b", "metadata": { "id": "sdQf1DHIdds0" }, "source": [ "## Helpers and 
chaining usage" ] }, { "cell_type": "code", "execution_count": 2, "id": "d4236dbf", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 519 }, "executionInfo": { "elapsed": 2691, "status": "ok", "timestamp": 1683202247024, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "uXsQCXxAIlOT", "outputId": "2f4179e2-bce0-45dc-a046-3b2c544aedda", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
0ecec01ad-6f60-46b5-b125-69a7ca27a685path_start0path_start2019-11-01 17:59:13.273932219483890219483890_1
12619c384-1a08-460f-9e3b-f52fe46fc183session_start0session_start2019-11-01 17:59:13.273932219483890219483890_1
2ecec01ad-6f60-46b5-b125-69a7ca27a685raw0catalog2019-11-01 17:59:13.273932219483890219483890_1
39ff63036-fac6-435a-85f4-b0d5a54c557fraw1product12019-11-01 17:59:28.459271219483890219483890_1
4a3b37935-dd30-4edb-9328-12e6f87ac8bcraw2cart2019-11-01 17:59:29.502214219483890219483890_1
5ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9raw3catalog2019-11-01 17:59:32.557029219483890219483890_1
686f7a2b0-1475-4cc8-a83d-c2f99121ae17session_end3session_end2019-11-01 17:59:32.557029219483890219483890_1
33927498e18f-8762-440d-a5ac-f6165f8e2058session_start2096session_start2019-12-06 16:22:57.484842219483890219483890_2
33939735f8a7-baac-4a90-b390-9b378c0ed4e3raw2096main2019-12-06 16:22:57.484842219483890219483890_2
3394d11c3f44-5472-46ba-83b9-b11d568be624raw2097catalog2019-12-06 16:23:01.331109219483890219483890_2
3395562884ac-4ad4-4773-b22d-852efb4d7961raw2098catalog2019-12-06 16:23:48.116617219483890219483890_2
33963ced471f-7194-4876-bcd2-907a053ed648session_end2098session_end2019-12-06 16:23:48.116617219483890219483890_2
7311b111b4f8-f98a-4fa6-b471-30bb14664006session_start4542session_start2020-01-06 22:10:13.635011219483890219483890_3
73126a095bf1-f34c-4ba9-b933-9a470e28d4f4raw4542main2020-01-06 22:10:13.635011219483890219483890_3
731311705e29-00ce-4da3-9c4d-6bc556a0acadraw4543catalog2020-01-06 22:10:15.228575219483890219483890_3
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 ecec01ad-6f60-46b5-b125-69a7ca27a685 path_start 0 \n", "1 2619c384-1a08-460f-9e3b-f52fe46fc183 session_start 0 \n", "2 ecec01ad-6f60-46b5-b125-69a7ca27a685 raw 0 \n", "3 9ff63036-fac6-435a-85f4-b0d5a54c557f raw 1 \n", "4 a3b37935-dd30-4edb-9328-12e6f87ac8bc raw 2 \n", "5 ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9 raw 3 \n", "6 86f7a2b0-1475-4cc8-a83d-c2f99121ae17 session_end 3 \n", "3392 7498e18f-8762-440d-a5ac-f6165f8e2058 session_start 2096 \n", "3393 9735f8a7-baac-4a90-b390-9b378c0ed4e3 raw 2096 \n", "3394 d11c3f44-5472-46ba-83b9-b11d568be624 raw 2097 \n", "3395 562884ac-4ad4-4773-b22d-852efb4d7961 raw 2098 \n", "3396 3ced471f-7194-4876-bcd2-907a053ed648 session_end 2098 \n", "7311 b111b4f8-f98a-4fa6-b471-30bb14664006 session_start 4542 \n", "7312 6a095bf1-f34c-4ba9-b933-9a470e28d4f4 raw 4542 \n", "7313 11705e29-00ce-4da3-9c4d-6bc556a0acad raw 4543 \n", "\n", " event timestamp user_id session_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "1 session_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "2 catalog 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "3 product1 2019-11-01 17:59:28.459271 219483890 219483890_1 \n", "4 cart 2019-11-01 17:59:29.502214 219483890 219483890_1 \n", "5 catalog 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "6 session_end 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "3392 session_start 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "3393 main 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "3394 catalog 2019-12-06 16:23:01.331109 219483890 219483890_2 \n", "3395 catalog 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "3396 session_end 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "7311 session_start 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "7312 main 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "7313 catalog 2020-01-06 22:10:15.228575 219483890 219483890_3 " ] }, 
"execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res = stream\\\n", " .add_start_end_events()\\\n", " .split_sessions(timeout=(10, 'm'))\\\n", " .to_dataframe()\n", "res[res['user_id'] == 219483890].head(15)" ] }, { "cell_type": "markdown", "id": "3747ff7e", "metadata": { "id": "Zuf43I0-u36P" }, "source": [ "## Data processors library" ] }, { "cell_type": "markdown", "id": "a0a29db4", "metadata": { "id": "829dd269" }, "source": [ "### Adding processors" ] }, { "cell_type": "markdown", "id": "4ca42902", "metadata": { "id": "pkJG5M7Y21m8" }, "source": [ "#### AddStartEndEvents" ] }, { "cell_type": "code", "execution_count": 3, "id": "a7308a80", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 613 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1683202247025, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "54409a0d", "outputId": "9a47db76-1b85-4b8b-ef12-807d3b6b098b", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0ecec01ad-6f60-46b5-b125-69a7ca27a685path_start0path_start2019-11-01 17:59:13.273932219483890
1ecec01ad-6f60-46b5-b125-69a7ca27a685raw0catalog2019-11-01 17:59:13.273932219483890
29ff63036-fac6-435a-85f4-b0d5a54c557fraw1product12019-11-01 17:59:28.459271219483890
3a3b37935-dd30-4edb-9328-12e6f87ac8bcraw2cart2019-11-01 17:59:29.502214219483890
4ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9raw3catalog2019-11-01 17:59:32.557029219483890
25669735f8a7-baac-4a90-b390-9b378c0ed4e3raw2096main2019-12-06 16:22:57.484842219483890
2567d11c3f44-5472-46ba-83b9-b11d568be624raw2097catalog2019-12-06 16:23:01.331109219483890
2568562884ac-4ad4-4773-b22d-852efb4d7961raw2098catalog2019-12-06 16:23:48.116617219483890
54276a095bf1-f34c-4ba9-b933-9a470e28d4f4raw4542main2020-01-06 22:10:13.635011219483890
542811705e29-00ce-4da3-9c4d-6bc556a0acadraw4543catalog2020-01-06 22:10:15.228575219483890
54293648646a-8797-4563-9d2f-6ff2dc8290fbraw4544cart2020-01-06 22:10:42.309028219483890
5430fdda68c6-d6f7-4d5e-9c80-2c4fcae57850raw4545catalog2020-01-06 22:10:52.255859219483890
5431d8c9a53e-44b6-4054-a878-358d58e3588fraw4546product12020-01-06 22:11:01.709800219483890
5432d3c84c18-0465-4a37-b2f5-ca5529faa0eeraw4547catalog2020-01-06 22:11:02.899490219483890
5433e70ef300-f288-49b9-b079-f4c8f1e1216craw4548catalog2020-01-06 22:11:28.271366219483890
96891a349c5e-a8bd-4d99-8b5b-5095a3da809braw8215main2020-02-14 21:04:49.450696219483890
9690f0983ad4-e199-4ced-bd98-acf161ef60b5raw8216catalog2020-02-14 21:04:51.717127219483890
9691f0983ad4-e199-4ced-bd98-acf161ef60b5path_end8216path_end2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 ecec01ad-6f60-46b5-b125-69a7ca27a685 path_start 0 \n", "1 ecec01ad-6f60-46b5-b125-69a7ca27a685 raw 0 \n", "2 9ff63036-fac6-435a-85f4-b0d5a54c557f raw 1 \n", "3 a3b37935-dd30-4edb-9328-12e6f87ac8bc raw 2 \n", "4 ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9 raw 3 \n", "2566 9735f8a7-baac-4a90-b390-9b378c0ed4e3 raw 2096 \n", "2567 d11c3f44-5472-46ba-83b9-b11d568be624 raw 2097 \n", "2568 562884ac-4ad4-4773-b22d-852efb4d7961 raw 2098 \n", "5427 6a095bf1-f34c-4ba9-b933-9a470e28d4f4 raw 4542 \n", "5428 11705e29-00ce-4da3-9c4d-6bc556a0acad raw 4543 \n", "5429 3648646a-8797-4563-9d2f-6ff2dc8290fb raw 4544 \n", "5430 fdda68c6-d6f7-4d5e-9c80-2c4fcae57850 raw 4545 \n", "5431 d8c9a53e-44b6-4054-a878-358d58e3588f raw 4546 \n", "5432 d3c84c18-0465-4a37-b2f5-ca5529faa0ee raw 4547 \n", "5433 e70ef300-f288-49b9-b079-f4c8f1e1216c raw 4548 \n", "9689 1a349c5e-a8bd-4d99-8b5b-5095a3da809b raw 8215 \n", "9690 f0983ad4-e199-4ced-bd98-acf161ef60b5 raw 8216 \n", "9691 f0983ad4-e199-4ced-bd98-acf161ef60b5 path_end 8216 \n", "\n", " event timestamp user_id \n", "0 path_start 2019-11-01 17:59:13.273932 219483890 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2566 main 2019-12-06 16:22:57.484842 219483890 \n", "2567 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2568 catalog 2019-12-06 16:23:48.116617 219483890 \n", "5427 main 2020-01-06 22:10:13.635011 219483890 \n", "5428 catalog 2020-01-06 22:10:15.228575 219483890 \n", "5429 cart 2020-01-06 22:10:42.309028 219483890 \n", "5430 catalog 2020-01-06 22:10:52.255859 219483890 \n", "5431 product1 2020-01-06 22:11:01.709800 219483890 \n", "5432 catalog 2020-01-06 22:11:02.899490 219483890 \n", "5433 catalog 2020-01-06 22:11:28.271366 219483890 \n", "9689 main 2020-02-14 21:04:49.450696 219483890 \n", "9690 catalog 
2020-02-14 21:04:51.717127 219483890 \n", "9691 path_end 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res = stream.add_start_end_events().to_dataframe()\n", "res[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "537f5f49", "metadata": { "id": "eCd-ueYXPtiV" }, "source": [ "#### SplitSessions" ] }, { "cell_type": "markdown", "id": "6cdc5192-8a43-452d-a548-6d59b7460931", "metadata": {}, "source": [ "##### timeout delimiter" ] }, { "cell_type": "code", "execution_count": 4, "id": "557409a4", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 802 }, "executionInfo": { "elapsed": 721, "status": "ok", "timestamp": 1683202247740, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "084e702d", "outputId": "f27858c1-8777-4832-df3b-c2cb5b1eaea3", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_idsession_id
011d2a4e8-a65a-4d8c-b6b1-1ef84207cd64session_start0session_start2019-11-01 17:59:13.273932219483890219483890_1
1ecec01ad-6f60-46b5-b125-69a7ca27a685raw0catalog2019-11-01 17:59:13.273932219483890219483890_1
29ff63036-fac6-435a-85f4-b0d5a54c557fraw1product12019-11-01 17:59:28.459271219483890219483890_1
3a3b37935-dd30-4edb-9328-12e6f87ac8bcraw2cart2019-11-01 17:59:29.502214219483890219483890_1
4ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9raw3catalog2019-11-01 17:59:32.557029219483890219483890_1
5e263d4f9-f77f-457d-83a5-9a7636286ba2session_end3session_end2019-11-01 17:59:32.557029219483890219483890_1
2922ab3529ff-447f-46d9-bc6c-4bd54512f3dfsession_start2096session_start2019-12-06 16:22:57.484842219483890219483890_2
29239735f8a7-baac-4a90-b390-9b378c0ed4e3raw2096main2019-12-06 16:22:57.484842219483890219483890_2
2924d11c3f44-5472-46ba-83b9-b11d568be624raw2097catalog2019-12-06 16:23:01.331109219483890219483890_2
2925562884ac-4ad4-4773-b22d-852efb4d7961raw2098catalog2019-12-06 16:23:48.116617219483890219483890_2
2926463544ba-f814-4bb4-a1bc-61656642b6dbsession_end2098session_end2019-12-06 16:23:48.116617219483890219483890_2
6426f2d9c9b3-d260-4006-9476-f19261fb5d35session_start4542session_start2020-01-06 22:10:13.635011219483890219483890_3
64276a095bf1-f34c-4ba9-b933-9a470e28d4f4raw4542main2020-01-06 22:10:13.635011219483890219483890_3
642811705e29-00ce-4da3-9c4d-6bc556a0acadraw4543catalog2020-01-06 22:10:15.228575219483890219483890_3
64293648646a-8797-4563-9d2f-6ff2dc8290fbraw4544cart2020-01-06 22:10:42.309028219483890219483890_3
6430fdda68c6-d6f7-4d5e-9c80-2c4fcae57850raw4545catalog2020-01-06 22:10:52.255859219483890219483890_3
6431d8c9a53e-44b6-4054-a878-358d58e3588fraw4546product12020-01-06 22:11:01.709800219483890219483890_3
6432d3c84c18-0465-4a37-b2f5-ca5529faa0eeraw4547catalog2020-01-06 22:11:02.899490219483890219483890_3
6433e70ef300-f288-49b9-b079-f4c8f1e1216craw4548catalog2020-01-06 22:11:28.271366219483890219483890_3
64345a8316e0-9225-4b99-9ccf-29325a2dd2b0session_end4548session_end2020-01-06 22:11:28.271366219483890219483890_3
11785703836c4-86b5-4979-b633-0ee403561a8csession_start8215session_start2020-02-14 21:04:49.450696219483890219483890_4
117861a349c5e-a8bd-4d99-8b5b-5095a3da809braw8215main2020-02-14 21:04:49.450696219483890219483890_4
11787f0983ad4-e199-4ced-bd98-acf161ef60b5raw8216catalog2020-02-14 21:04:51.717127219483890219483890_4
117889a1cf9aa-76a5-4a01-aab1-cc3b9750576bsession_end8216session_end2020-02-14 21:04:51.717127219483890219483890_4
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 11d2a4e8-a65a-4d8c-b6b1-1ef84207cd64 session_start 0 \n", "1 ecec01ad-6f60-46b5-b125-69a7ca27a685 raw 0 \n", "2 9ff63036-fac6-435a-85f4-b0d5a54c557f raw 1 \n", "3 a3b37935-dd30-4edb-9328-12e6f87ac8bc raw 2 \n", "4 ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9 raw 3 \n", "5 e263d4f9-f77f-457d-83a5-9a7636286ba2 session_end 3 \n", "2922 ab3529ff-447f-46d9-bc6c-4bd54512f3df session_start 2096 \n", "2923 9735f8a7-baac-4a90-b390-9b378c0ed4e3 raw 2096 \n", "2924 d11c3f44-5472-46ba-83b9-b11d568be624 raw 2097 \n", "2925 562884ac-4ad4-4773-b22d-852efb4d7961 raw 2098 \n", "2926 463544ba-f814-4bb4-a1bc-61656642b6db session_end 2098 \n", "6426 f2d9c9b3-d260-4006-9476-f19261fb5d35 session_start 4542 \n", "6427 6a095bf1-f34c-4ba9-b933-9a470e28d4f4 raw 4542 \n", "6428 11705e29-00ce-4da3-9c4d-6bc556a0acad raw 4543 \n", "6429 3648646a-8797-4563-9d2f-6ff2dc8290fb raw 4544 \n", "6430 fdda68c6-d6f7-4d5e-9c80-2c4fcae57850 raw 4545 \n", "6431 d8c9a53e-44b6-4054-a878-358d58e3588f raw 4546 \n", "6432 d3c84c18-0465-4a37-b2f5-ca5529faa0ee raw 4547 \n", "6433 e70ef300-f288-49b9-b079-f4c8f1e1216c raw 4548 \n", "6434 5a8316e0-9225-4b99-9ccf-29325a2dd2b0 session_end 4548 \n", "11785 703836c4-86b5-4979-b633-0ee403561a8c session_start 8215 \n", "11786 1a349c5e-a8bd-4d99-8b5b-5095a3da809b raw 8215 \n", "11787 f0983ad4-e199-4ced-bd98-acf161ef60b5 raw 8216 \n", "11788 9a1cf9aa-76a5-4a01-aab1-cc3b9750576b session_end 8216 \n", "\n", " event timestamp user_id session_id \n", "0 session_start 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "1 catalog 2019-11-01 17:59:13.273932 219483890 219483890_1 \n", "2 product1 2019-11-01 17:59:28.459271 219483890 219483890_1 \n", "3 cart 2019-11-01 17:59:29.502214 219483890 219483890_1 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "5 session_end 2019-11-01 17:59:32.557029 219483890 219483890_1 \n", "2922 session_start 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "2923 
main 2019-12-06 16:22:57.484842 219483890 219483890_2 \n", "2924 catalog 2019-12-06 16:23:01.331109 219483890 219483890_2 \n", "2925 catalog 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "2926 session_end 2019-12-06 16:23:48.116617 219483890 219483890_2 \n", "6426 session_start 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "6427 main 2020-01-06 22:10:13.635011 219483890 219483890_3 \n", "6428 catalog 2020-01-06 22:10:15.228575 219483890 219483890_3 \n", "6429 cart 2020-01-06 22:10:42.309028 219483890 219483890_3 \n", "6430 catalog 2020-01-06 22:10:52.255859 219483890 219483890_3 \n", "6431 product1 2020-01-06 22:11:01.709800 219483890 219483890_3 \n", "6432 catalog 2020-01-06 22:11:02.899490 219483890 219483890_3 \n", "6433 catalog 2020-01-06 22:11:28.271366 219483890 219483890_3 \n", "6434 session_end 2020-01-06 22:11:28.271366 219483890 219483890_3 \n", "11785 session_start 2020-02-14 21:04:49.450696 219483890 219483890_4 \n", "11786 main 2020-02-14 21:04:49.450696 219483890 219483890_4 \n", "11787 catalog 2020-02-14 21:04:51.717127 219483890 219483890_4 \n", "11788 session_end 2020-02-14 21:04:51.717127 219483890 219483890_4 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res = stream.split_sessions(timeout=(10, 'm')).to_dataframe()\n", "res[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "3a675a23-c635-4ad4-b850-f62e337b6693", "metadata": {}, "source": [ "##### single delimiting event" ] }, { "cell_type": "code", "execution_count": 5, "id": "d7f91794-b9c5-4cd6-9fe4-e833b844beed", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_ideventtimestampsession_id
0111session_start2023-01-01 00:00:00111_1
1111A2023-01-01 00:00:01111_1
2111B2023-01-01 00:00:02111_1
3111session_end2023-01-01 00:00:02111_1
4111session_start2023-01-01 00:00:04111_2
5111C2023-01-01 00:00:04111_2
6111session_end2023-01-01 00:00:04111_2
\n", "
" ], "text/plain": [ " user_id event timestamp session_id\n", "0 111 session_start 2023-01-01 00:00:00 111_1\n", "1 111 A 2023-01-01 00:00:01 111_1\n", "2 111 B 2023-01-01 00:00:02 111_1\n", "3 111 session_end 2023-01-01 00:00:02 111_1\n", "4 111 session_start 2023-01-01 00:00:04 111_2\n", "5 111 C 2023-01-01 00:00:04 111_2\n", "6 111 session_end 2023-01-01 00:00:04 111_2" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(\n", " [\n", " [111, \"session_delimiter\", \"2023-01-01 00:00:00\"],\n", " [111, \"A\", \"2023-01-01 00:00:01\"],\n", " [111, \"B\", \"2023-01-01 00:00:02\"],\n", " [111, \"session_delimiter\", \"2023-01-01 00:00:04\"],\n", " [111, \"C\", \"2023-01-01 00:00:04\"],\n", " ],\n", " columns=[\"user_id\", \"event\", \"timestamp\"]\n", ")\n", "Eventstream(df)\\\n", " .split_sessions(delimiter_events=['session_delimiter'])\\\n", " .to_dataframe()\\\n", " .sort_values(['user_id', 'event_index'])\\\n", " [['user_id', 'event', 'timestamp', 'session_id']]" ] }, { "cell_type": "markdown", "id": "c40c7e80-24d7-483c-ae27-e3a7340695a2", "metadata": {}, "source": [ "##### paired delimiting event" ] }, { "cell_type": "code", "execution_count": 6, "id": "306a2844-6af2-43f8-8b0f-7416be1c71f6", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_ideventtimestampsession_id
0111session_start2023-01-01 00:00:00111_1
1111A2023-01-01 00:00:01111_1
2111B2023-01-01 00:00:02111_1
3111session_end2023-01-01 00:00:02111_1
4111session_start2023-01-01 00:00:04111_2
5111C2023-01-01 00:00:04111_2
6111session_end2023-01-01 00:00:04111_2
\n", "
" ], "text/plain": [ " user_id event timestamp session_id\n", "0 111 session_start 2023-01-01 00:00:00 111_1\n", "1 111 A 2023-01-01 00:00:01 111_1\n", "2 111 B 2023-01-01 00:00:02 111_1\n", "3 111 session_end 2023-01-01 00:00:02 111_1\n", "4 111 session_start 2023-01-01 00:00:04 111_2\n", "5 111 C 2023-01-01 00:00:04 111_2\n", "6 111 session_end 2023-01-01 00:00:04 111_2" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(\n", " [\n", " [111, \"custom_start\", \"2023-01-01 00:00:00\"],\n", " [111, \"A\", \"2023-01-01 00:00:01\"],\n", " [111, \"B\", \"2023-01-01 00:00:02\"],\n", " [111, \"custom_end\", \"2023-01-01 00:00:02\"],\n", " [111, \"custom_start\", \"2023-01-01 00:00:04\"],\n", " [111, \"C\", \"2023-01-01 00:00:04\"],\n", " [111, \"custom_end\", \"2023-01-01 00:00:04\"]\n", " ],\n", " columns=[\"user_id\", \"event\", \"timestamp\"]\n", ")\n", "dummy_stream = Eventstream(df)\n", "dummy_stream.split_sessions(delimiter_events=['custom_start', 'custom_end'])\\\n", " .to_dataframe()\\\n", " .sort_values(['user_id', 'event_index'])\\\n", " [['user_id', 'event', 'timestamp', 'session_id']]" ] }, { "cell_type": "markdown", "id": "2bf003c1-01f6-4013-8fc3-2c10582daf3c", "metadata": {}, "source": [ "##### custom session column" ] }, { "cell_type": "code", "execution_count": 7, "id": "5554d4dd-cbff-4045-8ee7-e2900d050135", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
user_ideventtimestampsession_idcustom_ses_id
0111session_start2023-01-01 00:00:01111_1session_1
1111A2023-01-01 00:00:01111_1session_1
2111B2023-01-01 00:00:02111_1session_1
3111session_end2023-01-01 00:00:02111_1session_1
4111session_start2023-01-01 00:00:03111_2session_2
5111C2023-01-01 00:00:03111_2session_2
6111D2023-01-01 00:00:04111_2session_2
7111session_end2023-01-01 00:00:04111_2session_2
\n", "
" ], "text/plain": [ " user_id event timestamp session_id custom_ses_id\n", "0 111 session_start 2023-01-01 00:00:01 111_1 session_1\n", "1 111 A 2023-01-01 00:00:01 111_1 session_1\n", "2 111 B 2023-01-01 00:00:02 111_1 session_1\n", "3 111 session_end 2023-01-01 00:00:02 111_1 session_1\n", "4 111 session_start 2023-01-01 00:00:03 111_2 session_2\n", "5 111 C 2023-01-01 00:00:03 111_2 session_2\n", "6 111 D 2023-01-01 00:00:04 111_2 session_2\n", "7 111 session_end 2023-01-01 00:00:04 111_2 session_2" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.DataFrame(\n", " [\n", " [111, \"A\", \"2023-01-01 00:00:01\", \"session_1\"],\n", " [111, \"B\", \"2023-01-01 00:00:02\", \"session_1\"],\n", " [111, \"C\", \"2023-01-01 00:00:03\", \"session_2\"],\n", " [111, \"D\", \"2023-01-01 00:00:04\", \"session_2\"],\n", " ],\n", " columns=[\"user_id\", \"event\", \"timestamp\", \"custom_ses_id\"]\n", ")\n", "raw_data_schema = {\"custom_cols\": [{\"raw_data_col\": \"custom_ses_id\", \"custom_col\": \"custom_ses_id\"}]}\n", "dummy_stream = Eventstream(df, raw_data_schema=raw_data_schema)\n", "dummy_stream.split_sessions(delimiter_col=\"custom_ses_id\")\\\n", " .to_dataframe()\\\n", " .sort_values([\"user_id\", \"event_index\"])\\\n", " [[\"user_id\", \"event\", \"timestamp\", \"session_id\", \"custom_ses_id\"]]\n" ] }, { "cell_type": "markdown", "id": "48554ba0", "metadata": { "id": "_e5K-yFLOnAL" }, "source": [ "#### LabelNewUsers" ] }, { "cell_type": "code", "execution_count": 8, "id": "637b20ee", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 266, "status": "ok", "timestamp": 1683202248002, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "78be028a", "outputId": "31b3dd35-f078-4ad6-f168-5404dd962ac5", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0ecec01ad-6f60-46b5-b125-69a7ca27a685new_user0new_user2019-11-01 17:59:13.273932219483890
1ecec01ad-6f60-46b5-b125-69a7ca27a685raw0catalog2019-11-01 17:59:13.273932219483890
29ff63036-fac6-435a-85f4-b0d5a54c557fraw1product12019-11-01 17:59:28.459271219483890
3a3b37935-dd30-4edb-9328-12e6f87ac8bcraw2cart2019-11-01 17:59:29.502214219483890
4ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9raw3catalog2019-11-01 17:59:32.557029219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 ecec01ad-6f60-46b5-b125-69a7ca27a685 new_user 0 new_user \n", "1 ecec01ad-6f60-46b5-b125-69a7ca27a685 raw 0 catalog \n", "2 9ff63036-fac6-435a-85f4-b0d5a54c557f raw 1 product1 \n", "3 a3b37935-dd30-4edb-9328-12e6f87ac8bc raw 2 cart \n", "4 ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9 raw 3 catalog \n", "\n", " timestamp user_id \n", "0 2019-11-01 17:59:13.273932 219483890 \n", "1 2019-11-01 17:59:13.273932 219483890 \n", "2 2019-11-01 17:59:28.459271 219483890 \n", "3 2019-11-01 17:59:29.502214 219483890 \n", "4 2019-11-01 17:59:32.557029 219483890 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_users = [219483890, 964964743, 965024600]\n", "res = stream.label_new_users(new_users_list=new_users).to_dataframe()\n", "res[res['user_id'] == 219483890].head()" ] }, { "cell_type": "code", "execution_count": 9, "id": "fc13e19e", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1683202248003, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "892ce5f5", "outputId": "f5086658-2a85-4202-d192-b8a010849771", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
162409d3dcdb2-473f-42eb-a2a1-7665877c828cexisting_user14768existing_user2020-04-02 05:36:04.896839501098384
162419d3dcdb2-473f-42eb-a2a1-7665877c828craw14768main2020-04-02 05:36:04.896839501098384
1624272459794-2d7b-465b-af08-30ee37b8f603raw14769catalog2020-04-02 05:36:05.371141501098384
1624393d419c1-9075-4714-bcf3-3e65a04ab84fraw14770main2020-04-02 05:36:40.814504501098384
16244dd5a3672-c675-4c96-b211-d0bdeb2b33aaraw14771catalog2020-04-02 05:36:41.190946501098384
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "16240 9d3dcdb2-473f-42eb-a2a1-7665877c828c existing_user 14768 \n", "16241 9d3dcdb2-473f-42eb-a2a1-7665877c828c raw 14768 \n", "16242 72459794-2d7b-465b-af08-30ee37b8f603 raw 14769 \n", "16243 93d419c1-9075-4714-bcf3-3e65a04ab84f raw 14770 \n", "16244 dd5a3672-c675-4c96-b211-d0bdeb2b33aa raw 14771 \n", "\n", " event timestamp user_id \n", "16240 existing_user 2020-04-02 05:36:04.896839 501098384 \n", "16241 main 2020-04-02 05:36:04.896839 501098384 \n", "16242 catalog 2020-04-02 05:36:05.371141 501098384 \n", "16243 main 2020-04-02 05:36:40.814504 501098384 \n", "16244 catalog 2020-04-02 05:36:41.190946 501098384 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 501098384].head()" ] }, { "cell_type": "markdown", "id": "c03b4d11", "metadata": { "id": "fEH11q4-Tk7D" }, "source": [ "#### LabelLostUsers" ] }, { "cell_type": "code", "execution_count": 10, "id": "032cb8eb", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 1025, "status": "ok", "timestamp": 1683202249020, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "8a507b33", "outputId": "a4a8f437-ff06-4a3a-f316-041dc13dbbca", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
4880d3c84c18-0465-4a37-b2f5-ca5529faa0eeraw4547catalog2020-01-06 22:11:02.899490219483890
4881e70ef300-f288-49b9-b079-f4c8f1e1216craw4548catalog2020-01-06 22:11:28.271366219483890
88081a349c5e-a8bd-4d99-8b5b-5095a3da809braw8215main2020-02-14 21:04:49.450696219483890
8809f0983ad4-e199-4ced-bd98-acf161ef60b5raw8216catalog2020-02-14 21:04:51.717127219483890
8810f0983ad4-e199-4ced-bd98-acf161ef60b5lost_user8216lost_user2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "4880 d3c84c18-0465-4a37-b2f5-ca5529faa0ee raw 4547 catalog \n", "4881 e70ef300-f288-49b9-b079-f4c8f1e1216c raw 4548 catalog \n", "8808 1a349c5e-a8bd-4d99-8b5b-5095a3da809b raw 8215 main \n", "8809 f0983ad4-e199-4ced-bd98-acf161ef60b5 raw 8216 catalog \n", "8810 f0983ad4-e199-4ced-bd98-acf161ef60b5 lost_user 8216 lost_user \n", "\n", " timestamp user_id \n", "4880 2020-01-06 22:11:02.899490 219483890 \n", "4881 2020-01-06 22:11:28.271366 219483890 \n", "8808 2020-02-14 21:04:49.450696 219483890 \n", "8809 2020-02-14 21:04:51.717127 219483890 \n", "8810 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lost_users_list = [219483890, 964964743, 965024600]\n", "res = stream.label_lost_users(lost_users_list=lost_users_list).to_dataframe()\n", "res[res['user_id'] == 219483890].tail()" ] }, { "cell_type": "code", "execution_count": 11, "id": "a454f04f", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 21, "status": "ok", "timestamp": 1683202249022, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "WHUHp-uRVqc1", "outputId": "495f54ec-ccf5-4b75-ca12-3ad3b5df27dd", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
36029bbd75140-bbfc-4fac-a76e-a24c30bcc2f9raw32279catalog2020-04-29 12:47:40.975732501098384
36030c9749644-decf-424c-b95d-8d1a47b7a0bfraw32280catalog2020-04-29 12:48:01.809577501098384
360312d436d10-6f88-4e6c-9d03-e9ac9dd3b4dbraw32281main2020-04-29 12:48:01.938488501098384
360325b3b8ee9-226c-4759-8c21-2bf1f52fc2e3raw32282catalog2020-04-29 12:48:06.595390501098384
360335b3b8ee9-226c-4759-8c21-2bf1f52fc2e3absent_user32282absent_user2020-04-29 12:48:06.595390501098384
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "36029 bbd75140-bbfc-4fac-a76e-a24c30bcc2f9 raw 32279 \n", "36030 c9749644-decf-424c-b95d-8d1a47b7a0bf raw 32280 \n", "36031 2d436d10-6f88-4e6c-9d03-e9ac9dd3b4db raw 32281 \n", "36032 5b3b8ee9-226c-4759-8c21-2bf1f52fc2e3 raw 32282 \n", "36033 5b3b8ee9-226c-4759-8c21-2bf1f52fc2e3 absent_user 32282 \n", "\n", " event timestamp user_id \n", "36029 catalog 2020-04-29 12:47:40.975732 501098384 \n", "36030 catalog 2020-04-29 12:48:01.809577 501098384 \n", "36031 main 2020-04-29 12:48:01.938488 501098384 \n", "36032 catalog 2020-04-29 12:48:06.595390 501098384 \n", "36033 absent_user 2020-04-29 12:48:06.595390 501098384 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 501098384].tail()" ] }, { "cell_type": "code", "execution_count": 12, "id": "ba4adec4", "metadata": { "executionInfo": { "elapsed": 19, "status": "ok", "timestamp": 1683202249022, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "44f99fee", "tags": [] }, "outputs": [], "source": [ "res = stream.label_lost_users(timeout=(30, 'D')).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 13, "id": "d363b1a1", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 20, "status": "ok", "timestamp": 1683202249023, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "jxysYxu6bDac", "outputId": "b19d7c0b-afa9-4f52-e854-30860b1f0c9f", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Timestamp('2020-04-29 12:48:06.595390')" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['timestamp'].max()" ] }, { "cell_type": "code", "execution_count": 14, "id": "dab04f17", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 15, "status": 
"ok", "timestamp": 1683202249024, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "Dk-3_rwIa8HZ", "outputId": "bf77c9d2-66bd-4f0c-ba0a-a7a6aa188432", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
472a324422-7e5d-4966-95c3-343145643789raw47catalog2019-11-02 01:14:08.664850495985018
48062a6cb1-c3ab-43d9-be9e-ab5ea865c0e8raw48cart2019-11-02 01:14:37.435643495985018
49062a6cb1-c3ab-43d9-be9e-ab5ea865c0e8lost_user48lost_user2019-11-02 01:14:37.435643495985018
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "47 2a324422-7e5d-4966-95c3-343145643789 raw 47 catalog \n", "48 062a6cb1-c3ab-43d9-be9e-ab5ea865c0e8 raw 48 cart \n", "49 062a6cb1-c3ab-43d9-be9e-ab5ea865c0e8 lost_user 48 lost_user \n", "\n", " timestamp user_id \n", "47 2019-11-02 01:14:08.664850 495985018 \n", "48 2019-11-02 01:14:37.435643 495985018 \n", "49 2019-11-02 01:14:37.435643 495985018 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 495985018]" ] }, { "cell_type": "code", "execution_count": 15, "id": "798aca6a", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 425 }, "executionInfo": { "elapsed": 14, "status": "ok", "timestamp": 1683202249024, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "MqYBfIIhcV3R", "outputId": "18d98cb7-3ee6-4902-d2d0-9c2c6d64fe59", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
24644f1362fa9-b513-4823-86f2-9783ea53b2c3raw22394main2020-04-15 21:02:36.903678819489198
2464532f15a83-5cad-4e7a-90e5-1b1a77c71b63raw22395catalog2020-04-15 21:02:37.658557819489198
246469964c30b-b4db-4ef8-896f-343da191b5bbraw22396catalog2020-04-15 21:02:48.699804819489198
24647019faafa-ed8b-4f5c-82a5-5af1143d7090raw22397product22020-04-15 21:02:51.173118819489198
24649b066e86f-6c57-4ff2-b4e7-72b43fc1926craw22399catalog2020-04-15 21:03:05.813046819489198
24651b96256ac-8cb1-4f1b-a027-85eaacdbdd8craw22401cart2020-04-15 21:03:35.216033819489198
24655437b69b1-236b-489f-9ea3-f401b1650890raw22404delivery_choice2020-04-15 21:03:40.745520819489198
246569679e83c-382a-47fc-bb67-3d96b226be53raw22405delivery_pickup2020-04-15 21:03:46.448349819489198
246573e04a2b5-132a-45d0-b72a-719d047b2a2eraw22406payment_choice2020-04-15 21:03:46.575300819489198
24658dfeb4d3c-1011-4c8c-8879-f847deb08e28raw22407payment_card2020-04-15 21:03:46.862126819489198
24659c9703af8-b64b-46cf-8a5f-d56873d81e18raw22408payment_done2020-04-15 21:03:47.074946819489198
24660c9703af8-b64b-46cf-8a5f-d56873d81e18absent_user22408absent_user2020-04-15 21:03:47.074946819489198
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "24644 f1362fa9-b513-4823-86f2-9783ea53b2c3 raw 22394 \n", "24645 32f15a83-5cad-4e7a-90e5-1b1a77c71b63 raw 22395 \n", "24646 9964c30b-b4db-4ef8-896f-343da191b5bb raw 22396 \n", "24647 019faafa-ed8b-4f5c-82a5-5af1143d7090 raw 22397 \n", "24649 b066e86f-6c57-4ff2-b4e7-72b43fc1926c raw 22399 \n", "24651 b96256ac-8cb1-4f1b-a027-85eaacdbdd8c raw 22401 \n", "24655 437b69b1-236b-489f-9ea3-f401b1650890 raw 22404 \n", "24656 9679e83c-382a-47fc-bb67-3d96b226be53 raw 22405 \n", "24657 3e04a2b5-132a-45d0-b72a-719d047b2a2e raw 22406 \n", "24658 dfeb4d3c-1011-4c8c-8879-f847deb08e28 raw 22407 \n", "24659 c9703af8-b64b-46cf-8a5f-d56873d81e18 raw 22408 \n", "24660 c9703af8-b64b-46cf-8a5f-d56873d81e18 absent_user 22408 \n", "\n", " event timestamp user_id \n", "24644 main 2020-04-15 21:02:36.903678 819489198 \n", "24645 catalog 2020-04-15 21:02:37.658557 819489198 \n", "24646 catalog 2020-04-15 21:02:48.699804 819489198 \n", "24647 product2 2020-04-15 21:02:51.173118 819489198 \n", "24649 catalog 2020-04-15 21:03:05.813046 819489198 \n", "24651 cart 2020-04-15 21:03:35.216033 819489198 \n", "24655 delivery_choice 2020-04-15 21:03:40.745520 819489198 \n", "24656 delivery_pickup 2020-04-15 21:03:46.448349 819489198 \n", "24657 payment_choice 2020-04-15 21:03:46.575300 819489198 \n", "24658 payment_card 2020-04-15 21:03:46.862126 819489198 \n", "24659 payment_done 2020-04-15 21:03:47.074946 819489198 \n", "24660 absent_user 2020-04-15 21:03:47.074946 819489198 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 819489198]" ] }, { "cell_type": "markdown", "id": "b2dab007", "metadata": { "id": "Zaox_z_rdfN_" }, "source": [ "#### AddPositiveEvents" ] }, { "cell_type": "code", "execution_count": 16, "id": "dcf9fa06", "metadata": { "executionInfo": { "elapsed": 252, "status": "ok", "timestamp": 1683202249263, "user": { "displayName": "Julia Ostanina", "userId": 
"13687663492290466770" }, "user_tz": -120 }, "id": "e3085bf2", "tags": [] }, "outputs": [], "source": [ "positive_events = ['cart', 'payment_done']\n", "res = stream.add_positive_events(\n", " targets=positive_events\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 17, "id": "ca07590b", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 582 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1683202249263, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "ac0c040a", "outputId": "8658076d-e712-4c41-bff9-e230465e2e17", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0680f3db3-8641-4cc2-bdb4-e559fb40c37braw0catalog2019-11-01 17:59:13.273932219483890
19a64cf4a-6770-420c-82ff-1b9bb3e9d57braw1product12019-11-01 17:59:28.459271219483890
209a12a74-32a0-49d9-8a87-5fff7041903fraw2cart2019-11-01 17:59:29.502214219483890
309a12a74-32a0-49d9-8a87-5fff7041903fpositive_target2positive_target_cart2019-11-01 17:59:29.502214219483890
4859420b5-48cd-4f47-ad85-b9faca15def4raw3catalog2019-11-01 17:59:32.557029219483890
2244ff073183-f2ed-48d0-833f-1b064823bc33raw2096main2019-12-06 16:22:57.484842219483890
22456dd364d0-5cc7-450a-8a13-e8abba6a0ab5raw2097catalog2019-12-06 16:23:01.331109219483890
224645d691f3-9ea4-414a-8c51-c3b57f2ea328raw2098catalog2019-12-06 16:23:48.116617219483890
48201a317681-a863-4507-b840-810a7db308d0raw4542main2020-01-06 22:10:13.635011219483890
482141618dc0-95ad-4c6f-ba1b-2ad577ebff47raw4543catalog2020-01-06 22:10:15.228575219483890
48227f4504d7-0245-440f-ac4e-dcf7258662d6raw4544cart2020-01-06 22:10:42.309028219483890
482371b2ba63-1e75-46ae-a87e-a1c68b4728b1raw4545catalog2020-01-06 22:10:52.255859219483890
4824174ba987-d1ae-4b5b-85a9-876b67e2c060raw4546product12020-01-06 22:11:01.709800219483890
4825b87d0455-3f9a-4309-a6a8-45bda9fd9341raw4547catalog2020-01-06 22:11:02.899490219483890
4826f0e1a8a6-76e8-439c-b538-20bfbef1da6araw4548catalog2020-01-06 22:11:28.271366219483890
8665fa2c94d1-cf0d-4d33-8a40-b93cff2ca99eraw8215main2020-02-14 21:04:49.450696219483890
8666c881e3fd-8d3a-4cce-ae37-ad4131c703f0raw8216catalog2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 680f3db3-8641-4cc2-bdb4-e559fb40c37b raw 0 \n", "1 9a64cf4a-6770-420c-82ff-1b9bb3e9d57b raw 1 \n", "2 09a12a74-32a0-49d9-8a87-5fff7041903f raw 2 \n", "3 09a12a74-32a0-49d9-8a87-5fff7041903f positive_target 2 \n", "4 859420b5-48cd-4f47-ad85-b9faca15def4 raw 3 \n", "2244 ff073183-f2ed-48d0-833f-1b064823bc33 raw 2096 \n", "2245 6dd364d0-5cc7-450a-8a13-e8abba6a0ab5 raw 2097 \n", "2246 45d691f3-9ea4-414a-8c51-c3b57f2ea328 raw 2098 \n", "4820 1a317681-a863-4507-b840-810a7db308d0 raw 4542 \n", "4821 41618dc0-95ad-4c6f-ba1b-2ad577ebff47 raw 4543 \n", "4822 7f4504d7-0245-440f-ac4e-dcf7258662d6 raw 4544 \n", "4823 71b2ba63-1e75-46ae-a87e-a1c68b4728b1 raw 4545 \n", "4824 174ba987-d1ae-4b5b-85a9-876b67e2c060 raw 4546 \n", "4825 b87d0455-3f9a-4309-a6a8-45bda9fd9341 raw 4547 \n", "4826 f0e1a8a6-76e8-439c-b538-20bfbef1da6a raw 4548 \n", "8665 fa2c94d1-cf0d-4d33-8a40-b93cff2ca99e raw 8215 \n", "8666 c881e3fd-8d3a-4cce-ae37-ad4131c703f0 raw 8216 \n", "\n", " event timestamp user_id \n", "0 catalog 2019-11-01 17:59:13.273932 219483890 \n", "1 product1 2019-11-01 17:59:28.459271 219483890 \n", "2 cart 2019-11-01 17:59:29.502214 219483890 \n", "3 positive_target_cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2244 main 2019-12-06 16:22:57.484842 219483890 \n", "2245 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2246 catalog 2019-12-06 16:23:48.116617 219483890 \n", "4820 main 2020-01-06 22:10:13.635011 219483890 \n", "4821 catalog 2020-01-06 22:10:15.228575 219483890 \n", "4822 cart 2020-01-06 22:10:42.309028 219483890 \n", "4823 catalog 2020-01-06 22:10:52.255859 219483890 \n", "4824 product1 2020-01-06 22:11:01.709800 219483890 \n", "4825 catalog 2020-01-06 22:11:02.899490 219483890 \n", "4826 catalog 2020-01-06 22:11:28.271366 219483890 \n", "8665 main 2020-02-14 21:04:49.450696 219483890 \n", "8666 catalog 2020-02-14 21:04:51.717127 219483890 " ] }, 
"execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "code", "execution_count": 18, "id": "442001b3", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 174 }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1683202249264, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "5f154748", "outputId": "8b197e5d-8430-4761-f3ba-3570c5879969", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
67de63a200-508b-4b3b-851e-630fcbecd662raw60main2019-11-02 07:28:07.28554124427596
68a44c6b84-e6b5-410c-9dce-9db051322cf4raw61catalog2019-11-02 07:28:14.31985024427596
69f53be094-fc0f-4227-838d-d044862fb1fdraw62catalog2019-11-02 07:29:08.30133324427596
7007a9f5e4-bc43-4140-a0cb-fcf68b923fbbraw63catalog2019-11-02 07:29:41.84839624427596
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "67 de63a200-508b-4b3b-851e-630fcbecd662 raw 60 main \n", "68 a44c6b84-e6b5-410c-9dce-9db051322cf4 raw 61 catalog \n", "69 f53be094-fc0f-4227-838d-d044862fb1fd raw 62 catalog \n", "70 07a9f5e4-bc43-4140-a0cb-fcf68b923fbb raw 63 catalog \n", "\n", " timestamp user_id \n", "67 2019-11-02 07:28:07.285541 24427596 \n", "68 2019-11-02 07:28:14.319850 24427596 \n", "69 2019-11-02 07:29:08.301333 24427596 \n", "70 2019-11-02 07:29:41.848396 24427596 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 24427596]" ] }, { "cell_type": "code", "execution_count": 19, "id": "ac627d7b", "metadata": { "executionInfo": { "elapsed": 250, "status": "ok", "timestamp": 1683202249509, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "raAvh_BFqK2i", "tags": [] }, "outputs": [], "source": [ "def custom_func(eventstream, targets) -> pd.DataFrame:\n", "\n", " event_col = eventstream.schema.event_name\n", " df = eventstream.to_dataframe()\n", "\n", " return df[df[event_col].isin(targets)]\n", "\n", "res = stream.add_positive_events(\n", " targets=positive_events,\n", " func=custom_func\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 20, "id": "a023a8ba", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 613 }, "executionInfo": { "elapsed": 260, "status": "ok", "timestamp": 1683202249767, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "n1j7mtHdqeoM", "outputId": "9dbc9448-63b1-4bc3-ef55-aff1842ecd09", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
039c5cb78-1ead-4f7a-9259-96a7e3748886raw0catalog2019-11-01 17:59:13.273932219483890
115101e9d-6a36-424c-b73a-419f77b1b3e9raw1product12019-11-01 17:59:28.459271219483890
256f93cf1-f4cc-4409-8d1a-ea88b9f959d3raw2cart2019-11-01 17:59:29.502214219483890
356f93cf1-f4cc-4409-8d1a-ea88b9f959d3positive_target2positive_target_cart2019-11-01 17:59:29.502214219483890
4ec563472-5c99-485a-94fa-cab2763d7bd8raw3catalog2019-11-01 17:59:32.557029219483890
23404f03a03f-10ed-46ec-b1c5-8951376db4edraw2096main2019-12-06 16:22:57.484842219483890
23410fbb9b88-dac5-4a9d-999e-c708380ee6b4raw2097catalog2019-12-06 16:23:01.331109219483890
23426a901e98-9db2-49e5-9b55-66fe0c800953raw2098catalog2019-12-06 16:23:48.116617219483890
504121e37e31-b349-4f1b-a447-eec89fd14a4craw4542main2020-01-06 22:10:13.635011219483890
50426901a99c-7766-4a1c-bb22-1b12b224f1a3raw4543catalog2020-01-06 22:10:15.228575219483890
5043c5c2517c-31b3-4c36-b67e-4cda45ce8a7braw4544cart2020-01-06 22:10:42.309028219483890
5044c5c2517c-31b3-4c36-b67e-4cda45ce8a7bpositive_target4544positive_target_cart2020-01-06 22:10:42.309028219483890
50454f5d5eb7-60b9-4c14-80b4-50ee770bd690raw4545catalog2020-01-06 22:10:52.255859219483890
5046b581162b-fc24-48b1-9009-c26e06062b77raw4546product12020-01-06 22:11:01.709800219483890
5047c5eb706d-41da-4860-94a6-5f6e5655428craw4547catalog2020-01-06 22:11:02.899490219483890
504879c77d6b-a8c1-4a68-bd67-b8fde6d21032raw4548catalog2020-01-06 22:11:28.271366219483890
909805170f22-7158-4009-867c-1b21d90e0aferaw8215main2020-02-14 21:04:49.450696219483890
909955ec5d5b-5448-4710-8fb6-f035c8931bbbraw8216catalog2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 39c5cb78-1ead-4f7a-9259-96a7e3748886 raw 0 \n", "1 15101e9d-6a36-424c-b73a-419f77b1b3e9 raw 1 \n", "2 56f93cf1-f4cc-4409-8d1a-ea88b9f959d3 raw 2 \n", "3 56f93cf1-f4cc-4409-8d1a-ea88b9f959d3 positive_target 2 \n", "4 ec563472-5c99-485a-94fa-cab2763d7bd8 raw 3 \n", "2340 4f03a03f-10ed-46ec-b1c5-8951376db4ed raw 2096 \n", "2341 0fbb9b88-dac5-4a9d-999e-c708380ee6b4 raw 2097 \n", "2342 6a901e98-9db2-49e5-9b55-66fe0c800953 raw 2098 \n", "5041 21e37e31-b349-4f1b-a447-eec89fd14a4c raw 4542 \n", "5042 6901a99c-7766-4a1c-bb22-1b12b224f1a3 raw 4543 \n", "5043 c5c2517c-31b3-4c36-b67e-4cda45ce8a7b raw 4544 \n", "5044 c5c2517c-31b3-4c36-b67e-4cda45ce8a7b positive_target 4544 \n", "5045 4f5d5eb7-60b9-4c14-80b4-50ee770bd690 raw 4545 \n", "5046 b581162b-fc24-48b1-9009-c26e06062b77 raw 4546 \n", "5047 c5eb706d-41da-4860-94a6-5f6e5655428c raw 4547 \n", "5048 79c77d6b-a8c1-4a68-bd67-b8fde6d21032 raw 4548 \n", "9098 05170f22-7158-4009-867c-1b21d90e0afe raw 8215 \n", "9099 55ec5d5b-5448-4710-8fb6-f035c8931bbb raw 8216 \n", "\n", " event timestamp user_id \n", "0 catalog 2019-11-01 17:59:13.273932 219483890 \n", "1 product1 2019-11-01 17:59:28.459271 219483890 \n", "2 cart 2019-11-01 17:59:29.502214 219483890 \n", "3 positive_target_cart 2019-11-01 17:59:29.502214 219483890 \n", "4 catalog 2019-11-01 17:59:32.557029 219483890 \n", "2340 main 2019-12-06 16:22:57.484842 219483890 \n", "2341 catalog 2019-12-06 16:23:01.331109 219483890 \n", "2342 catalog 2019-12-06 16:23:48.116617 219483890 \n", "5041 main 2020-01-06 22:10:13.635011 219483890 \n", "5042 catalog 2020-01-06 22:10:15.228575 219483890 \n", "5043 cart 2020-01-06 22:10:42.309028 219483890 \n", "5044 positive_target_cart 2020-01-06 22:10:42.309028 219483890 \n", "5045 catalog 2020-01-06 22:10:52.255859 219483890 \n", "5046 product1 2020-01-06 22:11:01.709800 219483890 \n", "5047 catalog 2020-01-06 22:11:02.899490 219483890 \n", "5048 catalog 2020-01-06 
22:11:28.271366 219483890 \n", "9098 main 2020-02-14 21:04:49.450696 219483890 \n", "9099 catalog 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "6c14278e", "metadata": { "id": "Dsy2d3d4kgPo" }, "source": [ "#### AddNegativeEvents" ] }, { "cell_type": "code", "execution_count": 21, "id": "43fe9601", "metadata": { "executionInfo": { "elapsed": 255, "status": "ok", "timestamp": 1683202250018, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "NuZkvgl3DgO7", "tags": [] }, "outputs": [], "source": [ "negative_events = ['delivery_courier']\n", "\n", "res = stream.add_negative_events(\n", " targets=negative_events\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 22, "id": "b17d4327", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 269 }, "executionInfo": { "elapsed": 7, "status": "ok", "timestamp": 1683202250018, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "8d6e4fa4", "outputId": "aeb2fb97-dcbb-4b25-a329-a49aac6ab6ee", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
36a1cc3d65-12d7-487b-919f-1fa0f47d33d8raw36cart2019-11-01 22:35:50.437706629881394
38a0097cba-bacc-40ee-9158-306c7a1bad4eraw38delivery_choice2019-11-01 22:35:57.649549629881394
398715bf14-1ddd-4356-9c5d-eacd5f49af31raw39delivery_courier2019-11-01 22:36:02.009271629881394
408715bf14-1ddd-4356-9c5d-eacd5f49af31negative_target39negative_target_delivery_courier2019-11-01 22:36:02.009271629881394
44bc254a86-4be3-4ffe-a763-3b3233eb133eraw42payment_choice2019-11-01 22:36:02.243274629881394
46e6e489f6-4736-4483-83b2-2fd693df0052raw44payment_cash2019-11-01 22:36:03.415201629881394
473d30c9e4-9d75-43cf-ad08-392974455f17raw45payment_done2019-11-01 22:36:03.999697629881394
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "36 a1cc3d65-12d7-487b-919f-1fa0f47d33d8 raw 36 \n", "38 a0097cba-bacc-40ee-9158-306c7a1bad4e raw 38 \n", "39 8715bf14-1ddd-4356-9c5d-eacd5f49af31 raw 39 \n", "40 8715bf14-1ddd-4356-9c5d-eacd5f49af31 negative_target 39 \n", "44 bc254a86-4be3-4ffe-a763-3b3233eb133e raw 42 \n", "46 e6e489f6-4736-4483-83b2-2fd693df0052 raw 44 \n", "47 3d30c9e4-9d75-43cf-ad08-392974455f17 raw 45 \n", "\n", " event timestamp user_id \n", "36 cart 2019-11-01 22:35:50.437706 629881394 \n", "38 delivery_choice 2019-11-01 22:35:57.649549 629881394 \n", "39 delivery_courier 2019-11-01 22:36:02.009271 629881394 \n", "40 negative_target_delivery_courier 2019-11-01 22:36:02.009271 629881394 \n", "44 payment_choice 2019-11-01 22:36:02.243274 629881394 \n", "46 payment_cash 2019-11-01 22:36:03.415201 629881394 \n", "47 payment_done 2019-11-01 22:36:03.999697 629881394 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 629881394].loc[36:48]" ] }, { "cell_type": "markdown", "id": "45402690", "metadata": { "id": "AHKODj-tpTA-" }, "source": [ "#### LabelCroppedPaths" ] }, { "cell_type": "code", "execution_count": 23, "id": "a9038764", "metadata": { "colab": { "background_save": true }, "executionInfo": { "elapsed": 442, "status": "ok", "timestamp": 1683202250455, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "ea101f85", "tags": [] }, "outputs": [], "source": [ "params = {\n", " 'left_cutoff': (4, 'D'),\n", " 'right_cutoff': (3, 'D')\n", "}\n", "\n", "res = stream.label_cropped_paths(**params).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 24, "id": "a64226c8", "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202250455, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" 
}, "user_tz": -120 }, "id": "9bb12ad4", "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Eventstream start: 2019-11-01 17:59:13.273932\n", "Eventstream end: 2020-04-29 12:48:06.595390\n" ] } ], "source": [ "print('Eventstream start: {}'.format(res.timestamp.min()))\n", "print('Eventstream end: {}'.format(res.timestamp.max()))" ] }, { "cell_type": "code", "execution_count": 25, "id": "a6b083e9", "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1683202250456, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "9062db7a", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
472a324422-7e5d-4966-95c3-343145643789cropped_left47cropped_left2019-11-02 01:14:08.664850495985018
482a324422-7e5d-4966-95c3-343145643789raw47catalog2019-11-02 01:14:08.664850495985018
49062a6cb1-c3ab-43d9-be9e-ab5ea865c0e8raw48cart2019-11-02 01:14:37.435643495985018
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "47 2a324422-7e5d-4966-95c3-343145643789 cropped_left 47 \n", "48 2a324422-7e5d-4966-95c3-343145643789 raw 47 \n", "49 062a6cb1-c3ab-43d9-be9e-ab5ea865c0e8 raw 48 \n", "\n", " event timestamp user_id \n", "47 cropped_left 2019-11-02 01:14:08.664850 495985018 \n", "48 catalog 2019-11-02 01:14:08.664850 495985018 \n", "49 cart 2019-11-02 01:14:37.435643 495985018 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 495985018]" ] }, { "cell_type": "code", "execution_count": 26, "id": "49fb8111", "metadata": { "colab": { "background_save": true, "base_uri": "https://localhost:8080/", "height": 237 }, "executionInfo": { "elapsed": 7, "status": "ok", "timestamp": 1683202250456, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "b2814dfd", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
32533f7ab012f-239b-48be-bb14-ffd2c17f30caraw32258catalog2020-04-29 12:24:21.538805831491833
32534f133a5e6-66fa-4dcc-8b22-07f16db424e1raw32259catalog2020-04-29 12:24:33.841264831491833
32535b30b756c-842b-4713-a020-353269638c66raw32260product22020-04-29 12:24:39.415424831491833
325369b61306b-dfd4-48ff-bb7f-417bc901eaaeraw32261cart2020-04-29 12:24:59.928499831491833
32537e5e2e96b-9def-49c2-a118-68c7cb2d4176raw32262catalog2020-04-29 12:25:06.262205831491833
32538e5e2e96b-9def-49c2-a118-68c7cb2d4176cropped_right32262cropped_right2020-04-29 12:25:06.262205831491833
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "32533 f7ab012f-239b-48be-bb14-ffd2c17f30ca raw 32258 \n", "32534 f133a5e6-66fa-4dcc-8b22-07f16db424e1 raw 32259 \n", "32535 b30b756c-842b-4713-a020-353269638c66 raw 32260 \n", "32536 9b61306b-dfd4-48ff-bb7f-417bc901eaae raw 32261 \n", "32537 e5e2e96b-9def-49c2-a118-68c7cb2d4176 raw 32262 \n", "32538 e5e2e96b-9def-49c2-a118-68c7cb2d4176 cropped_right 32262 \n", "\n", " event timestamp user_id \n", "32533 catalog 2020-04-29 12:24:21.538805 831491833 \n", "32534 catalog 2020-04-29 12:24:33.841264 831491833 \n", "32535 product2 2020-04-29 12:24:39.415424 831491833 \n", "32536 cart 2020-04-29 12:24:59.928499 831491833 \n", "32537 catalog 2020-04-29 12:25:06.262205 831491833 \n", "32538 cropped_right 2020-04-29 12:25:06.262205 831491833 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 831491833]" ] }, { "cell_type": "markdown", "id": "6df7d930", "metadata": { "id": "e3f107b8" }, "source": [ "### Removing processors" ] }, { "cell_type": "markdown", "id": "3dff09d1", "metadata": { "id": "JvdFDkSdEJ6N" }, "source": [ "#### FilterEvents" ] }, { "cell_type": "code", "execution_count": 27, "id": "4f9fe247", "metadata": { "executionInfo": { "elapsed": 1947, "status": "ok", "timestamp": 1683202252397, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "9f6232c9", "tags": [] }, "outputs": [], "source": [ "def save_specific_users(df, schema):\n", " users_to_save = [219483890, 964964743, 965024600]\n", " return df[schema.user_id].isin(users_to_save)\n", "\n", "res = stream.filter_events(func=save_specific_users).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 28, "id": "2f6972df", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202252398, "user": { "displayName": "Julia Ostanina", "userId": 
"13687663492290466770" }, "user_tz": -120 }, "id": "YHeFnaqoF9ml", "outputId": "25bf26b9-e299-45a4-c95c-fb0afbf97e12", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "array([219483890, 964964743, 965024600])" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['user_id'].unique().astype(int)" ] }, { "cell_type": "code", "execution_count": 29, "id": "2fd9ebe2", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1683202252398, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "77215f06", "outputId": "bad0bc27-154a-48d0-e60b-2b15e2ca27c2", "tags": [] }, "outputs": [ { "data": { "text/plain": [ "catalog 14518\n", "main 5635\n", "Name: event, dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.to_dataframe()\\\n", " ['event']\\\n", " .value_counts()\\\n", " [lambda s: s.index.isin(['catalog', 'main'])]" ] }, { "cell_type": "code", "execution_count": 30, "id": "a6d09576", "metadata": { "executionInfo": { "elapsed": 2448, "status": "ok", "timestamp": 1683202254843, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "066908e9", "tags": [] }, "outputs": [], "source": [ "def exclude_events(df, schema):\n", " events_to_exclude = ['catalog', 'main']\n", " return ~df[schema.event_name].isin(events_to_exclude)\n", "\n", "res = stream.filter_events(func=exclude_events).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 31, "id": "737001f9", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1683202254843, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "5889ff41", "outputId": "728c3db4-2210-465d-d1f1-917176402ad5", "tags": [] }, "outputs": [ 
{ "data": { "text/plain": [ "Series([], Name: event, dtype: int64)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res['event']\\\n", " .value_counts()\\\n", " [lambda s: s.index.isin(['catalog', 'main'])]" ] }, { "cell_type": "markdown", "id": "2753a754", "metadata": { "id": "0o0cznzfJAjO" }, "source": [ "#### DropPaths" ] }, { "cell_type": "code", "execution_count": 32, "id": "ecbfe814", "metadata": { "executionInfo": { "elapsed": 953, "status": "ok", "timestamp": 1683202255792, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "3f345d14", "tags": [] }, "outputs": [], "source": [ "res = stream.drop_paths(min_steps=25).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 33, "id": "90589065", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202255793, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "a466cd41", "outputId": "d672640f-e215-46ac-8a39-b71b00eb70c7", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0e29fe364-56cd-43f4-a566-ca31e48abf0fraw7main2019-11-01 22:28:54.791683629881394
11a1e9640-6bdd-477b-af37-b8dae119754araw9catalog2019-11-01 22:29:01.049513629881394
29b2faeaa-2b64-45ad-bed7-81238a85bb6draw11catalog2019-11-01 22:29:32.322458629881394
3ac28a75d-48df-43b7-b7fd-e5109a7eb4d5raw13catalog2019-11-01 22:30:09.450839629881394
4778b1d6e-98c2-4fd4-aca5-eb688600c800raw14catalog2019-11-01 22:31:05.565762629881394
560227d0e-3d93-4ada-9acd-473dede4a6beraw15main2019-11-01 22:31:08.333560629881394
61d2cc619-6d03-442f-b686-1316e490af3craw16catalog2019-11-01 22:31:09.010626629881394
7c6278b8b-1266-4c95-aeaf-886ffeb13625raw17product12019-11-01 22:31:10.416231629881394
83e136eff-7f2f-449b-885b-c1d80aa4aceeraw18catalog2019-11-01 22:31:43.019527629881394
9e980bbd4-0457-462b-83a9-0057adbb1b8craw19catalog2019-11-01 22:32:01.596163629881394
10470bb963-80f4-4285-b981-56ed5ff638d0raw20product12019-11-01 22:32:07.348536629881394
11d41267ec-334f-4fea-9a38-891383fe8f70raw21catalog2019-11-01 22:32:10.165568629881394
12025b3395-c5ff-4409-bc41-364dffd3d0c4raw22product22019-11-01 22:32:16.496241629881394
13263ae4d7-b9c9-4dd0-8207-7e544a48d3e2raw23catalog2019-11-01 22:33:17.682468629881394
146c23fe1d-4253-4a6d-b1ce-f6e2214b8555raw24product12019-11-01 22:33:19.961115629881394
15809e52a8-584c-419d-ac1b-4b067eb39eccraw25catalog2019-11-01 22:33:35.460345629881394
16cabd37ee-a5bf-41bf-8b1c-ad25e233ac71raw27catalog2019-11-01 22:34:02.301293629881394
179f74cb09-d274-4e60-a647-7e9b91a87578raw29product22019-11-01 22:34:32.362386629881394
1883f7c150-2fef-415c-a7f5-47b6017cea28raw31catalog2019-11-01 22:35:01.462515629881394
1945a9dd95-c8ac-426d-adc4-42a0a360fa27raw33product22019-11-01 22:35:33.142711629881394
203ddff911-ea08-439a-98f3-8479ddbc91f1raw36cart2019-11-01 22:35:50.437706629881394
21d1cc5cc5-5066-4ff3-96cb-ddbf0b793e51raw38delivery_choice2019-11-01 22:35:57.649549629881394
2205192dc7-d2c8-49e8-a955-1644a39a7794raw39delivery_courier2019-11-01 22:36:02.009271629881394
23982cec85-a48c-4b19-b69d-4de361cbea97raw42payment_choice2019-11-01 22:36:02.243274629881394
24db2ea2d4-e7cd-464e-aeb8-8fd6462b1773raw44payment_cash2019-11-01 22:36:03.415201629881394
255eefb24c-99f2-4f4c-932a-1a54b82c6220raw45payment_done2019-11-01 22:36:03.999697629881394
109af3c4e44-49c9-45c3-bf9e-db331ed89dberaw317main2019-11-07 12:40:46.004674629881394
110e3c72e82-c760-4875-984d-a2900f1b32d2raw319catalog2019-11-07 12:40:55.724185629881394
111f8f7b419-36ac-4ecf-8faf-5516d220b738raw322cart2019-11-07 12:41:04.107187629881394
321399b03f0-0c7d-45af-a055-cd5707fd6dderaw1230main2019-11-25 23:16:56.317624629881394
32210ceaafe-87a5-44c8-826d-4f29a69a95d4raw1231catalog2019-11-25 23:17:16.266242629881394
323fd5f7286-bd5d-4a71-99c1-41865fe8f346raw1232catalog2019-11-25 23:17:48.747304629881394
22181b3b1888-34cc-41ba-ad5f-fe31885d182braw5922main2020-01-23 18:56:23.445236629881394
22193fd5e75d-b2c1-40ec-bbb7-55d08ebd26f2raw5923catalog2020-01-23 18:56:30.461624629881394
39126c215cb1-e606-45ba-936a-e40ca54fdd79raw10013main2020-03-01 23:23:43.392597629881394
3913f396a972-e94c-4d34-b7f2-6cf08ce132c7raw10014catalog2020-03-01 23:23:44.755693629881394
391484d6fd44-ea6b-4269-bde4-ebd8502a89b2raw10015main2020-03-01 23:23:51.625991629881394
42532483222e-bbd9-4041-b59f-5c20e5571359raw10754main2020-03-07 12:05:23.938242629881394
42548d1c68b9-c1ac-4c39-bdac-4e7dd21c36d8raw10755catalog2020-03-07 12:05:32.476046629881394
51294052deec-a393-4737-93c2-75e211807a8craw12937main2020-03-26 13:00:47.844886629881394
513098a029d7-f9e9-4a46-918a-0d48b374a619raw12938catalog2020-03-26 13:00:49.531644629881394
5320e89b12d2-99fb-444b-bffe-cc4a35b9bcc1raw13436main2020-03-30 00:11:47.920536629881394
532167d170f0-2709-4eb0-8d83-49cce6c421dcraw13456main2020-03-30 03:17:46.909845629881394
532209e3ab3f-8174-4330-91bc-d710ececcbb1raw13457catalog2020-03-30 03:17:49.918774629881394
5323187ff7a6-81b1-4a60-a12f-14147235a500raw13458catalog2020-03-30 03:18:17.835191629881394
53246b957263-9ac6-4c93-a70b-3eb226d289a1raw13459catalog2020-03-30 03:18:53.715887629881394
5325bca5f0e0-3f2f-434b-bb95-ceffa744df67raw13460catalog2020-03-30 03:19:36.034262629881394
5326f7e65d9a-8937-49f7-b750-4d8fa1547944raw13461catalog2020-03-30 03:19:59.515094629881394
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "0 e29fe364-56cd-43f4-a566-ca31e48abf0f raw 7 \n", "1 1a1e9640-6bdd-477b-af37-b8dae119754a raw 9 \n", "2 9b2faeaa-2b64-45ad-bed7-81238a85bb6d raw 11 \n", "3 ac28a75d-48df-43b7-b7fd-e5109a7eb4d5 raw 13 \n", "4 778b1d6e-98c2-4fd4-aca5-eb688600c800 raw 14 \n", "5 60227d0e-3d93-4ada-9acd-473dede4a6be raw 15 \n", "6 1d2cc619-6d03-442f-b686-1316e490af3c raw 16 \n", "7 c6278b8b-1266-4c95-aeaf-886ffeb13625 raw 17 \n", "8 3e136eff-7f2f-449b-885b-c1d80aa4acee raw 18 \n", "9 e980bbd4-0457-462b-83a9-0057adbb1b8c raw 19 \n", "10 470bb963-80f4-4285-b981-56ed5ff638d0 raw 20 \n", "11 d41267ec-334f-4fea-9a38-891383fe8f70 raw 21 \n", "12 025b3395-c5ff-4409-bc41-364dffd3d0c4 raw 22 \n", "13 263ae4d7-b9c9-4dd0-8207-7e544a48d3e2 raw 23 \n", "14 6c23fe1d-4253-4a6d-b1ce-f6e2214b8555 raw 24 \n", "15 809e52a8-584c-419d-ac1b-4b067eb39ecc raw 25 \n", "16 cabd37ee-a5bf-41bf-8b1c-ad25e233ac71 raw 27 \n", "17 9f74cb09-d274-4e60-a647-7e9b91a87578 raw 29 \n", "18 83f7c150-2fef-415c-a7f5-47b6017cea28 raw 31 \n", "19 45a9dd95-c8ac-426d-adc4-42a0a360fa27 raw 33 \n", "20 3ddff911-ea08-439a-98f3-8479ddbc91f1 raw 36 \n", "21 d1cc5cc5-5066-4ff3-96cb-ddbf0b793e51 raw 38 \n", "22 05192dc7-d2c8-49e8-a955-1644a39a7794 raw 39 \n", "23 982cec85-a48c-4b19-b69d-4de361cbea97 raw 42 \n", "24 db2ea2d4-e7cd-464e-aeb8-8fd6462b1773 raw 44 \n", "25 5eefb24c-99f2-4f4c-932a-1a54b82c6220 raw 45 \n", "109 af3c4e44-49c9-45c3-bf9e-db331ed89dbe raw 317 \n", "110 e3c72e82-c760-4875-984d-a2900f1b32d2 raw 319 \n", "111 f8f7b419-36ac-4ecf-8faf-5516d220b738 raw 322 \n", "321 399b03f0-0c7d-45af-a055-cd5707fd6dde raw 1230 \n", "322 10ceaafe-87a5-44c8-826d-4f29a69a95d4 raw 1231 \n", "323 fd5f7286-bd5d-4a71-99c1-41865fe8f346 raw 1232 \n", "2218 1b3b1888-34cc-41ba-ad5f-fe31885d182b raw 5922 \n", "2219 3fd5e75d-b2c1-40ec-bbb7-55d08ebd26f2 raw 5923 \n", "3912 6c215cb1-e606-45ba-936a-e40ca54fdd79 raw 10013 \n", "3913 f396a972-e94c-4d34-b7f2-6cf08ce132c7 raw 10014 \n", "3914 
84d6fd44-ea6b-4269-bde4-ebd8502a89b2 raw 10015 \n", "4253 2483222e-bbd9-4041-b59f-5c20e5571359 raw 10754 \n", "4254 8d1c68b9-c1ac-4c39-bdac-4e7dd21c36d8 raw 10755 \n", "5129 4052deec-a393-4737-93c2-75e211807a8c raw 12937 \n", "5130 98a029d7-f9e9-4a46-918a-0d48b374a619 raw 12938 \n", "5320 e89b12d2-99fb-444b-bffe-cc4a35b9bcc1 raw 13436 \n", "5321 67d170f0-2709-4eb0-8d83-49cce6c421dc raw 13456 \n", "5322 09e3ab3f-8174-4330-91bc-d710ececcbb1 raw 13457 \n", "5323 187ff7a6-81b1-4a60-a12f-14147235a500 raw 13458 \n", "5324 6b957263-9ac6-4c93-a70b-3eb226d289a1 raw 13459 \n", "5325 bca5f0e0-3f2f-434b-bb95-ceffa744df67 raw 13460 \n", "5326 f7e65d9a-8937-49f7-b750-4d8fa1547944 raw 13461 \n", "\n", " event timestamp user_id \n", "0 main 2019-11-01 22:28:54.791683 629881394 \n", "1 catalog 2019-11-01 22:29:01.049513 629881394 \n", "2 catalog 2019-11-01 22:29:32.322458 629881394 \n", "3 catalog 2019-11-01 22:30:09.450839 629881394 \n", "4 catalog 2019-11-01 22:31:05.565762 629881394 \n", "5 main 2019-11-01 22:31:08.333560 629881394 \n", "6 catalog 2019-11-01 22:31:09.010626 629881394 \n", "7 product1 2019-11-01 22:31:10.416231 629881394 \n", "8 catalog 2019-11-01 22:31:43.019527 629881394 \n", "9 catalog 2019-11-01 22:32:01.596163 629881394 \n", "10 product1 2019-11-01 22:32:07.348536 629881394 \n", "11 catalog 2019-11-01 22:32:10.165568 629881394 \n", "12 product2 2019-11-01 22:32:16.496241 629881394 \n", "13 catalog 2019-11-01 22:33:17.682468 629881394 \n", "14 product1 2019-11-01 22:33:19.961115 629881394 \n", "15 catalog 2019-11-01 22:33:35.460345 629881394 \n", "16 catalog 2019-11-01 22:34:02.301293 629881394 \n", "17 product2 2019-11-01 22:34:32.362386 629881394 \n", "18 catalog 2019-11-01 22:35:01.462515 629881394 \n", "19 product2 2019-11-01 22:35:33.142711 629881394 \n", "20 cart 2019-11-01 22:35:50.437706 629881394 \n", "21 delivery_choice 2019-11-01 22:35:57.649549 629881394 \n", "22 delivery_courier 2019-11-01 22:36:02.009271 629881394 \n", "23 payment_choice 
2019-11-01 22:36:02.243274 629881394 \n", "24 payment_cash 2019-11-01 22:36:03.415201 629881394 \n", "25 payment_done 2019-11-01 22:36:03.999697 629881394 \n", "109 main 2019-11-07 12:40:46.004674 629881394 \n", "110 catalog 2019-11-07 12:40:55.724185 629881394 \n", "111 cart 2019-11-07 12:41:04.107187 629881394 \n", "321 main 2019-11-25 23:16:56.317624 629881394 \n", "322 catalog 2019-11-25 23:17:16.266242 629881394 \n", "323 catalog 2019-11-25 23:17:48.747304 629881394 \n", "2218 main 2020-01-23 18:56:23.445236 629881394 \n", "2219 catalog 2020-01-23 18:56:30.461624 629881394 \n", "3912 main 2020-03-01 23:23:43.392597 629881394 \n", "3913 catalog 2020-03-01 23:23:44.755693 629881394 \n", "3914 main 2020-03-01 23:23:51.625991 629881394 \n", "4253 main 2020-03-07 12:05:23.938242 629881394 \n", "4254 catalog 2020-03-07 12:05:32.476046 629881394 \n", "5129 main 2020-03-26 13:00:47.844886 629881394 \n", "5130 catalog 2020-03-26 13:00:49.531644 629881394 \n", "5320 main 2020-03-30 00:11:47.920536 629881394 \n", "5321 main 2020-03-30 03:17:46.909845 629881394 \n", "5322 catalog 2020-03-30 03:17:49.918774 629881394 \n", "5323 catalog 2020-03-30 03:18:17.835191 629881394 \n", "5324 catalog 2020-03-30 03:18:53.715887 629881394 \n", "5325 catalog 2020-03-30 03:19:36.034262 629881394 \n", "5326 catalog 2020-03-30 03:19:59.515094 629881394 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 629881394]" ] }, { "cell_type": "code", "execution_count": 34, "id": "04312afc", "metadata": { "executionInfo": { "elapsed": 1159, "status": "ok", "timestamp": 1683202256946, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "ecf77028", "tags": [] }, "outputs": [], "source": [ "res = stream.drop_paths(min_time=(1, 'M')).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 35, "id": "61a53bf9", "metadata": { "colab": { "base_uri": "https://localhost:8080/", 
"height": 112 }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1683202256946, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "21332803", "outputId": "59e00ad5-434d-4b10-ad68-b10c90043f25", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
463cfb2da-6a02-4046-954b-f3b3f8a8b829raw4catalog2019-11-01 21:38:19.283663964964743
5303b81dc-4bed-4586-93bb-318828dc048craw5cart2019-11-01 21:38:36.761221964964743
68a8305ae-6c9f-48db-b782-d0c58bf88291raw6delivery_choice2019-11-01 21:38:37.564693964964743
1101f35dfc72-f933-491b-84b2-5f0fbeea6c36raw2275main2019-12-09 01:42:22.801831964964743
1102442d0c8c-7c55-45e7-b1b2-2424dc8d784craw2276catalog2019-12-09 01:42:23.617764964964743
1103025d2b66-381c-4b4d-98d5-573fbfe12cacraw2277product22019-12-09 01:42:56.877340964964743
1104803089ca-c038-4fdd-9eda-ea7d8f6b1d4araw2278catalog2019-12-09 01:43:05.436223964964743
11052a9388e0-aef5-4f78-a001-5c40136900a0raw2279catalog2019-12-09 01:43:36.923178964964743
11067529c9b6-5040-4c06-97ff-1f53d9824e2craw2280product22019-12-09 01:43:41.174195964964743
11079a17e336-e3a6-442a-b13a-796da48ca7feraw2281cart2019-12-09 01:43:57.325569964964743
11081facd101-1547-4f03-a085-09dd4e9a02a4raw2282delivery_choice2019-12-09 01:43:57.486518964964743
1109bedee6e9-7e39-49de-877b-a663760ca7d2raw2283delivery_pickup2019-12-09 01:43:57.766850964964743
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "4 63cfb2da-6a02-4046-954b-f3b3f8a8b829 raw 4 \n", "5 303b81dc-4bed-4586-93bb-318828dc048c raw 5 \n", "6 8a8305ae-6c9f-48db-b782-d0c58bf88291 raw 6 \n", "1101 f35dfc72-f933-491b-84b2-5f0fbeea6c36 raw 2275 \n", "1102 442d0c8c-7c55-45e7-b1b2-2424dc8d784c raw 2276 \n", "1103 025d2b66-381c-4b4d-98d5-573fbfe12cac raw 2277 \n", "1104 803089ca-c038-4fdd-9eda-ea7d8f6b1d4a raw 2278 \n", "1105 2a9388e0-aef5-4f78-a001-5c40136900a0 raw 2279 \n", "1106 7529c9b6-5040-4c06-97ff-1f53d9824e2c raw 2280 \n", "1107 9a17e336-e3a6-442a-b13a-796da48ca7fe raw 2281 \n", "1108 1facd101-1547-4f03-a085-09dd4e9a02a4 raw 2282 \n", "1109 bedee6e9-7e39-49de-877b-a663760ca7d2 raw 2283 \n", "\n", " event timestamp user_id \n", "4 catalog 2019-11-01 21:38:19.283663 964964743 \n", "5 cart 2019-11-01 21:38:36.761221 964964743 \n", "6 delivery_choice 2019-11-01 21:38:37.564693 964964743 \n", "1101 main 2019-12-09 01:42:22.801831 964964743 \n", "1102 catalog 2019-12-09 01:42:23.617764 964964743 \n", "1103 product2 2019-12-09 01:42:56.877340 964964743 \n", "1104 catalog 2019-12-09 01:43:05.436223 964964743 \n", "1105 catalog 2019-12-09 01:43:36.923178 964964743 \n", "1106 product2 2019-12-09 01:43:41.174195 964964743 \n", "1107 cart 2019-12-09 01:43:57.325569 964964743 \n", "1108 delivery_choice 2019-12-09 01:43:57.486518 964964743 \n", "1109 delivery_pickup 2019-12-09 01:43:57.766850 964964743 " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 964964743]" ] }, { "cell_type": "markdown", "id": "e0acdec5", "metadata": { "id": "7NGDhhJVPLX5" }, "source": [ "#### TruncatePaths" ] }, { "cell_type": "code", "execution_count": 36, "id": "5256a8d0", "metadata": { "executionInfo": { "elapsed": 6136, "status": "ok", "timestamp": 1683202263075, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "8eb2156a", "tags": [] }, "outputs": [], 
"source": [ "res = stream.truncate_paths(\n", " drop_before='cart',\n", " shift_before=-2\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 37, "id": "62a47ef7", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 551 }, "executionInfo": { "elapsed": 30, "status": "ok", "timestamp": 1683202263075, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "3245a6fe", "outputId": "2bef0472-9df1-4e17-e2f3-51c39eb4d5db", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0ecec01ad-6f60-46b5-b125-69a7ca27a685raw0catalog2019-11-01 17:59:13.273932219483890
19ff63036-fac6-435a-85f4-b0d5a54c557fraw1product12019-11-01 17:59:28.459271219483890
2a3b37935-dd30-4edb-9328-12e6f87ac8bcraw2cart2019-11-01 17:59:29.502214219483890
3ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9raw3catalog2019-11-01 17:59:32.557029219483890
16359735f8a7-baac-4a90-b390-9b378c0ed4e3raw2096main2019-12-06 16:22:57.484842219483890
1636d11c3f44-5472-46ba-83b9-b11d568be624raw2097catalog2019-12-06 16:23:01.331109219483890
1637562884ac-4ad4-4773-b22d-852efb4d7961raw2098catalog2019-12-06 16:23:48.116617219483890
35536a095bf1-f34c-4ba9-b933-9a470e28d4f4raw4542main2020-01-06 22:10:13.635011219483890
355411705e29-00ce-4da3-9c4d-6bc556a0acadraw4543catalog2020-01-06 22:10:15.228575219483890
35553648646a-8797-4563-9d2f-6ff2dc8290fbraw4544cart2020-01-06 22:10:42.309028219483890
3556fdda68c6-d6f7-4d5e-9c80-2c4fcae57850raw4545catalog2020-01-06 22:10:52.255859219483890
3557d8c9a53e-44b6-4054-a878-358d58e3588fraw4546product12020-01-06 22:11:01.709800219483890
3558d3c84c18-0465-4a37-b2f5-ca5529faa0eeraw4547catalog2020-01-06 22:11:02.899490219483890
3559e70ef300-f288-49b9-b079-f4c8f1e1216craw4548catalog2020-01-06 22:11:28.271366219483890
66351a349c5e-a8bd-4d99-8b5b-5095a3da809braw8215main2020-02-14 21:04:49.450696219483890
6636f0983ad4-e199-4ced-bd98-acf161ef60b5raw8216catalog2020-02-14 21:04:51.717127219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 ecec01ad-6f60-46b5-b125-69a7ca27a685 raw 0 catalog \n", "1 9ff63036-fac6-435a-85f4-b0d5a54c557f raw 1 product1 \n", "2 a3b37935-dd30-4edb-9328-12e6f87ac8bc raw 2 cart \n", "3 ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9 raw 3 catalog \n", "1635 9735f8a7-baac-4a90-b390-9b378c0ed4e3 raw 2096 main \n", "1636 d11c3f44-5472-46ba-83b9-b11d568be624 raw 2097 catalog \n", "1637 562884ac-4ad4-4773-b22d-852efb4d7961 raw 2098 catalog \n", "3553 6a095bf1-f34c-4ba9-b933-9a470e28d4f4 raw 4542 main \n", "3554 11705e29-00ce-4da3-9c4d-6bc556a0acad raw 4543 catalog \n", "3555 3648646a-8797-4563-9d2f-6ff2dc8290fb raw 4544 cart \n", "3556 fdda68c6-d6f7-4d5e-9c80-2c4fcae57850 raw 4545 catalog \n", "3557 d8c9a53e-44b6-4054-a878-358d58e3588f raw 4546 product1 \n", "3558 d3c84c18-0465-4a37-b2f5-ca5529faa0ee raw 4547 catalog \n", "3559 e70ef300-f288-49b9-b079-f4c8f1e1216c raw 4548 catalog \n", "6635 1a349c5e-a8bd-4d99-8b5b-5095a3da809b raw 8215 main \n", "6636 f0983ad4-e199-4ced-bd98-acf161ef60b5 raw 8216 catalog \n", "\n", " timestamp user_id \n", "0 2019-11-01 17:59:13.273932 219483890 \n", "1 2019-11-01 17:59:28.459271 219483890 \n", "2 2019-11-01 17:59:29.502214 219483890 \n", "3 2019-11-01 17:59:32.557029 219483890 \n", "1635 2019-12-06 16:22:57.484842 219483890 \n", "1636 2019-12-06 16:23:01.331109 219483890 \n", "1637 2019-12-06 16:23:48.116617 219483890 \n", "3553 2020-01-06 22:10:13.635011 219483890 \n", "3554 2020-01-06 22:10:15.228575 219483890 \n", "3555 2020-01-06 22:10:42.309028 219483890 \n", "3556 2020-01-06 22:10:52.255859 219483890 \n", "3557 2020-01-06 22:11:01.709800 219483890 \n", "3558 2020-01-06 22:11:02.899490 219483890 \n", "3559 2020-01-06 22:11:28.271366 219483890 \n", "6635 2020-02-14 21:04:49.450696 219483890 \n", "6636 2020-02-14 21:04:51.717127 219483890 " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { 
"cell_type": "code", "execution_count": 38, "id": "eabff395", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 174 }, "executionInfo": { "elapsed": 27, "status": "ok", "timestamp": 1683202263075, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "c4b22d33", "outputId": "d9188c02-2681-47e5-d9c2-a4218e856890", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
321656c2bc-d027-4e9e-9da1-a097cab08200raw60main2019-11-02 07:28:07.28554124427596
33f0252960-28da-488b-a9c6-49b0adc135e7raw61catalog2019-11-02 07:28:14.31985024427596
34fee2af15-23b8-4274-951c-3f75447a50c3raw62catalog2019-11-02 07:29:08.30133324427596
35499ca4b5-f897-4043-a9bb-676a633e066braw63catalog2019-11-02 07:29:41.84839624427596
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "32 1656c2bc-d027-4e9e-9da1-a097cab08200 raw 60 main \n", "33 f0252960-28da-488b-a9c6-49b0adc135e7 raw 61 catalog \n", "34 fee2af15-23b8-4274-951c-3f75447a50c3 raw 62 catalog \n", "35 499ca4b5-f897-4043-a9bb-676a633e066b raw 63 catalog \n", "\n", " timestamp user_id \n", "32 2019-11-02 07:28:07.285541 24427596 \n", "33 2019-11-02 07:28:14.319850 24427596 \n", "34 2019-11-02 07:29:08.301333 24427596 \n", "35 2019-11-02 07:29:41.848396 24427596 " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 24427596]" ] }, { "cell_type": "code", "execution_count": 39, "id": "a80b552c", "metadata": { "executionInfo": { "elapsed": 12140, "status": "ok", "timestamp": 1683202275190, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "1c967d10", "tags": [] }, "outputs": [], "source": [ "res = stream.truncate_paths(\n", " drop_after='cart',\n", " occurrence_after=\"last\"\n", " ).to_dataframe()" ] }, { "cell_type": "code", "execution_count": 40, "id": "cb92283f", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 363 }, "executionInfo": { "elapsed": 33, "status": "ok", "timestamp": 1683202275191, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "cf8aa845", "outputId": "2098d943-6bb5-4e5e-a817-c452359cfaf7", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
0ecec01ad-6f60-46b5-b125-69a7ca27a685raw0catalog2019-11-01 17:59:13.273932219483890
19ff63036-fac6-435a-85f4-b0d5a54c557fraw1product12019-11-01 17:59:28.459271219483890
2a3b37935-dd30-4edb-9328-12e6f87ac8bcraw2cart2019-11-01 17:59:29.502214219483890
3ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9raw3catalog2019-11-01 17:59:32.557029219483890
17739735f8a7-baac-4a90-b390-9b378c0ed4e3raw2096main2019-12-06 16:22:57.484842219483890
1774d11c3f44-5472-46ba-83b9-b11d568be624raw2097catalog2019-12-06 16:23:01.331109219483890
1775562884ac-4ad4-4773-b22d-852efb4d7961raw2098catalog2019-12-06 16:23:48.116617219483890
38626a095bf1-f34c-4ba9-b933-9a470e28d4f4raw4542main2020-01-06 22:10:13.635011219483890
386311705e29-00ce-4da3-9c4d-6bc556a0acadraw4543catalog2020-01-06 22:10:15.228575219483890
38643648646a-8797-4563-9d2f-6ff2dc8290fbraw4544cart2020-01-06 22:10:42.309028219483890
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "0 ecec01ad-6f60-46b5-b125-69a7ca27a685 raw 0 catalog \n", "1 9ff63036-fac6-435a-85f4-b0d5a54c557f raw 1 product1 \n", "2 a3b37935-dd30-4edb-9328-12e6f87ac8bc raw 2 cart \n", "3 ce686a0c-a329-4159-8e1e-5ed6e1f1f0d9 raw 3 catalog \n", "1773 9735f8a7-baac-4a90-b390-9b378c0ed4e3 raw 2096 main \n", "1774 d11c3f44-5472-46ba-83b9-b11d568be624 raw 2097 catalog \n", "1775 562884ac-4ad4-4773-b22d-852efb4d7961 raw 2098 catalog \n", "3862 6a095bf1-f34c-4ba9-b933-9a470e28d4f4 raw 4542 main \n", "3863 11705e29-00ce-4da3-9c4d-6bc556a0acad raw 4543 catalog \n", "3864 3648646a-8797-4563-9d2f-6ff2dc8290fb raw 4544 cart \n", "\n", " timestamp user_id \n", "0 2019-11-01 17:59:13.273932 219483890 \n", "1 2019-11-01 17:59:28.459271 219483890 \n", "2 2019-11-01 17:59:29.502214 219483890 \n", "3 2019-11-01 17:59:32.557029 219483890 \n", "1773 2019-12-06 16:22:57.484842 219483890 \n", "1774 2019-12-06 16:23:01.331109 219483890 \n", "1775 2019-12-06 16:23:48.116617 219483890 \n", "3862 2020-01-06 22:10:13.635011 219483890 \n", "3863 2020-01-06 22:10:15.228575 219483890 \n", "3864 2020-01-06 22:10:42.309028 219483890 " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 219483890]" ] }, { "cell_type": "markdown", "id": "cedba87b", "metadata": { "id": "3e7ed632" }, "source": [ "### Editing processors" ] }, { "cell_type": "markdown", "id": "9f52f455", "metadata": { "id": "SJkjuACbWoPJ" }, "source": [ "#### GroupEvents" ] }, { "cell_type": "markdown", "id": "45320158", "metadata": { "id": "a3ddebb9" }, "source": [ "With ``GroupEvents``, we can group events based on the event name. 
Suppose\n", "we need to assign a common name ``product`` to events ``product1`` and\n", "``product2``:" ] }, { "cell_type": "code", "execution_count": 41, "id": "c7b5fad5", "metadata": { "executionInfo": { "elapsed": 29, "status": "ok", "timestamp": 1683202275191, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "206ac0da", "tags": [] }, "outputs": [], "source": [ "def group_events(df, schema):\n", " events_to_group = ['product1', 'product2']\n", " return df[schema.event_name].isin(events_to_group)\n", "\n", "params = {\n", " 'event_name': 'product',\n", " 'func': group_events\n", "}\n", "\n", "res = stream.group_events(**params).to_dataframe()" ] }, { "cell_type": "markdown", "id": "f54324fc", "metadata": { "id": "1tAdSeRbdwZY" }, "source": [ "As we can see, user ``456870964`` now has two ``product`` events\n", "(``event_index=132, 135``) with ``event_type='group_alias'``." ] }, { "cell_type": "code", "execution_count": 42, "id": "ee93a2dc", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "executionInfo": { "elapsed": 28, "status": "ok", "timestamp": 1683202275191, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "0B4vS0xdd1s1", "outputId": "921e7162-d5b0-488c-bf14-68cc39528be5", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
129d1b41a49-5c44-4d43-a31a-d0e25728728fraw129catalog2019-11-03 11:46:55.411714456870964
130dc441606-69cd-4e37-8094-200c05ac14a6raw130catalog2019-11-03 11:47:46.131302456870964
131923d1811-49b7-486b-87fd-f5a918e118bdraw131catalog2019-11-03 11:47:58.401143456870964
132f72ded4c-18c4-4bdb-9873-88e88a3f27b1group_alias132product2019-11-03 11:48:43.243587456870964
1335d2933e7-ade5-4e19-bc90-097e7f959856raw133cart2019-11-03 11:49:17.050519456870964
134aea2427f-ce2e-4046-8627-3705cfbc0d6braw134catalog2019-11-03 11:49:17.516398456870964
1356c75865f-4a7c-4079-819e-829ec93e364bgroup_alias135product2019-11-03 11:49:28.927721456870964
136c51d9a7c-7e1c-4f30-bbd7-d99c9b4d581craw136catalog2019-11-03 11:49:30.788195456870964
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "129 d1b41a49-5c44-4d43-a31a-d0e25728728f raw 129 catalog \n", "130 dc441606-69cd-4e37-8094-200c05ac14a6 raw 130 catalog \n", "131 923d1811-49b7-486b-87fd-f5a918e118bd raw 131 catalog \n", "132 f72ded4c-18c4-4bdb-9873-88e88a3f27b1 group_alias 132 product \n", "133 5d2933e7-ade5-4e19-bc90-097e7f959856 raw 133 cart \n", "134 aea2427f-ce2e-4046-8627-3705cfbc0d6b raw 134 catalog \n", "135 6c75865f-4a7c-4079-819e-829ec93e364b group_alias 135 product \n", "136 c51d9a7c-7e1c-4f30-bbd7-d99c9b4d581c raw 136 catalog \n", "\n", " timestamp user_id \n", "129 2019-11-03 11:46:55.411714 456870964 \n", "130 2019-11-03 11:47:46.131302 456870964 \n", "131 2019-11-03 11:47:58.401143 456870964 \n", "132 2019-11-03 11:48:43.243587 456870964 \n", "133 2019-11-03 11:49:17.050519 456870964 \n", "134 2019-11-03 11:49:17.516398 456870964 \n", "135 2019-11-03 11:49:28.927721 456870964 \n", "136 2019-11-03 11:49:30.788195 456870964 " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 456870964]" ] }, { "cell_type": "markdown", "id": "b5408068", "metadata": { "id": "vjrKADA9ecuY" }, "source": [ "Previously, both events were named\n", "``product1`` and ``product2`` and had ``raw`` event types:" ] }, { "cell_type": "code", "execution_count": 43, "id": "35576635", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 300 }, "executionInfo": { "elapsed": 28, "status": "ok", "timestamp": 1683202275192, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "BzLnXqL3d9LM", "outputId": "d0667464-05e9-418e-a56c-aee213afbd3b", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
129d1b41a49-5c44-4d43-a31a-d0e25728728fraw129catalog2019-11-03 11:46:55.411714456870964
130dc441606-69cd-4e37-8094-200c05ac14a6raw130catalog2019-11-03 11:47:46.131302456870964
131923d1811-49b7-486b-87fd-f5a918e118bdraw131catalog2019-11-03 11:47:58.401143456870964
132f72ded4c-18c4-4bdb-9873-88e88a3f27b1raw132product12019-11-03 11:48:43.243587456870964
1335d2933e7-ade5-4e19-bc90-097e7f959856raw133cart2019-11-03 11:49:17.050519456870964
134aea2427f-ce2e-4046-8627-3705cfbc0d6braw134catalog2019-11-03 11:49:17.516398456870964
1356c75865f-4a7c-4079-819e-829ec93e364braw135product22019-11-03 11:49:28.927721456870964
136c51d9a7c-7e1c-4f30-bbd7-d99c9b4d581craw136catalog2019-11-03 11:49:30.788195456870964
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "129 d1b41a49-5c44-4d43-a31a-d0e25728728f raw 129 catalog \n", "130 dc441606-69cd-4e37-8094-200c05ac14a6 raw 130 catalog \n", "131 923d1811-49b7-486b-87fd-f5a918e118bd raw 131 catalog \n", "132 f72ded4c-18c4-4bdb-9873-88e88a3f27b1 raw 132 product1 \n", "133 5d2933e7-ade5-4e19-bc90-097e7f959856 raw 133 cart \n", "134 aea2427f-ce2e-4046-8627-3705cfbc0d6b raw 134 catalog \n", "135 6c75865f-4a7c-4079-819e-829ec93e364b raw 135 product2 \n", "136 c51d9a7c-7e1c-4f30-bbd7-d99c9b4d581c raw 136 catalog \n", "\n", " timestamp user_id \n", "129 2019-11-03 11:46:55.411714 456870964 \n", "130 2019-11-03 11:47:46.131302 456870964 \n", "131 2019-11-03 11:47:58.401143 456870964 \n", "132 2019-11-03 11:48:43.243587 456870964 \n", "133 2019-11-03 11:49:17.050519 456870964 \n", "134 2019-11-03 11:49:17.516398 456870964 \n", "135 2019-11-03 11:49:28.927721 456870964 \n", "136 2019-11-03 11:49:30.788195 456870964 " ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.to_dataframe().query('user_id == 456870964')" ] }, { "cell_type": "markdown", "id": "4d87c941", "metadata": { "id": "4TsI3HFLg7Z_" }, "source": [ "#### CollapseLoops" ] }, { "cell_type": "code", "execution_count": 44, "id": "b3c8e7a7", "metadata": { "executionInfo": { "elapsed": 672, "status": "ok", "timestamp": 1683202275838, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "45a48d47", "tags": [] }, "outputs": [], "source": [ "res = stream.collapse_loops(suffix='loop', time_agg='max').to_dataframe()" ] }, { "cell_type": "code", "execution_count": 45, "id": "de01d74d", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 206 }, "executionInfo": { "elapsed": 454, "status": "ok", "timestamp": 1683202276288, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "LYnmqr3MjcnL", "outputId": 
"3072b66c-2638-4a1b-b338-b747fa61d2be", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
3327a52bf0f9-7e72-4bb6-b983-39e5dfb45f04raw3327main2019-12-24 12:58:04.8912492112338
332835ab1e14-c157-4a89-8a77-082849779902raw3328catalog2019-12-24 12:58:08.0969232112338
33296ad16457-0d77-4799-a41a-b1f13d2ea2b2raw3329catalog2019-12-24 12:58:16.4295522112338
3330e4d56465-89c5-4baa-a5e5-949311f3b5daraw3330catalog2019-12-24 12:58:44.9651042112338
333108d2acdf-bda7-443e-90b3-893c77d09f3craw3331main2019-12-24 12:58:52.9848532112338
\n", "
" ], "text/plain": [ " event_id event_type event_index event \\\n", "3327 a52bf0f9-7e72-4bb6-b983-39e5dfb45f04 raw 3327 main \n", "3328 35ab1e14-c157-4a89-8a77-082849779902 raw 3328 catalog \n", "3329 6ad16457-0d77-4799-a41a-b1f13d2ea2b2 raw 3329 catalog \n", "3330 e4d56465-89c5-4baa-a5e5-949311f3b5da raw 3330 catalog \n", "3331 08d2acdf-bda7-443e-90b3-893c77d09f3c raw 3331 main \n", "\n", " timestamp user_id \n", "3327 2019-12-24 12:58:04.891249 2112338 \n", "3328 2019-12-24 12:58:08.096923 2112338 \n", "3329 2019-12-24 12:58:16.429552 2112338 \n", "3330 2019-12-24 12:58:44.965104 2112338 \n", "3331 2019-12-24 12:58:52.984853 2112338 " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stream.to_dataframe().query('user_id == 2112338')" ] }, { "cell_type": "code", "execution_count": 46, "id": "b0880e7c", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 10, "status": "ok", "timestamp": 1683202276289, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "u5jHmoWejF4P", "outputId": "c8626cad-9de9-46aa-873c-176c2f13604b", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
2626a52bf0f9-7e72-4bb6-b983-39e5dfb45f04raw3327main2019-12-24 12:58:04.8912492112338
2627ea1ae9c2-56eb-40f2-93c0-56c3c7221b8fgroup_alias3330catalog_loop2019-12-24 12:58:44.9651042112338
262808d2acdf-bda7-443e-90b3-893c77d09f3craw3331main2019-12-24 12:58:52.9848532112338
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "2626 a52bf0f9-7e72-4bb6-b983-39e5dfb45f04 raw 3327 \n", "2627 ea1ae9c2-56eb-40f2-93c0-56c3c7221b8f group_alias 3330 \n", "2628 08d2acdf-bda7-443e-90b3-893c77d09f3c raw 3331 \n", "\n", " event timestamp user_id \n", "2626 main 2019-12-24 12:58:04.891249 2112338 \n", "2627 catalog_loop 2019-12-24 12:58:44.965104 2112338 \n", "2628 main 2019-12-24 12:58:52.984853 2112338 " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res[res['user_id'] == 2112338]" ] }, { "cell_type": "code", "execution_count": 47, "id": "c77b5469", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 143 }, "executionInfo": { "elapsed": 868, "status": "ok", "timestamp": 1683202277150, "user": { "displayName": "Julia Ostanina", "userId": "13687663492290466770" }, "user_tz": -120 }, "id": "0ea8e922", "outputId": "ad6102bb-7f8a-42a9-a172-ade7edfba88c", "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
event_idevent_typeevent_indexeventtimestampuser_id
2636a52bf0f9-7e72-4bb6-b983-39e5dfb45f04raw3327main2019-12-24 12:58:04.8912490002112338
26376b3a8abc-fc3d-496c-8a1e-7b2c8fafc966group_alias3329catalog_loop_32019-12-24 12:58:23.1638597122112338
263808d2acdf-bda7-443e-90b3-893c77d09f3craw3331main2019-12-24 12:58:52.9848530002112338
\n", "
" ], "text/plain": [ " event_id event_type event_index \\\n", "2636 a52bf0f9-7e72-4bb6-b983-39e5dfb45f04 raw 3327 \n", "2637 6b3a8abc-fc3d-496c-8a1e-7b2c8fafc966 group_alias 3329 \n", "2638 08d2acdf-bda7-443e-90b3-893c77d09f3c raw 3331 \n", "\n", " event timestamp user_id \n", "2636 main 2019-12-24 12:58:04.891249000 2112338 \n", "2637 catalog_loop_3 2019-12-24 12:58:23.163859712 2112338 \n", "2638 main 2019-12-24 12:58:52.984853000 2112338 " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "params = {\n", " 'suffix': 'count',\n", " 'time_agg': 'mean'\n", "}\n", "\n", "res = stream.collapse_loops(**params).to_dataframe()\n", "res[res['user_id'] == 2112338]" ] }, { "cell_type": "code", "execution_count": null, "id": "47d804ef", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "colab": { "name": "", "provenance": [ { "file_id": "1QlOf2MtJ3lE9cTOlTtHkPY4npVLdxFhB", "timestamp": 1671649889781 } ], "toc_visible": true, "version": "" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.18" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 5 }