Source code for retentioneering.core.core_functions.plot_graph

# * Copyright (C) 2020 Maxim Godzi, Anatoly Zaytsev, Retentioneering Team
# * This Source Code Form is subject to the terms of the Retentioneering Software Non-Exclusive License (License)
# * By using, sharing or editing this code you agree with the License terms and conditions.
# * You can obtain License text at https://github.com/retentioneering/retentioneering-tools/blob/master/LICENSE.md

from retentioneering.visualization import draw_graph
from IPython.display import display, HTML


[docs]def plot_graph(self, *,
               targets={},
               weight_col=None,
               norm_type='full',
               layout_dump=None,
               width=800,
               height=500,
               thresh=0):
    """
    Create interactive graph visualization. Each node is a unique event_col
    value, edges are transitions between events and edge weights are calculated
    metrics. By default, it is a percentage of unique users that have passed
    though a particular edge visualized with the edge thickness. Node sizes are
    Graph loop is a transition to the same node, which may happen if users
    encountered multiple errors or made any action at least twice. Graph nodes
    are movable on canvas which helps to visualize user trajectories but is also
    a cumbersome process to place all the nodes so it forms a story.

    That is why IFrame object also has a download button. By pressing it, a JSON
    configuration file with all the node parameters is downloaded. It contains
    node names, their positions, relative sizes and types. It it used as
    layout_dump parameter for layout configuration. Finally, show weights
    toggle shows and hides edge weights.

    Parameters
    ----------
    norm_type: str (optional, default 'full')
        Type of normalization used to calculate weights for graph edges. Possible
        values are:
            * None
            * 'full'
            * 'node'

    weight_col: str (optional, default None)
        Aggregation column for edge weighting. If None, number of events will be
        calculated. For example, can be specified as `client_id` or `session_id`
        if dataframe has such columns.

    targets: dict (optional, default None)
        Event mapping describing which nodes or edges should be highlighted by
        different colors for better visualisation. Dictionary keys are event_col
        values, while keys have the following possible values:
        Example: {'lost': 'red', 'purchased': 'green', 'main': 'source'}

    thresh: float (optional, default 0.01)
        Minimal edge weight value to be rendered on a graph. If a node has no
        edges of the weight >= thresh, then it is not shown on a graph. It
        is used to filter out rare event and not to clutter visualization. Nodes
        specified in targets parameter will be always shown regardless selected
        threshold.

    layout_dump: str (optional, default None)
        Path to layout configuration file relative to current directory. If
        defined, uses configuration file as a graph layout.

    width: int (optional, default 800)
        Width of plot in pixels.

    height: int (optional, default 500)
        Height of plot in pixels.

    Returns
    -------
    Plots IFrame graph of width and height size.
    Saves webpage with JS graph visualization to
    retention_config.experiments_folder.

    Return type
    -----------
    Renders IFrame object and saves graph visualization as HTML in
    experiments_folder of retention_config.
    """

    event_col = self.retention_config['event_col']

    # TODO: change downstream processing
    if targets is not None:
        for k, v in targets.items():
            if v == 'red':
                v = 'bad_target'
            if v == 'green':
                v = 'nice_target'
            targets[k] = v

    node_weights = self._obj[event_col].value_counts().to_dict()
    data = self.get_edgelist(weight_col=weight_col,
                             norm_type=norm_type)

    path = draw_graph.graph(data,
                            node_params=targets,
                            node_weights=node_weights,
                            layout_dump=layout_dump,
                            width=width,
                            height=height,
                            thresh=thresh)

    # if work from google colab user HTML display:
    try:
        import google.colab
        display(HTML(path))
    except:
        pass

    return path