Contents

Time Series Plots

Contents

Time Series Plots#

# Configuration and import
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

# Configure seaborn once: "darkgrid" style plus a wide default figure size.
# sns.set is an alias of sns.set_theme, which applies the "darkgrid" style by
# default, so a separate sns.set_style call beforehand is redundant.
sns.set_theme(style="darkgrid", rc={"figure.figsize": (15, 5)})

# Read generated signals: one parquet file per brand, concatenated column-wise
df = pd.concat(
    [
        pd.read_parquet(f"./data/output/{brand}.parquet")
        for brand in ("asda", "sainsburys", "waitrose", "tesco", "morrison", "lidl")
    ],
    axis=1,
)
# Smooth with a 7-day rolling median so the graphs are not overly noisy
df = df.rolling("7D").median()

# Massage data: reshape the wide frame (one column per brand) into long form

# Visit times, one row per brand and index entry. Naming the columns axis
# "brand" up front lets a single reset_index() produce the final columns.
df_unstacked = (
    df.rename_axis(columns="brand")
    .unstack()
    .rename("average_visit_time")
    .reset_index()
)
# Market share: each brand's value as a percentage of the row total
df_market_share = (
    df.div(df.sum(axis=1), axis=0)
    .mul(100)
    .rename_axis(columns="brand")
    .unstack()
    .rename("market_share")
    .reset_index()
)

# Some helper plot functions
def plot_visit_time_per_capita(df, ylim_max: float = 0.8):
    """Draw one visit-time line per brand on the current matplotlib axes.

    Args:
        df: Long-form frame with "brand", "datestamp" and
            "average_visit_time" columns.
        ylim_max: Upper bound of the y-axis. Defaults to 0.8, the value that
            used to be hard-coded; pass a larger value for data that would
            otherwise be clipped.
    """
    sns.lineplot(df, hue="brand", x="datestamp", y="average_visit_time")
    fig = plt.gcf()
    # Slant the date tick labels so they do not overlap
    fig.autofmt_xdate(rotation=20)
    ax = plt.gca()
    # Anchor the legend just outside the right edge of the axes
    ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    plt.title("Visit time per panel capita")
    plt.ylabel("Visit time per panel capita \n (minutes per panel population)")
    plt.xlabel("Date")
    plt.ylim(0, ylim_max)


def plot_market_share(df, ylim_max: int = 45):
    """Draw one market-share line per brand on the current matplotlib axes.

    Args:
        df: Long-form frame with "brand", "datestamp" and "market_share"
            columns.
        ylim_max: Upper bound of the y-axis, in percent.
    """
    # lineplot draws on the current axes and hands them back
    axes = sns.lineplot(df, hue="brand", x="datestamp", y="market_share")
    # Slant the date tick labels so they do not overlap
    plt.gcf().autofmt_xdate(rotation=20)
    # Anchor the legend just outside the right edge of the axes
    axes.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    axes.set_title("Market share by visit time per panel capita")
    axes.set_ylabel("Market Share (%)")
    axes.set_xlabel("Date")
    axes.set_ylim(0, ylim_max)


def plot_market_share_pct_change(df):
    """Plot the year-over-year percentage change of monthly mean market share.

    The "market_share" column is averaged per brand and calendar month, each
    month is paired with the same month one year later (inner merge, so months
    without a counterpart are dropped), and the relative change is drawn
    against the later month.

    Args:
        df: Long-form frame with "brand", "datestamp" and "market_share"
            columns.
    """
    stamps = df["datestamp"]
    # NOTE: Subtracting MonthBegin from a date that is already a month start
    #       would land in the previous month, so such dates are first pushed
    #       forward by one day.
    month_start_bump = pd.Timedelta("1D") * stamps.dt.is_month_start
    month_key = (stamps + month_start_bump) - pd.offsets.MonthBegin()

    monthly = df.groupby(["brand", month_key])["market_share"].mean().reset_index()

    # Tag every month with the month it is compared against (one year later)
    with_target = monthly.assign(
        cmp_datestamp=monthly["datestamp"].apply(lambda ts: ts.replace(year=ts.year + 1))
    )
    yoy = pd.merge(
        with_target,
        monthly,
        how="inner",
        left_on=["cmp_datestamp", "brand"],
        right_on=["datestamp", "brand"],
        suffixes=("_current", "_next"),
    )

    # "_current" is the earlier year, "_next" the same month one year on
    ratio = yoy["market_share_next"] / yoy["market_share_current"]
    yoy["market_share_pct_change"] = (ratio - 1) * 100

    # lineplot draws on the current axes and hands them back
    axes = sns.lineplot(yoy, hue="brand", x="datestamp_next", y="market_share_pct_change")
    plt.gcf().autofmt_xdate(rotation=20)
    axes.legend(loc="center left", bbox_to_anchor=(1, 0.5))
    axes.set_title("YoY Change of market share by visit time per panel capita")
    axes.set_ylabel("Market Share change (%)")
    axes.set_xlabel("Date")

Visit time per capita for all 6 brands#

Let’s start with a simple plot of all time series.

# Plot the visit-time series of every brand on a single chart
plot_visit_time_per_capita(df_unstacked)

_images/b003b-time-series-plot_6_0.png

Spikes before the new year correspond to shopping behaviour in the run-up to Christmas; this is somewhat expected.

Visit time per capita for Sainsbury’s, Tesco and Morrison#

We pick the three supermarket brands that should have similar “spend per unit time” for further inspection.

# Keep only the Sainsbury's, Tesco and Morrison columns
df_stm = df.loc[:, ["sainsburys", "tesco", "morrison"]]

# Long-form visit times. Naming the columns axis "brand" up front lets a
# single reset_index() produce the final columns.
df_stm_unstacked = (
    df_stm.rename_axis(columns="brand")
    .unstack()
    .rename("average_visit_time")
    .reset_index()
)
# Market share within the three-brand subset, as a percentage of the row total
df_stm_market_share = (
    df_stm.div(df_stm.sum(axis=1), axis=0)
    .mul(100)
    .rename_axis(columns="brand")
    .unstack()
    .rename("market_share")
    .reset_index()
)

plot_visit_time_per_capita(df_stm_unstacked)

_images/b003b-time-series-plot_15_0.png

No additional remarks to be made.