{ "cells": [ { "cell_type": "markdown", "id": "d9de8716", "metadata": { "colab_type": "text", "id": "view-in-github" }, "source": [ "# Gamma-gamma Model\n", "\n", "In this notebook we show how to fit a Gamma-Gamma model in PyMC-Marketing. We compare the results with the [`lifetimes`](https://github.com/CamDavidsonPilon/lifetimes) package (no longer maintained and last meaningful update was July 2020). The model is presented in the paper: Fader, P. S., & Hardie, B. G. (2013). [The Gamma-Gamma model of monetary value](http://www.brucehardie.com/notes/025/gamma_gamma.pdf). February, 2, 1-9." ] }, { "cell_type": "markdown", "id": "a579696d", "metadata": {}, "source": [ "## Prepare Notebook" ] }, { "cell_type": "code", "execution_count": 11, "id": "813aa3e6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [ "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "from lifetimes import GammaGammaFitter\n", "\n", "from pymc_marketing import clv\n", "\n", "# Plotting configuration\n", "az.style.use(\"arviz-darkgrid\")\n", "plt.rcParams[\"figure.figsize\"] = [10, 6]\n", "plt.rcParams[\"figure.dpi\"] = 100\n", "plt.rcParams[\"figure.facecolor\"] = \"white\"\n", "\n", "%load_ext autoreload\n", "%autoreload 2\n", "%config InlineBackend.figure_format = \"retina\"" ] }, { "cell_type": "markdown", "id": "b4e9df33", "metadata": {}, "source": [ "## Load Data\n", "\n", "We start by loading the `CDNOW` dataset." ] }, { "cell_type": "code", "execution_count": 21, "id": "4039ce96", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | frequency | \n", "recency | \n", "T | \n", "monetary_value | \n", "customer_id | \n", "
|---|---|---|---|---|---|
| 0 | \n", "2 | \n", "30.43 | \n", "38.86 | \n", "22.35 | \n", "0 | \n", "
| 1 | \n", "1 | \n", "1.71 | \n", "38.86 | \n", "11.77 | \n", "1 | \n", "
| 2 | \n", "0 | \n", "0.00 | \n", "38.86 | \n", "0.00 | \n", "2 | \n", "
| 3 | \n", "0 | \n", "0.00 | \n", "38.86 | \n", "0.00 | \n", "3 | \n", "
| 4 | \n", "0 | \n", "0.00 | \n", "38.86 | \n", "0.00 | \n", "4 | \n", "
| \n", " | frequency | \n", "recency | \n", "T | \n", "monetary_value | \n", "customer_id | \n", "
|---|---|---|---|---|---|
| 0 | \n", "2 | \n", "30.43 | \n", "38.86 | \n", "22.35 | \n", "0 | \n", "
| 1 | \n", "1 | \n", "1.71 | \n", "38.86 | \n", "11.77 | \n", "1 | \n", "
| 5 | \n", "7 | \n", "29.43 | \n", "38.86 | \n", "73.74 | \n", "5 | \n", "
| 6 | \n", "1 | \n", "5.00 | \n", "38.86 | \n", "11.77 | \n", "6 | \n", "
| 8 | \n", "2 | \n", "35.71 | \n", "38.86 | \n", "25.55 | \n", "8 | \n", "
| \n", " | monetary_value | \n", "frequency | \n", "
|---|---|---|
| monetary_value | \n", "1.000000 | \n", "0.113884 | \n", "
| frequency | \n", "0.113884 | \n", "1.000000 | \n", "
| \n", " | coef | \n", "se(coef) | \n", "lower 95% bound | \n", "upper 95% bound | \n", "
|---|---|---|---|---|
| p | \n", "6.248802 | \n", "1.189687 | \n", "3.917016 | \n", "8.580589 | \n", "
| q | \n", "3.744588 | \n", "0.290166 | \n", "3.175864 | \n", "4.313313 | \n", "
| v | \n", "15.447748 | \n", "4.159994 | \n", "7.294160 | \n", "23.601336 | \n", "
\n",
"\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"idata_map = model.fit(fit_method=\"map\").posterior.to_dataframe()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "b8f11643",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"| \n", " | \n", " | p | \n", "q | \n", "v | \n", "
|---|---|---|---|---|
| chain | \n", "draw | \n", "\n", " | \n", " | \n", " |
| 0 | \n", "0 | \n", "6.248787 | \n", "3.744591 | \n", "15.447813 | \n", "
\n",
"\n"
],
"text/plain": [
"\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 9 seconds.\n"
]
}
],
"source": [
"sampler_kwargs = {\n",
" \"draws\": 2_000,\n",
" \"target_accept\": 0.9,\n",
" \"chains\": 4,\n",
" \"random_seed\": 42,\n",
"}\n",
"\n",
"idata_mcmc = model.fit(**sampler_kwargs)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"id": "52c3b00e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <xarray.Dataset> Size: 208kB\n",
"Dimensions: (chain: 4, draw: 2000)\n",
"Coordinates:\n",
" * chain (chain) int64 32B 0 1 2 3\n",
" * draw (draw) int64 16kB 0 1 2 3 4 5 6 ... 1994 1995 1996 1997 1998 1999\n",
"Data variables:\n",
" p (chain, draw) float64 64kB 5.714 5.668 6.494 ... 5.196 6.557 5.684\n",
" q (chain, draw) float64 64kB 4.1 4.145 3.842 ... 4.032 3.729 3.962\n",
" v (chain, draw) float64 64kB 18.86 18.47 15.19 ... 20.83 14.82 18.38\n",
"Attributes:\n",
" created_at: 2024-06-26T12:36:52.420774+00:00\n",
" arviz_version: 0.18.0\n",
" inference_library: pymc\n",
" inference_library_version: 5.13.0\n",
" sampling_time: 8.762557983398438\n",
" tuning_steps: 1000<xarray.Dataset> Size: 992kB\n",
"Dimensions: (chain: 4, draw: 2000)\n",
"Coordinates:\n",
" * chain (chain) int64 32B 0 1 2 3\n",
" * draw (draw) int64 16kB 0 1 2 3 4 ... 1996 1997 1998 1999\n",
"Data variables: (12/17)\n",
" acceptance_rate (chain, draw) float64 64kB 1.0 0.3785 ... 0.9421\n",
" diverging (chain, draw) bool 8kB False False ... False False\n",
" energy (chain, draw) float64 64kB 4.051e+03 ... 4.052e+03\n",
" energy_error (chain, draw) float64 64kB -0.03456 ... -0.0594\n",
" index_in_trajectory (chain, draw) int64 64kB -1 -2 -16 -2 ... 27 19 19 23\n",
" largest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan\n",
" ... ...\n",
" process_time_diff (chain, draw) float64 64kB 0.000166 ... 0.003844\n",
" reached_max_treedepth (chain, draw) bool 8kB False False ... False False\n",
" smallest_eigval (chain, draw) float64 64kB nan nan nan ... nan nan\n",
" step_size (chain, draw) float64 64kB 0.0635 0.0635 ... 0.06346\n",
" step_size_bar (chain, draw) float64 64kB 0.06791 ... 0.05799\n",
" tree_depth (chain, draw) int64 64kB 1 2 6 3 3 6 ... 4 5 7 5 6 6\n",
"Attributes:\n",
" created_at: 2024-06-26T12:36:52.430582+00:00\n",
" arviz_version: 0.18.0\n",
" inference_library: pymc\n",
" inference_library_version: 5.13.0\n",
" sampling_time: 8.762557983398438\n",
" tuning_steps: 1000<xarray.Dataset> Size: 45kB\n",
"Dimensions: (index: 946)\n",
"Coordinates:\n",
" * index (index) int64 8kB 0 1 5 6 8 10 ... 2347 2348 2349 2353 2355\n",
"Data variables:\n",
" frequency (index) int64 8kB 2 1 7 1 2 5 10 1 3 2 ... 1 2 1 2 7 1 2 5 4\n",
" recency (index) float64 8kB 30.43 1.71 29.43 ... 21.86 24.29 26.57\n",
" T (index) float64 8kB 38.86 38.86 38.86 ... 27.0 27.0 27.0\n",
" monetary_value (index) float64 8kB 22.35 11.77 73.74 ... 18.56 44.93 33.32\n",
" customer_id (index) int64 8kB 0 1 5 6 8 10 ... 2347 2348 2349 2353 2355| \n", " | mean | \n", "sd | \n", "hdi_3% | \n", "hdi_97% | \n", "mcse_mean | \n", "mcse_sd | \n", "ess_bulk | \n", "ess_tail | \n", "r_hat | \n", "
|---|---|---|---|---|---|---|---|---|---|
| p | \n", "6.448 | \n", "1.340 | \n", "4.248 | \n", "9.027 | \n", "0.034 | \n", "0.024 | \n", "1559.0 | \n", "1871.0 | \n", "1.0 | \n", "
| q | \n", "3.775 | \n", "0.298 | \n", "3.238 | \n", "4.347 | \n", "0.007 | \n", "0.005 | \n", "1610.0 | \n", "2231.0 | \n", "1.0 | \n", "
| v | \n", "15.928 | \n", "4.292 | \n", "8.306 | \n", "24.053 | \n", "0.113 | \n", "0.080 | \n", "1450.0 | \n", "1775.0 | \n", "1.0 | \n", "
| \n", " | mean | \n", "sd | \n", "hdi_3% | \n", "hdi_97% | \n", "
|---|---|---|---|---|
| x[0] | \n", "24.688 | \n", "0.516 | \n", "23.718 | \n", "25.651 | \n", "
| x[1] | \n", "18.949 | \n", "1.331 | \n", "16.300 | \n", "21.284 | \n", "
| x[2] | \n", "35.183 | \n", "0.912 | \n", "33.600 | \n", "37.005 | \n", "
| x[3] | \n", "35.183 | \n", "0.912 | \n", "33.600 | \n", "37.005 | \n", "
| x[4] | \n", "35.183 | \n", "0.912 | \n", "33.600 | \n", "37.005 | \n", "
| x[5] | \n", "71.402 | \n", "0.610 | \n", "70.245 | \n", "72.517 | \n", "
| x[6] | \n", "18.949 | \n", "1.331 | \n", "16.300 | \n", "21.284 | \n", "
| x[7] | \n", "35.183 | \n", "0.912 | \n", "33.600 | \n", "37.005 | \n", "
| x[8] | \n", "27.304 | \n", "0.395 | \n", "26.545 | \n", "28.035 | \n", "
| x[9] | \n", "35.183 | \n", "0.912 | \n", "33.600 | \n", "37.005 | \n", "
| \n", " | mean | \n", "sd | \n", "hdi_3% | \n", "hdi_97% | \n", "
|---|---|---|---|---|
| x | \n", "35.258 | \n", "0.619 | \n", "34.154 | \n", "36.469 | \n", "