|
5 | 5 | "execution_count": 1, |
6 | 6 | "id": "435ed203-5c3c-4efc-93d1-abac66ce7187", |
7 | 7 | "metadata": {}, |
8 | | - "outputs": [ |
9 | | - { |
10 | | - "name": "stderr", |
11 | | - "output_type": "stream", |
12 | | - "text": [ |
13 | | - "WARNING (pytensor.tensor.blas): Using NumPy C-API based implementation for BLAS functions.\n" |
14 | | - ] |
15 | | - } |
16 | | - ], |
| 8 | + "outputs": [], |
17 | 9 | "source": [ |
18 | 10 | "from pymc_marketing.clv import utils\n", |
19 | 11 | "\n", |
|
30 | 22 | }, |
31 | 23 | { |
32 | 24 | "cell_type": "code", |
33 | | - "execution_count": 69, |
| 25 | + "execution_count": 2, |
34 | 26 | "id": "7de7f396-1d5b-4457-916b-c29ed90aa132", |
35 | 27 | "metadata": {}, |
36 | 28 | "outputs": [], |
|
66 | 58 | }, |
67 | 59 | { |
68 | 60 | "cell_type": "code", |
69 | | - "execution_count": 70, |
| 61 | + "execution_count": 3, |
70 | 62 | "id": "932e8db6-78cf-49df-aa4a-83ee6584e5dd", |
71 | 63 | "metadata": {}, |
72 | 64 | "outputs": [ |
|
196 | 188 | "13 6 2015-02-02 True" |
197 | 189 | ] |
198 | 190 | }, |
199 | | - "execution_count": 70, |
| 191 | + "execution_count": 3, |
200 | 192 | "metadata": {}, |
201 | 193 | "output_type": "execute_result" |
202 | 194 | } |
|
223 | 215 | }, |
224 | 216 | { |
225 | 217 | "cell_type": "code", |
226 | | - "execution_count": 74, |
| 218 | + "execution_count": 4, |
227 | 219 | "id": "4c0a7de5-8825-40af-84e5-6cd0ad26a0e3", |
228 | 220 | "metadata": {}, |
229 | 221 | "outputs": [ |
|
259 | 251 | " <tr>\n", |
260 | 252 | " <th>0</th>\n", |
261 | 253 | " <td>1</td>\n", |
262 | | - " <td>1.0</td>\n", |
| 254 | + " <td>2.0</td>\n", |
263 | 255 | " <td>5.0</td>\n", |
264 | 256 | " <td>5.0</td>\n", |
265 | | - " <td>2.0</td>\n", |
| 257 | + " <td>1.5</td>\n", |
266 | 258 | " </tr>\n", |
267 | 259 | " <tr>\n", |
268 | 260 | " <th>1</th>\n", |
269 | 261 | " <td>2</td>\n", |
270 | | - " <td>0.0</td>\n", |
| 262 | + " <td>1.0</td>\n", |
271 | 263 | " <td>0.0</td>\n", |
272 | 264 | " <td>5.0</td>\n", |
273 | | - " <td>0.0</td>\n", |
| 265 | + " <td>2.0</td>\n", |
274 | 266 | " </tr>\n", |
275 | 267 | " <tr>\n", |
276 | 268 | " <th>2</th>\n", |
277 | 269 | " <td>3</td>\n", |
278 | | - " <td>1.0</td>\n", |
| 270 | + " <td>2.0</td>\n", |
279 | 271 | " <td>1.0</td>\n", |
280 | 272 | " <td>5.0</td>\n", |
281 | | - " <td>5.0</td>\n", |
| 273 | + " <td>4.5</td>\n", |
282 | 274 | " </tr>\n", |
283 | 275 | " <tr>\n", |
284 | 276 | " <th>3</th>\n", |
285 | 277 | " <td>4</td>\n", |
286 | | - " <td>1.0</td>\n", |
| 278 | + " <td>2.0</td>\n", |
287 | 279 | " <td>3.0</td>\n", |
288 | 280 | " <td>3.0</td>\n", |
289 | | - " <td>8.0</td>\n", |
| 281 | + " <td>7.0</td>\n", |
290 | 282 | " </tr>\n", |
291 | 283 | " <tr>\n", |
292 | 284 | " <th>4</th>\n", |
293 | 285 | " <td>5</td>\n", |
294 | | - " <td>0.0</td>\n", |
| 286 | + " <td>1.0</td>\n", |
295 | 287 | " <td>0.0</td>\n", |
296 | 288 | " <td>3.0</td>\n", |
297 | | - " <td>0.0</td>\n", |
| 289 | + " <td>12.0</td>\n", |
298 | 290 | " </tr>\n", |
299 | 291 | " </tbody>\n", |
300 | 292 | "</table>\n", |
301 | 293 | "</div>" |
302 | 294 | ], |
303 | 295 | "text/plain": [ |
304 | 296 | " customer_id frequency recency T monetary_value\n", |
305 | | - "0 1 1.0 5.0 5.0 2.0\n", |
306 | | - "1 2 0.0 0.0 5.0 0.0\n", |
307 | | - "2 3 1.0 1.0 5.0 5.0\n", |
308 | | - "3 4 1.0 3.0 3.0 8.0\n", |
309 | | - "4 5 0.0 0.0 3.0 0.0" |
| 297 | + "0 1 2.0 5.0 5.0 1.5\n", |
| 298 | + "1 2 1.0 0.0 5.0 2.0\n", |
| 299 | + "2 3 2.0 1.0 5.0 4.5\n", |
| 300 | + "3 4 2.0 3.0 3.0 7.0\n", |
| 301 | + "4 5 1.0 0.0 3.0 12.0" |
310 | 302 | ] |
311 | 303 | }, |
312 | | - "execution_count": 74, |
| 304 | + "execution_count": 4, |
313 | 305 | "metadata": {}, |
314 | 306 | "output_type": "execute_result" |
315 | 307 | } |
|
323 | 315 | " observation_period_end = \"2015-02-06\",\n", |
324 | 316 | " datetime_format = \"%Y-%m-%d\",\n", |
325 | 317 | " time_unit = \"W\",\n", |
326 | | - " include_first_transaction=False,\n", |
| 318 | + " include_first_transaction=True,\n", |
327 | 319 | ")\n", |
328 | 320 | "\n", |
329 | 321 | "rfm_df.head()" |
|
339 | 331 | }, |
340 | 332 | { |
341 | 333 | "cell_type": "code", |
342 | | - "execution_count": 76, |
| 334 | + "execution_count": 5, |
343 | 335 | "id": "761edfe9-1b69-4966-83bf-4f1242eda2d5", |
344 | 336 | "metadata": {}, |
345 | 337 | "outputs": [ |
|
450 | 442 | "4 0.0 5.0 " |
451 | 443 | ] |
452 | 444 | }, |
453 | | - "execution_count": 76, |
| 445 | + "execution_count": 5, |
454 | 446 | "metadata": {}, |
455 | 447 | "output_type": "execute_result" |
456 | 448 | } |
|
467 | 459 | "train_test.head()" |
468 | 460 | ] |
469 | 461 | }, |
| 462 | + { |
| 463 | + "cell_type": "markdown", |
| 464 | + "id": "73dc1b93-6a4f-4171-b838-30759b2c1e0e", |
| 465 | + "metadata": {}, |
| 466 | + "source": [ |
| 467 | + "`rfm_segments` will assign customers to segments based on their recency, frequency, and monetary value. It uses a quartile-based RFM score approach that is very computationally efficient, but defining custom segments is a rather subjective exercise. Note that the returned dataframe cannot be used for modeling because it does not zero out the initial transactions." |
| 468 | + ] |
| 469 | + }, |
470 | 470 | { |
471 | 471 | "cell_type": "code", |
472 | | - "execution_count": null, |
| 472 | + "execution_count": 40, |
473 | 473 | "id": "c7b3f800-8dfb-4e5a-b939-5f908281563c", |
474 | 474 | "metadata": {}, |
475 | 475 | "outputs": [], |
476 | | - "source": [] |
| 476 | + "source": [ |
| 477 | + "segments = utils.rfm_segments(\n", |
| 478 | + " test_data, \n", |
| 479 | + " customer_id_col = \"id\", \n", |
| 480 | + " datetime_col = \"date\", \n", |
| 481 | + " monetary_value_col = \"monetary_value\",\n", |
| 482 | + " observation_period_end = \"2015-02-06\",\n", |
| 483 | + " datetime_format = \"%Y-%m-%d\",\n", |
| 484 | + " time_unit = \"W\",\n", |
| 485 | + ")" |
| 486 | + ] |
| 487 | + }, |
| 488 | + { |
| 489 | + "cell_type": "code", |
| 490 | + "execution_count": 17, |
| 491 | + "id": "932ac4e5-361e-42fa-97d3-d8e508128944", |
| 492 | + "metadata": {}, |
| 493 | + "outputs": [ |
| 494 | + { |
| 495 | + "data": { |
| 496 | + "text/html": [ |
| 497 | + "<div>\n", |
| 498 | + "<style scoped>\n", |
| 499 | + " .dataframe tbody tr th:only-of-type {\n", |
| 500 | + " vertical-align: middle;\n", |
| 501 | + " }\n", |
| 502 | + "\n", |
| 503 | + " .dataframe tbody tr th {\n", |
| 504 | + " vertical-align: top;\n", |
| 505 | + " }\n", |
| 506 | + "\n", |
| 507 | + " .dataframe thead th {\n", |
| 508 | + " text-align: right;\n", |
| 509 | + " }\n", |
| 510 | + "</style>\n", |
| 511 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 512 | + " <thead>\n", |
| 513 | + " <tr style=\"text-align: right;\">\n", |
| 514 | + " <th></th>\n", |
| 515 | + " <th>customer_id</th>\n", |
| 516 | + " <th>frequency</th>\n", |
| 517 | + " <th>recency</th>\n", |
| 518 | + " <th>monetary_value</th>\n", |
| 519 | + " <th>segment</th>\n", |
| 520 | + " </tr>\n", |
| 521 | + " </thead>\n", |
| 522 | + " <tbody>\n", |
| 523 | + " <tr>\n", |
| 524 | + " <th>0</th>\n", |
| 525 | + " <td>1</td>\n", |
| 526 | + " <td>2.0</td>\n", |
| 527 | + " <td>0.0</td>\n", |
| 528 | + " <td>1.5</td>\n", |
| 529 | + " <td>Other</td>\n", |
| 530 | + " </tr>\n", |
| 531 | + " <tr>\n", |
| 532 | + " <th>1</th>\n", |
| 533 | + " <td>2</td>\n", |
| 534 | + " <td>1.0</td>\n", |
| 535 | + " <td>5.0</td>\n", |
| 536 | + " <td>2.0</td>\n", |
| 537 | + " <td>Inactive Customer</td>\n", |
| 538 | + " </tr>\n", |
| 539 | + " <tr>\n", |
| 540 | + " <th>2</th>\n", |
| 541 | + " <td>3</td>\n", |
| 542 | + " <td>2.0</td>\n", |
| 543 | + " <td>4.0</td>\n", |
| 544 | + " <td>4.5</td>\n", |
| 545 | + " <td>At Risk Customer</td>\n", |
| 546 | + " </tr>\n", |
| 547 | + " <tr>\n", |
| 548 | + " <th>3</th>\n", |
| 549 | + " <td>4</td>\n", |
| 550 | + " <td>2.0</td>\n", |
| 551 | + " <td>0.0</td>\n", |
| 552 | + " <td>7.0</td>\n", |
| 553 | + " <td>Top Spender</td>\n", |
| 554 | + " </tr>\n", |
| 555 | + " <tr>\n", |
| 556 | + " <th>4</th>\n", |
| 557 | + " <td>5</td>\n", |
| 558 | + " <td>1.0</td>\n", |
| 559 | + " <td>3.0</td>\n", |
| 560 | + " <td>12.0</td>\n", |
| 561 | + " <td>At Risk Customer</td>\n", |
| 562 | + " </tr>\n", |
| 563 | + " <tr>\n", |
| 564 | + " <th>5</th>\n", |
| 565 | + " <td>6</td>\n", |
| 566 | + " <td>1.0</td>\n", |
| 567 | + " <td>0.0</td>\n", |
| 568 | + " <td>5.0</td>\n", |
| 569 | + " <td>Top Spender</td>\n", |
| 570 | + " </tr>\n", |
| 571 | + " </tbody>\n", |
| 572 | + "</table>\n", |
| 573 | + "</div>" |
| 574 | + ], |
| 575 | + "text/plain": [ |
| 576 | + " customer_id frequency recency monetary_value segment\n", |
| 577 | + "0 1 2.0 0.0 1.5 Other\n", |
| 578 | + "1 2 1.0 5.0 2.0 Inactive Customer\n", |
| 579 | + "2 3 2.0 4.0 4.5 At Risk Customer\n", |
| 580 | + "3 4 2.0 0.0 7.0 Top Spender\n", |
| 581 | + "4 5 1.0 3.0 12.0 At Risk Customer\n", |
| 582 | + "5 6 1.0 0.0 5.0 Top Spender" |
| 583 | + ] |
| 584 | + }, |
| 585 | + "execution_count": 17, |
| 586 | + "metadata": {}, |
| 587 | + "output_type": "execute_result" |
| 588 | + } |
| 589 | + ], |
| 590 | + "source": [ |
| 591 | + "segments" |
| 592 | + ] |
477 | 593 | } |
478 | 594 | ], |
479 | 595 | "metadata": { |
|
492 | 608 | "name": "python", |
493 | 609 | "nbconvert_exporter": "python", |
494 | 610 | "pygments_lexer": "ipython3", |
495 | | - "version": "3.9.18" |
| 611 | + "version": "3.10.14" |
496 | 612 | } |
497 | 613 | }, |
498 | 614 | "nbformat": 4, |
|
0 commit comments