-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRFM_Analysis.sql
482 lines (408 loc) · 17.3 KB
/
RFM_Analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
CREATE DATABASE RFM_SegmentSales
COLLATE Latin1_General_CI_AS;
select * from dbo.sales_data_sample
--CHecking unique values
select distinct status from [dbo].[sales_data_sample] --Nice one to plot
select distinct YEAR_ID from [dbo].[sales_data_sample]
select distinct PRODUCTLINE from [dbo].[sales_data_sample] ---Nice to plot
select distinct COUNTRY from [dbo].[sales_data_sample] ---Nice to plot
select distinct DEALSIZE from [dbo].[sales_data_sample] ---Nice to plot
select distinct TERRITORY from [dbo].[sales_data_sample] ---Nice to plot
select distinct MONTH_ID from [dbo].[sales_data_sample]
where YEAR_ID = 2005
---ANALYSIS
----Let's start by grouping sales by productline
select PRODUCTLINE, sum(sales) Revenue
from [dbo].[sales_data_sample]
group by PRODUCTLINE
order by 2 desc
----Let's start by grouping sales by year
select YEAR_ID, sum(sales) Revenue
from [dbo].[sales_data_sample]
group by YEAR_ID
order by 2 desc
----Let's start by grouping sales by deal size
select DEALSIZE, sum(sales) Revenue
from [dbo].[sales_data_sample]
group by DEALSIZE
order by 2 desc
----What was the best month for sales in a specific year? How much was earned that month?
select MONTH_ID, sum(sales) Revenue, count(ORDERNUMBER) Frequency
from [dbo].[sales_data_sample]
where YEAR_ID = 2003 --change year to see the rest
group by MONTH_ID
order by 2 desc
--November seems to be the month, what product do they sell in November, Classic I believe
select MONTH_ID, PRODUCTLINE, sum(sales) Revenue, count(ORDERNUMBER) Frequency
from [dbo].[sales_data_sample]
where YEAR_ID = 2003 and MONTH_ID = 11 --change year to see the rest
group by MONTH_ID, PRODUCTLINE
order by 3 desc
--RFM ANALYSÝS
---*Recency : last order date
---*Frequency :count of total orders
---*Monetary :total spend
select*
from sales_data_sample
--
SELECT CUSTOMERNAME ,
sum(SALES) MonetaryValue,
avg(SALES) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
---
SELECT CUSTOMERNAME ,
sum(SALES) MonetaryValue,
avg(SALES) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,--Her müþteri için sipariþ sayýsýný bulur.
max(ORDERDATE) last_order_date,--Her müþterinin en son yapýlan sipariþin tarihini bulur.
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date--Tüm müþterilerin sipariþ verdiði tarihin en yenisini bulur.
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
--- müþterilerin toplam satýþ, ortalama satýþ, sipariþ sayýsý, en son sipariþ tarihi ve genel olarak en yeni sipariþ tarihini gösterir.
select
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
-- müþteri bazýnda RFM analizine dayalý istatistiksel bilgileri içeren bir tablo oluþturur ve bu tablodan tüm sütunlarý seçer
;with rfm as -- "rfm" adýnda bir geçici tablo (Common Table Expression - CTE) oluþturur
(
select
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
)
SELECT r.* FROM rfm r -- Oluþturulan rfm geçici tablosundan tüm sütunlarý seçer.
--RFM analizi sonuçlarýna dayalý olarak müþterileri Recency, Frequency ve MonetaryValue özelliklerine göre dört eþit gruba böler.
;with rfm as -- "rfm" adýnda bir geçici tablo (Common Table Expression - CTE) oluþturur
(
select
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
)
SELECT r.* ,
NTILE(4) OVER (order by Recency desc) rfm_recency,--Recency deðerlerini büyükten küçüðe sýralayarak her müþteriyi dört eþit gruba böler (dört kuartile).
NTILE(4) OVER (order by Frequency) rfm_frequency,--Frequency deðerlerini küçükten büyüðe sýralayarak her müþteriyi dört eþit gruba böler.
NTILE(4) OVER (order by MonetaryValue) rfm_monetary--MonetaryValue deðerlerini küçükten büyüðe sýralayarak her müþteriyi dört eþit gruba böler.
FROM rfm r
---NEXT
;with rfm as -- "rfm" adýnda bir geçici tablo (Common Table Expression - CTE) oluþturur
(
SELECT
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
FROM [dbo].[sales_data_sample]
GROUP BY CUSTOMERNAME
),--Yukarýda rfm adýnda geçici tablo(CTE)oluþturdu.
rfm_calc as
(
SELECT r.* ,
NTILE(4) OVER (order by Recency desc) rfm_recency,--Recency deðerlerini büyükten küçüðe sýralayarak her müþteriyi dört eþit gruba böler (dört kuartile).
NTILE(4) OVER (order by Frequency) rfm_frequency,--Frequency deðerlerini küçükten büyüðe sýralayarak her müþteriyi dört eþit gruba böler.
NTILE(4) OVER (order by MonetaryValue) rfm_monetary--MonetaryValue deðerlerini küçükten büyüðe sýralayarak her müþteriyi dört eþit gruba böler.
FROM rfm r
)--Ardýndan, rfm_calc adýnda bir baþka CTE oluþturulur. Bu CTE, rfm tablosundan gelen verileri kullanarak her bir müþteriyi Recency,
--Frequency ve MonetaryValue deðerlerine göre dört eþit gruba bölen NTILE fonksiyonu kullanýlarak RFM segmentlerini hesaplar.
SELECT c.*, rfm_recency+ rfm_frequency+ rfm_monetary as rfm_cell
FROM rfm_calc c
--Bu son sorgu, rfm_calc tablosundan gelen verilerle birlikte her bir müþterinin RFM hücre deðerini (rfm_cell) içeren sonuç kümesini döndürür.
--RFM hücre deðeri, Recency, Frequency ve MonetaryValue deðerlerinin toplamýdýr.
--Bu deðer, müþteriyi belirli bir segmente yerleþtirmek için kullanýlabilir.
--NEXT
;with rfm as -- "rfm" adýnda bir geçici tablo (Common Table Expression - CTE) oluþturur
(
SELECT
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
FROM [dbo].[sales_data_sample]
GROUP BY CUSTOMERNAME
),--Yukarýda rfm adýnda geçici tablo(CTE)oluþturdu.
rfm_calc as
(
SELECT r.* ,-- ifadesi, rfm tablosundaki tüm sütunlarý seçer.
NTILE(4) OVER (order by Recency desc) rfm_recency,--Recency deðerlerini büyükten küçüðe sýralayarak her müþteriyi dört eþit gruba böler (dört kuartile).
NTILE(4) OVER (order by Frequency) rfm_frequency,--Frequency deðerlerini küçükten büyüðe sýralayarak her müþteriyi dört eþit gruba böler.
NTILE(4) OVER (order by MonetaryValue) rfm_monetary--MonetaryValue deðerlerini küçükten büyüðe sýralayarak her müþteriyi dört eþit gruba böler.
FROM rfm r
)
SELECT c.*, rfm_recency+ rfm_frequency+ rfm_monetary as rfm_cell,
cast(rfm_recency as varchar) + cast(rfm_frequency as varchar) + cast(rfm_monetary as varchar)rfm_cell_string
FROM rfm_calc c
---NEXT
--IF OBJECT_ID('tempdb..#rfm') IS NOT NULL DROP TABLE #rfm; BU KISIM ALTERNATÝF YÖNTEM
DROP TABLE IF EXISTS #rfm--Eðer #rfm adýnda bir geçici tablo zaten varsa, bu tabloyu siler.
--Bu, her sorgu çalýþtýðýnda temiz bir baþlangýç saðlamak için yapýlýr.
;with rfm as
(
select
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
),
rfm_calc as
(
select r.*,-- ifadesi, rfm tablosundaki tüm sütunlarý seçer.
NTILE(4) OVER (order by Recency desc) rfm_recency,
NTILE(4) OVER (order by Frequency) rfm_frequency,
NTILE(4) OVER (order by MonetaryValue) rfm_monetary
from rfm r
)
select
c.*, rfm_recency+ rfm_frequency+ rfm_monetary as rfm_cell,
cast(rfm_recency as varchar) + cast(rfm_frequency as varchar) + cast(rfm_monetary as varchar)rfm_cell_string
into #rfm
from rfm_calc c
--SELECT ... INTO #rfm ...: Bu kýsým, rfm_calc tablosundan gelen verilerle birlikte, her bir müþterinin RFM hücre deðerini (rfm_cell)
--içeren sonuç kümesini oluþturur ve bu verileri #rfm adýndaki geçici tabloya ekler. Ayrýca, rfm_recency, rfm_frequency,
--ve rfm_monetary deðerlerini birleþtirerek bir karakter dizesi oluþturup rfm_cell_string adýnda bir sütun oluþturur.
--KONTROL EDELÝM..
SELECT *
FROM #rfm
----Who is our best customer (this could be best answered with RFM)
DROP TABLE IF EXISTS #rfm--Eðer #rfm adýnda bir geçici tablo zaten varsa, bu tabloyu siler.
--Bu, her sorgu çalýþtýðýnda temiz bir baþlangýç saðlamak için yapýlýr.
;with rfm as
(
select
CUSTOMERNAME,
sum(sales) MonetaryValue,
avg(sales) AvgMonetaryValue,
count(ORDERNUMBER) Frequency,
max(ORDERDATE) last_order_date,
(select max(ORDERDATE) from [dbo].[sales_data_sample]) max_order_date,
DATEDIFF(DD, max(ORDERDATE), (select max(ORDERDATE) from [dbo].[sales_data_sample])) Recency
from [dbo].[sales_data_sample]
group by CUSTOMERNAME
),
rfm_calc as
(
SELECT r.*,
NTILE(4) OVER (order by Recency DESC) rfm_recency,
NTILE(4) OVER (order by Frequency) rfm_frequency,
NTILE(4) OVER (order by MonetaryValue) rfm_monetary
from rfm r
)
SELECT
c.*, rfm_recency+ rfm_frequency+ rfm_monetary as rfm_cell,
cast(rfm_recency as varchar) + cast(rfm_frequency as varchar) + cast(rfm_monetary as varchar)rfm_cell_string
INTO #rfm
FROM rfm_calc c
SELECT CUSTOMERNAME , rfm_recency, rfm_frequency, rfm_monetary,
CASE
WHEN rfm_cell_string in (111, 112 , 121, 122, 123, 132, 211, 212, 114, 141) THEN 'lost_customers' --lost customers
WHEN rfm_cell_string in (133, 134, 143, 244, 334, 343, 344, 144) then 'slipping away, cannot lose' -- (Big spenders who havent purchased lately) slipping away
WHEN rfm_cell_string in (311, 411, 331) THEN 'new customers'
WHEN rfm_cell_string in (222, 223, 233, 322) THEN 'potential churners'
WHEN rfm_cell_string in (323, 333,321, 422, 332, 432) THEN 'active' --(Customers who buy often & recently, but at low price points)
WHEN rfm_cell_string in (433, 434, 443, 444) THEN 'loyal'
END rfm_segment
FROM #rfm
SELECT*
FROM sales_data_sample
--What products are most often sold together?
WITH OrderProducts AS (
SELECT
ORDERNUMBER,
STRING_AGG(PRODUCTLINE, ',') WITHIN GROUP (ORDER BY PRODUCTLINE) AS CombinedProducts
FROM
[dbo].[sales_data_sample]
GROUP BY
ORDERNUMBER
HAVING
COUNT(DISTINCT PRODUCTLINE) > 1
)
SELECT
DISTINCT CombinedProducts,
COUNT(*) AS OccurrenceCount
FROM
OrderProducts
GROUP BY
CombinedProducts
ORDER BY
OccurrenceCount DESC;
----Aþagýdaki sorgu 'Shipped' durumundaki sipariþlerin her birinin sipariþ numarasýný ve her bir sipariþin kaç defa tekrarlandýðýný
----içeren bir sonuç kümesini döndürür. rn sütunu, her bir sipariþin tekrar sayýsýný gösterir.
SELECT ORDERNUMBER, count(*) rn-- ORDERNUMBER sütununu ve bu sütundaki deðerlerin kaç defa tekrarlandýðýný sayan bir sayacý (rn) içerir.
FROM [dbo].[sales_data_sample]
WHERE STATUS = 'Shipped'
GROUP BY ORDERNUMBER
--
--SELECT * FROM [dbo].[sales_data_sample] WHERE ORDERNUMBER= 10411
--NEXT
--Bu SQL sorgusu, [dbo].[sales_data_sample] tablosundan "Shipped" (gönderilmiþ) durumundaki sipariþleri içeren ve her bir sipariþ
--numarasýnýn kaç defa tekrarlandýðýný sayan bir alt sorgu içerir. Daha sonra, bu alt sorgunun sonucunda elde edilen geçici tablo
--veya sonuç kümesi üzerinde rn (row number) deðeri 3 olan sipariþ numaralarýný seçer.
SELECT ORDERNUMBER FROM
(
SELECT ORDERNUMBER, count(*) rn-- ORDERNUMBER sütununu ve bu sütundaki deðerlerin kaç defa tekrarlandýðýný sayan bir sayacý (rn) içerir.(row number)
FROM [dbo].[sales_data_sample]
WHERE STATUS = 'Shipped'
GROUP BY ORDERNUMBER
)m--oluþturulan geçici tabloya bir takma ad (alias) verir
WHERE rn = 3
--NEXTT
SELECT ',' + PRODUCTCODE
FROM [dbo].[sales_data_sample]
WHERE ORDERNUMBER IN(
SELECT ORDERNUMBER FROM
(
SELECT ORDERNUMBER, count(*) rn-- ORDERNUMBER sütununu ve bu sütundaki deðerlerin kaç defa tekrarlandýðýný sayan bir sayacý (rn) içerir.(row number)
FROM [dbo].[sales_data_sample]
WHERE STATUS = 'Shipped'
GROUP BY ORDERNUMBER
)m--oluþturulan geçici tabloya bir takma ad (alias) verir
WHERE rn = 2
)
FOR XML PATH('')--SQL sorgusunda, sonuç kümesini bir XML belgesine dönüþtürmek için kullanýlýr. Bu ifade,
--sorgu sonuçlarýný XML formatýnda birleþtirilmiþ bir dize olarak döndürmeye olanak tanýr.
--NEXTTT
SELECT stuff( --STUFF fonksiyonu ise baþtaki virgülü kaldýrarak düzgün birleþmiþ bir dize elde etmeye yardýmcý olur.
(SELECT ',' + PRODUCTCODE
FROM [dbo].[sales_data_sample]
WHERE ORDERNUMBER IN
(
SELECT ORDERNUMBER FROM
(
SELECT ORDERNUMBER, count(*) rn-- ORDERNUMBER sütununu ve bu sütundaki deðerlerin kaç defa tekrarlandýðýný sayan bir sayacý (rn) içerir.(row number)
FROM [dbo].[sales_data_sample]
WHERE STATUS = 'Shipped'
GROUP BY ORDERNUMBER
)m--oluþturulan geçici tabloya bir takma ad (alias) verir
WHERE rn = 2--(yani, iki farklý ürün içeren sipariþler)
)
FOR XML PATH(''))
,1,1,'')
--NEXTTT
SELECT DISTINCT ORDERNUMBER, STUFF(
(SELECT ',' + PRODUCTCODE
FROM [dbo].[sales_data_sample] p
WHERE ORDERNUMBER IN
(
SELECT ORDERNUMBER
FROM(
SELECT ORDERNUMBER, count(*) rn-- ORDERNUMBER sütununu ve bu sütundaki deðerlerin kaç defa tekrarlandýðýný sayan bir sayacý (rn) içerir.(row number)
FROM [dbo].[sales_data_sample]
WHERE STATUS = 'Shipped'
GROUP BY ORDERNUMBER
)m--oluþturulan geçici tabloya bir takma ad (alias) verir
WHERE rn = 2
)
AND p.ORDERNUMBER = s.ORDERNUMBER
FOR XML PATH(''))
,1,1,'') ProductCodes
FROM [dbo].[sales_data_sample] s
ORDER BY 2 DESC
-----EXTRAs----
--What city has the highest number of sales in a specific country
select CITY, sum (sales) Revenue
from [dbo].[sales_data_sample]
where country = 'UK'
group by CITY
order by 2 desc
---What is the best product in United States?
select country, YEAR_ID, PRODUCTLINE, sum(sales) Revenue
from [dbo].[sales_data_sample]
where country = 'USA'
group by country, YEAR_ID, PRODUCTLINE
order by 4 desc
select * from sales_data_sample
--En Fazla Ürün Çeþidi Satýn Alan Müþteriler:
SELECT
CUSTOMERNAME,
COUNT(DISTINCT PRODUCTCODE) AS UniqueProductsCount
FROM [dbo].[sales_data_sample]
GROUP BY CUSTOMERNAME
ORDER BY UniqueProductsCount DESC;
--Her Müþteri Ýçin En Yüksek Tutarlý Sipariþ:
SELECT
CUSTOMERNAME,
ORDERNUMBER,
MAX(sales) AS MaxOrderAmount
FROM [dbo].[sales_data_sample]
GROUP BY CUSTOMERNAME, ORDERNUMBER
ORDER BY MaxOrderAmount DESC;
---Bu sorgu, her bir ayýn altýnda farklý ürün kategorileri ve bu kategorilere ait toplam satýþlarý içeren bir pivot tablosu oluþturur.
WITH OrderCategoryTotals AS --geçici bir tablo (CTE) oluþturulur. Bu tablo, sipariþ tarihlerini yýl ve ay düzeyinde gruplandýrýr
--ve her bir ürün kategorisi için toplam satýþ miktarýný hesaplar.
(
SELECT
CONVERT(VARCHAR(7), ORDERDATE, 120) AS OrderMonth,--VARCHAR(7): Bu, dönüþtürülen karakter dizisinin maksimum uzunluðunu belirtir.
--Yani, en fazla 7 karakter uzunluðunda bir karakter dizisi oluþturulacaktýr.
--120: Bu, tarih ve saat biçimini belirten bir stil kodudur. 120 stili,
--"yyyy-mm" yýl ve ay formatýndaki bir tarih biçimini ifade eder.
PRODUCTLINE,
SUM(SALES) AS TotalSales
FROM [dbo].[sales_data_sample]
GROUP BY CONVERT(VARCHAR(7), ORDERDATE, 120), PRODUCTLINE
)
SELECT
*
FROM
(
SELECT
OrderMonth,
PRODUCTLINE,
TotalSales
FROM OrderCategoryTotals
) AS SourceTable
PIVOT
(
SUM(TotalSales)
FOR PRODUCTLINE IN ([Classic Cars], [Trucks and Buses], [Planes], [Vintage Cars], [Motorcycles], [Ships], [Trains])
) AS PivotTable
ORDER BY OrderMonth;
--PRODUCTLINE'a göre toplam satýþlarý ve yýllara göre bu toplamlarý içeren bir tablo olacaktýr.
SELECT
*
FROM
(
SELECT
YEAR_ID,
PRODUCTLINE,
SALES
FROM sales_data_sample
) AS DENEME
PIVOT
(
SUM(SALES)
FOR YEAR_ID IN ([2003], [2004], [2005])
)
AS PivotTable
ORDER BY 2 DESC;