EY INTERVIEW QUESTION | Segment customers based on order count, total spent, and recency
DROP TABLE IF EXISTS orders
CREATE TABLE orders (
order_id INT PRIMARY KEY,
customer_id INT NOT NULL,
order_status VARCHAR(50) NOT NULL,
order_date DATE NOT NULL,
required_date DATE NOT NULL,
shipped_date DATE,
store_id INT NOT NULL,
staff_id INT NOT NULL
);
DROP TABLE IF EXISTS order_items
CREATE TABLE order_items (
order_id INT,
item_id INT,
product_id INT NOT NULL,
quantity INT NOT NULL,
list_price DECIMAL(10, 2) NOT NULL,
discount DECIMAL(5, 2) NOT NULL,
PRIMARY KEY (order_id, item_id),
FOREIGN KEY (order_id) REFERENCES orders(order_id)
);
INSERT INTO orders (order_id, customer_id, order_status, order_date, required_date, shipped_date, store_id, staff_id) VALUES
(1, 1001, 'Pending', '2024-05-01', '2024-05-05', NULL, 1, 101),
(2, 1002, 'Shipped', '2024-05-02', '2024-05-06', '2024-07-04', 2, 102),
(3, 1001, 'Delivered', '2024-04-03', '2024-04-07', '2024-07-05', 3, 103),
(4, 1004, 'Cancelled', '2024-04-04', '2024-04-08', NULL, 1, 104),
(5, 1001, 'Pending', '2024-03-05', '2024-03-09', NULL, 2, 105),
(6, 1005, 'Shipped', '2024-03-06', '2024-03-10', '2024-07-08', 3, 106),
(7, 1002, 'Delivered', '2024-01-07', '2024-01-11', '2024-07-09', 1, 107),
(8, 1002, 'Cancelled', '2024-01-08', '2024-01-12', NULL, 2, 108),
(9, 1003, 'Pending', '2024-01-09', '2024-01-13', NULL, 3, 109),
(10, 1001, 'Shipped', '2024-05-10', '2024-05-14', '2024-07-12', 1, 110),
(11, 1004, 'Delivered', '2024-05-11', '2024-05-15', '2024-07-13', 2, 111),
(12, 1002, 'Cancelled', '2024-05-12', '2024-05-16', NULL, 3, 112),
(13, 1001, 'Pending', '2024-05-13', '2024-05-17', NULL, 1, 113),
(14, 1002, 'Shipped', '2024-05-14', '2024-05-18', '2024-07-16', 2, 114),
(15, 1001, 'Delivered', '2024-02-15', '2024-02-19', '2024-07-17', 3, 115),
(16, 1004, 'Cancelled', '2024-02-16', '2024-02-20', NULL, 1, 116),
(17, 1001, 'Pending', '2024-02-17', '2024-02-21', NULL, 2, 117),
(18, 1003, 'Shipped', '2024-01-18', '2024-01-22', '2024-07-20', 3, 118),
(19, 1006, 'Delivered', '2024-01-19', '2024-01-23', '2024-07-21', 1, 119),
(20, 1003, 'Cancelled', '2024-04-20', '2024-04-24', NULL, 2, 120);
INSERT INTO order_items (order_id, item_id, product_id, quantity, list_price, discount) VALUES
(1, 1, 501, 2, 25.00, 0.00),
(2, 1, 502, 1, 15.00, 1.50),
(3, 1, 503, 3, 20.00, 2.00),
(4, 1, 504, 2, 30.00, 0.00),
(5, 1, 505, 1, 10.00, 0.50),
(6, 1, 506, 5, 8.00, 0.80),
(7, 1, 507, 4, 12.00, 1.20),
(8, 1, 508, 2, 40.00, 0.00),
(9, 1, 509, 1, 22.00, 2.20),
(10, 1, 510, 3, 18.00, 1.80),
(11, 1, 511, 2, 25.00, 2.50),
(12, 1, 512, 1, 35.00, 0.00),
(13, 1, 513, 4, 27.00, 0.00),
(14, 1, 514, 3, 15.00, 1.50),
(15, 1, 515, 5, 10.00, 1.00),
(16, 1, 516, 2, 20.00, 0.00),
(17, 1, 517, 1, 25.00, 2.50),
(18, 1, 518, 3, 30.00, 3.00),
(19, 1, 519, 4, 8.00, 0.80),
(20, 1, 520, 5, 50.00, 0.00);
Output:
customer_id purchase_frequency purchase_recency buying_power
1 repeat buyer recent buyer big spender
2 repeat buyer not recent buyer average spender
3 repeat buyer recent buyer big spender
4 repeat buyer not recent buyer average spender
5 repeat buyer not recent buyer average spender
Rules:
1. purchase_frequecy: If total_orders>1 then repeat buyer, else one-time buyer
2. purchase recency: if last order is placed less than 3 months then recent buyer, else not recent buyer
3. buying power: If total spent by customer/max of sales > 65percent then big spender if less than
30 percent than low spender else average spender
Comments
Post a Comment