import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import warnings
'ignore')
warnings.filterwarnings(
= pd.read_csv('priscables_clean.csv') data
Priscables Electronics | Exploring the Data Through Questions
Marketing Analytics
Introduction
After completing Data Preparation, the next step is Exploratory Data Analysis (EDA). In this notebook, I explore the data further by asking key questions to uncover patterns, trends, and insights.
If you’d like to review the previous notebook on Data Preparation, click the link below:
View Data Preparation Notebook
Load libraries
1. Which City Generated the Most Revenue?
Code
import plotly.graph_objects as go
import pandas as pd
# Group, aggregate, and sort data (ascending for lowest to highest)
= data.groupby('City')['Sales'].sum().reset_index()
city_sales = city_sales.sort_values(by='Sales', ascending=True)
city_sales
# Define a mapping of full city names to shortened names
= {
city_mapping "San Francisco, CA": "San Francisco",
"Los Angeles, CA": "L.A.",
"New York City, NY": "New York",
"Boston, MA": "Boston",
"Atlanta, GA": "Atlanta",
"Dallas, TX": "Dallas",
"Seattle, WA": "Seattle",
"Portland, OR": "Portland, OR",
"Austin, TX": "Austin",
"Portland, ME": "Portland, ME"
}
# Create a list of shortened names in the sorted order
= [city_mapping.get(city, city) for city in city_sales['City']]
shortened_cities
# Create the figure (Vertical column chart)
= go.Figure(data=[go.Bar(
fig =city_sales['City'],
x=city_sales['Sales'],
y='#2660a4'
marker_color
)])
# Update layout with the specified changes
fig.update_layout(="Total Sales by City", # Added chart title
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
title_font='#F5F4EF',
plot_bgcolor='#F5F4EF',
paper_bgcolor=780,
width=500,
height=dict(
xaxis="",
title_text=True,
showline='black',
linecolor='array',
tickmode=city_sales['City'],
tickvals=shortened_cities,
ticktext='black',
tickcolor=2,
tickwidth="outside",
ticks=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfontrange=[-0.7, len(city_sales["City"]) - 0.3] # Fixed range
),=dict(
yaxis="", # Added y-axis title for clarity
title_text="right",
side=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'), # Updated y-axis tick color
tickfont='lightgray',
gridcolor=0.5,
gridwidth=False
zeroline
),=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
font=dict(l=50, r=50, t=100, b=50)
margin
)
# Show plot
fig.show()
2. What Was the Best Month for Sales?
Code
# Define month order
= ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
month_order
# Group and aggregate sales by month
= data.groupby('Month')['Sales'].sum().reset_index()
month_sales
# Ensure the Month column follows the correct order
'Month'] = pd.Categorical(month_sales['Month'], categories=month_order, ordered=True)
month_sales[= month_sales.sort_values(by='Month')
month_sales
# Define colors
= ['#7199c3'] * (len(month_sales) - 1) + ['#2660a4']
colors
# Create the figure (Vertical column chart)
= go.Figure(data=[go.Bar(
fig =month_sales['Month'],
x=month_sales['Sales'],
y=colors # Apply the color list
marker_color
)])
# Update layout with the specified changes
fig.update_layout(="Total Sales by Month", # Added chart title
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
title_font='#F5F4EF',
plot_bgcolor='#F5F4EF',
paper_bgcolor=790,
width=500,
height=dict(
xaxis="",
title_text=True,
showline='black',
linecolor='array',
tickmode=month_sales['Month'],
tickvals=month_sales['Month'],
ticktext='black',
tickcolor=2,
tickwidth="outside",
ticks=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfontrange=[-0.7, len(month_sales["Month"]) - 0.3] # Adjust range for better fit
),=dict(
yaxis="", # Corrected title
title_text="right",
side=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'), # Updated y-axis tick color
tickfont='lightgray',
gridcolor=0.5,
gridwidth=False
zeroline
),=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
font=dict(l=50, r=50, t=100, b=50)
margin
)
fig.show()
3. What Were the Best Hours for Sales?
Code
import plotly.graph_objects as go
import pandas as pd
# Group by Hour and sum Quantity Ordered
= data.groupby('Hour')['Quantity_Ordered'].sum().reset_index()
hour_quantity
# Sort by Hour
= hour_quantity.sort_values(by='Hour')
hour_quantity
= ['#7199c3' if hour not in [11, 12, 13, 18, 19, 20] else '#2660a4' for hour in hour_quantity['Hour']]
colors # Define hour labels
= ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"]
hour_labels
# Create the figure (Bar Chart)
= go.Figure(data=[go.Bar(
fig =hour_quantity['Hour'],
x=hour_quantity['Quantity_Ordered'],
y=colors # Apply color mapping
marker_color
)])
# Update layout with specified modifications
fig.update_layout(="Total Quantity Ordered by Hour", # Added chart title
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
title_font='#F5F4EF',
plot_bgcolor='#F5F4EF',
paper_bgcolor=790, # Set width to 1000
width=500, # Set height to 700
height=dict(
xaxis="Hour",
title_text=True,
showline='black',
linecolor='array',
tickmode=hour_quantity['Hour'],
tickvals=hour_labels, # Apply custom labels
ticktext='black',
tickcolor=2,
tickwidth="outside",
ticks=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black')
tickfont
),=dict(
yaxis="",
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'), # Updated y-axis tick color
tickfont='lightgray',
gridcolor=True,
showticklabels=0.5,
gridwidth='solid',
griddash=False,
zeroline='right' # Move y-axis to the right
side
),=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
font=dict(l=50, r=50, t=100, b=50) # Adjusted top margin for title
margin
)
# Show plot
fig.show()
4. Which Products Sold the Most?
Code
# Group, aggregate, and sort data
= data.groupby('Product')['Quantity_Ordered'].sum().reset_index()
product_sales = product_sales.sort_values(by='Quantity_Ordered', ascending=False)
product_sales
# Shorten long product names automatically
= {
name_mapping "Lightning Charging Cable": "Lightning Cable",
"AAA Batteries (4-pack)": "AAA Batteries (4pk)",
"AA Batteries (4-pack)": "AA Batteries (4pk)",
"USB-C Charging Cable": "USB-C Cable",
"Wired Headphones": "Wired Headset",
"Apple Airpods Headphones": "AirPods",
"Bose SoundSport Headphones": "Bose Headset",
"27in FHD Monitor": "27in FHD",
"iPhone": "iPhone",
"27in 4K Gaming Monitor": "27in 4K",
"34in Ultrawide Monitor": "34in Ultra",
"Google Phone": "Google Phone",
"Flatscreen TV": "TV",
"Macbook Pro Laptop": "MacBook Pro",
"ThinkPad Laptop": "ThinkPad",
"20in Monitor": "20in Monitor",
"Vareebadd Phone": "Vareebadd",
"LG Washing Machine": "LG Washer",
"LG Dryer": "LG Dryer"
}
# Apply mapping safely
"Short_Name"] = product_sales["Product"].apply(lambda x: name_mapping.get(x, x))
product_sales[
# Create the figure
= go.Figure(data=[go.Bar(
fig =product_sales['Quantity_Ordered'],
x=product_sales['Short_Name'], # Use shortened names
y='#2660a4',
marker_color='h'
orientation
)])
# Update layout with the specified theme and style
fig.update_layout(={
title'text': "Product by Quantity Ordered", # **Added a proper title**
'font': {'family': "Hiragino Kaku Gothic Pro, sans-serif", 'color': 'black'}
},='#F5F4EF',
plot_bgcolor='#F5F4EF',
paper_bgcolor=dict(
xaxis="",
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfont=True,
showline=1.5,
linewidth='lightgray',
gridcolor=True,
showticklabels=0.5,
gridwidth=False,
zeroline="top"
side
),=dict(
yaxis="",
title_text='total ascending',
categoryorder=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfont=True,
showticklabels=True,
showline="#353535",
linecolor=1.4,
linewidth=True
automargin
),=dict(l=160, r=50, t=100, b=50),
margin=700,
height=790
width
)
# Show plot
fig.show()
5. What Are the Top Revenue Generating Products?
Code
import plotly.graph_objects as go
import pandas as pd
# Group, aggregate, and sort data
= data.groupby('Product')['Sales'].sum().reset_index()
product_sales = product_sales.sort_values(by='Sales', ascending=False)
product_sales
# Shorten long product names automatically
= {
name_mapping "Lightning Charging Cable": "Lightning Cable",
"AAA Batteries (4-pack)": "AAA Batteries (4pk)",
"AA Batteries (4-pack)": "AA Batteries (4pk)",
"USB-C Charging Cable": "USB-C Cable",
"Wired Headphones": "Wired Headset",
"Apple Airpods Headphones": "AirPods",
"Bose SoundSport Headphones": "Bose Headset",
"27in FHD Monitor": "27in FHD",
"iPhone": "iPhone",
"27in 4K Gaming Monitor": "27in 4K",
"34in Ultrawide Monitor": "34in Ultra",
"Google Phone": "Google Phone",
"Flatscreen TV": "TV",
"Macbook Pro Laptop": "MacBook Pro",
"ThinkPad Laptop": "ThinkPad",
"20in Monitor": "20in Monitor",
"Vareebadd Phone": "Vareebadd",
"LG Washing Machine": "LG Washer",
"LG Dryer": "LG Dryer"
}
# Apply mapping safely
"Short_Name"] = product_sales["Product"].apply(lambda x: name_mapping.get(x, x))
product_sales[
# Create the figure
= go.Figure(data=[go.Bar(
fig =product_sales['Sales'],
x=product_sales['Short_Name'], # Use shortened names
y='#2660a4',
marker_color='h'
orientation
)])
# Update layout with the specified theme and style
fig.update_layout(="Product by Total Sales", # Updated title
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
title_font='#F5F4EF',
plot_bgcolor='#F5F4EF',
paper_bgcolor=dict(
xaxis="",
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfont=True,
showline=1.5,
linewidth='lightgray',
gridcolor=True,
showticklabels=0.5,
gridwidth=False,
zeroline="top"
side
),=dict(
yaxis="",
title_text='total ascending',
categoryorder=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfont=True,
showticklabels=True,
showline="#353535",
linecolor=1.4,
linewidth=True
automargin
),=dict(l=160, r=50, t=100, b=50),
margin=700,
height=790
width
)
# Show plot
fig.show()
6. What is the Relationship between Quantity Ordered and Product Price
Code
# Group by Product Price and sum Quantity Ordered
= data.groupby('Product_Price')['Quantity_Ordered'].sum().reset_index()
price_quantity
# Sort by Product Price
= price_quantity.sort_values(by='Product_Price')
price_quantity
# Create the figure (Line chart with area effect)
= go.Figure(data=[go.Scatter(
fig =price_quantity['Product_Price'],
x=price_quantity['Quantity_Ordered'],
y='lines', # Removed markers
mode=dict(color='#2660a4', width=5), # Line color and width
line='tozeroy', # Creates the area chart effect
fill='rgba(38, 96, 164, 0.3)' # Transparent fill under the line
fillcolor
)])
# Update layout with the specified theme and style
fig.update_layout(={
title'text': "<span style='color:black'>Product Price vs. Quantity Ordered</span>",
'font': {'family': "Hiragino Kaku Gothic Pro, sans-serif", 'color': 'black'}
},='#F5F4EF',
plot_bgcolor='#F5F4EF',
paper_bgcolor=dict(
xaxis="",
title_text=True,
showline='black',
linecolor='black',
tickcolor=2,
tickwidth="outside",
ticks=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
tickfont=False # Removed x-axis grid lines
showgrid
),=dict(
yaxis="",
title_text=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'), # Moved ticks to right & changed color
tickfont='lightgray',
gridcolor=True,
showticklabels=0.5,
gridwidth='solid',
griddash=False,
zeroline='right' # Moves y-axis numbers to the right
side
),=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
font=dict(l=50, r=50, t=100, b=50) # Keep spacing consistent
margin
)
# Show plot
fig.show()
7. What do customers buy together?
Code
= pd.read_csv('sales_data.csv')
df
# Import the necessary libraries
from itertools import combinations
from collections import Counter
# Filter the DataFrame to keep only rows with duplicate Order IDs
= df[df['Order ID'].duplicated(keep=False)]
duplicate_orders
# Group products for each order and create a new DataFrame
= duplicate_orders.groupby('Order ID')['Product'].apply(','.join).reset_index(name='Grouped')
grouped_orders
# Initialize a Counter to count combinations of products
= Counter()
product_combinations
# Loop through the grouped orders and count product combinations
for row in grouped_orders['Grouped']:
if isinstance(row, str): # Check if the row is a string
= row.split(',')
product_list 2)))
product_combinations.update(Counter(combinations(product_list,
# Print a title and the top 10 product combinations
print("Product Combo | Count")
for key, value in product_combinations.most_common(11):
if key != ('Product', 'Product'):
print(f"{key} | {value}")
Product Combo | Count
('iPhone', 'Lightning Charging Cable') | 1005
('Google Phone', 'USB-C Charging Cable') | 987
('iPhone', 'Wired Headphones') | 447
('Google Phone', 'Wired Headphones') | 414
('Vareebadd Phone', 'USB-C Charging Cable') | 361
('iPhone', 'Apple Airpods Headphones') | 360
('Google Phone', 'Bose SoundSport Headphones') | 220
('USB-C Charging Cable', 'Wired Headphones') | 160
('Vareebadd Phone', 'Wired Headphones') | 143
('Lightning Charging Cable', 'Wired Headphones') | 92