Priscables Electronics | Exploring the Data Through Questions

Marketing Analytics

Author

Uzomah Teslim

Published

March 30, 2025

Introduction

After completing Data Preparation, the next step is Exploratory Data Analysis (EDA). In this notebook, I explore the data further by asking key questions to uncover patterns, trends, and insights.

If you’d like to review the previous notebook on Data Preparation, click the link below:

View Data Preparation Notebook

Load libraries

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

import warnings
warnings.filterwarnings('ignore')

data = pd.read_csv('priscables_clean.csv')

1. Which City Generated the Most Revenue?

Code
import plotly.graph_objects as go
import pandas as pd

# Group, aggregate, and sort data (ascending for lowest to highest)
city_sales = data.groupby('City')['Sales'].sum().reset_index()
city_sales = city_sales.sort_values(by='Sales', ascending=True)

# Define a mapping of full city names to shortened names
city_mapping = {
    "San Francisco, CA": "San Francisco",
    "Los Angeles, CA": "L.A.",
    "New York City, NY": "New York",
    "Boston, MA": "Boston",
    "Atlanta, GA": "Atlanta",
    "Dallas, TX": "Dallas",
    "Seattle, WA": "Seattle",
    "Portland, OR": "Portland, OR",
    "Austin, TX": "Austin",
    "Portland, ME": "Portland, ME"
}

# Create a list of shortened names in the sorted order
shortened_cities = [city_mapping.get(city, city) for city in city_sales['City']]

# Create the figure (Vertical column chart)
fig = go.Figure(data=[go.Bar(
    x=city_sales['City'],
    y=city_sales['Sales'],
    marker_color='#2660a4'
)])

# Update layout with the specified changes
fig.update_layout(
    title_text="Total Sales by City",  # Added chart title
    title_font=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
    plot_bgcolor='#F5F4EF',
    paper_bgcolor='#F5F4EF',
    width=780,
    height=500,
    xaxis=dict(
        title_text="",
        showline=True,
        linecolor='black',
        tickmode='array',
        tickvals=city_sales['City'],
        ticktext=shortened_cities,
        tickcolor='black',
        tickwidth=2,
        ticks="outside",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        range=[-0.7, len(city_sales["City"]) - 0.3]  # Fixed range
    ),
    yaxis=dict(
        title_text="",  # Added y-axis title for clarity
        side="right",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'),  # Updated y-axis tick color
        gridcolor='lightgray',
        gridwidth=0.5,
        zeroline=False
    ),
    font=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
    margin=dict(l=50, r=50, t=100, b=50)
)

# Show plot
fig.show()

2. What Was the Best Month for Sales?

Code
# Define month order
month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Group and aggregate sales by month
month_sales = data.groupby('Month')['Sales'].sum().reset_index()

# Ensure the Month column follows the correct order
month_sales['Month'] = pd.Categorical(month_sales['Month'], categories=month_order, ordered=True)
month_sales = month_sales.sort_values(by='Month')

# Define colors
colors = ['#7199c3'] * (len(month_sales) - 1) + ['#2660a4']

# Create the figure (Vertical column chart)
fig = go.Figure(data=[go.Bar(
    x=month_sales['Month'],
    y=month_sales['Sales'],
    marker_color=colors  # Apply the color list
)])

# Update layout with the specified changes
fig.update_layout(
    title_text="Total Sales by Month",  # Added chart title
    title_font=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
    plot_bgcolor='#F5F4EF',
    paper_bgcolor='#F5F4EF',
    width=790,
    height=500,
    xaxis=dict(
        title_text="",
        showline=True,
        linecolor='black',
        tickmode='array',
        tickvals=month_sales['Month'],
        ticktext=month_sales['Month'],
        tickcolor='black',
        tickwidth=2,
        ticks="outside",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        range=[-0.7, len(month_sales["Month"]) - 0.3]  # Adjust range for better fit
    ),
    yaxis=dict(
        title_text="",  # Corrected title
        side="right",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'),  # Updated y-axis tick color
        gridcolor='lightgray',
        gridwidth=0.5,
        zeroline=False
    ),
    font=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
    margin=dict(l=50, r=50, t=100, b=50)
)

fig.show()

3. What Were the Best Hours for Sales?

Code
import plotly.graph_objects as go
import pandas as pd

# Group by Hour and sum Quantity Ordered
hour_quantity = data.groupby('Hour')['Quantity_Ordered'].sum().reset_index()

# Sort by Hour
hour_quantity = hour_quantity.sort_values(by='Hour')

colors = ['#7199c3' if hour not in [11, 12, 13, 18, 19, 20] else '#2660a4' for hour in hour_quantity['Hour']]
# Define hour labels
hour_labels = ["00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23"]

# Create the figure (Bar Chart)
fig = go.Figure(data=[go.Bar(
    x=hour_quantity['Hour'],
    y=hour_quantity['Quantity_Ordered'],
    marker_color=colors  # Apply color mapping
)])

# Update layout with specified modifications
fig.update_layout(
    title_text="Total Quantity Ordered by Hour",  # Added chart title
    title_font=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
    plot_bgcolor='#F5F4EF',
    paper_bgcolor='#F5F4EF',
    width=790,  # Set width to 1000
    height=500,  # Set height to 700
    xaxis=dict(
        title_text="Hour",
        showline=True,
        linecolor='black',
        tickmode='array',
        tickvals=hour_quantity['Hour'],
        ticktext=hour_labels,  # Apply custom labels
        tickcolor='black',
        tickwidth=2,
        ticks="outside",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black')
    ),
    yaxis=dict(
        title_text="",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'),  # Updated y-axis tick color
        gridcolor='lightgray',
        showticklabels=True,
        gridwidth=0.5,
        griddash='solid',
        zeroline=False,
        side='right'  # Move y-axis to the right
    ),
    font=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
    margin=dict(l=50, r=50, t=100, b=50)  # Adjusted top margin for title
)

# Show plot
fig.show()

4. Which Products Sold the Most?

Code
# Group, aggregate, and sort data
product_sales = data.groupby('Product')['Quantity_Ordered'].sum().reset_index()
product_sales = product_sales.sort_values(by='Quantity_Ordered', ascending=False)

# Shorten long product names automatically
name_mapping = {
    "Lightning Charging Cable": "Lightning Cable",
    "AAA Batteries (4-pack)": "AAA Batteries (4pk)",
    "AA Batteries (4-pack)": "AA Batteries (4pk)",
    "USB-C Charging Cable": "USB-C Cable",
    "Wired Headphones": "Wired Headset",
    "Apple Airpods Headphones": "AirPods",
    "Bose SoundSport Headphones": "Bose Headset",
    "27in FHD Monitor": "27in FHD",
    "iPhone": "iPhone",
    "27in 4K Gaming Monitor": "27in 4K",
    "34in Ultrawide Monitor": "34in Ultra",
    "Google Phone": "Google Phone",
    "Flatscreen TV": "TV",
    "Macbook Pro Laptop": "MacBook Pro",
    "ThinkPad Laptop": "ThinkPad",
    "20in Monitor": "20in Monitor",
    "Vareebadd Phone": "Vareebadd",
    "LG Washing Machine": "LG Washer",
    "LG Dryer": "LG Dryer"
}

# Apply mapping safely
product_sales["Short_Name"] = product_sales["Product"].apply(lambda x: name_mapping.get(x, x))

# Create the figure
fig = go.Figure(data=[go.Bar(
    x=product_sales['Quantity_Ordered'],
    y=product_sales['Short_Name'],  # Use shortened names
    marker_color='#2660a4',
    orientation='h'
)])

# Update layout with the specified theme and style
fig.update_layout(
    title={
        'text': "Product by Quantity Ordered",  # **Added a proper title**
        'font': {'family': "Hiragino Kaku Gothic Pro, sans-serif", 'color': 'black'}
    },
    plot_bgcolor='#F5F4EF',
    paper_bgcolor='#F5F4EF',
    xaxis=dict(
        title_text="",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        showline=True,
        linewidth=1.5,
        gridcolor='lightgray',
        showticklabels=True,
        gridwidth=0.5,
        zeroline=False,
        side="top"
    ),
    yaxis=dict(
        title_text="",
        categoryorder='total ascending',
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        showticklabels=True,
        showline=True,
        linecolor="#353535",
        linewidth=1.4,
        automargin=True
    ),
    margin=dict(l=160, r=50, t=100, b=50),
    height=700,
    width=790
)

# Show plot
fig.show()

5. What Are the Top Revenue Generating Products?

Code
import plotly.graph_objects as go
import pandas as pd

# Group, aggregate, and sort data
product_sales = data.groupby('Product')['Sales'].sum().reset_index()
product_sales = product_sales.sort_values(by='Sales', ascending=False)

# Shorten long product names automatically
name_mapping = {
    "Lightning Charging Cable": "Lightning Cable",
    "AAA Batteries (4-pack)": "AAA Batteries (4pk)",
    "AA Batteries (4-pack)": "AA Batteries (4pk)",
    "USB-C Charging Cable": "USB-C Cable",
    "Wired Headphones": "Wired Headset",
    "Apple Airpods Headphones": "AirPods",
    "Bose SoundSport Headphones": "Bose Headset",
    "27in FHD Monitor": "27in FHD",
    "iPhone": "iPhone",
    "27in 4K Gaming Monitor": "27in 4K",
    "34in Ultrawide Monitor": "34in Ultra",
    "Google Phone": "Google Phone",
    "Flatscreen TV": "TV",
    "Macbook Pro Laptop": "MacBook Pro",
    "ThinkPad Laptop": "ThinkPad",
    "20in Monitor": "20in Monitor",
    "Vareebadd Phone": "Vareebadd",
    "LG Washing Machine": "LG Washer",
    "LG Dryer": "LG Dryer"
}

# Apply mapping safely
product_sales["Short_Name"] = product_sales["Product"].apply(lambda x: name_mapping.get(x, x))

# Create the figure
fig = go.Figure(data=[go.Bar(
    x=product_sales['Sales'],
    y=product_sales['Short_Name'],  # Use shortened names
    marker_color='#2660a4',
    orientation='h'
)])

# Update layout with the specified theme and style
fig.update_layout(
    title_text="Product by Total Sales",  # Updated title
    title_font=dict(family="Hiragino Kaku Gothic Pro, sans-serif", size=20, color='black'),
    plot_bgcolor='#F5F4EF',
    paper_bgcolor='#F5F4EF',
    xaxis=dict(
        title_text="",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        showline=True,
        linewidth=1.5,
        gridcolor='lightgray',
        showticklabels=True,
        gridwidth=0.5,
        zeroline=False,
        side="top"
    ),
    yaxis=dict(
        title_text="",
        categoryorder='total ascending',
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        showticklabels=True,
        showline=True,
        linecolor="#353535",
        linewidth=1.4,
        automargin=True
    ),
    margin=dict(l=160, r=50, t=100, b=50),
    height=700,
    width=790
)

# Show plot
fig.show()

6. What is the Relationship between Quantity Ordered and Product Price

Code
# Group by Product Price and sum Quantity Ordered
price_quantity = data.groupby('Product_Price')['Quantity_Ordered'].sum().reset_index()

# Sort by Product Price
price_quantity = price_quantity.sort_values(by='Product_Price')

# Create the figure (Line chart with area effect)
fig = go.Figure(data=[go.Scatter(
    x=price_quantity['Product_Price'],
    y=price_quantity['Quantity_Ordered'],
    mode='lines',  # Removed markers
    line=dict(color='#2660a4', width=5),  # Line color and width
    fill='tozeroy',  # Creates the area chart effect
    fillcolor='rgba(38, 96, 164, 0.3)'  # Transparent fill under the line
)])

# Update layout with the specified theme and style
fig.update_layout(
    title={
        'text': "<span style='color:black'>Product Price vs. Quantity Ordered</span>",
        'font': {'family': "Hiragino Kaku Gothic Pro, sans-serif", 'color': 'black'}
    },
    plot_bgcolor='#F5F4EF',
    paper_bgcolor='#F5F4EF',
    xaxis=dict(
        title_text="",
        showline=True,
        linecolor='black',
        tickcolor='black',
        tickwidth=2,
        ticks="outside",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='black'),
        showgrid=False  # Removed x-axis grid lines
    ),
    yaxis=dict(
        title_text="",
        tickfont=dict(family="Hiragino Kaku Gothic Pro, sans-serif", color='#222222'),  # Moved ticks to right & changed color
        gridcolor='lightgray',
        showticklabels=True,
        gridwidth=0.5,
        griddash='solid',
        zeroline=False,
        side='right'  # Moves y-axis numbers to the right
    ),
    font=dict(family="Hiragino Kaku Gothic Pro, sans-serif"),
    margin=dict(l=50, r=50, t=100, b=50)  # Keep spacing consistent
)

# Show plot
fig.show()

7. What do customers buy together?

Code
df = pd.read_csv('sales_data.csv')


# Import the necessary libraries
from itertools import combinations
from collections import Counter

# Filter the DataFrame to keep only rows with duplicate Order IDs
duplicate_orders = df[df['Order ID'].duplicated(keep=False)]

# Group products for each order and create a new DataFrame
grouped_orders = duplicate_orders.groupby('Order ID')['Product'].apply(','.join).reset_index(name='Grouped')

# Initialize a Counter to count combinations of products
product_combinations = Counter()

# Loop through the grouped orders and count product combinations
for row in grouped_orders['Grouped']:
    if isinstance(row, str):  # Check if the row is a string
        product_list = row.split(',')
        product_combinations.update(Counter(combinations(product_list, 2)))

# Print a title and the top 10 product combinations
print("Product Combo | Count")
for key, value in product_combinations.most_common(11):
    if key != ('Product', 'Product'):
        print(f"{key} | {value}")
Product Combo | Count
('iPhone', 'Lightning Charging Cable') | 1005
('Google Phone', 'USB-C Charging Cable') | 987
('iPhone', 'Wired Headphones') | 447
('Google Phone', 'Wired Headphones') | 414
('Vareebadd Phone', 'USB-C Charging Cable') | 361
('iPhone', 'Apple Airpods Headphones') | 360
('Google Phone', 'Bose SoundSport Headphones') | 220
('USB-C Charging Cable', 'Wired Headphones') | 160
('Vareebadd Phone', 'Wired Headphones') | 143
('Lightning Charging Cable', 'Wired Headphones') | 92