Table of Contents

Grouping and Aggregation

This guide covers summarizing data using GROUP BY and aggregate functions with the fluent query syntax.


Overview

Grouping and aggregation allow you to summarize data by calculating values across multiple rows. Velocity provides:

  • Aggregate - Aggregate function wrapper
  • AggregateType - Functions (COUNT, SUM, AVG, MIN, MAX)
  • GroupBy() method - Group rows by columns
  • Having() method - Filter aggregated groups

Aggregate Functions

Available Aggregates

AggregateType provides standard SQL aggregate functions:

AggregateType SQL Function Description Example Use
Average AVG() Average of values Average price
Count COUNT() Count rows Count customers
Max MAX() Maximum value Highest price
Min MIN() Minimum value Lowest price
StdDev STDDEV() / STDEV() Standard deviation Price variability
Sum SUM() Total numeric values Total revenue
Variance VAR() / VARIANCE() Statistical variance Spread of values

COUNT

COUNT(*)

Count all rows including NULLs:

using YndigoBlue.Velocity.Model;
using YndigoBlue.Velocity.Enums;

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Query query = new Query()
    .Select(orderCount)
    .From(orders);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    int total = results[0].GetFieldInteger("order_count");
    Console.WriteLine($"Total orders: {total}");
}

Generated SQL:

SELECT COUNT(*) AS order_count
FROM orders

COUNT(column)

Count non-NULL values in a specific column:

Table users = schema["users"];

Expression emailCount = new Expression("email_count",
    new Aggregate(AggregateType.Count, users["email"])
);

Query query = new Query()
    .Select(emailCount)
    .From(users);

Generated SQL:

SELECT COUNT(email) AS email_count
FROM users

COUNT DISTINCT

Count unique values:

Table orders = schema["orders"];

Expression uniqueCustomers = new Expression("unique_customers",
    new Aggregate(AggregateType.Count, orders["customer_id"], distinct: true)
);

Query query = new Query()
    .Select(uniqueCustomers)
    .From(orders);

Generated SQL:

SELECT COUNT(DISTINCT customer_id) AS unique_customers
FROM orders

SUM

Add numeric values across rows:

Basic SUM

Table orders = schema["orders"];

Expression totalRevenue = new Expression("total_revenue",
    new Aggregate(AggregateType.Sum, orders["total"])
);

Query query = new Query()
    .Select(totalRevenue)
    .From(orders);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    decimal revenue = results[0].GetFieldDecimal("total_revenue");
    Console.WriteLine($"Total Revenue: ${revenue:N2}");
}

Generated SQL:

SELECT SUM(total) AS total_revenue
FROM orders

SUM with Expression

Table order_items = schema["order_items"];

// Sum of (quantity * unit_price)
Expression lineTotal = new Expression("line_total", [
    order_items["quantity"],
    new ArithmeticOperator(ArithmeticType.Multiply),
    order_items["unit_price"]
]);

Expression totalSales = new Expression("total_sales",
    new Aggregate(AggregateType.Sum, lineTotal)
);

Query query = new Query()
    .Select(totalSales)
    .From(order_items);

Generated SQL:

SELECT SUM(quantity * unit_price) AS total_sales
FROM order_items

AVG (Average)

Calculate average of numeric values:

Basic Average

Table products = schema["products"];

Expression avgPrice = new Expression("average_price",
    new Aggregate(AggregateType.Average, products["price"])
);

Query query = new Query()
    .Select(avgPrice)
    .From(products);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    decimal average = results[0].GetFieldDecimal("average_price");
    Console.WriteLine($"Average Price: ${average:F2}");
}

Generated SQL:

SELECT AVG(price) AS average_price
FROM products

Average with Filter

Table products = schema["products"];

Expression avgPrice = new Expression("average_price",
    new Aggregate(AggregateType.Average, products["price"])
);

Filter inStockFilter = new Filter(
    new Criterion<int>(
        products["stock_quantity"],
        ConditionalType.GreaterThan,
        0
    )
);

Query query = new Query()
    .Select(avgPrice)
    .From(products)
    .Where(inStockFilter);

Generated SQL:

SELECT AVG(price) AS average_price
FROM products
WHERE stock_quantity > 0

MIN and MAX

Find minimum and maximum values:

MIN (Minimum)

Table products = schema["products"];

Expression lowestPrice = new Expression("lowest_price",
    new Aggregate(AggregateType.Min, products["price"])
);

Query query = new Query()
    .Select(lowestPrice)
    .From(products);

Generated SQL:

SELECT MIN(price) AS lowest_price
FROM products

MAX (Maximum)

Table orders = schema["orders"];

Expression latestOrder = new Expression("latest_order_date",
    new Aggregate(AggregateType.Max, orders["order_date"])
);

Query query = new Query()
    .Select(latestOrder)
    .From(orders);

Generated SQL:

SELECT MAX(order_date) AS latest_order_date
FROM orders

MIN and MAX Together

Table products = schema["products"];

Expression minPrice = new Expression("min_price",
    new Aggregate(AggregateType.Min, products["price"])
);

Expression maxPrice = new Expression("max_price",
    new Aggregate(AggregateType.Max, products["price"])
);

Expression priceRange = new Expression("price_range", [
    maxPrice,
    new ArithmeticOperator(ArithmeticType.Subtract),
    minPrice
]);

Query query = new Query()
    .Select([minPrice, maxPrice, priceRange])
    .From(products);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    decimal min = results[0].GetFieldDecimal("min_price");
    decimal max = results[0].GetFieldDecimal("max_price");
    decimal range = results[0].GetFieldDecimal("price_range");

    Console.WriteLine($"Price Range: ${min:F2} - ${max:F2} (${range:F2})");
}

GROUP BY

Simple Grouping

Group rows by a single column:

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Query query = new Query()
    .Select([
        orders["customer_id"],
        orderCount
    ])
    .From(orders)
    .GroupBy(orders["customer_id"]);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    foreach (Result row in results)
    {
        int customerId = row.GetFieldInteger("customer_id");
        int count = row.GetFieldInteger("order_count");

        Console.WriteLine($"Customer {customerId}: {count} orders");
    }
}

Generated SQL:

SELECT customer_id, COUNT(*) AS order_count
FROM orders
GROUP BY customer_id

Multiple Column Grouping

Group by multiple columns:

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression totalAmount = new Expression("total_amount",
    new Aggregate(AggregateType.Sum, orders["total"])
);

Query query = new Query()
    .Select([
        orders["customer_id"],
        orders["status"],
        orderCount,
        totalAmount
    ])
    .From(orders)
    .GroupBy([
        orders["customer_id"],
        orders["status"]
    ]);

Generated SQL:

SELECT customer_id, status, COUNT(*) AS order_count, SUM(total) AS total_amount
FROM orders
GROUP BY customer_id, status

GROUP BY with Multiple Aggregates

Combine multiple aggregate functions:

Table order_items = schema["order_items"];

Expression itemCount = new Expression("item_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression totalQuantity = new Expression("total_quantity",
    new Aggregate(AggregateType.Sum, order_items["quantity"])
);

Expression avgQuantity = new Expression("avg_quantity",
    new Aggregate(AggregateType.Average, order_items["quantity"])
);

Expression minPrice = new Expression("min_price",
    new Aggregate(AggregateType.Min, order_items["unit_price"])
);

Expression maxPrice = new Expression("max_price",
    new Aggregate(AggregateType.Max, order_items["unit_price"])
);

Query query = new Query()
    .Select([
        order_items["product_id"],
        itemCount,
        totalQuantity,
        avgQuantity,
        minPrice,
        maxPrice
    ])
    .From(order_items)
    .GroupBy(order_items["product_id"]);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    foreach (Result row in results)
    {
        int productId = row.GetFieldInteger("product_id");
        int count = row.GetFieldInteger("item_count");
        int totalQty = row.GetFieldInteger("total_quantity");
        double avgQty = row.GetFieldDouble("avg_quantity");
        decimal minPx = row.GetFieldDecimal("min_price");
        decimal maxPx = row.GetFieldDecimal("max_price");

        Console.WriteLine($"Product {productId}:");
        Console.WriteLine($"  Orders: {count}");
        Console.WriteLine($"  Total Qty: {totalQty} (Avg: {avgQty:F1})");
        Console.WriteLine($"  Price Range: ${minPx:F2} - ${maxPx:F2}");
    }
}

HAVING Clause

Filter aggregated results using HAVING:

Simple HAVING

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

// Filter: only customers with more than 5 orders
Filter havingFilter = new Filter(
    new Criterion<int>(
        orderCount,
        ConditionalType.GreaterThan,
        5
    )
);

Query query = new Query()
    .Select([
        orders["customer_id"],
        orderCount
    ])
    .From(orders)
    .GroupBy(orders["customer_id"])
    .Having(havingFilter);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    Console.WriteLine("High-Volume Customers:");
    foreach (Result row in results)
    {
        int customerId = row.GetFieldInteger("customer_id");
        int count = row.GetFieldInteger("order_count");

        Console.WriteLine($"Customer {customerId}: {count} orders");
    }
}

Generated SQL:

SELECT customer_id, COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
HAVING COUNT(*) > 5

HAVING with Multiple Conditions

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression totalSpent = new Expression("total_spent",
    new Aggregate(AggregateType.Sum, orders["total"])
);

// Filter: customers with >3 orders AND total spent > $500
Filter havingFilter = new Filter([
    new Criterion<int>(orderCount, ConditionalType.GreaterThan, 3),
    new BooleanItem(BooleanType.And),
    new Criterion<decimal>(totalSpent, ConditionalType.GreaterThan, 500.00M)
]);

Query query = new Query()
    .Select([
        orders["customer_id"],
        orderCount,
        totalSpent
    ])
    .From(orders)
    .GroupBy(orders["customer_id"])
    .Having(havingFilter);

Generated SQL:

SELECT customer_id, COUNT(*) AS order_count, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
HAVING COUNT(*) > 3 AND SUM(total) > 500.00

WHERE vs HAVING

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

// WHERE filters rows BEFORE grouping
Filter whereFilter = new Filter(
    new Criterion<DateTime>(
        orders["order_date"],
        ConditionalType.GreaterThan,
        DateTime.Now.AddMonths(-6)
    )
);

// HAVING filters groups AFTER aggregation
Filter havingFilter = new Filter(
    new Criterion<int>(
        orderCount,
        ConditionalType.GreaterThanOrEqualTo,
        2
    )
);

Query query = new Query()
    .Select([
        orders["customer_id"],
        orderCount
    ])
    .From(orders)
    .Where(whereFilter)      // Filter to last 6 months
    .GroupBy(orders["customer_id"])
    .Having(havingFilter);   // Filter to customers with 2+ orders

Generated SQL:

SELECT customer_id, COUNT(*) AS order_count
FROM orders
WHERE order_date > '2024-09-01'  -- WHERE filters rows first
GROUP BY customer_id
HAVING COUNT(*) >= 2              -- HAVING filters groups after

GROUP BY with ORDER BY

Sort aggregated results:

Order by Aggregate

Table orders = schema["orders"];

Expression totalSpent = new Expression("total_spent",
    new Aggregate(AggregateType.Sum, orders["total"])
);

Query query = new Query()
    .Select([
        orders["customer_id"],
        totalSpent
    ])
    .From(orders)
    .GroupBy(orders["customer_id"])
    .OrderBy(totalSpent, OrderClauseType.Descending); // Highest spenders first

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    Console.WriteLine("Top Customers by Revenue:");
    int rank = 1;
    foreach (Result row in results.Take(10))
    {
        int customerId = row.GetFieldInteger("customer_id");
        decimal spent = row.GetFieldDecimal("total_spent");

        Console.WriteLine($"{rank}. Customer {customerId}: ${spent:N2}");
        rank++;
    }
}

Generated SQL:

SELECT customer_id, SUM(total) AS total_spent
FROM orders
GROUP BY customer_id
ORDER BY total_spent DESC

Multiple Sort Columns

Table products = schema["products"];

Expression avgPrice = new Expression("avg_price",
    new Aggregate(AggregateType.Average, products["price"])
);

Expression productCount = new Expression("product_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Query query = new Query()
    .Select([
        products["category"],
        productCount,
        avgPrice
    ])
    .From(products)
    .GroupBy(products["category"])
    .OrderBy([
        new OrderClause(products["category"], OrderClauseType.Ascending),
        new OrderClause(avgPrice, OrderClauseType.Descending)
    ]);

Complete Examples

Example 1: Customer Summary Report

Table orders = schema["orders"];

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression totalSpent = new Expression("total_spent",
    new Aggregate(AggregateType.Sum, orders["total"])
);

Expression avgOrderValue = new Expression("avg_order_value",
    new Aggregate(AggregateType.Average, orders["total"])
);

Expression lastOrderDate = new Expression("last_order_date",
    new Aggregate(AggregateType.Max, orders["order_date"])
);

Query query = new Query()
    .Select([
        orders["customer_id"],
        orderCount,
        totalSpent,
        avgOrderValue,
        lastOrderDate
    ])
    .From(orders)
    .GroupBy(orders["customer_id"])
    .OrderBy(totalSpent, OrderClauseType.Descending);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    Console.WriteLine("Customer Summary Report");
    Console.WriteLine("=======================\n");

    foreach (Result row in results)
    {
        int customerId = row.GetFieldInteger("customer_id");
        int orders = row.GetFieldInteger("order_count");
        decimal total = row.GetFieldDecimal("total_spent");
        decimal avg = row.GetFieldDecimal("avg_order_value");
        DateTime lastOrder = row.GetFieldDateTime("last_order_date");

        Console.WriteLine($"Customer #{customerId}");
        Console.WriteLine($"  Total Orders: {orders}");
        Console.WriteLine($"  Total Spent: ${total:N2}");
        Console.WriteLine($"  Avg Order Value: ${avg:F2}");
        Console.WriteLine($"  Last Order: {lastOrder:d}");
        Console.WriteLine();
    }
}

Example 2: Product Performance Analysis

Table order_items = schema["order_items"];

Expression timesSold = new Expression("times_sold",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression totalQuantity = new Expression("total_quantity",
    new Aggregate(AggregateType.Sum, order_items["quantity"])
);

Expression totalRevenue = new Expression("total_revenue", [
    new Aggregate(AggregateType.Sum, new Expression("line_total", [
        order_items["quantity"],
        new ArithmeticOperator(ArithmeticType.Multiply),
        order_items["unit_price"]
    ]))
]);

Expression avgQuantity = new Expression("avg_quantity",
    new Aggregate(AggregateType.Average, order_items["quantity"])
);

// Only show products sold at least 5 times
Filter havingFilter = new Filter(
    new Criterion<int>(
        timesSold,
        ConditionalType.GreaterThanOrEqualTo,
        5
    )
);

Query query = new Query()
    .Select([
        order_items["product_id"],
        timesSold,
        totalQuantity,
        totalRevenue,
        avgQuantity
    ])
    .From(order_items)
    .GroupBy(order_items["product_id"])
    .Having(havingFilter)
    .OrderBy(totalRevenue, OrderClauseType.Descending);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    Console.WriteLine("Top Products by Revenue (Min 5 Sales)");
    Console.WriteLine("======================================\n");

    int rank = 1;
    foreach (Result row in results.Take(20))
    {
        int productId = row.GetFieldInteger("product_id");
        int sold = row.GetFieldInteger("times_sold");
        int qty = row.GetFieldInteger("total_quantity");
        decimal revenue = row.GetFieldDecimal("total_revenue");
        double avgQty = row.GetFieldDouble("avg_quantity");

        Console.WriteLine($"{rank}. Product #{productId}");
        Console.WriteLine($"   Times Sold: {sold}");
        Console.WriteLine($"   Total Quantity: {qty} (Avg: {avgQty:F1})");
        Console.WriteLine($"   Revenue: ${revenue:N2}");
        Console.WriteLine();

        rank++;
    }
}
Table orders = schema["orders"];

// Extract year and month from order_date
Function year = new Function(FunctionType.Year, orders["order_date"]);
year.SetAlias("order_year");

Function month = new Function(FunctionType.Month, orders["order_date"]);
month.SetAlias("order_month");

Expression orderCount = new Expression("order_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression revenue = new Expression("total_revenue",
    new Aggregate(AggregateType.Sum, orders["total"])
);

Expression avgOrderValue = new Expression("avg_order_value",
    new Aggregate(AggregateType.Average, orders["total"])
);

Query query = new Query()
    .Select([year, month, orderCount, revenue, avgOrderValue])
    .From(orders)
    .GroupBy([year, month])
    .OrderBy([
        new OrderClause(year, OrderClauseType.Descending),
        new OrderClause(month, OrderClauseType.Descending)
    ]);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    Console.WriteLine("Monthly Sales Trends");
    Console.WriteLine("====================\n");

    foreach (Result row in results.Take(12))
    {
        int yr = row.GetFieldInteger("order_year");
        int mo = row.GetFieldInteger("order_month");
        int orders = row.GetFieldInteger("order_count");
        decimal rev = row.GetFieldDecimal("total_revenue");
        decimal avg = row.GetFieldDecimal("avg_order_value");

        Console.WriteLine($"{yr}-{mo:D2}:");
        Console.WriteLine($"  Orders: {orders}");
        Console.WriteLine($"  Revenue: ${rev:N2}");
        Console.WriteLine($"  Avg Order: ${avg:F2}");
        Console.WriteLine();
    }
}

Example 4: Category Performance

Table products = schema["products"];

Expression productCount = new Expression("product_count",
    new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All))
);

Expression avgPrice = new Expression("avg_price",
    new Aggregate(AggregateType.Average, products["price"])
);

Expression minPrice = new Expression("min_price",
    new Aggregate(AggregateType.Min, products["price"])
);

Expression maxPrice = new Expression("max_price",
    new Aggregate(AggregateType.Max, products["price"])
);

Expression totalInventoryValue = new Expression("total_inventory_value", [
    new Aggregate(AggregateType.Sum, new Expression("value", [
        products["stock_quantity"],
        new ArithmeticOperator(ArithmeticType.Multiply),
        products["price"]
    ]))
]);

Query query = new Query()
    .Select([
        products["category"],
        productCount,
        avgPrice,
        minPrice,
        maxPrice,
        totalInventoryValue
    ])
    .From(products)
    .GroupBy(products["category"])
    .OrderBy(totalInventoryValue, OrderClauseType.Descending);

using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
    Console.WriteLine("Category Performance Analysis");
    Console.WriteLine("=============================\n");

    foreach (Result row in results)
    {
        string category = row.GetFieldString("category");
        int count = row.GetFieldInteger("product_count");
        decimal avg = row.GetFieldDecimal("avg_price");
        decimal min = row.GetFieldDecimal("min_price");
        decimal max = row.GetFieldDecimal("max_price");
        decimal invValue = row.GetFieldDecimal("total_inventory_value");

        Console.WriteLine($"{category}:");
        Console.WriteLine($"  Products: {count}");
        Console.WriteLine($"  Price Range: ${min:F2} - ${max:F2} (Avg: ${avg:F2})");
        Console.WriteLine($"  Inventory Value: ${invValue:N2}");
        Console.WriteLine();
    }
}

Best Practices

  1. Always include grouped columns in SELECT

    • All non-aggregated columns must be in GROUP BY
    • Prevents SQL errors and ensures correct results
  2. Use HAVING for aggregate filtering

    • Use WHERE for row-level filtering
    • Use HAVING for aggregate-level filtering
    • WHERE executes before GROUP BY, HAVING after
  3. Name aggregate expressions clearly

    • Use descriptive aliases like "total_revenue", "avg_price"
    • Makes results easier to understand and access
  4. Consider NULL handling

    • Aggregates ignore NULL values (except COUNT(*))
    • COUNT(column) excludes NULLs
    • SUM/AVG return NULL if all values are NULL
  5. Index grouped columns

    • Add indexes to columns used in GROUP BY
    • Improves query performance significantly
    • Essential for large tables
  6. Order aggregated results

    • Sort by aggregate values to find top/bottom results
    • Makes reports more useful and readable
  7. Limit aggregated results when appropriate

    • Use LIMIT/TOP for top-N queries
    • Reduces result set size
    • Improves performance
  8. Test with representative data

    • Verify aggregates with known totals
    • Check edge cases (empty groups, all NULLs)
    • Validate business logic

Next Steps