Query Performance
This guide covers best practices and optimization strategies for building efficient queries in Velocity.
Overview
Query performance depends on many factors:
- Database indexes
- Query structure
- Data volume
- Network latency
- Database server resources
This guide focuses on query-level optimizations you can control through Velocity.
Indexing Strategies
Create Indexes for Filtered Columns
Columns used in WHERE clauses should have indexes:
Table users = schema["users"];
// Add index to frequently filtered column
Index statusIndex = users.CreateIndex("idx_users_status");
statusIndex.AddIndexItem(users["status"]);
// Build table with index
manager.BuildTable(users);
// Now queries filtering on status will be fast
Query query = new Query()
.Select([users["username"], users["email"]])
.From(users)
.Where(new Criterion<string>(users["status"], ConditionalType.Equals, "active"));
Index JOIN Columns
Both sides of a join condition should be indexed:
Table orders = schema["orders"];
Table customers = schema["customers"];
// Index the foreign key column
Index orderCustomerIndex = orders.CreateIndex("idx_orders_customer_id");
orderCustomerIndex.AddIndexItem(orders["customer_id"]);
// Primary keys are automatically indexed
// Join will use indexes on both sides
var join = new Join(orders, customers, "customer_id");
Query query = new Query()
.Select([orders["order_id"], customers["customer_name"]])
.From(join);
Composite Indexes
For queries with multiple filter conditions, create composite indexes:
Table products = schema["products"];
// Composite index on category and price
Index categoryPriceIndex = products.CreateIndex("idx_products_category_price");
categoryPriceIndex.AddIndexItem(products["category"]);
categoryPriceIndex.AddIndexItem(products["price"]);
// Query will use the composite index
Query query = new Query()
.Select(products["product_name"])
.From(products)
.Where(new Filter([
new Criterion<string>(products["category"], ConditionalType.Equals, "Electronics"),
new BooleanItem(BooleanType.And),
new Criterion<decimal>(products["price"], ConditionalType.LessThan, 500.00m)
]));
SELECT Optimization
Select Only Required Columns
Avoid SELECT * - fetch only what you need:
// BAD: Fetches all columns (wasteful)
Query badQuery = new Query()
.SelectAll(users);
// GOOD: Select only needed columns
Query goodQuery = new Query()
.Select([users["username"], users["email"]])
.From(users);
Benefits:
- Reduced network traffic
- Lower memory usage
- Faster query execution
- Less disk I/O
Use DISTINCT Sparingly
DISTINCT requires sorting/deduplication, which is expensive:
// Consider if DISTINCT is truly necessary
Query query = new Query()
.Select(orders["customer_id"])
.From(orders)
.Distinct(); // Adds overhead
// Better: Use GROUP BY if you need aggregates anyway
Expression orderCount = new Expression("order_count",
new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All)));
Query betterQuery = new Query()
.Select([orders["customer_id"], orderCount])
.From(orders)
.GroupBy(orders["customer_id"]);
WHERE Clause Optimization
Put Most Selective Filters First
While the database optimizer usually handles this, organize filters logically:
Table orders = schema["orders"];
// More selective filter first
Query query = new Query()
.Select(orders["order_id"])
.From(orders)
.Where(new Filter([
// Highly selective: specific user
new Criterion<int>(orders["customer_id"], ConditionalType.Equals, 12345),
new BooleanItem(BooleanType.And),
// Less selective: common status
new Criterion<string>(orders["status"], ConditionalType.Equals, "shipped")
]));
Avoid Functions on Indexed Columns
Functions prevent index usage:
Table users = schema["users"];
// BAD: LOWER(email) prevents index usage
Expression lowerEmail = new Expression([
new Function(FunctionType.Lower, users["email"])
]);
Query badQuery = new Query()
.SelectAll()
.From(users)
.Where(new Criterion<string>(lowerEmail, ConditionalType.Equals, "john@example.com"));
// GOOD: Store emails in lowercase, query directly
Query goodQuery = new Query()
.SelectAll()
.From(users)
.Where(new Criterion<string>(users["email"], ConditionalType.Equals, "john@example.com"));
Use Specific Operators
Use the most specific operator available:
Table products = schema["products"];
// GOOD: Direct equality check
Query goodQuery = new Query()
.SelectAll()
.From(products)
.Where(new Criterion<string>(products["sku"], ConditionalType.Equals, "SKU-12345"));
// BAD: LIKE is slower than equality
Query badQuery = new Query()
.SelectAll()
.From(products)
.Where(new Criterion<string>(products["sku"], ConditionalType.Like, "SKU-12345"));
Optimize IN Clauses
Large IN lists can be slow - consider alternatives:
Table orders = schema["orders"];
// If the list is small (< 100 items), IN is fine
List<int> customerIds = [1, 2, 3, 4, 5];
Query query = new Query()
.SelectAll()
.From(orders)
.Where(new Criterion<int>(orders["customer_id"], ConditionalType.In, customerIds));
// For large lists, consider a temporary table or subquery
JOIN Optimization
Join Order Matters
Start with the smallest table:
// Assume: users (small), orders (medium), order_items (large)
// GOOD: Start with smallest table
var join1 = new Join([
(users, orders, "user_id"), // users → orders
(orders, orderItems, "order_id") // orders → order_items
]);
Query goodQuery = new Query()
.SelectAll()
.From(join1);
Use EXISTS Instead of IN for Subqueries
EXISTS can be faster for correlated subqueries:
Table users = schema["users"];
Table orders = schema["orders"];
Query outerQuery = new Query()
.Select(users["username"])
.From(users);
// EXISTS subquery (often faster)
Query existsSubquery = new Query()
.Select(new Expression(new Literal<int>(1)))
.From(orders)
.Where(new Criterion<Column>(orders["user_id"], ConditionalType.Equals, users["user_id"], outerQuery));
Query goodQuery = outerQuery.Where(new Filter(
new Criterion<Expression>(new Expression(new Function(FunctionType.Exists, existsSubquery)))
));
// IN subquery (can be slower for large datasets)
Query inSubquery = new Query()
.Select(orders["user_id"])
.From(orders)
.Distinct();
Query alternateQuery = new Query()
.Select(users["username"])
.From(users)
.Where(new Criterion<Query>(users["user_id"], ConditionalType.In, inSubquery));
Avoid Cartesian Products
Always use explicit joins, not implicit cross products:
Table orders = schema["orders"];
Table customers = schema["customers"];
// BAD: Implicit cross join (Cartesian product)
Query badQuery = new Query()
.Select([orders["order_id"], customers["customer_name"]])
.From([orders, customers]) // Creates massive intermediate result
.Where(new Criterion<Column>(
orders["customer_id"],
ConditionalType.Equals,
customers["customer_id"]
));
// GOOD: Explicit join
var join = new Join(orders, customers, "customer_id");
Query goodQuery = new Query()
.Select([orders["order_id"], customers["customer_name"]])
.From(join);
Subquery Optimization
Limit Subquery Results
If checking for existence, limit results:
Table users = schema["users"];
Table orders = schema["orders"];
// Subquery only needs to find one matching row
Query subquery = new Query()
.Select(new Expression(new Literal<int>(1)))
.From(orders)
.Where(new Criterion<Column>(
orders["user_id"],
ConditionalType.Equals,
users["user_id"]
));
// Use EXISTS for better performance
Query query = new Query()
.Select(users["username"])
.From(users)
.Where(new Filter(
new Criterion<Expression>(new Expression(new Function(FunctionType.Exists, subquery)))
));
Consider JOIN vs Subquery
Sometimes a join is faster than a subquery:
Table users = schema["users"];
Table orders = schema["orders"];
// Subquery approach
Query subquery = new Query()
.Select(orders["user_id"])
.From(orders)
.Distinct();
Query subqueryApproach = new Query()
.Select(users["username"])
.From(users)
.Where(new Criterion<Query>(users["user_id"], ConditionalType.In, subquery));
// JOIN approach (often faster)
var join = new Join(users, orders, "user_id");
Query joinApproach = new Query()
.Select(users["username"])
.From(join)
.Distinct();
// Test both approaches with your data
Aggregation Performance
Filter Before Aggregating
Use WHERE to reduce rows before GROUP BY:
Table orders = schema["orders"];
Expression orderCount = new Expression("order_count",
new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All)));
// GOOD: Filter first, then group
Query goodQuery = new Query()
.Select([orders["customer_id"], orderCount])
.From(orders)
.Where(new Criterion<DateTime>(
orders["order_date"],
ConditionalType.GreaterThan,
DateTime.Now.AddDays(-30)
))
.GroupBy(orders["customer_id"]);
// LESS EFFICIENT: Group all rows, then filter
Query lessEfficientQuery = new Query()
.Select([orders["customer_id"], orderCount])
.From(orders)
.GroupBy(orders["customer_id"])
.Having(new Criterion<int>(orderCount, ConditionalType.GreaterThan, 5));
Use Appropriate Aggregates
Choose the right aggregate function:
Table products = schema["products"];
// COUNT(*) is usually fastest for row counts
Expression rowCount = new Expression("total_products",
new Aggregate(AggregateType.Count, new ArithmeticOperator(ArithmeticType.All)));
// COUNT(column) excludes NULLs
Expression nonNullCount = new Expression("products_with_price",
new Aggregate(AggregateType.Count, products["price"]));
Query query = new Query()
.Select([rowCount, nonNullCount])
.From(products);
Memory Management
Choose Retrieve vs Search Wisely
Use Retrieve() for small result sets (typically under 10,000 rows) where you need random access, row counts, or multiple iterations. Use Search() for large datasets to stream results one row at a time with minimal memory overhead. See Retrieve vs Search for the full decision matrix.
Pagination for Large Datasets
Process large datasets in batches (when database supports LIMIT/OFFSET):
// Note: Velocity doesn't directly support LIMIT/OFFSET yet
// Alternative: Use WHERE with indexed range conditions
Table orders = schema["orders"];
int batchSize = 1000;
int currentId = 0;
while (true)
{
Query batchQuery = new Query()
.SelectAll()
.From(orders)
.Where(new Filter([
new Criterion<int>(orders["order_id"], ConditionalType.GreaterThan, currentId)
]))
.OrderBy(orders["order_id"]);
using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(batchQuery))
{
if (results.Count == 0) break;
foreach (Result row in results.Take(batchSize))
{
currentId = row.GetFieldInteger("order_id");
ProcessOrder(row);
}
if (results.Count < batchSize) break;
}
}
Common Anti-Patterns
1. N+1 Query Problem
BAD:
// Fetch all users
Query usersQuery = new Query()
.Select([users["user_id"], users["username"]])
.From(users);
using (ResultSet userResults = manager.Retrieve(usersQuery))
{
foreach (Result user in userResults)
{
int userId = user.GetFieldInteger("user_id");
// PROBLEM: One query per user!
Query ordersQuery = new Query()
.Select(orders["order_id"])
.From(orders)
.Where(new Criterion<int>(orders["user_id"], ConditionalType.Equals, userId));
using (ResultSet orderResults = manager.Retrieve(ordersQuery))
{
// Process orders
}
}
}
GOOD:
// Single query with JOIN
var join = new Join(users, orders, "user_id", JoinType.LeftOuter);
Query query = new Query()
.Select([users["user_id"], users["username"], orders["order_id"]])
.From(join);
using (ResultSet results = manager.Retrieve(query))
{
// Process all data in single result set
var userOrders = results.GroupBy(r => r.GetFieldInteger("user_id"));
}
2. Over-Filtering in Memory
BAD:
// Fetch all rows, filter in memory
Query query = new Query()
.SelectAll(orders);
using (ResultSet results = manager.Retrieve(query))
{
var filtered = results.Where(r =>
r.GetFieldDateTime("order_date") > DateTime.Now.AddDays(-30) &&
r.GetFieldString("status") == "shipped"
);
}
GOOD:
// Filter in database
Query query = new Query()
.SelectAll()
.From(orders)
.Where(new Filter([
new Criterion<DateTime>(orders["order_date"], ConditionalType.GreaterThan, DateTime.Now.AddDays(-30)),
new BooleanItem(BooleanType.And),
new Criterion<string>(orders["status"], ConditionalType.Equals, "shipped")
]));
3. Unnecessary DISTINCT
BAD:
// DISTINCT when primary key is selected (redundant)
Query query = new Query()
.Select([
users["user_id"], // Primary key is already unique
users["username"]
])
.From(users)
.Distinct(); // Unnecessary overhead
GOOD:
// Remove DISTINCT when not needed
Query query = new Query()
.Select([users["user_id"], users["username"]])
.From(users);
Monitoring and Profiling
Measure Query Performance
using System.Diagnostics;
Table orders = schema["orders"];
Query query = new Query()
.SelectAll()
.From(orders)
.Where(new Criterion<DateTime>(
orders["order_date"],
ConditionalType.GreaterThan,
DateTime.Now.AddDays(-30)
));
Stopwatch sw = Stopwatch.StartNew();
using (Manager manager = new Manager(connection))
using (ResultSet results = manager.Retrieve(query))
{
sw.Stop();
Console.WriteLine($"Query executed in {sw.ElapsedMilliseconds}ms");
Console.WriteLine($"Returned {results.Count} rows");
// Process results
}
Log Slow Queries
public class QueryMonitor
{
private readonly TimeSpan _slowQueryThreshold = TimeSpan.FromSeconds(1);
public ResultSet ExecuteAndMonitor(Manager manager, Query query, string queryName)
{
Stopwatch sw = Stopwatch.StartNew();
ResultSet results = manager.Retrieve(query);
sw.Stop();
if (sw.Elapsed > _slowQueryThreshold)
{
Console.WriteLine($"[SLOW QUERY] {queryName} took {sw.ElapsedMilliseconds}ms");
Console.WriteLine($"Returned {results.Count} rows");
}
return results;
}
}
Best Practices Summary
Index strategically
- Index WHERE clause columns
- Index JOIN columns
- Consider composite indexes
Select wisely
- Fetch only needed columns
- Avoid
SELECT * - Use DISTINCT only when necessary
Filter efficiently
- Filter in the database, not in memory
- Avoid functions on indexed columns
- Put selective filters first
Join smartly
- Use explicit joins
- Start with smallest tables
- Consider EXISTS vs IN
Aggregate carefully
- Filter before grouping
- Use appropriate aggregates
- Consider WHERE vs HAVING
Manage memory
- Use
Search()for large datasets - Use
Retrieve()for small datasets - Process in batches when possible
- Use
Avoid anti-patterns
- Prevent N+1 queries
- Don't filter in memory
- Remove unnecessary DISTINCT
Monitor performance
- Measure query execution time
- Log slow queries
- Profile regularly
See Also
- Retrieve vs Search - Choosing the right execution method
- Filtering - Building efficient WHERE clauses
- Joins - Optimizing join operations
- Indexing - Creating and managing indexes