We are still cooking the magic in the way!
MySQL & Database Design
Advanced String and Date Functions
Advanced String and Date Functions
String and date manipulation are essential skills for data processing and transformation. MySQL provides powerful functions to clean, format, and analyze textual and temporal data. Mastering these functions will dramatically improve your data manipulation capabilities.
Advanced String Functions
SUBSTRING_INDEX() - Splitting Strings
SUBSTRING_INDEX() extracts parts of strings based on delimiters:
-- Extract domain from email
SELECT
email,
SUBSTRING_INDEX(email, '@', 1) AS username,
SUBSTRING_INDEX(email, '@', -1) AS domain
FROM customers;
-- Extract first name from full name
SELECT
full_name,
SUBSTRING_INDEX(full_name, ' ', 1) AS first_name,
SUBSTRING_INDEX(full_name, ' ', -1) AS last_name
FROM employees;
-- Extract subdomain from URL
SELECT
url,
SUBSTRING_INDEX(SUBSTRING_INDEX(url, '://', -1), '.', 1) AS subdomain,
SUBSTRING_INDEX(SUBSTRING_INDEX(url, '://', -1), '/', 1) AS domain
FROM websites;
-- Example: https://shop.example.com/products
-- subdomain: shop
-- domain: shop.example.com
REPLACE() and String Cleaning
-- Clean phone numbers
SELECT
phone,
REPLACE(REPLACE(REPLACE(phone, '-', ''), ' ', ''), '(', '') AS cleaned_phone
FROM contacts;
-- Standardize product codes
SELECT
product_code,
UPPER(REPLACE(TRIM(product_code), ' ', '-')) AS standardized_code
FROM products;
-- Multiple replacements with nested REPLACE
UPDATE products
SET description = REPLACE(
REPLACE(
REPLACE(description, '&', 'and'),
' ', ' '
),
'\n\n', '\n'
)
WHERE description IS NOT NULL;
REGEXP - Regular Expressions
MySQL supports regular expressions for pattern matching and extraction:
-- Find emails with specific pattern
SELECT email
FROM customers
WHERE email REGEXP '^[a-z0-9]+@[a-z0-9]+\.[a-z]{2,}$';
-- Find products with numeric codes
SELECT product_name, product_code
FROM products
WHERE product_code REGEXP '^[A-Z]{2}[0-9]{4}$';
-- Matches: AB1234, XY9999
-- Extract numbers from text
SELECT
description,
REGEXP_SUBSTR(description, '[0-9]+') AS first_number
FROM products;
MySQL 8.0+ Functions: REGEXP_REPLACE(), REGEXP_SUBSTR(), REGEXP_INSTR() provide advanced regex operations for search and replace, extraction, and position finding.
-- REGEXP_REPLACE: Remove non-alphanumeric characters
SELECT
product_name,
REGEXP_REPLACE(product_name, '[^a-zA-Z0-9 ]', '') AS cleaned_name
FROM products;
-- REGEXP_SUBSTR: Extract specific pattern
SELECT
text,
REGEXP_SUBSTR(text, '\b[A-Z]{2}[0-9]{3}\b') AS code
FROM documents;
Advanced String Manipulation
-- Pad strings to fixed width
SELECT
product_code,
LPAD(product_code, 10, '0') AS padded_code,
RPAD(customer_name, 30, '.') AS formatted_name
FROM orders;
-- Reverse string
SELECT
REVERSE('MySQL') AS reversed; -- LQSyM
-- Generate initials
SELECT
full_name,
CONCAT(
UPPER(SUBSTRING(SUBSTRING_INDEX(full_name, ' ', 1), 1, 1)),
UPPER(SUBSTRING(SUBSTRING_INDEX(full_name, ' ', -1), 1, 1))
) AS initials
FROM employees;
-- John Smith → JS
Advanced Date and Time Functions
Date Arithmetic and Calculations
-- Calculate age from birth date
SELECT
birth_date,
TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) AS age,
TIMESTAMPDIFF(MONTH, birth_date, CURDATE()) AS age_in_months
FROM customers;
-- Days until/since event
SELECT
event_name,
event_date,
DATEDIFF(event_date, CURDATE()) AS days_until_event,
CASE
WHEN DATEDIFF(event_date, CURDATE()) < 0 THEN 'Past'
WHEN DATEDIFF(event_date, CURDATE()) = 0 THEN 'Today'
ELSE 'Future'
END AS status
FROM events;
-- Business days calculation (excluding weekends)
SELECT
order_date,
delivery_date,
DATEDIFF(delivery_date, order_date) AS total_days,
DATEDIFF(delivery_date, order_date) -
(WEEK(delivery_date) - WEEK(order_date)) * 2 AS business_days_approx
FROM orders;
-- Add business days (skip weekends)
SELECT
DATE_ADD(CURDATE(),
INTERVAL 5 + (WEEKDAY(CURDATE() + INTERVAL 5 DAY) IN (5, 6)) * 2 DAY
) AS five_business_days_later;
DATE_FORMAT() - Custom Formatting
-- Various date formats
SELECT
order_date,
DATE_FORMAT(order_date, '%Y-%m-%d') AS iso_format,
DATE_FORMAT(order_date, '%M %d, %Y') AS long_format,
DATE_FORMAT(order_date, '%m/%d/%Y') AS us_format,
DATE_FORMAT(order_date, '%d.%m.%Y') AS european_format,
DATE_FORMAT(order_date, '%W, %M %D') AS day_month,
DATE_FORMAT(order_date, '%Y-Q%q') AS quarter_format
FROM orders;
-- Format: January 15, 2024
-- Format: 01/15/2024
-- Format: Monday, January 15th
-- Format: 2024-Q1
-- Time formatting
SELECT
created_at,
DATE_FORMAT(created_at, '%H:%i:%s') AS time_24h,
DATE_FORMAT(created_at, '%h:%i %p') AS time_12h,
DATE_FORMAT(created_at, '%Y-%m-%d %H:%i:%s') AS full_datetime
FROM logs;
-- 14:30:45 → 02:30 PM
STR_TO_DATE() - Parsing Dates
-- Convert string to date
SELECT
STR_TO_DATE('15-01-2024', '%d-%m-%Y') AS parsed_date,
STR_TO_DATE('January 15, 2024', '%M %d, %Y') AS parsed_date2,
STR_TO_DATE('2024/01/15 14:30:00', '%Y/%m/%d %H:%i:%s') AS parsed_datetime;
-- Import data with various date formats
UPDATE import_table
SET proper_date = STR_TO_DATE(date_string, '%m/%d/%Y')
WHERE date_string REGEXP '^[0-9]{2}/[0-9]{2}/[0-9]{4}$';
Time Zone Conversions
-- Convert between time zones
SELECT
event_time,
CONVERT_TZ(event_time, 'UTC', 'America/New_York') AS ny_time,
CONVERT_TZ(event_time, 'UTC', 'Europe/London') AS london_time,
CONVERT_TZ(event_time, 'UTC', 'Asia/Tokyo') AS tokyo_time
FROM events;
-- Store UTC, display in user timezone
SELECT
created_at AS utc_time,
CONVERT_TZ(created_at, '+00:00', user_timezone) AS local_time
FROM orders
JOIN users ON orders.user_id = users.user_id;
Important: Always store dates in UTC and convert to local time zones only for display. Use DATETIME or TIMESTAMP columns, not VARCHAR, for proper date storage.
Date Parts and Extraction
-- Extract date components
SELECT
order_date,
YEAR(order_date) AS year,
MONTH(order_date) AS month,
DAY(order_date) AS day,
QUARTER(order_date) AS quarter,
WEEK(order_date) AS week_number,
DAYOFWEEK(order_date) AS day_of_week,
DAYNAME(order_date) AS day_name,
MONTHNAME(order_date) AS month_name,
LAST_DAY(order_date) AS last_day_of_month
FROM orders;
-- Time components
SELECT
created_at,
HOUR(created_at) AS hour,
MINUTE(created_at) AS minute,
SECOND(created_at) AS second,
TIME(created_at) AS time_only,
DATE(created_at) AS date_only
FROM logs;
Practical Date Scenarios
-- First and last day of month
SELECT
CURDATE() AS today,
DATE_FORMAT(CURDATE(), '%Y-%m-01') AS first_day_of_month,
LAST_DAY(CURDATE()) AS last_day_of_month,
DATE_SUB(DATE_FORMAT(CURDATE(), '%Y-%m-01'), INTERVAL 1 DAY) AS last_day_prev_month;
-- Age ranges for analytics
SELECT
customer_name,
birth_date,
TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) AS age,
CASE
WHEN TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) < 18 THEN 'Under 18'
WHEN TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) BETWEEN 18 AND 24 THEN '18-24'
WHEN TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) BETWEEN 25 AND 34 THEN '25-34'
WHEN TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) BETWEEN 35 AND 44 THEN '35-44'
WHEN TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) BETWEEN 45 AND 54 THEN '45-54'
ELSE '55+'
END AS age_group
FROM customers;
-- Customer lifetime calculation
SELECT
customer_id,
MIN(order_date) AS first_order,
MAX(order_date) AS last_order,
DATEDIFF(MAX(order_date), MIN(order_date)) AS customer_lifetime_days,
TIMESTAMPDIFF(MONTH, MIN(order_date), MAX(order_date)) AS customer_lifetime_months,
COUNT(*) AS total_orders,
ROUND(COUNT(*) / GREATEST(TIMESTAMPDIFF(MONTH, MIN(order_date), MAX(order_date)), 1), 2) AS orders_per_month
FROM orders
GROUP BY customer_id;
Combining String and Date Functions
-- Generate human-readable descriptions
SELECT
order_id,
CONCAT(
'Order #', order_id,
' placed by ', customer_name,
' on ', DATE_FORMAT(order_date, '%M %D, %Y'),
' (', DATEDIFF(CURDATE(), order_date), ' days ago)'
) AS order_description
FROM orders
JOIN customers ON orders.customer_id = customers.customer_id;
-- Generate slugs for URLs
SELECT
title,
LOWER(
REPLACE(
REPLACE(
REGEXP_REPLACE(title, '[^a-zA-Z0-9 ]', ''),
' ', ' '
),
' ', '-'
)
) AS slug
FROM blog_posts;
-- "My Amazing Product!" → "my-amazing-product"
Real-World Example: Data Cleaning Pipeline
-- Comprehensive data cleaning
SELECT
customer_id,
-- Clean name: trim, title case
CONCAT(
UPPER(SUBSTRING(TRIM(first_name), 1, 1)),
LOWER(SUBSTRING(TRIM(first_name), 2))
) AS cleaned_first_name,
-- Standardize phone
CONCAT(
'+1',
REGEXP_REPLACE(phone, '[^0-9]', '')
) AS standardized_phone,
-- Validate and clean email
LOWER(TRIM(email)) AS cleaned_email,
-- Parse and standardize date
STR_TO_DATE(birth_date_string, '%m/%d/%Y') AS birth_date,
-- Calculate age
TIMESTAMPDIFF(YEAR, STR_TO_DATE(birth_date_string, '%m/%d/%Y'), CURDATE()) AS age
FROM raw_customer_data
WHERE email REGEXP '^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
AND LENGTH(REGEXP_REPLACE(phone, '[^0-9]', '')) = 10;
Practice Exercise:
Challenge: Write queries using advanced string and date functions:
- Extract the username and domain from all email addresses
- Find all orders placed on weekends in the last year
- Create a report showing customer tenure in years and months
Solutions:
-- 1. Email parsing
SELECT
email,
SUBSTRING_INDEX(email, '@', 1) AS username,
SUBSTRING_INDEX(email, '@', -1) AS domain,
CONCAT(
SUBSTRING(SUBSTRING_INDEX(email, '@', 1), 1, 2),
'***@',
SUBSTRING_INDEX(email, '@', -1)
) AS masked_email
FROM customers;
-- 2. Weekend orders
SELECT
order_id,
order_date,
DAYNAME(order_date) AS day_name
FROM orders
WHERE DAYOFWEEK(order_date) IN (1, 7) -- 1=Sunday, 7=Saturday
AND order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)
ORDER BY order_date DESC;
-- 3. Customer tenure
SELECT
customer_name,
MIN(order_date) AS first_order,
TIMESTAMPDIFF(YEAR, MIN(order_date), CURDATE()) AS years_with_us,
MOD(TIMESTAMPDIFF(MONTH, MIN(order_date), CURDATE()), 12) AS additional_months,
CONCAT(
TIMESTAMPDIFF(YEAR, MIN(order_date), CURDATE()), ' years, ',
MOD(TIMESTAMPDIFF(MONTH, MIN(order_date), CURDATE()), 12), ' months'
) AS tenure
FROM customers
JOIN orders ON customers.customer_id = orders.customer_id
GROUP BY customers.customer_id, customer_name;
Summary
In this lesson, you mastered:
- SUBSTRING_INDEX(), REPLACE(), and REGEXP for string manipulation
- Date arithmetic with TIMESTAMPDIFF() and DATEDIFF()
- DATE_FORMAT() and STR_TO_DATE() for formatting and parsing
- Time zone conversions with CONVERT_TZ()
- Extracting date components (YEAR, MONTH, DAY, etc.)
- Real-world applications in data cleaning and transformation
Next Up: In the final lesson of this module, we'll master aggregate functions and GROUP BY for complex reporting and analytics!