跳到主要内容

关系型VS列式分析数据库

概述

对于熟悉 MySQL 的开发者来说,理解 ClickHouse 最好的方式就是通过对比。本文将从存储模型、表结构、查询方式、数据操作等多个维度对比这两种数据库,帮助你快速理解 ClickHouse 的设计理念。

存储模型对比

MySQL:行式存储

-- MySQL 表结构
CREATE TABLE user_orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
product_id INT NOT NULL,
order_date DATETIME NOT NULL,
amount DECIMAL(10,2) NOT NULL,
status ENUM('pending', 'paid', 'shipped', 'completed') DEFAULT 'pending',
INDEX idx_user_date (user_id, order_date),
INDEX idx_product (product_id)
) ENGINE=InnoDB;

MySQL 数据存储方式

磁盘上的数据布局(行式):
Row1: [1, 1001, 2001, '2024-01-15 10:00:00', 99.99, 'paid']
Row2: [2, 1002, 2002, '2024-01-15 11:00:00', 199.99, 'pending']
Row3: [3, 1001, 2003, '2024-01-15 12:00:00', 299.99, 'shipped']

ClickHouse:列式存储

-- ClickHouse 表结构
CREATE TABLE user_orders_local (
id UInt32,
user_id UInt32,
product_id UInt32,
order_date DateTime,
amount Float32,
status Enum8('pending'=1, 'paid'=2, 'shipped'=3, 'completed'=4)
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(order_date)
ORDER BY (user_id, order_date);

ClickHouse 数据存储方式

磁盘上的数据布局(列式):
id列: [1, 2, 3, ...]
user_id列: [1001, 1002, 1001, ...]
product_id列: [2001, 2002, 2003, ...]
order_date列: ['2024-01-15 10:00:00', '2024-01-15 11:00:00', ...]
amount列: [99.99, 199.99, 299.99, ...]
status列: [2, 1, 3, ...] // 枚举值

存储差异的实际影响

// 模拟查询性能差异
type QueryPerformanceComparison struct {
MySQL DatabaseEngine
ClickHouse DatabaseEngine
}

func (qpc *QueryPerformanceComparison) AnalyticalQuery() {
// 分析型查询:计算每月销售额
sql := `
SELECT
YEAR(order_date) as year,
MONTH(order_date) as month,
SUM(amount) as total_sales,
COUNT(*) as order_count
FROM user_orders
WHERE order_date >= '2024-01-01'
GROUP BY year, month
ORDER BY year, month
`

// MySQL 执行过程
mysqlSteps := []string{
"1. 全表扫描或使用索引扫描",
"2. 读取每行的所有列数据(即使只需要order_date和amount)",
"3. 在内存中进行GROUP BY和聚合",
"4. 返回结果",
}

// ClickHouse 执行过程
clickhouseSteps := []string{
"1. 分区剪枝:只读取2024年的分区",
"2. 列剪枝:只读取order_date和amount列",
"3. 向量化执行聚合计算",
"4. 并行处理多个数据块",
"5. 返回结果",
}

fmt.Printf("MySQL steps: %v\n", mysqlSteps)
fmt.Printf("ClickHouse steps: %v\n", clickhouseSteps)
}

表设计理念对比

MySQL:实体关系模型

-- MySQL 标准的关系型设计
-- 用户表
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) ENGINE=InnoDB;

-- 产品表
CREATE TABLE products (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(200) NOT NULL,
category_id INT NOT NULL,
price DECIMAL(10,2) NOT NULL,
stock_quantity INT DEFAULT 0,
FOREIGN KEY (category_id) REFERENCES categories(id)
) ENGINE=InnoDB;

-- 订单表
CREATE TABLE orders (
id INT PRIMARY KEY AUTO_INCREMENT,
user_id INT NOT NULL,
order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
total_amount DECIMAL(10,2) NOT NULL,
status ENUM('pending', 'paid', 'shipped', 'completed'),
FOREIGN KEY (user_id) REFERENCES users(id)
) ENGINE=InnoDB;

-- 订单详情表
CREATE TABLE order_items (
id INT PRIMARY KEY AUTO_INCREMENT,
order_id INT NOT NULL,
product_id INT NOT NULL,
quantity INT NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
FOREIGN KEY (order_id) REFERENCES orders(id),
FOREIGN KEY (product_id) REFERENCES products(id)
) ENGINE=InnoDB;

ClickHouse:宽表模型

-- ClickHouse 推荐的宽表设计(反规范化)
CREATE TABLE order_events_local (
-- 事件基础信息
event_time DateTime,
event_date Date,
event_type Enum8('order_created'=1, 'payment_success'=2, 'shipped'=3, 'completed'=4),

-- 用户信息(冗余存储)
user_id UInt32,
user_name String,
user_email String,
user_register_date Date,

-- 订单信息
order_id UInt64,
order_total_amount Float32,
order_status Enum8('pending'=1, 'paid'=2, 'shipped'=3, 'completed'=4),

-- 商品信息(冗余存储)
product_id UInt32,
product_name String,
product_category String,
product_price Float32,

-- 订单详情
quantity UInt16,
unit_price Float32,
line_total Float32,

-- 其他维度信息
source Enum8('web'=1, 'mobile'=2, 'api'=3),
region String,
device_type String
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(event_date)
ORDER BY (user_id, event_time, order_id);

设计理念差异对比

查询方式对比

MySQL:OLTP 查询模式

-- 典型的MySQL查询:点查询、小范围查询
-- 1. 根据主键查询单条记录
SELECT * FROM users WHERE id = 12345;

-- 2. 根据索引查询少量数据
SELECT * FROM orders
WHERE user_id = 12345
AND order_date >= '2024-01-01'
ORDER BY order_date DESC
LIMIT 10;

-- 3. 复杂的多表关联查询
SELECT
u.username,
o.order_date,
o.total_amount,
p.name as product_name,
oi.quantity
FROM users u
JOIN orders o ON u.id = o.user_id
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE u.id = 12345
AND o.order_date >= '2024-01-01';

ClickHouse:OLAP 查询模式

-- 典型的ClickHouse查询:聚合分析、大数据量处理
-- 1. 大规模聚合查询
SELECT
toYYYYMM(event_date) as month,
product_category,
sum(line_total) as total_sales,
count(DISTINCT user_id) as unique_customers,
avg(order_total_amount) as avg_order_value
FROM order_events_local
WHERE event_date >= '2024-01-01'
AND event_type = 'completed'
GROUP BY month, product_category
ORDER BY month, total_sales DESC;

-- 2. 时间序列分析
SELECT
toStartOfHour(event_time) as hour,
countIf(event_type = 'order_created') as orders_created,
countIf(event_type = 'payment_success') as payments,
countIf(event_type = 'completed') as completed_orders,
round(countIf(event_type = 'completed') / countIf(event_type = 'order_created') * 100, 2) as completion_rate
FROM order_events_local
WHERE event_date = today()
GROUP BY hour
ORDER BY hour;

-- 3. 漏斗分析
SELECT
source,
uniq(user_id) as total_users,
uniqIf(user_id, event_type = 'order_created') as users_with_orders,
uniqIf(user_id, event_type = 'payment_success') as paying_users,
round(uniqIf(user_id, event_type = 'payment_success') / uniq(user_id) * 100, 2) as conversion_rate
FROM order_events_local
WHERE event_date >= today() - INTERVAL 7 DAY
GROUP BY source
ORDER BY conversion_rate DESC;

查询性能对比示例

// 模拟不同查询场景的性能对比
type QueryScenario struct {
Name string
DataSize string
MySQLTime time.Duration
ClickHouseTime time.Duration
}

func compareQueryPerformance() []QueryScenario {
return []QueryScenario{
{
Name: "单条记录查询",
DataSize: "1000万行",
MySQLTime: 1 * time.Millisecond, // 有索引,很快
ClickHouseTime: 50 * time.Millisecond, // 不适合点查询
},
{
Name: "少量数据关联查询",
DataSize: "1000万行",
MySQLTime: 100 * time.Millisecond, // 适中
ClickHouseTime: 200 * time.Millisecond, // 需要扫描更多数据
},
{
Name: "大规模聚合分析",
DataSize: "1亿行",
MySQLTime: 30 * time.Second, // 很慢
ClickHouseTime: 500 * time.Millisecond, // 非常快
},
{
Name: "复杂时间序列分析",
DataSize: "10亿行",
MySQLTime: 300 * time.Second, // 超慢或无法完成
ClickHouseTime: 2 * time.Second, // 快速完成
},
}
}

数据操作对比

MySQL:完整的CRUD操作

-- MySQL 支持完整的CRUD操作

-- CREATE - 插入数据
INSERT INTO users (username, email) VALUES ('john_doe', 'john@example.com');

-- READ - 查询数据
SELECT * FROM users WHERE username = 'john_doe';

-- UPDATE - 更新数据(MySQL的强项)
UPDATE users SET email = 'john.doe@example.com' WHERE username = 'john_doe';

-- DELETE - 删除数据(MySQL支持)
DELETE FROM users WHERE id = 123;

-- 事务支持
START TRANSACTION;
INSERT INTO orders (user_id, total_amount) VALUES (123, 199.99);
INSERT INTO order_items (order_id, product_id, quantity) VALUES (LAST_INSERT_ID(), 456, 2);
COMMIT;

ClickHouse:插入优化的操作

-- ClickHouse 主要优化插入和查询

-- INSERT - 批量插入(推荐方式)
INSERT INTO order_events_local VALUES
(now(), today(), 'order_created', 12345, 'John Doe', 'john@example.com', '2023-01-01', 1001, 199.99, 'pending', 456, 'iPhone 15', 'Electronics', 999.99, 1, 999.99, 999.99, 'web', 'US', 'mobile'),
(now(), today(), 'payment_success', 12346, 'Jane Smith', 'jane@example.com', '2023-02-01', 1002, 299.99, 'paid', 457, 'MacBook Pro', 'Electronics', 2999.99, 1, 2999.99, 2999.99, 'web', 'EU', 'desktop');

-- SELECT - 高效的分析查询(ClickHouse的强项)
SELECT product_category, sum(line_total) as revenue
FROM order_events_local
WHERE event_date >= today() - 30
GROUP BY product_category;

-- UPDATE - 有限支持(通过ALTER)
ALTER TABLE order_events_local
UPDATE user_email = 'new_email@example.com'
WHERE user_id = 12345;

-- DELETE - 有限支持(通过ALTER)
ALTER TABLE order_events_local
DELETE WHERE event_date < '2023-01-01';

-- 注意:ClickHouse的UPDATE/DELETE是异步的,不是实时生效

数据操作特性对比

// 数据操作能力对比
type DatabaseCapabilities struct {
MySQL map[string]PerformanceLevel
ClickHouse map[string]PerformanceLevel
}

type PerformanceLevel string

const (
Excellent PerformanceLevel = "优秀"
Good PerformanceLevel = "良好"
Fair PerformanceLevel = "一般"
Poor PerformanceLevel = "较差"
NotSupported PerformanceLevel = "不支持"
)

func getCapabilitiesComparison() DatabaseCapabilities {
return DatabaseCapabilities{
MySQL: map[string]PerformanceLevel{
"单条插入": Excellent,
"批量插入": Good,
"点查询": Excellent,
"范围查询": Good,
"复杂JOIN": Good,
"实时UPDATE": Excellent,
"实时DELETE": Excellent,
"大规模聚合": Poor,
"事务支持": Excellent,
},
ClickHouse: map[string]PerformanceLevel{
"单条插入": Poor,
"批量插入": Excellent,
"点查询": Fair,
"范围查询": Good,
"复杂JOIN": Fair,
"实时UPDATE": Poor,
"实时DELETE": Poor,
"大规模聚合": Excellent,
"事务支持": NotSupported,
},
}
}

索引机制对比

MySQL:丰富的索引类型

-- MySQL 支持多种索引类型
CREATE TABLE products (
id INT PRIMARY KEY AUTO_INCREMENT, -- 主键索引(聚簇索引)
name VARCHAR(200) NOT NULL,
category_id INT NOT NULL,
price DECIMAL(10,2) NOT NULL,
description TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

-- 普通索引
INDEX idx_category (category_id),

-- 复合索引
INDEX idx_category_price (category_id, price),

-- 唯一索引
UNIQUE INDEX idx_name (name),

-- 全文索引
FULLTEXT INDEX idx_description (description)
) ENGINE=InnoDB;

-- 查询优化器会自动选择最优索引
EXPLAIN SELECT * FROM products
WHERE category_id = 10 AND price BETWEEN 100 AND 500;

ClickHouse:稀疏索引

-- ClickHouse 使用稀疏主键索引
CREATE TABLE products_local (
id UInt32,
name String,
category_id UInt16,
price Float32,
description String,
created_date Date
) ENGINE = MergeTree()
ORDER BY (category_id, price, id) -- 这就是"索引"
SETTINGS index_granularity = 8192;

-- ClickHouse 的索引原理
-- 每8192行创建一个索引标记
-- 索引标记记录:(category_id的min/max, price的min/max, 数据位置)

索引原理对比

// 索引查询性能对比
type IndexComparison struct {
Scenario string
MySQL IndexPerformance
ClickHouse IndexPerformance
}

type IndexPerformance struct {
SeekTime time.Duration
ScanRows int
MemoryUsage string
}

func compareIndexPerformance() []IndexComparison {
return []IndexComparison{
{
Scenario: "精确查找单条记录",
MySQL: IndexPerformance{
SeekTime: 1 * time.Millisecond,
ScanRows: 1,
MemoryUsage: "低",
},
ClickHouse: IndexPerformance{
SeekTime: 10 * time.Millisecond,
ScanRows: 8192, // 需要扫描整个granule
MemoryUsage: "中",
},
},
{
Scenario: "范围查询1万条记录",
MySQL: IndexPerformance{
SeekTime: 50 * time.Millisecond,
ScanRows: 10000,
MemoryUsage: "中",
},
ClickHouse: IndexPerformance{
SeekTime: 20 * time.Millisecond,
ScanRows: 16384, // 约2个granule
MemoryUsage: "低",
},
},
{
Scenario: "聚合查询100万条记录",
MySQL: IndexPerformance{
SeekTime: 5 * time.Second,
ScanRows: 1000000,
MemoryUsage: "高",
},
ClickHouse: IndexPerformance{
SeekTime: 200 * time.Millisecond,
ScanRows: 1000000,
MemoryUsage: "中", // 列式存储,只读需要的列
},
},
}
}

集群架构对比

MySQL:主从复制 + 分库分表

ClickHouse:原生分布式架构

适用场景总结

MySQL 适用场景

// MySQL 最适合的场景
type MySQLUseCases struct {
OLTP bool // 在线事务处理
RealTimeUpdates bool // 实时更新需求
ComplexTransactions bool // 复杂事务
SmallToMediumData bool // 中小规模数据
PointQueries bool // 点查询
}

func recommendMySQL() MySQLUseCases {
return MySQLUseCases{
OLTP: true, // 电商订单系统、用户管理系统
RealTimeUpdates: true, // 库存更新、用户状态变更
ComplexTransactions: true, // 银行转账、订单处理
SmallToMediumData: true, // 千万级别以下的数据
PointQueries: true, // 根据ID查询用户信息
}
}

// 典型的MySQL应用场景
func mysqlExamples() []string {
return []string{
"电商网站的用户系统、订单系统",
"内容管理系统(CMS)",
"企业ERP、CRM系统",
"社交网络的用户关系管理",
"金融系统的账户管理",
}
}

ClickHouse 适用场景

// ClickHouse 最适合的场景
type ClickHouseUseCases struct {
OLAP bool // 在线分析处理
BigDataAnalytics bool // 大数据分析
RealTimeReporting bool // 实时报表
TimeSeriesData bool // 时间序列数据
LogAnalytics bool // 日志分析
}

func recommendClickHouse() ClickHouseUseCases {
return ClickHouseUseCases{
OLAP: true, // 销售分析、用户行为分析
BigDataAnalytics: true, // 亿级、十亿级数据分析
RealTimeReporting: true, // 实时大屏、实时报表
TimeSeriesData: true, // IoT数据、监控指标
LogAnalytics: true, // Web日志、应用日志分析
}
}

// 典型的ClickHouse应用场景
func clickhouseExamples() []string {
return []string{
"实时数据大屏和BI报表",
"广告效果分析和用户行为分析",
"IoT设备数据收集和分析",
"Web访问日志分析",
"金融风控和实时监控",
"游戏数据分析和运营分析",
}
}

技术选型建议

选择决策树

混合架构方案

对于复杂的业务系统,往往需要同时使用两种数据库:

// 混合架构设计
type HybridArchitecture struct {
MySQL MySQLRole
ClickHouse ClickHouseRole
DataFlow DataSyncStrategy
}

type MySQLRole struct {
Purpose string // "OLTP业务处理"
Tables []string // 用户表、订单表、商品表等
Operations []string // 增删改查、事务处理
}

type ClickHouseRole struct {
Purpose string // "OLAP数据分析"
Tables []string // 宽表、聚合表、物化视图
Operations []string // 大规模查询、实时报表
}

type DataSyncStrategy struct {
Method string // "CDC", "ETL", "MQ"
Frequency string // "实时", "分钟级", "小时级"
Tools []string // "Canal", "Kafka", "DataX"
}

func designHybridSystem() HybridArchitecture {
return HybridArchitecture{
MySQL: MySQLRole{
Purpose: "处理业务交易和用户操作",
Tables: []string{
"users", "orders", "products", "categories",
},
Operations: []string{
"用户注册登录", "下单支付", "库存管理", "状态更新",
},
},
ClickHouse: ClickHouseRole{
Purpose: "支持数据分析和实时报表",
Tables: []string{
"user_behavior_events", "order_analysis",
"product_sales_stats", "realtime_dashboard",
},
Operations: []string{
"用户行为分析", "销售趋势分析", "实时大屏", "运营报表",
},
},
DataFlow: DataSyncStrategy{
Method: "Canal + Kafka 实现CDC",
Frequency: "实时同步(秒级延迟)",
Tools: []string{"Canal", "Kafka", "ClickHouse Kafka Engine"},
},
}
}