字符串与 SDS 实现

学习目标

深入理解 C 字符串的局限性和 SDS 的设计目标
掌握 SDS 数据结构和内存管理策略
实现完整的 SDS 库，包括所有字符串操作
理解二进制安全和内存预分配机制
对比分析 SDS 与 Go string 的性能差异

C 字符串的局限性

1. 长度获取问题

C 字符串以 \0 结尾，获取长度需要 O(n) 时间复杂度：

// C 字符串长度计算
size_t strlen(const char *str) {
    size_t len = 0;
    while (str[len] != '\0') {
        len++;
    }
    return len;
}

问题：

每次获取长度都要遍历整个字符串
频繁的长度计算影响性能
无法快速进行字符串操作

2. 缓冲区溢出问题

C 字符串没有长度信息，容易发生缓冲区溢出：

// 危险的字符串操作
char dest[10];
char src[] = "This is a very long string";
strcpy(dest, src); // 缓冲区溢出！

问题：

没有边界检查
容易导致程序崩溃
安全漏洞风险

3. 二进制安全问题

C 字符串不能存储包含 \0 的二进制数据：

// 二进制数据被截断
char data[] = {0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x00, 0x57, 0x6f, 0x72, 0x6c, 0x64};
printf("%s", data); // 只输出 "Hello"

问题：

无法存储二进制数据
数据完整性无法保证
限制使用场景

️ SDS 设计目标

SDS（Simple Dynamic String）是 Redis 的字符串实现，解决了 C 字符串的所有问题：

1. 设计目标

O(1) 长度获取：预存长度信息
二进制安全：可以存储任意二进制数据
内存安全：避免缓冲区溢出
高效操作：减少内存重分配次数
兼容性：兼容 C 字符串函数

2. SDS 结构设计

// Redis SDS 结构（简化版）
struct sdshdr {
    int len;        // 字符串长度
    int free;       // 剩余空间
    char buf[];     // 字符数组
};

️ Go 语言 SDS 实现

1. 基础结构定义

// sds/sds.go
package sds

import (
    "fmt"
    "strconv"
    "strings"
    "unsafe"
)

// SDS 类型
type SDSType int

const (
    SDS_TYPE_8  SDSType = iota // 长度 < 256
    SDS_TYPE_16                // 长度 < 65536
    SDS_TYPE_32                // 长度 < 4294967296
    SDS_TYPE_64                // 长度 >= 4294967296
)

// SDS 结构体
type SDS struct {
    len  int      // 字符串长度
    free int      // 剩余空间
    buf  []byte   // 字符数组
    typ  SDSType  // 类型标识
}

// 创建 SDS
func NewSDS(initStr string) *SDS {
    data := []byte(initStr)
    length := len(data)
    
    // 根据长度选择类型
    var typ SDSType
    if length < 256 {
        typ = SDS_TYPE_8
    } else if length < 65536 {
        typ = SDS_TYPE_16
    } else if length < 4294967296 {
        typ = SDS_TYPE_32
    } else {
        typ = SDS_TYPE_64
    }
    
    return &SDS{
        len:  length,
        free: 0,
        buf:  data,
        typ:  typ,
    }
}

// 从字节数组创建
func NewSDSFromBytes(data []byte) *SDS {
    sds := NewSDS(string(data))
    return sds
}

// 创建空 SDS
func NewEmptySDS() *SDS {
    return &SDS{
        len:  0,
        free: 0,
        buf:  make([]byte, 0),
        typ:  SDS_TYPE_8,
    }
}

2. 基础操作实现

// sds/basic_operations.go
package sds

// 获取长度 O(1)
func (s *SDS) Len() int {
    return s.len
}

// 获取容量
func (s *SDS) Cap() int {
    return len(s.buf)
}

// 获取剩余空间
func (s *SDS) Free() int {
    return s.free
}

// 获取字符串内容
func (s *SDS) String() string {
    return string(s.buf[:s.len])
}

// 获取字节数组
func (s *SDS) Bytes() []byte {
    return s.buf[:s.len]
}

// 获取 C 字符串（以 \0 结尾）
func (s *SDS) CString() string {
    return string(s.buf[:s.len]) + "\x00"
}

// 检查是否为空
func (s *SDS) IsEmpty() bool {
    return s.len == 0
}

// 清空 SDS
func (s *SDS) Clear() {
    s.len = 0
    s.free = len(s.buf)
    s.buf = s.buf[:0]
}

3. 内存管理策略

// sds/memory_management.go
package sds

// 扩容策略
func (s *SDS) expand(newLen int) {
    currentCap := len(s.buf)
    
    // 计算新容量
    var newCap int
    if newLen < 1024*1024 { // 小于 1MB，翻倍
        newCap = newLen * 2
    } else { // 大于 1MB，每次增加 1MB
        newCap = newLen + 1024*1024
    }
    
    // 确保最小容量
    if newCap < 16 {
        newCap = 16
    }
    
    // 分配新空间
    newBuf := make([]byte, newCap)
    copy(newBuf, s.buf[:s.len])
    
    s.buf = newBuf
    s.free = newCap - s.len
    
    // 更新类型
    s.updateType()
}

// 更新 SDS 类型
func (s *SDS) updateType() {
    length := s.len
    if length < 256 {
        s.typ = SDS_TYPE_8
    } else if length < 65536 {
        s.typ = SDS_TYPE_16
    } else if length < 4294967296 {
        s.typ = SDS_TYPE_32
    } else {
        s.typ = SDS_TYPE_64
    }
}

// 惰性空间释放
func (s *SDS) Trim() {
    if s.free > s.len {
        newBuf := make([]byte, s.len)
        copy(newBuf, s.buf[:s.len])
        s.buf = newBuf
        s.free = 0
    }
}

// 预分配空间
func (s *SDS) Reserve(additional int) {
    if s.free < additional {
        s.expand(s.len + additional)
    }
}

4. 字符串操作实现

// sds/string_operations.go
package sds

// 追加字符串
func (s *SDS) Append(str string) {
    s.AppendBytes([]byte(str))
}

// 追加字节数组
func (s *SDS) AppendBytes(data []byte) {
    dataLen := len(data)
    totalLen := s.len + dataLen
    
    // 空间不足时扩容
    if s.free < dataLen {
        s.expand(totalLen)
    }
    
    // 追加数据
    copy(s.buf[s.len:], data)
    s.len = totalLen
    s.free = len(s.buf) - s.len
}

// 追加单个字符
func (s *SDS) AppendChar(c byte) {
    s.AppendBytes([]byte{c})
}

// 插入字符串
func (s *SDS) Insert(index int, str string) error {
    if index < 0 || index > s.len {
        return fmt.Errorf("index out of range")
    }
    
    data := []byte(str)
    dataLen := len(data)
    totalLen := s.len + dataLen
    
    // 空间不足时扩容
    if s.free < dataLen {
        s.expand(totalLen)
    }
    
    // 移动现有数据
    copy(s.buf[index+dataLen:], s.buf[index:s.len])
    
    // 插入新数据
    copy(s.buf[index:], data)
    s.len = totalLen
    s.free = len(s.buf) - s.len
    
    return nil
}

// 删除指定范围的字符
func (s *SDS) Delete(start, end int) error {
    if start < 0 || end > s.len || start > end {
        return fmt.Errorf("invalid range")
    }
    
    // 移动数据
    copy(s.buf[start:], s.buf[end:s.len])
    s.len -= (end - start)
    s.free = len(s.buf) - s.len
    
    return nil
}

// 截取子字符串
func (s *SDS) Substr(start, end int) (*SDS, error) {
    if start < 0 || end > s.len || start > end {
        return nil, fmt.Errorf("invalid range")
    }
    
    // 处理负索引
    if start < 0 {
        start = s.len + start
    }
    if end < 0 {
        end = s.len + end
    }
    
    if start < 0 {
        start = 0
    }
    if end > s.len {
        end = s.len
    }
    
    if start >= end {
        return NewEmptySDS(), nil
    }
    
    data := s.buf[start:end]
    return NewSDSFromBytes(data), nil
}

// 替换字符串
func (s *SDS) Replace(old, new string) int {
    if old == "" {
        return 0
    }
    
    count := 0
    pos := 0
    
    for {
        // 查找子字符串
        index := strings.Index(s.String()[pos:], old)
        if index == -1 {
            break
        }
        
        index += pos
        
        // 删除旧字符串
        s.Delete(index, index+len(old))
        
        // 插入新字符串
        s.Insert(index, new)
        
        count++
        pos = index + len(new)
    }
    
    return count
}

5. 比较和搜索操作

// sds/compare_search.go
package sds

import (
    "bytes"
    "strings"
)

// 比较字符串
func (s *SDS) Compare(other *SDS) int {
    return bytes.Compare(s.buf[:s.len], other.buf[:other.len])
}

// 比较字符串（忽略大小写）
func (s *SDS) CompareIgnoreCase(other *SDS) int {
    return strings.Compare(strings.ToLower(s.String()), strings.ToLower(other.String()))
}

// 检查是否相等
func (s *SDS) Equals(other *SDS) bool {
    return s.Compare(other) == 0
}

// 检查是否以指定字符串开头
func (s *SDS) StartsWith(prefix string) bool {
    if len(prefix) > s.len {
        return false
    }
    return string(s.buf[:len(prefix)]) == prefix
}

// 检查是否以指定字符串结尾
func (s *SDS) EndsWith(suffix string) bool {
    if len(suffix) > s.len {
        return false
    }
    return string(s.buf[s.len-len(suffix):]) == suffix
}

// 查找子字符串
func (s *SDS) Index(substr string) int {
    return strings.Index(s.String(), substr)
}

// 查找子字符串（从指定位置开始）
func (s *SDS) IndexFrom(start int, substr string) int {
    if start < 0 || start >= s.len {
        return -1
    }
    
    index := strings.Index(s.String()[start:], substr)
    if index == -1 {
        return -1
    }
    
    return start + index
}

// 查找最后一个子字符串
func (s *SDS) LastIndex(substr string) int {
    return strings.LastIndex(s.String(), substr)
}

// 查找字符
func (s *SDS) IndexByte(c byte) int {
    for i := 0; i < s.len; i++ {
        if s.buf[i] == c {
            return i
        }
    }
    return -1
}

// 查找最后一个字符
func (s *SDS) LastIndexByte(c byte) int {
    for i := s.len - 1; i >= 0; i-- {
        if s.buf[i] == c {
            return i
        }
    }
    return -1
}

6. 数值转换操作

// sds/numeric_operations.go
package sds

import (
    "strconv"
)

// 转换为整数
func (s *SDS) ToInt() (int, error) {
    return strconv.Atoi(s.String())
}

// 转换为 64 位整数
func (s *SDS) ToInt64() (int64, error) {
    return strconv.ParseInt(s.String(), 10, 64)
}

// 转换为浮点数
func (s *SDS) ToFloat64() (float64, error) {
    return strconv.ParseFloat(s.String(), 64)
}

// 转换为布尔值
func (s *SDS) ToBool() (bool, error) {
    return strconv.ParseBool(s.String())
}

// 从整数创建
func NewSDSFromInt(n int) *SDS {
    return NewSDS(strconv.Itoa(n))
}

// 从 64 位整数创建
func NewSDSFromInt64(n int64) *SDS {
    return NewSDS(strconv.FormatInt(n, 10))
}

// 从浮点数创建
func NewSDSFromFloat64(f float64) *SDS {
    return NewSDS(strconv.FormatFloat(f, 'f', -1, 64))
}

// 从布尔值创建
func NewSDSFromBool(b bool) *SDS {
    return NewSDS(strconv.FormatBool(b))
}

7. 格式化操作

// sds/format_operations.go
package sds

import (
    "fmt"
    "strings"
)

// 格式化字符串
func (s *SDS) Format(format string, args ...interface{}) {
    formatted := fmt.Sprintf(format, args...)
    s.Clear()
    s.Append(formatted)
}

// 左对齐
func (s *SDS) LJust(width int, fillchar byte) {
    if s.len >= width {
        return
    }
    
    padding := width - s.len
    for i := 0; i < padding; i++ {
        s.AppendChar(fillchar)
    }
}

// 右对齐
func (s *SDS) RJust(width int, fillchar byte) {
    if s.len >= width {
        return
    }
    
    padding := width - s.len
    for i := 0; i < padding; i++ {
        s.Insert(0, string(fillchar))
    }
}

// 居中对齐
func (s *SDS) Center(width int, fillchar byte) {
    if s.len >= width {
        return
    }
    
    padding := width - s.len
    leftPadding := padding / 2
    rightPadding := padding - leftPadding
    
    for i := 0; i < leftPadding; i++ {
        s.Insert(0, string(fillchar))
    }
    
    for i := 0; i < rightPadding; i++ {
        s.AppendChar(fillchar)
    }
}

// 去除首尾空白字符
func (s *SDS) Strip() {
    s.LStrip()
    s.RStrip()
}

// 去除左侧空白字符
func (s *SDS) LStrip() {
    start := 0
    for start < s.len && isSpace(s.buf[start]) {
        start++
    }
    
    if start > 0 {
        s.Delete(0, start)
    }
}

// 去除右侧空白字符
func (s *SDS) RStrip() {
    end := s.len
    for end > 0 && isSpace(s.buf[end-1]) {
        end--
    }
    
    if end < s.len {
        s.Delete(end, s.len)
    }
}

// 检查是否为空白字符
func isSpace(c byte) bool {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}

// 分割字符串
func (s *SDS) Split(sep string) []*SDS {
    parts := strings.Split(s.String(), sep)
    result := make([]*SDS, len(parts))
    
    for i, part := range parts {
        result[i] = NewSDS(part)
    }
    
    return result
}

// 连接字符串
func JoinSDS(parts []*SDS, sep string) *SDS {
    if len(parts) == 0 {
        return NewEmptySDS()
    }
    
    result := NewSDS(parts[0].String())
    for i := 1; i < len(parts); i++ {
        result.Append(sep)
        result.Append(parts[i].String())
    }
    
    return result
}

测试验证

1. 单元测试

// sds/sds_test.go
package sds

import (
    "testing"
)

func TestSDSBasic(t *testing.T) {
    sds := NewSDS("Hello")
    
    // 测试长度
    if sds.Len() != 5 {
        t.Errorf("Expected length 5, got %d", sds.Len())
    }
    
    // 测试字符串内容
    if sds.String() != "Hello" {
        t.Errorf("Expected 'Hello', got '%s'", sds.String())
    }
    
    // 测试是否为空
    if sds.IsEmpty() {
        t.Error("SDS should not be empty")
    }
}

func TestSDSAppend(t *testing.T) {
    sds := NewSDS("Hello")
    sds.Append(" World")
    
    expected := "Hello World"
    if sds.String() != expected {
        t.Errorf("Expected '%s', got '%s'", expected, sds.String())
    }
}

func TestSDSInsert(t *testing.T) {
    sds := NewSDS("Hello")
    err := sds.Insert(5, " World")
    if err != nil {
        t.Fatalf("Insert failed: %v", err)
    }
    
    expected := "Hello World"
    if sds.String() != expected {
        t.Errorf("Expected '%s', got '%s'", expected, sds.String())
    }
}

func TestSDSSubstr(t *testing.T) {
    sds := NewSDS("Hello World")
    
    // 测试正常范围
    substr, err := sds.Substr(0, 5)
    if err != nil {
        t.Fatalf("Substr failed: %v", err)
    }
    
    if substr.String() != "Hello" {
        t.Errorf("Expected 'Hello', got '%s'", substr.String())
    }
    
    // 测试负索引
    substr, err = sds.Substr(-5, -1)
    if err != nil {
        t.Fatalf("Substr with negative index failed: %v", err)
    }
    
    if substr.String() != "Worl" {
        t.Errorf("Expected 'Worl', got '%s'", substr.String())
    }
}

func TestSDSCompare(t *testing.T) {
    sds1 := NewSDS("Hello")
    sds2 := NewSDS("World")
    sds3 := NewSDS("Hello")
    
    // 测试比较
    if sds1.Compare(sds2) >= 0 {
        t.Error("Hello should be less than World")
    }
    
    if sds1.Compare(sds3) != 0 {
        t.Error("Hello should equal Hello")
    }
    
    // 测试相等
    if !sds1.Equals(sds3) {
        t.Error("Hello should equal Hello")
    }
}

func TestSDSStartsWith(t *testing.T) {
    sds := NewSDS("Hello World")
    
    if !sds.StartsWith("Hello") {
        t.Error("Should start with 'Hello'")
    }
    
    if sds.StartsWith("World") {
        t.Error("Should not start with 'World'")
    }
}

func TestSDSReplace(t *testing.T) {
    sds := NewSDS("Hello Hello World")
    count := sds.Replace("Hello", "Hi")
    
    if count != 2 {
        t.Errorf("Expected 2 replacements, got %d", count)
    }
    
    if sds.String() != "Hi Hi World" {
        t.Errorf("Expected 'Hi Hi World', got '%s'", sds.String())
    }
}

func TestSDSNumeric(t *testing.T) {
    // 测试整数转换
    sds := NewSDS("123")
    n, err := sds.ToInt()
    if err != nil {
        t.Fatalf("ToInt failed: %v", err)
    }
    
    if n != 123 {
        t.Errorf("Expected 123, got %d", n)
    }
    
    // 测试从整数创建
    sds2 := NewSDSFromInt(456)
    if sds2.String() != "456" {
        t.Errorf("Expected '456', got '%s'", sds2.String())
    }
}

func TestSDSStrip(t *testing.T) {
    sds := NewSDS("  Hello World  ")
    sds.Strip()
    
    if sds.String() != "Hello World" {
        t.Errorf("Expected 'Hello World', got '%s'", sds.String())
    }
}

func TestSDSSplit(t *testing.T) {
    sds := NewSDS("a,b,c")
    parts := sds.Split(",")
    
    if len(parts) != 3 {
        t.Errorf("Expected 3 parts, got %d", len(parts))
    }
    
    expected := []string{"a", "b", "c"}
    for i, part := range parts {
        if part.String() != expected[i] {
            t.Errorf("Expected '%s', got '%s'", expected[i], part.String())
        }
    }
}

2. 性能基准测试

// sds/benchmark_test.go
package sds

import (
    "strings"
    "testing"
)

func BenchmarkSDSAppend(b *testing.B) {
    sds := NewSDS("")
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        sds.Append("test")
    }
}

func BenchmarkGoStringAppend(b *testing.B) {
    var str string
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        str += "test"
    }
}

func BenchmarkSDSConcat(b *testing.B) {
    sds1 := NewSDS("Hello")
    sds2 := NewSDS(" World")
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        sds1.Append(sds2.String())
    }
}

func BenchmarkGoStringConcat(b *testing.B) {
    str1 := "Hello"
    str2 := " World"
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        str1 += str2
    }
}

func BenchmarkSDSSubstr(b *testing.B) {
    sds := NewSDS("Hello World")
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        sds.Substr(0, 5)
    }
}

func BenchmarkGoStringSubstr(b *testing.B) {
    str := "Hello World"
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        str[0:5]
    }
}

func BenchmarkSDSCompare(b *testing.B) {
    sds1 := NewSDS("Hello")
    sds2 := NewSDS("World")
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        sds1.Compare(sds2)
    }
}

func BenchmarkGoStringCompare(b *testing.B) {
    str1 := "Hello"
    str2 := "World"
    
    b.ResetTimer()
    for i := 0; i < b.N; i++ {
        strings.Compare(str1, str2)
    }
}

3. 内存使用测试

// sds/memory_test.go
package sds

import (
    "runtime"
    "testing"
)

func TestSDSMemoryUsage(t *testing.T) {
    var m1, m2 runtime.MemStats
    runtime.ReadMemStats(&m1)
    
    // 创建大量 SDS
    sdsList := make([]*SDS, 10000)
    for i := 0; i < 10000; i++ {
        sdsList[i] = NewSDS("test string")
    }
    
    runtime.ReadMemStats(&m2)
    
    heapGrowth := m2.HeapAlloc - m1.HeapAlloc
    t.Logf("Memory usage: %d bytes", heapGrowth)
    
    if heapGrowth > 10*1024*1024 { // 10MB
        t.Errorf("Memory usage too high: %d bytes", heapGrowth)
    }
}

func TestSDSMemoryReuse(t *testing.T) {
    sds := NewSDS("")
    
    // 测试内存重用
    for i := 0; i < 1000; i++ {
        sds.Append("test")
        sds.Clear()
    }
    
    // 检查内存使用
    if sds.Cap() > 1000 {
        t.Errorf("Capacity too high: %d", sds.Cap())
    }
}

性能对比分析

1. 内存使用对比

操作	SDS	Go String	优势
长度获取	O(1)	O(1)	相当
字符串拼接	O(1)	O(n)	SDS 优势
子字符串	O(1)	O(1)	相当
内存预分配	支持	不支持	SDS 优势

2. 性能测试结果

// 基准测试结果示例
func BenchmarkComparison(b *testing.B) {
    // SDS 性能测试
    b.Run("SDS", func(b *testing.B) {
        sds := NewSDS("")
        for i := 0; i < b.N; i++ {
            sds.Append("test")
        }
    })
    
    // Go String 性能测试
    b.Run("GoString", func(b *testing.B) {
        var str string
        for i := 0; i < b.N; i++ {
            str += "test"
        }
    })
}