Mac 平台字体Unicode范围分析器

news2025/5/9 19:00:19

字体Unicode范围分析器

#include <CoreText/CoreText.h>       // CoreText框架头文件，用于字体处理
#include <CoreFoundation/CoreFoundation.h> // CoreFoundation框架头文件
#include <stdio.h>                   // 标准输入输出
#include <stdlib.h>                  // 标准库函数
#include <string.h>                  // 字符串处理
#include <vector>                    // C++向量容器
#include <map>                       // C++映射容器
#include <string>                    // C++字符串

// 定义Unicode范围结构体，表示一个连续的Unicode字符范围
typedef struct {
    uint32_t start;  // 范围起始码点
    uint32_t end;    // 范围结束码点
} UnicodeRange;

// 全局映射表，存储字体名称和对应的Unicode范围集合
// key: 字体名称(string)
// value: 该字体支持的Unicode范围集合(vector<UnicodeRange>)
std::map<std::string, std::vector<UnicodeRange>> fontUnicodeRanges;

// 函数：获取字体支持的Unicode范围
// 参数：字体引用(CTFontRef)
// 返回值：该字体支持的Unicode范围集合(vector<UnicodeRange>)
std::vector<UnicodeRange> getSupportedUnicodeRanges(CTFontRef font) {
    std::vector<UnicodeRange> tempRanges; // 临时存储范围集合
    
    if (!font) {  // 检查字体引用是否有效
        return tempRanges; // 无效则返回空集合
    }

    // 1. 获取字体支持的字符集
    CFCharacterSetRef charset = CTFontCopyCharacterSet(font); // 复制字体字符集
    // 创建字符集的位图表示
    CFDataRef bitmapData = CFCharacterSetCreateBitmapRepresentation(kCFAllocatorDefault, charset);
    const UInt8 *bitmap = CFDataGetBytePtr(bitmapData); // 获取位图数据指针
    CFIndex length = CFDataGetLength(bitmapData);      // 获取位图数据长度

    bool inRange = false;   // 标记是否处于连续范围内
    uint32_t start = 0;     // 当前范围的起始码点
    uint32_t maxChar = 0;   // 当前范围的结束码点

    // 2. 遍历位图，检测并合并连续范围
    for (uint32_t byteIndex = 0; byteIndex < length; byteIndex++) {
        UInt8 byte = bitmap[byteIndex]; // 获取当前字节
        
        if (byte == 0) {  // 如果字节为0，表示没有支持的字符
            if (inRange) { // 如果之前处于范围内，则结束当前范围
                tempRanges.push_back({start, (byteIndex << 3) - 1});
                inRange = false;
            }
            continue; // 跳过后续处理
        }

        // 检查字节中的每一位(共8位)
        for (uint32_t bit = 0; bit < 8; bit++) {
            uint32_t currentChar = (byteIndex << 3) + bit; // 计算当前字符码点
            bool isSupported = (byte & (1 << bit)) != 0;   // 检查当前位是否被支持

            if (isSupported) {  // 如果字符被支持
                if (!inRange) { // 如果不在范围内，则开始新范围
                    start = currentChar;
                    inRange = true;
                }
                maxChar = currentChar; // 更新范围结束码点
            } else if (inRange) { // 如果字符不被支持但之前处于范围内
                tempRanges.push_back({start, currentChar - 1}); // 结束当前范围
                inRange = false;
            }
        }
    }

    // 处理最后一个范围（如果遍历结束时仍处于范围内）
    if (inRange) {
        tempRanges.push_back({start, maxChar});
    }

    // 3. 压缩范围（合并相邻或重叠的范围）
    if (!tempRanges.empty()) {
        size_t compressedCount = 0; // 压缩后的范围计数
        
        // 遍历所有范围
        for (size_t i = 1; i < tempRanges.size(); i++) {
            // 如果当前范围与前一个范围相邻或重叠
            if (tempRanges[i].start <= tempRanges[compressedCount].end + 1) {
                // 合并范围（取最大的结束码点）
                if (tempRanges[i].end > tempRanges[compressedCount].end) {
                    tempRanges[compressedCount].end = tempRanges[i].end;
                }
            } else {
                // 不重叠则保留当前范围
                compressedCount++;
                tempRanges[compressedCount] = tempRanges[i];
            }
        }
        // 调整向量大小为压缩后的数量
        tempRanges.resize(compressedCount + 1);
    }

    // 释放资源
    CFRelease(bitmapData);
    CFRelease(charset);
    
    return tempRanges; // 返回最终的范围集合
}

// 函数：获取字体名称
// 参数：字体引用(CTFontRef)
// 返回值：字体名称(string)
std::string getFontName(CTFontRef font) {
    if (!font) return ""; // 检查字体引用
    
    // 获取字体的PostScript名称
    CFStringRef fontName = CTFontCopyPostScriptName(font);
    if (!fontName) return ""; // 检查名称是否有效
    
    // 尝试直接获取C字符串
    const char* name = CFStringGetCStringPtr(fontName, kCFStringEncodingUTF8);
    std::string result;
    
    if (name) { // 如果直接获取成功
        result = name;
    } else {
        // 需要手动转换字符串
        CFIndex length = CFStringGetLength(fontName);
        CFIndex maxSize = CFStringGetMaximumSizeForEncoding(length, kCFStringEncodingUTF8) + 1;
        char* buffer = new char[maxSize]; // 分配缓冲区
        
        // 将CFString转换为C字符串
        if (CFStringGetCString(fontName, buffer, maxSize, kCFStringEncodingUTF8)) {
            result = buffer;
        }
        delete[] buffer; // 释放缓冲区
    }
    
    CFRelease(fontName); // 释放CFString
    return result;      // 返回转换后的字符串
}

// 函数：添加字体到全局映射表
// 参数：字体引用(CTFontRef)
void addFontToMap(CTFontRef font) {
    std::string fontName = getFontName(font); // 获取字体名称
    if (!fontName.empty()) { // 如果名称有效
        // 将字体名称和对应的Unicode范围存入映射表
        fontUnicodeRanges[fontName] = getSupportedUnicodeRanges(font);
    }
}

// 函数：打印所有字体的Unicode范围
void printAllFontRanges() {
    // 遍历映射表中的所有字体
    for (const auto& pair : fontUnicodeRanges) {
        printf("字体: %s\n", pair.first.c_str()); // 打印字体名称
        
        // 打印该字体支持的所有Unicode范围
        for (const auto& range : pair.second) {
            printf("  支持范围: U+%04X - U+%04X\n", range.start, range.end);
        }
        printf("\n"); // 打印空行分隔不同字体
    }
}

// 主函数
int main() {
    // 示例：创建并分析三种字体
    
    // 1. 创建苹方字体
    CTFontRef font1 = CTFontCreateWithName(CFSTR("PingFangSC-Regular"), 16.0, nullptr);
    if (font1) {
        addFontToMap(font1); // 添加到映射表
        CFRelease(font1);    // 释放字体
    }
    
    // 2. 创建Helvetica字体
    CTFontRef font2 = CTFontCreateWithName(CFSTR("Helvetica"), 16.0, nullptr);
    if (font2) {
        addFontToMap(font2);
        CFRelease(font2);
    }
    
    // 3. 创建Times New Roman字体
    CTFontRef font3 = CTFontCreateWithName(CFSTR("TimesNewRomanPSMT"), 16.0, nullptr);
    if (font3) {
        addFontToMap(font3);
        CFRelease(font3);
    }
    
    // 打印所有已分析字体的Unicode范围
    printAllFontRanges();
    
    return 0;
}