编译原理之词法分析-语法分析-中间代码生成
- 文章说明
- 源码
- 效果展示
- Gitee链接
 
文章说明
学习编译原理后,我一直想制作一款属于自己的小语言编译器。虽然对相关技术的理解还不够深入,基础也不够扎实,但还是想尝试一下。目前的效果只是初步设计与实现的结果,没有采用较为规范的EBNF(扩展巴科斯范式)来描述文法,因为我觉得那种方式对我来说有些抽象、有些困难。所以我简单地采用解释器模式来模拟编译的关键步骤:词法分析、语法分析、中间代码生成。
源码
参见链接,部分核心代码如下:
主程序
package com.boot.compiler;
import com.boot.compiler.entity.AbstractSyntaxTree;
import com.boot.compiler.entity.Block;
import com.boot.compiler.entity.Function;
import com.boot.compiler.entity.Operation;
import com.boot.compiler.util.ir.IrAnalyzer;
import com.boot.compiler.util.lexical.LexicalAnalyzer;
import com.boot.compiler.util.semantic.SemanticAnalyzer;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
/**
 * Command-line driver that runs the compiler stages in order and dumps the
 * result of each stage to a text file: lexical analysis, syntax-tree
 * construction, then IR generation.
 *
 * <p>All input and output paths are fixed constants. Every output file is
 * written as UTF-8, and the program file is decoded as UTF-8 as well so that
 * input and output agree regardless of the platform default charset.
 *
 * @author bbyh
 * @date 2024/3/9 22:30
 */
public class TestMain {
    private static final String PROGRAM_PATH = "D:/compiler/program.txt";
    private static final String SPLIT_WORD_PATH = "D:/compiler/split_word.txt";
    private static final String ABSTRACT_SYNTAX_TREE_PATH = "D:/compiler/abstract_syntax_tree.txt";
    private static final String IR_CODE_PATH = "D:/compiler/ir_code.txt";

    /** Upper bound, in bytes, on how much of the program file is read. */
    private static final int MAX_PROGRAM_BYTES = 1024 * 1024;

    /**
     * Runs the pipeline. Each stage's dump is written before the next stage
     * starts, so the earlier dumps survive if a later stage throws.
     *
     * @param args ignored
     * @throws Exception on any I/O failure or analyzer error
     */
    public static void main(String[] args) throws Exception {
        String text = readProgram();

        List<Operation> operationList = LexicalAnalyzer.analyse(text);
        writeTokens(operationList);

        AbstractSyntaxTree abstractSyntaxTree = SemanticAnalyzer.analyse(operationList);
        writeSyntaxTree(abstractSyntaxTree);

        // The IR dump reads function.irList after this call; presumably the
        // analyzer fills it in place on the tree -- confirm against IrAnalyzer.
        IrAnalyzer.analyse(abstractSyntaxTree);
        writeIrCode(abstractSyntaxTree);
    }

    /**
     * Reads the program file (at most {@link #MAX_PROGRAM_BYTES} bytes) and
     * decodes it as UTF-8. An empty file yields an empty string.
     */
    private static String readProgram() throws Exception {
        byte[] buf = new byte[MAX_PROGRAM_BYTES];
        int total = 0;
        try (FileInputStream inputStream = new FileInputStream(PROGRAM_PATH)) {
            int read;
            // A single read() may return fewer bytes than are available and
            // returns -1 at end of file, so keep reading until EOF or until
            // the buffer is full.
            while (total < buf.length
                    && (read = inputStream.read(buf, total, buf.length - total)) != -1) {
                total += read;
            }
        }
        // NOTE(review): content beyond MAX_PROGRAM_BYTES is still silently
        // dropped, as before; raise the limit if larger programs are expected.
        return new String(buf, 0, total, StandardCharsets.UTF_8);
    }

    /** Writes one "type TAB value" line per token to the split-word file. */
    private static void writeTokens(List<Operation> operationList) throws Exception {
        try (FileOutputStream outputStream = new FileOutputStream(SPLIT_WORD_PATH)) {
            for (Operation operation : operationList) {
                outputStream.write(utf8(operation.type.toString()));
                outputStream.write(utf8("\t"));
                outputStream.write(utf8(operation.value));
                outputStream.write(utf8("\n"));
            }
        }
    }

    /**
     * Writes the tree as an indented outline: function name, then each block
     * type (one tab), then each token of the block (two tabs).
     */
    private static void writeSyntaxTree(AbstractSyntaxTree abstractSyntaxTree) throws Exception {
        try (FileOutputStream outputStream = new FileOutputStream(ABSTRACT_SYNTAX_TREE_PATH)) {
            for (Function function : abstractSyntaxTree.functionList) {
                outputStream.write(utf8(function.name + "\n"));
                for (Block block : function.blockList) {
                    outputStream.write(utf8("\t" + block.blockType + "\n"));
                    for (Operation blockOperation : block.operationList) {
                        outputStream.write(
                                utf8("\t\t" + blockOperation.type + "\t" + blockOperation.value + "\n"));
                    }
                }
            }
        }
    }

    /** Writes each function name followed by its IR lines, one tab deep. */
    private static void writeIrCode(AbstractSyntaxTree abstractSyntaxTree) throws Exception {
        try (FileOutputStream outputStream = new FileOutputStream(IR_CODE_PATH)) {
            for (Function function : abstractSyntaxTree.functionList) {
                outputStream.write(utf8(function.name + "\n"));
                for (String irCode : function.irList) {
                    outputStream.write(utf8("\t" + irCode + "\n"));
                }
            }
        }
    }

    /** Encodes {@code s} as UTF-8 bytes. */
    private static byte[] utf8(String s) {
        return s.getBytes(StandardCharsets.UTF_8);
    }
}
定义的关键字
package com.boot.compiler.entity;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * A reserved word of the language together with its token type, plus the
 * static lookup tables listing every reserved word.
 */
public class KeyWord {
    /** Maps each reserved word to its token type. */
    public static final Map<String, OperationType> KEY_WORD_MAP = new HashMap<>(10);
    /** The reserved words themselves; holds the same strings as the keys of {@link #KEY_WORD_MAP}. */
    public static final Set<String> KEY_WORD_SET = new HashSet<>(10);

    // Must stay below the two table declarations: static initializers run in
    // textual order and register() writes to both tables.
    static {
        register("function", OperationType.FUNCTION);
        register("int", OperationType.INT);
    }

    public OperationType type;
    public String name;

    /**
     * @param type token type of the reserved word
     * @param name spelling of the reserved word
     */
    public KeyWord(OperationType type, String name) {
        this.name = name;
        this.type = type;
    }

    /** Records {@code word} in both lookup tables. */
    private static void register(String word, OperationType wordType) {
        KEY_WORD_SET.add(word);
        KEY_WORD_MAP.put(word, wordType);
    }
}
定义的运算符
package com.boot.compiler.entity;
import java.util.*;
/**
 * An operator or punctuation symbol of the language together with its token
 * type, plus the static lookup tables listing every such symbol.
 *
 * @author bbyh
 * @date 2024/3/10 11:44
 */
public class Calculator {
    /** Maps each symbol to its token type. */
    public static final Map<String, OperationType> CALCULATOR_MAP = new HashMap<>(10);
    /** The symbols themselves; holds the same strings as the keys of {@link #CALCULATOR_MAP}. */
    public static final Set<String> CALCULATOR_SET = new HashSet<>(10);

    // Must stay below the two table declarations: static initializers run in
    // textual order and register() writes to both tables.
    static {
        register("(", OperationType.LEFT_LITTLE);
        register(")", OperationType.RIGHT_LITTLE);
        register("{", OperationType.LEFT_LARGE);
        register("}", OperationType.RIGHT_LARGE);
        register(";", OperationType.SEMICOLON);
        register("=", OperationType.ASSIGN);
        register("+", OperationType.ADD);
    }

    public OperationType type;
    public String name;

    /**
     * @param type token type of the symbol
     * @param name spelling of the symbol
     */
    public Calculator(OperationType type, String name) {
        this.name = name;
        this.type = type;
    }

    /** Renders as {@code Calculator{type=<type>, name='<name>'}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Calculator{");
        text.append("type=").append(type);
        text.append(", name='").append(name).append('\'');
        text.append('}');
        return text.toString();
    }

    /** Records {@code symbol} in both lookup tables. */
    private static void register(String symbol, OperationType symbolType) {
        CALCULATOR_SET.add(symbol);
        CALCULATOR_MAP.put(symbol, symbolType);
    }
}
词法分析实现
package com.boot.compiler.util.lexical;
import com.boot.compiler.entity.Operation;
import com.boot.compiler.entity.OperationType;
import com.boot.compiler.util.Character;
import java.util.ArrayList;
import java.util.List;
import static com.boot.compiler.entity.Calculator.CALCULATOR_MAP;
import static com.boot.compiler.entity.Calculator.CALCULATOR_SET;
import static com.boot.compiler.entity.KeyWord.KEY_WORD_MAP;
import static com.boot.compiler.entity.KeyWord.KEY_WORD_SET;
/**
 * Hand-written lexer that turns program text into a flat list of
 * {@link Operation} tokens.
 *
 * <p>The text is first cut into whitespace-separated words. Each word is then
 * scanned left to right into operator symbols, keywords, identifiers
 * ({@code VAR}) and integer literals ({@code INT_NUMBER}).
 *
 * @author bbyh
 * @date 2024/3/9 22:38
 */
public class LexicalAnalyzer {
    private static final String LINE_SPLIT = "\n";
    private static final String WORD_SPLIT = " ";
    private static final String TAB_SPLIT = "\t";

    /**
     * Cuts the program text into space-separated words.
     *
     * <p>Tabs become spaces, each line is trimmed (which also drops a trailing
     * {@code \r}), and lines are joined with a single space so that the last
     * token of one line cannot fuse with the first token of the next. The
     * result may contain empty strings where several spaces were adjacent.
     */
    private static String[] split(String text) {
        // Literal replacement; no regex is needed for a fixed tab character.
        text = text.replace(TAB_SPLIT, "    ");
        StringBuilder buffer = new StringBuilder();
        for (String line : text.split(LINE_SPLIT)) {
            buffer.append(line.trim()).append(WORD_SPLIT);
        }
        return buffer.toString().split(WORD_SPLIT);
    }

    /**
     * Tokenizes the given program text.
     *
     * @param text full source text of the program
     * @return the tokens in source order
     * @throws UnsupportedOperationException if the text contains a character
     *         that is not an operator symbol, a letter or a digit
     */
    public static List<Operation> analyse(String text) {
        String[] words = split(text);
        List<Operation> wordList = new ArrayList<>(words.length);
        for (String word : words) {
            // Fast paths: the whole word is exactly one keyword or one symbol.
            if (KEY_WORD_SET.contains(word)) {
                wordList.add(new Operation(KEY_WORD_MAP.get(word), word));
                continue;
            }
            if (CALCULATOR_SET.contains(word)) {
                wordList.add(new Operation(CALCULATOR_MAP.get(word), word));
                continue;
            }
            scanWord(word, wordList);
        }
        return wordList;
    }

    /** Scans one whitespace-free word and appends its tokens to {@code wordList}. */
    private static void scanWord(String word, List<Operation> wordList) {
        int start = 0;
        while (start < word.length()) {
            char ch = word.charAt(start);

            // Single-character operator or punctuation symbol.
            String symbol = String.valueOf(ch);
            if (CALCULATOR_SET.contains(symbol)) {
                wordList.add(new Operation(CALCULATOR_MAP.get(symbol), symbol));
                start++;
                continue;
            }

            // Keyword or identifier: a letter followed by letters or digits.
            if (Character.isLetter(ch)) {
                int end = start + 1;
                while (end < word.length() && Character.isLetterOrNumber(word.charAt(end))) {
                    end++;
                }
                String subString = word.substring(start, end);
                if (KEY_WORD_SET.contains(subString)) {
                    wordList.add(new Operation(KEY_WORD_MAP.get(subString), subString));
                } else {
                    wordList.add(new Operation(OperationType.VAR, subString));
                }
                start = end;
                continue;
            }

            // Integer literal: a run of digits.
            if (Character.isNumber(ch)) {
                int end = start + 1;
                while (end < word.length() && Character.isNumber(word.charAt(end))) {
                    end++;
                }
                wordList.add(new Operation(OperationType.INT_NUMBER, word.substring(start, end)));
                start = end;
                continue;
            }

            // No rule consumed the character. Fail loudly: leaving the cursor
            // where it is would spin this loop forever.
            throw new UnsupportedOperationException("词法解析出错,无法识别的字符: " + ch);
        }
    }
}
语法分析实现
package com.boot.compiler.util.semantic;
import com.boot.compiler.entity.AbstractSyntaxTree;
import com.boot.compiler.entity.Operation;
import com.boot.compiler.util.semantic.executor.AbstractSemanticAnalyzerExecutor;
import java.util.List;
/**
 * Entry point for building the syntax tree from a token list.
 *
 * @author bbyh
 * @date 2024/3/10 13:32
 */
public class SemanticAnalyzer {
    /**
     * Runs an {@link AbstractSemanticAnalyzerExecutor} over the tokens and
     * returns the tree it leaves in its static {@code abstractSyntaxTree} field.
     *
     * @param operationList tokens to analyse
     * @return the tree held by the executor after it has run
     */
    public static AbstractSyntaxTree analyse(List<Operation> operationList) {
        new AbstractSemanticAnalyzerExecutor(operationList).execute();
        return AbstractSemanticAnalyzerExecutor.abstractSyntaxTree;
    }
}
package com.boot.compiler.util.semantic.executor;
import com.boot.compiler.entity.AbstractSyntaxTree;
import com.boot.compiler.entity.Block;
import com.boot.compiler.entity.Function;
import com.boot.compiler.entity.Operation;
import java.util.List;
/**
 * Base class of the token executors that build the syntax tree.
 *
 * <p>The token list, the cursor into it and the tree under construction are
 * all held in static fields, so every executor instance shares them and only
 * one analysis can be in progress at a time.
 *
 * @author bbyh
 */
public class AbstractSemanticAnalyzerExecutor {
    /** Tokens being analysed. */
    protected static List<Operation> operationList;
    /** Position in {@link #operationList} of the token to dispatch next. */
    protected static int index;
    /** Tree under construction; read by callers once analysis has finished. */
    public static AbstractSyntaxTree abstractSyntaxTree;

    /** Creates an executor that works on the already-initialised shared state. */
    public AbstractSemanticAnalyzerExecutor() {
    }

    /**
     * Starts a new analysis: installs a fresh tree and the given tokens, and
     * rewinds the cursor to the first token.
     *
     * @param operationList tokens to analyse
     */
    public AbstractSemanticAnalyzerExecutor(List<Operation> operationList) {
        AbstractSemanticAnalyzerExecutor.abstractSyntaxTree = new AbstractSyntaxTree();
        AbstractSemanticAnalyzerExecutor.operationList = operationList;
        AbstractSemanticAnalyzerExecutor.index = 0;
    }

    /**
     * Hands control to the executor for the current token. The concrete
     * executors returned by {@link #nextExecutor()} presumably override this
     * method with their own handling.
     */
    public void execute() {
        nextExecutor().execute();
    }

    /**
     * Picks the executor that matches the type of the token at {@link #index}.
     * The cursor itself is not moved here.
     *
     * @return a new executor for the current token
     * @throws UnsupportedOperationException if the token type has no executor
     */
    protected final AbstractSemanticAnalyzerExecutor nextExecutor() {
        Operation operation = operationList.get(index);
        switch (operation.type) {
            case FUNCTION:
                return new FunctionExecutor();
            case INT:
                return new IntExecutor();
            case LEFT_LITTLE:
                return new LeftLittleExecutor();
            case RIGHT_LITTLE:
                return new RightLittleExecutor();
            case LEFT_LARGE:
                return new LeftLargeExecutor();
            case RIGHT_LARGE:
                return new RightLargeExecutor();
            case SEMICOLON:
                return new SemicolonExecutor();
            case ASSIGN:
                return new AssignExecutor();
            case ADD:
                return new AddExecutor();
            case INT_NUMBER:
                return new IntNumberExecutor();
            case VAR:
                return new VarExecutor();
            default:
                throw new UnsupportedOperationException("语义解析出错,执行器获取失败");
        }
    }

    /**
     * Appends a copy of {@code operation} to the last block of the last
     * function in the tree.
     *
     * @param operation token to record
     * @throws UnsupportedOperationException if no function has been declared
     *         yet, or the current function has no block, or that block has no
     *         operation list
     */
    protected final void addOperation(Operation operation) {
        List<Function> functionList = abstractSyntaxTree.functionList;
        // An empty list used to surface as IndexOutOfBoundsException(-1);
        // report it as the same kind of diagnostic as the checks below.
        if (functionList == null || functionList.isEmpty()) {
            throw new UnsupportedOperationException("语义解析出错,函数声明缺失");
        }
        Function function = functionList.get(functionList.size() - 1);
        if (function.blockList == null || function.blockList.isEmpty()) {
            throw new UnsupportedOperationException("语义解析出错,语句块声明缺失");
        }
        Block block = function.blockList.get(function.blockList.size() - 1);
        if (block.operationList == null) {
            throw new UnsupportedOperationException("语义解析出错,语句块声明缺失");
        }
        block.operationList.add(new Operation(operation.type, operation.value));
    }
}
IR生成
package com.boot.compiler.util.ir.executor;
import com.boot.compiler.entity.AbstractSyntaxTree;
import com.boot.compiler.entity.Block;
/**
 * Base class of the executors that walk the syntax tree for IR generation.
 *
 * <p>The tree and the two cursors (current function, current block) are held
 * in static fields and are therefore shared by every executor instance.
 *
 * @author bbyh
 * @date 2024/3/10 16:30
 */
public class AbstractIrAnalyzerExecutor {
    /** Tree being walked. */
    public static AbstractSyntaxTree abstractSyntaxTree;
    /** Index into the tree's function list of the current function. */
    public static int indexOfFunction;
    /** Index into the current function's block list of the current block. */
    public static int indexOfBlock;

    /** Creates an executor that works on the already-initialised shared state. */
    public AbstractIrAnalyzerExecutor() {
    }

    /**
     * Starts a new walk over {@code abstractSyntaxTree} with both cursors at zero.
     *
     * @param abstractSyntaxTree tree to walk
     */
    public AbstractIrAnalyzerExecutor(AbstractSyntaxTree abstractSyntaxTree) {
        AbstractIrAnalyzerExecutor.indexOfBlock = 0;
        AbstractIrAnalyzerExecutor.indexOfFunction = 0;
        AbstractIrAnalyzerExecutor.abstractSyntaxTree = abstractSyntaxTree;
    }

    /** Delegates to a new {@code FunctionExecutor}. */
    public void execute() {
        AbstractIrAnalyzerExecutor functionExecutor = new FunctionExecutor();
        functionExecutor.execute();
    }

    /**
     * Picks the executor that matches the type of the block addressed by the
     * two cursors. The cursors are not moved here.
     *
     * @return a new executor for the current block
     * @throws UnsupportedOperationException if the block type has no executor
     */
    public final AbstractIrAnalyzerExecutor nextBlockExecutor() {
        Block current = abstractSyntaxTree.functionList.get(indexOfFunction).blockList.get(indexOfBlock);
        final AbstractIrAnalyzerExecutor chosen;
        switch (current.blockType) {
            case SEQUENCE:
                chosen = new SequenceBlockExecutor();
                break;
            case CONDITION:
                chosen = new ConditionBlockExecutor();
                break;
            case LOOP:
                chosen = new LoopBlockExecutor();
                break;
            default:
                throw new UnsupportedOperationException("语法树解析出错,执行器获取失败");
        }
        return chosen;
    }
}
效果展示
源程序
词法分析结果
抽象语法树结果
生成的IR(中间代码),以常见的汇编格式展示,并不是规范的ARM或x86格式
ADD操作目前采用立即数表示,是因为IR生成的实现中还有一些难点尚未解决,后续会考虑修正该效果
Gitee链接
参见Gitee链接(WEB-OS-SYSTEM)























