mini-dog-c编译器开发 - 05 解释器与代码生成

本篇为 mini-dog-c 编译器开发系列第五篇，介绍解释器的实现，包括环境管理、表达式求值和函数调用。

1. 解释器 vs 编译器

很多人容易混淆"解释器"和"编译器"，其实区别很简单：

编译器：把源代码翻译成另一种形式（通常是机器码或字节码），然后由硬件或虚拟机执行
解释器：直接遍历源代码的中间表示（这里是 AST），一边分析一边执行，不生成额外的目标代码

mini-dog-c 的"编译"流程实际上是：源代码 → Token → AST → 解释执行，没有生成机器码或字节码这一步。

2. 值（Value）系统

解释器需要一种运行时表示数据的方式。mini-dog-c 定义了一个Value类型：

typedef enum { VALUE_INT, // 整数 VALUE_DOUBLE, // 浮点数 VALUE_CHAR, // 字符 VALUE_BOOL, // 布尔 VALUE_STRING, // 字符串 VALUE_FUNCTION, // 函数 VALUE_NULL, // 空值 } ValueType;  typedef struct { ValueType type; union { int int_value; double double_value; char char_value; bool bool_value; struct { char *string; int length; } string_value; struct { char *name; char **params; int param_count; ASTNode *body; void *closure; } function; } data; } Value;

每种字面量类型都对应一个value_create_*函数：

/*
 * Allocate a new VALUE_INT holding `v`.
 *
 * Returns the new value, or NULL if the allocation fails.
 */
Value *value_create_int(int v)
{
    Value *value = malloc(sizeof *value);
    if (value == NULL) {
        return NULL;
    }

    value->type = VALUE_INT;
    value->data.int_value = v;
    return value;
}

/*
 * Allocate a new VALUE_STRING holding a private copy of `str`.
 *
 * A NULL `str` is stored as the empty string instead of being passed to
 * strlen(). Returns the new value, or NULL if either the Value or the
 * string copy cannot be allocated.
 */
Value *value_create_string(const char *str)
{
    const char *src = (str != NULL) ? str : "";

    Value *value = malloc(sizeof *value);
    if (value == NULL) {
        return NULL;
    }

    /* Assumes strdup_custom reports failure by returning NULL. */
    char *copy = strdup_custom(src);
    if (copy == NULL) {
        free(value);
        return NULL;
    }

    value->type = VALUE_STRING;
    value->data.string_value.string = copy;
    value->data.string_value.length = (int)strlen(src);
    return value;
}

3. 环境（Environment）

变量需要存储在哪里？答案是环境。环境是一个作用域概念，保存了变量名到值的映射：

struct Env { Env *parent; // 父作用域（用于闭包） char *names[ENV_VAR_MAX]; // 变量名 Value *values[ENV_VAR_MAX]; // 对应的值 int count; // 当前作用域变量数量 }; typedef struct Env Env;

全局环境：解释器初始化时创建一个全局环境，用于存储全局变量和函数定义。

局部环境：函数调用时创建一个新的局部环境，父环境指向调用者的环境：

/*
 * Create an empty scope whose lookups fall back to `parent` (may be NULL).
 *
 * Returns the new scope, or NULL if the allocation fails.
 */
Env *env_create(Env *parent)
{
    Env *env = malloc(sizeof *env);
    if (env == NULL) {
        return NULL;
    }

    env->parent = parent;
    env->count = 0; /* slots at index >= count are never read */
    return env;
}

/*
 * Append `name` -> `value` to `env`.
 *
 * The name is copied; the value pointer is stored as given.
 * Returns false, leaving `env` unchanged and `value` untouched, when
 * `env` or `name` is NULL, when the scope already holds ENV_VAR_MAX
 * entries, or when the name cannot be copied.
 */
bool env_define(Env *env, const char *name, Value *value)
{
    if (env == NULL || name == NULL) {
        return false;
    }
    if (env->count >= ENV_VAR_MAX) {
        return false; /* table is full: refuse rather than write past the arrays */
    }

    /* Assumes strdup_custom reports failure by returning NULL. */
    char *key = strdup_custom(name);
    if (key == NULL) {
        return false;
    }

    env->names[env->count] = key;
    env->values[env->count] = value;
    env->count++;
    return true;
}

/*
 * Look `name` up in `env` and then in each parent scope in turn.
 *
 * Returns the stored pointer (not a copy), or NULL when no scope in the
 * chain defines the name.
 */
Value *env_get(Env *env, const char *name)
{
    if (name == NULL) {
        return NULL;
    }

    for (Env *scope = env; scope != NULL; scope = scope->parent) {
        for (int i = 0; i < scope->count; i++) {
            if (strcmp(scope->names[i], name) == 0) {
                return scope->values[i];
            }
        }
    }
    return NULL;
}

/*
 * Store `value` under `name` in `env`.
 *
 * Only the current scope is searched; parent scopes are not consulted, so
 * a name that exists only in a parent is defined afresh in `env`.
 * Returns true when the value was stored, false when a new entry was
 * needed but env_define refused it.
 *
 * NOTE(review): when an existing entry is replaced, the previous pointer is
 * overwritten without being released here — confirm who owns the old value.
 */
bool env_set(Env *env, const char *name, Value *value)
{
    if (env == NULL || name == NULL) {
        return false;
    }

    for (int i = 0; i < env->count; i++) {
        if (strcmp(env->names[i], name) == 0) {
            env->values[i] = value;
            return true;
        }
    }
    return env_define(env, name, value); /* not found: define in the current scope */
}

4. 二元表达式求值

二元表达式的求值模式很固定：先求左操作数，再求右操作数，然后根据操作符类型计算结果：

static Value *eval_binary(Evaluator *eval, Env *env, TokenType op, ASTNode *left_node, ASTNode *right_node) { Value *left = eval_expression(eval, env, left_node); Value *right = eval_expression(eval, env, right_node);  Value *result = NULL;  switch (op) { case TOKEN_PLUS: if (left->type == VALUE_STRING && right->type == VALUE_STRING) { // 字符串连接 size_t len = strlen(left->data.string_value.string) + strlen(right->data.string_value.string) + 1; char *buf = (char *)malloc(len); strcpy(buf, left->data.string_value.string); strcat(buf, right->data.string_value.string); result = value_create_string(buf); free(buf); } else if (left->type == VALUE_INT && right->type == VALUE_INT) { result = value_create_int(left->data.int_value + right->data.int_value); } else { double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value; double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value; result = value_create_double(lv + rv); } break;  case TOKEN_MINUS: if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_int(left->data.int_value - right->data.int_value); else { double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value; double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value; result = value_create_double(lv - rv); } break;  case TOKEN_STAR: if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_int(left->data.int_value * right->data.int_value); else { double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value; double rv = right->type == VALUE_INT ? 
right->data.int_value : right->data.double_value; result = value_create_double(lv * rv); } break;  case TOKEN_SLASH: if (right->type == VALUE_INT && right->data.int_value == 0) result = value_create_null(); // 除零返回 null else if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_int(left->data.int_value / right->data.int_value); else { double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value; double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value; result = value_create_double(lv / rv); } break;  case TOKEN_EQ: if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_bool(left->data.int_value == right->data.int_value); else if (left->type == VALUE_BOOL && right->type == VALUE_BOOL) result = value_create_bool(left->data.bool_value == right->data.bool_value); else result = value_create_bool(false); break;  case TOKEN_NE: if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_bool(left->data.int_value != right->data.int_value); else if (left->type == VALUE_BOOL && right->type == VALUE_BOOL) result = value_create_bool(left->data.bool_value != right->data.bool_value); else result = value_create_bool(true); break;  case TOKEN_LT: if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_bool(left->data.int_value < right->data.int_value); else { double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value; double rv = right->type == VALUE_INT ? right->data.int_value : right->data.double_value; result = value_create_bool(lv < rv); } break;  case TOKEN_GT: if (left->type == VALUE_INT && right->type == VALUE_INT) result = value_create_bool(left->data.int_value > right->data.int_value); else { double lv = left->type == VALUE_INT ? left->data.int_value : left->data.double_value; double rv = right->type == VALUE_INT ? 
right->data.int_value : right->data.double_value; result = value_create_bool(lv > rv); } break; }  value_free(left); value_free(right); return result; }

关键细节：操作数用完后要value_free释放，否则会造成内存泄漏。同时注意eval_expression返回的值所有权归调用者，所以eval_binary负责释放左右操作数，结果的所有权交给上一级。

5. 函数调用

函数调用是最复杂的部分，需要：

从环境中查找函数
创建新的局部环境，绑定参数
执行函数体
返回结果

/*
 * Call the function bound to `name`.
 *
 * Steps: look the function up through `env`, create a fresh scope whose
 * parent is the caller's `env`, bind each parameter, run the body in the
 * new scope, free the scope and return the body's result.
 *
 * - Arguments are evaluated in the caller's `env`, not in the new scope.
 * - A parameter with no matching argument is bound to a null value so it
 *   cannot accidentally resolve to a variable of the caller.
 * - Arguments beyond the parameter count are not evaluated.
 * - Returns a null value when `name` is not a function, when the scope
 *   cannot be created, or when the body yields no result.
 */
static Value *eval_call(Evaluator *eval, Env *env, const char *name, ASTNode **args, int arg_count)
{
    Value *func = env_get(env, name);
    if (func == NULL || func->type != VALUE_FUNCTION) {
        return value_create_null();
    }

    /* New local scope; lookups that miss fall through to the caller's env. */
    Env *local_env = env_create(env);
    if (local_env == NULL) {
        return value_create_null();
    }

    int param_count = func->data.function.param_count;
    for (int i = 0; i < param_count; i++) {
        Value *arg_val = (i < arg_count)
                             ? eval_expression(eval, env, args[i])
                             : value_create_null();

        /* If the scope refuses the binding, the value is still ours to release. */
        if (!env_define(local_env, func->data.function.params[i], arg_val) && arg_val != NULL) {
            value_free(arg_val);
        }
    }

    Value *result = eval_block(eval, local_env, func->data.function.body);

    env_free(local_env);
    return result ? result : value_create_null();
}

一个重要细节：参数求值时使用的是调用者的环境（env），而不是新创建的局部环境（local_env）。这样在 a = 10; foo(a) 中，实参 a 才能在调用者的作用域里被正确解析为 10。

6. 代码块求值

/*
 * Run the statements of `block` in `env`.
 *
 * `*returned` is set to true when a return statement was executed, either
 * directly in this block or inside a nested branch/block; the caller must
 * then stop executing and pass the value upwards. When the block simply
 * runs to its end, `*returned` is left unchanged and the value of the last
 * statement (or a null value) is returned.
 */
static Value *eval_block_flow(Evaluator *eval, Env *env, ASTNode *block, bool *returned)
{
    Value *result = NULL;

    if (block == NULL) {
        return value_create_null();
    }

    for (int i = 0; i < block->data.block.statement_count; i++) {
        ASTNode *stmt = block->data.block.statements[i];

        switch (stmt->type) {
        case AST_VAR_DECL: {
            /* A declaration without initializer gets a null value, like `return;`. */
            ASTNode *init_node = stmt->data.var_decl.initializer;
            Value *init = init_node ? eval_expression(eval, env, init_node)
                                    : value_create_null();
            if (!env_define(env, stmt->data.var_decl.name, init) && init != NULL) {
                value_free(init); /* binding refused: the value was not stored */
            }
            break;
        }

        case AST_RETURN_STMT:
            if (result) {
                value_free(result);
            }
            *returned = true;
            return stmt->data.return_stmt.value
                       ? eval_expression(eval, env, stmt->data.return_stmt.value)
                       : value_create_null();

        case AST_IF_STMT: {
            Value *cond = eval_expression(eval, env, stmt->data.if_stmt.condition);
            /* Only a boolean `true` selects the then-branch. */
            bool cond_true = (cond && cond->type == VALUE_BOOL && cond->data.bool_value);
            if (cond) {
                value_free(cond);
            }

            ASTNode *branch = cond_true ? stmt->data.if_stmt.then_branch
                                        : stmt->data.if_stmt.else_branch;
            if (result) {
                value_free(result);
            }

            if (branch) {
                /* The branch runs in the same env as the enclosing block. */
                result = eval_block_flow(eval, env, branch, returned);
                if (*returned) {
                    return result; /* a return inside the branch ends this block too */
                }
                /* No return was hit: keep going with the next statement. */
            } else {
                result = value_create_null();
            }
            break;
        }

        case AST_EXPR_STMT: {
            if (result) {
                value_free(result);
            }
            /* NOTE(review): the expression is read through the return_stmt
             * member of the node — confirm this is the intended field. */
            result = stmt->data.return_stmt.value
                         ? eval_expression(eval, env, stmt->data.return_stmt.value)
                         : value_create_null();
            break;
        }

        case AST_FN_DECL: {
            Value *fn = value_create_function(
                stmt->data.fn_decl.name,
                stmt->data.fn_decl.params,
                stmt->data.fn_decl.param_count,
                stmt->data.fn_decl.body,
                NULL);
            if (!env_define(env, stmt->data.fn_decl.name, fn) && fn != NULL) {
                value_free(fn);
            }
            break;
        }

        case AST_BLOCK_STMT:
            /* Nested block: executed like a top-level block statement. */
            if (result) {
                value_free(result);
            }
            result = eval_block_flow(eval, env, stmt, returned);
            if (*returned) {
                return result;
            }
            break;

        default:
            break;
        }
    }

    return result ? result : value_create_null();
}

/*
 * Execute `block` in `env` and return its value.
 *
 * The value is that of the executed return statement if there was one,
 * otherwise the value of the last statement, otherwise a null value.
 * An `if` whose branch finishes without executing a return no longer ends
 * the enclosing block: the statements after it are executed.
 */
static Value *eval_block(Evaluator *eval, Env *env, ASTNode *block)
{
    bool returned = false;
    return eval_block_flow(eval, env, block, &returned);
}

注意：return语句会立即返回，通过层层return把控制流和值直接传出去，不需要遍历完整个块。

7. 主循环

/*
 * Execute the program's top-level statements in order, using the
 * evaluator's global environment.
 *
 * Returns the value of the first top-level return statement, or the result
 * of the first taken if-branch or block statement; a null value when the
 * program runs to its end.
 *
 * NOTE(review): a taken if-branch and a block statement both end the run,
 * whether or not a return was executed inside them — confirm this is intended.
 */
Value *evaluator_run(Evaluator *eval)
{
    for (int idx = 0; idx < eval->ast->data.program.statement_count; idx++) {
        ASTNode *stmt = eval->ast->data.program.statements[idx];

        if (stmt->type == AST_VAR_DECL) {
            /* Global variable: evaluate the initializer and bind the name. */
            Value *initial = eval_expression(eval, eval->global_env,
                                             stmt->data.var_decl.initializer);
            env_define(eval->global_env, stmt->data.var_decl.name, initial);
        } else if (stmt->type == AST_FN_DECL) {
            /* Global function: wrap the declaration in a function value. */
            Value *callable = value_create_function(
                stmt->data.fn_decl.name,
                stmt->data.fn_decl.params,
                stmt->data.fn_decl.param_count,
                stmt->data.fn_decl.body,
                NULL);
            env_define(eval->global_env, stmt->data.fn_decl.name, callable);
        } else if (stmt->type == AST_EXPR_STMT) {
            /* The expression's value is dropped without being released.
             * NOTE(review): confirm whether it should be passed to value_free.
             * The expression is read through the return_stmt member — verify. */
            (void)eval_expression(eval, eval->global_env, stmt->data.return_stmt.value);
        } else if (stmt->type == AST_RETURN_STMT) {
            ASTNode *expr = stmt->data.return_stmt.value;
            if (!expr) {
                return value_create_null();
            }
            return eval_expression(eval, eval->global_env, expr);
        } else if (stmt->type == AST_IF_STMT) {
            Value *cond = eval_expression(eval, eval->global_env,
                                          stmt->data.if_stmt.condition);
            /* Only a boolean `true` selects the then-branch. */
            bool taken = cond && cond->type == VALUE_BOOL && cond->data.bool_value;
            if (cond) {
                value_free(cond);
            }

            ASTNode *chosen = stmt->data.if_stmt.else_branch;
            if (taken) {
                chosen = stmt->data.if_stmt.then_branch;
            }
            if (chosen) {
                return eval_block(eval, eval->global_env, chosen);
            }
        } else if (stmt->type == AST_BLOCK_STMT) {
            return eval_block(eval, eval->global_env, stmt);
        }
    }

    return value_create_null();
}

8. 内存管理：所有权与生命周期

解释器最难的部分是内存管理。每个eval_expression调用返回一个Value*，调用者负责释放。规则如下：

操作	所有权语义（值由谁释放）
`eval_binary`	释放左右操作数，返回结果（由上层释放）
`eval_unary`	释放操作数，返回结果
`eval_expression`（字面量）	返回新分配的值
`env_get`	返回借用的指针，不释放
`env_set`	接管传入值的所有权

赋值表达式是内存管理最容易出错的地方：

case AST_ASSIGN_EXPR: {
    /*
     * Assignment: evaluate the right-hand side, store it under the target
     * name, and yield the assigned value.
     *
     * Once env_set succeeds the environment holds `v`. Callers of
     * eval_expression release what they receive, so handing back the stored
     * pointer itself would leave the environment with a freed value. The
     * caller therefore gets an independent copy.
     */
    Value *v = eval_expression(eval, env, node->data.assign_expr.value);

    if (!env_set(env, node->data.assign_expr.name, v)) {
        /* Not stored: the value is still ours and must be released here. */
        if (v != NULL) {
            value_free(v);
        }
        return value_create_null();
    }
    if (v == NULL) {
        return value_create_null();
    }

    switch (v->type) {
    case VALUE_STRING:
        /* value_create_string duplicates the text. */
        return value_create_string(v->data.string_value.string);

    case VALUE_FUNCTION:
        return value_create_function(
            v->data.function.name,
            v->data.function.params,
            v->data.function.param_count,
            v->data.function.body,
            v->data.function.closure);

    case VALUE_NULL:
        return value_create_null();

    default: {
        /* Scalar payloads live inside the struct, so a struct copy is a
         * complete copy. Assumes value_free accepts a malloc'ed Value, as
         * produced by value_create_int. */
        Value *copy = (Value *)malloc(sizeof *copy);
        if (copy == NULL) {
            return value_create_null();
        }
        *copy = *v;
        return copy;
    }
    }
}

env_get返回的是借用（borrowed）引用：值仍归环境所有，调用者不能对它调用value_free。因此，如果把env_get的结果直接作为表达式的值返回，上层就不能再按"调用者负责释放"的规则去释放它，否则环境中会留下悬空指针；更稳妥的做法是返回一份拷贝。而env_set失败时，v的所有权仍在调用者手中，必须自行释放。

9. 示例程序执行过程

以一个具体程序为例：

let x = 10; let y = 20; fn add(a, b) { return a + b; } let sum = add(x, y);

Step 1：词法分析，生成 Token 序列。

Step 2：语法分析，生成 AST：

Program:
  VarDecl: x = IntLiteral(10)
  VarDecl: y = IntLiteral(20)
  FnDecl: add(params: [a, b])
    Block:
      ReturnStmt:
        BinaryExpr(+)
          Ident(a)
          Ident(b)
  VarDecl: sum = CallExpr(add, [Ident(x), Ident(y)])

Step 3：解释执行。

evaluator_run遍历顶层语句：

let x = 10：在全局环境定义x = Value(10)
let y = 20：在全局环境定义y = Value(20)
fn add(a, b) { ... }：在全局环境定义add = Value(Fn)
let sum = add(x, y)：
- eval_expression求值右侧CallExpr
- eval_call查找add，创建新环境local_env
- 绑定a = Value(10)，b = Value(20)
- eval_block执行return a + b
  - eval_expression求值a + b→Value(30)
  - 遇到return，立即返回Value(30)
- Value(30)存入环境，变量sum=Value(30)