
Benthos Usage Examples

Zhang Xiaoming · Front-end Development Engineer

Below are some complete Benthos usage examples:

1. Basic Configuration Example

YAML configuration file:

```yaml
# config.yaml
input:
  gcp_pubsub:
    project: my-project
    subscription: my-subscription

pipeline:
  processors:
    - bloblang: |
        # Transform the data
        root = {
          "id": this.id,
          "processed_at": now(),
          "data": this.body.uppercase()
        }

output:
  file:
    path: "./output/${! timestamp_unix() }.json"
    codec: lines
```
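To preview what this mapping produces without standing up Pub/Sub, you can run it directly through Benthos's public bloblang package. A minimal sketch, using a made-up sample payload:

```go
package main

import (
	"fmt"

	"github.com/benthosdev/benthos/v4/public/bloblang"
)

func main() {
	// The same mapping as in config.yaml above.
	exe, err := bloblang.Parse(`
root = {
  "id": this.id,
  "processed_at": now(),
  "data": this.body.uppercase()
}
`)
	if err != nil {
		panic(err)
	}

	// A hypothetical Pub/Sub payload.
	out, err := exe.Query(map[string]interface{}{
		"id":   "msg-123",
		"body": "hello benthos",
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(out) // map[data:HELLO BENTHOS id:msg-123 processed_at:...]
}
```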

2. CSV Batch Processing Example

```yaml
# batch_csv.yaml
input:
  file:
    paths:
      - ./data/*.csv
    codec: lines

pipeline:
  processors:
    # 1. Parse the CSV
    - parse_csv:
        schema: |
          [{"name":"id","type":"int"},{"name":"name","type":"string"},{"name":"value","type":"float"}]
    # 2. Aggregate into batches (100 records per batch)
    - group_by_value:
        value: ${!batch_index()}
        period: 1s
        size: 100
    # 3. Process each message in the batch
    - for_each:
        - bloblang: |
            root = this
            root.batch_id = uuid_v4()
            root.processed_at = now()

output:
  sql_raw:
    driver: postgres
    dsn: postgres://user:pass@localhost:5432/db
    query: |
      INSERT INTO records (id, name, value, batch_id, processed_at)
      VALUES ($1, $2, $3, $4, $5)
    args_mapping: |
      [ this.id, this.name, this.value, this.batch_id, this.processed_at ]
```
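For a quick local run, a tiny helper (my addition; it assumes a ./data directory already exists) can generate a CSV file matching the schema above:

```go
package main

import "os"

func main() {
	// Two sample rows matching the id/name/value schema above.
	csv := "1,alpha,3.14\n2,beta,2.71\n"
	if err := os.WriteFile("./data/sample.csv", []byte(csv), 0o644); err != nil {
		panic(err)
	}
}
```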

3. Embedding Benthos in Go Code

```go
package main

import (
	"context"
	"fmt"

	"github.com/benthosdev/benthos/v4/public/service"

	// Import all components
	_ "github.com/benthosdev/benthos/v4/public/components/all"
)

func main() {
	// Create a stream builder
	builder := service.NewStreamBuilder()

	// Configure the stream from YAML
	err := builder.SetYAML(`
input:
  generate:
    mapping: |
      root.id = uuid_v4()
      root.timestamp = timestamp_unix()
    interval: 1s
    count: 100

pipeline:
  processors:
    - batch:
        count: 10
        processors:
          - archive:
              format: lines
          - compress:
              algorithm: gzip

output:
  http_client:
    url: http://localhost:8080/api/batch
    verb: POST
    headers:
      Content-Type: application/json
`)
	if err != nil {
		panic(err)
	}

	// Add a custom processor
	builder.AddProcessorFunc(func(ctx context.Context, m *service.Message) ([]*service.Message, error) {
		// Custom processing logic
		body, err := m.AsBytes()
		if err != nil {
			return nil, err
		}
		fmt.Printf("Processing: %s\n", string(body))
		return []*service.Message{m}, nil
	})

	// Build and run the stream
	stream, err := builder.Build()
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	if err := stream.Run(ctx); err != nil {
		panic(err)
	}
}
```
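One refinement worth noting (my addition, not part of the original example): cancelling the context on SIGINT/SIGTERM lets stream.Run shut down gracefully instead of the process dying mid-batch. A minimal standalone sketch:

```go
package main

import (
	"context"
	"os"
	"os/signal"
	"syscall"

	"github.com/benthosdev/benthos/v4/public/service"

	_ "github.com/benthosdev/benthos/v4/public/components/io"
	_ "github.com/benthosdev/benthos/v4/public/components/pure"
)

func main() {
	builder := service.NewStreamBuilder()
	if err := builder.SetYAML(`
input:
  generate:
    mapping: root = "tick"
    interval: 1s
output:
  stdout: {}
`); err != nil {
		panic(err)
	}

	stream, err := builder.Build()
	if err != nil {
		panic(err)
	}

	// Cancelled on Ctrl+C or SIGTERM, so Run returns after draining.
	ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
	defer stop()

	if err := stream.Run(ctx); err != nil {
		panic(err)
	}
}
```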

4. Kafka to Elasticsearch Batch Processing

```yaml
# kafka_to_es.yaml
input:
  kafka:
    addresses:
      - localhost:9092
    topics:
      - logs
    consumer_group: batch-processor
    batching:
      count: 1000  # 1000 records per batch
      period: 30s  # or every 30 seconds

pipeline:
  threads: 4  # parallel processing
  processors:
    # Batch processing
    - bloblang: |
        # Parse the JSON log
        root = this.parse_json()
        root.processed = now()
        root.host = this.host.lowercase()
    # Batch filtering: drop DEBUG logs
    - filter_parts:
        bloblang: |
          this.level != "DEBUG"
    # Batch mapping
    - mapping: |
        root.index = "logs-" + now().ts_format("2006-01-02")
        root.id = this.id
        root.type = "_doc"
        root.body = this

output:
  elasticsearch:
    urls:
      - http://localhost:9200
    index: ${!json().index}
    id: ${!json().id}
    action: index
    max_in_flight: 10  # concurrency
    batching:
      count: 500  # output batch size
      period: 10s
      check: |
        root = this.index.suffix("-bulk") != ""
```
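Note the two layers of batching here: the input batching groups Kafka records as they are consumed, while the output batching controls how many documents go into each Elasticsearch bulk request. Tuning the two independently lets you trade consumption throughput against bulk-request size.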

5. HTTP API Batch Processing Service

```go
// main.go
package main

import (
	"context"
	"net/http"

	"github.com/benthosdev/benthos/v4/public/service"

	_ "github.com/benthosdev/benthos/v4/public/components/io"
	_ "github.com/benthosdev/benthos/v4/public/components/prometheus"
)

func main() {
	// A stream with an HTTP server input and fanned-out outputs
	httpStream := `
input:
  http_server:
    address: 0.0.0.0:8080
    path: /ingest
    allowed_verbs: [POST]

pipeline:
  processors:
    - batch:
        count: 50
        period: 10s
        processors:
          - try:
              - bloblang: |
                  root.items = this.map_each(item -> {
                    "id": item.id,
                    "processed": now(),
                    "status": "pending"
                  })
          - catch:
              - bloblang: |
                  root.error = "batch processing failed"
                  root.items = []
    # Split back into individual items for processing
    - split: {}
    - bloblang: |
        root = this
        root.batch_size = batch_size()

output:
  broker:
    pattern: fan_out
    outputs:
      - redis_streams:
          url: tcp://localhost:6379
          stream: processed_items
          body_key: ""
          max_length: 1000
      - http_client:
          url: http://monitor:9090/metrics
          verb: POST
          headers:
            Content-Type: application/json
          batching:
            count: 100
      - prometheus:
          metric_mapping: |
            batch_size:
              type: counter
              path: batch_size
              labels:
                source: http_ingest
`

	builder := service.NewStreamBuilder()

	// Load the YAML configuration
	if err := builder.SetYAML(httpStream); err != nil {
		panic(err)
	}

	// Register a custom monitoring endpoint on a mux shared with Benthos
	mux := http.NewServeMux()
	mux.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusOK)
		w.Write([]byte(`{"status":"healthy"}`))
	})
	builder.SetHTTPMux(mux)

	stream, err := builder.Build()
	if err != nil {
		panic(err)
	}

	ctx := context.Background()
	if err := stream.Run(ctx); err != nil {
		panic(err)
	}
}
```
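To exercise the service end to end, any HTTP client will do; here is a quick sketch (the payload shape is an assumption for illustration):

```go
package main

import (
	"bytes"
	"fmt"
	"net/http"
)

func main() {
	// A made-up batch of items to ingest.
	payload := []byte(`[{"id": 1}, {"id": 2}]`)

	resp, err := http.Post("http://localhost:8080/ingest", "application/json", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.Status)
}
```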

6. Error Handling and Retries

```yaml
# error_handling.yaml
input:
  kafka:
    addresses: [localhost:9092]
    topics: [orders]
    batching:
      count: 100

pipeline:
  processors:
    - try:
        - bloblang: |
            root = this.parse_json().catch(this)
        - cache:
            resource: redis_cache
            operator: set
            key: "${!this.order_id}"
            value: "${!content()}"
    - catch:
        - bloblang: |
            root.error = error()
            root.original = content()
            root.retry_count = meta("retry_count").or(0) + 1
    - retry:
        max_retries: 3
        backoff:
          initial_interval: 1s
          max_interval: 30s

output:
  broker:
    pattern: try
    outputs:
      # Primary output
      - sql_raw:
          driver: postgres
          dsn: postgres://user:pass@localhost/db
          query: "INSERT INTO orders VALUES ($1, $2, $3)"
          args_mapping: "[this.id, this.amount, this.timestamp]"
      # On failure, fall back to a dead-letter queue
      - kafka:
          addresses: [localhost:9092]
          topic: dead_letter_queue
          key: '${! meta("kafka_key") }'
```
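With `pattern: try`, the broker attempts outputs in the order they are listed and only moves on to the next one when a write fails, so the dead-letter topic receives only the messages that the primary SQL output rejected.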

7. Dynamic Batching Configuration

```yaml
# dynamic_batching.yaml
input:
  generate:
    count: 1000
    interval: 100ms
    mapping: 'root = {"value": random_int()}'

pipeline:
  processors:
    - batch:
        # Dynamic batch size
        count: ${!meta("batch_size").or(50)}
        processors:
          - bloblang: |
              root.items = this
              root.batch_id = uuid_v4()
              root.total = this.map_each(item -> item.value).sum()
    # Adjust the batch size based on content
    - branch:
        processors:
          - bloblang: |
              # Small batches when the total is large, big batches otherwise
              let new_size = if this.total > 1000 { 20 } else { 100 }
              meta batch_size = $new_size
        result_map: 'root = deleted()'  # leave the message body unchanged

output:
  http_client:
    url: http://api:8080/process-batch
    verb: POST
```
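As a sanity check of the size-selection logic (a standalone sketch, not part of the pipeline), the same conditional can be run through the public bloblang package:

```go
package main

import (
	"fmt"

	"github.com/benthosdev/benthos/v4/public/bloblang"
)

func main() {
	// Mirrors the size-selection conditional in the branch processor above.
	exe, err := bloblang.Parse(`root = if this.total > 1000 { 20 } else { 100 }`)
	if err != nil {
		panic(err)
	}

	for _, total := range []int{500, 5000} {
		size, err := exe.Query(map[string]interface{}{"total": total})
		if err != nil {
			panic(err)
		}
		fmt.Printf("total=%d -> batch_size=%v\n", total, size)
	}
}
```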

8. Running and Monitoring

```bash
# 1. Install Benthos
go install github.com/benthosdev/benthos/v4/cmd/benthos@latest

# 2. Run a configuration file
benthos -c config.yaml

# 3. Watch the config and reload on changes
benthos -w -c config.yaml

# 4. Lint and test a configuration
benthos lint config.yaml
benthos test config.yaml

# 5. Inspect runtime metrics
curl http://localhost:4195/stats
```
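The same stats endpoint can also be polled from code; a trivial sketch (4195 is Benthos's default HTTP port):

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	resp, err := http.Get("http://localhost:4195/stats")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(body)) // JSON blob of input/output/processor counters
}
```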

9. Practical Tips

```go
// Custom batch-processing plugin
package main

import (
	"context"

	"github.com/benthosdev/benthos/v4/public/service"
)

// batchTransform prefixes the content of every message in a batch.
type batchTransform struct {
	prefix string
}

func (b *batchTransform) ProcessBatch(ctx context.Context, batch service.MessageBatch) ([]service.MessageBatch, error) {
	for _, msg := range batch {
		body, err := msg.AsBytes()
		if err != nil {
			return nil, err
		}
		msg.SetStructured(map[string]interface{}{
			"prefixed": b.prefix + string(body),
		})
	}
	return []service.MessageBatch{batch}, nil
}

func (b *batchTransform) Close(ctx context.Context) error { return nil }

func init() {
	err := service.RegisterBatchProcessor(
		"batch_transform",
		service.NewConfigSpec().
			Field(service.NewStringField("prefix")),
		func(conf *service.ParsedConfig, mgr *service.Resources) (service.BatchProcessor, error) {
			prefix, err := conf.FieldString("prefix")
			if err != nil {
				return nil, err
			}
			return &batchTransform{prefix: prefix}, nil
		},
	)
	if err != nil {
		panic(err)
	}
}
```
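Once registered, the processor can be referenced from YAML like any built-in component. A minimal usage sketch, assuming the registration code above is compiled into the same binary:

```go
package main

import (
	"context"

	"github.com/benthosdev/benthos/v4/public/service"

	_ "github.com/benthosdev/benthos/v4/public/components/io"
	_ "github.com/benthosdev/benthos/v4/public/components/pure"
)

func main() {
	builder := service.NewStreamBuilder()
	if err := builder.SetYAML(`
input:
  generate:
    mapping: root = "hello"
    count: 3
    interval: ""
pipeline:
  processors:
    - batch_transform:
        prefix: "demo-"
output:
  stdout: {}
`); err != nil {
		panic(err)
	}

	stream, err := builder.Build()
	if err != nil {
		panic(err)
	}
	if err := stream.Run(context.Background()); err != nil {
		panic(err)
	}
}
```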

These examples demonstrate the flexibility and power of Benthos in batch-processing scenarios. You can combine these components to suit your actual needs.
