一、vllm后端
1. 使用两张GPU运行通过transformers下载的模型:
vllm serve /home/plc/cache/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/ --trust-remote-code --tensor-parallel-size 2 --dtype float16 --gpu-memory-utilization 0.9 --max-model-len 2048 --max-num-seqs 10 --worker-use-ray --port 8001 --host 0.0.0.0
2. 注意:一定要定位到snapshots文件夹。Qwen2.5-3B的模型输出不稳定,经常出现重复、乱码等问题。使用Qwen2.5-7B,可以有效缓解上述问题。
二、gradio前端
1. 你需要调整系统提示词和模型温度。
import gradio as gr import sqlite3 from langchain_openai import ChatOpenAI def get_glm(temperature): model = ChatOpenAI( model_name="/home/plc/cache/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/", openai_api_base="http://localhost:8001/v1", openai_api_key="token", streaming=True, temperature=temperature, ) return model class ChatbotApp: def __init__(self): self.create_feedback_db() self.max_history_length = 3 self.like_data = None self.current_state = None def create_feedback_db(self): conn = sqlite3.connect('/home/plc/feedback.db') cu = conn.cursor() create_sql = ''' CREATE TABLE IF NOT EXISTS feedback ( id INTEGER PRIMARY KEY AUTOINCREMENT, satisfaction INTEGER, question TEXT, answer TEXT, feedback_text TEXT ); ''' try: cu.execute(create_sql) conn.commit() conn.close() except Exception as e: print("Failed to create feedback:", e) def generate_text(self, prompt): llm = get_glm(0.7) full_text = "" for chunk in llm.stream(prompt): full_text += chunk.content yield full_text def user(self, user_message, history, state): history.append([user_message, None]) state.append([user_message, None]) if len(state) > self.max_history_length: state = state[-self.max_history_length:] return "", history, state def bot(self, history, state): if not state or len(state) == 0: yield history, state return prompt = "假设你是PLC领域的专家,你需要仔细思考用户提出的问题,然后告诉用户你的思考步骤,最后参考你的思考步骤告诉用户答案。" for h in state: prompt += f"用户提问: {h[0]}\n回答: {h[1]}\n" if h[1] else f"用户提问: {h[0]}\n回答: " response_generator = self.generate_text(prompt) state[-1][1] = "" for response in response_generator: state[-1][1] = response history[-1][1] = response yield history, state def record_like_dislike(self, x: gr.LikeData, state): self.like_data = x self.current_state = state.copy() if not x.liked: return [ gr.Column(visible=True), # 显示反馈区域 gr.Textbox(visible=True), gr.Button(visible=True) ] else: self._save_feedback_to_db(x, state, "") return [ gr.Column(visible=False), gr.Textbox(visible=False), 
gr.Button(visible=False) ] def submit_feedback(self, feedback_text): self._save_feedback_to_db(self.like_data, self.current_state, feedback_text) return [ gr.Column(visible=False), gr.Textbox(visible=False, value=""), gr.Button(visible=False) ] def close_feedback(self): return [ gr.Column(visible=False), gr.Textbox(visible=False, value=""), gr.Button(visible=False) ] def _save_feedback_to_db(self, x, state, feedback_text): conn = sqlite3.connect('/home/plc/feedback.db') cu = conn.cursor() try: question, answer = None, None for item in reversed(state): if item[1] is not None and item[1].strip() == x.value: question, answer = item break cu.execute( "INSERT INTO feedback (question, answer, satisfaction, feedback_text) VALUES (?, ?, ?, ?)", (question, answer, x.liked, feedback_text) ) conn.commit() except Exception as e: print("Failed to record feedback:", e) finally: conn.close() app = ChatbotApp() # 简化的自动滚动JS代码 scroll_js = """ function scrollToBottom() { const chatbot = document.querySelector('.chatbot'); if (chatbot) { chatbot.scrollTop = chatbot.scrollHeight; } } """ with gr.Blocks(css=""" .feedback-modal { position: fixed; top: 50%; left: 50%; transform: translate(-50%, -50%); background: white; padding: 20px; border-radius: 10px; box-shadow: 0 0 10px rgba(0,0,0,0.1); z-index: 1000; width: 80%; max-width: 500px; } """) as demo: gr.Markdown("<h1 style='text-align: center;'>PLC代码生成平台</h1>") # 添加JavaScript代码 gr.HTML(f"<script>{scroll_js}</script>") chatbot = gr.Chatbot(height=500) msg = gr.Textbox() clear = gr.Button("Clear") state = gr.State([]) # 反馈区域 with gr.Column(visible=False, elem_classes="feedback-modal") as feedback_modal: gr.Markdown("### 反馈意见") feedback_textbox = gr.Textbox( label="请告诉我们您不满意的原因", placeholder="请输入您的反馈意见...", lines=3 ) with gr.Row(): submit_feedback_btn = gr.Button("提交反馈", variant="primary") close_btn = gr.Button("取消") # 修改后的提交处理 def process_message(user_message, history, state): # 调用用户函数 empty_msg, new_history, new_state = 
app.user(user_message, history, state) # 返回更新后的状态和触发滚动 return empty_msg, new_history, new_state, gr.HTML("<script>scrollToBottom();</script>") # 修改后的bot处理 def process_bot(history, state): for new_history, new_state in app.bot(history, state): yield new_history, new_state, gr.HTML("<script>scrollToBottom();</script>") # 更新消息提交处理 msg.submit( process_message, [msg, chatbot, state], [msg, chatbot, state, gr.HTML(visible=False)], queue=False ).then( process_bot, [chatbot, state], [chatbot, state, gr.HTML(visible=False)] ) chatbot.like( app.record_like_dislike, [state], [feedback_modal, feedback_textbox, submit_feedback_btn] ) submit_feedback_btn.click( app.submit_feedback, [feedback_textbox], [feedback_modal, feedback_textbox, submit_feedback_btn] ) close_btn.click( app.close_feedback, [], [feedback_modal, feedback_textbox, submit_feedback_btn] ) clear.click(lambda: ([], []), None, [chatbot, state], queue=False) demo.queue() demo.launch(server_name='0.0.0.0',server_port=7870)2. 运行成功后,浏览器打开,输入url:http://localhost:7860。
三、远程服务器部署没有界面怎么办?
1. 你需要进行端口转发,将gradio前端启动后,对应的端口(本demo是7870)需要转发到本地电脑,然后就可以在本地电脑上打开。
2. 操作步骤如下:
cmd打开第一个窗口: 输入:ssh xxx@xxx 输入密码:xxx 输入:conda activate myplc 输入:vllm serve /home/plc/cache/models--Qwen--Qwen2.5-7B/snapshots/d149729398750b98c0af14eb82c78cfe92750796/ --trust-remote-code --tensor-parallel-size 2 --dtype float16 --gpu-memory-utilization 0.9 --max-model-len 2048 --max-num-seqs 10 --worker-use-ray --port 8001 --host 0.0.0.0 cmd打开第二个窗口: 输入:ssh xxx@xxx 输入密码:xxx 输入:conda activate myplc 输入:python3 /home/plc/ui.py cmd打开第三个窗口: 输入:ssh -L 7870:localhost:7870 xxx@xxx 输入密码:xxx 打开谷歌浏览器,访问:http://localhost:7870