Huggingface Serverless free LLM inference API
```python
from flask import Flask, request, jsonify, send_file
import requests

app = Flask(__name__)

LLModel = "bigscience/bloom"
ApiUrl = "https://api-inference.huggingface.co/models/" + LLModel
minOutputTokens = 500000
headers = {"Authorization": "Bearer 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"}

# conversation history kept in module-level lists
past_user_inputs = []
generated_responses = []


def query(api_url, headers, payload, min_length):
    """POST the payload to the Hugging Face Inference API and return the parsed JSON."""
    try:
        payload["min_length"] = min_length
        response = requests.post(api_url, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print("Error in processing the request:", e)
        return None


@app.route('/')
def index():
    return send_file('static/index.html')


@app.route('/chat', methods=['POST'])
def chat():
    user_message = request.json.get('message')
    model_path = ApiUrl
    if model_path:
        payload = {
            "inputs": user_message,
            "past_user_inputs": past_user_inputs,
            "generated_responses": generated_responses
        }
        bot_response = query(model_path, headers=headers, payload=payload, min_length=minOutputTokens)
        if bot_response is not None:
            past_user_inputs.append(user_message)
            generated_responses.append(bot_response[0]['generated_text'])
            response_json = {"message": bot_response,
                             "past_user_inputs": past_user_inputs,
                             "generated_responses": generated_responses}
            print(response_json)
            return jsonify(response_json)
        else:
            return jsonify({"message": "Error in processing the request."}), 500
    else:
        return jsonify({"message": "Model path not provided."}), 400


if __name__ == '__main__':
    app.run(debug=True)
```
I really can't figure out why my min_length doesn't affect the output at all.
Thanks!
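
For reference, this is the payload shape I think the Inference API docs describe, with generation settings nested under a `parameters` key rather than at the top level of the payload; maybe that's related. The prompt, token, and values below are just placeholders, and I'm not even sure `min_length` is honored for the text-generation task (`max_new_tokens` is the documented length control):

```python
import requests

# Sketch of the documented payload shape for the serverless Inference API:
# {"inputs": ..., "parameters": {...}, "options": {...}}
# Token, prompt, and numbers are placeholders, not a confirmed fix.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
headers = {"Authorization": "Bearer hf_xxx"}  # placeholder token

payload = {
    "inputs": "Hello, my name is",
    "parameters": {
        "min_length": 100,       # nested here instead of the payload root
        "max_new_tokens": 250,   # documented length control for text generation
    },
    "options": {"wait_for_model": True},  # wait instead of erroring while the model loads
}

response = requests.post(API_URL, headers=headers, json=payload)
response.raise_for_status()
print(response.json())
```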
