#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

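# Test cases for the ai-rate-limiting plugin: token-based rate limiting for
# routes served by the ai-proxy / ai-proxy-multi plugins.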
use t::APISIX 'no_plan';

log_level("info");
repeat_each(1);
no_long_string();
no_root_location();


# Read and echo the shared ai-proxy response fixture.
my $resp_file = 't/assets/ai-proxy-response.json';
open(my $fh, '<', $resp_file) or die "Could not open file '$resp_file' $!";
my $resp = do { local $/; <$fh> };
close($fh);

print "Hello, World!\n";
print $resp;

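# The block preprocessor below injects a mock OpenAI-compatible upstream
# (server_name "openai", listening on port 16724) into each block's
# http_config, giving the ai-proxy / ai-proxy-multi plugins under test a
# deterministic endpoint to call.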
add_block_preprocessor(sub {
    my ($block) = @_;

    if (!defined $block->request) {
        $block->set_value("request", "GET /t");
    }

    my $http_config = $block->http_config // <<_EOC_;
        server {
            server_name openai;
            listen 16724;

            default_type 'application/json';

            location /anything {
                content_by_lua_block {
                    local json = require("cjson.safe")

                    if ngx.req.get_method() ~= "POST" then
                        ngx.status = 400
                        ngx.say("Unsupported request method: ", ngx.req.get_method())
                    end
                    ngx.req.read_body()
                    local body = ngx.req.get_body_data()

                    if body ~= "SELECT * FROM STUDENTS" then
                        ngx.status = 503
                        ngx.say("passthrough doesn't work")
                        return
                    end
                    ngx.say('{"foo", "bar"}')
                }
            }

            location /v1/chat/completions {
                content_by_lua_block {
                    local json = require("cjson.safe")

                    if ngx.req.get_method() ~= "POST" then
                        ngx.status = 400
                        ngx.say("Unsupported request method: ", ngx.req.get_method())
                    end
                    ngx.req.read_body()
                    local body, err = ngx.req.get_body_data()
                    body, err = json.decode(body)

                    local test_type = ngx.req.get_headers()["test-type"]
                    if test_type == "options" then
                        if body.foo == "bar" then
                            ngx.status = 200
                            ngx.say("options works")
                        else
                            ngx.status = 500
                            ngx.say("model options feature doesn't work")
                        end
                        return
                    end

                    local header_auth = ngx.req.get_headers()["authorization"]
                    local query_auth = ngx.req.get_uri_args()["apikey"]

                    if header_auth ~= "Bearer token" and query_auth ~= "apikey" then
                        ngx.status = 401
                        ngx.say("Unauthorized")
                        return
                    end

                    if header_auth == "Bearer token" or query_auth == "apikey" then
                        ngx.req.read_body()
                        local body, err = ngx.req.get_body_data()
                        body, err = json.decode(body)

                        if not body.messages or #body.messages < 1 then
                            ngx.status = 400
                            ngx.say([[{ "error": "bad request"}]])
                            return
                        end

                        if body.messages[1].content == "write an SQL query to get all rows from student table" then
                            ngx.print("SELECT * FROM STUDENTS")
                            return
                        end

                        ngx.status = 200
                        ngx.say(string.format([[
{
    "choices": [
        {
            "finish_reason": "stop",
            "index": 0,
            "message": { "content": "1 + 1 = 2.", "role": "assistant" }
        }
    ],
    "created": 1723780938,
    "id": "chatcmpl-9wiSIg5LYrrpxwsr2PubSQnbtod1P",
    "model": "%s",
    "object": "chat.completion",
    "system_fingerprint": "fp_abc28019ad",
    "usage": { "completion_tokens": 5, "prompt_tokens": 8, "total_tokens": 10 }
}
]], body.model))
                        return
                    end


                    ngx.status = 503
                    ngx.say("reached the end of the test suite")
                }
            }

            location /random {
                content_by_lua_block {
                    ngx.say("path override works")
                }
            }
        }
_EOC_

    $block->set_value("http_config", $http_config);
});

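# run_tests() executes every Test::Nginx block defined in the __DATA__ section below.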
run_tests();

__DATA__

=== TEST 1: sanity
--- config
    location /t {
        content_by_lua_block {
            local configs = {
                {
                    time_window = 60,
                },
                {
                    limit = 30,
                },
                {
                    limit = 30,
                    time_window = 60,
                    rejected_code = 199,
                },
                {
                    limit = 30,
                    time_window = 60,
                    limit_strategy = "invalid",
                },
                {
                    limit = 30,
                    time_window = 60,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        },
                        {
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    time_window = 60,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    limit = 30,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                },
                {
                    limit = 30,
                    time_window = 60,
                    rejected_code = 403,
                    rejected_msg = "rate limit exceeded",
                    limit_strategy = "completion_tokens",
                },
                {
                    limit = 30,
                    time_window = 60,
                    instances = {
                        {
                            name = "instance1",
                            limit = 30,
                            time_window = 60,
                        }
                    },
                }
            }
            local core = require("apisix.core")
            local plugin = require("apisix.plugins.ai-rate-limiting")
            for _, config in ipairs(configs) do
                local ok, err = plugin.check_schema(config)
                if not ok then
                    ngx.say(err)
                else
                    ngx.say("passed")
                end
            end
            ngx.say("done")
        }
    }
--- response_body
property "limit" is required when "time_window" is set
property "time_window" is required when "limit" is set
property "rejected_code" validation failed: expected 199 to be at least 200
property "limit_strategy" validation failed: matches none of the enum values
property "instances" validation failed: failed to validate item 2: property "name" is required
property "limit" is required when "time_window" is set
property "time_window" is required when "limit" is set
passed
passed
passed
done


=== TEST 2: set route 1, default limit_strategy: total_tokens
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy": {
                            "provider": "openai",
                            "auth": {
                                "header": {
                                    "Authorization": "Bearer token"
                                }
                            },
                            "options": {
                                "model": "gpt-35-turbo-instruct",
                                "max_tokens": 512,
                                "temperature": 1.0
                            },
                            "override": {
                                "endpoint": "http://localhost:16724"
                            },
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 30,
                            "time_window": 60
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 3: reject the 4th request
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 503]


=== TEST 4: set rejected_code to 403, rejected_msg to "rate limit exceeded"
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy": {
                            "provider": "openai",
                            "auth": {
                                "header": {
                                    "Authorization": "Bearer token"
                                }
                            },
                            "options": {
                                "model": "gpt-35-turbo-instruct",
                                "max_tokens": 512,
                                "temperature": 1.0
                            },
                            "override": {
                                "endpoint": "http://localhost:16724"
                            },
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 30,
                            "time_window": 60,
                            "rejected_code": 403,
                            "rejected_msg": "rate limit exceeded"
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 5: check code and message
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 403]
--- response_body eval
[
    qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
    qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
    qr/\{ "content": "1 \+ 1 = 2\.", "role": "assistant" \}/,
    qr/\{"error_msg":"rate limit exceeded"\}/,
]


=== TEST 6: check rate limit headers
--- request
POST /ai
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
--- more_headers
Authorization: Bearer token
--- response_headers
X-AI-RateLimit-Limit-ai-proxy: 30
X-AI-RateLimit-Remaining-ai-proxy: 29
X-AI-RateLimit-Reset-ai-proxy: 60


=== TEST 7: check rate limit headers after 4 requests
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 403]
--- response_headers eval
[
    "X-AI-RateLimit-Remaining-ai-proxy: 29",
    "X-AI-RateLimit-Remaining-ai-proxy: 19",
    "X-AI-RateLimit-Remaining-ai-proxy: 9",
    "X-AI-RateLimit-Remaining-ai-proxy: 0",
]


=== TEST 8: set route2 with limit_strategy: completion_tokens
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/2',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai2",
                    "plugins": {
                        "ai-proxy": {
                            "provider": "openai",
                            "auth": {
                                "header": {
                                    "Authorization": "Bearer token"
                                }
                            },
                            "options": {
                                "model": "gpt-35-turbo-instruct",
                                "max_tokens": 512,
                                "temperature": 1.0
                            },
                            "override": {
                                "endpoint": "http://localhost:16724"
                            },
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 20,
                            "time_window": 45,
                            "limit_strategy": "completion_tokens"
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 9: reject the 5th request
--- pipelined_requests eval
[
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 503]


=== TEST 10: check rate limit headers
--- request
POST /ai2
{ "messages": [ { "role": "system", "content": "You are a mathematician" }, { "role": "user", "content": "What is 1+1?"} ] }
--- more_headers
Authorization: Bearer token
--- response_headers
X-AI-RateLimit-Limit-ai-proxy: 20
X-AI-RateLimit-Remaining-ai-proxy: 19
X-AI-RateLimit-Reset-ai-proxy: 45


=== TEST 11: multi-request
--- pipelined_requests eval
[
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 503]
--- response_headers eval
[
    "X-AI-RateLimit-Remaining-ai-proxy: 19",
    "X-AI-RateLimit-Remaining-ai-proxy: 14",
    "X-AI-RateLimit-Remaining-ai-proxy: 9",
    "X-AI-RateLimit-Remaining-ai-proxy: 4",
    "X-AI-RateLimit-Remaining-ai-proxy: 0",
]


=== TEST 12: request route 1 and route 2
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai2\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 403, 503]


=== TEST 13: ai-rate-limiting & ai-proxy-multi, with instance_health_and_rate_limiting strategy
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 10,
                            "time_window": 60
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 14: fallback strategy should work
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local core = require("apisix.core")
            local code, _, body = t("/ai",
                ngx.HTTP_POST,
                [[{
                    "messages": [
                        { "role": "system", "content": "You are a mathematician" },
                        { "role": "user", "content": "What is 1+1?" }
                    ]
                }]],
                nil,
                {
                    ["test-type"] = "options",
                    ["Content-Type"] = "application/json",
                }
            )

            assert(code == 200, "first request should be successful")
            assert(core.string.find(body, "gpt-4"),
                "first request should be handled by higher priority instance")

            local code, _, body = t("/ai",
                ngx.HTTP_POST,
                [[{
                    "messages": [
                        { "role": "system", "content": "You are a mathematician" },
                        { "role": "user", "content": "What is 1+1?" }
                    ]
                }]],
                nil,
                {
                    ["test-type"] = "options",
                    ["Content-Type"] = "application/json",
                }
            )

            assert(code == 200, "second request should be successful")
            assert(core.string.find(body, "gpt-3"),
                "second request should be handled by lower priority instance")

            local code, body = t("/ai",
                ngx.HTTP_POST,
                [[{
                    "messages": [
                        { "role": "system", "content": "You are a mathematician" },
                        { "role": "user", "content": "What is 1+1?" }
                    ]
                }]],
                nil,
                {
                    ["test-type"] = "options",
                    ["Content-Type"] = "application/json",
                }
            )

            assert(code == 503, "third request should fail")
            assert(core.string.find(body, "all servers tried"), "all servers tried")

            ngx.say("passed")
        }
    }
--- response_body
passed


=== TEST 15: limiting to only one instance
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "limit": 20,
                                    "time_window": 60
                                }
                            ]
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 16: 10 requests, 8 should be handled by gpt-3, 2 should be handled by gpt-4
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local core = require("apisix.core")

            local instances_count = {}
            for i = 1, 10 do
                local code, _, body = t("/ai",
                    ngx.HTTP_POST,
                    [[{
                        "messages": [
                            { "role": "system", "content": "You are a mathematician" },
                            { "role": "user", "content": "What is 1+1?" }
                        ]
                    }]],
                    nil,
                    {
                        ["test-type"] = "options",
                        ["Content-Type"] = "application/json",
                    }
                )
                assert(code == 200, "request should be successful")
                if core.string.find(body, "gpt-4") then
                    instances_count["gpt-4"] = (instances_count["gpt-4"] or 0) + 1
                else
                    instances_count["gpt-3"] = (instances_count["gpt-3"] or 0) + 1
                end
            end

            ngx.log(ngx.INFO, "instances_count test:", core.json.delay_encode(instances_count))

            assert(instances_count["gpt-4"] <= 2, "at most 2 requests should be served by the gpt-4 instance")
            assert(instances_count["gpt-3"] >= 8, "at least 8 requests should be served by the gpt-3 instance")
            ngx.say("passed")
        }
    }
--- response_body
passed


=== TEST 17: each instance uses a different rate limit
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "instances": [
                                {
                                    "name": "openai-gpt3",
                                    "limit": 50,
                                    "time_window": 60
                                },
                                {
                                    "name": "openai-gpt4",
                                    "limit": 20,
                                    "time_window": 60
                                }
                            ]
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 18: gpt-3 allows 5 requests, gpt-4 allows 2 requests
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 503, 503]


=== TEST 19: set limit & instances
--- config
    location /t {
        content_by_lua_block {
            local t = require("lib.test_admin").test
            local code, body = t('/apisix/admin/routes/1',
                ngx.HTTP_PUT,
                [[{
                    "uri": "/ai",
                    "plugins": {
                        "ai-proxy-multi": {
                            "fallback_strategy": "instance_health_and_rate_limiting",
                            "instances": [
                                {
                                    "name": "openai-gpt4",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 1,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-4"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                },
                                {
                                    "name": "openai-gpt3",
                                    "provider": "openai",
                                    "weight": 1,
                                    "priority": 0,
                                    "auth": {
                                        "header": {
                                            "Authorization": "Bearer token"
                                        }
                                    },
                                    "options": {
                                        "model": "gpt-3"
                                    },
                                    "override": {
                                        "endpoint": "http://localhost:16724"
                                    }
                                }
                            ],
                            "ssl_verify": false
                        },
                        "ai-rate-limiting": {
                            "limit": 20,
                            "time_window": 60,
                            "instances": [
                                {
                                    "name": "openai-gpt3",
                                    "limit": 50,
                                    "time_window": 60
                                }
                            ]
                        }
                    },
                    "upstream": {
                        "type": "roundrobin",
                        "nodes": {
                            "canbeanything.com": 1
                        }
                    }
                }]]
            )

            if code >= 300 then
                ngx.status = code
            end
            ngx.say(body)
        }
    }
--- response_body
passed


=== TEST 20: gpt-3 allows 5 requests, gpt-4 allows 2 requests
--- pipelined_requests eval
[
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
    "POST /ai\n" . "{ \"messages\": [ { \"role\": \"system\", \"content\": \"You are a mathematician\" }, { \"role\": \"user\", \"content\": \"What is 1+1?\"} ] }",
]
--- more_headers
Authorization: Bearer token
--- error_code eval
[200, 200, 200, 200, 200, 200, 200, 503, 503]