Files
ReachableCEO 54cc5f7308 feat(apisix): add Cloudron package
- Implements Apache APISIX packaging for Cloudron platform.
- Includes Dockerfile, CloudronManifest.json, and start.sh.
- Configured to use Cloudron's etcd addon.

🤖 Generated with Gemini CLI
Co-Authored-By: Gemini <noreply@google.com>
2025-09-04 09:42:47 -05:00

235 lines
6.5 KiB
Lua

--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
local require = require
local setmetatable = setmetatable
local ipairs = ipairs
local type = type
local core = require("apisix.core")
local limit_count = require("apisix.plugins.limit-count.init")
local plugin_name = "ai-rate-limiting"
-- JSON schema for a single entry of the `instances` array: each entry names
-- an instance and carries its own `limit` / `time_window` pair (all three
-- fields are required, both numbers are integers with minimum 1).
local instance_limit_schema = {
    type = "object",
    required = {"name", "limit", "time_window"},
    properties = {
        time_window = {minimum = 1, type = "integer"},
        limit = {minimum = 1, type = "integer"},
        name = {type = "string"},
    },
}
-- Plugin configuration schema.
-- A conf must supply either the global `limit` + `time_window` pair or an
-- `instances` list (see `anyOf`); `limit` and `time_window` may only be
-- given together (see `dependencies`).
local schema = {
    type = "object",
    anyOf = {
        {required = {"limit", "time_window"}},
        {required = {"instances"}},
    },
    dependencies = {
        time_window = {"limit"},
        limit = {"time_window"},
    },
    properties = {
        -- global limit pair
        limit = {type = "integer", exclusiveMinimum = 0},
        time_window = {type = "integer", exclusiveMinimum = 0},
        -- per-instance limits, at least one entry when present
        instances = {
            type = "array",
            minItems = 1,
            items = instance_limit_schema,
        },
        -- name of the token counter that is read from the usage table
        limit_strategy = {
            type = "string",
            default = "total_tokens",
            enum = {"total_tokens", "prompt_tokens", "completion_tokens"},
            description = "The strategy to limit the tokens",
        },
        show_limit_quota_header = {type = "boolean", default = true},
        -- response used when a request is rejected
        rejected_code = {
            type = "integer", default = 503, minimum = 200, maximum = 599,
        },
        rejected_msg = {type = "string", minLength = 1},
    },
}
-- Plugin module table: identity (name, version), priority and the schema
-- used to validate its configuration.
local _M = {
    name = plugin_name,
    version = 0.1,
    priority = 1030,
    schema = schema,
}
-- LRU cache used by the phase handlers to keep, per plugin conf, the lookup
-- table built by fetch_limit_conf_kvs.
local limit_conf_cache = core.lrucache.new({
    count = 512,
    ttl = 300,
})
--- Validate a plugin configuration against the plugin schema.
-- @param conf  plugin configuration table to validate
-- @return whatever `core.schema.check` returns for (schema, conf)
function _M.check_schema(conf)
    -- kept as a tail call so every value from core.schema.check is forwarded
    return core.schema.check(schema, conf)
end
--- Build the conf table that is handed to limit_count.rate_limit.
-- With `instance_conf` the key, header suffix, limit and window come from
-- that entry; without it the plugin-wide `limit` / `time_window` are used
-- under the shared "<plugin_name>#global" key and `instance_name` (or "")
-- becomes the header suffix.
-- @param plugin_conf    plugin configuration
-- @param instance_conf  optional entry of `plugin_conf.instances`
-- @param instance_name  optional name used only when `instance_conf` is nil
-- @return a new table in the shape expected by the limit-count module
local function transform_limit_conf(plugin_conf, instance_conf, instance_name)
    local key, name, limit, time_window

    if instance_conf then
        name = instance_conf.name
        key = name
        limit = instance_conf.limit
        time_window = instance_conf.time_window
    else
        name = instance_name or ""
        key = plugin_name .. "#global"
        limit = plugin_conf.limit
        time_window = plugin_conf.time_window
    end

    return {
        -- limit-count need these fields
        policy = "local",
        key_type = "constant",
        allow_degradation = false,
        sync_interval = -1,

        _vid = key,
        key = key,
        count = limit,
        time_window = time_window,

        rejected_code = plugin_conf.rejected_code,
        rejected_msg = plugin_conf.rejected_msg,
        show_limit_quota_header = plugin_conf.show_limit_quota_header,

        -- response header names carry the instance name as a suffix
        limit_header = "X-AI-RateLimit-Limit-" .. name,
        remaining_header = "X-AI-RateLimit-Remaining-" .. name,
        reset_header = "X-AI-RateLimit-Reset-" .. name,
    }
end
--- Build the instance-name -> limit-conf lookup table for a plugin conf.
-- Entries of `conf.instances` are stored up front. Any other name falls
-- back, on first access, to a conf derived from the plugin-wide limit;
-- that fallback is stored in the table so it is built once per name.
-- When `conf.limit` is not set, unknown names resolve to nil.
-- @param conf  plugin configuration
-- @return table indexed by instance name
local function fetch_limit_conf_kvs(conf)
    local kvs = setmetatable({}, {
        __index = function(self, instance_name)
            if conf.limit then
                local fallback = transform_limit_conf(conf, nil, instance_name)
                self[instance_name] = fallback
                return fallback
            end
            return nil
        end
    })

    for _, instance in ipairs(conf.instances or {}) do
        kvs[instance.name] = transform_limit_conf(conf, instance)
    end

    return kvs
end
--- Access-phase handler.
-- Does nothing unless `ctx.picked_ai_instance_name` is set and a limit conf
-- exists for that instance. Otherwise it calls limit_count.rate_limit with
-- a cost of 1 and a trailing `true` (presumably limit-count's dry-run flag
-- — confirm against that module), records in `ctx.ai_rate_limiting`
-- whether a code came back, and returns that call's code and message.
-- @param conf  plugin configuration
-- @param ctx   request context
function _M.access(conf, ctx)
    local picked = ctx.picked_ai_instance_name
    if picked then
        local kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs, conf)
        local instance_limit = kvs[picked]
        if instance_limit then
            local code, msg = limit_count.rate_limit(instance_limit, ctx,
                                                     plugin_name, 1, true)
            -- remembered so the log phase can skip token accounting
            ctx.ai_rate_limiting = not not code
            return code, msg
        end
    end
end
--- Report whether an AI instance is currently allowed by its rate limit.
--
-- When `conf` is nil the plugin's own conf is looked up in `ctx.plugins`,
-- which is walked as a flat array of alternating plugin object / plugin
-- conf entries.
--
-- @param conf           plugin conf, or nil to look it up in `ctx.plugins`
-- @param ctx            request context
-- @param instance_name  instance to check; defaults to
--                       `ctx.picked_ai_instance_name`
-- @return true when no plugin conf or no limit conf applies, or when
--         limit_count.rate_limit returns no code; false when it returns a
--         code; nil plus an error string when the instance name is missing
--         or is not a string
function _M.check_instance_status(conf, ctx, instance_name)
    if conf == nil then
        local plugins = ctx.plugins
        -- guard against a context without a plugin list: `#plugins` would
        -- raise on nil, while "plugin not configured" is a valid answer
        if plugins then
            for i = 1, #plugins, 2 do
                if plugins[i]["name"] == plugin_name then
                    conf = plugins[i + 1]
                    -- a plugin is expected to appear once in the list, so
                    -- there is no point in scanning the remaining entries
                    break
                end
            end
        end
    end
    if not conf then
        return true
    end

    instance_name = instance_name or ctx.picked_ai_instance_name
    if not instance_name then
        return nil, "missing instance_name"
    end
    if type(instance_name) ~= "string" then
        return nil, "invalid instance_name"
    end

    local limit_conf_kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs, conf)
    local limit_conf = limit_conf_kvs[instance_name]
    if not limit_conf then
        return true
    end

    -- same call shape as the access phase: cost 1 plus a trailing `true`
    -- (presumably limit-count's dry-run flag — confirm against that module)
    local code, _ = limit_count.rate_limit(limit_conf, ctx, plugin_name, 1, true)
    if code then
        core.log.info("rate limit for instance: ", instance_name, " code: ", code)
        return false
    end
    return true
end
--- Read the token counter selected by `conf.limit_strategy`.
-- @param conf  plugin configuration (only `limit_strategy` is read)
-- @param ctx   request context (only `ai_token_usage` is read)
-- @return the value stored under the strategy name in `ctx.ai_token_usage`,
--         or nothing when `ctx.ai_token_usage` is not set
local function get_token_usage(conf, ctx)
    local token_usage = ctx.ai_token_usage
    if token_usage then
        return token_usage[conf.limit_strategy]
    end
end
--- Log-phase handler.
-- Skipped when no instance was picked or when the access phase flagged the
-- request in `ctx.ai_rate_limiting`. Otherwise the token count selected by
-- `conf.limit_strategy` is passed to limit_count.rate_limit as its fourth
-- argument (the access phase passes 1 there) for the picked instance's
-- limit conf, if one exists.
-- @param conf  plugin configuration
-- @param ctx   request context
function _M.log(conf, ctx)
    local picked = ctx.picked_ai_instance_name
    if not picked or ctx.ai_rate_limiting then
        return
    end

    local consumed = get_token_usage(conf, ctx)
    if not consumed then
        core.log.error("failed to get token usage for llm service")
        return
    end
    core.log.info("instance name: ", picked, " used tokens: ", consumed)

    local kvs = limit_conf_cache(conf, nil, fetch_limit_conf_kvs, conf)
    local instance_limit = kvs[picked]
    if not instance_limit then
        return
    end
    limit_count.rate_limit(instance_limit, ctx, plugin_name, consumed)
end
-- Export the plugin module table (metadata plus the check_schema, access,
-- check_instance_status and log functions defined above).
return _M