sondehub-infra/query/lambda_function.py

621 lines
22 KiB
Python
Raw Normal View History

2021-02-02 07:14:38 +00:00
import boto3
import botocore.credentials
from botocore.awsrequest import AWSRequest
from botocore.endpoint import URLLib3Session
from botocore.auth import SigV4Auth
2021-03-29 02:44:33 +00:00
from aws_xray_sdk.core import xray_recorder
from aws_xray_sdk.core import patch_all
2021-02-02 07:14:38 +00:00
import json
import os
2021-02-22 06:13:30 +00:00
from datetime import datetime, timedelta, timezone
import sys, traceback
2021-04-20 08:06:21 +00:00
import re
2021-04-27 22:30:28 +00:00
import html
2021-02-02 07:14:38 +00:00
2021-02-02 10:44:39 +00:00
# Hostname of the Elasticsearch endpoint, read from the "ES" environment
# variable; None when the variable is not set.
HOST = os.environ.get("ES")
2021-02-02 07:14:38 +00:00
# get current sondes, filter by date, location
2021-02-02 10:44:39 +00:00
2021-02-02 07:14:38 +00:00
def get_sondes(event, context):
    """Return the newest telemetry frame of every sonde, as a JSON string.

    Optional query string parameters, read from
    ``event["queryStringParameters"]``:

    * ``last`` -- only consider frames from the last ``last`` seconds.
    * ``lat``, ``lon``, ``distance`` -- only consider frames whose
      ``position`` is within ``distance`` metres of the point. The filter is
      applied only when all three are supplied.

    When ``last`` is not supplied the search is limited to the last day.

    Args:
        event: Lambda event dict.
        context: Lambda context (unused).

    Returns:
        JSON object mapping each sonde serial to the ``_source`` of its most
        recent frame.

    Raises:
        ValueError: if ``last``, ``distance``, ``lat`` or ``lon`` is not
            numeric.
    """
    # The key may be absent or explicitly null when the request carries no
    # query string; treat both as "no parameters".
    params = event.get("queryStringParameters") or {}

    filters = [{"match_all": {}}]

    if "last" in params:
        filters.append(
            {
                "range": {
                    "datetime": {
                        "gte": f"now-{int(params['last'])}s",
                        "lte": "now",
                    }
                }
            }
        )
    else:
        # No explicit time window requested: default to the last 24 hours.
        # (The previous check looked for the string "range" in a list of
        # dicts, which never matched, so this default was always added and
        # silently capped any larger ``last`` value.)
        filters.append({"range": {"datetime": {"gte": "now-1d", "lte": "now"}}})

    if all(key in params for key in ("lat", "lon", "distance")):
        filters.append(
            {
                "geo_distance": {
                    "distance": f"{int(params['distance'])}m",
                    "position": {
                        "lat": float(params["lat"]),
                        "lon": float(params["lon"]),
                    },
                }
            }
        )

    path = "telm-*/_search"
    payload = {
        "aggs": {
            # One bucket per serial ...
            "2": {
                "terms": {
                    "field": "serial.keyword",
                    "order": {"_key": "desc"},
                    "size": 10000,
                },
                "aggs": {
                    # ... holding only the newest document for that serial.
                    "1": {
                        "top_hits": {
                            "size": 1,
                            "sort": [{"datetime": {"order": "desc"}}],
                        }
                    }
                },
            }
        },
        "query": {"bool": {"filter": filters}},
    }

    results = es_request(payload, path, "POST")

    sondes = {}
    for bucket in results["aggregations"]["2"]["buckets"]:
        latest = bucket["1"]["hits"]["hits"][0]["_source"]
        sondes[latest["serial"]] = latest
    return json.dumps(sondes)
def get_telem(event, context):
    """Return down-sampled telemetry history per sonde, as a JSON string.

    Optional query string parameters, read from
    ``event["queryStringParameters"]``:

    * ``duration`` -- one of ``3d``, ``1d``, ``6h``, ``3h`` (default ``3h``).
      Each duration has a fixed histogram interval so that long windows
      return coarser data.
    * ``datetime`` -- ISO 8601 end of the window (a trailing ``Z`` is
      accepted); defaults to the current UTC time.
    * ``serial`` -- restrict the result to a single sonde serial.

    Args:
        event: Lambda event dict.
        context: Lambda context (unused).

    Returns:
        JSON object ``{serial: {bucket_time: frame_source}}`` holding the
        newest frame of each histogram bucket, or a plain error message
        string when ``duration`` is not one of the supported values.

    Raises:
        ValueError: if ``datetime`` is not a valid ISO 8601 timestamp.
    """
    # Predefined (window seconds, bucket seconds) pairs; arbitrary windows
    # are not offered to avoid overloading Elasticsearch.
    durations = {
        "3d": (259200, 1200),  # 3 days in 20 minute buckets
        "1d": (86400, 600),  # 1 day in 10 minute buckets
        "6h": (21600, 60),  # 6 hours in 1 minute buckets
        "3h": (10800, 15),  # 3 hours in 15 second buckets
    }

    # The key may be absent or explicitly null when the request carries no
    # query string; treat both as "no parameters".
    params = event.get("queryStringParameters") or {}

    duration_query = params.get("duration", "3h")
    if duration_query not in durations:
        return f"Duration must be either {', '.join(durations.keys())}"
    duration, interval = durations[duration_query]

    if "datetime" in params:
        # datetime.fromisoformat() does not accept the "Z" suffix on the
        # Python versions this file targets, so spell the offset out.
        requested_time = datetime.fromisoformat(
            params["datetime"].replace("Z", "+00:00")
        )
    else:
        requested_time = datetime.now(timezone.utc)

    lt = requested_time
    gte = requested_time - timedelta(seconds=duration)

    filters = [
        {"match_all": {}},
        {"range": {"datetime": {"gte": gte.isoformat(), "lt": lt.isoformat()}}},
    ]
    if "serial" in params:
        filters.append({"match_phrase": {"serial": str(params["serial"])}})

    path = "telm-*/_search"
    payload = {
        "aggs": {
            # One bucket per serial ...
            "2": {
                "terms": {
                    "field": "serial.keyword",
                    "order": {"_key": "desc"},
                    "size": 10000,
                },
                "aggs": {
                    # ... split into fixed time buckets ...
                    "3": {
                        "date_histogram": {
                            "field": "datetime",
                            "fixed_interval": f"{interval}s",
                            "min_doc_count": 1,
                        },
                        "aggs": {
                            # ... keeping the newest frame of each bucket.
                            "1": {
                                "top_hits": {
                                    "size": 1,
                                    "sort": [{"datetime": {"order": "desc"}}],
                                }
                            }
                        },
                    }
                },
            }
        },
        "query": {"bool": {"filter": filters}},
    }

    results = es_request(payload, path, "POST")

    output = {
        sonde["key"]: {
            bucket["key_as_string"]: bucket["1"]["hits"]["hits"][0]["_source"]
            for bucket in sonde["3"]["buckets"]
        }
        for sonde in results["aggregations"]["2"]["buckets"]
    }
    return json.dumps(output)
2021-03-28 05:00:44 +00:00
2021-02-22 06:13:30 +00:00
def _parse_position_id(position_id):
    """Split a ``<serial>-<ISO timestamp>Z`` position id.

    Returns ``(serial, timestamp)`` with ``timestamp`` as a UTC-aware
    datetime, or ``None`` when the id does not have that shape.
    """
    match = re.search(
        r"(.+)-(\d{4}-\d{2}-\d{2}\w\d{2}:\d{2}:\d{2}\.\d+)Z", position_id
    )
    if match is None:
        return None
    serial, timestamp = match.groups()
    try:
        parsed = datetime.fromisoformat(timestamp)
    except ValueError:
        return None
    # The pattern requires the trailing "Z", i.e. the timestamp is UTC.
    return serial, parsed.replace(tzinfo=timezone.utc)


def _sonde_position(hits):
    """Build one position entry from the top hits of a telemetry bucket.

    The first hit supplies the position itself; every hit contributes its
    uploader to the ``callsign`` mapping.
    """
    frame_data = hits[0]["_source"]

    # Use subtype if it exists, else just use the basic type. Escape once
    # here and reuse the escaped value below.
    if "subtype" in frame_data:
        sonde_type = html.escape(frame_data["subtype"])
    else:
        sonde_type = html.escape(frame_data["type"])

    data = {
        "manufacturer": html.escape(frame_data["manufacturer"]),
        "type": sonde_type,
    }
    if "temp" in frame_data:
        data["temperature_external"] = frame_data["temp"]
    for field in ("humidity", "pressure", "sats", "batt", "burst_timer", "frequency"):
        if field in frame_data:
            data[field] = frame_data[field]
    # May need to revisit this, if the resultant strings are too long.
    if "xdata" in frame_data:
        data["xdata"] = html.escape(frame_data["xdata"])

    uploaders = {
        html.escape(hit["_source"]["uploader_callsign"]): {
            "snr": hit["_source"].get("snr"),
            "rssi": hit["_source"].get("rssi"),
        }
        for hit in hits
    }

    return {
        "position_id": html.escape(
            f'{frame_data["serial"]}-{frame_data["datetime"]}'
        ),
        "mission_id": "0",
        "vehicle": html.escape(frame_data["serial"]),
        "server_time": html.escape(frame_data["datetime"]),
        "gps_time": html.escape(frame_data["datetime"]),
        "gps_lat": frame_data["lat"],
        "gps_lon": frame_data["lon"],
        "gps_alt": frame_data["alt"],
        "gps_heading": frame_data["heading"] if "heading" in frame_data else "",
        "gps_speed": frame_data["vel_h"] if "vel_h" in frame_data else "",
        "type": sonde_type,
        "picture": "",
        "temp_inside": "",
        "data": data,
        "callsign": uploaders,
        "sequence": "0",
    }


def _chase_position(frame_data):
    """Build one position entry for a chase car from a listener document."""
    # ``ts`` is divided by 1000 before conversion, i.e. it is handled as
    # epoch milliseconds.
    timestamp = html.escape(
        datetime.fromtimestamp(frame_data["ts"] / 1000).isoformat()
    )
    return {
        "position_id": html.escape(
            f'{frame_data["uploader_callsign"]}-{frame_data["ts"]}'
        ),
        "mission_id": "0",
        "vehicle": html.escape(f'{frame_data["uploader_callsign"]}_chase'),
        "server_time": timestamp,
        "gps_time": timestamp,
        "gps_lat": frame_data["uploader_position"][0],
        "gps_lon": frame_data["uploader_position"][1],
        "gps_alt": frame_data["uploader_position"][2],
        "gps_heading": "",
        "gps_speed": 0,
        "picture": "",
        "temp_inside": "",
        "data": {},
        "callsign": html.escape(frame_data["uploader_callsign"]),
        "sequence": "",
    }


def datanew(event, context):
    """Return sonde and chase car positions as a JSON string.

    Query string parameters, read from ``event["queryStringParameters"]``:

    * ``mode`` -- ``3days``, ``1day``, ``12hours``, ``6hours``, ``3hours``,
      ``1hour`` (default) or ``single``. The named durations select the time
      window and the histogram interval; ``single`` fetches the one frame
      identified by ``position_id``.
    * ``position_id`` -- ``"0"`` (default) for "the whole window ending
      now". In ``single`` mode it must be ``<serial>-<timestamp>Z``; in the
      other modes it is an ISO 8601 timestamp and positions from that time
      until now are returned.
    * ``vehicles`` -- a serial to restrict telemetry to; ``""`` and
      ``"RS_*;*chase"`` mean "no restriction". A restriction also tightens
      the histogram interval to 2 seconds.
    * ``chase_only`` -- ``"true"`` skips the sonde telemetry query.

    ``max_positions`` may be sent by clients but is not applied.

    Args:
        event: Lambda event dict.
        context: Lambda context (unused).

    Returns:
        JSON ``{"positions": {"position": [...]}}`` sorted by
        ``position_id``, or a plain error message string when ``mode`` or
        ``position_id`` is invalid.

    Raises:
        ValueError: if, outside ``single`` mode, ``position_id`` is neither
            ``"0"`` nor a valid ISO 8601 timestamp.
    """
    # Predefined (window seconds, bucket seconds) pairs; arbitrary windows
    # are not offered to avoid overloading Elasticsearch.
    durations = {
        "3days": (259200, 1200),  # 3 days in 20 minute buckets
        "1day": (86400, 600),  # 1 day in 10 minute buckets
        "12hours": (43200, 300),  # 12 hours in 5 minute buckets
        "6hours": (21600, 120),  # 6 hours in 2 minute buckets
        "3hours": (10800, 90),  # 3 hours in 90 second buckets
        "1hour": (3600, 30),  # 1 hour in 30 second buckets
    }

    # The key may be absent or explicitly null when the request carries no
    # query string; treat both as "no parameters".
    params = event.get("queryStringParameters") or {}

    mode = params.get("mode", "1hour")
    if mode in durations:
        duration, interval = durations[mode]
    elif mode == "single":
        duration, interval = 259200, 1
    else:
        return f"Duration must be either {', '.join(durations.keys())}"

    vehicles = params.get("vehicles", "")
    filter_by_vehicle = vehicles not in ("", "RS_*;*chase")
    if filter_by_vehicle:
        interval = 2

    position_id = params.get("position_id", "0")
    now = datetime.now(timezone.utc)
    matched_vehicle = None
    if position_id != "0":
        if mode == "single":
            parsed = _parse_position_id(position_id)
            if parsed is None:
                return "Single requires a position id formatted as serial-datetime"
            matched_vehicle, requested_time = parsed
            lt = requested_time
            gte = requested_time
        else:
            gte = datetime.fromisoformat(position_id.replace("Z", "+00:00"))
            lt = now
    elif mode == "single":
        return "Single requires a position id specified"
    else:
        lt = now
        gte = now - timedelta(seconds=duration)

    output = {"positions": {"position": []}}
    positions = output["positions"]["position"]

    if params.get("chase_only") != "true":
        path = "telm-*/_search"
        filters = [
            {"match_all": {}},
            {"range": {"datetime": {"gte": gte.isoformat(), "lte": lt.isoformat()}}},
        ]
        if filter_by_vehicle:
            filters.append({"match_phrase": {"serial": str(vehicles)}})
        if mode == "single":
            filters.append({"match_phrase": {"serial": matched_vehicle}})

        payload = {
            "aggs": {
                "2": {
                    "terms": {
                        "field": "serial.keyword",
                        "order": {"_key": "desc"},
                        "size": 10000,
                    },
                    "aggs": {
                        "3": {
                            "date_histogram": {
                                "field": "datetime",
                                "fixed_interval": f"{interval}s",
                                "min_doc_count": 1,
                            },
                            "aggs": {
                                "1": {
                                    "top_hits": {
                                        "size": 5,
                                        "sort": [
                                            {"pressure": {"order": "desc", "mode": "median"}},
                                            {"humidity": {"order": "desc", "mode": "median"}},
                                            {"temp": {"order": "desc", "mode": "median"}},
                                            {"alt": {"order": "desc", "mode": "median"}},
                                        ],
                                    }
                                }
                            },
                        }
                    },
                }
            },
            "query": {
                "bool": {
                    "filter": filters,
                    "must_not": [
                        {"match_phrase": {"software_name": "SondehubV1"}},
                        {"match_phrase": {"serial": "xxxxxxxx"}},
                    ],
                }
            },
        }
        results = es_request(payload, path, "POST")

        for sonde in results["aggregations"]["2"]["buckets"]:
            for frame in sonde["3"]["buckets"]:
                # Best effort: a malformed frame is logged and skipped. The
                # entry (including its uploader list) is built completely
                # before it is appended, so a failure can never attach one
                # frame's uploaders to a previously appended position.
                try:
                    positions.append(_sonde_position(frame["1"]["hits"]["hits"]))
                except Exception:
                    traceback.print_exc(file=sys.stdout)

    # get chase cars
    payload = {
        "aggs": {
            "2": {
                "terms": {
                    "field": "uploader_callsign.keyword",
                    "order": {"_key": "desc"},
                    "size": 10000,
                },
                "aggs": {
                    "3": {
                        "date_histogram": {
                            "field": "ts",
                            "fixed_interval": f"{interval}s",
                            "min_doc_count": 1,
                        },
                        "aggs": {
                            "1": {
                                "top_hits": {
                                    "size": 1,
                                    "sort": [{"ts": {"order": "desc"}}],
                                }
                            }
                        },
                    }
                },
            }
        },
        "query": {
            "bool": {
                "filter": [
                    {"match_all": {}},
                    {"match_phrase": {"mobile": True}},
                    {"range": {"ts": {"gte": gte.isoformat(), "lt": lt.isoformat()}}},
                ]
            }
        },
    }
    path = "listeners-*/_search"

    # Example of the legacy record shape the client expects:
    # {"position_id":"82159921","mission_id":"0","vehicle":"KB9RKU_chase",
    # "server_time":"2021-04-09 06:28:55.109589","gps_time":"2021-04-09 06:28:54",
    # "gps_lat":"41.539648333","gps_lon":"-89.111862667","gps_alt":"231.6","gps_heading":"",
    # "gps_speed":"0","picture":"","temp_inside":"","data":{},"callsign":"","sequence":""}
    results = es_request(payload, path, "POST")

    for car in results["aggregations"]["2"]["buckets"]:
        for frame in car["3"]["buckets"]:
            try:
                positions.append(
                    _chase_position(frame["1"]["hits"]["hits"][0]["_source"])
                )
            except Exception:
                traceback.print_exc(file=sys.stdout)

    output["positions"]["position"] = sorted(
        positions, key=lambda position: position["position_id"]
    )
    return json.dumps(output)
def get_listeners(event, context):
    """Return the stationary listeners heard in the last 24 hours, as JSON.

    Queries the ``listeners-*`` indices for the newest document of each
    ``uploader_callsign`` (at most 500 callsigns), excluding documents
    flagged ``mobile`` and documents lacking position, antenna or software
    fields.

    Args:
        event: Lambda event dict (not read).
        context: Lambda context (unused).

    Returns:
        JSON list of objects with ``name``, ``tdiff_hours``, ``lon``,
        ``lat``, ``alt`` and an HTML ``description`` for each listener.
    """
    path = "listeners-*/_search"
    payload = {
        "aggs": {
            # One bucket per callsign ...
            "2": {
                "terms": {
                    "field": "uploader_callsign.keyword",
                    "order": {"_key": "desc"},
                    "size": 500,
                },
                "aggs": {
                    # ... holding only the newest document, returned as doc
                    # values rather than the full source.
                    "1": {
                        "top_hits": {
                            "_source": False,
                            "size": 1,
                            "docvalue_fields": [
                                "uploader_position_elk",
                                "uploader_alt",
                                "uploader_antenna.keyword",
                                "software_name.keyword",
                                "software_version.keyword",
                                "ts",
                            ],
                            "sort": [{"ts": {"order": "desc"}}],
                        }
                    }
                },
            }
        },
        "size": 0,
        "query": {
            "bool": {
                "must": [],
                "filter": [
                    {"match_all": {}},
                    {"exists": {"field": "uploader_position_elk"}},
                    {"exists": {"field": "uploader_antenna.keyword"}},
                    {"exists": {"field": "software_name.keyword"}},
                    {"exists": {"field": "software_version.keyword"}},
                    {"exists": {"field": "ts"}},
                    {
                        "range": {
                            "ts": {
                                "gte": "now-24h",
                                "lte": "now",
                                "format": "strict_date_optional_time",
                            }
                        }
                    },
                ],
                "should": [],
                "must_not": [
                    {"match_phrase": {"mobile": "true"}},
                ],
            }
        },
    }

    results = es_request(payload, path, "GET")

    now = datetime.now(timezone.utc)
    output = []
    for listener in results["aggregations"]["2"]["buckets"]:
        fields = listener["1"]["hits"]["hits"][0]["fields"]

        last_contact = fields["ts"][0]
        age = now - datetime.fromisoformat(last_contact.replace("Z", "+00:00"))
        # total_seconds() rather than .seconds: .seconds drops the days
        # component, so a timestamp marginally in the future (negative
        # delta) would otherwise be reported as ~24 hours old. Clamp such
        # clock-skew cases to zero.
        tdiff_hours = max(age.total_seconds(), 0) / 3600

        # The position is split on "," with element 0 read as latitude and
        # element 1 as longitude.
        position = fields["uploader_position_elk"][0].replace(" ", "").split(",")

        software_name = html.escape(fields["software_name.keyword"][0])
        software_version = html.escape(fields["software_version.keyword"][0])
        antenna = html.escape(fields["uploader_antenna.keyword"][0])

        description = (
            "\n\n"
            '<font size="-2"><BR>\n\n'
            f"<B>Radio: {software_name}-{software_version}</B><BR>\n\n"
            f"<B>Antenna: </B>{antenna}<BR>\n\n"
            f"<B>Last Contact: </B>{html.escape(last_contact)} <BR>\n\n"
            "</font>\n\n"
        )

        output.append(
            {
                "name": html.escape(listener["key"]),
                "tdiff_hours": tdiff_hours,
                "lon": float(position[1]),
                "lat": float(position[0]),
                "alt": float(fields["uploader_alt"][0])
                if "uploader_alt" in fields
                else 0,
                "description": description,
            }
        )
    return json.dumps(output)
2021-02-02 10:44:39 +00:00
2021-02-02 07:14:38 +00:00
def es_request(payload, path, method):
    """Send a SigV4-signed JSON request to Elasticsearch and decode the reply.

    Args:
        payload: JSON-serialisable request body.
        path: path appended to ``https://{HOST}/``, e.g. ``"telm-*/_search"``.
        method: accepted for interface compatibility but NOT used -- the
            request is always sent as ``POST``.
            NOTE(review): callers pass both "POST" and "GET"; confirm
            whether the verb should be honoured before changing this.

    Returns:
        The response body parsed from JSON. The HTTP status is not checked,
        so an error response is returned as-is.
    """
    body = json.dumps(payload)
    headers = {"Host": HOST, "Content-Type": "application/json"}
    request = AWSRequest(
        method="POST", url=f"https://{HOST}/{path}", data=body, headers=headers
    )

    # Sign with the credentials of the default boto3 session, for service
    # "es" in region "us-east-1".
    credentials = boto3.Session().get_credentials()
    SigV4Auth(credentials, "es", "us-east-1").add_auth(request)

    response = URLLib3Session().send(request.prepare())
    return json.loads(response.text)
if __name__ == "__main__":
    # Manual smoke test: running this file directly queries the listeners
    # endpoint with an empty event and prints the JSON it returns.
    #
    # Other handlers can be exercised the same way, for example:
    #
    #   get_sondes(
    #       {"queryStringParameters": {
    #           "lat": "-28.22717", "lon": "153.82996", "distance": "50000"}},
    #       {},
    #   )
    #
    #   datanew(
    #       {"queryStringParameters": {
    #           "type": "positions", "mode": "12hours",
    #           "position_id": "0", "vehicles": ""}},
    #       {},
    #   )
    #
    # Parameters the legacy client is known to send to datanew:
    #   mode: 6hours, type: positions, format: json, max_positions: 0,
    #   position_id: 0, vehicles: RS_*;*chase
    empty_event = {}
    empty_context = {}
    listeners_json = get_listeners(empty_event, empty_context)
    print(listeners_json)