sondehub-infra/historic/queue_data_update/index.py

import json
import boto3
import botocore.credentials
from botocore.awsrequest import AWSRequest
from botocore.endpoint import URLLib3Session
from botocore.auth import SigV4Auth
import zlib
import base64
import datetime
import os
import gzip
from io import BytesIO
from multiprocessing import Process
http_session = URLLib3Session()


def mirror(path, params):
    # Mirror the signed request to a secondary Elasticsearch domain. Currently unused;
    # see the commented-out Process call in es_request below.
    headers = {"Host": "search-sondes-v2-hiwdpmnjbuckpbwfhhx65mweee.us-east-1.es.amazonaws.com", "Content-Type": "application/json", "Content-Encoding": "gzip"}
    request = AWSRequest(
        method="POST", url=f"https://search-sondes-v2-hiwdpmnjbuckpbwfhhx65mweee.us-east-1.es.amazonaws.com/{path}", data=params, headers=headers
    )
    SigV4Auth(boto3.Session().get_credentials(), "es", "us-east-1").add_auth(request)
    session = URLLib3Session()
    r = session.send(request.prepare())

# Primary Elasticsearch host comes from the ES environment variable.
HOST = os.getenv("ES")
sqs = boto3.client('sqs', region_name="us-east-1")

def batch(iterable, n=1):
    # Yield successive n-sized chunks of a sequence (used to respect the SQS batch limit of 10).
    l = len(iterable)
    for ndx in range(0, l, n):
        yield iterable[ndx:min(ndx + n, l)]
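
# Illustrative example of the chunking behaviour:
#   list(batch(["a", "b", "c", "d", "e"], 3)) == [["a", "b", "c"], ["d", "e"]]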


def es_request(payload, path, method):
    # JSON-encode and gzip-compress the payload, then send it to the primary
    # Elasticsearch domain as a SigV4-signed request and return the parsed response.
    params = json.dumps(payload)
    compressed = BytesIO()
    with gzip.GzipFile(fileobj=compressed, mode='w') as f:
        f.write(params.encode('utf-8'))
    params = compressed.getvalue()

    headers = {"Host": HOST, "Content-Type": "application/json", "Content-Encoding": "gzip"}
    request = AWSRequest(
        method=method, url=f"https://{HOST}/{path}", data=params, headers=headers
    )
    SigV4Auth(boto3.Session().get_credentials(), "es", "us-east-1").add_auth(request)
    # p = Process(target=mirror, args=(path, params)).start()
    r = http_session.send(request.prepare())
    return json.loads(r.text)
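
# Example (illustrative): a match_all search against the telemetry indices, returning
# the parsed JSON response as a dict:
#   es_request({"query": {"match_all": {}}, "size": 1}, "telm-*/_search", "POST")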


def handler(event, context):
    # Aggregate the serial numbers of all sondes with telemetry in the last 24 hours,
    # excluding the placeholder serial "xxxxxxxx".
    query = {
        "aggs": {
            "serials": {
                "terms": {
                    "field": "serial.keyword",
                    "size": 10000
                }
            }
        },
        "size": 0,
        "_source": {
            "excludes": []
        },
        "query": {
            "bool": {
                "must_not": [{"match_phrase": {"serial": "xxxxxxxx"}}],
                "filter": [
                    {
                        "range": {
                            "datetime": {
                                "gte": "now-24h",
                                "format": "strict_date_optional_time"
                            }
                        }
                    }
                ]
            }
        }
    }
    results = es_request(query, "telm-*/_search", "POST")
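    # The terms aggregation response has roughly this shape (values illustrative):
    #   {"aggregations": {"serials": {"buckets": [{"key": "S1234567", "doc_count": 42}, ...]}}}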
    serials = [x['key'] for x in results['aggregations']['serials']['buckets']]

    # send_message_batch accepts at most 10 entries per call, so queue the serials
    # in batches of 10, one message per serial.
    for serial_batch in batch(serials, 10):
        sqs.send_message_batch(
            QueueUrl="https://sqs.us-east-1.amazonaws.com/143841941773/update-history",
            Entries=[
                {
                    "Id": str(index),
                    "MessageBody": serial
                }
                for index, serial in enumerate(serial_batch)
            ]
        )
    return serials


if __name__ == "__main__":
    print(handler({}, {}))

# This script finds the sondes seen in the last 24 hours and adds them to the queue
# to be updated (including the first and last dates they were seen).
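
# For reference, a hypothetical consumer of the update-history queue would receive one
# serial per message body; a minimal polling sketch (the actual consumer may differ):
#
#   queue_url = "https://sqs.us-east-1.amazonaws.com/143841941773/update-history"
#   response = sqs.receive_message(QueueUrl=queue_url, MaxNumberOfMessages=10, WaitTimeSeconds=20)
#   for message in response.get("Messages", []):
#       serial = message["Body"]
#       # ... recompute this sonde's history, then acknowledge the message ...
#       sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=message["ReceiptHandle"])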