# mirror of https://github.com/projecthorus/sondehub-infra.git (synced 2024-12-27 00:31:08 +00:00)
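"""Roll up SondeHub prediction documents in Elasticsearch.

Pulls every document from a monthly predictions index via the scroll API,
keeps only the newest prediction per sonde serial, bulk-writes the survivors
into the matching predictions-YYYY-MM-rollup index, and then deletes the
source indexes.
"""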
import json
from datetime import datetime, timedelta

import es  # project-local wrapper for Elasticsearch HTTP requests

# Newest prediction document seen so far, keyed by sonde serial.
serials = {}
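# Assumed signature of es.request, inferred from its call sites in this file
# (the real definition lives in the project's es module):
#   es.request(payload: str, path: str, method: str, params=None) -> dict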


def fetch_es():
    # Page through the source predictions index 1000 documents at a time
    # using the Elasticsearch scroll API.
    payload = {
        "size": 1000
    }
    indexes = []
    response = es.request(json.dumps(payload),
                          #f"predictions-*,-predictions-{datetime.now().strftime('%Y-%m')},-predictions-{(datetime.now() - timedelta(days=27)).strftime('%Y-%m')},-predictions-*-rollup/_search",
                          "predictions-2021-12/_search",
                          "POST", params={"scroll": "1m"})
    try:
        add_unique([x["_source"] for x in response['hits']['hits']])
    except Exception:
        # Log the raw response before re-raising; a missing 'hits' key
        # usually means the query itself failed.
        print(response)
        raise
    # Track which source indexes the hits came from, including the first page.
    indexes += [x["_index"] for x in response['hits']['hits']]
    scroll_id = response['_scroll_id']
    scroll_ids = [scroll_id]
    while response['hits']['hits']:
        print("Fetching more")
        response = es.request(json.dumps({"scroll": "1m", "scroll_id": scroll_id}),
                              "_search/scroll", "POST")
        scroll_id = response['_scroll_id']
        scroll_ids.append(scroll_id)
        add_unique([x["_source"] for x in response['hits']['hits']])
        indexes += [x["_index"] for x in response['hits']['hits']]
    # Release the server-side scroll contexts; failures here are non-fatal.
    for scroll_id in scroll_ids:
        try:
            scroll_delete = es.request(json.dumps({"scroll_id": scroll_id}),
                                       "_search/scroll", "DELETE")
            print(scroll_delete)
        except RuntimeError:
            pass

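    # The bulk body is NDJSON: each rolled-up document contributes an action
    # line plus a source line, e.g. (illustrative values):
    #   {"index":{"_index":"predictions-2021-12-rollup"}}
    #   {"serial":"S1234567","datetime":"2021-12-05T00:00:00Z",...}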
    # post data to ES bulk

    # dedupe indexes to clean up, so each one is only deleted once below
    indexes = list(dict.fromkeys(indexes))
    body = ""
    for serial, doc in serials.items():
        # Route each document to the rollup index for its year-month, e.g. a
        # datetime of "2021-12-05T00:00:00Z" maps to "predictions-2021-12-rollup".
        index = "predictions-" + "-".join(doc['datetime'].split("-")[0:2]) + "-rollup"
        body += f'{{"index":{{"_index":"{index}"}}}}' + "\n" + json.dumps(doc) + "\n"

    body += "\n"
    result = es.request(body, "_bulk", "POST")
    if result.get('errors'):
        error_types = [x['index']['error']['type'] for x in result['items'] if 'error' in x['index']]  # get all the error types
        print(result)
        # filter out mapper failures since they will never succeed on retry
        error_types = [a for a in error_types if a != 'mapper_parsing_exception']
        if error_types:
            raise RuntimeError(f"Bulk rollup failed: {error_types}")

    # With the rollup written, the source indexes can be removed.
    for index in indexes:
        if "predictions-" in index:  # safety check before deleting an index
            result = es.request("", index, "DELETE")


def add_unique(es_r):
    # Keep only the most recent document per serial; the trailing "Z" is
    # rewritten to "+00:00" so datetime.fromisoformat can parse the values.
    for row in es_r:
        serial = row['serial']
        if serial not in serials or (
            datetime.fromisoformat(serials[serial]['datetime'].replace("Z", "+00:00"))
            < datetime.fromisoformat(row['datetime'].replace("Z", "+00:00"))
        ):
            serials[serial] = row
    print(f"Number of serials: {len(serials)}")


def handler(event, context):
    # AWS Lambda entry point.
    print(json.dumps(event))
    fetch_es()
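

# Minimal local-run sketch (assumption: the es helper is already configured
# with a reachable Elasticsearch endpoint; in production this runs as an AWS
# Lambda with handler() as the entry point):
# if __name__ == "__main__":
#     handler({}, None)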