feat: insertion into MongoDB done

This commit is contained in:
2025-12-28 16:22:42 -01:00
parent 1bb945f7e6
commit 1e6551a2b1
5 changed files with 187 additions and 25 deletions

11
ev2.py
View File

@@ -1,5 +1,6 @@
import logging
from utilsv2 import mongo
from utilsv2.log import logger
from utilsv2.parser import parse
@@ -11,7 +12,15 @@ def main():
if __name__ == "__main__":
logger = logging.getLogger(__name__)
# initialization
logger.info("Started")
parse("dados.txt")
cli = mongo.connect("mongodb://localhost:27017")
evs, stats = parse("dados.txt")
mongo.add_events(cli, "quakes", evs)
mongo.add_stations(cli, "stations", stats)
# cleanup
mongo.close(cli)
logger.info("Ended")

View File

@@ -3,7 +3,7 @@ import logging
logger = logging.getLogger(__name__)
logging.basicConfig(
format="%(asctime)s %(name)s [%(levelname)s]: %(message)s",
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
level=logging.INFO,
filename="ev.log",

71
utilsv2/mongo.py Normal file
View File

@@ -0,0 +1,71 @@
import logging
from typing import Any
from pymongo import MongoClient
from pymongo.collection import Collection
from pymongo.errors import ConnectionFailure
try:
from utilsv2.log import logger
from utilsv2.parser import massage_magnitudes
except ModuleNotFoundError:
from log import logger
from parser import massage_magnitudes
logger = logging.getLogger(__name__)
def connect(uri: str) -> MongoClient:
    """Open a MongoDB client for *uri* and verify the server is reachable.

    Args:
        uri: MongoDB connection string, e.g. "mongodb://localhost:27017".

    Returns:
        A connected MongoClient.

    Raises:
        ConnectionFailure: if the server cannot be reached.
    """
    try:
        client = MongoClient(uri)
        # MongoClient() connects lazily and does NOT raise for an unreachable
        # server, so the except branch below was dead code. A cheap "ping"
        # command forces the connection attempt and surfaces ConnectionFailure.
        client.admin.command("ping")
        logger.info("Connected to the DB")
    except ConnectionFailure as e:
        logger.critical("Could not connect to the MongoDB")
        raise e
    return client
def add_events(
    client: MongoClient, collection: str, data: list[dict[str, Any]]
) -> None:
    """Insert parsed events into the ``main.<collection>`` collection.

    Magnitude objects are first serialized (in place) via massage_magnitudes,
    since pymongo cannot encode custom classes.

    Args:
        client: an open MongoClient.
        collection: target collection name inside the "main" database.
        data: parsed event documents.
    """
    if not data:
        # insert_many raises InvalidOperation on an empty document list.
        logger.info("No events to add to the database.")
        return
    db = client["main"]
    coll = db[collection]
    data = massage_magnitudes(data)
    _res = coll.insert_many(data)
    if _res.acknowledged:
        # Lazy %-formatting: args only rendered if the record is emitted.
        logger.info("Added %d events.", len(_res.inserted_ids))
    else:
        logger.warning("Could not add events to the database.")
def add_stations(
    client: MongoClient, collection: str, data: list[dict[str, Any]]
) -> None:
    """Insert per-event station readings into ``main.<collection>``.

    Args:
        client: an open MongoClient.
        collection: target collection name inside the "main" database.
        data: station documents as produced by parse_stations_V1.
    """
    if not data:
        # insert_many raises InvalidOperation on an empty document list.
        logger.info("No stations to add to the database.")
        return
    db = client["main"]
    coll = db[collection]
    _res = coll.insert_many(data)
    if _res.acknowledged:
        # Fixed copy-paste from add_events: these are stations, not events.
        logger.info("Added %d stations.", len(_res.inserted_ids))
    else:
        logger.warning("Could not add stations to the database.")
def get_ids(collection: Collection) -> set[Any]:
    """Return the distinct event IDs stored in *collection*."""
    distinct_ids = collection.distinct("ID")
    return set(distinct_ids)
def close(client: MongoClient) -> None:
    """Shut down *client*'s connection pool and log the shutdown."""
    client.close()
    logger.info("Closed the DB.")
if __name__ == "__main__":
    # Smoke test: open a connection against a local server, then close it.
    _client = connect("mongodb://localhost:27017")
    close(_client)

View File

@@ -10,7 +10,11 @@ from utilsv2.log import logger
logger = logging.getLogger(__name__)
type evtype = dict[str, Any]
type sttype = dict[str, Any]
# INFO: Don't think we really need this
class Mag:
def __init__(self, mag: float, type: str, agency: str):
self.mag = mag
@@ -24,7 +28,7 @@ class Mag:
return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)
def parse(event: list[str]) -> dict[str, Any]:
def parse_event(event: list[str]) -> evtype:
# nordic must always have the first line a type 1 line
# but a type 1 line can have the id ommited if it's the first line
# if event[0][-1] != "1" or event[0][-1] != " ":
@@ -35,16 +39,52 @@ def parse(event: list[str]) -> dict[str, Any]:
for line in event:
toParse[line[-1]].append(line)
_ret = {}
for k, v in toParse.items():
match k:
case "1":
parse_type_1(v)
aux = parse_type_1(v)
if aux:
_ret.update(aux)
case "3":
parse_type_3(v)
_ret.update(parse_type_3(v))
case "6":
_ret.update(parse_type_6(v))
case "E":
_ret.update(parse_type_e(v))
case "I":
_ret.update(parse_type_i(v))
case _:
pass
print(_ret)
return _ret
return {}
def parse_stations_V1(lines: list[str], event_id: int) -> sttype:
    """Group nordic station (type-4) lines by station code for one event.

    Each fixed-width line is sliced into a reading dict; readings are bucketed
    under their 5-char station code.

    Returns:
        {"ID": event_id, "stations": {code: [reading, ...]}}
    """
    stations: dict[str, list] = {}
    result = {"ID": event_id, "stations": stations}
    for record in lines:
        try:
            amplitude = float(record[35:40])
        except ValueError:
            # Blank or non-numeric amplitude column.
            amplitude = None
        reading = {
            "Component": record[6:9].strip(),
            "I": None if record[9] == " " else record[9],
            "Time": parse_dt(record[18:30], True).strftime("%H:%M:%S.%f%z"),
            "Phase": record[10:15].strip(),
            # NOTE(review): key spelling "Weigth" kept as-is — stored documents
            # and downstream consumers may rely on it; confirm before renaming.
            "Weigth": None if record[15] == " " else record[15],
            "Amplitude": amplitude,
        }
        stations.setdefault(record[1:6].strip(), []).append(reading)
    return result
def parse_type_1(lines: list[str]) -> dict[str, Any] | None:
@@ -52,8 +92,8 @@ def parse_type_1(lines: list[str]) -> dict[str, Any] | None:
for line in lines:
if "Date" not in line1.keys():
dt = parse_dt(line[:21])
dist_ind = line[20]
event_id = line[21]
dist_ind = line[21]
event_id = line[22]
lat = float(line[24:31])
long = float(line[30:39])
depth = float(line[38:44])
@@ -78,6 +118,7 @@ def parse_type_1(lines: list[str]) -> dict[str, Any] | None:
def parse_type_3(lines: list[str]) -> dict[str, Any]:
comments = {"Sentido": "", "Regiao": "", "VZ": None, "SZ": None, "FE": None}
for line in lines:
if line.startswith(" SENTIDO"):
aux = line[:-2].split(":", maxsplit=1)
@@ -94,20 +135,31 @@ def parse_type_3(lines: list[str]) -> dict[str, Any]:
else:
comments["Regiao"] = item[1:]
print(comments)
return comments
def parse_type_6():
pass
def parse_type_6(lines: list[str]) -> dict[str, list[str]]:
    """Collect waveform file names from nordic type-6 lines.

    The trailing two characters (line-type marker column) are dropped and the
    remainder is stripped of surrounding whitespace.
    """
    names = [entry[:-2].strip() for entry in lines]
    return {"Wavename": names}
def parse_type_e():
pass
def parse_type_e(lines: list[str]) -> dict[str, int]:
    """Extract the azimuthal GAP value from nordic type-E (error) lines.

    Columns 5-8 hold the gap; if several E lines are present, the last wins.
    Returns an empty dict when no lines are given.
    """
    err: dict[str, int] = {}
    for record in lines:
        err["GAP"] = int(record[5:8])
    return err
def parse_type_7():
pass
def parse_type_i(lines: list[str]) -> dict[str, int]:
    """Extract the event ID (columns 60-75) from nordic type-I lines.

    If several I lines are present, the last one wins; an empty input yields
    an empty dict.
    """
    ident: dict[str, int] = {}
    for record in lines:
        ident["ID"] = int(record[60:75])
    return ident
def parse_dt(_text: str, isStation=False) -> datetime | time:
@@ -125,9 +177,9 @@ def parse_dt(_text: str, isStation=False) -> datetime | time:
)
return dt
else:
h = int(_text[11:13])
m = int(_text[13:15])
s_ms = int(float(_text[16:20]) * 1000)
h = int(_text[:2])
m = int(_text[2:4])
s_ms = int(float(_text[5:]) * 1000)
s = s_ms // 1000
s_ms = s_ms % 1000
dt = time(hour=h, minute=m, second=s, microsecond=s_ms)

View File

@@ -1,9 +1,10 @@
import logging
from io import TextIOWrapper
from typing import Any
from utilsv2 import utils
from utilsv2.log import logger
from utilsv2.nordic import parse as n_parse
from utilsv2.nordic import evtype, parse_event, parse_stations_V1, sttype
logger = logging.getLogger(__name__)
@@ -35,16 +36,43 @@ def find_events(fp: TextIOWrapper) -> list[tuple[int, int]]:
return event_indices
def extract_event(fp: TextIOWrapper, event_bounds: list[tuple[int, int]]):
def split_event(lines: list[str], start: int, end: int) -> int:
    """Locate the type-7 header line separating event data from stations.

    Scans lines[start:end] and returns the index of the first line ending in
    "7", or -1 when no such line exists in the range.
    """
    return next(
        (pos for pos in range(start, end) if lines[pos].endswith("7")),
        -1,
    )
def extract_event(
    fp: TextIOWrapper, event_bounds: list[tuple[int, int]]
) -> tuple[list[evtype], list[sttype]]:
    """Parse every event delimited by *event_bounds* out of *fp*.

    Each event block is split at its type-7 header line: lines before it are
    parsed as event data, lines after it as station readings.

    Args:
        fp: open text handle positioned at the start of the file.
        event_bounds: (start, end) line-index pairs, one per event.

    Returns:
        (events, stations) — parallel lists, one entry per parsed event.
        Events whose type-7 header cannot be found are logged and skipped.
    """
    lines = fp.read().split("\n")
    # Leftover print() debugging replaced with lazy debug logging.
    logger.debug("Read %d lines; event bounds: %s", len(lines), event_bounds)
    events, ev_stations = [], []
    for begin, finish in event_bounds:
        header_idx = split_event(lines, begin, finish)
        if header_idx == -1:
            logger.error("Could not parse event at pos %s", (begin, finish))
            continue
        ev = parse_event(lines[begin:header_idx])
        events.append(ev)
        ev_stations.append(
            parse_stations_V1(lines[header_idx + 1 : finish], ev["ID"])
        )
    return events, ev_stations
def parse(fname: str) -> None:
def massage_magnitudes(data: list[evtype]) -> list[evtype]:
    """Serialize each event's Mag objects to their JSON string form, in place.

    pymongo cannot encode custom classes, so every entry of an event's
    "Magnitudes" list is replaced by ``mag.toJSON()`` before insertion.
    (Removed the no-op ``data = data`` the original started with.)

    Args:
        data: parsed event documents, each with a "Magnitudes" list.

    Returns:
        The same list it was given, mutated.
    """
    for event in data:
        mags = event["Magnitudes"]
        for jdx, mag in enumerate(mags):
            mags[jdx] = mag.toJSON()
    return data
def parse(fname: str):
_ret = read_file(fname)
if not isinstance(_ret, TextIOWrapper):
logger.critical(_ret.__str__())
@@ -52,7 +80,9 @@ def parse(fname: str) -> None:
events = find_events(_ret)
_ret.seek(0)
extract_event(_ret, events)
evs, stations = extract_event(_ret, events)
# cleanup
_ret.close()
return evs, stations