feat: Insertion into MongoDB done

2025-12-28 16:22:42 -01:00
parent 1bb945f7e6
commit 1e6551a2b1
5 changed files with 187 additions and 25 deletions

ev2.py (11 changed lines)

@@ -1,5 +1,6 @@
 import logging
+from utilsv2 import mongo
 from utilsv2.log import logger
 from utilsv2.parser import parse
@@ -11,7 +12,15 @@ def main():
 if __name__ == "__main__":
     logger = logging.getLogger(__name__)
+    # initialization
     logger.info("Started")
-    parse("dados.txt")
+    cli = mongo.connect("mongodb://localhost:27017")
+    evs, stats = parse("dados.txt")
+    mongo.add_events(cli, "quakes", evs)
+    mongo.add_stations(cli, "stations", stats)
+    # cleanup
+    mongo.close(cli)
     logger.info("Ended")

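In the new main flow above, mongo.close(cli) is only reached if parsing and both inserts succeed. A minimal sketch (not part of this commit) of the same steps with the close guaranteed by try/finally:

    cli = mongo.connect("mongodb://localhost:27017")
    try:
        evs, stats = parse("dados.txt")
        mongo.add_events(cli, "quakes", evs)
        mongo.add_stations(cli, "stations", stats)
    finally:
        # close the client even if parsing or insertion raises
        mongo.close(cli)
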
utilsv2/log.py

@@ -3,7 +3,7 @@ import logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(
-    format="%(asctime)s %(name)s [%(levelname)s]: %(message)s",
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
     datefmt="%Y-%m-%d %H:%M:%S",
     level=logging.INFO,
     filename="ev.log",

utilsv2/mongo.py (new file, 71 lines)

@@ -0,0 +1,71 @@
+import logging
+from typing import Any
+
+from pymongo import MongoClient
+from pymongo.collection import Collection
+from pymongo.errors import ConnectionFailure
+
+# Fall back to flat imports so the module can also be run directly as a script.
+try:
+    from utilsv2.log import logger
+    from utilsv2.parser import massage_magnitudes
+except ModuleNotFoundError:
+    from log import logger
+    from parser import massage_magnitudes
+
+logger = logging.getLogger(__name__)
+
+
+def connect(uri) -> MongoClient:
+    try:
+        client = MongoClient(uri)
+        logger.info("Connected to the DB")
+    except ConnectionFailure as e:
+        logger.critical("Could not connect to MongoDB")
+        raise e
+    return client
+
+
+def add_events(
+    client: MongoClient, collection: str, data: list[dict[str, Any]]
+) -> None:
+    db = client["main"]
+    coll = db[collection]
+    # convert Mag objects to JSON strings so the documents can be inserted
+    data = massage_magnitudes(data)
+    _res = coll.insert_many(data)
+    if _res.acknowledged:
+        logger.info(f"Added {len(_res.inserted_ids)} events.")
+    else:
+        logger.info("Could not add events to the database.")
+
+
+def add_stations(
+    client: MongoClient, collection: str, data: list[dict[str, Any]]
+) -> None:
+    db = client["main"]
+    coll = db[collection]
+    _res = coll.insert_many(data)
+    if _res.acknowledged:
+        logger.info(f"Added {len(_res.inserted_ids)} stations.")
+    else:
+        logger.info("Could not add stations to the database.")
+
+
+def get_ids(collection: Collection) -> set[Any]:
+    return set(collection.distinct("ID"))
+
+
+def close(client: MongoClient) -> None:
+    client.close()
+    logger.info("Closed the DB.")
+
+
+if __name__ == "__main__":
+    v = connect("mongodb://localhost:27017")
+    close(v)

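get_ids() is defined above but not called anywhere in this commit. A hedged sketch of how it could be used to skip events whose ID is already stored; the filtering step is an assumption, and events is assumed to come from utilsv2.parser.parse:

    client = connect("mongodb://localhost:27017")
    existing = get_ids(client["main"]["quakes"])  # IDs already in the collection
    new_events = [ev for ev in events if ev.get("ID") not in existing]
    if new_events:
        add_events(client, "quakes", new_events)
    close(client)
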
utilsv2/nordic.py

@@ -10,7 +10,11 @@ from utilsv2.log import logger
 logger = logging.getLogger(__name__)

+type evtype = dict[str, Any]
+type sttype = dict[str, Any]
+
+# INFO: Don't think we really need this
 class Mag:
     def __init__(self, mag: float, type: str, agency: str):
         self.mag = mag
@@ -24,7 +28,7 @@ class Mag:
         return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4)

-def parse(event: list[str]) -> dict[str, Any]:
+def parse_event(event: list[str]) -> evtype:
     # nordic must always have the first line a type 1 line
     # but a type 1 line can have the id omitted if it's the first line
     # if event[0][-1] != "1" or event[0][-1] != " ":
@@ -35,16 +39,52 @@ def parse(event: list[str]) -> dict[str, Any]:
     for line in event:
         toParse[line[-1]].append(line)

+    _ret = {}
     for k, v in toParse.items():
         match k:
             case "1":
-                parse_type_1(v)
+                aux = parse_type_1(v)
+                if aux:
+                    _ret.update(aux)
             case "3":
-                parse_type_3(v)
+                _ret.update(parse_type_3(v))
+            case "6":
+                _ret.update(parse_type_6(v))
+            case "E":
+                _ret.update(parse_type_e(v))
+            case "I":
+                _ret.update(parse_type_i(v))
             case _:
                 pass
-    return {}
+    print(_ret)
+    return _ret
+
+
+def parse_stations_V1(lines: list[str], event_id: int) -> sttype:
+    _ret = {"ID": event_id, "stations": {}}
+    for st in lines:
+        try:
+            ampl = float(st[35:40])
+        except ValueError:
+            ampl = None
+        station = st[1:6].strip()
+        if station not in _ret["stations"].keys():
+            _ret["stations"][station] = []
+        _ret["stations"][station].append(
+            {
+                "Component": st[6:9].strip(),
+                "I": None if st[9] == " " else st[9],
+                "Time": parse_dt(st[18:30], True).strftime("%H:%M:%S.%f%z"),
+                "Phase": st[10:15].strip(),
+                "Weigth": None if st[15] == " " else st[15],
+                "Amplitude": ampl,
+            }
+        )
+    return _ret

 def parse_type_1(lines: list[str]) -> dict[str, Any] | None:
@@ -52,8 +92,8 @@ def parse_type_1(lines: list[str]) -> dict[str, Any] | None:
     for line in lines:
         if "Date" not in line1.keys():
             dt = parse_dt(line[:21])
-            dist_ind = line[20]
-            event_id = line[21]
+            dist_ind = line[21]
+            event_id = line[22]
             lat = float(line[24:31])
             long = float(line[30:39])
             depth = float(line[38:44])
@@ -78,6 +118,7 @@ def parse_type_1(lines: list[str]) -> dict[str, Any] | None:
 def parse_type_3(lines: list[str]) -> dict[str, Any]:
     comments = {"Sentido": "", "Regiao": "", "VZ": None, "SZ": None, "FE": None}
     for line in lines:
         if line.startswith(" SENTIDO"):
             aux = line[:-2].split(":", maxsplit=1)
@@ -94,20 +135,31 @@ def parse_type_3(lines: list[str]) -> dict[str, Any]:
             else:
                 comments["Regiao"] = item[1:]
+    print(comments)
     return comments

-def parse_type_6():
-    pass
+def parse_type_6(lines: list[str]) -> dict[str, list[str]]:
+    _ret = {"Wavename": []}
+    for line in lines:
+        _ret["Wavename"].append(line[:-2].strip())
+    return _ret

-def parse_type_e():
-    pass
+def parse_type_e(lines: list[str]) -> dict[str, int]:
+    err = {}
+    for line in lines:
+        gap = int(line[5:8])
+        err["GAP"] = gap
+    return err

-def parse_type_7():
-    pass
+def parse_type_i(lines: list[str]) -> dict[str, int]:
+    aux = {}
+    for line in lines:
+        aux["ID"] = int(line[60:75])
+    return aux

 def parse_dt(_text: str, isStation=False) -> datetime | time:
@@ -125,9 +177,9 @@ def parse_dt(_text: str, isStation=False) -> datetime | time:
         )
         return dt
     else:
-        h = int(_text[11:13])
-        m = int(_text[13:15])
-        s_ms = int(float(_text[16:20]) * 1000)
+        h = int(_text[:2])
+        m = int(_text[2:4])
+        s_ms = int(float(_text[5:]) * 1000)
         s = s_ms // 1000
         s_ms = s_ms % 1000
         dt = time(hour=h, minute=m, second=s, microsecond=s_ms)

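parse_event() now merges the per-type parsers' dictionaries into a single event document. A sketch of the resulting shape, based only on the keys visible in this diff; all values are made up, and the header fields produced by parse_type_1 are not fully shown here, so they are omitted:

    example_event = {
        # from parse_type_3 (comment lines)
        "Sentido": "", "Regiao": "", "VZ": None, "SZ": None, "FE": None,
        # from parse_type_6 (waveform file name; hypothetical value)
        "Wavename": ["2025-01-01-0000-00S.TEST__001"],
        # from parse_type_e (gap value on the error line; hypothetical value)
        "GAP": 120,
        # from parse_type_i (event ID, also used to link the station block; hypothetical value)
        "ID": 20250101000000,
    }
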
utilsv2/parser.py

@@ -1,9 +1,10 @@
 import logging
 from io import TextIOWrapper
+from typing import Any

 from utilsv2 import utils
 from utilsv2.log import logger
-from utilsv2.nordic import parse as n_parse
+from utilsv2.nordic import evtype, parse_event, parse_stations_V1, sttype

 logger = logging.getLogger(__name__)
@@ -35,16 +36,43 @@ def find_events(fp: TextIOWrapper) -> list[tuple[int, int]]:
     return event_indices

-def extract_event(fp: TextIOWrapper, event_bounds: list[tuple[int, int]]):
+def split_event(lines: list[str], start: int, end: int) -> int:
+    for idx in range(start, end):
+        if lines[idx].endswith("7"):
+            return idx
+    return -1
+
+
+def extract_event(
+    fp: TextIOWrapper, event_bounds: list[tuple[int, int]]
+) -> tuple[list[evtype], list[sttype]]:
     lines = fp.read().split("\n")
-    print(len(lines))
-    print(event_bounds)
+    events, ev_stations = [], []
     for event_idx in event_bounds:
-        n_parse(lines[event_idx[0] : event_idx[1]])
+        stations = split_event(lines, event_idx[0], event_idx[1])
+        if stations == -1:
+            logger.error(f"Could not parse event at pos {event_idx}")
+            continue
+        ev = parse_event(lines[event_idx[0] : stations])
+        events.append(ev)
+        ev_stations.append(
+            parse_stations_V1(lines[stations + 1 : event_idx[1]], ev["ID"])
+        )
+    return events, ev_stations

-def parse(fname: str) -> None:
+def massage_magnitudes(data: list[evtype]) -> list[evtype]:
+    data = data
+    for idx, ev in enumerate(data):
+        for jdx, mag in enumerate(ev["Magnitudes"]):
+            data[idx]["Magnitudes"][jdx] = mag.toJSON()
+    return data
+
+
+def parse(fname: str):
     _ret = read_file(fname)
     if not isinstance(_ret, TextIOWrapper):
         logger.critical(_ret.__str__())
@@ -52,7 +80,9 @@ def parse(fname: str) -> None:
     events = find_events(_ret)
     _ret.seek(0)
-    extract_event(_ret, events)
+    evs, stations = extract_event(_ret, events)
     # cleanup
     _ret.close()
+    return evs, stations
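One consequence of massage_magnitudes() worth noting: Mag.toJSON() returns a JSON string, so each entry of an event's Magnitudes list is stored in MongoDB as text rather than as a sub-document. A small sketch of the difference, with hypothetical Mag values; storing the attribute dict instead would keep the magnitudes queryable:

    mag = Mag(2.1, "L", "TES")   # hypothetical magnitude, type and agency
    as_text = mag.toJSON()       # pretty-printed JSON string; stored as plain text
    as_subdoc = mag.__dict__     # roughly {"mag": 2.1, "type": "L", "agency": "TES"}
    # Sub-documents support queries such as {"Magnitudes.mag": {"$gte": 2.0}};
    # JSON strings do not.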