diff --git a/ev2.py b/ev2.py index ad80e58..6501387 100644 --- a/ev2.py +++ b/ev2.py @@ -1,5 +1,6 @@ import logging +from utilsv2 import mongo from utilsv2.log import logger from utilsv2.parser import parse @@ -11,7 +12,15 @@ def main(): if __name__ == "__main__": logger = logging.getLogger(__name__) + # initialization logger.info("Started") - parse("dados.txt") + cli = mongo.connect("mongodb://localhost:27017") + evs, stats = parse("dados.txt") + + mongo.add_events(cli, "quakes", evs) + mongo.add_stations(cli, "stations", stats) + + # cleanup + mongo.close(cli) logger.info("Ended") diff --git a/utilsv2/log.py b/utilsv2/log.py index 7555215..bb14b8b 100644 --- a/utilsv2/log.py +++ b/utilsv2/log.py @@ -3,7 +3,7 @@ import logging logger = logging.getLogger(__name__) logging.basicConfig( - format="%(asctime)s %(name)s [%(levelname)s]: %(message)s", + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S", level=logging.INFO, filename="ev.log", diff --git a/utilsv2/mongo.py b/utilsv2/mongo.py new file mode 100644 index 0000000..7aa0308 --- /dev/null +++ b/utilsv2/mongo.py @@ -0,0 +1,71 @@ +import logging +from typing import Any + +from pymongo import MongoClient +from pymongo.collection import Collection +from pymongo.errors import ConnectionFailure + +try: + from utilsv2.log import logger + from utilsv2.parser import massage_magnitudes +except ModuleNotFoundError: + from log import logger + from parser import massage_magnitudes + +logger = logging.getLogger(__name__) + + +def connect(uri) -> MongoClient: + try: + client = MongoClient(uri) + logger.info("Connected to the DB") + except ConnectionFailure as e: + logger.critical("Could not connect to the MongoDB") + raise e + + return client + + +def add_events( + client: MongoClient, collection: str, data: list[dict[str, Any]] +) -> None: + db = client["main"] + coll = db[collection] + + data = massage_magnitudes(data) + + _res = coll.insert_many(data) + + if _res.acknowledged: + logger.info(f"Added {len(_res.inserted_ids)} events.") + else: + logger.info("Could not add events to the database.") + + +def add_stations( + client: MongoClient, collection: str, data: list[dict[str, Any]] +) -> None: + db = client["main"] + coll = db[collection] + + _res = coll.insert_many(data) + + if _res.acknowledged: + logger.info(f"Added {len(_res.inserted_ids)} events.") + else: + logger.info("Could not add events to the database.") + + +def get_ids(collection: Collection) -> set[Any]: + return set(collection.distinct("ID")) + + +def close(client: MongoClient) -> None: + client.close() + logger.info("Closed the DB.") + + +if __name__ == "__main__": + v = connect("mongodb://localhost:27017") + + close(v) diff --git a/utilsv2/nordic.py b/utilsv2/nordic.py index 8d9b8c4..38ca6fc 100644 --- a/utilsv2/nordic.py +++ b/utilsv2/nordic.py @@ -10,7 +10,11 @@ from utilsv2.log import logger logger = logging.getLogger(__name__) +type evtype = dict[str, Any] +type sttype = dict[str, Any] + +# INFO: Don't think we really need this class Mag: def __init__(self, mag: float, type: str, agency: str): self.mag = mag @@ -24,7 +28,7 @@ class Mag: return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) -def parse(event: list[str]) -> dict[str, Any]: +def parse_event(event: list[str]) -> evtype: # nordic must always have the first line a type 1 line # but a type 1 line can have the id ommited if it's the first line # if event[0][-1] != "1" or event[0][-1] != " ": @@ -35,16 +39,52 @@ def parse(event: list[str]) -> dict[str, Any]: for line in event: toParse[line[-1]].append(line) + _ret = {} + for k, v in toParse.items(): match k: case "1": - parse_type_1(v) + aux = parse_type_1(v) + if aux: + _ret.update(aux) case "3": - parse_type_3(v) + _ret.update(parse_type_3(v)) + case "6": + _ret.update(parse_type_6(v)) + case "E": + _ret.update(parse_type_e(v)) + case "I": + _ret.update(parse_type_i(v)) case _: pass + print(_ret) + return _ret - return {} + +def parse_stations_V1(lines: list[str], event_id: int) -> sttype: + _ret = {"ID": event_id, "stations": {}} + for st in lines: + try: + ampl = float(st[35:40]) + except ValueError: + ampl = None + + station = st[1:6].strip() + if station not in _ret["stations"].keys(): + _ret["stations"][station] = [] + + _ret["stations"][station].append( + { + "Component": st[6:9].strip(), + "I": None if st[9] == " " else st[9], + "Time": parse_dt(st[18:30], True).strftime("%H:%M:%S.%f%z"), + "Phase": st[10:15].strip(), + "Weigth": None if st[15] == " " else st[15], + "Amplitude": ampl, + } + ) + + return _ret def parse_type_1(lines: list[str]) -> dict[str, Any] | None: @@ -52,8 +92,8 @@ def parse_type_1(lines: list[str]) -> dict[str, Any] | None: for line in lines: if "Date" not in line1.keys(): dt = parse_dt(line[:21]) - dist_ind = line[20] - event_id = line[21] + dist_ind = line[21] + event_id = line[22] lat = float(line[24:31]) long = float(line[30:39]) depth = float(line[38:44]) @@ -78,6 +118,7 @@ def parse_type_1(lines: list[str]) -> dict[str, Any] | None: def parse_type_3(lines: list[str]) -> dict[str, Any]: comments = {"Sentido": "", "Regiao": "", "VZ": None, "SZ": None, "FE": None} + for line in lines: if line.startswith(" SENTIDO"): aux = line[:-2].split(":", maxsplit=1) @@ -94,20 +135,31 @@ def parse_type_3(lines: list[str]) -> dict[str, Any]: else: comments["Regiao"] = item[1:] - print(comments) return comments -def parse_type_6(): - pass +def parse_type_6(lines: list[str]) -> dict[str, list[str]]: + _ret = {"Wavename": []} + for line in lines: + _ret["Wavename"].append(line[:-2].strip()) + return _ret -def parse_type_e(): - pass +def parse_type_e(lines: list[str]) -> dict[str, int]: + err = {} + for line in lines: + gap = int(line[5:8]) + err["GAP"] = gap + + return err -def parse_type_7(): - pass +def parse_type_i(lines: list[str]) -> dict[str, int]: + aux = {} + for line in lines: + aux["ID"] = int(line[60:75]) + + return aux def parse_dt(_text: str, isStation=False) -> datetime | time: @@ -125,9 +177,9 @@ def parse_dt(_text: str, isStation=False) -> datetime | time: ) return dt else: - h = int(_text[11:13]) - m = int(_text[13:15]) - s_ms = int(float(_text[16:20]) * 1000) + h = int(_text[:2]) + m = int(_text[2:4]) + s_ms = int(float(_text[5:]) * 1000) s = s_ms // 1000 s_ms = s_ms % 1000 dt = time(hour=h, minute=m, second=s, microsecond=s_ms) diff --git a/utilsv2/parser.py b/utilsv2/parser.py index 6938679..d13bb3e 100644 --- a/utilsv2/parser.py +++ b/utilsv2/parser.py @@ -1,9 +1,10 @@ import logging from io import TextIOWrapper +from typing import Any from utilsv2 import utils from utilsv2.log import logger -from utilsv2.nordic import parse as n_parse +from utilsv2.nordic import evtype, parse_event, parse_stations_V1, sttype logger = logging.getLogger(__name__) @@ -35,16 +36,43 @@ def find_events(fp: TextIOWrapper) -> list[tuple[int, int]]: return event_indices -def extract_event(fp: TextIOWrapper, event_bounds: list[tuple[int, int]]): +def split_event(lines: list[str], start: int, end: int) -> int: + for idx in range(start, end): + if lines[idx].endswith("7"): + return idx + return -1 + + +def extract_event( + fp: TextIOWrapper, event_bounds: list[tuple[int, int]] +) -> tuple[list[evtype], list[sttype]]: lines = fp.read().split("\n") - print(len(lines)) - print(event_bounds) + events, ev_stations = [], [] for event_idx in event_bounds: - n_parse(lines[event_idx[0] : event_idx[1]]) + stations = split_event(lines, event_idx[0], event_idx[1]) + if stations == -1: + logger.error(f"Could not parse event at pos {event_idx}") + continue + ev = parse_event(lines[event_idx[0] : stations]) + events.append(ev) + ev_stations.append( + parse_stations_V1(lines[stations + 1 : event_idx[1]], ev["ID"]) + ) + + return events, ev_stations -def parse(fname: str) -> None: +def massage_magnitudes(data: list[evtype]) -> list[evtype]: + data = data + for idx, ev in enumerate(data): + for jdx, mag in enumerate(ev["Magnitudes"]): + data[idx]["Magnitudes"][jdx] = mag.toJSON() + + return data + + +def parse(fname: str): _ret = read_file(fname) if not isinstance(_ret, TextIOWrapper): logger.critical(_ret.__str__()) @@ -52,7 +80,9 @@ def parse(fname: str) -> None: events = find_events(_ret) _ret.seek(0) - extract_event(_ret, events) + evs, stations = extract_event(_ret, events) # cleanup _ret.close() + + return evs, stations