gráficos, estatísticas e filtros

This commit is contained in:
2025-12-11 14:18:03 -01:00
parent 490c88085a
commit 14dee58ab2
5 changed files with 286 additions and 69 deletions

View File

@@ -13,7 +13,26 @@ First, let's represent the data using Python's Pandas module and implement CRUD
- T2 - Implement CRUD operations through a text menu; - T2 - Implement CRUD operations through a text menu;
- T3 - Implement statistical operations such as: average, variance, standard deviation, max, min, mode; through a text menu; - T3 - Implement statistical operations such as: average, variance, standard deviation, max, min, mode; through a text menu;
- T4 - Convert from Pandas to JSON and save it in a text file; - T4 - Convert from Pandas to JSON and save it in a text file;
- T5 - to be continued ... - T5 - Calcular as seguintes estatísticas:
- Número de eventos por dia e por mês.
- Média e desvio padrão da profundidade e da magnitude por mês.
- Mediana, 1º quartil e 3º quartil da profundidade e da magnitude por mês.
- Máximo e mínimo da profundidade e da magnitude por mês.
- T6 - Para a representação gráfica:
- Um gráfico de barras com o número de eventos por dia.
- Um gráfico de barras com o número de eventos por mês.
- Um gráfico linear com a média +/- o desvio padrão das profundidades por mês.
- Um gráfico linear com a média +/- o desvio padrão da magnitude L por mês.
- Um gráfico tipo "boxplot" com as profundidades por mês.
- Um gráfico tipo "boxplot" com as magnitudes L por mês.
- T7 - Implementar os filtros de seleção de eventos para o cálculo / representação gráfica:
- Período temporal (Data inicial, Data final).
- Eventos com GAP menor que um determinado valor.
- Qualidade (EPI ou Todos).
- Zonas SZ.
- Zonas VZ.
- Limitar por Magnitudes L (mínimo, máximo).
- Limitar Profundidades (mínimo, máximo).
## Prazos ## Prazos
- T1 a T4 -> 10 de novembro - T1 a T4 -> 10 de novembro

View File

@@ -1,6 +1,5 @@
# pyright: basic # pyright: basic
import io import io
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
@@ -15,36 +14,41 @@ TYPE = {"Q": "Quake", "V": "Volcanic", "U": "Unknown", "E": "Explosion"}
def is_blank(l: str) -> bool: def is_blank(l: str) -> bool:
return len(l.strip(" ")) == 0 return len(l.strip(" ")) == 0
def parse_flt(v:str) -> float | None:
def parse_flt(v: str) -> float | None:
try: try:
t = float(v) t = float(v)
return t return t
except ValueError: except ValueError:
return None return None
def parse_int(v:str) -> int | None:
def parse_int(v: str) -> int | None:
try: try:
t = int(v) t = int(v)
return t return t
except ValueError: except ValueError:
return None return None
def into_dataframe(data) -> pd.DataFrame: def into_dataframe(data) -> pd.DataFrame:
if len(data) == 0: if len(data) == 0:
return pd.DataFrame() return pd.DataFrame()
aux = {k: [] for k in data.keys()} aux = {k: [] for k in data.keys()}
for (k,v) in data.items(): for k, v in data.items():
aux[k].append(v) aux[k].append(v)
return pd.DataFrame(data=aux) return pd.DataFrame(data=aux)
def _concat(preamble, df: pd.DataFrame): def _concat(preamble, df: pd.DataFrame):
for (k,v) in preamble.items(): for k, v in preamble.items():
df.insert(len(df.columns)-1, k, [v for _ in range(len(df))]) df.insert(len(df.columns) - 1, k, [v for _ in range(len(df))])
return df return df
def validate_no_stations(expected:int , stationsDF:pd.DataFrame) -> bool:
def validate_no_stations(expected: int, stationsDF: pd.DataFrame) -> bool:
uniqueStations = stationsDF["Estacao"].nunique() uniqueStations = stationsDF["Estacao"].nunique()
return expected == uniqueStations return expected == uniqueStations
@@ -55,29 +59,31 @@ def parse(fname):
data = [l for l in fp.read().split("\n")] data = [l for l in fp.read().split("\n")]
chunks = boundaries(data) chunks = boundaries(data)
df = pd.DataFrame() df = pd.DataFrame()
for (idx,c) in enumerate(chunks): for idx, c in enumerate(chunks):
a = parse_chunk(data[c[0]:c[1]]) a = parse_chunk(data[c[0] : c[1]])
aux = pd.concat([df, a], axis=0, ignore_index=True) aux = pd.concat([df, a], axis=0, ignore_index=True)
df = aux df = aux
fp.close() fp.close()
return df return df
def boundaries(data: list[str]): def boundaries(data: list[str]):
boundaries = [] boundaries = []
start = None start = None
for (idx,l) in enumerate(data): for idx, l in enumerate(data):
if start is None: if start is None:
if not is_blank(l): if not is_blank(l):
start = idx start = idx
else: else:
if is_blank(l): if is_blank(l):
boundaries.append((start,idx)) boundaries.append((start, idx))
start = None start = None
return boundaries return boundaries
def parse_chunk(chunk_lines: list[str]): def parse_chunk(chunk_lines: list[str]):
hIdx = None hIdx = None
for (idx, l) in enumerate(chunk_lines): for idx, l in enumerate(chunk_lines):
if l[-1] == "7": if l[-1] == "7":
hIdx = idx hIdx = idx
break break
@@ -89,6 +95,7 @@ def parse_chunk(chunk_lines: list[str]):
return _concat(preambleRet, phaseRet) return _concat(preambleRet, phaseRet)
def _parse_preamble(hLines: list[str]): def _parse_preamble(hLines: list[str]):
aux = defaultdict(list) aux = defaultdict(list)
@@ -111,7 +118,7 @@ def _parse_preamble(hLines: list[str]):
pass pass
headerDict = dict() headerDict = dict()
for (k,v) in aux.items(): for k, v in aux.items():
if len(v) != 0: if len(v) != 0:
headerDict.update(FUNCS[k](v)) headerDict.update(FUNCS[k](v))
return headerDict return headerDict
@@ -126,7 +133,7 @@ def _parse_type_1(data: list[str]):
m = int(aux[13:15]) m = int(aux[13:15])
s = int(aux[16:18]) s = int(aux[16:18])
mil = int(aux[19]) * 10**5 mil = int(aux[19]) * 10**5
dt = datetime(y,mo,d,h,m,s,mil) dt = datetime(y, mo, d, h, m, s, mil)
dist_ind = DIST_IND[aux[21]] dist_ind = DIST_IND[aux[21]]
ev_type = TYPE[aux[22]] ev_type = TYPE[aux[22]]
@@ -135,18 +142,28 @@ def _parse_type_1(data: list[str]):
depth = float(aux[38:43]) depth = float(aux[38:43])
no_stat = int(aux[48:51]) no_stat = int(aux[48:51])
hypo = {"Data": dt.isoformat(), "Distancia": dist_ind, "Tipo Evento": ev_type, "Latitude": lat, "Longitude": long, "Profundidade": depth, "Estacoes": no_stat, "Magnitudes": list()} hypo = {
"Data": dt.isoformat(),
"Distancia": dist_ind,
"Tipo Evento": ev_type,
"Latitude": lat,
"Longitude": long,
"Profundidade": depth,
"Estacoes": no_stat,
"Magnitudes": list(),
}
for l in data: for l in data:
hypo["Magnitudes"] = hypo["Magnitudes"] + _parse_mag(l) hypo["Magnitudes"] = hypo["Magnitudes"] + _parse_mag(l)
return hypo return hypo
def _parse_mag(line: str): def _parse_mag(line: str):
magnitudes = [] magnitudes = []
base = 55 base = 55
while base < 79: while base < 79:
m = line[base:base+4] m = line[base : base + 4]
mt = line[base+4] mt = line[base + 4]
if not is_blank(m): if not is_blank(m):
magnitudes.append({"Magnitude": m, "Tipo": mt}) magnitudes.append({"Magnitude": m, "Tipo": mt})
base += 8 base += 8
@@ -156,11 +173,24 @@ def _parse_mag(line: str):
def _parse_type_3(data: list[str]): def _parse_type_3(data: list[str]):
comments = {} comments = {}
for line in data: for line in data:
if line.startswith(" SENTIDO") or line.startswith(" REGIAO"): if line.startswith(" SENTIDO"):
c, v = line[:-2].strip().split(": ", maxsplit=1) c, v = line[:-2].strip().split(": ", maxsplit=1)
v = v.split(",")[0] v = v.split(",")[0]
comments[c.capitalize()] = v comments[c.capitalize()] = v
elif line.startswith(" REGIAO"):
c, vals = line[:-2].strip().split(": ", maxsplit=1)
_d = {}
for v in vals.split(","):
if v.startswith("SZ"):
comments["SZ"] = int(v[2:])
elif v.startswith("VZ"):
comments["VZ"] = int(v[2:])
elif v.startswith("FE"):
comments["FZ"] = v[2:]
else:
comments["Regiao"] = v
return comments return comments
@@ -173,21 +203,59 @@ def _parse_type_6(data: list[str]):
def _parse_type_7(data: list[str]): def _parse_type_7(data: list[str]):
aux = io.StringIO("\n".join(data)) aux = io.StringIO("\n".join(data))
dados = pd.read_fwf(aux, colspecs=[(1,5), (6,8),(10,15), (18,20), (20,22), (23,28), (34,38), (71,75)]) dados = pd.read_fwf(
dados.rename(columns={'STAT': "Estacao", 'SP': "Componente" , 'PHASW': "Tipo Onda", 'HR': "Hora", 'MM': "Min", 'SECON': "Seg", 'AMPL': "Amplitude", " DIST": "Distancia Epicentro"}, inplace=True) aux,
colspecs=[
(1, 5),
(6, 8),
(10, 15),
(18, 20),
(20, 22),
(23, 28),
(34, 38),
(71, 75),
],
)
dados.rename(
columns={
"STAT": "Estacao",
"SP": "Componente",
"PHASW": "Tipo Onda",
"HR": "Hora",
"MM": "Min",
"SECON": "Seg",
"AMPL": "Amplitude",
" DIST": "Distancia Epicentro",
},
inplace=True,
)
return dados return dados
def _parse_type_e(data: list[str]): def _parse_type_e(data: list[str]):
aux = data[0] aux = data[0]
error = {"Gap": int(aux[5:8]), "Origin": float(aux[14:20]), "Error_lat": float(aux[24:30]), "Error_long": float(aux[32:38]), "Error_depth": float(aux[38:43]), "Cov_xy": float(aux[43:55]), "Cov_xz": float(aux[55:67]), "Cov_yz": float(aux[67:79])} error = {
"Gap": int(aux[5:8]),
"Origin": float(aux[14:20]),
"Error_lat": float(aux[24:30]),
"Error_long": float(aux[32:38]),
"Error_depth": float(aux[38:43]),
"Cov_xy": float(aux[43:55]),
"Cov_xz": float(aux[55:67]),
"Cov_yz": float(aux[67:79]),
}
return error return error
def _parse_type_i(data: list[str]): def _parse_type_i(data: list[str]):
aux = data[0] aux = data[0]
return {"ID":int(aux[60:74])} return {"ID": int(aux[60:74])}
FUNCS = {1: _parse_type_1, 3: _parse_type_3, 6: _parse_type_6, "E": _parse_type_e, "I": _parse_type_i} FUNCS = {
1: _parse_type_1,
3: _parse_type_3,
6: _parse_type_6,
"E": _parse_type_e,
"I": _parse_type_i,
}

View File

@@ -1,6 +1,8 @@
import collections import collections
import datetime import datetime
import numpy as np
import stats
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
@@ -13,7 +15,7 @@ class Plotter:
pass pass
def plot_events_day(self): def plot_events_day(self):
values = collections.Counter(self._preprare_days()) values = collections.Counter(stats._preprare_days(self.raw_data))
x = list(values.keys()) x = list(values.keys())
y = list(values.values()) y = list(values.values())
@@ -23,7 +25,7 @@ class Plotter:
plt.show() plt.show()
def plot_events_month(self): def plot_events_month(self):
values = collections.Counter(self._preprare_months()) values = collections.Counter(stats._preprare_months(self.raw_data))
x = list(values.keys()) x = list(values.keys())
y = list(values.values()) y = list(values.values())
@@ -32,26 +34,6 @@ class Plotter:
ax.bar(x, y) ax.bar(x, y)
plt.show() plt.show()
def _preprare_days(self):
c = self.raw_data.Data.to_list()
for idx, d in enumerate(c):
aux = datetime.datetime.fromisoformat(d)
c[idx] = datetime.datetime.strftime(aux, "%Y-%m-%d")
return c
def _preprare_months(self):
c = self.raw_data.Data.to_list()
for idx, d in enumerate(c):
aux = datetime.datetime.fromisoformat(d)
c[idx] = datetime.datetime.strftime(aux, "%Y-%m")
return c
def _prepare_mags(self):
pass
# c = self.raw_data.
if __name__ == "__main__": if __name__ == "__main__":
import parser import parser
@@ -59,4 +41,10 @@ if __name__ == "__main__":
asdf = parser.parse("../dados.txt") asdf = parser.parse("../dados.txt")
a = Plotter(asdf) a = Plotter(asdf)
print(a.raw_data.dtypes) # b = stats._filter_mags(a.raw_data, more_than=2.5, less_than=2.9)
c = stats.filter_date(
a.raw_data,
after=datetime.datetime(year=2014, month=1, day=6),
before=datetime.datetime(year=2014, month=1, day=12),
)
print(c)

View File

@@ -1,12 +1,14 @@
# pyright: basic # pyright: basic
import datetime
import os import os
import sys import sys
import pandas as pd
import numpy as np import numpy as np
import pandas as pd
import utils
STAT_HEADER ="""=== Terramotos === STAT_HEADER = """=== Terramotos ===
== Estatísticas == == Estatísticas ==
""" """
@@ -26,7 +28,7 @@ FILTER_CHOICES = """[1] Magnitudes
""" """
CHOICE = {"1": "Magnitudes", "2": "Distancia","3": "Prof"} CHOICE = {"1": "Magnitudes", "2": "Distancia", "3": "Prof"}
def filter_submenu(type: str): def filter_submenu(type: str):
@@ -124,7 +126,7 @@ def stat_menu(df: pd.DataFrame):
def average(df: pd.DataFrame, filter_by): def average(df: pd.DataFrame, filter_by):
events = df.drop_duplicates(subset="ID", keep='first') events = df.drop_duplicates(subset="ID", keep="first")
values = events[filter_by].to_numpy() values = events[filter_by].to_numpy()
if filter_by == "Magnitudes": if filter_by == "Magnitudes":
@@ -136,7 +138,7 @@ def average(df: pd.DataFrame, filter_by):
def variance(df, filter_by): def variance(df, filter_by):
events = df.drop_duplicates(subset="ID", keep='first') events = df.drop_duplicates(subset="ID", keep="first")
values = events[filter_by].to_numpy() values = events[filter_by].to_numpy()
if filter_by == "Magnitudes": if filter_by == "Magnitudes":
@@ -149,7 +151,7 @@ def variance(df, filter_by):
def std_dev(df, filter_by): def std_dev(df, filter_by):
events = df.drop_duplicates(subset="ID", keep='first') events = df.drop_duplicates(subset="ID", keep="first")
values = events[filter_by].to_numpy() values = events[filter_by].to_numpy()
if filter_by == "Magnitudes": if filter_by == "Magnitudes":
@@ -162,7 +164,7 @@ def std_dev(df, filter_by):
def max_v(df, filter_by): def max_v(df, filter_by):
events = df.drop_duplicates(subset="ID", keep='first') events = df.drop_duplicates(subset="ID", keep="first")
values = events[filter_by].to_numpy() values = events[filter_by].to_numpy()
if filter_by == "Magnitudes": if filter_by == "Magnitudes":
@@ -172,7 +174,7 @@ def max_v(df, filter_by):
def min_v(df, filter_by): def min_v(df, filter_by):
events = df.drop_duplicates(subset="ID", keep='first') events = df.drop_duplicates(subset="ID", keep="first")
values = events[filter_by].to_numpy() values = events[filter_by].to_numpy()
if filter_by == "Magnitudes": if filter_by == "Magnitudes":
@@ -182,7 +184,7 @@ def min_v(df, filter_by):
def moda(df, filter_by): def moda(df, filter_by):
events = df.drop_duplicates(subset="ID", keep='first') events = df.drop_duplicates(subset="ID", keep="first")
values = events[filter_by].to_numpy() values = events[filter_by].to_numpy()
if filter_by == "Magnitudes": if filter_by == "Magnitudes":
@@ -191,7 +193,7 @@ def moda(df, filter_by):
uniques, count = np.unique(values, return_counts=True) uniques, count = np.unique(values, return_counts=True)
uniques_list = list(zip(uniques, count)) uniques_list = list(zip(uniques, count))
return sorted(uniques_list, reverse=True ,key=lambda x: x[1])[0][0] return sorted(uniques_list, reverse=True, key=lambda x: x[1])[0][0]
def _unpack_mags(arr: np.ndarray): def _unpack_mags(arr: np.ndarray):
@@ -201,3 +203,128 @@ def _unpack_mags(arr: np.ndarray):
newVals = np.append(newVals, float(m["Magnitude"])) newVals = np.append(newVals, float(m["Magnitude"]))
return newVals return newVals
def filter_mags(data, more_than=None, less_than=None):
    """Filter events by local (L) magnitude into a new DataFrame.

    :param data: Raw pandas DataFrame (one row per station reading).
    :param more_than (optional): Keep events with MagL >= this value.
    :param less_than (optional): Keep events with MagL <= this value.
    :returns: New DataFrame with columns "Data" and "MagL", one row per event.
    """
    # Station rows share their event's ID; keep one row per event.
    events = data.drop_duplicates(subset="ID", keep="first")
    cols = {"Data": [], "MagL": []}
    for _, row in events.iterrows():
        cols["Data"].append(str(row.Data))
        cols["MagL"].append(utils.extract_mag_l(row.Magnitudes))
    out = pd.DataFrame.from_dict(cols)
    # Compare against None explicitly so a 0.0 threshold is not ignored.
    if more_than is not None:
        out = out[out["MagL"] >= more_than]
    if less_than is not None:
        out = out[out["MagL"] <= less_than]
    return out
def filter_date(
data: pd.DataFrame,
before: datetime.datetime | None = None,
after: datetime.datetime | None = None,
) -> pd.DataFrame:
"""Filters by date a DataFrame into a new Dataframe
:param data: Raw pandas DataFrame
:param before(optional): Filter for dates before set date
:param after(optional): Filters for dates after set date
:returns: Returns a filtered pandas DataFrame
"""
v = data
for idx, c in v.iterrows():
v.at[idx, "Data"] = datetime.datetime.fromisoformat(c.Data)
if after:
v = v[v["Data"] >= after]
if before:
v = v[v["Data"] >= before]
return v
def filter_depth(
data: pd.DataFrame,
less_than: float | None = None,
more_than: float | None = None,
) -> pd.DataFrame:
"""Filters by the depth a DataFrame into a new Dataframe
:param data: Raw pandas DataFrame
:param less_than(optional): Filter for depths below the threshold
:param after(optional): Filters for depths deeper than threshold
:returns: Returns a filtered pandas DataFrame
"""
v = data.drop_duplicates(subset="ID", keep="first")
if more_than:
v = v[v["Profundidade"] >= more_than]
if less_than:
v = v[v["Profundidade"] >= less_than]
return v
def filter_gap(
    data: pd.DataFrame,
    threshold: int,
) -> pd.DataFrame:
    """Filter events by azimuthal GAP into a new DataFrame.

    :param data: Raw pandas DataFrame (one row per station reading).
    :param threshold: Keep events whose "Gap" is <= this value.
    :returns: A filtered pandas DataFrame, one row per event.
    """
    # Docstring of the original wrongly said "Filters by the depth".
    events = data.drop_duplicates(subset="ID", keep="first")
    return events[events["Gap"] <= threshold]
def filter_sz(
    data: pd.DataFrame,
) -> pd.DataFrame:
    """Restrict a DataFrame to events that carry an SZ-zone value.

    :param data: Raw pandas DataFrame.
    :returns: A pandas DataFrame with only the rows where "SZ" is not NaN.
    """
    has_sz = data["SZ"].notna()
    return data[has_sz]
def filter_vz(
    data: pd.DataFrame,
) -> pd.DataFrame:
    """Restrict a DataFrame to events that carry a VZ-zone value.

    :param data: Raw pandas DataFrame.
    :returns: A pandas DataFrame with only the rows where "VZ" is not NaN.
    """
    has_vz = data["VZ"].notna()
    return data[has_vz]
def _preprare_days(data):
c = data.Data.to_list()
for idx, d in enumerate(c):
aux = datetime.datetime.fromisoformat(d)
c[idx] = datetime.datetime.strftime(aux, "%Y-%m-%d")
return c
def _preprare_months(data):
c = data.Data.to_list()
for idx, d in enumerate(c):
aux = datetime.datetime.fromisoformat(d)
c[idx] = datetime.datetime.strftime(aux, "%Y-%m")
return c

View File

@@ -1,20 +1,30 @@
#! /usr/bin/env python #! /usr/bin/env python
# pyright: basic # pyright: basic
from datetime import time
import json import json
from datetime import time
from math import modf from math import modf
from typing import Any from typing import Any
import numpy as np
import pandas as pd import pandas as pd
def extract_mag_l(data) -> np.float64:
for v in data:
if v["Tipo"] == "L":
return np.float64(v["Magnitude"])
return np.float64(0.0)
def save_as_json(info: dict[str, Any]) -> bool: def save_as_json(info: dict[str, Any]) -> bool:
with open("test.json", "w") as fp: with open("test.json", "w") as fp:
json.dump(info, fp) json.dump(info, fp)
return True return True
# TODO: passar os nomes das colunas, para não haver problemas no futuro, caso se altere os nomes da dataframe # TODO: passar os nomes das colunas, para não haver problemas no futuro, caso se altere os nomes da dataframe
def create_dict_struct(df: pd.DataFrame, event_cols, station_cols) -> dict[str, Any]: def create_dict_struct(df: pd.DataFrame, event_cols, station_cols) -> dict[str, Any]:
# get all events by their id # get all events by their id
@@ -51,15 +61,20 @@ def create_stations_info_1(info: pd.DataFrame) -> dict[str, Any]:
aux = info.iloc[idx] aux = info.iloc[idx]
micro, sec = tuple(map(int, modf(aux["Seg"]))) micro, sec = tuple(map(int, modf(aux["Seg"])))
hms = time(hour=aux["Hora"],minute=aux["Min"], second=sec, microsecond=micro).strftime("%H:%M:%S.%f") hms = time(
station = {"Componente": aux["Componente"], "Hora": hms, "Distancia": float(aux["DIS"])} hour=aux["Hora"], minute=aux["Min"], second=sec, microsecond=micro
).strftime("%H:%M:%S.%f")
station = {
"Componente": aux["Componente"],
"Hora": hms,
"Distancia": float(aux["DIS"]),
}
if type(aux["Tipo Onda"]) != float: if type(aux["Tipo Onda"]) != float:
station.update({"Tipo Onda": aux["Tipo Onda"]}) station.update({"Tipo Onda": aux["Tipo Onda"]})
if aux["Tipo Onda"] == "IAML": if aux["Tipo Onda"] == "IAML":
station.update({"Amplitude": float(aux["Amplitude"])}) station.update({"Amplitude": float(aux["Amplitude"])})
if aux["Estacao"] not in stationsDict.keys(): if aux["Estacao"] not in stationsDict.keys():
stationsDict[aux["Estacao"]] = [station] stationsDict[aux["Estacao"]] = [station]
else: else:
@@ -74,7 +89,7 @@ def create_mag_info(magnitudes):
return mags return mags
if __name__ == '__main__': if __name__ == "__main__":
import parser import parser
df = parser.parse("dados.txt") df = parser.parse("dados.txt")