more statistics stuff

2025-12-11 15:25:51 -01:00
parent 14dee58ab2
commit b3d9a31792
3 changed files with 68 additions and 202 deletions

View File

@@ -7,15 +7,32 @@ import sys
 from datetime import datetime
 import pandas as pd

-from utils import crud, parser, stats, utils
+from utils import parser, crud, stats, utils

 HEADER = """=== Terramotos ==="""
-EVENT_COLS = ["Data", "Latitude", "Longitude", "Profundidade", "Tipo Evento", "Gap", "Magnitudes", "Regiao", "Sentido"]
-STATION_COLS = ["Estacao", "Hora", "Min", "Seg", "Componente", "Distancia Epicentro", "Tipo Onda"]
+EVENT_COLS = [
+    "Data",
+    "Latitude",
+    "Longitude",
+    "Profundidade",
+    "Tipo Evento",
+    "Gap",
+    "Magnitudes",
+    "Regiao",
+    "Sentido",
+]
+STATION_COLS = [
+    "Estacao",
+    "Hora",
+    "Min",
+    "Seg",
+    "Componente",
+    "Distancia Epicentro",
+    "Tipo Onda",
+]

-MENU ="""[1] Criar a base de dados
+MENU = """[1] Criar a base de dados
 [3] Apagar um evento
 [4] Apagar uma entrada de um evento
 [5] Visualizar um evento
@@ -31,7 +48,7 @@ MENU ="""[1] Criar a base de dados
 def guardar_json(df: pd.DataFrame, fname: str) -> bool:
     _retValues = utils.create_dict_struct(df, EVENT_COLS, None)
-    with open(fname , "w") as fp:
+    with open(fname, "w") as fp:
         try:
             json.dump(_retValues, fp)
         except:
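The bare `except:` around `json.dump` above also catches `KeyboardInterrupt` and hides the actual cause of a failed save. A narrower variant could look like the sketch below; the name `guardar_json_sketch`, the stand-in `data` dict (in place of the `utils.create_dict_struct(df, EVENT_COLS, None)` result) and the chosen exception types are illustrative assumptions, not the project's code:

import json


def guardar_json_sketch(data: dict, fname: str) -> bool:
    """Write `data` to `fname` as JSON; return False on failure.

    Sketch only: `data` stands in for the dict built by
    utils.create_dict_struct(df, EVENT_COLS, None) in the real module.
    """
    try:
        with open(fname, "w", encoding="utf-8") as fp:
            json.dump(data, fp, ensure_ascii=False)
    except (TypeError, ValueError, OSError):
        # TypeError/ValueError: values json cannot serialise; OSError: I/O problems.
        return False
    return True


if __name__ == "__main__":
    print(guardar_json_sketch({"eventos": []}, "eventos.json"))  # True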
@@ -89,7 +106,6 @@ def main():
             else:
                 retInfo = "Base de dados não encontrada!"
             case "4":
                 if db is not None:
                     crud.read_ids(db)
@@ -152,7 +168,7 @@ def main():
case "8": case "8":
if db is not None: if db is not None:
stats.stat_menu(db) stats.stats(db)
else: else:
retInfo = "Base de dados não encontrada!" retInfo = "Base de dados não encontrada!"
@@ -198,25 +214,30 @@ def _file_exists(name: str) -> bool:
         return True
     return False


 def _event_exists(df, eid) -> bool:
     allEvents = set(df["ID"])
     return eid in allEvents


-def _get_usr_input(msg:str, asType=str):
+def _get_usr_input(msg: str, asType=str):
     usrIn = input(msg)
     if usrIn == "":
         return None
     return asType(usrIn)


 def _prettify_event(df):
     preambleInfo = df.drop_duplicates(subset="ID", keep="first")
     stations = df[["Estacao", "Componente", "Tipo Onda", "Amplitude"]]
     info = df.drop_duplicates(subset="Data", keep="first")
     data = datetime.fromisoformat(info.Data.values[0]).strftime("%c")
-    print(f"Região: {info["Regiao"].values[0]}\nData: {data}\nLatitude: {info.Lat.values[0]}\nLongitude: {info.Long.values[0]}"
-          + f"\nProfundidade: {info.Prof.values[0]}\nTipo de evento: {info['Tipo Ev'].values[0]}\n")
+    print(
+        f"Região: {info['Regiao'].values[0]}\nData: {data}\nLatitude: {info.Lat.values[0]}\nLongitude: {info.Long.values[0]}"
+        + f"\nProfundidade: {info.Prof.values[0]}\nTipo de evento: {info['Tipo Ev'].values[0]}\n"
+    )


-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
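The quoting change inside `_prettify_event` is not only style: reusing the same quote character inside an f-string expression is only accepted from Python 3.12 (PEP 701), so switching to `info['Regiao']` keeps the line valid on the 3.10/3.11 interpreters that already support the `match` statements used in `main`. A tiny illustration, with a made-up `row` dict:

row = {"Regiao": "Açores"}
# print(f"Região: {row["Regiao"]}")   # SyntaxError before Python 3.12 (PEP 701)
print(f"Região: {row['Regiao']}")     # accepted by every version that can run this project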

View File

@@ -1,7 +1,6 @@
 import collections
 import datetime
-import numpy as np
 import stats
 from matplotlib import pyplot as plt

View File

@@ -1,210 +1,56 @@
# pyright: basic
 import datetime
-import os
-import sys
 import numpy as np
 import pandas as pd
 import utils

-STAT_HEADER = """=== Terramotos ===
-== Estatísticas ==
-"""
STAT_MENU = """[1] Média def stats(df: pd.DataFrame) -> None:
[2] Variância """Estatisticas para a DataFrame
[3] Desvio padrão :param df: DataFrame em questão"""
[4] Máximo
[5] Mínimo
[6] Moda
[Q] Voltar ao menu principal mags = mags_avg_std(df)
""" depth = depth_avg_std(df)
FILTER_CHOICES = """[1] Magnitudes median_mags = median_mags(df)
[2] Distância
[3] Profundidade
"""
CHOICE = {"1": "Magnitudes", "2": "Distancia", "3": "Prof"}
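In the new `stats` driver, `median_mags = median_mags(df)` makes `median_mags` a local name for the whole function body, so the call on the right-hand side raises `UnboundLocalError` before the helper defined further down is ever reached; the three results are also not printed or returned in the hunk shown. A self-contained illustration of the scoping rule, with made-up names `helper`, `broken` and `fixed`:

def helper(x: int) -> int:
    return x + 1


def broken() -> int:
    # Assigning to `helper` anywhere in this function makes it local to the
    # whole body, so the call below fails with UnboundLocalError.
    helper = helper(1)
    return helper


def fixed() -> int:
    result = helper(1)  # different name, so the module-level helper is found
    return result


if __name__ == "__main__":
    print(fixed())  # 2
    try:
        broken()
    except UnboundLocalError as exc:
        print(f"as expected: {exc}")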
-def filter_submenu(type: str):
-    os.system("cls" if sys.platform == "windows" else "clear")
-    print(f"{STAT_HEADER}\n = {type} = ")
-    print(FILTER_CHOICES)
-
-    choice = input("Qual dos valores: ")
-
-    try:
-        usrChoice = CHOICE[choice]
-        return usrChoice
-    except KeyError:
-        return None
+def mags_avg_std(data: pd.DataFrame) -> tuple[np.floating, np.floating]:
+    """Media e desvio-padrao das magnitudes
+    :param data: Dataframe com dados a filtrar
+    :returns: Tuple com a media e desvio-padrao
+    """
+    filtered_data: pd.DataFrame = filter_mags(data)
+    vals = filtered_data["MagL"].to_numpy()
+    return (np.average(vals), np.std(vals))


-def stat_menu(df: pd.DataFrame):
-    inStats = True
-    while inStats:
-        os.system("cls" if sys.platform == "windows" else "clear")
-        print(STAT_HEADER + "\n" + STAT_MENU)
-        usrIn = input("Opção: ").lower()
-
-        match usrIn:
-            case "1":
-                c = filter_submenu("Média")
-                if c is not None:
-                    retValue = average(df, c)
-                    if retValue:
-                        print(f"A média de {c} é {retValue}")
-                    else:
-                        print("Um erro aconteceu. Nada a apresentar de momento.")
-                else:
-                    continue
-            case "2":
-                c = filter_submenu("Variância")
-                if c is not None:
-                    retValue = variance(df, c)
-                    if retValue:
-                        print(f"A variância dos dados de {c} é {retValue}")
-                    else:
-                        print("Um erro aconteceu. Nada a apresentar de momento.")
-                else:
-                    continue
-            case "3":
-                c = filter_submenu("Desvio Padrão")
-                if c is not None:
-                    retValue = std_dev(df, c)
-                    if retValue:
-                        print(f"O desvio padrão de {c} é {retValue}")
-                    else:
-                        print("Um erro aconteceu. Nada a apresentar de momento.")
-                else:
-                    continue
-            case "4":
-                c = filter_submenu("Máximo")
-                if c is not None:
-                    retValue = max_v(df, c)
-                    print(f"O valor máximo em {c} é {retValue}")
-                else:
-                    continue
-            case "5":
-                c = filter_submenu("Mínimo")
-                if c is not None:
-                    retValue = min_v(df, c)
-                    print(f"O valor mínimo em {c} é {retValue}")
-                else:
-                    continue
-            case "6":
-                c = filter_submenu("Mínimo")
-                if c is not None:
-                    retValue = moda(df, c)
-                    print(f"O valor moda em {c} é {retValue}")
-                else:
-                    continue
-            case "q":
-                inStats = False
-                continue
-            case _:
-                pass
-        input("Clica `Enter` para continuar")
+def depth_avg_std(data: pd.DataFrame) -> tuple[np.floating, np.floating]:
+    """Media e desvio-padrao das profundidades
+    :param data: Dataframe com dados a filtrar
+    :returns: Tuple com a media e desvio-padrao
+    """
+    filtered_data: pd.DataFrame = filter_depth(data)
+    vals = np.average(filtered_data["Profundidade"].to_numpy())
+    return (np.average(vals), np.std(vals))
-def average(df: pd.DataFrame, filter_by):
-    events = df.drop_duplicates(subset="ID", keep="first")
-    values = events[filter_by].to_numpy()
-    if filter_by == "Magnitudes":
-        values = _unpack_mags(values)
-    try:
-        return np.average(values)
-    except:
-        return None
+def median_mags(data: pd.DataFrame):
+    filtered_data: pd.DataFrame = filter_mags(data)
+    vals = sorted(filtered_data["MagL"].to_numpy())
+    quartil = len(vals) // 4
+    return (
+        filtered_data[quartil, :]["MagL"],
+        filtered_data[quartil * 2, :]["MagL"],
+        filtered_data[quartil * 3, :]["MagL"],
+    )
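`filtered_data[quartil, :]` is not a valid way to take a positional row from a pandas DataFrame (plain `[]` expects column labels or a boolean mask; positional access would need `.iloc`), and the quartile positions computed from the sorted `vals` are then applied back to the unsorted frame. Since `vals` is already sorted, indexing it directly, or letting NumPy interpolate, gives the three quartiles. The sketch below uses `np.quantile` on a toy magnitude list; the name `mag_quartiles` is an assumption:

import numpy as np


def mag_quartiles(mags: list[float]) -> tuple[float, float, float]:
    """First quartile, median and third quartile of local magnitudes."""
    q1, q2, q3 = np.quantile(np.asarray(mags, dtype=float), [0.25, 0.5, 0.75])
    return float(q1), float(q2), float(q3)


if __name__ == "__main__":
    print(mag_quartiles([2.1, 2.8, 3.4, 4.0, 5.2]))  # (2.8, 3.4, 4.0)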
-def variance(df, filter_by):
-    events = df.drop_duplicates(subset="ID", keep="first")
-    values = events[filter_by].to_numpy()
-    if filter_by == "Magnitudes":
-        values = _unpack_mags(values)
-    try:
-        return np.var(values)
-    except:
-        return None
-
-def std_dev(df, filter_by):
-    events = df.drop_duplicates(subset="ID", keep="first")
-    values = events[filter_by].to_numpy()
-    if filter_by == "Magnitudes":
-        values = _unpack_mags(values)
-    try:
-        return np.std(values)
-    except:
-        return None
-
-def max_v(df, filter_by):
-    events = df.drop_duplicates(subset="ID", keep="first")
-    values = events[filter_by].to_numpy()
-    if filter_by == "Magnitudes":
-        values = _unpack_mags(values)
-    return np.max(values)
-
-def min_v(df, filter_by):
-    events = df.drop_duplicates(subset="ID", keep="first")
-    values = events[filter_by].to_numpy()
-    if filter_by == "Magnitudes":
-        values = _unpack_mags(values)
-    return np.min(values)
-
-def moda(df, filter_by):
-    events = df.drop_duplicates(subset="ID", keep="first")
-    values = events[filter_by].to_numpy()
-    if filter_by == "Magnitudes":
-        values = _unpack_mags(values)
-    uniques, count = np.unique(values, return_counts=True)
-    uniques_list = list(zip(uniques, count))
-    return sorted(uniques_list, reverse=True, key=lambda x: x[1])[0][0]
-
-def _unpack_mags(arr: np.ndarray):
-    newVals = np.empty(0)
-    for v in arr:
-        for m in v:
-            newVals = np.append(newVals, float(m["Magnitude"]))
-    return newVals
-
-def filter_mags(data, more_than=None, less_than=None):
+def filter_mags(data, more_than=None, less_than=None) -> pd.DataFrame:
"""Filters by magnitudes a DataFrame into a new Dataframe """Filters by magnitudes a DataFrame into a new Dataframe
:param data: Raw pandas DataFrame :param data: Raw pandas DataFrame