Vectorized data processing

This commit is contained in:
2026-05-27 14:49:00 +02:00
parent 4e9277a023
commit 2da49b50bf

View File

@@ -3,83 +3,43 @@
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com> # Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
import pandas as pd
from pathlib import Path
import argparse import argparse
from pathlib import Path
import pandas as pd
from tqdm import tqdm from tqdm import tqdm
def find_last_crank(df: pd.DataFrame, time_us: int) -> int | None:
previous_crank_hits = df.loc[: time_us - 1]
previous_crank_hits = previous_crank_hits[previous_crank_hits["crank"] == 1]
if previous_crank_hits.empty:
return None
return previous_crank_hits.index[-1]
def find_next_crank(df: pd.DataFrame, time_us: int) -> int | None:
next_crank_hits = df.loc[time_us + 1 :]
next_crank_hits = next_crank_hits[next_crank_hits["crank"] == 1]
if next_crank_hits.empty:
return None
return next_crank_hits.index[0]
def filter_data(file: Path) -> pd.DataFrame:
    """Compute first and second differences of crank-hit timestamps.

    Reads *file* as CSV, keeps the rows where ``crank`` equals 1 and derives,
    for each remaining row:

    * ``d1``: ``time_us`` minus the ``time_us`` of the previous crank hit;
    * ``d2``: ``d1`` minus the ``d1`` of the previous crank hit;
    * ``ratio``: ``d2 / d1``.

    The first two crank hits have no complete history, so their rows are
    dropped (as is any row whose ratio is NaN).

    Args:
        file: Path to a CSV file containing at least the columns ``time_us``
            and ``crank``.

    Returns:
        A frame with columns ``time_us``, ``d1``, ``d2`` and ``ratio`` and a
        fresh ``RangeIndex``, one row per crank hit from the third onwards.
    """
    # Only these two columns feed the computation; skipping the rest keeps
    # parsing cheap on large captures.
    df = pd.read_csv(file, usecols=["time_us", "crank"])

    crank_df = df.loc[df["crank"] == 1, ["time_us"]].copy()
    crank_df["d1"] = crank_df["time_us"].diff()
    crank_df["d2"] = crank_df["d1"].diff()
    crank_df["ratio"] = crank_df["d2"] / crank_df["d1"]

    crank_df = crank_df.dropna(subset=["d1", "d2", "ratio"])
    # Drop the source row numbers so the result is written the same way
    # whether or not the caller passes ``index=False`` to ``to_csv``.
    return crank_df[["time_us", "d1", "d2", "ratio"]].reset_index(drop=True)
parser = argparse.ArgumentParser() def main() -> None:
parser.add_argument("directory", type=Path, help="Source data directory") parser = argparse.ArgumentParser()
parser.add_argument("directory", type=Path, help="Source data directory")
args = parser.parse_args()
args = parser.parse_args() directory: Path = args.directory
directory: Path = args.directory if not directory.is_dir():
if not directory.is_dir():
parser.error(f"{directory} is not a valid directory") parser.error(f"{directory} is not a valid directory")
print(f"Processing data in: {directory}") print(f"Processing data in: {directory}")
concat_files: list[Path] = [] concat_files: list[Path] = []
for path in directory.glob("*.csv"): for path in directory.glob("*.csv"):
stem = path.stem stem = path.stem
try: try:
@@ -94,8 +54,13 @@ for path in directory.glob("*.csv"):
concat_files.append(path) concat_files.append(path)
for file in concat_files: for file in tqdm(concat_files, desc="Files"):
base_name, _ = file.stem.rsplit("_", 1) base_name, _ = file.stem.rsplit("_", 1)
output = file.parent / f"{base_name}_derivative.csv" output = file.parent / f"{base_name}_derivative.csv"
out_df = filter_data(file) out_df = filter_data(file)
out_df.to_csv(output) out_df.to_csv(output, index=False)
if __name__ == "__main__":
main()