From a502a8a0bcb608d986b86a6229281538f9eb35dd Mon Sep 17 00:00:00 2001
From: Hector van der Aa
Date: Wed, 27 May 2026 11:21:14 +0200
Subject: [PATCH] Data processing chain V1

---
Review notes (free text, ignored when the patch is applied):
 - filter_2.py: synthetic cam pulses are now added as complete rows after
   the scan and the frame is re-sorted. Assigning to a missing label
   through df.loc appended a row with NaN time_us/crank at the end of the
   frame, which left the index unsorted for the label slice in trim.py.
 - filter_2.py: removed the commented-out loop header; flagged the
   last_cam overwrite after an insertion for confirmation.
 - trim.py: plot.py is located relative to the script instead of the
   current working directory; second-to-microsecond conversion rounds
   instead of truncating.
 - The blob ids on the "index" lines of the two new files predate these
   edits.

 src/concat.py                  |   4 +
 src/{filter.py => filter_1.py} |  24 +++--
 src/filter_2.py                | 127 ++++++++++++++++++++++++++++++++
 src/plot.py                    |   4 +
 src/trim.py                    |  67 +++++++++++++++
 5 files changed, 219 insertions(+), 7 deletions(-)
 rename src/{filter.py => filter_1.py} (83%)
 create mode 100644 src/filter_2.py
 create mode 100644 src/trim.py

diff --git a/src/concat.py b/src/concat.py
index e2a7bdd..18b1d32 100644
--- a/src/concat.py
+++ b/src/concat.py
@@ -1,3 +1,7 @@
+# Copyright (C) 2026 Hector van der Aa
+# Copyright (C) 2026 Association Exergie
+# SPDX-License-Identifier: GPL-3.0-or-later
+
 import pandas as pd
 from pathlib import Path
 import argparse
diff --git a/src/filter.py b/src/filter_1.py
similarity index 83%
rename from src/filter.py
rename to src/filter_1.py
index a38d775..309d21b 100644
--- a/src/filter.py
+++ b/src/filter_1.py
@@ -1,4 +1,8 @@
-from threading import Thread
+# Copyright (C) 2026 Hector van der Aa
+# Copyright (C) 2026 Pierre Barbier
+# Copyright (C) 2026 Association Exergie
+# SPDX-License-Identifier: GPL-3.0-or-later
+
 import pandas as pd
 from pathlib import Path
 import argparse
@@ -44,9 +48,11 @@ def handle_dedupe(df: pd.DataFrame, time_a: int, time_b: int) -> int:
 
     if delta_a_diff < delta_b_diff:
         df.loc[time_b, "crank"] = 0
+        df.loc[time_b - 1000 : time_b + 1000, "cam"] = 0
         return time_a
     else:
         df.loc[time_a, "crank"] = 0
+        df.loc[time_a - 1000 : time_a + 1000, "cam"] = 0
         return time_b
 
 
@@ -57,13 +63,16 @@ def filter_data(file: Path) -> pd.DataFrame:
     last_crank_delta = -1
     previous_crank = -1
     last_cam = -1
-    last_cam_delta = -1
+    cam_flag = 0
+    crank_flag = 0
 
-    for _, row in tqdm(df.iterrows(), total=len(df)):
+    for _, row in tqdm(df.iterrows(), total=len(df), desc="Filter pass 1"):
         time_us: int = row["time_us"]
         crank: int = row["crank"]
         cam: int = row["cam"]
         if crank == 1:
+            crank_flag = 1
+            cam_flag = 0
             if last_crank != -1:
                 delta = time_us - last_crank
                 if (
@@ -88,8 +97,11 @@ def filter_data(file: Path) -> pd.DataFrame:
             last_crank = time_us
 
         if cam == 1:
-            if last_cam != -1:
-                last_cam_delta = time_us - last_cam
+            if crank_flag == 1:
+                if cam_flag == 0:
+                    cam_flag = 1
+                else:
+                    df.loc[time_us, "cam"] = 0
 
             last_cam = time_us
 
@@ -125,8 +137,6 @@ for path in directory.glob("*.csv"):
 
     concat_files.append(path)
 
-threads: list[Thread] = []
-
 for file in concat_files:
     base_name, _ = file.stem.rsplit("_", 1)
     output = file.parent / f"{base_name}_dedupe.csv"
diff --git a/src/filter_2.py b/src/filter_2.py
new file mode 100644
index 0000000..0d2168b
--- /dev/null
+++ b/src/filter_2.py
@@ -0,0 +1,127 @@
+# Copyright (C) 2026 Hector van der Aa
+# Copyright (C) 2026 Pierre Barbier
+# Copyright (C) 2026 Association Exergie
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import pandas as pd
+from pathlib import Path
+import argparse
+from tqdm import tqdm
+
+
+def filter_data(file: Path) -> pd.DataFrame:
+    """Rebuild missing cam pulses in a de-duplicated capture.
+
+    Reads *file*, a CSV with ``time_us``, ``crank`` and ``cam`` columns, and
+    walks it row by row. Once a first cam pulse has been seen, every fifth
+    crank pulse in a row without a cam pulse gets a synthetic cam pulse,
+    placed in the preceding crank interval at the last measured cam/crank
+    proportion.
+
+    Returns the frame indexed by ``time_us`` (column kept). When synthetic
+    pulses needed new rows, those rows are complete and the frame is sorted
+    by index.
+    """
+    df = pd.read_csv(file).set_index("time_us", drop=False)
+
+    last_crank = -1
+    last_crank_delta = -1
+    last_cam = -1
+    cam_flag = 0
+    crank_flag = 0
+    cam_proportion = 0
+    crank_ctr = 0
+    insert_active = False
+    # Synthetic cam timestamps are applied after the loop: assigning to a
+    # missing label through df.loc appends a row at the end of the frame with
+    # NaN in every other column, which leaves the index unsorted and breaks
+    # the label slice in trim.py (KeyError on a non-monotonic index).
+    inserted: list[int] = []
+
+    for _, row in tqdm(df.iterrows(), total=len(df), desc="Filter pass 2"):
+        time_us: int = row["time_us"]
+        crank: int = row["crank"]
+        cam: int = row["cam"]
+        if crank == 1:
+            crank_flag = 1
+            last_crank_delta = time_us - last_crank
+            if cam_flag == 1:
+                cam_proportion = (last_cam - last_crank) / last_crank_delta
+            if insert_active:
+                crank_ctr += 1
+                if crank_ctr >= 5:
+                    insert_time = last_crank + int(
+                        round(cam_proportion * last_crank_delta)
+                    )
+                    inserted.append(insert_time)
+                    print(
+                        f"Inserted cam at: {insert_time}\nLast cam at: {last_cam}\nLast crank at: {last_crank}\nCurrent time: {time_us}\n"
+                    )
+                    # Flag this row as a cam event so the block below
+                    # restarts the crank counter.
+                    # NOTE(review): that block also overwrites last_cam with
+                    # time_us, discarding insert_time - confirm the intent.
+                    cam = 1
+                    last_cam = insert_time
+                    crank_ctr = 0
+            cam_flag = 0
+            last_crank = time_us
+
+        if cam == 1:
+            insert_active = True
+            crank_ctr = 0
+            if crank_flag == 1 and cam_flag == 0:
+                cam_flag = 1
+
+            last_cam = time_us
+
+    if inserted:
+        times = pd.Index(inserted, name=df.index.name).unique()
+        known = times.isin(df.index)
+        # Timestamps that already have a row only need their cam bit set.
+        df.loc[times[known], "cam"] = 1
+        if not known.all():
+            fresh = times[~known]
+            new_rows = pd.DataFrame(
+                {"time_us": fresh.to_numpy(), "crank": 0, "cam": 1},
+                index=fresh,
+            )
+            df = pd.concat([df, new_rows]).sort_index(kind="stable")
+
+    return df
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("directory", type=Path, help="Source data directory")
+
+args = parser.parse_args()
+
+directory: Path = args.directory
+
+if not directory.is_dir():
+    parser.error(f"{directory} is not a valid directory")
+
+print(f"Processing data in: {directory}")
+
+concat_files: list[Path] = []
+
+for path in directory.glob("*.csv"):
+    stem = path.stem
+
+    try:
+        base_name, channel = stem.rsplit("_", 1)
+    except ValueError:
+        print(f"Skipping badly named file: {path}")
+        continue
+
+    if channel != "dedupe":
+        print(f"Skipping unknown file: {path}")
+        continue
+
+    concat_files.append(path)
+
+for file in concat_files:
+    base_name, _ = file.stem.rsplit("_", 1)
+    output = file.parent / f"{base_name}_rebuilt.csv"
+    out_df = filter_data(file)
+    out_df.to_csv(output)
diff --git a/src/plot.py b/src/plot.py
index e13161b..c07085d 100644
--- a/src/plot.py
+++ b/src/plot.py
@@ -1,3 +1,7 @@
+# Copyright (C) 2026 Hector van der Aa
+# Copyright (C) 2026 Association Exergie
+# SPDX-License-Identifier: GPL-3.0-or-later
+
 import pandas as pd
 import matplotlib.pyplot as plt
 from pathlib import Path
diff --git a/src/trim.py b/src/trim.py
new file mode 100644
index 0000000..bc23786
--- /dev/null
+++ b/src/trim.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2026 Hector van der Aa
+# Copyright (C) 2026 Pierre Barbier
+# Copyright (C) 2026 Association Exergie
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import subprocess
+import sys
+import pandas as pd
+from pathlib import Path
+import argparse
+
+# plot.py sits next to this script (both live in src/); resolving it from
+# __file__ keeps the tool usable from any working directory.
+PLOT_SCRIPT = Path(__file__).resolve().parent / "plot.py"
+
+parser = argparse.ArgumentParser()
+parser.add_argument("directory", type=Path, help="Source data directory")
+
+args = parser.parse_args()
+
+directory: Path = args.directory
+
+if not directory.is_dir():
+    parser.error(f"{directory} is not a valid directory")
+
+print(f"Processing data in: {directory}")
+
+files: list[Path] = []
+
+for path in directory.glob("*.csv"):
+    stem = path.stem
+
+    try:
+        base_name, channel = stem.rsplit("_", 1)
+    except ValueError:
+        print(f"Skipping badly named file: {path}")
+        continue
+
+    if channel != "rebuilt":
+        print(f"Skipping unknown file: {path}")
+        continue
+
+    files.append(path)
+
+
+for file in files:
+    print(f"Processing {file.name}")
+    # Run plot.py on the capture before asking which window to keep.
+    subprocess.run([sys.executable, str(PLOT_SCRIPT), f"{file}"], check=True)
+    start_s = float(input("Start time:"))
+    end_s = float(input("End time:"))
+
+    # round() rather than int(): a float product can land just below the
+    # intended integer and would then be truncated one microsecond short.
+    start_us = round(start_s * 1_000_000)
+    end_us = round(end_s * 1_000_000)
+
+    df = pd.read_csv(file).set_index("time_us", drop=False)
+    # Label slice on the time index; both bounds are inclusive.
+    df_trimmed: pd.DataFrame = df.loc[start_us:end_us].copy()
+    # Re-base column and index so times are relative to the chosen start.
+    df_trimmed["time_us"] = df_trimmed["time_us"] - start_us
+    df_trimmed.index = df_trimmed.index - start_us
+
+    base_name, _ = file.stem.rsplit("_", 1)
+    output = file.parent / f"{base_name}_trimmed.csv"
+    df_trimmed.to_csv(output)