Initial commit

2026-05-27 08:51:07 +02:00
commit e4d6907c5d
9 changed files with 1332 additions and 0 deletions
--- a/src/concat.py
+++ b/src/concat.py
@@ -0,0 +1,80 @@
+import pandas as pd
+from pathlib import Path
+import argparse
+
+
+def concat_pair(r_file: Path, s_file: Path) -> pd.DataFrame:
+    """Merge one crank (R) and one cam (S) capture into a dense 0/1 table.
+
+    Each CSV is read with ``pd.read_csv`` and only its first column is used;
+    the values are treated as event timestamps. Both series are shifted so
+    that the earliest timestamp found in either file becomes 0, and the
+    result has one row per integer time step from 0 up to the latest event.
+
+    Args:
+        r_file: CSV whose first column holds the crank event timestamps.
+        s_file: CSV whose first column holds the cam event timestamps.
+
+    Returns:
+        A DataFrame whose index is named ``time_us`` and whose integer
+        columns ``crank`` and ``cam`` are 1 on rows where the matching file
+        has an event and 0 elsewhere. The frame has no rows when neither
+        file contains any data rows.
+    """
+    crank_values = pd.read_csv(r_file).iloc[:, 0]
+    cam_values = pd.read_csv(s_file).iloc[:, 0]
+
+    # A file with a header but no data rows must not break the merge.
+    populated = [values for values in (crank_values, cam_values) if not values.empty]
+
+    if populated:
+        # Use the true minimum rather than the first row of each file: with
+        # unsorted input the first row is not the earliest event, and events
+        # shifted below 0 would never match a row of the output index.
+        start_time = min(values.min() for values in populated)
+        end_time = max(values.max() for values in populated)
+        rows = int(end_time - start_time) + 1
+    else:
+        start_time = 0
+        rows = 0
+
+    crank_values = crank_values - start_time
+    cam_values = cam_values - start_time
+
+    # Start from an index-only frame; the columns are filled in directly
+    # instead of first allocating placeholder columns that get overwritten.
+    final_df = pd.DataFrame(index=range(rows))
+    final_df["crank"] = final_df.index.isin(crank_values).astype(int)
+    final_df["cam"] = final_df.index.isin(cam_values).astype(int)
+    final_df.index.name = "time_us"
+
+    return final_df
+
+
+# Script entry: pair each "{base}_R.csv" (crank) with "{base}_S.csv" (cam) found
+# directly inside the given directory and write "{base}_concat.csv" beside them.
+parser = argparse.ArgumentParser()
+parser.add_argument("directory", type=Path, help="Source data directory")
+
+args = parser.parse_args()
+
+directory: Path = args.directory
+
+if not directory.is_dir():
+    # parser.error() prints the usage message and terminates the script.
+    parser.error(f"{directory} is not a valid directory")
+
+print(f"Processing data in: {directory}")
+
+# Maps a capture's base name to the channel files seen for it,
+# e.g. {"run1": {"R": .../run1_R.csv, "S": .../run1_S.csv}}.
+file_groups: dict[str, dict[str, Path]] = {}
+
+for path in directory.glob("*.csv"):
+    try:
+        base_name, channel = path.stem.rsplit("_", 1)
+    except ValueError:
+        # No underscore in the stem, so there is no channel suffix to read.
+        print(f"Skipping badly named file: {path}")
+        continue
+
+    if channel not in ("R", "S"):
+        # This also skips "*_concat.csv" outputs left by an earlier run.
+        print(f"Skipping unknown file: {path}")
+        continue
+
+    file_groups.setdefault(base_name, {})[channel] = path
+
+# Only captures that have both channels are processed, in base-name order.
+file_pairs: list[tuple[Path, Path]] = []
+
+for base_name, files in sorted(file_groups.items()):
+    missing = [channel for channel in ("R", "S") if channel not in files]
+    if missing:
+        # A group exists only if at least one channel was found, so at most
+        # one channel can be missing here.
+        print(f"Missing {missing[0]} file for {base_name}")
+        continue
+
+    file_pairs.append((files["R"], files["S"]))
+
+for r_file, s_file in file_pairs:
+    df = concat_pair(r_file, s_file)
+
+    base_name, _ = r_file.stem.rsplit("_", 1)
+    output = r_file.parent / f"{base_name}_concat.csv"
+
+    df.to_csv(output)
+
+
+# Exit explicitly with status 0. The bare exit() helper is injected by the
+# `site` module for interactive use and is not guaranteed to exist when a
+# program runs, so raise SystemExit directly instead.
+raise SystemExit(0)
--- a/src/filter.py
+++ b/src/filter.py
@@ -0,0 +1,134 @@
+from threading import Thread
+import pandas as pd
+from pathlib import Path
+import argparse
+from tqdm import tqdm
+
+
+def find_last_crank(df: pd.DataFrame, time_us: int) -> int | None:
+    """Return the index label of the latest crank hit strictly before ``time_us``.
+
+    Args:
+        df: Frame with a ``crank`` column; its index labels are used as times
+            and are sliced by label.
+        time_us: Time whose predecessor is wanted; the row at ``time_us``
+            itself is excluded.
+
+    Returns:
+        The index label of the last row with ``crank == 1`` among the rows up
+        to and including label ``time_us - 1``, or ``None`` if there is none.
+    """
+    # Label slices are inclusive, hence the "- 1" to leave out time_us itself.
+    earlier = df.loc[: time_us - 1, "crank"]
+    hits = earlier[earlier == 1]
+
+    return None if hits.empty else hits.index[-1]
+
+
+def find_next_crank(df: pd.DataFrame, time_us: int) -> int | None:
+    """Return the index label of the earliest crank hit strictly after ``time_us``.
+
+    Args:
+        df: Frame with a ``crank`` column; its index labels are used as times
+            and are sliced by label.
+        time_us: Time whose successor is wanted; the row at ``time_us``
+            itself is excluded.
+
+    Returns:
+        The index label of the first row with ``crank == 1`` among the rows
+        from label ``time_us + 1`` onwards, or ``None`` if there is none.
+    """
+    # Start one label past time_us so the hit at time_us itself is not found.
+    later = df.loc[time_us + 1 :, "crank"]
+    hits = later[later == 1]
+
+    return None if hits.empty else hits.index[0]
+
+
+def handle_dedupe(df: pd.DataFrame, time_a: int, time_b: int) -> int:
+    """Resolve two competing crank hits by clearing one of them in ``df``.
+
+    The surrounding hits are the last crank hit before ``time_a`` and the
+    next crank hit after ``time_b``. The candidate that lies more evenly
+    between those two neighbours is kept; the other one has its ``crank``
+    cell set to 0. ``df`` is modified in place.
+
+    Args:
+        df: Frame indexed by time with a ``crank`` column.
+        time_a: Index label of the earlier candidate hit.
+        time_b: Index label of the later candidate hit.
+
+    Returns:
+        The label (``time_a`` or ``time_b``) of the hit that was kept.
+    """
+    before = find_last_crank(df, time_a)
+    after = find_next_crank(df, time_b)
+
+    if before is None or after is None:
+        # Without a neighbour on both sides there is nothing to compare
+        # against, so the earlier hit wins.
+        keep, drop = time_a, time_b
+    else:
+
+        def imbalance(t: int) -> int:
+            # How unequal the gaps to the two neighbours are; 0 means centred.
+            return abs(abs(t - before) - abs(t - after))
+
+        if imbalance(time_a) < imbalance(time_b):
+            keep, drop = time_a, time_b
+        else:
+            # Ties go to the later hit.
+            keep, drop = time_b, time_a
+
+    df.loc[drop, "crank"] = 0
+    return keep
+
+
+def filter_data(file: Path) -> pd.DataFrame:
+    """Remove crank hits whose spacing breaks the running crank interval.
+
+    The CSV is loaded and indexed by its ``time_us`` column (the column is
+    also kept as a regular column). Crank hits are then visited in file
+    order. Each hit's distance to the last accepted hit is compared with the
+    reference interval between the two previously accepted hits; when the
+    two differ by more than 50 % of the reference, ``handle_dedupe`` decides
+    which of the two hits to keep and clears the other one's ``crank`` cell.
+    The ``cam`` column is never modified.
+
+    NOTE(review): the 50 % test is symmetric, so a gap that is much *longer*
+    than the reference also causes one of the two hits to be cleared.
+    Confirm that this is intended for the captured signal.
+
+    Args:
+        file: CSV with ``time_us``, ``crank`` and ``cam`` columns.
+
+    Returns:
+        The loaded DataFrame, indexed by ``time_us``, with rejected crank
+        hits set to 0.
+    """
+    df = pd.read_csv(file).set_index("time_us", drop=False)
+
+    # Only rows with a crank hit influence the state below, so visit just
+    # those instead of every row. Taking the snapshot up front is safe:
+    # handle_dedupe only ever clears the hit being visited or the one
+    # accepted before it, never a later one.
+    crank_times = df.loc[df["crank"] == 1, "time_us"].to_numpy()
+
+    last_crank = None  # most recently accepted hit
+    previous_crank = None  # hit accepted before last_crank
+    last_crank_delta = None  # reference interval between accepted hits
+
+    for time_us in tqdm(crank_times, total=len(crank_times)):
+        if last_crank is None:
+            # Very first hit: nothing to compare against yet.
+            last_crank = time_us
+            continue
+
+        delta = time_us - last_crank
+        irregular = (
+            last_crank_delta is not None
+            and abs(delta - last_crank_delta) / last_crank_delta > 0.5
+        )
+
+        if not irregular:
+            last_crank_delta = delta
+            previous_crank = last_crank
+            last_crank = time_us
+            continue
+
+        valid_time = handle_dedupe(df, last_crank, time_us)
+        # previous_crank is always set here: the reference interval is first
+        # assigned in the regular branch above, together with previous_crank.
+        # Whichever hit survived, the new reference is its distance to it.
+        last_crank_delta = valid_time - previous_crank
+        last_crank = valid_time
+
+    return df
+
+
+# Script entry: de-duplicate every "{base}_concat.csv" found directly inside the
+# given directory and write the result beside it as "{base}_dedupe.csv".
+parser = argparse.ArgumentParser()
+parser.add_argument("directory", type=Path, help="Source data directory")
+
+args = parser.parse_args()
+
+directory: Path = args.directory
+
+if not directory.is_dir():
+    # parser.error() prints the usage message and terminates the script.
+    parser.error(f"{directory} is not a valid directory")
+
+print(f"Processing data in: {directory}")
+
+concat_files: list[Path] = []
+
+for path in directory.glob("*.csv"):
+    try:
+        base_name, channel = path.stem.rsplit("_", 1)
+    except ValueError:
+        # No underscore in the stem, so there is no suffix to inspect.
+        print(f"Skipping badly named file: {path}")
+        continue
+
+    if channel != "concat":
+        # Raw channel files and earlier "*_dedupe.csv" outputs end up here.
+        print(f"Skipping unknown file: {path}")
+        continue
+
+    concat_files.append(path)
+
+for file in concat_files:
+    base_name, _ = file.stem.rsplit("_", 1)
+    output = file.parent / f"{base_name}_dedupe.csv"
+    out_df = filter_data(file)
+    # filter_data keeps "time_us" both as the index and as a column, so
+    # writing the index as well would emit the time column twice.
+    out_df.to_csv(output, index=False)
--- a/src/plot.py
+++ b/src/plot.py
@@ -0,0 +1,97 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from pathlib import Path
+import argparse
+
+
+parser = argparse.ArgumentParser()
+# The argument keeps its original name "directory" (so args.directory still
+# works), but the value is passed straight to pd.read_csv in main(), so it
+# has to be a CSV file. metavar and help say so in the usage text.
+parser.add_argument(
+    "directory",
+    metavar="csv_file",
+    type=Path,
+    help="CSV file to plot: first column is time in µs, plus 'crank' and 'cam' columns",
+)
+
+args = parser.parse_args()
+
+INPUT_CSV: Path = args.directory
+
+
+# Width of the artificial pulse, in microseconds.
+# Increase this if the pulses are still hard to see.
+PULSE_WIDTH_US = 500  # 0.5 ms
+
+# Optional: plot only a smaller time window.
+# Use None to plot the whole recording.
+START_US = None
+END_US = None
+
+
+def build_pulse_trace(edge_times_us, pulse_width_us):
+    """
+    Turn edge timestamps into the corner points of a square-pulse trace.
+
+    Every edge time ``t`` contributes four points, in input order:
+        (t, 0), (t, 1), (t + pulse_width_us, 1), (t + pulse_width_us, 0)
+
+    Returns a pair of lists ``(x, y)`` of equal length holding the x and y
+    coordinates of those points; both are empty when there are no edges.
+    """
+    x = [
+        corner
+        for t in edge_times_us
+        for corner in (t, t, t + pulse_width_us, t + pulse_width_us)
+    ]
+    # Four x values were emitted per edge; repeat the level pattern to match.
+    y = [0, 1, 1, 0] * (len(x) // 4)
+
+    return x, y
+
+
+def main():
+    """Plot the crank and cam events of INPUT_CSV as two stacked pulse traces.
+
+    The first CSV column is taken as the time in microseconds. Rows where
+    ``crank`` / ``cam`` equal 1 become pulses of PULSE_WIDTH_US width. If
+    START_US or END_US is set, only events inside that window are drawn and
+    the x-axis is limited accordingly. The figure is shown interactively.
+    """
+    table = pd.read_csv(INPUT_CSV)
+
+    # The time axis is whatever column comes first (the index written by
+    # to_csv ends up there).
+    timestamps = table[table.columns[0]]
+
+    def edges_for(channel):
+        # Event times of one channel, clipped to the optional plot window.
+        edges = timestamps[table[channel] == 1].to_numpy()
+        if START_US is not None:
+            edges = edges[edges >= START_US]
+        if END_US is not None:
+            edges = edges[edges <= END_US]
+        return edges
+
+    # (column, legend label, vertical offset); the offset keeps the two
+    # traces from overlapping.
+    layout = (("crank", "Crank", 0), ("cam", "Cam", 1.2))
+
+    traces = []
+    for channel, label, baseline in layout:
+        xs, ys = build_pulse_trace(edges_for(channel), PULSE_WIDTH_US)
+        # Seconds read better than microseconds on the x-axis.
+        seconds = [x / 1_000_000 for x in xs]
+        levels = [y * 0.8 + baseline for y in ys]
+        traces.append((seconds, levels, label))
+
+    plt.figure(figsize=(14, 5))
+
+    for seconds, levels, label in traces:
+        plt.plot(seconds, levels, label=label)
+
+    plt.yticks([0.4, 1.6], ["Crank", "Cam"])
+    plt.xlabel("Time [s]")
+    plt.title(f"Crank and Cam Falling Edges, pulse width = {PULSE_WIDTH_US} µs")
+    plt.grid(True)
+    plt.legend()
+
+    if START_US is not None or END_US is not None:
+        # Passing None for one side leaves that limit unchanged.
+        start_s = None if START_US is None else START_US / 1_000_000
+        end_s = None if END_US is None else END_US / 1_000_000
+        plt.xlim(start_s, end_s)
+
+    plt.tight_layout()
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()