Initial commit

This commit is contained in:
2026-05-27 08:51:07 +02:00
commit e4d6907c5d
9 changed files with 1332 additions and 0 deletions

80
src/concat.py Normal file
View File

@@ -0,0 +1,80 @@
import pandas as pd
from pathlib import Path
import argparse
def concat_pair(r_file: Path, s_file: Path) -> pd.DataFrame:
    """Merge one crank (R) and one cam (S) timestamp file into a dense 0/1 grid.

    The first column of each CSV is read as a list of event timestamps. Both
    channels are shifted so that the earlier of the two *first rows* becomes
    time 0, and the result has one row per integer time step up to the last
    event. The index is named ``time_us``.

    Args:
        r_file: CSV whose first column holds the crank event timestamps.
        s_file: CSV whose first column holds the cam event timestamps.

    Returns:
        A DataFrame indexed by ``time_us`` (0..last event) with integer
        columns ``crank`` and ``cam`` that are 1 where an event occurred and
        0 elsewhere. If both files contain no events the frame has no rows.

    Note:
        The origin is taken from the first row of each file, not from the
        minimum, so timestamps smaller than that origin fall outside the grid
        and are not marked.
    """
    crank_times = pd.read_csv(r_file).iloc[:, 0]
    cam_times = pd.read_csv(s_file).iloc[:, 0]

    # A header-only file yields an empty column; `.iloc[0]` / `.max()` on it
    # would raise or produce NaN, so only channels with data define the grid.
    recorded = [times for times in (crank_times, cam_times) if not times.empty]
    if recorded:
        start_time = min(times.iloc[0] for times in recorded)
        rows = int(max(times.max() for times in recorded) - start_time) + 1
    else:
        start_time = 0
        rows = 0

    time_axis = pd.RangeIndex(rows, name="time_us")

    def occupancy(times: pd.Series):
        # Skip the subtraction for an empty channel: its dtype is not numeric.
        offsets = times - start_time if not times.empty else []
        return time_axis.isin(offsets).astype(int)

    # Build the columns directly instead of allocating an all-NaN object
    # frame first and overwriting it.
    return pd.DataFrame(
        {"crank": occupancy(crank_times), "cam": occupancy(cam_times)},
        index=time_axis,
    )
# Command-line driver: pair up "<name>_R.csv" / "<name>_S.csv" files found in
# the given directory and write "<name>_concat.csv" next to them.
parser = argparse.ArgumentParser()
parser.add_argument("directory", type=Path, help="Source data directory")
args = parser.parse_args()
directory: Path = args.directory
if not directory.is_dir():
    parser.error(f"{directory} is not a valid directory")
print(f"Processing data in: {directory}")

# Group the CSV files by recording name; the channel is the text after the
# last underscore of the file stem ("R" or "S").
file_groups: dict[str, dict[str, Path]] = {}
for path in directory.glob("*.csv"):
    stem = path.stem
    try:
        base_name, channel = stem.rsplit("_", 1)
    except ValueError:
        # No underscore in the stem, so there is no channel suffix to read.
        print(f"Skipping badly named file: {path}")
        continue
    if channel not in ("R", "S"):
        print(f"Skipping unknown file: {path}")
        continue
    file_groups.setdefault(base_name, {})[channel] = path

# Keep only recordings for which both channels are present. Sorting makes the
# processing order independent of the directory listing order.
file_pairs: list[tuple[Path, Path]] = []
for base_name, files in sorted(file_groups.items()):
    if "R" not in files:
        print(f"Missing R file for {base_name}")
        continue
    if "S" not in files:
        print(f"Missing S file for {base_name}")
        continue
    file_pairs.append((files["R"], files["S"]))

for r_file, s_file in file_pairs:
    df = concat_pair(r_file, s_file)
    base_name, _ = r_file.stem.rsplit("_", 1)
    output = r_file.parent / f"{base_name}_concat.csv"
    df.to_csv(output)

# `exit()` is injected by the `site` module for interactive sessions and is
# not guaranteed to exist (e.g. `python -S`); raise SystemExit directly.
raise SystemExit(0)

134
src/filter.py Normal file
View File

@@ -0,0 +1,134 @@
from threading import Thread
import pandas as pd
from pathlib import Path
import argparse
from tqdm import tqdm
def find_last_crank(df: pd.DataFrame, time_us: int) -> int | None:
previous_crank_hits = df.loc[: time_us - 1]
previous_crank_hits = previous_crank_hits[previous_crank_hits["crank"] == 1]
if previous_crank_hits.empty:
return None
return previous_crank_hits.index[-1]
def find_next_crank(df: pd.DataFrame, time_us: int) -> int | None:
next_crank_hits = df.loc[time_us + 1 :]
next_crank_hits = next_crank_hits[next_crank_hits["crank"] == 1]
if next_crank_hits.empty:
return None
return next_crank_hits.index[0]
def handle_dedupe(df: pd.DataFrame, time_a: int, time_b: int) -> int:
    """Keep one of two competing crank hits and clear the other in ``df``.

    The surrounding hits are looked up with ``find_last_crank(df, time_a)``
    and ``find_next_crank(df, time_b)``. The candidate whose distances to
    those two neighbours are closest to each other is kept; on a tie
    ``time_b`` is kept. If either neighbour is missing, ``time_a`` is kept.

    The ``crank`` cell of the discarded timestamp is set to 0 in place.

    Returns:
        The timestamp that was kept.
    """
    before = find_last_crank(df, time_a)
    after = find_next_crank(df, time_b)

    if before is None or after is None:
        # Without both neighbours there is nothing to compare against.
        keep, drop = time_a, time_b
    else:

        def imbalance(candidate: int) -> int:
            # How unevenly `candidate` sits between the two neighbours.
            return abs(abs(candidate - before) - abs(candidate - after))

        if imbalance(time_a) < imbalance(time_b):
            keep, drop = time_a, time_b
        else:
            keep, drop = time_b, time_a

    df.loc[drop, "crank"] = 0
    return keep
def filter_data(file: Path) -> pd.DataFrame:
    """Load a concatenated recording and remove spurious crank hits.

    The CSV is indexed by its ``time_us`` column (the column is also kept).
    Crank hits are visited in index order. When the gap to the previously
    accepted hit differs from the last accepted gap by more than 50 %, the
    previous and current hit are handed to ``handle_dedupe``, which clears
    one of them in the frame; tracking then continues from the survivor.

    Args:
        file: Path to a CSV with at least ``time_us`` and ``crank`` columns.

    Returns:
        The loaded DataFrame with rejected crank hits set to 0.
    """
    df = pd.read_csv(file).set_index("time_us", drop=False)

    # Only rows with crank == 1 ever change the state below, so walk just
    # those instead of every row of the frame with `iterrows()`.
    # Taking the snapshot up front is safe: `handle_dedupe` only clears the
    # current hit or the previous one, never a hit that is still to come.
    crank_times = df.index.to_numpy()[(df["crank"] == 1).to_numpy()]

    last_crank = -1  # most recently accepted crank hit (-1: none yet)
    last_crank_delta = -1  # gap between the last two accepted hits (-1: unknown)
    previous_crank = -1  # accepted hit before `last_crank` (-1: none yet)

    for time_us in tqdm(crank_times, total=len(crank_times)):
        if last_crank == -1:
            # First hit: nothing to compare against yet.
            last_crank = time_us
            continue

        delta = time_us - last_crank
        if (
            last_crank_delta != -1
            and abs(delta - last_crank_delta) / last_crank_delta > 0.5
        ):
            # Gap deviates by more than 50 %: one of the two hits is dropped.
            valid_time = handle_dedupe(df, last_crank, time_us)
            if valid_time == last_crank:
                last_crank_delta = last_crank - previous_crank
            else:
                last_crank_delta = (
                    valid_time - previous_crank if previous_crank != -1 else delta
                )
            last_crank = valid_time
        else:
            last_crank_delta = delta
            previous_crank = last_crank
            last_crank = time_us

    return df
# Command-line driver: run the crank filter over every "<name>_concat.csv"
# in the given directory and write "<name>_dedupe.csv" beside it.
parser = argparse.ArgumentParser()
parser.add_argument("directory", type=Path, help="Source data directory")
args = parser.parse_args()

directory: Path = args.directory
if not directory.is_dir():
    parser.error(f"{directory} is not a valid directory")
print(f"Processing data in: {directory}")

# Collect the inputs: the text after the last underscore of the stem must be
# exactly "concat".
concat_files: list[Path] = []
for path in directory.glob("*.csv"):
    _, separator, suffix = path.stem.rpartition("_")
    if not separator:
        # No underscore at all, so the name carries no suffix.
        print(f"Skipping badly named file: {path}")
    elif suffix != "concat":
        print(f"Skipping unknown file: {path}")
    else:
        concat_files.append(path)

threads: list[Thread] = []

for file in concat_files:
    base_name = file.stem.rsplit("_", 1)[0]
    output = file.with_name(f"{base_name}_dedupe.csv")
    out_df = filter_data(file)
    out_df.to_csv(output)

97
src/plot.py Normal file
View File

@@ -0,0 +1,97 @@
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import argparse
parser = argparse.ArgumentParser()
# The value is handed straight to pd.read_csv() in main(), so it must be a CSV
# file, not a directory. The dest stays "directory" so `args.directory` keeps
# working; only the usage/help text is corrected.
parser.add_argument(
    "directory",
    type=Path,
    metavar="csv_file",
    help="CSV file to plot (first column is the time in microseconds; "
    "must also contain 'crank' and 'cam' columns)",
)
args = parser.parse_args()
INPUT_CSV: Path = args.directory
# Width of the artificial pulse, in microseconds.
# Increase this if the pulses are still hard to see.
PULSE_WIDTH_US = 500  # 0.5 ms
# Optional: plot only a smaller time window (bounds in microseconds).
# Use None to plot the whole recording.
START_US = None
END_US = None
def build_pulse_trace(edge_times_us, pulse_width_us):
    """
    Turn edge timestamps into the corner points of a square pulse train.

    For every edge time ``t`` four points are emitted, in order:
        (t, 0), (t, 1), (t + pulse_width_us, 1), (t + pulse_width_us, 0)
    so the trace is low before the edge, high for ``pulse_width_us``, and
    low again afterwards.

    Returns a pair of lists ``(x, y)`` of equal length.
    """
    # Materialise the pulse boundaries once so any iterable is consumed a
    # single time.
    spans = [(start, start + pulse_width_us) for start in edge_times_us]
    xs = [x for start, stop in spans for x in (start, start, stop, stop)]
    ys = [0, 1, 1, 0] * len(spans)
    return xs, ys
def main():
    """Plot the crank and cam events of ``INPUT_CSV`` as two stacked pulse trains.

    The first CSV column is used as the time axis in microseconds; rows where
    ``crank`` / ``cam`` equal 1 are drawn as pulses of ``PULSE_WIDTH_US``.
    ``START_US`` / ``END_US`` optionally restrict the plotted window.
    """
    table = pd.read_csv(INPUT_CSV)
    # The first column is whatever to_csv() wrote as the index, i.e. the time.
    timestamps = table[table.columns[0]]

    def windowed_edges(channel):
        # Event times of one channel, clipped to the optional window.
        edges = timestamps[table[channel] == 1].to_numpy()
        if START_US is not None:
            edges = edges[edges >= START_US]
        if END_US is not None:
            edges = edges[edges <= END_US]
        return edges

    crank_edges = windowed_edges("crank")
    cam_edges = windowed_edges("cam")

    def scaled_trace(edges, baseline):
        # Pulse corners with x in seconds and y squeezed into its own lane.
        xs_us, levels = build_pulse_trace(edges, PULSE_WIDTH_US)
        xs_s = [x_us / 1_000_000 for x_us in xs_us]
        heights = [level * 0.8 + baseline for level in levels]
        return xs_s, heights

    # The cam lane sits above the crank lane so the traces do not overlap.
    crank_x, crank_y = scaled_trace(crank_edges, 0)
    cam_x, cam_y = scaled_trace(cam_edges, 1.2)

    plt.figure(figsize=(14, 5))
    plt.plot(crank_x, crank_y, label="Crank")
    plt.plot(cam_x, cam_y, label="Cam")
    plt.yticks([0.4, 1.6], ["Crank", "Cam"])
    plt.xlabel("Time [s]")
    plt.title(f"Crank and Cam Falling Edges, pulse width = {PULSE_WIDTH_US} µs")
    plt.grid(True)
    plt.legend()

    window_requested = not (START_US is None and END_US is None)
    if window_requested:
        # A missing bound is passed as None so that side stays auto-scaled.
        plt.xlim(
            None if START_US is None else START_US / 1_000_000,
            None if END_US is None else END_US / 1_000_000,
        )

    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()