Vectorized data processing
This commit is contained in:
@@ -3,99 +3,64 @@
|
|||||||
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
||||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
import pandas as pd
|
|
||||||
from pathlib import Path
|
|
||||||
import argparse
|
import argparse
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
def find_last_crank(df: pd.DataFrame, time_us: int) -> int | None:
|
|
||||||
previous_crank_hits = df.loc[: time_us - 1]
|
|
||||||
previous_crank_hits = previous_crank_hits[previous_crank_hits["crank"] == 1]
|
|
||||||
|
|
||||||
if previous_crank_hits.empty:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return previous_crank_hits.index[-1]
|
|
||||||
|
|
||||||
|
|
||||||
def find_next_crank(df: pd.DataFrame, time_us: int) -> int | None:
|
|
||||||
next_crank_hits = df.loc[time_us + 1 :]
|
|
||||||
next_crank_hits = next_crank_hits[next_crank_hits["crank"] == 1]
|
|
||||||
|
|
||||||
if next_crank_hits.empty:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return next_crank_hits.index[0]
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def filter_data(file: Path) -> pd.DataFrame:
    """Compute first and second differences of the crank-hit timestamps.

    Reads *file* as CSV, keeps the rows where ``crank == 1`` and derives, for
    each hit:

    * ``d1``    - time elapsed since the previous hit,
    * ``d2``    - change of ``d1`` relative to the previous hit,
    * ``ratio`` - ``d2 / d1``.

    The first two hits are dropped because they have no complete ``d1``/``d2``
    history.

    Args:
        file: Path to a CSV file with at least ``time_us`` and ``crank``
            columns.

    Returns:
        A frame with columns ``time_us``, ``d1``, ``d2`` and ``ratio`` and a
        fresh ``RangeIndex``; empty when the file has fewer than three hits.
    """
    # Only the columns the computation needs are parsed.
    samples = pd.read_csv(file, usecols=["time_us", "crank"])

    hit_times = samples.loc[samples["crank"] == 1, "time_us"]

    # Vectorised differences replace the per-row Python loop, which reset its
    # "previous hit" state on every iteration and so produced ratio == 1.
    d1 = hit_times.diff()
    d2 = d1.diff()

    derived = pd.DataFrame(
        {
            "time_us": hit_times,
            "d1": d1,
            "d2": d2,
            "ratio": d2 / d1,
        }
    )

    # Rows without a full history carry NaN in the derived columns.
    derived = derived.dropna(subset=["d1", "d2", "ratio"])
    return derived.reset_index(drop=True)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
    """Process every ``*_trimmed.csv`` file in the directory given on the CLI.

    For each matching ``<name>_trimmed.csv`` the result of ``filter_data`` is
    written next to it as ``<name>_derivative.csv``. CSV files whose stem has
    no underscore, or whose last underscore-separated part is not
    ``trimmed``, are reported and skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", type=Path, help="Source data directory")
    args = parser.parse_args()

    directory: Path = args.directory

    if not directory.is_dir():
        # parser.error() prints the message and exits with a usage error.
        parser.error(f"{directory} is not a valid directory")

    print(f"Processing data in: {directory}")

    concat_files: list[Path] = []

    # Sorted so files are processed in a stable order across runs/platforms.
    for path in sorted(directory.glob("*.csv")):
        try:
            _, channel = path.stem.rsplit("_", 1)
        except ValueError:
            # No underscore in the stem: unpacking the single part fails.
            print(f"Skipping badly named file: {path}")
            continue

        if channel != "trimmed":
            print(f"Skipping unknown file: {path}")
            continue

        concat_files.append(path)

    for file in tqdm(concat_files, desc="Files"):
        base_name, _ = file.stem.rsplit("_", 1)
        output = file.parent / f"{base_name}_derivative.csv"

        out_df = filter_data(file)
        # index=False: the positional index carries no information.
        out_df.to_csv(output, index=False)


if __name__ == "__main__":
    main()
|
||||||
|
|||||||
Reference in New Issue
Block a user