Data processing chain V1
This commit is contained in:
@@ -1,3 +1,7 @@
|
|||||||
|
# Copyright (C) 2026 Hector van der Aa <hector@h3cx.dev>
|
||||||
|
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
||||||
|
|||||||
@@ -1,4 +1,8 @@
|
|||||||
from threading import Thread
|
# Copyright (C) 2026 Hector van der Aa <hector@h3cx.dev>
|
||||||
|
# Copyright (C) 2026 Pierre Barbier <pierrebarbier741@gmail.com>
|
||||||
|
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import argparse
|
import argparse
|
||||||
@@ -44,9 +48,11 @@ def handle_dedupe(df: pd.DataFrame, time_a: int, time_b: int) -> int:
|
|||||||
|
|
||||||
if delta_a_diff < delta_b_diff:
|
if delta_a_diff < delta_b_diff:
|
||||||
df.loc[time_b, "crank"] = 0
|
df.loc[time_b, "crank"] = 0
|
||||||
|
df.loc[time_b - 1000 : time_b + 1000, "cam"] = 0
|
||||||
return time_a
|
return time_a
|
||||||
else:
|
else:
|
||||||
df.loc[time_a, "crank"] = 0
|
df.loc[time_a, "crank"] = 0
|
||||||
|
df.loc[time_a - 1000 : time_a + 1000, "cam"] = 0
|
||||||
return time_b
|
return time_b
|
||||||
|
|
||||||
|
|
||||||
@@ -57,13 +63,16 @@ def filter_data(file: Path) -> pd.DataFrame:
|
|||||||
last_crank_delta = -1
|
last_crank_delta = -1
|
||||||
previous_crank = -1
|
previous_crank = -1
|
||||||
last_cam = -1
|
last_cam = -1
|
||||||
last_cam_delta = -1
|
cam_flag = 0
|
||||||
|
crank_flag = 0
|
||||||
|
|
||||||
for _, row in tqdm(df.iterrows(), total=len(df)):
|
for _, row in tqdm(df.iterrows(), total=len(df), desc="Filter pass 1"):
|
||||||
time_us: int = row["time_us"]
|
time_us: int = row["time_us"]
|
||||||
crank: int = row["crank"]
|
crank: int = row["crank"]
|
||||||
cam: int = row["cam"]
|
cam: int = row["cam"]
|
||||||
if crank == 1:
|
if crank == 1:
|
||||||
|
crank_flag = 1
|
||||||
|
cam_flag = 0
|
||||||
if last_crank != -1:
|
if last_crank != -1:
|
||||||
delta = time_us - last_crank
|
delta = time_us - last_crank
|
||||||
if (
|
if (
|
||||||
@@ -88,8 +97,11 @@ def filter_data(file: Path) -> pd.DataFrame:
|
|||||||
last_crank = time_us
|
last_crank = time_us
|
||||||
|
|
||||||
if cam == 1:
|
if cam == 1:
|
||||||
if last_cam != -1:
|
if crank_flag == 1:
|
||||||
last_cam_delta = time_us - last_cam
|
if cam_flag == 0:
|
||||||
|
cam_flag = 1
|
||||||
|
else:
|
||||||
|
df.loc[time_us, "cam"] = 0
|
||||||
|
|
||||||
last_cam = time_us
|
last_cam = time_us
|
||||||
|
|
||||||
@@ -125,8 +137,6 @@ for path in directory.glob("*.csv"):
|
|||||||
|
|
||||||
concat_files.append(path)
|
concat_files.append(path)
|
||||||
|
|
||||||
threads: list[Thread] = []
|
|
||||||
|
|
||||||
for file in concat_files:
|
for file in concat_files:
|
||||||
base_name, _ = file.stem.rsplit("_", 1)
|
base_name, _ = file.stem.rsplit("_", 1)
|
||||||
output = file.parent / f"{base_name}_dedupe.csv"
|
output = file.parent / f"{base_name}_dedupe.csv"
|
||||||
97
src/filter_2.py
Normal file
97
src/filter_2.py
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
# Copyright (C) 2026 Hector van der Aa <hector@h3cx.dev>
|
||||||
|
# Copyright (C) 2026 Pierre Barbier <pierrebarbier741@gmail.com>
|
||||||
|
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
import argparse
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def filter_data(file: Path) -> pd.DataFrame:
    """Re-insert cam pulses that are missing from a capture.

    The CSV must provide ``time_us``, ``crank`` and ``cam`` columns. Rows are
    scanned in file order. Whenever a cam pulse follows a crank pulse, its
    position inside the crank interval is remembered as a fraction of that
    interval. After the first cam pulse has been seen, five crank pulses in a
    row without a cam pulse trigger a synthetic cam pulse, placed at the
    remembered fraction of the crank interval that just ended. A diagnostic
    is printed for every synthetic pulse.

    Args:
        file: CSV file to process.

    Returns:
        The data indexed by ``time_us`` (the column is kept as well) with the
        synthetic cam pulses applied. Pulses that fall on a timestamp absent
        from the input are added as complete rows (``crank`` 0, ``cam`` 1)
        and merged in time order; any further columns are left empty on
        those rows.
    """
    df = pd.read_csv(file).set_index("time_us", drop=False)

    last_crank = -1
    last_crank_delta = -1
    last_cam = -1
    cam_flag = 0  # 1 once a cam pulse was seen since the latest crank pulse
    crank_flag = 0  # 1 once any crank pulse was seen
    cam_proportion = 0  # cam position as a fraction of the crank interval
    crank_ctr = 0  # crank pulses seen since the latest cam pulse
    insert_active = False  # becomes True at the first cam pulse
    inserted: list[int] = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Filter pass 2"):
        time_us: int = row["time_us"]
        crank: int = row["crank"]
        cam: int = row["cam"]
        if crank == 1:
            crank_flag = 1
            last_crank_delta = time_us - last_crank
            # A zero-length interval (repeated timestamp) cannot be used to
            # derive a proportion; keep the previous one in that case.
            if cam_flag == 1 and last_crank_delta > 0:
                cam_proportion = (last_cam - last_crank) / last_crank_delta
            if insert_active:
                crank_ctr += 1
                if crank_ctr >= 5:
                    insert_time = int(
                        last_crank + int(round(cam_proportion * last_crank_delta))
                    )
                    # Applied after the loop: assigning to a label that does
                    # not exist yet would append a half-empty row at the end
                    # of the frame.
                    inserted.append(insert_time)
                    print(
                        f"Inserted cam at: {insert_time}\nLast cam at: {last_cam}\nLast crank at: {last_crank}\nCurrent time: {time_us}\n"
                    )
                    # Let the cam branch below run for this row so the
                    # counters are re-armed.
                    # NOTE(review): that branch also overwrites last_cam with
                    # time_us right after it is set here - confirm intended.
                    cam = 1
                    last_cam = insert_time
                    crank_ctr = 0
            cam_flag = 0
            last_crank = time_us

        if cam == 1:
            insert_active = True
            crank_ctr = 0
            if crank_flag == 1 and cam_flag == 0:
                cam_flag = 1

            last_cam = time_us

    if inserted:
        stamps = pd.Index(inserted)
        existing = stamps[stamps.isin(df.index)]
        if len(existing) > 0:
            df.loc[existing, "cam"] = 1
        missing = stamps.difference(df.index)
        if len(missing) > 0:
            extra = pd.DataFrame(
                {"time_us": missing.to_numpy(), "crank": 0, "cam": 1},
                index=missing,
            )
            extra.index.name = df.index.name
            # Stable sort: rows that share a timestamp keep their file order.
            df = pd.concat([df, extra]).sort_index(kind="stable")

    return df
|
||||||
|
|
||||||
|
|
||||||
|
def _find_dedupe_files(directory: Path) -> list[Path]:
    """Return the ``*_dedupe.csv`` files in *directory*, sorted by path."""
    selected: list[Path] = []
    for path in sorted(directory.glob("*.csv")):
        try:
            _, channel = path.stem.rsplit("_", 1)
        except ValueError:
            # No underscore in the stem, so there is no channel suffix.
            print(f"Skipping badly named file: {path}")
            continue

        if channel != "dedupe":
            print(f"Skipping unknown file: {path}")
            continue

        selected.append(path)
    return selected


def _rebuilt_path(source: Path) -> Path:
    """Return the ``<base>_rebuilt.csv`` path that sits next to *source*."""
    base_name, _ = source.stem.rsplit("_", 1)
    return source.parent / f"{base_name}_rebuilt.csv"


def main() -> None:
    """Run filter pass 2 on every ``*_dedupe.csv`` file of a directory.

    The directory is taken from the command line. Each result is written
    next to its input as ``<base>_rebuilt.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", type=Path, help="Source data directory")

    args = parser.parse_args()

    directory: Path = args.directory

    if not directory.is_dir():
        parser.error(f"{directory} is not a valid directory")

    print(f"Processing data in: {directory}")

    for file in _find_dedupe_files(directory):
        out_df = filter_data(file)
        # The index always carries the timestamp; mirror it into the column
        # so that no row loses its time when the index is left out below.
        out_df["time_us"] = out_df.index
        # Writing the index as well would emit a second "time_us" header,
        # which pandas renames to "time_us.1" when the file is read back.
        out_df.to_csv(_rebuilt_path(file), index=False)


if __name__ == "__main__":
    main()
|
||||||
@@ -1,3 +1,7 @@
|
|||||||
|
# Copyright (C) 2026 Hector van der Aa <hector@h3cx.dev>
|
||||||
|
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|||||||
58
src/trim.py
Normal file
58
src/trim.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# Copyright (C) 2026 Hector van der Aa <hector@h3cx.dev>
|
||||||
|
# Copyright (C) 2026 Pierre Barbier <pierrebarbier741@gmail.com>
|
||||||
|
# Copyright (C) 2026 Association Exergie <association.exergie@gmail.com>
|
||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
|
from pathlib import Path
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
def _find_rebuilt_files(directory: Path) -> list[Path]:
    """Return the ``*_rebuilt.csv`` files in *directory*, sorted by path."""
    selected: list[Path] = []
    for path in sorted(directory.glob("*.csv")):
        try:
            _, channel = path.stem.rsplit("_", 1)
        except ValueError:
            # No underscore in the stem, so there is no channel suffix.
            print(f"Skipping badly named file: {path}")
            continue

        if channel != "rebuilt":
            print(f"Skipping unknown file: {path}")
            continue

        selected.append(path)
    return selected


def _prompt_seconds(prompt: str) -> float:
    """Ask on stdin until the answer parses as a number, then return it."""
    while True:
        try:
            return float(input(prompt))
        except ValueError:
            print("Please enter a number of seconds.")


def _trim(df: pd.DataFrame, start_us: int, end_us: int) -> pd.DataFrame:
    """Return the rows whose index lies in ``[start_us, end_us]``, rebased to 0.

    Both the index and the ``time_us`` column are shifted by ``start_us``.
    """
    # Label slicing with bounds that are not existing labels raises KeyError
    # on an unsorted index, so order the rows by time first.
    window = df.sort_index(kind="stable").loc[start_us:end_us].copy()
    window["time_us"] = window["time_us"] - start_us
    window.index = window.index - start_us
    return window


def main() -> None:
    """Interactively trim every ``*_rebuilt.csv`` file of a directory.

    For each file the plot script is run, then a start and an end time in
    seconds are read from stdin. The rows inside that window are written
    next to the input as ``<base>_trimmed.csv`` with time rebased to zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", type=Path, help="Source data directory")

    args = parser.parse_args()

    directory: Path = args.directory

    if not directory.is_dir():
        parser.error(f"{directory} is not a valid directory")

    print(f"Processing data in: {directory}")

    # Locate the plot script next to this file so the tool does not depend
    # on being started from the repository root.
    plot_script = Path(__file__).resolve().parent / "plot.py"

    for file in _find_rebuilt_files(directory):
        print(f"Processing {file.name}")
        subprocess.run([sys.executable, str(plot_script), str(file)], check=True)

        while True:
            start_s = _prompt_seconds("Start time:")
            end_s = _prompt_seconds("End time:")
            if end_s >= start_s:
                break
            print("End time must not be before start time.")

        # round() rather than int(): the float product can land just below
        # the intended microsecond and would otherwise be truncated.
        start_us = round(start_s * 1_000_000)
        end_us = round(end_s * 1_000_000)

        df = pd.read_csv(file).set_index("time_us", drop=False)
        df_trimmed = _trim(df, start_us, end_us)

        base_name, _ = file.stem.rsplit("_", 1)
        output = file.parent / f"{base_name}_trimmed.csv"
        # The timestamp is already a regular column; writing the index too
        # would emit a second "time_us" header.
        df_trimmed.to_csv(output, index=False)


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user