import csv
import re

# A line consisting solely of a whole number followed by a period
# (e.g. "1.", "12.") opens a new food entry.
_ENTRY_START = re.compile(r"^\d+\.$")

# Either marker closes the current entry; the insulin index is expected on
# the line that immediately follows the marker line.
_ENTRY_END_MARKERS = ("SINGLE", "MIXED MEAL")


def _parse_insulin_index(text):
    """Return the number contained in *text* as a float, or None.

    Every character that is not a digit or a period is discarded before
    conversion, so "20%" parses as 20.0. Text with nothing convertible left
    (empty, a lone ".", "1.2.3") yields None.
    """
    cleaned = re.sub(r"[^\d.]", "", text)
    try:
        return float(cleaned)
    except ValueError:
        return None


def _collect_entries(lines):
    """Scan *lines* and return a list of (food description, insulin index).

    The description is every stripped line from the entry-start line through
    the marker line (inclusive), joined with single spaces.
    """
    entries = []
    description = []
    capturing = False

    for index, raw_line in enumerate(lines):
        stripped = raw_line.strip()

        if _ENTRY_START.match(stripped):
            description = []  # a new entry discards any unfinished one
            capturing = True

        if capturing:
            description.append(stripped)

        if not any(marker in raw_line for marker in _ENTRY_END_MARKERS):
            continue

        capturing = False

        # A marker on the very last line has no following value line; skip it
        # rather than indexing past the end of the input.
        if not description or index + 1 >= len(lines):
            continue

        value = _parse_insulin_index(lines[index + 1])
        if value is None:
            # Best effort: an entry whose value line is not numeric is skipped.
            continue

        entries.append((" ".join(description), value))
        # Clear the description so a later stray marker line cannot re-emit
        # this food with an unrelated number.
        description = []

    return entries


def extract_insulin_data(input_file, output_file):
    """Extract food / insulin-index pairs from a text file into a sorted CSV.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the CSV file to write (overwritten if present).

    The CSV starts with the header row "Food Description", "Insulin Index",
    followed by one row per extracted entry in ascending insulin-index order.
    Entries with a missing or non-numeric insulin index are omitted.
    """
    with open(input_file, "r", encoding="utf-8") as source:
        lines = source.readlines()

    entries = _collect_entries(lines)
    entries.sort(key=lambda entry: entry[1])  # ascending insulin index

    with open(output_file, "w", encoding="utf-8", newline="") as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(["Food Description", "Insulin Index"])
        writer.writerows(entries)


# Usage example. Guarded so that importing this module does not touch the
# filesystem; running the file as a script behaves as before.
if __name__ == "__main__":
    input_file = "all_text"  # Update with your actual text file name
    output_file = "insulin_data_sorted.csv"
    extract_insulin_data(input_file, output_file)