From 18300f4e87be58e5c42c34ec81eed38b4bae4d91 Mon Sep 17 00:00:00 2001
From: Justin Wernick
Date: Mon, 20 Mar 2023 14:04:17 +0200
Subject: Update the rules

---
 src/qif.rs | 95 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/src/qif.rs b/src/qif.rs
index 9b46af0..de36bb7 100644
--- a/src/qif.rs
+++ b/src/qif.rs
@@ -70,8 +70,16 @@ impl QifEntry {
     }
 
     pub fn clean_description(&self) -> String {
-        let replaced_date = replace_date(&self.description);
-        replace_common(&replaced_date)
+        let mut new_description = self.description.clone();
+        for algorithm in [
+            remove_date,
+            remove_card_number,
+            replace_common,
+            remove_payment_provider,
+        ] {
+            new_description = algorithm(&new_description);
+        }
+        new_description
     }
 }
 
@@ -90,49 +98,72 @@ impl fmt::Display for QifEntry {
     }
 }
 
-fn replace_date(text: &str) -> String {
+fn remove_date(text: &str) -> String {
     lazy_static! {
         static ref DATE_REGEX: Regex =
-            Regex::new(r"(?i)(\d{2} )?(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)( \d{4})?")
+            Regex::new(r"(?i)\d{2} (JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)( \d{4})?")
                 .unwrap();
     }
     DATE_REGEX.replace_all(text, "").trim().to_string()
 }
 
+fn remove_card_number(text: &str) -> String {
+    lazy_static! {
+        static ref CARD_NUM_REGEX: Regex = Regex::new(r"\d{6}\*+\d{4}").unwrap();
+    }
+    CARD_NUM_REGEX.replace_all(text, "").trim().to_string()
+}
+
+fn remove_payment_provider(text: &str) -> String {
+    lazy_static! {
+        static ref PURCH_REGEX: Regex = Regex::new(r"(?i)^(purch( payfast\*)?|pp \*)").unwrap();
+    }
+    PURCH_REGEX.replace_all(text, "").trim().to_string()
+}
+
 fn replace_common(text: &str) -> String {
     lazy_static! {
         static ref COMMON_NAMES: Vec<(Regex, &'static str)> = vec!(
-            (Regex::new(r"(?i)(pick n pay|pnp)").unwrap(), "Pick n Pay"),
-            (Regex::new(r"(?i)checkers").unwrap(), "Checkers"),
-            (Regex::new(r"(?i)WOOLWORTHS").unwrap(), "Woolworths"),
-            (Regex::new(r"(?i)clicks").unwrap(), "Clicks"),
-            (Regex::new(r"(?i)spar").unwrap(), "Spar"),
-            (Regex::new(r"(?i)Crazy store").unwrap(), "Crazy Store"),
-            (Regex::new(r"^PNA").unwrap(), "PNA"),
-            (Regex::new(r"(?i)sahl").unwrap(), "SA Home Loans"),
-            (Regex::new(r"(?i)gautrain").unwrap(), "Gautrain"),
-            (
-                Regex::new(r"(?i)BANK YOUR CHANGE DEBI").unwrap(),
-                "TO SAVINGS POCKET"
-            ),
-            (Regex::new(r"(?i)AFRIHOST").unwrap(), "Afrihost"),
-            (Regex::new(r"(?i)DIALDIRECT").unwrap(), "Dialdirect"),
-            (Regex::new(r"(?i)STEERS").unwrap(), "Steers"),
-            (Regex::new(r"(?i)CELL C").unwrap(), "Cell C"),
-            (Regex::new(r"(?i)ELECTRICITY").unwrap(), "Electricity"),
-            (
-                Regex::new(r"(?i)(COUNTRY VIEW|STAR STOP|Shell|Sasol)").unwrap(),
-                "Petrol"
-            ),
-            (Regex::new(r"(?i)kung ?-?fu").unwrap(), "Kungfu Kitchen"),
-        );
+            (r"(?i)(pick n pay|pnp)", "Pick n Pay"),
+            (r"(?i)checkers", "Checkers"),
+            (r"(?i)WOOLWORTHS", "Woolworths"),
+            (r"(?i)clicks", "Clicks"),
+            (r"(?i)spar", "Spar"),
+            (r"(?i)(disc memb|disc prem)", "Discovery medical aid"),
+            (r"(?i)10XRA", "10X Retirement Annuity"),
+            (r"(?i)NEDABF/MFC", "Nedbank MFC"),
+            (r"(?i)SARSEFLNG", "SARS Efiling"),
+            (r"(?i)Crazy store", "Crazy Store"),
+            (r"^PNA", "PNA"),
+            (r"^BWH", "Builders Warehouse"),
+            (r"^MCD ", "McDonalds"),
+            (r"NakedIn", "Naked Insurance"),
+            (r"(?i)sahl", "SA Home Loans"),
+            (r"(?i)gautrain", "Gautrain"),
+            (r"(?i)BYC DEBIT", "TO SAVINGS POCKET"),
+            (r"(?i)AFRIHOST", "Afrihost"),
+            (r"(?i)DIALDIRECT", "Dialdirect"),
+            (r"(?i)STEERS", "Steers"),
+            (r"(?i)CELL C", "Cell C"),
+            (r"(?i)ELECTRICITY", "Electricity"),
+            (r"(?i)(COUNTRY VIEW|STAR STOP|Shell|Sasol|Engen)", "Petrol"),
+            (r"(?i)kung ?-?fu", "Kungfu Kitchen"),
+        )
+        .into_iter()
+        .map(|(from, to)| (Regex::new(from).unwrap(), to))
+        .collect();
     }
     COMMON_NAMES
         .iter()
-        .fold(
-            text,
-            |acc, next| if next.0.is_match(acc) { next.1 } else { acc },
-        )
+        .filter_map(|(rule, replacement)| {
+            if rule.is_match(text) {
+                Some(replacement.to_string())
+            } else {
+                None
+            }
+        })
+        .next()
+        .unwrap_or_else(|| text.to_owned())
         .to_string()
 }
 
-- 
cgit v1.2.3