diff options
author | Justin Wernick <justin@worthe-it.co.za> | 2023-03-20 14:04:17 +0200 |
---|---|---|
committer | Justin Wernick <justin@worthe-it.co.za> | 2023-03-20 14:33:42 +0200 |
commit | 18300f4e87be58e5c42c34ec81eed38b4bae4d91 (patch) | |
tree | 22eac5766bf36f15eb1c3105ad0ddf8f28346d22 | |
parent | fef39650efa65b4033b262e440d0f85999602412 (diff) |
Update the rules
-rw-r--r-- | src/qif.rs | 95 |
1 file changed, 63 insertions, 32 deletions
```diff
@@ -70,8 +70,16 @@ impl QifEntry {
     }
 
     pub fn clean_description(&self) -> String {
-        let replaced_date = replace_date(&self.description);
-        replace_common(&replaced_date)
+        let mut new_description = self.description.clone();
+        for algorithm in [
+            remove_date,
+            remove_card_number,
+            replace_common,
+            remove_payment_provider,
+        ] {
+            new_description = algorithm(&new_description);
+        }
+        new_description
     }
 }
 
@@ -90,49 +98,72 @@ impl fmt::Display for QifEntry {
     }
 }
 
-fn replace_date(text: &str) -> String {
+fn remove_date(text: &str) -> String {
     lazy_static! {
         static ref DATE_REGEX: Regex =
-            Regex::new(r"(?i)(\d{2} )?(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)( \d{4})?")
+            Regex::new(r"(?i)\d{2} (JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)( \d{4})?")
                 .unwrap();
     }
     DATE_REGEX.replace_all(text, "").trim().to_string()
 }
 
+fn remove_card_number(text: &str) -> String {
+    lazy_static! {
+        static ref CARD_NUM_REGEX: Regex = Regex::new(r"\d{6}\*+\d{4}").unwrap();
+    }
+    CARD_NUM_REGEX.replace_all(text, "").trim().to_string()
+}
+
+fn remove_payment_provider(text: &str) -> String {
+    lazy_static! {
+        static ref PURCH_REGEX: Regex = Regex::new(r"(?i)^(purch( payfast\*)?|pp \*)").unwrap();
+    }
+    PURCH_REGEX.replace_all(text, "").trim().to_string()
+}
+
 fn replace_common(text: &str) -> String {
     lazy_static! {
         static ref COMMON_NAMES: Vec<(Regex, &'static str)> = vec!(
-            (Regex::new(r"(?i)(pick n pay|pnp)").unwrap(), "Pick n Pay"),
-            (Regex::new(r"(?i)checkers").unwrap(), "Checkers"),
-            (Regex::new(r"(?i)WOOLWORTHS").unwrap(), "Woolworths"),
-            (Regex::new(r"(?i)clicks").unwrap(), "Clicks"),
-            (Regex::new(r"(?i)spar").unwrap(), "Spar"),
-            (Regex::new(r"(?i)Crazy store").unwrap(), "Crazy Store"),
-            (Regex::new(r"^PNA").unwrap(), "PNA"),
-            (Regex::new(r"(?i)sahl").unwrap(), "SA Home Loans"),
-            (Regex::new(r"(?i)gautrain").unwrap(), "Gautrain"),
-            (
-                Regex::new(r"(?i)BANK YOUR CHANGE DEBI").unwrap(),
-                "TO SAVINGS POCKET"
-            ),
-            (Regex::new(r"(?i)AFRIHOST").unwrap(), "Afrihost"),
-            (Regex::new(r"(?i)DIALDIRECT").unwrap(), "Dialdirect"),
-            (Regex::new(r"(?i)STEERS").unwrap(), "Steers"),
-            (Regex::new(r"(?i)CELL C").unwrap(), "Cell C"),
-            (Regex::new(r"(?i)ELECTRICITY").unwrap(), "Electricity"),
-            (
-                Regex::new(r"(?i)(COUNTRY VIEW|STAR STOP|Shell|Sasol)").unwrap(),
-                "Petrol"
-            ),
-            (Regex::new(r"(?i)kung ?-?fu").unwrap(), "Kungfu Kitchen"),
-        );
+            (r"(?i)(pick n pay|pnp)", "Pick n Pay"),
+            (r"(?i)checkers", "Checkers"),
+            (r"(?i)WOOLWORTHS", "Woolworths"),
+            (r"(?i)clicks", "Clicks"),
+            (r"(?i)spar", "Spar"),
+            (r"(?i)(disc memb|disc prem)", "Discovery medical aid"),
+            (r"(?i)10XRA", "10X Retirement Annuity"),
+            (r"(?i)NEDABF/MFC", "Nedbank MFC"),
+            (r"(?i)SARSEFLNG", "SARS Efiling"),
+            (r"(?i)Crazy store", "Crazy Store"),
+            (r"^PNA", "PNA"),
+            (r"^BWH", "Builders Warehouse"),
+            (r"^MCD ", "McDonalds"),
+            (r"NakedIn", "Naked Insurance"),
+            (r"(?i)sahl", "SA Home Loans"),
+            (r"(?i)gautrain", "Gautrain"),
+            (r"(?i)BYC DEBIT", "TO SAVINGS POCKET"),
+            (r"(?i)AFRIHOST", "Afrihost"),
+            (r"(?i)DIALDIRECT", "Dialdirect"),
+            (r"(?i)STEERS", "Steers"),
+            (r"(?i)CELL C", "Cell C"),
+            (r"(?i)ELECTRICITY", "Electricity"),
+            (r"(?i)(COUNTRY VIEW|STAR STOP|Shell|Sasol|Engen)", "Petrol"),
+            (r"(?i)kung ?-?fu", "Kungfu Kitchen"),
+        )
+        .into_iter()
+        .map(|(from, to)| (Regex::new(from).unwrap(), to))
+        .collect();
     }
     COMMON_NAMES
         .iter()
-        .fold(
-            text,
-            |acc, next| if next.0.is_match(acc) { next.1 } else { acc },
-        )
+        .filter_map(|(rule, replacement)| {
+            if rule.is_match(text) {
+                Some(replacement.to_string())
+            } else {
+                None
+            }
+        })
+        .next()
+        .unwrap_or_else(|| text.to_owned())
         .to_string()
 }
 
```