diff options
author | Justin Worthe <justin@worthe-it.co.za> | 2017-11-29 22:12:42 +0200 |
---|---|---|
committer | Justin Worthe <justin@worthe-it.co.za> | 2017-11-29 22:12:42 +0200 |
commit | 56b731a651e96d1caa7d246b1ae8cd555b23536a (patch) | |
tree | 3cd14a0174bfee765bca337b1b5851eaa641b319 | |
parent | 817d06fc2ef4f8e258d906fad96c26f72b32c702 (diff) |
Added cleaning up of funny descriptions
Mostly branches and dates messing with import matching
-rw-r--r-- | Cargo.lock | 91 | ||||
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | src/lib.rs | 4 | ||||
-rw-r--r-- | src/qif.rs | 51 |
4 files changed, 142 insertions, 6 deletions
@@ -1,4 +1,95 @@ [root] name = "qif_parser" version = "0.1.0" +dependencies = [ + "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] +[[package]] +name = "aho-corasick" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lazy_static" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "lazy_static" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.33" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "thread_local" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699" +"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" +"checksum lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c8f31047daa365f19be14b47c29df4f7c3b581832407daabe6ae77397619237d" +"checksum libc 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "5ba3df4dcb460b9dfbd070d41c94c19209620c191b0340b929ce748a2bcd42d2" +"checksum memchr 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a" +"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b" +"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db" +"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14" +"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" +"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" @@ -4,3 +4,5 @@ version = "0.1.0" authors = ["Justin Worthe <justin@worthe-it.co.za>"] [dependencies] +regex = "0.2.2" +lazy_static = "1.0"
\ No newline at end of file @@ -1 +1,5 @@ +extern crate regex; + +#[macro_use] extern crate lazy_static; + pub mod qif; @@ -1,4 +1,5 @@ use std::fmt; +use regex::Regex; pub struct QifFile { header: String, @@ -54,23 +55,61 @@ impl QifEntry { let amount = lines.iter().find(|l| l.starts_with(AMOUNT_PREFIX)).expect("No amount"); let description = lines.iter().find(|l| l.starts_with(DESCRIPTION_PREFIX)).expect("No description"); QifEntry { - date: date.clone(), - amount: amount.clone(), - description: description.clone() + date: date.chars().skip(1).collect(), + amount: amount.chars().skip(1).collect(), + description: description.chars().skip(1).collect() } } pub fn is_empty(&self) -> bool { - self.amount == String::from("T0") + self.amount == String::from("0") } pub fn clean_description(&self) -> String { - self.description.clone() + let replaced_date = replace_date(&self.description); + replace_common(&replaced_date) } } impl fmt::Display for QifEntry { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}\n{}\n{}", self.date, self.amount, self.clean_description()) + write!(f, "{}{}\n{}{}\n{}{}", + DATE_PREFIX, self.date, + AMOUNT_PREFIX, self.amount, + DESCRIPTION_PREFIX, self.clean_description() + ) } } + +fn replace_date(text: &str) -> String { + lazy_static! { + static ref DATE_REGEX: Regex = Regex::new(r"(?i)(\d{2} )?(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)( \d{4})?").unwrap(); + } + DATE_REGEX.replace_all(text, "").trim().to_string() +} + +fn replace_common(text: &str) -> String { + lazy_static! { + static ref COMMON_NAMES: Vec<(Regex, &'static str)> = vec!( + (Regex::new(r"(?i)(pick n pay|pnp)").unwrap(), "Pick n Pay"), + (Regex::new(r"(?i)checkers").unwrap(), "Checkers"), + (Regex::new(r"(?i)WOOLWORTHS").unwrap(), "Woolworths"), + (Regex::new(r"(?i)spar").unwrap(), "Spar"), + (Regex::new(r"(?i)Crazy store").unwrap(), "Crazy Store"), + (Regex::new(r"^PNA").unwrap(), "PNA"), + (Regex::new(r"(?i)sahl").unwrap(), "SA Home Loans"), + (Regex::new(r"(?i)gautrain").unwrap(), "Gautrain"), + (Regex::new(r"(?i)BANK YOUR CHANGE DEBI").unwrap(), "TO SAVINGS POCKET"), + (Regex::new(r"(?i)AFRIHOST").unwrap(), "Afrihost"), + (Regex::new(r"(?i)DIALDIRECT").unwrap(), "Dialdirect"), + (Regex::new(r"(?i)STEERS").unwrap(), "Steers"), + (Regex::new(r"(?i)CELL C").unwrap(), "Cell C"), + (Regex::new(r"(?i)ELECTRICITY").unwrap(), "Electricity"), + (Regex::new(r"(?i)(COUNTRY VIEW|STAR STOP|Shell|Sasol)").unwrap(), "Petrol"), + ); + } + COMMON_NAMES.iter().fold( + text, |acc, next| if next.0.is_match(acc) {next.1} else {acc} + ).to_string() +} + |