feature: Improved CSV Parsing
Gitea Scan/plate-tool/pipeline/head This commit looks good Details

1. Superior field detection: short of having actual typos, most ways to
   express a field should now be properly registered. Further, it will
   be considerably easier to add new variants.
2. Numeric well parsing: some systems do not use alphanumeric wells like
   H12 or E7. Purely numeric wells will now be supported but only if the
   plate format is manually specified; this feels like a good tradeoff
   since a failed detection would yield very odd behaviour to a user.
This commit is contained in:
Emilia Allison 2024-02-16 20:53:20 -05:00
parent e546fa354e
commit f7f492b70e
Signed by: emilia
GPG Key ID: 05D5D1107E5100A1
6 changed files with 169 additions and 30 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "plate-tool-lib"
version = "0.3.0"
version = "0.3.1"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -1,7 +1,7 @@
use crate::transfer::Transfer;
use crate::util::*;
use super::TransferRecord;
use super::{TransferRecord, transfer_record::TransferRecordDeserializeIntermediate, mangle_headers::mangle_headers};
use lazy_static::lazy_static;
use regex::Regex;
use serde::{Deserialize, Serialize};
@ -47,29 +47,28 @@ pub fn records_to_csv(trs: Vec<TransferRecord>) -> Result<String, Box<dyn Error>
/// Parses a textual well label into a `(row, column)` pair.
///
/// The label is matched against `REGEX` (letters followed by digits). The
/// letter part is converted with `letters_to_num` and the digit part is parsed
/// as a `u8`; if either conversion fails, or the label does not match at all,
/// `None` is returned.
pub fn string_well_to_pt(input: &str) -> Option<(u8, u8)> {
lazy_static! {
// Capture 1 is the letter (row) part, capture 2 the digit (column) part.
// NOTE(review): the character class `[A-Z,a-z]` also admits a literal ','.
static ref REGEX: Regex = Regex::new(r"([A-Z,a-z]+)(\d+)").unwrap();
// Digits-only pattern; it is not referenced by any line visible here.
static ref REGEX_ALT: Regex = Regex::new(r"(\d+)").unwrap();
}
if let Some(c1) = REGEX.captures(input) {
if let (Some(row), Some(col)) = (letters_to_num(&c1[1]), c1[2].parse::<u8>().ok()) {
Some((row, col))
return Some((row, col))
} else {
None
return None
}
} else {
None
}
None
}
pub fn read_csv(data: &str) -> Vec<TransferRecord> {
let (header, data) = data.split_at(data.find('\n').unwrap());
let modified: String = header.to_lowercase() + data;
let modified = mangle_headers(data);
let mut rdr = csv::Reader::from_reader(modified.as_bytes());
let mut records: Vec<TransferRecord> = Vec::new();
for record in rdr.deserialize::<TransferRecord>() {
for record in rdr.deserialize::<TransferRecordDeserializeIntermediate>() {
match record {
Ok(r) => {
//log::debug!("{:?}", r);
records.push(r);
if !r.is_empty() {
records.push(r.into());
}
}
Err(e) => {
log::debug!("{:?}", e);

View File

@ -0,0 +1,58 @@
/// Rewrites the header row of a CSV document so that every recognised column
/// uses its canonical name, leaving the data rows untouched.
///
/// * `data` - the full CSV text; everything up to the first `'\n'` is treated
///   as the header row.
///
/// Returns the CSV text with the normalised header. Columns that
/// `detect_field` does not recognise keep their original name, so the header
/// still has one entry per data column and later columns do not shift.
/// Input without any newline is treated as a header-only document instead of
/// panicking.
pub fn mangle_headers(data: &str) -> String {
    // `split_once` consumes the separator, so the newline is emitted exactly
    // once when header and rows are glued back together.
    let (header, rows) = match data.split_once('\n') {
        Some((header, rows)) => (header, Some(rows)),
        None => (data, None),
    };

    // `trim` also strips the '\r' of a CRLF-terminated header line.
    let modified_headers: Vec<String> = header
        .trim()
        .split(',')
        .map(|field| match detect_field(field) {
            Some(known) => known.to_string(),
            // Unknown column: keep it in place to preserve column alignment.
            None => field.to_string(),
        })
        .collect();

    let mut mangled = modified_headers.join(",");
    if let Some(rows) = rows {
        mangled.push('\n');
        mangled.push_str(rows);
    }
    mangled
}
/// Guesses which canonical column a raw CSV header name refers to.
///
/// The name is trimmed and lower-cased, then classified by substring:
/// a source marker (`source` / `src`) or a destination marker
/// (`destination` / `dest`) combined with `plate`, `well` or
/// `format` / `fmt`; otherwise `volume` or `concentration`.
/// Returns `None` when nothing matches, including a source or destination
/// header that names neither a plate, a well nor a format.
fn detect_field(field: &str) -> Option<Field> {
    let name = field.trim().to_lowercase();
    let mentions = |needle: &str| name.contains(needle);

    if mentions("source") || mentions("src") {
        if mentions("plate") {
            return Some(Field::SourcePlate);
        }
        if mentions("well") {
            return Some(Field::SourceWell);
        }
        if mentions("format") || mentions("fmt") {
            return Some(Field::SourceFormat);
        }
        return None;
    }

    if mentions("destination") || mentions("dest") {
        if mentions("plate") {
            return Some(Field::DestinationPlate);
        }
        if mentions("well") {
            return Some(Field::DestinationWell);
        }
        if mentions("format") || mentions("fmt") {
            return Some(Field::DestinationFormat);
        }
        return None;
    }

    if mentions("volume") {
        Some(Field::Volume)
    } else if mentions("concentration") {
        Some(Field::Concentration)
    } else {
        None
    }
}
/// Canonical CSV columns that a raw header name can be normalised to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Field {
    SourcePlate,
    DestinationPlate,
    SourceWell,
    DestinationWell,
    SourceFormat,
    DestinationFormat,
    Volume,
    Concentration,
}

/// Formats a field as its canonical, lower-case header name.
///
/// Implementing `Display` rather than `ToString` directly keeps
/// `Field::to_string()` available through the standard blanket impl while
/// also allowing the type to be used with `format!` and friends.
impl std::fmt::Display for Field {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            Field::SourcePlate => "sourceplate",
            Field::DestinationPlate => "destinationplate",
            Field::SourceWell => "sourcewell",
            Field::DestinationWell => "destinationwell",
            Field::SourceFormat => "sourceformat",
            Field::DestinationFormat => "destinationformat",
            Field::Volume => "volume",
            Field::Concentration => "concentration",
        };
        f.write_str(name)
    }
}

View File

@ -1,6 +1,7 @@
mod transfer_record;
mod conversion;
mod auto;
mod mangle_headers;
pub use transfer_record::volume_default;
pub use transfer_record::TransferRecord;

View File

@ -1,33 +1,100 @@
use serde::{Deserialize, Serialize};
use crate::transfer::Transfer;
use crate::{plate::PlateFormat, transfer::Transfer, util::num_to_letters};
/// One transfer row: a source plate/well, a destination plate/well, the
/// transferred volume and an optional concentration.
///
/// The serde `rename` attributes give the column name used for each field.
#[derive(Serialize, Deserialize, Debug, Clone)]
#[derive(Serialize, Debug, Clone)]
pub struct TransferRecord {
#[serde(rename = "Source Plate", alias = "source plate", alias = "src plate")]
#[serde(rename = "Source Plate")]
pub source_plate: String,
#[serde(rename = "Source Well", alias = "source well", alias = "src well")]
#[serde(rename = "Source Well")]
pub source_well: String,
#[serde(
rename = "Dest Plate",
alias = "dest plate",
alias = "destination plate"
)]
#[serde(rename = "Dest Plate")]
pub destination_plate: String,
#[serde(
rename = "Destination Well",
alias = "destination well",
alias = "dest well"
)]
#[serde(rename = "Destination Well")]
pub destination_well: String,
#[serde(rename = "Transfer Volume", alias = "transfer volume")]
#[serde(default = "volume_default")]
#[serde(rename = "Transfer Volume")]
pub volume: f32,
#[serde(rename = "Concentration", alias = "concentration")]
#[serde(rename = "Concentration")]
pub concentration: Option<f32>,
}
/// Raw shape of one CSV row as it is deserialized, before being converted
/// into a `TransferRecord`.
///
/// Each field is bound to one of the canonical, lower-case header names
/// (`sourceplate`, `destinationwell`, ...). The plate and well columns are
/// plain `String`s; the format, volume and concentration columns are
/// `Option`s.
#[derive(Deserialize, Debug, Clone)]
pub struct TransferRecordDeserializeIntermediate {
// Name of the plate the liquid is taken from.
#[serde(rename = "sourceplate")]
source_plate: String,
// Name of the plate the liquid is delivered to.
#[serde(rename = "destinationplate")]
destination_plate: String,
// Source well label as written in the file; may be purely numeric.
#[serde(rename = "sourcewell")]
source_well: String,
// Optional source plate format; when it parses as a `PlateFormat`, a numeric
// source well is converted to its alphanumeric form.
#[serde(rename = "sourceformat")]
source_format: Option<String>,
// Destination well label as written in the file; may be purely numeric.
#[serde(rename = "destinationwell")]
destination_well: String,
// Optional destination plate format, used like `source_format`.
#[serde(rename = "destinationformat")]
destination_format: Option<String>,
// Optional transfer volume; `volume_default()` is used when absent.
#[serde(rename = "volume")]
volume: Option<f32>,
// Optional concentration, passed through unchanged.
#[serde(rename = "concentration")]
concentration: Option<f32>,
}
impl From<TransferRecordDeserializeIntermediate> for TransferRecord {
fn from(value: TransferRecordDeserializeIntermediate) -> Self {
let mut source_well: String = value.source_well;
if let Some(pformat) = value
.source_format
.and_then(|x| PlateFormat::try_from(x.as_str()).ok())
{
if let Ok(well_number) = source_well.parse::<u16>() {
if let Some(alphanumeric) = numeric_well_to_alphanumeric(well_number, pformat) {
source_well = alphanumeric;
}
}
}
let mut destination_well: String = value.destination_well;
if let Some(pformat) = value
.destination_format
.and_then(|x| PlateFormat::try_from(x.as_str()).ok())
{
if let Ok(well_number) = destination_well.parse::<u16>() {
if let Some(alphanumeric) = numeric_well_to_alphanumeric(well_number, pformat) {
destination_well = alphanumeric;
}
}
}
let volume = value.volume.unwrap_or(volume_default());
TransferRecord {
source_plate: value.source_plate,
destination_plate: value.destination_plate,
source_well,
destination_well,
volume,
concentration: value.concentration,
}
}
}
impl TransferRecordDeserializeIntermediate {
    /// Reports whether the row lacks any of its four mandatory values.
    ///
    /// Returns `true` as soon as the source plate, destination plate, source
    /// well or destination well is an empty string.
    pub fn is_empty(&self) -> bool {
        let required = [
            &self.source_plate,
            &self.destination_plate,
            &self.source_well,
            &self.destination_well,
        ];
        required.iter().any(|text| text.is_empty())
    }
}
/// Converts a 1-based, column-major well number into an alphanumeric label
/// such as `H1` or `A2`.
///
/// * `input` - well number; wells are counted down the first column, then
///   down the second, and so on.
/// * `pformat` - plate format; `size().0` is used as the number of wells per
///   column and `size().0 * size().1` as the total number of wells.
///
/// Returns `None` when `input` is `0`, lies beyond the last well of the
/// plate, or the row cannot be expressed by `num_to_letters`.
///
/// NOTE(review): assumes `num_to_letters` is 1-based (1 -> "A") — confirm
/// against `crate::util`.
fn numeric_well_to_alphanumeric(input: u16, pformat: PlateFormat) -> Option<String> {
    let (rows, columns) = pformat.size();
    let column_height = u16::from(rows);
    // Two `u8` factors cannot overflow a `u16`.
    let well_count = column_height * u16::from(columns);
    // Also covers a zero-sized plate, so the divisions below are safe.
    if input == 0 || input > well_count {
        return None;
    }

    // Work zero-based so that the last well of a column maps to the last row
    // instead of wrapping around to row 0.
    let zero_based = input - 1;
    let column = zero_based / column_height + 1;
    let row = u8::try_from(zero_based % column_height + 1).ok()?;
    let row_str = num_to_letters(row)?;
    Some(format!("{}{}", row_str, column))
}
/// Returns the volume carried by a default-constructed `Transfer`.
pub fn volume_default() -> f32 {
    let defaults = Transfer::default();
    defaults.volume
}

View File

@ -77,6 +77,20 @@ impl TryFrom<&str> for PlateFormat {
}
}
}
impl From<&PlateFormat> for u16 {
fn from(value: &PlateFormat) -> Self {
match value {
PlateFormat::W6 => 6,
PlateFormat::W12 => 12,
PlateFormat::W24 => 24,
PlateFormat::W48 => 48,
PlateFormat::W96 => 96,
PlateFormat::W384 => 384,
PlateFormat::W1536 => 1536,
PlateFormat::W3456 => 3456,
}
}
}
impl PlateFormat {
pub fn size(&self) -> (u8, u8) {