feature: Improved CSV Parsing
Gitea Scan/plate-tool/pipeline/head This commit looks good
Details
Gitea Scan/plate-tool/pipeline/head This commit looks good
Details
1. Superior field detection: short of actual typos, most ways of writing a column header should now be recognized. Further, it will be considerably easier to add new variants.
2. Numeric well parsing: some systems do not use alphanumeric wells like H12 or E7. Purely numeric wells are now supported, but only if the plate format is manually specified; this feels like a good tradeoff, since a failed auto-detection would produce very odd behaviour for the user.
This commit is contained in:
parent
e546fa354e
commit
f7f492b70e
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "plate-tool-lib"
|
||||
version = "0.3.0"
|
||||
version = "0.3.1"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use crate::transfer::Transfer;
|
||||
use crate::util::*;
|
||||
|
||||
use super::TransferRecord;
|
||||
use super::{TransferRecord, transfer_record::TransferRecordDeserializeIntermediate, mangle_headers::mangle_headers};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
@ -47,29 +47,28 @@ pub fn records_to_csv(trs: Vec<TransferRecord>) -> Result<String, Box<dyn Error>
|
|||
pub fn string_well_to_pt(input: &str) -> Option<(u8, u8)> {
|
||||
lazy_static! {
|
||||
static ref REGEX: Regex = Regex::new(r"([A-Z,a-z]+)(\d+)").unwrap();
|
||||
static ref REGEX_ALT: Regex = Regex::new(r"(\d+)").unwrap();
|
||||
}
|
||||
if let Some(c1) = REGEX.captures(input) {
|
||||
if let (Some(row), Some(col)) = (letters_to_num(&c1[1]), c1[2].parse::<u8>().ok()) {
|
||||
Some((row, col))
|
||||
return Some((row, col))
|
||||
} else {
|
||||
None
|
||||
return None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub fn read_csv(data: &str) -> Vec<TransferRecord> {
|
||||
let (header, data) = data.split_at(data.find('\n').unwrap());
|
||||
let modified: String = header.to_lowercase() + data;
|
||||
|
||||
let modified = mangle_headers(data);
|
||||
let mut rdr = csv::Reader::from_reader(modified.as_bytes());
|
||||
let mut records: Vec<TransferRecord> = Vec::new();
|
||||
for record in rdr.deserialize::<TransferRecord>() {
|
||||
for record in rdr.deserialize::<TransferRecordDeserializeIntermediate>() {
|
||||
match record {
|
||||
Ok(r) => {
|
||||
//log::debug!("{:?}", r);
|
||||
records.push(r);
|
||||
if !r.is_empty() {
|
||||
records.push(r.into());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
log::debug!("{:?}", e);
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
pub fn mangle_headers(data: &str) -> String {
|
||||
let (header, rows) = data.split_at(data.find('\n').unwrap());
|
||||
let fields = header.trim().split(",");
|
||||
let mut modified_headers: Vec<String> = Vec::new();
|
||||
for field in fields {
|
||||
if let Some(f) = detect_field(field) {
|
||||
modified_headers.push(f.to_string());
|
||||
}
|
||||
}
|
||||
|
||||
modified_headers.join(",") + "\n" + rows
|
||||
}
|
||||
|
||||
fn detect_field(field: &str) -> Option<Field> {
|
||||
match field.trim().to_lowercase() {
|
||||
x if x.contains("source") || x.contains("src") => match x {
|
||||
_ if x.contains("plate") => Some(Field::SourcePlate),
|
||||
_ if x.contains("well") => Some(Field::SourceWell),
|
||||
_ if x.contains("format") || x.contains("fmt") => Some(Field::SourceFormat),
|
||||
_ => None,
|
||||
},
|
||||
x if x.contains("destination") || x.contains("dest") => match x {
|
||||
_ if x.contains("plate") => Some(Field::DestinationPlate),
|
||||
_ if x.contains("well") => Some(Field::DestinationWell),
|
||||
_ if x.contains("format") || x.contains("fmt") => Some(Field::DestinationFormat),
|
||||
_ => None,
|
||||
},
|
||||
x if x.contains("volume") => Some(Field::Volume),
|
||||
x if x.contains("concentration") => Some(Field::Concentration),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// The CSV columns that header detection can recognise.
///
/// Each variant renders (via `Display` / `to_string()`) as a canonical
/// lowercase header name with no separators, e.g. `sourceplate`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Field {
    SourcePlate,
    DestinationPlate,
    SourceWell,
    DestinationWell,
    SourceFormat,
    DestinationFormat,
    Volume,
    Concentration,
}

// `Display` is implemented rather than `ToString`: the standard library's
// blanket impl then provides `to_string()` with the same output, and the type
// also becomes usable with `format!`/`write!`.
impl std::fmt::Display for Field {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let canonical = match self {
            Field::SourcePlate => "sourceplate",
            Field::DestinationPlate => "destinationplate",
            Field::SourceWell => "sourcewell",
            Field::DestinationWell => "destinationwell",
            Field::SourceFormat => "sourceformat",
            Field::DestinationFormat => "destinationformat",
            Field::Volume => "volume",
            Field::Concentration => "concentration",
        };
        f.write_str(canonical)
    }
}
|
|
@ -1,6 +1,7 @@
|
|||
mod transfer_record;
|
||||
mod conversion;
|
||||
mod auto;
|
||||
mod mangle_headers;
|
||||
|
||||
pub use transfer_record::volume_default;
|
||||
pub use transfer_record::TransferRecord;
|
||||
|
|
|
@ -1,33 +1,100 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::transfer::Transfer;
|
||||
use crate::{plate::PlateFormat, transfer::Transfer, util::num_to_letters};
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
#[derive(Serialize, Debug, Clone)]
|
||||
pub struct TransferRecord {
|
||||
#[serde(rename = "Source Plate", alias = "source plate", alias = "src plate")]
|
||||
#[serde(rename = "Source Plate")]
|
||||
pub source_plate: String,
|
||||
#[serde(rename = "Source Well", alias = "source well", alias = "src well")]
|
||||
#[serde(rename = "Source Well")]
|
||||
pub source_well: String,
|
||||
#[serde(
|
||||
rename = "Dest Plate",
|
||||
alias = "dest plate",
|
||||
alias = "destination plate"
|
||||
)]
|
||||
#[serde(rename = "Dest Plate")]
|
||||
pub destination_plate: String,
|
||||
#[serde(
|
||||
rename = "Destination Well",
|
||||
alias = "destination well",
|
||||
alias = "dest well"
|
||||
)]
|
||||
#[serde(rename = "Destination Well")]
|
||||
pub destination_well: String,
|
||||
#[serde(rename = "Transfer Volume", alias = "transfer volume")]
|
||||
#[serde(default = "volume_default")]
|
||||
#[serde(rename = "Transfer Volume")]
|
||||
pub volume: f32,
|
||||
#[serde(rename = "Concentration", alias = "concentration")]
|
||||
#[serde(rename = "Concentration")]
|
||||
pub concentration: Option<f32>,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, Debug, Clone)]
|
||||
pub struct TransferRecordDeserializeIntermediate {
|
||||
#[serde(rename = "sourceplate")]
|
||||
source_plate: String,
|
||||
#[serde(rename = "destinationplate")]
|
||||
destination_plate: String,
|
||||
#[serde(rename = "sourcewell")]
|
||||
source_well: String,
|
||||
#[serde(rename = "sourceformat")]
|
||||
source_format: Option<String>,
|
||||
#[serde(rename = "destinationwell")]
|
||||
destination_well: String,
|
||||
#[serde(rename = "destinationformat")]
|
||||
destination_format: Option<String>,
|
||||
#[serde(rename = "volume")]
|
||||
volume: Option<f32>,
|
||||
#[serde(rename = "concentration")]
|
||||
concentration: Option<f32>,
|
||||
}
|
||||
|
||||
impl From<TransferRecordDeserializeIntermediate> for TransferRecord {
|
||||
fn from(value: TransferRecordDeserializeIntermediate) -> Self {
|
||||
let mut source_well: String = value.source_well;
|
||||
if let Some(pformat) = value
|
||||
.source_format
|
||||
.and_then(|x| PlateFormat::try_from(x.as_str()).ok())
|
||||
{
|
||||
if let Ok(well_number) = source_well.parse::<u16>() {
|
||||
if let Some(alphanumeric) = numeric_well_to_alphanumeric(well_number, pformat) {
|
||||
source_well = alphanumeric;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut destination_well: String = value.destination_well;
|
||||
if let Some(pformat) = value
|
||||
.destination_format
|
||||
.and_then(|x| PlateFormat::try_from(x.as_str()).ok())
|
||||
{
|
||||
if let Ok(well_number) = destination_well.parse::<u16>() {
|
||||
if let Some(alphanumeric) = numeric_well_to_alphanumeric(well_number, pformat) {
|
||||
destination_well = alphanumeric;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let volume = value.volume.unwrap_or(volume_default());
|
||||
|
||||
TransferRecord {
|
||||
source_plate: value.source_plate,
|
||||
destination_plate: value.destination_plate,
|
||||
source_well,
|
||||
destination_well,
|
||||
volume,
|
||||
concentration: value.concentration,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TransferRecordDeserializeIntermediate {
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.source_plate.is_empty()
|
||||
|| self.destination_plate.is_empty()
|
||||
|| self.source_well.is_empty()
|
||||
|| self.destination_well.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
fn numeric_well_to_alphanumeric(input: u16, pformat: PlateFormat) -> Option<String> {
|
||||
let column_height: u16 = pformat.size().0 as u16;
|
||||
let column = input.div_ceil(column_height);
|
||||
let row = input % column_height;
|
||||
let row_str = num_to_letters(row as u8)?;
|
||||
|
||||
Some(format!("{}{}", row_str, column))
|
||||
}
|
||||
|
||||
pub fn volume_default() -> f32 {
|
||||
Transfer::default().volume
|
||||
}
|
||||
|
||||
|
|
|
@ -77,6 +77,20 @@ impl TryFrom<&str> for PlateFormat {
|
|||
}
|
||||
}
|
||||
}
|
||||
impl From<&PlateFormat> for u16 {
|
||||
fn from(value: &PlateFormat) -> Self {
|
||||
match value {
|
||||
PlateFormat::W6 => 6,
|
||||
PlateFormat::W12 => 12,
|
||||
PlateFormat::W24 => 24,
|
||||
PlateFormat::W48 => 48,
|
||||
PlateFormat::W96 => 96,
|
||||
PlateFormat::W384 => 384,
|
||||
PlateFormat::W1536 => 1536,
|
||||
PlateFormat::W3456 => 3456,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PlateFormat {
|
||||
pub fn size(&self) -> (u8, u8) {
|
||||
|
|
Loading…
Reference in New Issue