feature: Improved CSV Parsing
Gitea Scan/plate-tool/pipeline/head This commit looks good
Details
Gitea Scan/plate-tool/pipeline/head This commit looks good
Details
1. Superior field detection: barring actual typos, most ways of writing a column header should now be recognized. It will also be considerably easier to add new variants.
2. Numeric well parsing: some systems do not use alphanumeric wells like H12 or E7. Purely numeric wells are now supported, but only if the plate format is specified manually; this feels like a good tradeoff, since a failed format detection would produce very confusing behaviour for the user.
This commit is contained in:
parent
e546fa354e
commit
f7f492b70e
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "plate-tool-lib"
|
name = "plate-tool-lib"
|
||||||
version = "0.3.0"
|
version = "0.3.1"
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::transfer::Transfer;
|
use crate::transfer::Transfer;
|
||||||
use crate::util::*;
|
use crate::util::*;
|
||||||
|
|
||||||
use super::TransferRecord;
|
use super::{TransferRecord, transfer_record::TransferRecordDeserializeIntermediate, mangle_headers::mangle_headers};
|
||||||
use lazy_static::lazy_static;
|
use lazy_static::lazy_static;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
@ -47,29 +47,28 @@ pub fn records_to_csv(trs: Vec<TransferRecord>) -> Result<String, Box<dyn Error>
|
||||||
pub fn string_well_to_pt(input: &str) -> Option<(u8, u8)> {
|
pub fn string_well_to_pt(input: &str) -> Option<(u8, u8)> {
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
static ref REGEX: Regex = Regex::new(r"([A-Z,a-z]+)(\d+)").unwrap();
|
static ref REGEX: Regex = Regex::new(r"([A-Z,a-z]+)(\d+)").unwrap();
|
||||||
|
static ref REGEX_ALT: Regex = Regex::new(r"(\d+)").unwrap();
|
||||||
}
|
}
|
||||||
if let Some(c1) = REGEX.captures(input) {
|
if let Some(c1) = REGEX.captures(input) {
|
||||||
if let (Some(row), Some(col)) = (letters_to_num(&c1[1]), c1[2].parse::<u8>().ok()) {
|
if let (Some(row), Some(col)) = (letters_to_num(&c1[1]), c1[2].parse::<u8>().ok()) {
|
||||||
Some((row, col))
|
return Some((row, col))
|
||||||
} else {
|
} else {
|
||||||
None
|
return None
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
}
|
||||||
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn read_csv(data: &str) -> Vec<TransferRecord> {
|
pub fn read_csv(data: &str) -> Vec<TransferRecord> {
|
||||||
let (header, data) = data.split_at(data.find('\n').unwrap());
|
let modified = mangle_headers(data);
|
||||||
let modified: String = header.to_lowercase() + data;
|
|
||||||
|
|
||||||
let mut rdr = csv::Reader::from_reader(modified.as_bytes());
|
let mut rdr = csv::Reader::from_reader(modified.as_bytes());
|
||||||
let mut records: Vec<TransferRecord> = Vec::new();
|
let mut records: Vec<TransferRecord> = Vec::new();
|
||||||
for record in rdr.deserialize::<TransferRecord>() {
|
for record in rdr.deserialize::<TransferRecordDeserializeIntermediate>() {
|
||||||
match record {
|
match record {
|
||||||
Ok(r) => {
|
Ok(r) => {
|
||||||
//log::debug!("{:?}", r);
|
if !r.is_empty() {
|
||||||
records.push(r);
|
records.push(r.into());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
log::debug!("{:?}", e);
|
log::debug!("{:?}", e);
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
/// Rewrites the header line of a CSV document so that every recognised column
/// carries its canonical lowercase name (see [`Field`]); everything after the
/// header line is passed through unchanged.
///
/// * Columns that are not recognised keep their original header text. They must
///   not be removed: dropping a header would shift every following header one
///   column to the left relative to the data rows.
/// * Input without any line break is treated as a header-only document instead
///   of panicking.
///
/// The header is split on plain commas, so a quoted header that itself contains
/// a comma is not supported.
pub fn mangle_headers(data: &str) -> String {
    // `rows` keeps the '\n' that terminated the header, so it can be appended
    // directly without inserting an extra (empty) line.
    let (header, rows) = match data.find('\n') {
        Some(line_end) => data.split_at(line_end),
        None => (data, ""),
    };

    let normalized: Vec<String> = header
        .trim() // also strips the '\r' of a CRLF line ending
        .split(',')
        .map(|field| match detect_field(field) {
            Some(known) => known.to_string(),
            None => field.to_string(),
        })
        .collect();

    normalized.join(",") + rows
}

/// Classifies a single header cell, ignoring case and surrounding whitespace.
///
/// Returns `None` when the text does not look like any known column. A header
/// that names a side ("source"/"src" or "dest...") but none of plate, well or
/// format is also `None`; it is not re-examined as a volume or concentration.
fn detect_field(field: &str) -> Option<Field> {
    let name = field.trim().to_lowercase();
    let has = |needle: &str| name.contains(needle);

    // Within one side, "format" is tested before "plate" so that a header such
    // as "Source Plate Format" is not mistaken for the plate-name column.
    let is_format = has("format") || has("fmt");
    let is_plate = has("plate");
    let is_well = has("well");

    if has("source") || has("src") {
        if is_format {
            Some(Field::SourceFormat)
        } else if is_plate {
            Some(Field::SourcePlate)
        } else if is_well {
            Some(Field::SourceWell)
        } else {
            None
        }
    } else if has("dest") {
        // "dest" also covers "destination".
        if is_format {
            Some(Field::DestinationFormat)
        } else if is_plate {
            Some(Field::DestinationPlate)
        } else if is_well {
            Some(Field::DestinationWell)
        } else {
            None
        }
    } else if has("volume") {
        Some(Field::Volume)
    } else if has("concentration") {
        Some(Field::Concentration)
    } else {
        None
    }
}

/// The columns that header detection knows how to recognise.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Field {
    SourcePlate,
    DestinationPlate,
    SourceWell,
    DestinationWell,
    SourceFormat,
    DestinationFormat,
    Volume,
    Concentration,
}

impl Field {
    /// Canonical header text written into the rewritten CSV for this column.
    fn as_str(self) -> &'static str {
        match self {
            Field::SourcePlate => "sourceplate",
            Field::DestinationPlate => "destinationplate",
            Field::SourceWell => "sourcewell",
            Field::DestinationWell => "destinationwell",
            Field::SourceFormat => "sourceformat",
            Field::DestinationFormat => "destinationformat",
            Field::Volume => "volume",
            Field::Concentration => "concentration",
        }
    }
}

// Implementing `Display` (rather than `ToString` directly) still provides
// `to_string()` through the standard blanket impl.
impl std::fmt::Display for Field {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
|
|
@ -1,6 +1,7 @@
|
||||||
mod transfer_record;
|
mod transfer_record;
|
||||||
mod conversion;
|
mod conversion;
|
||||||
mod auto;
|
mod auto;
|
||||||
|
mod mangle_headers;
|
||||||
|
|
||||||
pub use transfer_record::volume_default;
|
pub use transfer_record::volume_default;
|
||||||
pub use transfer_record::TransferRecord;
|
pub use transfer_record::TransferRecord;
|
||||||
|
|
|
@ -1,33 +1,100 @@
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::transfer::Transfer;
|
use crate::{plate::PlateFormat, transfer::Transfer, util::num_to_letters};
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
#[derive(Serialize, Debug, Clone)]
|
||||||
pub struct TransferRecord {
|
pub struct TransferRecord {
|
||||||
#[serde(rename = "Source Plate", alias = "source plate", alias = "src plate")]
|
#[serde(rename = "Source Plate")]
|
||||||
pub source_plate: String,
|
pub source_plate: String,
|
||||||
#[serde(rename = "Source Well", alias = "source well", alias = "src well")]
|
#[serde(rename = "Source Well")]
|
||||||
pub source_well: String,
|
pub source_well: String,
|
||||||
#[serde(
|
#[serde(rename = "Dest Plate")]
|
||||||
rename = "Dest Plate",
|
|
||||||
alias = "dest plate",
|
|
||||||
alias = "destination plate"
|
|
||||||
)]
|
|
||||||
pub destination_plate: String,
|
pub destination_plate: String,
|
||||||
#[serde(
|
#[serde(rename = "Destination Well")]
|
||||||
rename = "Destination Well",
|
|
||||||
alias = "destination well",
|
|
||||||
alias = "dest well"
|
|
||||||
)]
|
|
||||||
pub destination_well: String,
|
pub destination_well: String,
|
||||||
#[serde(rename = "Transfer Volume", alias = "transfer volume")]
|
#[serde(rename = "Transfer Volume")]
|
||||||
#[serde(default = "volume_default")]
|
|
||||||
pub volume: f32,
|
pub volume: f32,
|
||||||
#[serde(rename = "Concentration", alias = "concentration")]
|
#[serde(rename = "Concentration")]
|
||||||
pub concentration: Option<f32>,
|
pub concentration: Option<f32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Deserialize, Debug, Clone)]
|
||||||
|
pub struct TransferRecordDeserializeIntermediate {
|
||||||
|
#[serde(rename = "sourceplate")]
|
||||||
|
source_plate: String,
|
||||||
|
#[serde(rename = "destinationplate")]
|
||||||
|
destination_plate: String,
|
||||||
|
#[serde(rename = "sourcewell")]
|
||||||
|
source_well: String,
|
||||||
|
#[serde(rename = "sourceformat")]
|
||||||
|
source_format: Option<String>,
|
||||||
|
#[serde(rename = "destinationwell")]
|
||||||
|
destination_well: String,
|
||||||
|
#[serde(rename = "destinationformat")]
|
||||||
|
destination_format: Option<String>,
|
||||||
|
#[serde(rename = "volume")]
|
||||||
|
volume: Option<f32>,
|
||||||
|
#[serde(rename = "concentration")]
|
||||||
|
concentration: Option<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<TransferRecordDeserializeIntermediate> for TransferRecord {
|
||||||
|
fn from(value: TransferRecordDeserializeIntermediate) -> Self {
|
||||||
|
let mut source_well: String = value.source_well;
|
||||||
|
if let Some(pformat) = value
|
||||||
|
.source_format
|
||||||
|
.and_then(|x| PlateFormat::try_from(x.as_str()).ok())
|
||||||
|
{
|
||||||
|
if let Ok(well_number) = source_well.parse::<u16>() {
|
||||||
|
if let Some(alphanumeric) = numeric_well_to_alphanumeric(well_number, pformat) {
|
||||||
|
source_well = alphanumeric;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut destination_well: String = value.destination_well;
|
||||||
|
if let Some(pformat) = value
|
||||||
|
.destination_format
|
||||||
|
.and_then(|x| PlateFormat::try_from(x.as_str()).ok())
|
||||||
|
{
|
||||||
|
if let Ok(well_number) = destination_well.parse::<u16>() {
|
||||||
|
if let Some(alphanumeric) = numeric_well_to_alphanumeric(well_number, pformat) {
|
||||||
|
destination_well = alphanumeric;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let volume = value.volume.unwrap_or(volume_default());
|
||||||
|
|
||||||
|
TransferRecord {
|
||||||
|
source_plate: value.source_plate,
|
||||||
|
destination_plate: value.destination_plate,
|
||||||
|
source_well,
|
||||||
|
destination_well,
|
||||||
|
volume,
|
||||||
|
concentration: value.concentration,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TransferRecordDeserializeIntermediate {
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.source_plate.is_empty()
|
||||||
|
|| self.destination_plate.is_empty()
|
||||||
|
|| self.source_well.is_empty()
|
||||||
|
|| self.destination_well.is_empty()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn numeric_well_to_alphanumeric(input: u16, pformat: PlateFormat) -> Option<String> {
|
||||||
|
let column_height: u16 = pformat.size().0 as u16;
|
||||||
|
let column = input.div_ceil(column_height);
|
||||||
|
let row = input % column_height;
|
||||||
|
let row_str = num_to_letters(row as u8)?;
|
||||||
|
|
||||||
|
Some(format!("{}{}", row_str, column))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn volume_default() -> f32 {
|
pub fn volume_default() -> f32 {
|
||||||
Transfer::default().volume
|
Transfer::default().volume
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -77,6 +77,20 @@ impl TryFrom<&str> for PlateFormat {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
impl From<&PlateFormat> for u16 {
|
||||||
|
fn from(value: &PlateFormat) -> Self {
|
||||||
|
match value {
|
||||||
|
PlateFormat::W6 => 6,
|
||||||
|
PlateFormat::W12 => 12,
|
||||||
|
PlateFormat::W24 => 24,
|
||||||
|
PlateFormat::W48 => 48,
|
||||||
|
PlateFormat::W96 => 96,
|
||||||
|
PlateFormat::W384 => 384,
|
||||||
|
PlateFormat::W1536 => 1536,
|
||||||
|
PlateFormat::W3456 => 3456,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl PlateFormat {
|
impl PlateFormat {
|
||||||
pub fn size(&self) -> (u8, u8) {
|
pub fn size(&self) -> (u8, u8) {
|
||||||
|
|
Loading…
Reference in New Issue