From 8f92222362541c3320db7ea918e4abcc4a1e2e94 Mon Sep 17 00:00:00 2001
From: Patrick Auernig
Date: Thu, 11 Apr 2024 14:53:27 +0200
Subject: [PATCH] Implement file parsing

---
 src/bytes.rs     | 123 +++++++++++++++++++++++++++
 src/error.rs     |  27 ++++++
 src/lib.rs       | 216 ++++++++++++++++++++++++++++++++++++++++++++++--
 src/namespace.rs | 132 +++++++++++++++++++++++++++++
 4 files changed, 489 insertions(+), 9 deletions(-)
 create mode 100644 src/bytes.rs
 create mode 100644 src/error.rs
 create mode 100644 src/namespace.rs

diff --git a/src/bytes.rs b/src/bytes.rs
new file mode 100644
index 0000000..449c528
--- /dev/null
+++ b/src/bytes.rs
@@ -0,0 +1,123 @@
+use std::io::{self, Read};
+
+use crate::error::Result;
+
+
+/// Reads exactly `BYTES` bytes from `reader` into a fixed-size array.
+///
+/// # Errors
+///
+/// Fails with an I/O error if `reader` ends before `BYTES` bytes were read.
+pub fn read_bytes<const BYTES: usize, R>(mut reader: R) -> Result<[u8; BYTES]>
+where
+    R: Read,
+{
+    let mut buf = [0u8; BYTES];
+    reader.read_exact(&mut buf)?;
+    Ok(buf)
+}
+
+
+/// Reads a little-endian `i32` from `reader`.
+pub fn read_i32_le<R>(reader: R) -> Result<i32>
+where
+    R: Read,
+{
+    let buf = read_bytes::<4, _>(reader)?;
+    Ok(i32::from_le_bytes(buf))
+}
+
+
+/// Reads a little-endian `u32` from `reader`.
+pub fn read_u32_le<R>(reader: R) -> Result<u32>
+where
+    R: Read,
+{
+    let buf = read_bytes::<4, _>(reader)?;
+    Ok(u32::from_le_bytes(buf))
+}
+
+
+/// Reads a little-endian `i64` from `reader`.
+pub fn read_i64_le<R>(reader: R) -> Result<i64>
+where
+    R: Read,
+{
+    let buf = read_bytes::<8, _>(reader)?;
+    Ok(i64::from_le_bytes(buf))
+}
+
+
+/// Reads a little-endian `u64` from `reader`.
+pub fn read_u64_le<R>(reader: R) -> Result<u64>
+where
+    R: Read,
+{
+    let buf = read_bytes::<8, _>(reader)?;
+    Ok(u64::from_le_bytes(buf))
+}
+
+
+/// Reads exactly `len` bytes from `reader` into a new buffer.
+///
+/// The buffer grows as data arrives instead of being sized up front, so a
+/// corrupt length prefix cannot force a huge allocation by itself.
+fn read_exact_vec<R>(reader: R, len: u64) -> Result<Vec<u8>>
+where
+    R: Read,
+{
+    let mut buf = Vec::new();
+    reader.take(len).read_to_end(&mut buf)?;
+
+    if buf.len() as u64 != len {
+        return Err(io::Error::from(io::ErrorKind::UnexpectedEof).into());
+    }
+
+    Ok(buf)
+}
+
+
+/// Reads a length-prefixed string from `reader`.
+///
+/// The prefix is a little-endian `i32`:
+///
+/// - positive: that many bytes of UTF-8 follow,
+/// - negative: `-len` UTF-16LE code units (two bytes each) follow,
+/// - zero: the string is empty and nothing else is read.
+///
+/// Trailing NUL characters are stripped from the result.
+///
+/// # Errors
+///
+/// Fails if `reader` ends early or if a positive-length string is not valid
+/// UTF-8. Invalid UTF-16 is replaced with U+FFFD rather than rejected.
+pub fn read_string<R>(mut reader: R) -> Result<String>
+where
+    R: Read,
+{
+    let len = read_i32_le(&mut reader)?;
+    // `unsigned_abs` is total, unlike `len * -2`, which overflows for `i32::MIN`.
+    let magnitude = u64::from(len.unsigned_abs());
+
+    let mut string = if len > 0 {
+        let buf = read_exact_vec(&mut reader, magnitude)?;
+        String::from_utf8(buf)?
+    } else if len < 0 {
+        let buf = read_exact_vec(&mut reader, magnitude * 2)?;
+        // Decode real UTF-16LE; treating the bytes as UTF-8 would leave a NUL
+        // after every ASCII character and mangle everything else.
+        let units: Vec<u16> = buf
+            .chunks_exact(2)
+            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
+            .collect();
+        String::from_utf16_lossy(&units)
+    } else {
+        String::new()
+    };
+
+    // Serialized strings include their NUL terminator; drop it in place.
+    let trimmed_len = string.trim_end_matches('\0').len();
+    string.truncate(trimmed_len);
+
+    Ok(string)
+}
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..6a321e6
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,27 @@
+use std::io;
+
+
+/// Result type whose error is always [`Error`].
+pub type Result<T> = std::result::Result<T, Error>;
+
+
+/// Errors that can occur while parsing a localization file.
+#[derive(Debug, thiserror::Error)]
+#[non_exhaustive]
+pub enum Error {
+    /// The version byte following the magic header is not a known version.
+    #[error("Invalid version {0}")]
+    InvalidVersion(u8),
+
+    /// An entry refers to a localized string index outside the string table.
+    #[error("Invalid localized string index {0}")]
+    InvalidLocalizedStringIndex(usize),
+
+    /// A UTF-8 encoded string contained invalid data.
+    #[error("Failed to parse string")]
+    StringParsingFailed(#[from] std::string::FromUtf8Error),
+
+    /// Reading from or seeking in the underlying reader failed.
+    #[error(transparent)]
+    Io(#[from] io::Error),
+}
diff --git a/src/lib.rs b/src/lib.rs
index 7d12d9a..76be8cf 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,14 +1,212 @@
-pub fn add(left: usize, right: usize) -> usize {
-    left + right
+mod bytes;
+pub mod error;
+mod namespace;
+
+use core::fmt;
+use std::collections::HashMap;
+use std::io::{Read, Seek, SeekFrom};
+
+use crate::bytes::{read_bytes, read_i32_le, read_string, read_u32_le, read_u64_le};
+use crate::error::{Error, Result};
+use crate::namespace::Namespace;
+
+
+/// Magic bytes marking a versioned file; the version byte follows them.
+const MAGIC: [u8; 16] = [
+    0x0E, 0x14, 0x74, 0x75, 0x67, 0x4A, 0x03, 0xFC, 0x4A, 0x15, 0x90, 0x9D, 0xC3, 0x37, 0x7F, 0x1B,
+];
+
+
+/// File format version; later variants compare greater than earlier ones.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(u8)]
+pub enum Version {
+    /// Format with every localized string stored inline in its entry.
+    Legacy = 0,
+    /// Stores strings in a shared table that entries reference by index.
+    Compact = 1,
+    /// Adds reference counts, key hashes and an entry count.
+    Optimized = 2,
+    /// Parsed exactly like [`Version::Optimized`] by this crate.
+    OptimizedCityHash = 3,
 }
 
-#[cfg(test)]
-mod tests {
-    use super::*;
+impl TryFrom<u8> for Version {
+    type Error = Error;
 
-    #[test]
-    fn it_works() {
-        let result = add(2, 2);
-        assert_eq!(result, 4);
+    /// Maps a raw version byte to a [`Version`], rejecting unknown values
+    /// with [`Error::InvalidVersion`].
+    fn try_from(value: u8) -> Result<Self> {
+        match value {
+            0 => Ok(Self::Legacy),
+            1 => Ok(Self::Compact),
+            2 => Ok(Self::Optimized),
+            3 => Ok(Self::OptimizedCityHash),
+            _ => Err(Error::InvalidVersion(value)),
+        }
     }
 }
+
+
+/// A localized string together with its optional reference count.
+#[derive(Debug, Clone)]
+pub struct LocalizedString {
+    /// The localized text.
+    pub value: String,
+    /// Reference count stored next to the string; only present for
+    /// [`Version::Optimized`] and newer.
+    pub ref_count: Option<i32>,
+}
+
+impl fmt::Display for LocalizedString {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.value)
+    }
+}
+
+impl core::ops::Deref for LocalizedString {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        &self.value
+    }
+}
+
+impl From<String> for LocalizedString {
+    fn from(value: String) -> Self {
+        Self {
+            value,
+            ref_count: None,
+        }
+    }
+}
+
+
+/// The parsed contents of a localization file.
+#[derive(Debug)]
+pub struct Localization {
+    /// Format version the file was written in.
+    pub version: Version,
+    /// Shared string table; empty for [`Version::Legacy`] files.
+    pub strings: Vec<LocalizedString>,
+    /// Namespaces keyed by name.
+    pub namespaces: HashMap<String, Namespace>,
+    /// Value of the entry-count field; only present for
+    /// [`Version::Optimized`] and newer.
+    pub entries_count: Option<u32>,
+}
+
+impl Localization {
+    /// Parses a localization file from `reader`.
+    ///
+    /// `reader` must be positioned at the start of the file, because the
+    /// string table is located through an absolute offset.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the data is truncated, carries an unknown version, holds
+    /// invalid UTF-8, or references a string outside the string table.
+    pub fn from_reader<R>(mut reader: R) -> Result<Self>
+    where
+        R: Read + Seek,
+    {
+        let version = read_version(&mut reader)?;
+
+        let strings = read_localization_strings(&mut reader, version)?;
+
+        let entries_count = if version >= Version::Optimized {
+            Some(read_u32_le(&mut reader)?)
+        } else {
+            None
+        };
+
+        let namespaces = namespace::read_all(&mut reader, &strings, version)?;
+
+        Ok(Localization {
+            version,
+            strings,
+            namespaces,
+            entries_count,
+        })
+    }
+
+    /// Looks up the string stored under `key` in `namespace`.
+    ///
+    /// Returns `None` if either the namespace or the key is unknown.
+    pub fn get_namespaced_string<N, K>(&self, namespace: N, key: K) -> Option<&LocalizedString>
+    where
+        N: AsRef<str>,
+        K: AsRef<str>,
+    {
+        self.namespaces
+            .get(namespace.as_ref())
+            .and_then(|ns| ns.get(key))
+    }
+}
+
+
+/// Detects the file version and leaves `reader` at the start of the payload.
+///
+/// Files that do not open with [`MAGIC`] are treated as [`Version::Legacy`]
+/// and the reader is rewound, since such files have no header to skip.
+fn read_version<R>(mut reader: R) -> Result<Version>
+where
+    R: Read + Seek,
+{
+    let version = match read_bytes::<16, _>(&mut reader) {
+        Ok(buf) if buf == MAGIC => {
+            let [raw] = read_bytes::<1, _>(&mut reader)?;
+            Version::try_from(raw)?
+        }
+        Ok(_) => {
+            reader.rewind()?;
+            Version::Legacy
+        }
+        // A small legacy file can be shorter than the magic itself; that is
+        // not an error, there is simply no header to find.
+        Err(Error::Io(err)) if err.kind() == std::io::ErrorKind::UnexpectedEof => {
+            reader.rewind()?;
+            Version::Legacy
+        }
+        Err(err) => return Err(err),
+    };
+
+    Ok(version)
+}
+
+
+/// Reads the shared string table used by [`Version::Compact`] and newer.
+///
+/// The table lives at an absolute offset stored at the current position.
+/// After reading it, the reader is moved back to just behind that offset
+/// field. Legacy files have no table, so nothing is read for them.
+fn read_localization_strings<R>(mut reader: R, version: Version) -> Result<Vec<LocalizedString>>
+where
+    R: Read + Seek,
+{
+    let mut localized = Vec::new();
+
+    if version >= Version::Compact {
+        let offset = read_u64_le(&mut reader)?;
+        let stream_pos_bak = reader.stream_position()?;
+        reader.seek(SeekFrom::Start(offset))?;
+
+        let strings_count = read_i32_le(&mut reader)?;
+
+        for _ in 0..strings_count {
+            let value = read_string(&mut reader)?;
+
+            let ref_count = if version >= Version::Optimized {
+                Some(read_i32_le(&mut reader)?)
+            } else {
+                None
+            };
+
+            localized.push(LocalizedString { value, ref_count });
+        }
+
+        reader.seek(SeekFrom::Start(stream_pos_bak))?;
+    }
+
+    Ok(localized)
+}
diff --git a/src/namespace.rs b/src/namespace.rs
new file mode 100644
index 0000000..2b7eccd
--- /dev/null
+++ b/src/namespace.rs
@@ -0,0 +1,132 @@
+use std::collections::HashMap;
+use std::io::Read;
+
+use crate::bytes::{read_i32_le, read_string, read_u32_le};
+use crate::error::{Error, Result};
+use crate::{LocalizedString, Version};
+
+
+/// A named group of localized strings, looked up by key.
+#[derive(Debug, Default)]
+pub struct Namespace {
+    /// Hash read alongside the namespace name; only present for
+    /// [`Version::Optimized`] and newer.
+    pub key_hash: Option<i32>,
+    /// Maps each key to its string, the key's hash (if stored) and the hash
+    /// of the source string.
+    pub members: HashMap<String, (LocalizedString, Option<u32>, u32)>,
+}
+
+impl Namespace {
+    /// Creates an empty namespace without a key hash.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Stores `localized` under `key`, replacing any previous entry for it.
+    pub fn add(
+        &mut self,
+        key: String,
+        key_hash: Option<u32>,
+        source_hash: u32,
+        localized: LocalizedString,
+    ) {
+        self.members.insert(key, (localized, key_hash, source_hash));
+    }
+
+    /// Returns the string stored under `key`, if any.
+    pub fn get<K>(&self, key: K) -> Option<&LocalizedString>
+    where
+        K: AsRef<str>,
+    {
+        self.members
+            .get(key.as_ref())
+            .map(|(localized, _, _)| localized)
+    }
+
+    /// Iterates over all `(key, string)` pairs in unspecified order.
+    pub fn for_each(&self) -> impl Iterator<Item = (&String, &LocalizedString)> {
+        self.members.iter().map(|(k, (v, _, _))| (k, v))
+    }
+}
+
+
+/// Reads the namespace count followed by that many namespaces.
+///
+/// `localized` is the shared string table that entries of
+/// [`Version::Compact`] and newer files index into.
+pub(crate) fn read_all<R>(
+    mut reader: R,
+    localized: &[LocalizedString],
+    version: Version,
+) -> Result<HashMap<String, Namespace>>
+where
+    R: Read,
+{
+    let namespace_count = read_u32_le(&mut reader)?;
+    let mut namespaces = HashMap::new();
+
+    for _ in 0..namespace_count {
+        let (namespace_key, namespace) = read_one(&mut reader, localized, version)?;
+        namespaces.insert(namespace_key, namespace);
+    }
+
+    Ok(namespaces)
+}
+
+
+/// Reads one namespace and returns it together with its name.
+///
+/// # Errors
+///
+/// Fails with [`Error::InvalidLocalizedStringIndex`] if an entry points
+/// outside `localized`, and with the underlying error if reading fails.
+fn read_one<R>(
+    mut reader: R,
+    localized: &[LocalizedString],
+    version: Version,
+) -> Result<(String, Namespace)>
+where
+    R: Read,
+{
+    let mut namespace = Namespace::new();
+
+    if version >= Version::Optimized {
+        namespace.key_hash = Some(read_i32_le(&mut reader)?);
+    }
+
+    let namespace_key = read_string(&mut reader)?;
+    let entries = read_u32_le(&mut reader)?;
+
+    for _ in 0..entries {
+        let string_key_hash = if version >= Version::Optimized {
+            Some(read_u32_le(&mut reader)?)
+        } else {
+            None
+        };
+
+        let string_key = read_string(&mut reader)?;
+        let source_string_hash = read_u32_le(&mut reader)?;
+
+        let localized_string = if version >= Version::Compact {
+            // A negative index wraps to a huge `usize`, so the bounds check
+            // below rejects it like any other out-of-range index.
+            let index = read_i32_le(&mut reader)? as usize;
+            localized
+                .get(index)
+                .ok_or(Error::InvalidLocalizedStringIndex(index))?
+                .clone()
+        } else {
+            read_string(&mut reader)?.into()
+        };
+
+        namespace.add(
+            string_key,
+            string_key_hash,
+            source_string_hash,
+            localized_string,
+        );
+    }
+
+    Ok((namespace_key, namespace))
+}