Implement file parsing

This commit is contained in:
Patrick Auernig 2024-04-11 14:53:27 +02:00
parent b1962e0d4e
commit 8f92222362
4 changed files with 370 additions and 9 deletions

75
src/bytes.rs Normal file
View File

@ -0,0 +1,75 @@
use std::io::Read;
use crate::error::Result;
/// Reads exactly `BYTES` bytes from `reader` and returns them as a fixed-size array.
///
/// # Errors
///
/// Propagates the error reported by [`Read::read_exact`], for example when the
/// reader runs out of data before `BYTES` bytes could be read.
pub fn read_bytes<const BYTES: usize, R>(mut reader: R) -> Result<[u8; BYTES]>
where
    R: Read,
{
    let mut bytes = [0u8; BYTES];
    reader.read_exact(&mut bytes[..])?;
    Ok(bytes)
}
/// Reads four bytes from `reader` and interprets them as a little-endian `i32`.
///
/// # Errors
///
/// Fails when [`read_bytes`] cannot obtain four bytes from `reader`.
pub fn read_i32_le<R>(reader: R) -> Result<i32>
where
    R: Read,
{
    read_bytes::<4, _>(reader).map(i32::from_le_bytes)
}
/// Reads four bytes from `reader` and interprets them as a little-endian `u32`.
///
/// # Errors
///
/// Fails when [`read_bytes`] cannot obtain four bytes from `reader`.
pub fn read_u32_le<R>(reader: R) -> Result<u32>
where
    R: Read,
{
    read_bytes::<4, _>(reader).map(u32::from_le_bytes)
}
/// Reads eight bytes from `reader` and interprets them as a little-endian `i64`.
///
/// # Errors
///
/// Fails when [`read_bytes`] cannot obtain eight bytes from `reader`.
pub fn read_i64_le<R>(reader: R) -> Result<i64>
where
    R: Read,
{
    read_bytes::<8, _>(reader).map(i64::from_le_bytes)
}
/// Reads eight bytes from `reader` and interprets them as a little-endian `u64`.
///
/// # Errors
///
/// Fails when [`read_bytes`] cannot obtain eight bytes from `reader`.
pub fn read_u64_le<R>(reader: R) -> Result<u64>
where
    R: Read,
{
    read_bytes::<8, _>(reader).map(u64::from_le_bytes)
}
/// Reads a length-prefixed string from `reader`.
///
/// The string is preceded by a little-endian `i32` length:
///
/// * a positive length is the number of UTF-8 bytes that follow;
/// * a negative length is the (negated) number of 2-byte code units that
///   follow, which are decoded as UTF-16LE;
/// * zero yields an empty string without reading any further data.
///
/// Trailing NUL characters are stripped from the result.
///
/// # Errors
///
/// Fails when the reader runs out of data, or when a positive-length payload
/// is not valid UTF-8. Invalid UTF-16 sequences do not fail; they are
/// replaced with U+FFFD.
pub fn read_string<R>(mut reader: R) -> Result<String>
where
    R: Read,
{
    let len = read_i32_le(&mut reader)?;
    let mut string = if len > 0 {
        let mut buf = vec![0u8; len as usize];
        reader.read_exact(&mut buf)?;
        String::from_utf8(buf)?
    } else if len < 0 {
        // `unsigned_abs` avoids the `i32` overflow that `len * -2` hits for
        // large negative lengths (including `i32::MIN`).
        let byte_len = (len.unsigned_abs() as usize).saturating_mul(2);
        let mut buf = vec![0u8; byte_len];
        reader.read_exact(&mut buf)?;
        // The payload consists of 2-byte little-endian code units; decoding it
        // as UTF-8 would leave a NUL after every ASCII character.
        let units: Vec<u16> = buf
            .chunks_exact(2)
            .map(|pair| u16::from_le_bytes([pair[0], pair[1]]))
            .collect();
        String::from_utf16_lossy(&units)
    } else {
        String::new()
    };
    // Drop the terminator(s) in place instead of allocating a second string.
    let trimmed_len = string.trim_end_matches('\0').len();
    string.truncate(trimmed_len);
    Ok(string)
}

21
src/error.rs Normal file
View File

@ -0,0 +1,21 @@
use std::io;
/// Result alias used throughout the crate; the error type defaults to [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Errors that can occur while parsing a localization resource.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum Error {
/// The version byte does not map to any known version; carries the raw byte.
#[error("Invalid version {0}")]
InvalidVersion(u8),
/// A namespace entry referenced an index outside of the localized string table;
/// carries the offending index.
#[error("Invalid localized string index {0}")]
InvalidLocalizedStringIndex(usize),
/// A string payload was not valid UTF-8.
#[error("Failed to parse string")]
StringParsingFailed(#[from] std::string::FromUtf8Error),
/// An underlying I/O operation failed; displayed exactly like the wrapped error.
#[error(transparent)]
Io(#[from] io::Error),
}

View File

@ -1,14 +1,167 @@
pub fn add(left: usize, right: usize) -> usize {
left + right
mod bytes;
pub mod error;
mod namespace;
use core::fmt;
use std::collections::HashMap;
use std::io::{Read, Seek, SeekFrom};
use crate::bytes::{read_bytes, read_i32_le, read_string, read_u32_le, read_u64_le};
use crate::error::{Error, Result};
use crate::namespace::Namespace;
/// 16-byte marker expected at the start of a versioned resource.
///
/// `read_version` compares the first 16 bytes of the stream against this value;
/// when they differ the stream is treated as `Version::Legacy`.
const MAGIC: [u8; 16] = [
0x0E, 0x14, 0x74, 0x75, 0x67, 0x4A, 0x03, 0xFC, 0x4A, 0x15, 0x90, 0x9D, 0xC3, 0x37, 0x7F, 0x1B,
];
/// Format revision of a localization resource.
///
/// The parser relies on the derived ordering of the variants (`>=` comparisons)
/// to decide which fields are present in the stream.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[repr(u8)]
pub enum Version {
/// Assumed when the stream does not start with the magic marker; localized
/// strings are stored inline with each namespace entry.
Legacy = 0,
/// From this version on, localized strings live in a separate string table
/// and namespace entries refer to them by index.
Compact = 1,
/// From this version on, the stream additionally carries namespace/key hashes,
/// a per-string reference count and a total entry count.
Optimized = 2,
/// Parsed exactly like `Optimized` by the code in this crate.
/// NOTE(review): presumably only the hash algorithm differs — confirm.
OptimizedCityHash = 3,
}
#[cfg(test)]
mod tests {
use super::*;
impl TryFrom<u8> for Version {
type Error = Error;
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
fn try_from(value: u8) -> Result<Self, Self::Error> {
let val = match value {
0 => Self::Legacy,
1 => Self::Compact,
2 => Self::Optimized,
3 => Self::OptimizedCityHash,
_ => return Err(Error::InvalidVersion(value)),
};
Ok(val)
}
}
/// A localized text value as stored in a localization resource.
#[derive(Debug, Clone)]
pub struct LocalizedString {
/// The localized text.
pub value: String,
/// Reference count read alongside the string for `Version::Optimized` and
/// newer; `None` otherwise.
/// NOTE(review): presumably the number of entries sharing this string — confirm.
pub ref_count: Option<i32>,
}
/// Displays the localized text itself; the reference count is not shown.
impl fmt::Display for LocalizedString {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.value.as_str())
    }
}
/// Lets a [`LocalizedString`] be used wherever a `&str` is expected.
impl core::ops::Deref for LocalizedString {
    type Target = str;

    fn deref(&self) -> &str {
        self.value.as_str()
    }
}
impl From<String> for LocalizedString {
fn from(value: String) -> Self {
Self {
value,
ref_count: None,
}
}
}
/// A fully parsed localization resource.
#[derive(Debug)]
pub struct Localization {
/// Format revision detected from the stream header.
pub version: Version,
/// String table; left empty for `Version::Legacy`, which has no table.
pub strings: Vec<LocalizedString>,
/// All namespaces, keyed by namespace name.
pub namespaces: HashMap<String, Namespace>,
/// Entry count read from the stream for `Version::Optimized` and newer;
/// `None` otherwise.
pub entries_count: Option<u32>,
}
impl Localization {
    /// Parses a complete localization resource from `reader`.
    ///
    /// The stream is consumed in this order: version header, string table,
    /// entry count (only for [`Version::Optimized`] and newer), namespaces.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by any of the parsing steps.
    pub fn from_reader<R>(mut reader: R) -> Result<Self>
    where
        R: Read + Seek,
    {
        let version = read_version(&mut reader)?;
        let strings = read_localization_strings(&mut reader, version)?;

        let has_entries_count = version >= Version::Optimized;
        let entries_count = has_entries_count
            .then(|| read_u32_le(&mut reader))
            .transpose()?;

        let namespaces = namespace::read_all(&mut reader, &strings, version)?;

        Ok(Self {
            version,
            strings,
            namespaces,
            entries_count,
        })
    }

    /// Looks up the string stored under `key` inside `namespace`.
    ///
    /// Returns `None` when either the namespace or the key is unknown.
    pub fn get_namespaced_string<N, K>(&self, namespace: N, key: K) -> Option<&LocalizedString>
    where
        N: AsRef<str>,
        K: AsRef<str>,
    {
        let ns = self.namespaces.get(namespace.as_ref())?;
        ns.get(key)
    }
}
fn read_version<R>(mut reader: R) -> Result<Version>
where
R: Read + Seek,
{
let buf = read_bytes::<16, _>(&mut reader)?;
let version = if buf == MAGIC {
let buf = read_bytes::<1, _>(&mut reader)?;
buf[0].try_into()?
} else {
reader.rewind()?;
Version::Legacy
};
Ok(version)
}
/// Reads the localized string table, if the format has one.
///
/// Versions older than [`Version::Compact`] have no table, so nothing is read
/// and an empty vector is returned. Otherwise a `u64` table offset is read,
/// the reader jumps to that offset, reads the table, and then returns to the
/// position right after the offset field.
///
/// For [`Version::Optimized`] and newer every string is followed by an `i32`
/// reference count.
///
/// # Errors
///
/// Fails on any read or seek error; in that case the reader position is not
/// restored.
fn read_localization_strings<R>(mut reader: R, version: Version) -> Result<Vec<LocalizedString>>
where
    R: Read + Seek,
{
    if version < Version::Compact {
        return Ok(Vec::new());
    }
    let has_ref_counts = version >= Version::Optimized;

    let table_offset = read_u64_le(&mut reader)?;
    let resume_at = reader.stream_position()?;
    reader.seek(SeekFrom::Start(table_offset))?;

    let mut table = Vec::new();
    let count = read_i32_le(&mut reader)?;
    for _ in 0..count {
        let value = read_string(&mut reader)?;
        let ref_count = has_ref_counts
            .then(|| read_i32_le(&mut reader))
            .transpose()?;
        table.push(LocalizedString { value, ref_count });
    }

    reader.seek(SeekFrom::Start(resume_at))?;
    Ok(table)
}

112
src/namespace.rs Normal file
View File

@ -0,0 +1,112 @@
use std::collections::HashMap;
use std::io::Read;
use crate::bytes::{read_i32_le, read_string, read_u32_le};
use crate::error::{Error, Result};
use crate::{LocalizedString, Version};
/// A group of localized strings addressed by string key.
#[derive(Debug)]
pub struct Namespace {
/// Hash stored with the namespace for `Version::Optimized` and newer;
/// `None` otherwise.
pub key_hash: Option<i32>,
/// Entries keyed by string key. Each value holds, in order: the localized
/// string, the optional string-key hash and the source-string hash.
pub members: HashMap<String, (LocalizedString, Option<u32>, u32)>,
}
impl Namespace {
pub fn new() -> Self {
Self {
key_hash: None,
members: HashMap::new(),
}
}
pub fn add(
&mut self,
key: String,
key_hash: Option<u32>,
source_hash: u32,
localized: LocalizedString,
) {
self.members.insert(key, (localized, key_hash, source_hash));
}
pub fn get<K>(&self, key: K) -> Option<&LocalizedString>
where
K: AsRef<str>,
{
self.members.get(key.as_ref()).map(|(k, _, _)| k)
}
pub fn for_each(&self) -> impl Iterator<Item = (&String, &LocalizedString)> {
self.members.iter().map(|(k, (v, _, _))| (k, v))
}
}
/// Reads the namespace count followed by that many namespaces.
///
/// The result maps each namespace name to its parsed [`Namespace`]. When two
/// namespaces share a name, the one read later replaces the earlier one.
///
/// # Errors
///
/// Stops at and returns the first error encountered while reading.
pub(crate) fn read_all<R>(
    mut reader: R,
    localized: &[LocalizedString],
    version: Version,
) -> Result<HashMap<String, Namespace>>
where
    R: Read,
{
    let total = read_u32_le(&mut reader)?;
    (0..total)
        .map(|_| read_one(&mut reader, localized, version))
        .collect()
}
fn read_one<R>(
mut reader: R,
localized: &[LocalizedString],
version: Version,
) -> Result<(String, Namespace)>
where
R: Read,
{
let mut namespace = Namespace::new();
if version >= Version::Optimized {
namespace.key_hash = Some(read_i32_le(&mut reader)?);
}
let namespace_key = read_string(&mut reader)?;
let entries = read_u32_le(&mut reader)?;
for _ in 0..entries {
let string_key_hash = if version >= Version::Optimized {
Some(read_u32_le(&mut reader)?)
} else {
None
};
let string_key = read_string(&mut reader)?;
let source_string_hash = read_u32_le(&mut reader)?;
let localized_string = if version >= Version::Compact {
let index = read_i32_le(&mut reader)? as usize;
localized
.get(index)
.ok_or_else(|| Error::InvalidLocalizedStringIndex(index))?
.clone()
} else {
read_string(&mut reader)?.into()
};
namespace.add(
string_key,
string_key_hash,
source_string_hash,
localized_string,
);
}
Ok((namespace_key, namespace))
}