Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paid 1196 reshuffle structure #131

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Previous commit
Fix imports
ggaspersic committed May 28, 2024
commit 757f599c4fd86845e15dcc9b47c8d4831ce00bd1
8 changes: 4 additions & 4 deletions src/namespace/feature/executors.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
use crate::namespace::{feature, vwmap};
use std::error::Error;
use std::io::Error as IOError;
use std::io::ErrorKind;

use std::cell::RefCell;

use crate::namespace;
use dyn_clone::{clone_trait_object, DynClone};
use fasthash::murmur3;

use crate::namespace;
use crate::namespace::feature::transformers::{
TransformerBinner, TransformerCombine, TransformerLogRatioBinner, TransformerWeight,
};
use crate::namespace::feature::{parser, transformers};
use crate::namespace::feature::parser;
use crate::namespace::vwmap;

pub fn default_seeds(to_namespace_index: u32) -> [u32; 5] {
let to_namespace_index = to_namespace_index ^ 1u32 << 31; // compatibility with earlier version
@@ -52,7 +52,7 @@ impl ExecutorToNamespace {
pub fn emit_i32<const SEED_ID: usize>(&mut self, to_data: i32, hash_value: f32) {
let hash_index = murmur3::hash32_with_seed(to_data.to_le_bytes(), *unsafe {
self.namespace_seeds.get_unchecked(SEED_ID)
}) & feature::parser::MASK31;
}) & namespace::parser::MASK31;
self.tmp_data.push((hash_index, hash_value));
}

13 changes: 6 additions & 7 deletions src/namespace/feature/parser.rs
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@ use crate::namespace::vwmap::{
NamespaceDescriptor, NamespaceFormat, NamespaceType, VwNamespaceMap,
};
use nom::bytes::complete::take_while;
use nom::character;
use nom::character::complete;
use nom::number;
use nom::sequence::tuple;
@@ -39,13 +38,13 @@ struct NSStage1Parse {
#[allow(dead_code)]
name: String,
definition: String,
from_namespaces: Vec<std::string::String>,
from_namespaces: Vec<String>,
processing: Cell<bool>,
done: Cell<bool>,
}

pub struct NamespaceTransformsParser {
denormalized: HashMap<std::string::String, NSStage1Parse>, // to_namespace_str -> list of from_namespace_str
denormalized: HashMap<String, NSStage1Parse>, // to_namespace_str -> list of from_namespace_str
}

impl NamespaceTransformsParser {
@@ -301,10 +300,10 @@ pub fn name_char(c: char) -> bool {
// identifier = namespace or function name
pub fn parse_identifier(input: &str) -> IResult<&str, String> {
let (input, (_, first_char, rest, _)) = tuple((
character::complete::space0,
complete::space0,
complete::one_of("abcdefghijklmnopqrstuvwzxyABCDEFGHIJKLMNOPQRSTUVWZXY_"),
take_while(name_char),
character::complete::space0,
complete::space0,
))(input)?;
let mut s = first_char.to_string();
s.push_str(rest);
@@ -326,9 +325,9 @@ pub fn parse_function_params_namespaces(input: &str) -> IResult<&str, Vec<String

pub fn parse_float(input: &str) -> IResult<&str, f32> {
let (input, (_, f, _)) = tuple((
character::complete::space0,
complete::space0,
number::complete::float,
character::complete::space0,
complete::space0,
))(input)?;
Ok((input, f))
}
21 changes: 10 additions & 11 deletions src/namespace/feature/transformers.rs
Original file line number Diff line number Diff line change
@@ -9,7 +9,6 @@ use crate::namespace::feature::executors::{
ExecutorFromNamespace, ExecutorToNamespace, FunctionExecutorTrait, SeedNumber,
TransformExecutors,
};
use crate::namespace::feature::parser;
use crate::namespace::feature::parser::Namespace;
use crate::namespace::vwmap::{NamespaceDescriptor, NamespaceFormat, NamespaceType};

@@ -608,7 +607,7 @@ mod tests {
let record_buffer = [
6, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(4, 6) | IS_NOT_SINGLE_MASK,
// Feature triple
1775699190 & MASK31, // Hash location
@@ -629,7 +628,7 @@ mod tests {
let record_buffer = [
6, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(4, 6) | IS_NOT_SINGLE_MASK,
// Feature triple
1775699190 & MASK31, // Hash location
@@ -675,7 +674,7 @@ mod tests {
let record_buffer = [
9, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(5, 7) | IS_NOT_SINGLE_MASK,
nd(7, 9) | IS_NOT_SINGLE_MASK,
// Feature triple
@@ -699,7 +698,7 @@ mod tests {
let record_buffer = [
9, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(5, 7) | IS_NOT_SINGLE_MASK,
nd(7, 9) | IS_NOT_SINGLE_MASK,
// Feature triple
@@ -727,7 +726,7 @@ mod tests {
let record_buffer = [
9, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(5, 7) | IS_NOT_SINGLE_MASK,
nd(7, 9) | IS_NOT_SINGLE_MASK,
// Feature triple
@@ -751,7 +750,7 @@ mod tests {
let record_buffer = [
9, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(5, 7) | IS_NOT_SINGLE_MASK,
nd(7, 9) | IS_NOT_SINGLE_MASK,
// Feature triple
@@ -779,7 +778,7 @@ mod tests {
let record_buffer = [
9, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(5, 7) | IS_NOT_SINGLE_MASK,
nd(7, 9) | IS_NOT_SINGLE_MASK,
// Feature triple
@@ -824,7 +823,7 @@ mod tests {
let record_buffer = [
6, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(4, 6) | IS_NOT_SINGLE_MASK,
// Feature triple
1775699190 & MASK31, // Hash location
@@ -856,7 +855,7 @@ mod tests {
let record_buffer = [
7, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(4, 6) | IS_NOT_SINGLE_MASK,
// Feature triple
1775699190 & MASK31, // Hash location
@@ -907,7 +906,7 @@ mod tests {
let record_buffer = [
9, // length
0, // label
(1.0_f32).to_bits(), // Example weight
1.0_f32.to_bits(), // Example weight
nd(5, 7) | IS_NOT_SINGLE_MASK,
nd(7, 9) | IS_NOT_SINGLE_MASK,
// Feature triple