Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store categories from pandas. #11303

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if(PLUGIN_SYCL)
string(REPLACE " -isystem ${CONDA_PREFIX}/include" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
endif()

project(xgboost LANGUAGES CXX C VERSION 3.0.0)
project(xgboost LANGUAGES CXX C VERSION 3.1.0)
include(cmake/Utils.cmake)
list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules")

Expand Down
4 changes: 2 additions & 2 deletions R-package/DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: xgboost
Type: Package
Title: Extreme Gradient Boosting
Version: 3.0.0.0
Date: 2024-11-26
Version: 3.1.0.0
Date: 2025-03-03
Authors@R: c(
person("Tianqi", "Chen", role = c("aut"),
email = "[email protected]"),
Expand Down
26 changes: 13 additions & 13 deletions R-package/configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for xgboost 3.0.0.
# Generated by GNU Autoconf 2.71 for xgboost 3.1.0.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
Expand Down Expand Up @@ -607,8 +607,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='xgboost'
PACKAGE_TARNAME='xgboost'
PACKAGE_VERSION='3.0.0'
PACKAGE_STRING='xgboost 3.0.0'
PACKAGE_VERSION='3.1.0'
PACKAGE_STRING='xgboost 3.1.0'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1262,7 +1262,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures xgboost 3.0.0 to adapt to many kinds of systems.
\`configure' configures xgboost 3.1.0 to adapt to many kinds of systems.

Usage: $0 [OPTION]... [VAR=VALUE]...

Expand Down Expand Up @@ -1324,7 +1324,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of xgboost 3.0.0:";;
short | recursive ) echo "Configuration of xgboost 3.1.0:";;
esac
cat <<\_ACEOF

Expand Down Expand Up @@ -1407,7 +1407,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
xgboost configure 3.0.0
xgboost configure 3.1.0
generated by GNU Autoconf 2.71

Copyright (C) 2021 Free Software Foundation, Inc.
Expand Down Expand Up @@ -1668,7 +1668,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.

It was created by xgboost $as_me 3.0.0, which was
It was created by xgboost $as_me 3.1.0, which was
generated by GNU Autoconf 2.71. Invocation command line was

$ $0$ac_configure_args_raw
Expand Down Expand Up @@ -2796,11 +2796,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5
printf %s "checking for $CXX option to enable C++11 features... " >&6; }
if test ${ac_cv_prog_cxx_cxx11+y}
if test ${ac_cv_prog_cxx_11+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_cv_prog_cxx_cxx11=no
ac_cv_prog_cxx_11=no
ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
Expand Down Expand Up @@ -2842,11 +2842,11 @@ if test x$ac_prog_cxx_stdcxx = xno
then :
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5
printf %s "checking for $CXX option to enable C++98 features... " >&6; }
if test ${ac_cv_prog_cxx_cxx98+y}
if test ${ac_cv_prog_cxx_98+y}
then :
printf %s "(cached) " >&6
else $as_nop
ac_cv_prog_cxx_cxx98=no
ac_cv_prog_cxx_98=no
ac_save_CXX=$CXX
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
Expand Down Expand Up @@ -3855,7 +3855,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by xgboost $as_me 3.0.0, which was
This file was extended by xgboost $as_me 3.1.0, which was
generated by GNU Autoconf 2.71. Invocation command line was

CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -3919,7 +3919,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
xgboost config.status 3.0.0
xgboost config.status 3.1.0
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"

Expand Down
2 changes: 1 addition & 1 deletion R-package/configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

AC_PREREQ(2.69)

AC_INIT([xgboost],[3.0.0],[],[xgboost],[])
AC_INIT([xgboost],[3.1.0],[],[xgboost],[])

: ${R_HOME=`R RHOME`}
if test -z "${R_HOME}"; then
Expand Down
1 change: 1 addition & 0 deletions R-package/src/Makevars.in
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ OBJECTS= \
$(PKGROOT)/src/gbm/gblinear_model.o \
$(PKGROOT)/src/data/adapter.o \
$(PKGROOT)/src/data/array_interface.o \
$(PKGROOT)/src/data/cat_container.o \
$(PKGROOT)/src/data/simple_dmatrix.o \
$(PKGROOT)/src/data/data.o \
$(PKGROOT)/src/data/sparse_page_raw_format.o \
Expand Down
1 change: 1 addition & 0 deletions R-package/src/Makevars.win.in
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ OBJECTS= \
$(PKGROOT)/src/gbm/gblinear_model.o \
$(PKGROOT)/src/data/adapter.o \
$(PKGROOT)/src/data/array_interface.o \
$(PKGROOT)/src/data/cat_container.o \
$(PKGROOT)/src/data/simple_dmatrix.o \
$(PKGROOT)/src/data/data.o \
$(PKGROOT)/src/data/sparse_page_raw_format.o \
Expand Down
41 changes: 32 additions & 9 deletions include/xgboost/data.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2015-2024, XGBoost Contributors
* Copyright 2015-2025, XGBoost Contributors
* \file data.h
* \brief The input data structure of xgboost.
* \author Tianqi Chen
Expand All @@ -8,8 +8,8 @@
#define XGBOOST_DATA_H_

#include <dmlc/base.h>
#include <dmlc/data.h>
#include <dmlc/serializer.h>
#include <dmlc/io.h> // for Stream
#include <dmlc/serializer.h> // for Handler
#include <xgboost/base.h>
#include <xgboost/host_device_vector.h>
#include <xgboost/linalg.h>
Expand Down Expand Up @@ -42,13 +42,16 @@ enum class FeatureType : uint8_t { kNumerical = 0, kCategorical = 1 };

enum class DataSplitMode : int { kRow = 0, kCol = 1 };

/*!
* \brief Meta information about dataset, always sit in memory.
// Forward declaration of the container used by the meta info.
struct CatContainer;

/**
* @brief Meta information about dataset, always sit in memory.
*/
class MetaInfo {
public:
/*! \brief number of data fields in MetaInfo */
static constexpr uint64_t kNumField = 12;
static constexpr uint64_t kNumField = 13;

/*! \brief number of rows in the data */
bst_idx_t num_row_{0}; // NOLINT
Expand Down Expand Up @@ -100,9 +103,9 @@ class MetaInfo {
*/
HostDeviceVector<float> feature_weights;

/*! \brief default constructor */
MetaInfo() = default;
MetaInfo();
MetaInfo(MetaInfo&& that) = default;
MetaInfo(MetaInfo const& that) = delete;
MetaInfo& operator=(MetaInfo&& that) = default;
MetaInfo& operator=(MetaInfo const& that) = delete;

Expand Down Expand Up @@ -205,6 +208,16 @@ class MetaInfo {
* @brief Flag for whether the DMatrix has categorical features.
*/
bool HasCategorical() const { return has_categorical_; }
/**
* @brief Getters for categories.
*/
[[nodiscard]] CatContainer const* Cats() const;
[[nodiscard]] CatContainer* Cats();
[[nodiscard]] std::shared_ptr<CatContainer const> CatsShared() const;
/**
* @brief Setter for categories.
*/
void Cats(std::shared_ptr<CatContainer> cats);

private:
void SetInfoFromHost(Context const* ctx, StringView key, Json arr);
Expand All @@ -213,6 +226,8 @@ class MetaInfo {
/*! \brief argsort of labels */
mutable std::vector<size_t> label_order_cache_;
bool has_categorical_{false};

std::shared_ptr<CatContainer> cats_;
};

/*! \brief Element from a sparse vector */
Expand Down Expand Up @@ -691,7 +706,15 @@ class DMatrix {
* @param slice_id Index of the current slice
* @return DMatrix containing the slice of columns
*/
virtual DMatrix *SliceCol(int num_slices, int slice_id) = 0;
virtual DMatrix* SliceCol(int num_slices, int slice_id) = 0;
/**
* @brief Accessor for the string representation of the categories.
*/
CatContainer const* Cats() const { return this->CatsShared().get(); }
[[nodiscard]] virtual std::shared_ptr<CatContainer const> CatsShared() const {
LOG(FATAL) << "Not implemented for the current DMatrix type.";
return nullptr;
}

protected:
virtual BatchSet<SparsePage> GetRowBatches() = 0;
Expand Down
3 changes: 2 additions & 1 deletion include/xgboost/predictor.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
* performs predictions for a gradient booster.
*/
#pragma once
#include <xgboost/base.h>
#include <dmlc/registry.h> // for FunctionRegEntryBase
#include <xgboost/base.h> // for bst_tree_t
#include <xgboost/cache.h> // for DMatrixCache
#include <xgboost/context.h> // for Context
#include <xgboost/context.h>
Expand Down
2 changes: 1 addition & 1 deletion include/xgboost/version_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#define XGBOOST_VERSION_CONFIG_H_

#define XGBOOST_VER_MAJOR 3 /* NOLINT */
#define XGBOOST_VER_MINOR 0 /* NOLINT */
#define XGBOOST_VER_MINOR 1 /* NOLINT */
#define XGBOOST_VER_PATCH 0 /* NOLINT */

#endif // XGBOOST_VERSION_CONFIG_H_
2 changes: 1 addition & 1 deletion jvm-packages/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>XGBoost JVM Package</name>
<description>JVM Package for XGBoost</description>
Expand Down
4 changes: 2 additions & 2 deletions jvm-packages/xgboost4j-example/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
</parent>
<name>xgboost4j-example</name>
<artifactId>xgboost4j-example_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<build>
<plugins>
Expand Down
4 changes: 2 additions & 2 deletions jvm-packages/xgboost4j-flink/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
</parent>

<name>xgboost4j-flink</name>
<artifactId>xgboost4j-flink_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
<properties>
<flink-ml.version>2.2.0</flink-ml.version>
</properties>
Expand Down
2 changes: 1 addition & 1 deletion jvm-packages/xgboost4j-spark-gpu/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
</parent>
<name>xgboost4j-spark-gpu</name>
<artifactId>xgboost4j-spark-gpu_2.12</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion jvm-packages/xgboost4j-spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
</parent>
<name>xgboost4j-spark</name>
<artifactId>xgboost4j-spark_2.12</artifactId>
Expand Down
4 changes: 2 additions & 2 deletions jvm-packages/xgboost4j/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
<parent>
<groupId>ml.dmlc</groupId>
<artifactId>xgboost-jvm_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
</parent>
<name>xgboost4j</name>
<artifactId>xgboost4j_2.12</artifactId>
<version>3.0.0-SNAPSHOT</version>
<version>3.1.0-SNAPSHOT</version>
<packaging>jar</packaging>

<dependencies>
Expand Down
2 changes: 2 additions & 0 deletions ops/script/lint_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class LintersPaths:
"tests/python/test_early_stopping.py",
"tests/python/test_multi_target.py",
"tests/python/test_objectives.py",
"tests/python/test_ordinal.py",
"tests/python/test_predict.py",
"tests/python/test_quantile_dmatrix.py",
"tests/python/test_tracker.py",
Expand Down Expand Up @@ -101,6 +102,7 @@ class LintersPaths:
"tests/python-gpu/load_pickle.py",
"tests/python-gpu/test_gpu_training_continuation.py",
"tests/python/test_model_io.py",
"tests/python/test_ordinal.py",
"tests/test_distributed/test_federated/",
"tests/test_distributed/test_gpu_federated/",
"tests/test_distributed/test_with_dask/test_ranking.py",
Expand Down
2 changes: 1 addition & 1 deletion python-package/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ authors = [
{ name = "Hyunsu Cho", email = "[email protected]" },
{ name = "Jiaming Yuan", email = "[email protected]" }
]
version = "3.0.0-dev"
version = "3.1.0-dev"
requires-python = ">=3.10"
license = { text = "Apache-2.0" }
classifiers = [
Expand Down
2 changes: 1 addition & 1 deletion python-package/pyproject.toml.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors = [
{ name = "Hyunsu Cho", email = "[email protected]" },
{ name = "Jiaming Yuan", email = "[email protected]" }
]
version = "3.0.0-dev"
version = "3.1.0-dev"
requires-python = ">=3.10"
license = { text = "Apache-2.0" }
classifiers = [
Expand Down
2 changes: 1 addition & 1 deletion python-package/xgboost/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.0.0-dev
3.1.0-dev
Loading
Loading