Skip to content

Commit 17a0ff5

Browse files
pitrou and zanmato1984 authored
apacheGH-45190: [C++][Compute] Add rank_quantile function (apache#45259)
### Rationale for this change

Add a "rank_quantile" function following the Wikipedia definition: https://en.wikipedia.org/wiki/Percentile_rank

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes, an additional compute function.

* GitHub Issue: apache#45190

Lead-authored-by: Antoine Pitrou <[email protected]>
Co-authored-by: Rossi Sun <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
1 parent dc581f0 commit 17a0ff5

File tree

6 files changed

+429
-173
lines changed

6 files changed

+429
-173
lines changed

cpp/src/arrow/compute/api_vector.cc

+11
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ using compute::DictionaryEncodeOptions;
4848
using compute::FilterOptions;
4949
using compute::NullPlacement;
5050
using compute::RankOptions;
51+
using compute::RankQuantileOptions;
5152

5253
template <>
5354
struct EnumTraits<FilterOptions::NullSelectionBehavior>
@@ -151,6 +152,9 @@ static auto kRankOptionsType = GetFunctionOptionsType<RankOptions>(
151152
DataMember("sort_keys", &RankOptions::sort_keys),
152153
DataMember("null_placement", &RankOptions::null_placement),
153154
DataMember("tiebreaker", &RankOptions::tiebreaker));
155+
// Function-options type descriptor for RankQuantileOptions, built from the two
// data members it exposes ("sort_keys" and "null_placement"). This is the value
// the RankQuantileOptions constructor hands to the FunctionOptions base class.
static auto kRankQuantileOptionsType = GetFunctionOptionsType<RankQuantileOptions>(
156+
DataMember("sort_keys", &RankQuantileOptions::sort_keys),
157+
DataMember("null_placement", &RankQuantileOptions::null_placement));
154158
static auto kPairwiseOptionsType = GetFunctionOptionsType<PairwiseOptions>(
155159
DataMember("periods", &PairwiseOptions::periods));
156160
static auto kListFlattenOptionsType = GetFunctionOptionsType<ListFlattenOptions>(
@@ -228,6 +232,13 @@ RankOptions::RankOptions(std::vector<SortKey> sort_keys, NullPlacement null_plac
228232
tiebreaker(tiebreaker) {}
229233
constexpr char RankOptions::kTypeName[];
230234

235+
// Primary RankQuantileOptions constructor: forwards kRankQuantileOptionsType to
// the FunctionOptions base, moves the by-value `sort_keys` argument into the
// member (avoiding a second copy) and stores `null_placement` as given.
RankQuantileOptions::RankQuantileOptions(std::vector<SortKey> sort_keys,
236+
NullPlacement null_placement)
237+
: FunctionOptions(internal::kRankQuantileOptionsType),
238+
sort_keys(std::move(sort_keys)),
239+
null_placement(null_placement) {}
240+
// Namespace-scope definition of the static constexpr member declared in the
// class (redundant since C++17, where such members are implicitly inline).
constexpr char RankQuantileOptions::kTypeName[];
241+
231242
PairwiseOptions::PairwiseOptions(int64_t periods)
232243
: FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {}
233244
constexpr char PairwiseOptions::kTypeName[];

cpp/src/arrow/compute/api_vector.h

+19
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,25 @@ class ARROW_EXPORT RankOptions : public FunctionOptions {
195195
Tiebreaker tiebreaker;
196196
};
197197

198+
/// \brief Quantile rank options
///
/// Carries the sort keys and the null placement used when ranking.
/// NOTE(review): presumably consumed by the "rank_quantile" compute function
/// added in the same change -- confirm against the kernel registration.
199+
class ARROW_EXPORT RankQuantileOptions : public FunctionOptions {
200+
public:
201+
/// Primary constructor. By default there are no sort keys and
/// null_placement is NullPlacement::AtEnd.
explicit RankQuantileOptions(std::vector<SortKey> sort_keys = {},
202+
NullPlacement null_placement = NullPlacement::AtEnd);
203+
/// Convenience constructor for array inputs
///
/// Delegates to the primary constructor with a single SortKey whose name is
/// the empty string and whose order is `order`.
204+
explicit RankQuantileOptions(SortOrder order,
205+
NullPlacement null_placement = NullPlacement::AtEnd)
206+
: RankQuantileOptions({SortKey("", order)}, null_placement) {}
207+
208+
/// Name of this options type as a string constant.
static constexpr char const kTypeName[] = "RankQuantileOptions";
209+
/// Return a default-constructed RankQuantileOptions.
static RankQuantileOptions Defaults() { return RankQuantileOptions(); }
210+
211+
/// Column key(s) to order by and how to order by these sort keys.
212+
std::vector<SortKey> sort_keys;
213+
/// Whether nulls and NaNs are placed at the start or at the end
214+
NullPlacement null_placement;
215+
};
216+
198217
/// \brief Partitioning options for NthToIndices
199218
class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
200219
public:

0 commit comments

Comments
 (0)