@@ -593,6 +593,107 @@ namespace orc {
593
593
int32_t toScale;
594
594
};
595
595
596
+ template <typename FileTypeBatch>
597
+ class DecimalToTimestampColumnReader : public ConvertToTimestampColumnReader {
598
+ public:
599
+ DecimalToTimestampColumnReader (const Type& _readType, const Type& fileType,
600
+ StripeStreams& stripe, bool _throwOnOverflow)
601
+ : ConvertToTimestampColumnReader(_readType, fileType, stripe, _throwOnOverflow),
602
+ precision (static_cast <int32_t >(fileType.getPrecision())),
603
+ scale(static_cast <int32_t >(fileType.getScale())) {}
604
+
605
+ void next (ColumnVectorBatch& rowBatch, uint64_t numValues, char * notNull) override {
606
+ ConvertColumnReader::next (rowBatch, numValues, notNull);
607
+ const auto & srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get ());
608
+ auto & dstBatch = *SafeCastBatchTo<TimestampVectorBatch*>(&rowBatch);
609
+ for (uint64_t i = 0 ; i < rowBatch.numElements ; ++i) {
610
+ if (!rowBatch.hasNulls || rowBatch.notNull [i]) {
611
+ convertDecimalToTimestamp (dstBatch, i, srcBatch);
612
+ }
613
+ }
614
+ }
615
+
616
+ private:
617
+ void convertDecimalToTimestamp (TimestampVectorBatch& dstBatch, uint64_t idx,
618
+ const FileTypeBatch& srcBatch) {
619
+ constexpr int SecondToNanoFactor = 9 ;
620
+ // Following constant comes from java.time.Instant
621
+ // '-1000000000-01-01T00:00Z'
622
+ constexpr int64_t MIN_EPOCH_SECONDS = -31557014167219200L ;
623
+ // '1000000000-12-31T23:59:59.999999999Z'
624
+ constexpr int64_t MAX_EPOCH_SECONDS = 31556889864403199L ;
625
+ // dummy variable, there's no risk of overflow
626
+ bool overflow = false ;
627
+
628
+ Int128 i128 (srcBatch.values [idx]);
629
+ Int128 integerPortion = scaleDownInt128ByPowerOfTen (i128, scale);
630
+ if (integerPortion < MIN_EPOCH_SECONDS || integerPortion > MAX_EPOCH_SECONDS) {
631
+ handleOverflow<Decimal, int64_t >(dstBatch, idx, throwOnOverflow);
632
+ return ;
633
+ }
634
+ i128 -= scaleUpInt128ByPowerOfTen (integerPortion, scale, overflow);
635
+ Int128 fractionPortion = std::move (i128);
636
+ if (scale < SecondToNanoFactor) {
637
+ fractionPortion =
638
+ scaleUpInt128ByPowerOfTen (fractionPortion, SecondToNanoFactor - scale, overflow);
639
+ } else {
640
+ fractionPortion = scaleDownInt128ByPowerOfTen (fractionPortion, scale - SecondToNanoFactor);
641
+ }
642
+ if (fractionPortion < 0 ) {
643
+ fractionPortion += 1e9 ;
644
+ integerPortion -= 1 ;
645
+ }
646
+ // line 630 has guaranteed toLong() will not overflow
647
+ dstBatch.data [idx] = integerPortion.toLong ();
648
+ dstBatch.nanoseconds [idx] = fractionPortion.toLong ();
649
+
650
+ if (needConvertTimezone) {
651
+ dstBatch.data [idx] = readerTimezone.convertFromUTC (dstBatch.data [idx]);
652
+ }
653
+ }
654
+
655
+ const int32_t precision;
656
+ const int32_t scale;
657
+ };
658
+
659
+ template <typename FileTypeBatch>
660
+ class DecimalToStringVariantColumnReader : public ConvertToStringVariantColumnReader {
661
+ public:
662
+ DecimalToStringVariantColumnReader (const Type& _readType, const Type& fileType,
663
+ StripeStreams& stripe, bool _throwOnOverflow)
664
+ : ConvertToStringVariantColumnReader(_readType, fileType, stripe, _throwOnOverflow),
665
+ scale (fileType.getScale()) {}
666
+
667
+ uint64_t convertToStrBuffer (ColumnVectorBatch& rowBatch, uint64_t numValues) override {
668
+ uint64_t size = 0 ;
669
+ strBuffer.resize (numValues);
670
+ const auto & srcBatch = *SafeCastBatchTo<const FileTypeBatch*>(data.get ());
671
+ if (readType.getKind () == STRING) {
672
+ for (uint64_t i = 0 ; i < rowBatch.numElements ; ++i) {
673
+ if (!rowBatch.hasNulls || rowBatch.notNull [i]) {
674
+ strBuffer[i] = Int128 (srcBatch.values [i]).toDecimalString (scale, true );
675
+ size += strBuffer[i].size ();
676
+ }
677
+ }
678
+ } else {
679
+ const auto maxLength = readType.getMaximumLength ();
680
+ for (uint64_t i = 0 ; i < rowBatch.numElements ; ++i) {
681
+ if (!rowBatch.hasNulls || rowBatch.notNull [i]) {
682
+ strBuffer[i] = Int128 (srcBatch.values [i]).toDecimalString (scale, true );
683
+ }
684
+ if (strBuffer[i].size () > maxLength) {
685
+ strBuffer[i].resize (maxLength);
686
+ }
687
+ size += strBuffer[i].size ();
688
+ }
689
+ }
690
+ return size;
691
+ }
692
+
693
+ private:
694
+ const int32_t scale;
695
+ };
696
+
596
697
// Declares the alias <FROM>To<TO>ColumnReader as the NumericConvertColumnReader
// instantiation that converts FROM##VectorBatch into TO##VectorBatch using TYPE.
// The '(' must follow the macro name directly: with whitespace in between the
// preprocessor would treat this as an object-like macro.
#define DEFINE_NUMERIC_CONVERT_READER(FROM, TO, TYPE) \
  using FROM##To##TO##ColumnReader =                  \
      NumericConvertColumnReader<FROM##VectorBatch, TO##VectorBatch, TYPE>;
@@ -621,6 +722,14 @@ namespace orc {
621
722
using Decimal128##To##TO##ColumnReader = \
622
723
DecimalConvertColumnReader<Decimal128VectorBatch, TO##VectorBatch>;
623
724
725
// Declares the Decimal64/Decimal128 -> Timestamp reader aliases as the two
// instantiations of DecimalToTimestampColumnReader.
#define DEFINE_DECIMAL_CONVERT_TO_TIMESTAMP_READER          \
  using Decimal64ToTimestampColumnReader =                  \
      DecimalToTimestampColumnReader<Decimal64VectorBatch>; \
  using Decimal128ToTimestampColumnReader =                 \
      DecimalToTimestampColumnReader<Decimal128VectorBatch>;
728
+
729
// Declares Decimal64To<TO>ColumnReader and Decimal128To<TO>ColumnReader as the
// two instantiations of DecimalToStringVariantColumnReader.
// The '(' must follow the macro name directly: with whitespace in between the
// preprocessor would treat this as an object-like macro.
// NOTE(review): "VARINT" in the name looks like a typo for "VARIANT"; it is
// kept unchanged because the macro is invoked under this name.
#define DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(TO)    \
  using Decimal64To##TO##ColumnReader =                       \
      DecimalToStringVariantColumnReader<Decimal64VectorBatch>; \
  using Decimal128To##TO##ColumnReader =                      \
      DecimalToStringVariantColumnReader<Decimal128VectorBatch>;
732
+
624
733
DEFINE_NUMERIC_CONVERT_READER (Boolean , Byte , int8_t )
625
734
DEFINE_NUMERIC_CONVERT_READER(Boolean , Short, int16_t )
626
735
DEFINE_NUMERIC_CONVERT_READER(Boolean , Int, int32_t )
@@ -720,6 +829,11 @@ namespace orc {
720
829
DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(Decimal64)
721
830
DEFINE_DECIMAL_CONVERT_TO_DECIMAL_READER(Decimal128)
722
831
832
// Instantiate the decimal -> timestamp reader aliases, then the
// decimal -> string-like reader aliases for String, Char and Varchar.
+ DEFINE_DECIMAL_CONVERT_TO_TIMESTAMP_READER
833
+ DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(String)
834
+ DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(Char)
835
+ DEFINE_DECIMAL_CONVERT_TO_STRING_VARINT_READER(Varchar)
836
+
723
837
// Expands to a return statement that builds reader NAME from the identifiers
// _readType, fileType, stripe and throwOnOverflow, which must be in scope at
// the expansion site.
// The '(' must follow the macro name directly: with whitespace in between the
// preprocessor would treat this as an object-like macro.
#define CREATE_READER(NAME) \
  return std::make_unique<NAME>(_readType, fileType, stripe, throwOnOverflow);
725
839
@@ -935,13 +1049,6 @@ namespace orc {
935
1049
CASE_EXCEPTION
936
1050
}
937
1051
}
938
- case STRING:
939
- case BINARY:
940
- case TIMESTAMP:
941
- case LIST:
942
- case MAP:
943
- case STRUCT:
944
- case UNION:
945
1052
case DECIMAL: {
946
1053
switch (_readType.getKind ()) {
947
1054
CASE_CREATE_FROM_DECIMAL_READER (BOOLEAN, Boolean )
@@ -951,6 +1058,11 @@ namespace orc {
951
1058
CASE_CREATE_FROM_DECIMAL_READER (LONG, Long)
952
1059
CASE_CREATE_FROM_DECIMAL_READER (FLOAT, Float)
953
1060
CASE_CREATE_FROM_DECIMAL_READER (DOUBLE, Double)
1061
+ CASE_CREATE_FROM_DECIMAL_READER (STRING, String)
1062
+ CASE_CREATE_FROM_DECIMAL_READER (CHAR, Char)
1063
+ CASE_CREATE_FROM_DECIMAL_READER (VARCHAR, Varchar)
1064
+ CASE_CREATE_FROM_DECIMAL_READER (TIMESTAMP, Timestamp)
1065
+ CASE_CREATE_FROM_DECIMAL_READER (TIMESTAMP_INSTANT, Timestamp)
954
1066
case DECIMAL: {
955
1067
if (isDecimal64 (fileType)) {
956
1068
if (isDecimal64 (_readType)) {
@@ -966,11 +1078,6 @@ namespace orc {
966
1078
}
967
1079
}
968
1080
}
969
- case STRING:
970
- case CHAR:
971
- case VARCHAR:
972
- case TIMESTAMP:
973
- case TIMESTAMP_INSTANT:
974
1081
case BINARY:
975
1082
case LIST:
976
1083
case MAP:
@@ -980,6 +1087,13 @@ namespace orc {
980
1087
CASE_EXCEPTION
981
1088
}
982
1089
}
1090
+ case STRING:
1091
+ case BINARY:
1092
+ case TIMESTAMP:
1093
+ case LIST:
1094
+ case MAP:
1095
+ case STRUCT:
1096
+ case UNION:
983
1097
case DATE:
984
1098
case VARCHAR:
985
1099
case CHAR:
0 commit comments