22#include " direct_builder.h"
33#include " signals.h"
44
5+ #include < util/generic/overloaded.h>
56#include < ydb/core/formats/arrow/accessor/composite_serial/accessor.h>
67#include < ydb/core/formats/arrow/accessor/plain/constructor.h>
8+ #include < ydb/core/formats/arrow/accessor/sub_columns/json_value_path.h>
79#include < ydb/core/formats/arrow/save_load/loader.h>
810#include < ydb/core/formats/arrow/size_calcer.h>
911#include < ydb/core/formats/arrow/splitter/simple.h>
1012
1113#include < ydb/library/formats/arrow/protos/accessor.pb.h>
1214#include < ydb/library/formats/arrow/simple_arrays_cache.h>
1315
16+ #include < yql/essentials/minikql/jsonpath/parser/parser.h>
1417#include < yql/essentials/types/binary_json/format.h>
1518#include < yql/essentials/types/binary_json/write.h>
1619
@@ -113,103 +116,16 @@ TString TSubColumnsArray::SerializeToString(const TChunkConstructionData& extern
113116 return result;
114117}
115118
// Pre-change, in-file version of TJsonRestorer.
// Collects values into one JSON document (Result) addressed by dot-separated
// paths, and converts that document to BinaryJson in Finish().
116- class TJsonRestorer {
117- private:
// JSON document being assembled by SetValueByPath.
118- NJson::TJsonValue Result;
119-
120- public:
// Returns true when Result is not defined.
121- bool IsNull () const {
122- return !Result.IsDefined ();
123- }
124-
// Passes the string form of Result (GetStringRobust) to SerializeToBinaryJson.
// A TString alternative becomes a failed conclusion, a TBinaryJson alternative
// becomes the returned value; any other state is reported as a failure too.
125- TConclusion<NBinaryJson::TBinaryJson> Finish () {
126- auto bJson = NBinaryJson::SerializeToBinaryJson (Result.GetStringRobust ());
127- if (const TString* val = std::get_if<TString>(&bJson)) {
128- return TConclusionStatus::Fail (*val);
129- } else if (const NBinaryJson::TBinaryJson* val = std::get_if<NBinaryJson::TBinaryJson>(&bJson)) {
130- return std::move (*val);
131- } else {
132- return TConclusionStatus::Fail (" undefined case for binary json construction" );
133- }
134- }
135-
// Writes jsonValue into Result at the location described by path.
// The path is scanned character by character and cut into segments at dots
// that are outside quotes; each non-final segment selects or creates a child
// of the current node, and the final segment receives jsonValue.
136- void SetValueByPath (const TString& path, const NJson::TJsonValue& jsonValue) {
// start: offset of the first character of the segment being scanned.
137- ui32 start = 0 ;
// enqueue: currently inside a quoted part; wasEnqueue: this segment had quotes.
138- bool enqueue = false ;
139- bool wasEnqueue = false ;
140- NJson::TJsonValue* current = &Result;
141- for (ui32 i = 0 ; i < path.size (); ++i) {
// A backslash makes the scanner skip the character that follows it.
142- if (path[i] == ' \\ ' ) {
143- ++i;
144- continue ;
145- }
// A quote character toggles the quoted state.
146- if (path[i] == ' \' ' || path[i] == ' \" ' ) {
147- wasEnqueue = true ;
148- enqueue = !enqueue;
149- continue ;
150- }
// Characters inside quotes are not inspected, so a dot there does not split.
151- if (enqueue) {
152- continue ;
153- }
154- if (path[i] == ' .' ) {
155- if (wasEnqueue) {
// Quoted segment: the key drops the first and the last character of the segment.
156- AFL_VERIFY (i > start + 2 );
157- TStringBuf key (path.data () + start + 1 , (i - 1 ) - start - 1 );
158- NJson::TJsonValue* currentNext = nullptr ;
159- if (current->GetValuePointer (key, ¤tNext)) {
160- current = currentNext;
161- } else {
162- current = ¤t->InsertValue (key, NJson::JSON_MAP);
163- }
164- } else {
// Unquoted segment: the key is the raw segment text.
165- AFL_VERIFY (i > start);
166- TStringBuf key (path.data () + start, i - start);
167- NJson::TJsonValue* currentNext = nullptr ;
168- if (current->GetValuePointer (key, ¤tNext)) {
169- current = currentNext;
170- } else {
171- ui32 keyIndex;
// A key of the form [N] with numeric N addresses element N of an array:
// the current node is switched to array type and grown to hold that index.
172- if (key.StartsWith (" [" ) && key.EndsWith (" ]" ) && TryFromString<ui32>(key.data () + 1 , key.size () - 2 , keyIndex)) {
173- AFL_VERIFY (!current->IsDefined () || current->IsArray () || (current->IsMap () && current->GetMapSafe ().empty ()));
174- current->SetType (NJson::JSON_ARRAY);
175- if (current->GetArraySafe ().size () <= keyIndex) {
176- current->GetArraySafe ().resize (keyIndex + 1 );
177- }
178- current = ¤t->GetArraySafe ()[keyIndex];
179- } else {
// Any other missing key is inserted as a JSON_MAP child of a non-array node.
180- AFL_VERIFY (!current->IsArray ())(" current_type" , current->GetType ())(" current" , current->GetStringRobust ());
181- current = ¤t->InsertValue (key, NJson::JSON_MAP);
182- }
183- }
184- }
// The next segment begins right after this dot.
185- wasEnqueue = false ;
186- start = i + 1 ;
187- }
188- }
// Final segment: same key extraction as above, but jsonValue itself is stored.
189- if (wasEnqueue) {
190- AFL_VERIFY (path.size () > start + 2 )(" path" , path)(" start" , start);
191- TStringBuf key (path.data () + start + 1 , (path.size () - 1 ) - start - 1 );
192- current->InsertValue (key, jsonValue);
193- } else {
194- AFL_VERIFY (path.size () >= start)(" path" , path)(" start" , start);
195- TStringBuf key (path.data () + start, (path.size ()) - start);
196- ui32 keyIndex;
197- if (key.StartsWith (" [" ) && key.EndsWith (" ]" ) && TryFromString<ui32>(key.data () + 1 , key.size () - 2 , keyIndex)) {
198- AFL_VERIFY (!current->IsDefined () || current->IsArray () || (current->IsMap () && current->GetMapSafe ().empty ()));
199- current->SetType (NJson::JSON_ARRAY);
200-
201- if (current->GetArraySafe ().size () <= keyIndex) {
202- current->GetArraySafe ().resize (keyIndex + 1 );
203- }
204- current->GetArraySafe ()[keyIndex] = jsonValue;
205- } else {
206- AFL_VERIFY (!current->IsArray ())(" key" , key)(" current" , current->GetStringRobust ())(" full" , Result.GetStringRobust ())(
207- " current_type" , current->GetType ());
208- current->InsertValue (key, jsonValue);
209- }
210- }
211- }
212- };
119+ TConclusion<NBinaryJson::TBinaryJson> ToBinaryJson (const TJsonRestorer& restorer) {
120+ return std::visit (TOverloaded{
121+ [](TString&& val) -> TConclusion<NBinaryJson::TBinaryJson> {
122+ return TConclusionStatus::Fail (std::move (val));
123+ },
124+ [](NBinaryJson::TBinaryJson&& val) -> TConclusion<NBinaryJson::TBinaryJson> {
125+ return std::move (val);
126+ }},
127+ NBinaryJson::SerializeToBinaryJson (restorer.GetResult ().GetStringRobust ()));
128+ }
213129
214130std::shared_ptr<arrow::Array> TSubColumnsArray::BuildBJsonArray (const TColumnConstructionContext& context) const {
215131 auto it = BuildUnorderedIterator ();
@@ -233,7 +149,7 @@ std::shared_ptr<arrow::Array> TSubColumnsArray::BuildBJsonArray(const TColumnCon
233149 if (value.IsNull ()) {
234150 TStatusValidator::Validate (builder->AppendNull ());
235151 } else {
236- const TConclusion<NBinaryJson::TBinaryJson> bJson = value. Finish ( );
152+ const TConclusion<NBinaryJson::TBinaryJson> bJson = ToBinaryJson (value );
237153 NArrow::Append<arrow::BinaryType>(*builder, arrow::util::string_view (bJson->data (), bJson->size ()));
238154 }
239155 };
@@ -268,4 +184,32 @@ IChunkedArray::TLocalDataAddress TSubColumnsArray::DoGetLocalData(
268184 return TLocalDataAddress (BuildBJsonArray (TColumnConstructionContext ()), 0 , 0 );
269185}
270186
187+ bool TJsonRestorer::IsNull () const {
188+ return !Result.IsDefined ();
189+ }
190+
191+ const NJson::TJsonValue& TJsonRestorer::GetResult () const {
192+ return Result;
193+ }
194+
195+ void TJsonRestorer::SetValueByPath (const TString& path, const NJson::TJsonValue& jsonValue) {
196+ // Path may be empty (for backward compatibility), so make it $."" in this case
197+ auto splitResult = NSubColumns::SplitJsonPath (NSubColumns::ToJsonPath (path.empty () ? " \"\" " : path), NSubColumns::TJsonPathSplitSettings{.FillTypes = true });
198+ AFL_VERIFY (splitResult.IsSuccess ())(" error" , splitResult.GetErrorMessage ())(" path" , path);
199+ const auto [pathItems, pathTypes, _] = splitResult.DetachResult ();
200+ AFL_VERIFY (pathItems.size () > 0 );
201+ AFL_VERIFY (pathItems.size () == pathTypes.size ());
202+ NJson::TJsonValue* current = &Result;
203+ for (decltype (pathItems)::size_type i = 0 ; i < pathItems.size () - 1 ; ++i) {
204+ AFL_VERIFY (pathTypes[i] == NYql::NJsonPath::EJsonPathItemType::MemberAccess);
205+ NJson::TJsonValue* currentNext = nullptr ;
206+ if (current->GetValuePointer (pathItems[i], ¤tNext)) {
207+ current = currentNext;
208+ } else {
209+ current = ¤t->InsertValue (pathItems[i], NJson::JSON_MAP);
210+ }
211+ }
212+ current->InsertValue (pathItems[pathItems.size () - 1 ], jsonValue);
213+ }
214+
271215} // namespace NKikimr::NArrow::NAccessor
0 commit comments