Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store the collection information in a struct instead of a tuple #711

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 3 additions & 9 deletions include/podio/RNTupleReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "podio/SchemaEvolution.h"
#include "podio/podioVersion.h"
#include "podio/utilities/DatamodelRegistryIOHelpers.h"
#include "podio/utilities/RootHelpers.h"

#include <string>
#include <string_view>
Expand Down Expand Up @@ -161,15 +162,8 @@ class RNTupleReader {
std::unordered_map<std::string, std::vector<unsigned>> m_readerEntries{};
std::unordered_map<std::string, unsigned> m_totalEntries{};

/// Collection metadata for one category, stored as parallel vectors: the
/// entries at the same index of every member describe the same collection.
struct CollectionInfo {
/// The collection ids
std::vector<unsigned int> id{};
/// The collection names
std::vector<std::string> name{};
/// The collection (data) types, used to create the read buffers
std::vector<std::string> type{};
/// Per-collection flag: non-zero if the collection is a subset collection
std::vector<short> isSubsetCollection{};
/// The schema version of each collection type
std::vector<SchemaVersionT> schemaVersion{};
};

std::unordered_map<std::string, CollectionInfo> m_collectionInfo{};
/// Map each category to the collections that have been written and are available
std::unordered_map<std::string, std::vector<podio::root_utils::CollectionWriteInfo>> m_collectionInfo{};

std::vector<std::string> m_availableCategories{};

Expand Down
9 changes: 3 additions & 6 deletions include/podio/RNTupleWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,9 @@ class RNTupleWriter {
struct CategoryInfo {
std::unique_ptr<ROOT::Experimental::RNTupleWriter> writer{nullptr}; ///< The RNTupleWriter for this category

// The following are assumed to run in parallel!
std::vector<uint32_t> ids{}; ///< The ids of all collections
std::vector<std::string> names{}; ///< The names of all collections
std::vector<std::string> types{}; ///< The types of all collections
std::vector<short> subsetCollections{}; ///< The flags identifying the subcollections
std::vector<SchemaVersionT> schemaVersions{}; ///< The schema versions of all collections
/// Collection info for this category
std::vector<root_utils::CollectionWriteInfo> collInfo{};
std::vector<std::string> names{}; ///< The names of all collections to write

// Storage for the keys & values of all the parameters of this category
// (resp. at least the current entry)
Expand Down
2 changes: 1 addition & 1 deletion include/podio/ROOTLegacyReader.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class ROOTLegacyReader {
private:
std::pair<TTree*, unsigned> getLocalTreeAndEntry(const std::string& treename);

void createCollectionBranches(const std::vector<root_utils::CollectionWriteInfoT>& collInfo);
void createCollectionBranches(const std::vector<root_utils::CollectionWriteInfo>& collInfo);

podio::GenericParameters readEventMetaData();

Expand Down
9 changes: 4 additions & 5 deletions include/podio/ROOTWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,10 @@ class ROOTWriter {
/// Helper struct to group together all necessary state to write / process a
/// given category. Created during the first writing of a category
struct CategoryInfo {
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<root_utils::CollectionWriteInfoT> collInfo{}; ///< Collection info for this category
podio::CollectionIDTable idTable{}; ///< The collection id table for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category
TTree* tree{nullptr}; ///< The TTree to which this category is written
std::vector<root_utils::CollectionBranches> branches{}; ///< The branches for this category
std::vector<root_utils::CollectionWriteInfo> collInfo{}; ///< Collection info for this category
std::vector<std::string> collsToWrite{}; ///< The collections to write for this category

// Storage for the keys & values of all the parameters of this category
// (resp. at least the current entry)
Expand Down
9 changes: 9 additions & 0 deletions include/podio/utilities/RootHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,16 @@ namespace root_utils {
// A collection of additional information that describes a single collection:
// the collectionID, the collection (data) type, whether it is a subset
// collection, its schema version, and its name.
//
// NOTE: the member order is significant. Users of this struct initialize it
// positionally (emplace_back with the values in declaration order) and unpack
// it with structured bindings, so members must not be reordered.
struct CollectionWriteInfo {
uint32_t collectionID{static_cast<uint32_t>(-1)}; ///< collection id (defaults to the largest uint32_t value)
std::string dataType{}; ///< The fully qualified data type
bool isSubset{false}; ///< Whether this collection is a subset collection or not
unsigned int schemaVersion{0}; ///< The schema version of the collection type
std::string name{}; ///< The name of the collection (left empty when converted from the tuple format below, which has no name)
};
// The format used until version 1.2
using CollectionWriteInfoT = std::tuple<uint32_t, std::string, bool, unsigned int>;

// for backwards compatibility
using CollectionInfoWithoutSchemaT = std::tuple<int, std::string, bool>;

Expand Down
53 changes: 19 additions & 34 deletions src/RNTupleReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@
#include "podio/CollectionIDTable.h"
#include "podio/DatamodelRegistry.h"
#include "podio/GenericParameters.h"
#include "podio/utilities/RootHelpers.h"
#include "rootUtils.h"

#include <ROOT/RError.hxx>

#include <algorithm>
#include <cstdint>
#include <memory>

// Adjust for the move of this out of ROOT v7 in
Expand Down Expand Up @@ -48,27 +50,11 @@ bool RNTupleReader::initCategory(const std::string& category) {
// Assume that the metadata is the same in all files
auto filename = m_filenames[0];

auto& collInfo = m_collectionInfo[category];
auto collInfo = m_metadata_readers[filename]->GetView<std::vector<root_utils::CollectionWriteInfo>>(
{root_utils::collInfoName(category)});

auto id = m_metadata_readers[filename]->GetView<std::vector<unsigned int>>(root_utils::idTableName(category));
collInfo.id = id(0);

auto collectionName =
m_metadata_readers[filename]->GetView<std::vector<std::string>>(root_utils::collectionName(category));
collInfo.name = collectionName(0);

auto collectionType =
m_metadata_readers[filename]->GetView<std::vector<std::string>>(root_utils::collInfoName(category));
collInfo.type = collectionType(0);

auto subsetCollection =
m_metadata_readers[filename]->GetView<std::vector<short>>(root_utils::subsetCollection(category));
collInfo.isSubsetCollection = subsetCollection(0);

auto schemaVersion = m_metadata_readers[filename]->GetView<std::vector<SchemaVersionT>>("schemaVersion_" + category);
collInfo.schemaVersion = schemaVersion(0);

m_idTables[category] = std::make_shared<CollectionIDTable>(collInfo.id, collInfo.name);
m_collectionInfo[category] = collInfo(0);
m_idTables[category] = root_utils::makeCollIdTable(collInfo(0));

return true;
}
Expand Down Expand Up @@ -162,7 +148,7 @@ std::unique_ptr<ROOTFrameData> RNTupleReader::readEntry(const std::string& categ
// Make sure to not silently ignore non-existent but requested collections
if (!collsToRead.empty()) {
for (const auto& name : collsToRead) {
if (std::ranges::find(collInfo.name, name) == collInfo.name.end()) {
if (std::ranges::find(collInfo, name, &root_utils::CollectionWriteInfo::name) == collInfo.end()) {
throw std::invalid_argument(name + " is not available from Frame");
}
}
Expand All @@ -184,47 +170,46 @@ std::unique_ptr<ROOTFrameData> RNTupleReader::readEntry(const std::string& categ
// we set all the fields there in any case.
auto dentry = m_readers[category][readerIndex]->GetModel().CreateEntry();

for (size_t i = 0; i < collInfo.id.size(); ++i) {
if (!collsToRead.empty() && std::ranges::find(collsToRead, collInfo.name[i]) == collsToRead.end()) {
for (const auto& coll : collInfo) {
if (!collsToRead.empty() && std::ranges::find(collsToRead, coll.name) == collsToRead.end()) {
continue;
}
const auto& collType = collInfo.type[i];
const auto& collType = coll.dataType;
const auto& bufferFactory = podio::CollectionBufferFactory::instance();
auto maybeBuffers =
bufferFactory.createBuffers(collType, collInfo.schemaVersion[i], collInfo.isSubsetCollection[i]);
auto maybeBuffers = bufferFactory.createBuffers(collType, coll.schemaVersion, coll.isSubset);
auto collBuffers = maybeBuffers.value_or(podio::CollectionReadBuffers{});

if (!maybeBuffers) {
std::cout << "WARNING: Buffers couldn't be created for collection " << collInfo.name[i] << " of type "
<< collInfo.type[i] << " and schema version " << collInfo.schemaVersion[i] << std::endl;
std::cout << "WARNING: Buffers couldn't be created for collection " << coll.name << " of type " << coll.dataType
<< " and schema version " << coll.schemaVersion << std::endl;
return nullptr;
}

if (collInfo.isSubsetCollection[i]) {
auto brName = root_utils::subsetBranch(collInfo.name[i]);
if (coll.isSubset) {
auto brName = root_utils::subsetBranch(coll.name);
auto vec = new std::vector<podio::ObjectID>;
dentry->BindRawPtr(brName, vec);
collBuffers.references->at(0) = std::unique_ptr<std::vector<podio::ObjectID>>(vec);
} else {
dentry->BindRawPtr(collInfo.name[i], collBuffers.data);
dentry->BindRawPtr(coll.name, collBuffers.data);

const auto relVecNames = podio::DatamodelRegistry::instance().getRelationNames(collType);
for (size_t j = 0; j < relVecNames.relations.size(); ++j) {
const auto relName = relVecNames.relations[j];
auto vec = new std::vector<podio::ObjectID>;
const auto brName = root_utils::refBranch(collInfo.name[i], relName);
const auto brName = root_utils::refBranch(coll.name, relName);
dentry->BindRawPtr(brName, vec);
collBuffers.references->at(j) = std::unique_ptr<std::vector<podio::ObjectID>>(vec);
}

for (size_t j = 0; j < relVecNames.vectorMembers.size(); ++j) {
const auto vecName = relVecNames.vectorMembers[j];
const auto brName = root_utils::vecBranch(collInfo.name[i], vecName);
const auto brName = root_utils::vecBranch(coll.name, vecName);
dentry->BindRawPtr(brName, collBuffers.vectorMembers->at(j).second);
}
}

buffers.emplace(collInfo.name[i], std::move(collBuffers));
buffers.emplace(coll.name, std::move(collBuffers));
}

m_readers[category][readerIndex]->LoadEntry(localEntry, *dentry);
Expand Down
23 changes: 8 additions & 15 deletions src/RNTupleWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "podio/DatamodelRegistry.h"
#include "podio/SchemaEvolution.h"
#include "podio/podioVersion.h"
#include "podio/utilities/RootHelpers.h"
#include "rootUtils.h"

#include "TFile.h"
Expand Down Expand Up @@ -81,14 +82,13 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat
auto model = createModels(collections);
catInfo.writer = ROOT::Experimental::RNTupleWriter::Append(std::move(model), category, *m_file.get(), {});

catInfo.collInfo.reserve(collections.size());
for (const auto& [name, coll] : collections) {
catInfo.ids.emplace_back(coll->getID());
catInfo.types.emplace_back(coll->getTypeName());
catInfo.subsetCollections.emplace_back(coll->isSubsetCollection());
catInfo.schemaVersions.emplace_back(coll->getSchemaVersion());
catInfo.collInfo.emplace_back(coll->getID(), std::string(coll->getTypeName()), coll->isSubsetCollection(),
coll->getSchemaVersion(), name);
}
} else {
if (!root_utils::checkConsistentColls(catInfo.names, collsToWrite)) {
if (!root_utils::checkConsistentColls(catInfo.collInfo, collsToWrite)) {
throw std::runtime_error("Trying to write category '" + category + "' with inconsistent collection content. " +
root_utils::getInconsistentCollsMsg(catInfo.names, collsToWrite));
}
Expand Down Expand Up @@ -251,16 +251,9 @@ void RNTupleWriter::finish() {
}

for (auto& [category, collInfo] : m_categories) {
auto idField = metadata->MakeField<std::vector<unsigned int>>({root_utils::idTableName(category)});
*idField = collInfo.ids;
auto collectionNameField = metadata->MakeField<std::vector<std::string>>({root_utils::collectionName(category)});
*collectionNameField = collInfo.names;
auto collectionTypeField = metadata->MakeField<std::vector<std::string>>({root_utils::collInfoName(category)});
*collectionTypeField = collInfo.types;
auto subsetCollectionField = metadata->MakeField<std::vector<short>>({root_utils::subsetCollection(category)});
*subsetCollectionField = collInfo.subsetCollections;
auto schemaVersionField = metadata->MakeField<std::vector<SchemaVersionT>>({"schemaVersion_" + category});
*schemaVersionField = collInfo.schemaVersions;
auto collInfoField =
metadata->MakeField<std::vector<root_utils::CollectionWriteInfo>>({root_utils::collInfoName(category)});
*collInfoField = collInfo.collInfo;
}

metadata->Freeze();
Expand Down
11 changes: 8 additions & 3 deletions src/ROOTLegacyReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ void ROOTLegacyReader::openFiles(const std::vector<std::string>& filenames) {
collInfoBranch->SetAddress(&collectionInfo);
collInfoBranch->GetEntry(0);
}
createCollectionBranches(*collectionInfo);
std::vector<root_utils::CollectionWriteInfo> collInfo;
collInfo.reserve(collectionInfo->size());
for (auto& [id, typeName, isSubsetColl, schemaVersion] : *collectionInfo) {
collInfo.emplace_back(id, std::move(typeName), isSubsetColl, schemaVersion);
}
createCollectionBranches(collInfo);
delete collectionInfo;
} else {
std::cout << "PODIO: Reconstructing CollectionTypeInfo branch from other sources in file: \'"
Expand All @@ -170,10 +175,10 @@ unsigned ROOTLegacyReader::getEntries(const std::string& name) const {
return m_chain->GetEntries();
}

void ROOTLegacyReader::createCollectionBranches(const std::vector<root_utils::CollectionWriteInfoT>& collInfo) {
void ROOTLegacyReader::createCollectionBranches(const std::vector<root_utils::CollectionWriteInfo>& collInfo) {
size_t collectionIndex{0};

for (const auto& [collID, collType, isSubsetColl, collSchemaVersion] : collInfo) {
for (const auto& [collID, collType, isSubsetColl, collSchemaVersion, _] : collInfo) {
// We only write collections that are in the collectionIDTable, so no need
// to check here
const auto name = m_table->name(collID).value();
Expand Down
Loading
Loading